307 files changed, 216297 insertions, 0 deletions
diff --git a/contrib/ffmpeg/libavcodec/4xm.c b/contrib/ffmpeg/libavcodec/4xm.c
new file mode 100644
index 000000000..ea60e9bf2
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/4xm.c
@@ -0,0 +1,759 @@
+/*
+ * 4XM codec
+ * Copyright (c) 2003 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file 4xm.c
+ * 4XM codec.
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+//#undef NDEBUG
+//#include <assert.h>
+
+#define BLOCK_TYPE_VLC_BITS 5
+#define ACDC_VLC_BITS 9
+
+#define CFRAME_BUFFER_COUNT 100
+
+static const uint8_t block_type_tab[4][8][2]={
+  {   //{8,4,2}x{8,4,2}
+    { 0,1}, { 2,2}, { 6,3}, {14,4}, {30,5}, {31,5}, { 0,0}
+  },{ //{8,4}x1
+    { 0,1}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4}, { 0,0}
+  },{ //1x{8,4}
+    { 0,1}, { 2,2}, { 0,0}, { 6,3}, {14,4}, {15,4}, { 0,0}
+  },{ //1x2, 2x1
+    { 0,1}, { 0,0}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4}
+  }
+};
+
+static const uint8_t size2index[4][4]={
+  {-1, 3, 1, 1},
+  { 3, 0, 0, 0},
+  { 2, 0, 0, 0},
+  { 2, 0, 0, 0},
+};
+
+static const int8_t mv[256][2]={
+{  0,  0},{  0, -1},{ -1,  0},{  1,  0},{  0,  1},{ -1, -1},{  1, -1},{ -1,  1},
+{  1,  1},{  0, -2},{ -2,  0},{  2,  0},{  0,  2},{ -1, -2},{  1, -2},{ -2, -1},
+{  2, -1},{ -2,  1},{  2,  1},{ -1,  2},{  1,  2},{ -2, -2},{  2, -2},{ -2,  2},
+{  2,  2},{  0, -3},{ -3,  0},{  3,  0},{  0,  3},{ -1, -3},{  1, -3},{ -3, -1},
+{  3, -1},{ -3,  1},{  3,  1},{ -1,  3},{  1,  3},{ -2, -3},{  2, -3},{ -3, -2},
+{  3, -2},{ -3,  2},{  3,  2},{ -2,  3},{  2,  3},{  0, -4},{ -4,  0},{  4,  0},
+{  0,  4},{ -1, -4},{  1, -4},{ -4, -1},{  4, -1},{  4,  1},{ -1,  4},{  1,  4},
+{ -3, -3},{ -3,  3},{  3,  3},{ -2, -4},{ -4, -2},{  4, -2},{ -4,  2},{ -2,  4},
+{  2,  4},{ -3, -4},{  3, -4},{  4, -3},{ -5,  0},{ -4,  3},{ -3,  4},{  3,  4},
+{ -1, -5},{ -5, -1},{ -5,  1},{ -1,  5},{ -2, -5},{  2, -5},{  5, -2},{  5,  2},
+{ -4, -4},{ -4,  4},{ -3, -5},{ -5, -3},{ -5,  3},{  3,  5},{ -6,  0},{  0,  6},
+{ -6, -1},{ -6,  1},{  1,  6},{  2, -6},{ -6,  2},{  2,  6},{ -5, -4},{  5,  4},
+{  4,  5},{ -6, -3},{  6,  3},{ -7,  0},{ -1, -7},{  5, -5},{ -7,  1},{ -1,  7},
+{  4, -6},{  6,  4},{ -2, -7},{ -7,  2},{ -3, -7},{  7, -3},{  3,  7},{  6, -5},
+{  0, -8},{ -1, -8},{ -7, -4},{ -8,  1},{  4,  7},{  2, -8},{ -2,  8},{  6,  6},
+{ -8,  3},{  5, -7},{ -5,  7},{  8, -4},{  0, -9},{ -9, -1},{  1,  9},{  7, -6},
+{ -7,  6},{ -5, -8},{ -5,  8},{ -9,  3},{  9, -4},{  7, -7},{  8, -6},{  6,  8},
+{ 10,  1},{-10,  2},{  9, -5},{ 10, -3},{ -8, -7},{-10, -4},{  6, -9},{-11,  0},
+{ 11,  1},{-11, -2},{ -2, 11},{  7, -9},{ -7,  9},{ 10,  6},{ -4, 11},{  8, -9},
+{  8,  9},{  5, 11},{  7,-10},{ 12, -3},{ 11,  6},{ -9, -9},{  8, 10},{  5, 12},
+{-11,  7},{ 13,  2},{  6,-12},{ 10,  9},{-11,  8},{ -7, 12},{  0, 14},{ 14, -2},
+{ -9, 11},{ -6, 13},{-14, -4},{ -5,-14},{  5, 14},{-15, -1},{-14, -6},{  3,-15},
+{ 11,-11},{ -7, 14},{ -5, 15},{  8,-14},{ 15,  6},{  3, 16},{  7,-15},{-16,  5},
+{  0, 17},{-16, -6},{-10, 14},{-16,  7},{ 12, 13},{-16,  8},{-17,  6},{-18,  3},
+{ -7, 17},{ 15, 11},{ 16, 10},{  2,-19},{  3,-19},{-11,-16},{-18,  8},{-19, -6},
+{  2,-20},{-17,-11},{-10,-18},{  8, 19},{-21, -1},{-20,  7},{ -4, 21},{ 21,  5},
+{ 15, 16},{  2,-22},{-10,-20},{-22,  5},{ 20,-11},{ -7,-22},{-12, 20},{ 23, -5},
+{ 13,-20},{ 24, -2},{-15, 19},{-11, 22},{ 16, 19},{ 23,-10},{-18,-18},{ -9,-24},
+{ 24,-10},{ -3, 26},{-23, 13},{-18,-20},{ 17, 21},{ -4, 27},{ 27,  6},{  1,-28},
+{-11, 26},{-17,-23},{  7, 28},{ 11,-27},{ 29,  5},{-23,-19},{-28,-11},{-21, 22},
+{-30,  7},{-17, 26},{-27, 16},{ 13, 29},{ 19,-26},{ 10,-31},{-14,-30},{ 20,-27},
+{-29, 18},{-16,-31},{-28,-22},{ 21,-30},{-25, 28},{ 26,-29},{ 25,-32},{-32,-32}
+};
+
+// this is simply the scaled down elementwise product of the standard jpeg quantizer table and the AAN premul table
+static const uint8_t dequant_table[64]={
+ 16, 15, 13, 19, 24, 31, 28, 17,
+ 17, 23, 25, 31, 36, 63, 45, 21,
+ 18, 24, 27, 37, 52, 59, 49, 20,
+ 16, 28, 34, 40, 60, 80, 51, 20,
+ 18, 31, 48, 66, 68, 86, 56, 21,
+ 19, 38, 56, 59, 64, 64, 48, 20,
+ 27, 48, 55, 55, 56, 51, 35, 15,
+ 20, 35, 34, 32, 31, 22, 15,  8,
+};
+
+static VLC block_type_vlc[4];
+
+
+typedef struct CFrameBuffer{
+    unsigned int allocated_size;
+    unsigned int size;
+    int id;
+    uint8_t *data;
+}CFrameBuffer;
+
+typedef struct FourXContext{
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    AVFrame current_picture, last_picture;
+    GetBitContext pre_gb;          ///< ac/dc prefix
+    GetBitContext gb;
+    uint8_t *bytestream;
+    uint16_t *wordstream;
+    int mv[256];
+    VLC pre_vlc;
+    int last_dc;
+    DECLARE_ALIGNED_8(DCTELEM, block[6][64]);
+    uint8_t *bitstream_buffer;
+    unsigned int bitstream_buffer_size;
+    CFrameBuffer cfrm[CFRAME_BUFFER_COUNT];
+} FourXContext;
+
+
+#define FIX_1_082392200  70936
+#define FIX_1_414213562  92682
+#define FIX_1_847759065 121095
+#define FIX_2_613125930 171254
+
+#define MULTIPLY(var,const)  (((var)*(const)) >> 16)
+
+static void idct(DCTELEM block[64]){
+    int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    int tmp10, tmp11, tmp12, tmp13;
+    int z5, z10, z11, z12, z13;
+    int i;
+    int temp[64];
+
+    for(i=0; i<8; i++){
+        tmp10 = block[8*0 + i] + block[8*4 + i];
+        tmp11 = block[8*0 + i] - block[8*4 + i];
+
+        tmp13 =          block[8*2 + i] + block[8*6 + i];
+        tmp12 = MULTIPLY(block[8*2 + i] - block[8*6 + i], FIX_1_414213562) - tmp13;
+
+        tmp0 = tmp10 + tmp13;
+        tmp3 = tmp10 - tmp13;
+        tmp1 = tmp11 + tmp12;
+        tmp2 = tmp11 - tmp12;
+
+        z13 = block[8*5 + i] + block[8*3 + i];
+        z10 = block[8*5 + i] - block[8*3 + i];
+        z11 = block[8*1 + i] + block[8*7 + i];
+        z12 = block[8*1 + i] - block[8*7 + i];
+
+        tmp7  =          z11 + z13;
+        tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
+
+        z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
+        tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
+        tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
+
+        tmp6 = tmp12 - tmp7;
+        tmp5 = tmp11 - tmp6;
+        tmp4 = tmp10 + tmp5;
+
+        temp[8*0 + i] = tmp0 + tmp7;
+        temp[8*7 + i] = tmp0 - tmp7;
+        temp[8*1 + i] = tmp1 + tmp6;
+        temp[8*6 + i] = tmp1 - tmp6;
+        temp[8*2 + i] = tmp2 + tmp5;
+        temp[8*5 + i] = tmp2 - tmp5;
+        temp[8*4 + i] = tmp3 + tmp4;
+        temp[8*3 + i] = tmp3 - tmp4;
+    }
+
+    for(i=0; i<8*8; i+=8){
+        tmp10 = temp[0 + i] + temp[4 + i];
+        tmp11 = temp[0 + i] - temp[4 + i];
+
+        tmp13 = temp[2 + i] + temp[6 + i];
+        tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13;
+
+        tmp0 = tmp10 + tmp13;
+        tmp3 = tmp10 - tmp13;
+        tmp1 = tmp11 + tmp12;
+        tmp2 = tmp11 - tmp12;
+
+        z13 = temp[5 + i] + temp[3 + i];
+        z10 = temp[5 + i] - temp[3 + i];
+        z11 = temp[1 + i] + temp[7 + i];
+        z12 = temp[1 + i] - temp[7 + i];
+
+        tmp7 = z11 + z13;
+        tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
+
+        z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
+        tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
+        tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
+
+        tmp6 = tmp12 - tmp7;
+        tmp5 = tmp11 - tmp6;
+        tmp4 = tmp10 + tmp5;
+
+        block[0 + i] = (tmp0 + tmp7)>>6;
+        block[7 + i] = (tmp0 - tmp7)>>6;
+        block[1 + i] = (tmp1 + tmp6)>>6;
+        block[6 + i] = (tmp1 - tmp6)>>6;
+        block[2 + i] = (tmp2 + tmp5)>>6;
+        block[5 + i] = (tmp2 - tmp5)>>6;
+        block[4 + i] = (tmp3 + tmp4)>>6;
+        block[3 + i] = (tmp3 - tmp4)>>6;
+    }
+}
+
+static void init_vlcs(FourXContext *f){
+    int i;
+
+    for(i=0; i<4; i++){
+        init_vlc(&block_type_vlc[i], BLOCK_TYPE_VLC_BITS, 7,
+                 &block_type_tab[i][0][1], 2, 1,
+                 &block_type_tab[i][0][0], 2, 1, 1);
+    }
+}
+
+static void init_mv(FourXContext *f){
+    int i;
+
+    for(i=0; i<256; i++){
+        f->mv[i] = mv[i][0] + mv[i][1]*f->current_picture.linesize[0]/2;
+    }
+}
+
+static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w, int h, int stride, int scale, int dc){
+   int i;
+   dc*= 0x10001;
+
+   switch(log2w){
+   case 0:
+        for(i=0; i<h; i++){
+            dst[0] = scale*src[0] + dc;
+            if(scale) src += stride;
+            dst += stride;
+        }
+        break;
+    case 1:
+        for(i=0; i<h; i++){
+            ((uint32_t*)dst)[0] = scale*((uint32_t*)src)[0] + dc;
+            if(scale) src += stride;
+            dst += stride;
+        }
+        break;
+    case 2:
+        for(i=0; i<h; i++){
+            ((uint32_t*)dst)[0] = scale*((uint32_t*)src)[0] + dc;
+            ((uint32_t*)dst)[1] = scale*((uint32_t*)src)[1] + dc;
+            if(scale) src += stride;
+            dst += stride;
+        }
+        break;
+    case 3:
+        for(i=0; i<h; i++){
+            ((uint32_t*)dst)[0] = scale*((uint32_t*)src)[0] + dc;
+            ((uint32_t*)dst)[1] = scale*((uint32_t*)src)[1] + dc;
+            ((uint32_t*)dst)[2] = scale*((uint32_t*)src)[2] + dc;
+            ((uint32_t*)dst)[3] = scale*((uint32_t*)src)[3] + dc;
+            if(scale) src += stride;
+            dst += stride;
+        }
+        break;
+    default: assert(0);
+    }
+}
+
+static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src, int log2w, int log2h, int stride){
+    const int index= size2index[log2h][log2w];
+    const int h= 1<<log2h;
+    int code= get_vlc2(&f->gb, block_type_vlc[index].table, BLOCK_TYPE_VLC_BITS, 1);
+
+    assert(code>=0 && code<=6);
+
+    if(code == 0){
+        src += f->mv[ *f->bytestream++ ];
+        mcdc(dst, src, log2w, h, stride, 1, 0);
+    }else if(code == 1){
+        log2h--;
+        decode_p_block(f, dst                  , src                  , log2w, log2h, stride);
+        decode_p_block(f, dst + (stride<<log2h), src + (stride<<log2h), log2w, log2h, stride);
+    }else if(code == 2){
+        log2w--;
+        decode_p_block(f, dst             , src             , log2w, log2h, stride);
+        decode_p_block(f, dst + (1<<log2w), src + (1<<log2w), log2w, log2h, stride);
+    }else if(code == 4){
+        src += f->mv[ *f->bytestream++ ];
+        mcdc(dst, src, log2w, h, stride, 1, le2me_16(*f->wordstream++));
+    }else if(code == 5){
+        mcdc(dst, src, log2w, h, stride, 0, le2me_16(*f->wordstream++));
+    }else if(code == 6){
+        if(log2w){
+            dst[0] = le2me_16(*f->wordstream++);
+            dst[1] = le2me_16(*f->wordstream++);
+        }else{
+            dst[0     ] = le2me_16(*f->wordstream++);
+            dst[stride] = le2me_16(*f->wordstream++);
+        }
+    }
+}
+
+static int get32(void *p){
+    return le2me_32(*(uint32_t*)p);
+}
+
+static int decode_p_frame(FourXContext *f, uint8_t *buf, int length){
+    int x, y;
+    const int width= f->avctx->width;
+    const int height= f->avctx->height;
+    uint16_t *src= (uint16_t*)f->last_picture.data[0];
+    uint16_t *dst= (uint16_t*)f->current_picture.data[0];
+    const int stride= f->current_picture.linesize[0]>>1;
+    const unsigned int bitstream_size= get32(buf+8);
+    const unsigned int bytestream_size= get32(buf+16);
+    const unsigned int wordstream_size= get32(buf+12);
+
+    if(bitstream_size+ bytestream_size+ wordstream_size + 20 != length
+       || bitstream_size  > (1<<26)
+       || bytestream_size > (1<<26)
+       || wordstream_size > (1<<26)
+       ){
+        av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size,
+        bitstream_size+ bytestream_size+ wordstream_size - length);
+        return -1;
+    }
+
+    f->bitstream_buffer= av_fast_realloc(f->bitstream_buffer, &f->bitstream_buffer_size, bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    f->dsp.bswap_buf((uint32_t*)f->bitstream_buffer, (uint32_t*)(buf + 20), bitstream_size/4);
+    init_get_bits(&f->gb, f->bitstream_buffer, 8*bitstream_size);
+
+    f->wordstream= (uint16_t*)(buf + 20 + bitstream_size);
+    f->bytestream= buf + 20 + bitstream_size + wordstream_size;
+
+    init_mv(f);
+
+    for(y=0; y<height; y+=8){
+        for(x=0; x<width; x+=8){
+            decode_p_block(f, dst + x, src + x, 3, 3, stride);
+        }
+        src += 8*stride;
+        dst += 8*stride;
+    }
+
+    if(bitstream_size != (get_bits_count(&f->gb)+31)/32*4)
+        av_log(f->avctx, AV_LOG_ERROR, " %d %td %td bytes left\n",
+            bitstream_size - (get_bits_count(&f->gb)+31)/32*4,
+            bytestream_size - (f->bytestream - (buf + 20 + bitstream_size + wordstream_size)),
+            wordstream_size - (((uint8_t*)f->wordstream) - (buf + 20 + bitstream_size))
+        );
+
+    return 0;
+}
+
+/**
+ * decode block and dequantize.
+ * Note this is allmost identical to mjpeg
+ */
+static int decode_i_block(FourXContext *f, DCTELEM *block){
+    int code, i, j, level, val;
+
+    /* DC coef */
+    val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
+    if (val>>4){
+        av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
+    }
+
+    if(val)
+        val = get_xbits(&f->gb, val);
+
+    val = val * dequant_table[0] + f->last_dc;
+    f->last_dc =
+    block[0] = val;
+    /* AC coefs */
+    i = 1;
+    for(;;) {
+        code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
+
+        /* EOB */
+        if (code == 0)
+            break;
+        if (code == 0xf0) {
+            i += 16;
+        } else {
+            level = get_xbits(&f->gb, code & 0xf);
+            i += code >> 4;
+            if (i >= 64) {
+                av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
+                return 0;
+            }
+
+            j= ff_zigzag_direct[i];
+            block[j] = level * dequant_table[j];
+            i++;
+            if (i >= 64)
+                break;
+        }
+    }
+
+    return 0;
+}
+
+static inline void idct_put(FourXContext *f, int x, int y){
+    DCTELEM (*block)[64]= f->block;
+    int stride= f->current_picture.linesize[0]>>1;
+    int i;
+    uint16_t *dst = ((uint16_t*)f->current_picture.data[0]) + y * stride + x;
+
+    for(i=0; i<4; i++){
+        block[i][0] += 0x80*8*8;
+        idct(block[i]);
+    }
+
+    if(!(f->avctx->flags&CODEC_FLAG_GRAY)){
+        for(i=4; i<6; i++) idct(block[i]);
+    }
+
+/* Note transform is:
+y= ( 1b + 4g + 2r)/14
+cb=( 3b - 2g - 1r)/14
+cr=(-1b - 4g + 5r)/14
+*/
+    for(y=0; y<8; y++){
+        for(x=0; x<8; x++){
+            DCTELEM *temp= block[(x>>2) + 2*(y>>2)] + 2*(x&3) + 2*8*(y&3); //FIXME optimize
+            int cb= block[4][x + 8*y];
+            int cr= block[5][x + 8*y];
+            int cg= (cb + cr)>>1;
+            int y;
+
+            cb+=cb;
+
+            y = temp[0];
+            dst[0       ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
+            y = temp[1];
+            dst[1       ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
+            y = temp[8];
+            dst[  stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
+            y = temp[9];
+            dst[1+stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
+            dst += 2;
+        }
+        dst += 2*stride - 2*8;
+    }
+}
+
+static int decode_i_mb(FourXContext *f){
+    int i;
+
+    f->dsp.clear_blocks(f->block[0]);
+
+    for(i=0; i<6; i++){
+        if(decode_i_block(f, f->block[i]) < 0)
+            return -1;
+    }
+
+    return 0;
+}
+
+static uint8_t *read_huffman_tables(FourXContext *f, uint8_t * const buf){
+    int frequency[512];
+    uint8_t flag[512];
+    int up[512];
+    uint8_t len_tab[257];
+    int bits_tab[257];
+    int start, end;
+    uint8_t *ptr= buf;
+    int j;
+
+    memset(frequency, 0, sizeof(frequency));
+    memset(up, -1, sizeof(up));
+
+    start= *ptr++;
+    end= *ptr++;
+    for(;;){
+        int i;
+
+        for(i=start; i<=end; i++){
+            frequency[i]= *ptr++;
+//            printf("%d %d %d\n", start, end, frequency[i]);
+        }
+        start= *ptr++;
+        if(start==0) break;
+
+        end= *ptr++;
+    }
+    frequency[256]=1;
+
+    while((ptr - buf)&3) ptr++; // 4byte align
+
+//    for(j=0; j<16; j++)
+//        printf("%2X", ptr[j]);
+
+    for(j=257; j<512; j++){
+        int min_freq[2]= {256*256, 256*256};
+        int smallest[2]= {0, 0};
+        int i;
+        for(i=0; i<j; i++){
+            if(frequency[i] == 0) continue;
+            if(frequency[i] < min_freq[1]){
+                if(frequency[i] < min_freq[0]){
+                    min_freq[1]= min_freq[0]; smallest[1]= smallest[0];
+                    min_freq[0]= frequency[i];smallest[0]= i;
+                }else{
+                    min_freq[1]= frequency[i];smallest[1]= i;
+                }
+            }
+        }
+        if(min_freq[1] == 256*256) break;
+
+        frequency[j]= min_freq[0] + min_freq[1];
+        flag[ smallest[0] ]= 0;
+        flag[ smallest[1] ]= 1;
+        up[ smallest[0] ]=
+        up[ smallest[1] ]= j;
+        frequency[ smallest[0] ]= frequency[ smallest[1] ]= 0;
+    }
+
+    for(j=0; j<257; j++){
+        int node;
+        int len=0;
+        int bits=0;
+
+        for(node= j; up[node] != -1; node= up[node]){
+            bits += flag[node]<<len;
+            len++;
+            if(len > 31) av_log(f->avctx, AV_LOG_ERROR, "vlc length overflow\n"); //can this happen at all ?
+        }
+
+        bits_tab[j]= bits;
+        len_tab[j]= len;
+    }
+
+    init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257,
+             len_tab , 1, 1,
+             bits_tab, 4, 4, 0);
+
+    return ptr;
+}
+
+static int decode_i_frame(FourXContext *f, uint8_t *buf, int length){
+    int x, y;
+    const int width= f->avctx->width;
+    const int height= f->avctx->height;
+    uint16_t *dst= (uint16_t*)f->current_picture.data[0];
+    const int stride= f->current_picture.linesize[0]>>1;
+    const unsigned int bitstream_size= get32(buf);
+    const int token_count __attribute__((unused)) = get32(buf + bitstream_size + 8);
+    unsigned int prestream_size= 4*get32(buf + bitstream_size + 4);
+    uint8_t *prestream= buf + bitstream_size + 12;
+
+    if(prestream_size + bitstream_size + 12 != length
+       || bitstream_size > (1<<26)
+       || prestream_size > (1<<26)){
+        av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n", prestream_size, bitstream_size, length);
+        return -1;
+    }
+
+    prestream= read_huffman_tables(f, prestream);
+
+    init_get_bits(&f->gb, buf + 4, 8*bitstream_size);
+
+    prestream_size= length + buf - prestream;
+
+    f->bitstream_buffer= av_fast_realloc(f->bitstream_buffer, &f->bitstream_buffer_size, prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    f->dsp.bswap_buf((uint32_t*)f->bitstream_buffer, (uint32_t*)prestream, prestream_size/4);
+    init_get_bits(&f->pre_gb, f->bitstream_buffer, 8*prestream_size);
+
+    f->last_dc= 0*128*8*8;
+
+    for(y=0; y<height; y+=16){
+        for(x=0; x<width; x+=16){
+            if(decode_i_mb(f) < 0)
+                return -1;
+
+            idct_put(f, x, y);
+        }
+        dst += 16*stride;
+    }
+
+    if(get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
+        av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
+
+    return 0;
+}
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    FourXContext * const f = avctx->priv_data;
+    AVFrame *picture = data;
+    AVFrame *p, temp;
+    int i, frame_4cc, frame_size;
+
+    frame_4cc= get32(buf);
+    if(buf_size != get32(buf+4)+8 || buf_size < 20){
+        av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n", buf_size, get32(buf+4));
+    }
+
+    if(frame_4cc == ff_get_fourcc("cfrm")){
+        int free_index=-1;
+        const int data_size= buf_size - 20;
+        const int id= get32(buf+12);
+        const int whole_size= get32(buf+16);
+        CFrameBuffer *cfrm;
+
+        for(i=0; i<CFRAME_BUFFER_COUNT; i++){
+            if(f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
+                av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n", f->cfrm[i].id);
+        }
+
+        for(i=0; i<CFRAME_BUFFER_COUNT; i++){
+            if(f->cfrm[i].id   == id) break;
+            if(f->cfrm[i].size == 0 ) free_index= i;
+        }
+
+        if(i>=CFRAME_BUFFER_COUNT){
+            i= free_index;
+            f->cfrm[i].id= id;
+        }
+        cfrm= &f->cfrm[i];
+
+        cfrm->data= av_fast_realloc(cfrm->data, &cfrm->allocated_size, cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
+        if(!cfrm->data){ //explicit check needed as memcpy below might not catch a NULL
+            av_log(f->avctx, AV_LOG_ERROR, "realloc falure");
+            return -1;
+        }
+
+        memcpy(cfrm->data + cfrm->size, buf+20, data_size);
+        cfrm->size += data_size;
+
+        if(cfrm->size >= whole_size){
+            buf= cfrm->data;
+            frame_size= cfrm->size;
+
+            if(id != avctx->frame_number){
+                av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n", id, avctx->frame_number);
+            }
+
+            cfrm->size= cfrm->id= 0;
+            frame_4cc= ff_get_fourcc("pfrm");
+        }else
+            return buf_size;
+    }else{
+        buf= buf + 12;
+        frame_size= buf_size - 12;
+    }
+
+    temp= f->current_picture;
+    f->current_picture= f->last_picture;
+    f->last_picture= temp;
+
+    p= &f->current_picture;
+    avctx->coded_frame= p;
+
+    avctx->flags |= CODEC_FLAG_EMU_EDGE; // alternatively we would have to use our own buffer management
+
+    if(p->data[0])
+        avctx->release_buffer(avctx, p);
+
+    p->reference= 1;
+    if(avctx->get_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    if(frame_4cc == ff_get_fourcc("ifrm")){
+        p->pict_type= I_TYPE;
+        if(decode_i_frame(f, buf, frame_size) < 0)
+            return -1;
+    }else if(frame_4cc == ff_get_fourcc("pfrm")){
+        p->pict_type= P_TYPE;
+        if(decode_p_frame(f, buf, frame_size) < 0)
+            return -1;
+    }else if(frame_4cc == ff_get_fourcc("snd_")){
+        av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n", buf_size);
+    }else{
+        av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n", buf_size);
+    }
+
+#if 0
+for(i=0; i<20; i++){
+    printf("%2X %c ", buf[i], clip(buf[i],16,126));
+}
+#endif
+
+    p->key_frame= p->pict_type == I_TYPE;
+
+    *picture= *p;
+    *data_size = sizeof(AVPicture);
+
+    emms_c();
+
+    return buf_size;
+}
+
+
+static void common_init(AVCodecContext *avctx){
+    FourXContext * const f = avctx->priv_data;
+
+    dsputil_init(&f->dsp, avctx);
+
+    f->avctx= avctx;
+}
+
+static int decode_init(AVCodecContext *avctx){
+    FourXContext * const f = avctx->priv_data;
+
+    common_init(avctx);
+    init_vlcs(f);
+
+    avctx->pix_fmt= PIX_FMT_RGB565;
+
+    return 0;
+}
+
+
+static int decode_end(AVCodecContext *avctx){
+    FourXContext * const f = avctx->priv_data;
+    int i;
+
+    av_freep(&f->bitstream_buffer);
+    f->bitstream_buffer_size=0;
+    for(i=0; i<CFRAME_BUFFER_COUNT; i++){
+        av_freep(&f->cfrm[i].data);
+        f->cfrm[i].allocated_size= 0;
+    }
+    free_vlc(&f->pre_vlc);
+
+    return 0;
+}
+
+AVCodec fourxm_decoder = {
+    "4xm",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_4XM,
+    sizeof(FourXContext),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame,
+    /*CODEC_CAP_DR1,*/
+};
+
diff --git a/contrib/ffmpeg/libavcodec/8bps.c b/contrib/ffmpeg/libavcodec/8bps.c
new file mode 100644
index 000000000..297465043
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/8bps.c
@@ -0,0 +1,236 @@
+/*
+ * Quicktime Planar RGB (8BPS) Video Decoder
+ * Copyright (C) 2003 Roberto Togni
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file 8bps.c
+ * QT 8BPS Video Decoder by Roberto Togni <rtogni at bresciaonline dot it>
+ * For more information about the 8BPS format, visit:
+ *   http://www.pcisys.net/~melanson/codecs/
+ *
+ * Supports: PAL8 (RGB 8bpp, paletted)
+ *         : BGR24 (RGB 24bpp) (can also output it as RGBA32)
+ *         : RGBA32 (RGB 32bpp, 4th plane is probably alpha and it's ignored)
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "common.h"
+#include "avcodec.h"
+
+
+static const enum PixelFormat pixfmt_rgb24[] = {PIX_FMT_BGR24, PIX_FMT_RGBA32, -1};
+
+/*
+ * Decoder context
+ */
+typedef struct EightBpsContext {
+
+        AVCodecContext *avctx;
+        AVFrame pic;
+
+        unsigned char planes;
+        unsigned char planemap[4];
+} EightBpsContext;
+
+
+/*
+ *
+ * Decode a frame
+ *
+ */
+static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
+{
+        EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
+        unsigned char *encoded = (unsigned char *)buf;
+        unsigned char *pixptr, *pixptr_end;
+        unsigned int height = avctx->height; // Real image height
+        unsigned int dlen, p, row;
+        unsigned char *lp, *dp;
+        unsigned char count;
+        unsigned int px_inc;
+        unsigned int planes = c->planes;
+        unsigned char *planemap = c->planemap;
+
+        if(c->pic.data[0])
+                avctx->release_buffer(avctx, &c->pic);
+
+        c->pic.reference = 0;
+        c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
+        if(avctx->get_buffer(avctx, &c->pic) < 0){
+                av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+                return -1;
+        }
+
+        /* Set data pointer after line lengths */
+        dp = encoded + planes * (height << 1);
+
+        /* Ignore alpha plane, don't know what to do with it */
+        if (planes == 4)
+                planes--;
+
+        px_inc = planes + (avctx->pix_fmt == PIX_FMT_RGBA32);
+
+        for (p = 0; p < planes; p++) {
+                /* Lines length pointer for this plane */
+                lp = encoded + p * (height << 1);
+
+                /* Decode a plane */
+                for(row = 0; row < height; row++) {
+                        pixptr = c->pic.data[0] + row * c->pic.linesize[0] + planemap[p];
+                        pixptr_end = pixptr + c->pic.linesize[0];
+                        dlen = be2me_16(*(unsigned short *)(lp+row*2));
+                        /* Decode a row of this plane */
+                        while(dlen > 0) {
+                                if(dp + 1 >= buf+buf_size) return -1;
+                                if ((count = *dp++) <= 127) {
+                                        count++;
+                                        dlen -= count + 1;
+                                        if (pixptr + count * px_inc > pixptr_end)
+                                            break;
+                                        if(dp + count > buf+buf_size) return -1;
+                                        while(count--) {
+                                                *pixptr = *dp++;
+                                                pixptr += px_inc;
+                                        }
+                                } else {
+                                        count = 257 - count;
+                                        if (pixptr + count * px_inc > pixptr_end)
+                                            break;
+                                        while(count--) {
+                                                *pixptr = *dp;
+                                                pixptr += px_inc;
+                                        }
+                                        dp++;
+                                        dlen -= 2;
+                                }
+                        }
+                }
+        }
+
+        if (avctx->palctrl) {
+                memcpy (c->pic.data[1], avctx->palctrl->palette, AVPALETTE_SIZE);
+                if (avctx->palctrl->palette_changed) {
+                        c->pic.palette_has_changed = 1;
+                        avctx->palctrl->palette_changed = 0;
+                } else
+                        c->pic.palette_has_changed = 0;
+        }
+
+        *data_size = sizeof(AVFrame);
+        *(AVFrame*)data = c->pic;
+
+        /* always report that the buffer was completely consumed */
+        return buf_size;
+}
+
+
+/*
+ *
+ * Init 8BPS decoder
+ *
+ */
+static int decode_init(AVCodecContext *avctx)
+{
+        EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
+
+        c->avctx = avctx;
+        avctx->has_b_frames = 0;
+
+        c->pic.data[0] = NULL;
+
+    if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
+        return 1;
+    }
+
+        switch (avctx->bits_per_sample) {
+                case 8:
+                        avctx->pix_fmt = PIX_FMT_PAL8;
+                        c->planes = 1;
+                        c->planemap[0] = 0; // 1st plane is palette indexes
+                        if (avctx->palctrl == NULL) {
+                                av_log(avctx, AV_LOG_ERROR, "Error: PAL8 format but no palette from demuxer.\n");
+                                return -1;
+                        }
+                        break;
+                case 24:
+                        avctx->pix_fmt = avctx->get_format(avctx, pixfmt_rgb24);
+                        c->planes = 3;
+                        c->planemap[0] = 2; // 1st plane is red
+                        c->planemap[1] = 1; // 2nd plane is green
+                        c->planemap[2] = 0; // 3rd plane is blue
+                        break;
+                case 32:
+                        avctx->pix_fmt = PIX_FMT_RGBA32;
+                        c->planes = 4;
+#ifdef WORDS_BIGENDIAN
+                        c->planemap[0] = 1; // 1st plane is red
+                        c->planemap[1] = 2; // 2nd plane is green
+                        c->planemap[2] = 3; // 3rd plane is blue
+                        c->planemap[3] = 0; // 4th plane is alpha???
+#else
+                        c->planemap[0] = 2; // 1st plane is red
+                        c->planemap[1] = 1; // 2nd plane is green
+                        c->planemap[2] = 0; // 3rd plane is blue
+                        c->planemap[3] = 3; // 4th plane is alpha???
+#endif
+                        break;
+                default:
+                        av_log(avctx, AV_LOG_ERROR, "Error: Unsupported color depth: %u.\n", avctx->bits_per_sample);
+                        return -1;
+        }
+
+  return 0;
+}
+
+
+
+
+/*
+ *
+ * Uninit 8BPS decoder
+ *
+ */
+static int decode_end(AVCodecContext *avctx)
+{
+        EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
+
+        if (c->pic.data[0])
+                avctx->release_buffer(avctx, &c->pic);
+
+        return 0;
+}
+
+
+
+AVCodec eightbps_decoder = {
+        "8bps",
+        CODEC_TYPE_VIDEO,
+        CODEC_ID_8BPS,
+        sizeof(EightBpsContext),
+        decode_init,
+        NULL,
+        decode_end,
+        decode_frame,
+        CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/Makefile b/contrib/ffmpeg/libavcodec/Makefile
new file mode 100644
index 000000000..03c1ae43d
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/Makefile
@@ -0,0 +1,456 @@
+#
+# libavcodec Makefile
+# (c) 2000-2005 Fabrice Bellard
+#
+include ../config.mak
+
+CFLAGS+=-I$(SRC_PATH)/libswscale $(AMR_CFLAGS)
+
+OBJS= bitstream.o \
+      utils.o \
+      allcodecs.o \
+      mpegvideo.o \
+      jrevdct.o \
+      jfdctfst.o \
+      jfdctint.o\
+      mjpeg.o \
+      resample.o \
+      resample2.o \
+      dsputil.o \
+      motion_est.o \
+      imgconvert.o \
+      mpeg12.o \
+      mpegaudiodec.o \
+      simple_idct.o \
+      ratecontrol.o \
+      eval.o \
+      error_resilience.o \
+      fft.o \
+      mdct.o \
+      raw.o \
+      golomb.o \
+      cabac.o\
+      faandct.o \
+      parser.o \
+      vp3dsp.o \
+      h264idct.o \
+      rangecoder.o \
+      pnm.o \
+      h263.o \
+      msmpeg4.o \
+      h263dec.o \
+      opt.o \
+      bitstream_filter.o \
+      audioconvert.o \
+
+
+HEADERS = avcodec.h opt.h
+
+OBJS-$(CONFIG_AASC_DECODER)            += aasc.o
+OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc.o
+OBJS-$(CONFIG_ALAC_DECODER)            += alac.o
+OBJS-$(CONFIG_ASV1_DECODER)            += asv1.o
+OBJS-$(CONFIG_ASV1_ENCODER)            += asv1.o
+OBJS-$(CONFIG_ASV2_DECODER)            += asv1.o
+OBJS-$(CONFIG_ASV2_ENCODER)            += asv1.o
+OBJS-$(CONFIG_AVS_DECODER)             += avs.o
+OBJS-$(CONFIG_BMP_DECODER)             += bmp.o
+OBJS-$(CONFIG_CAVS_DECODER)            += cavs.o cavsdsp.o
+OBJS-$(CONFIG_CINEPAK_DECODER)         += cinepak.o
+OBJS-$(CONFIG_CLJR_DECODER)            += cljr.o
+OBJS-$(CONFIG_CLJR_ENCODER)            += cljr.o
+OBJS-$(CONFIG_COOK_DECODER)            += cook.o
+OBJS-$(CONFIG_CSCD_DECODER)            += cscd.o lzo.o
+OBJS-$(CONFIG_CYUV_DECODER)            += cyuv.o
+OBJS-$(CONFIG_DSICINVIDEO_DECODER)     += dsicinav.o
+OBJS-$(CONFIG_DSICINAUDIO_DECODER)     += dsicinav.o
+OBJS-$(CONFIG_DVBSUB_DECODER)          += dvbsubdec.o
+OBJS-$(CONFIG_DVBSUB_ENCODER)          += dvbsub.o
+OBJS-$(CONFIG_DVDSUB_DECODER)          += dvdsubdec.o
+OBJS-$(CONFIG_DVDSUB_ENCODER)          += dvdsubenc.o
+OBJS-$(CONFIG_DVVIDEO_DECODER)         += dv.o
+OBJS-$(CONFIG_DVVIDEO_ENCODER)         += dv.o
+OBJS-$(CONFIG_EIGHTBPS_DECODER)        += 8bps.o
+OBJS-$(CONFIG_FFV1_DECODER)            += ffv1.o
+OBJS-$(CONFIG_FFV1_ENCODER)            += ffv1.o
+OBJS-$(CONFIG_FFVHUFF_DECODER)         += huffyuv.o
+OBJS-$(CONFIG_FFVHUFF_ENCODER)         += huffyuv.o
+OBJS-$(CONFIG_FLAC_DECODER)            += flac.o
+OBJS-$(CONFIG_FLAC_ENCODER)            += flacenc.o
+OBJS-$(CONFIG_FLASHSV_DECODER)         += flashsv.o
+OBJS-$(CONFIG_FLIC_DECODER)            += flicvideo.o
+OBJS-$(CONFIG_FOURXM_DECODER)          += 4xm.o
+OBJS-$(CONFIG_FRAPS_DECODER)           += fraps.o
+OBJS-$(CONFIG_GIF_DECODER)             += gifdec.o lzw.o
+OBJS-$(CONFIG_GIF_ENCODER)             += gif.o
+OBJS-$(CONFIG_H261_DECODER)            += h261.o
+OBJS-$(CONFIG_H261_ENCODER)            += h261.o
+OBJS-$(CONFIG_H264_DECODER)            += h264.o
+OBJS-$(CONFIG_HUFFYUV_DECODER)         += huffyuv.o
+OBJS-$(CONFIG_HUFFYUV_ENCODER)         += huffyuv.o
+OBJS-$(CONFIG_IDCIN_DECODER)           += idcinvideo.o
+OBJS-$(CONFIG_IMC_DECODER)             += imc.o
+OBJS-$(CONFIG_INDEO2_DECODER)          += indeo2.o
+OBJS-$(CONFIG_INDEO3_DECODER)          += indeo3.o
+OBJS-$(CONFIG_INTERPLAY_VIDEO_DECODER) += interplayvideo.o
+OBJS-$(CONFIG_INTERPLAY_DPCM_DECODER)  += dpcm.o
+OBJS-$(CONFIG_KMVC_DECODER)            += kmvc.o
+OBJS-$(CONFIG_LOCO_DECODER)            += loco.o
+OBJS-$(CONFIG_MACE3_DECODER)           += mace.o
+OBJS-$(CONFIG_MACE6_DECODER)           += mace.o
+OBJS-$(CONFIG_MMVIDEO_DECODER)         += mmvideo.o
+OBJS-$(CONFIG_MP2_ENCODER)             += mpegaudio.o
+OBJS-$(CONFIG_MSRLE_DECODER)           += msrle.o
+OBJS-$(CONFIG_MSVIDEO1_DECODER)        += msvideo1.o
+OBJS-$(CONFIG_MSZH_DECODER)            += lcl.o
+OBJS-$(CONFIG_NUV_DECODER)             += nuv.o rtjpeg.o lzo.o
+OBJS-$(CONFIG_PNG_DECODER)             += png.o
+OBJS-$(CONFIG_PNG_ENCODER)             += png.o
+OBJS-$(CONFIG_QDM2_DECODER)            += qdm2.o
+OBJS-$(CONFIG_QDRAW_DECODER)           += qdrw.o
+OBJS-$(CONFIG_QPEG_DECODER)            += qpeg.o
+OBJS-$(CONFIG_QTRLE_DECODER)           += qtrle.o
+OBJS-$(CONFIG_RA_144_DECODER)          += ra144.o
+OBJS-$(CONFIG_RA_288_DECODER)          += ra288.o
+OBJS-$(CONFIG_ROQ_DECODER)             += roqvideo.o
+OBJS-$(CONFIG_ROQ_DPCM_DECODER)        += dpcm.o
+OBJS-$(CONFIG_RPZA_DECODER)            += rpza.o
+OBJS-$(CONFIG_RV10_DECODER)            += rv10.o
+OBJS-$(CONFIG_RV10_ENCODER)            += rv10.o
+OBJS-$(CONFIG_RV20_DECODER)            += rv10.o
+OBJS-$(CONFIG_RV20_ENCODER)            += rv10.o
+OBJS-$(CONFIG_SHORTEN_DECODER)         += shorten.o
+OBJS-$(CONFIG_SMACKAUD_DECODER)        += smacker.o
+OBJS-$(CONFIG_SMACKER_DECODER)         += smacker.o
+OBJS-$(CONFIG_SMC_DECODER)             += smc.o
+OBJS-$(CONFIG_SNOW_DECODER)            += snow.o
+OBJS-$(CONFIG_SNOW_ENCODER)            += snow.o
+OBJS-$(CONFIG_SOL_DPCM_DECODER)        += dpcm.o
+OBJS-$(CONFIG_SONIC_DECODER)           += sonic.o
+OBJS-$(CONFIG_SONIC_ENCODER)           += sonic.o
+OBJS-$(CONFIG_SONIC_LS_DECODER)        += sonic.o
+OBJS-$(CONFIG_SVQ1_DECODER)            += svq1.o
+OBJS-$(CONFIG_SVQ1_ENCODER)            += svq1.o
+OBJS-$(CONFIG_SVQ3_DECODER)            += h264.o
+OBJS-$(CONFIG_TARGA_DECODER)           += targa.o
+OBJS-$(CONFIG_THEORA_DECODER)          += vp3.o
+OBJS-$(CONFIG_TIERTEXSEQVIDEO_DECODER) += tiertexseqv.o
+OBJS-$(CONFIG_TIFF_DECODER)            += tiff.o lzw.o
+OBJS-$(CONFIG_TRUEMOTION1_DECODER)     += truemotion1.o
+OBJS-$(CONFIG_TRUEMOTION2_DECODER)     += truemotion2.o
+OBJS-$(CONFIG_TRUESPEECH_DECODER)      += truespeech.o
+OBJS-$(CONFIG_TSCC_DECODER)            += tscc.o
+OBJS-$(CONFIG_TTA_DECODER)             += tta.o
+OBJS-$(CONFIG_ULTI_DECODER)            += ulti.o
+OBJS-$(CONFIG_VC1_DECODER)             += vc1.o vc1dsp.o
+OBJS-$(CONFIG_VCR1_DECODER)            += vcr1.o
+OBJS-$(CONFIG_VCR1_ENCODER)            += vcr1.o
+OBJS-$(CONFIG_VMDAUDIO_DECODER)        += vmdav.o
+OBJS-$(CONFIG_VMDVIDEO_DECODER)        += vmdav.o
+OBJS-$(CONFIG_VMNC_DECODER)            += vmnc.o
+OBJS-$(CONFIG_VORBIS_DECODER)          += vorbis.o vorbis_data.o
+OBJS-$(CONFIG_VORBIS_ENCODER)          += vorbis_enc.o vorbis.o vorbis_data.o
+OBJS-$(CONFIG_VP3_DECODER)             += vp3.o
+OBJS-$(CONFIG_VP5_DECODER)             += vp5.o vp56.o vp56data.o
+OBJS-$(CONFIG_VP6_DECODER)             += vp6.o vp56.o vp56data.o
+OBJS-$(CONFIG_VQA_DECODER)             += vqavideo.o
+OBJS-$(CONFIG_WAVPACK_DECODER)         += wavpack.o
+OBJS-$(CONFIG_WMAV1_DECODER)           += wmadec.o
+OBJS-$(CONFIG_WMAV2_DECODER)           += wmadec.o
+OBJS-$(CONFIG_WMV3_DECODER)            += vc1.o vc1dsp.o
+OBJS-$(CONFIG_WNV1_DECODER)            += wnv1.o
+OBJS-$(CONFIG_WS_SND1_DECODER)         += ws-snd1.o
+OBJS-$(CONFIG_XAN_DPCM_DECODER)        += dpcm.o
+OBJS-$(CONFIG_XAN_WC3_DECODER)         += xan.o
+OBJS-$(CONFIG_XAN_WC4_DECODER)         += xan.o
+OBJS-$(CONFIG_XL_DECODER)              += xl.o
+OBJS-$(CONFIG_ZLIB_DECODER)            += lcl.o
+OBJS-$(CONFIG_ZLIB_ENCODER)            += lcl.o
+OBJS-$(CONFIG_ZMBV_DECODER)            += zmbv.o
+
+OBJS-$(CONFIG_PCM_S32LE_DECODER)       += pcm.o
+OBJS-$(CONFIG_PCM_S32LE_ENCODER)       += pcm.o
+OBJS-$(CONFIG_PCM_S32BE_DECODER)       += pcm.o
+OBJS-$(CONFIG_PCM_S32BE_ENCODER)       += pcm.o
+OBJS-$(CONFIG_PCM_U32LE_DECODER)       += pcm.o
+OBJS-$(CONFIG_PCM_U32LE_ENCODER)       += pcm.o
+OBJS-$(CONFIG_PCM_U32BE_DECODER)       += pcm.o
+OBJS-$(CONFIG_PCM_U32BE_ENCODER)       += pcm.o
+OBJS-$(CONFIG_PCM_S24LE_DECODER)       += pcm.o
+OBJS-$(CONFIG_PCM_S24LE_ENCODER)       += pcm.o
+OBJS-$(CONFIG_PCM_S24BE_DECODER)       += pcm.o
+OBJS-$(CONFIG_PCM_S24BE_ENCODER)       += pcm.o
+OBJS-$(CONFIG_PCM_U24LE_DECODER)       += pcm.o
+OBJS-$(CONFIG_PCM_U24LE_ENCODER)       += pcm.o
+OBJS-$(CONFIG_PCM_U24BE_DECODER)       += pcm.o
+OBJS-$(CONFIG_PCM_U24BE_ENCODER)       += pcm.o
+OBJS-$(CONFIG_PCM_S24DAUD_DECODER)     += pcm.o
+OBJS-$(CONFIG_PCM_S24DAUD_ENCODER)     += pcm.o
+OBJS-$(CONFIG_PCM_S16LE_DECODER)       += pcm.o
+OBJS-$(CONFIG_PCM_S16LE_ENCODER)       += pcm.o
+OBJS-$(CONFIG_PCM_S16BE_DECODER)       += pcm.o
+OBJS-$(CONFIG_PCM_S16BE_ENCODER)       += pcm.o
+OBJS-$(CONFIG_PCM_U16LE_DECODER)       += pcm.o
+OBJS-$(CONFIG_PCM_U16LE_ENCODER)       += pcm.o
+OBJS-$(CONFIG_PCM_U16BE_DECODER)       += pcm.o
+OBJS-$(CONFIG_PCM_U16BE_ENCODER)       += pcm.o
+OBJS-$(CONFIG_PCM_S8_DECODER)          += pcm.o
+OBJS-$(CONFIG_PCM_S8_ENCODER)          += pcm.o
+OBJS-$(CONFIG_PCM_U8_DECODER)          += pcm.o
+OBJS-$(CONFIG_PCM_U8_ENCODER)          += pcm.o
+OBJS-$(CONFIG_PCM_ALAW_DECODER)        += pcm.o
+OBJS-$(CONFIG_PCM_ALAW_ENCODER)        += pcm.o
+OBJS-$(CONFIG_PCM_MULAW_DECODER)       += pcm.o
+OBJS-$(CONFIG_PCM_MULAW_ENCODER)       += pcm.o
+
+OBJS-$(CONFIG_ADPCM_4XM_DECODER)       += adpcm.o
+OBJS-$(CONFIG_ADPCM_4XM_ENCODER)       += adpcm.o
+OBJS-$(CONFIG_ADPCM_ADX_DECODER)       += adx.o
+OBJS-$(CONFIG_ADPCM_ADX_ENCODER)       += adx.o
+OBJS-$(CONFIG_ADPCM_CT_DECODER)        += adpcm.o
+OBJS-$(CONFIG_ADPCM_CT_ENCODER)        += adpcm.o
+OBJS-$(CONFIG_ADPCM_EA_DECODER)        += adpcm.o
+OBJS-$(CONFIG_ADPCM_EA_ENCODER)        += adpcm.o
+OBJS-$(CONFIG_ADPCM_G726_DECODER)      += g726.o
+OBJS-$(CONFIG_ADPCM_G726_ENCODER)      += g726.o
+OBJS-$(CONFIG_ADPCM_IMA_DK3_DECODER)   += adpcm.o
+OBJS-$(CONFIG_ADPCM_IMA_DK3_ENCODER)   += adpcm.o
+OBJS-$(CONFIG_ADPCM_IMA_DK4_DECODER)   += adpcm.o
+OBJS-$(CONFIG_ADPCM_IMA_DK4_ENCODER)   += adpcm.o
+OBJS-$(CONFIG_ADPCM_IMA_QT_DECODER)    += adpcm.o
+OBJS-$(CONFIG_ADPCM_IMA_QT_ENCODER)    += adpcm.o
+OBJS-$(CONFIG_ADPCM_IMA_SMJPEG_DECODER)+= adpcm.o
+OBJS-$(CONFIG_ADPCM_IMA_SMJPEG_ENCODER)+= adpcm.o
+OBJS-$(CONFIG_ADPCM_IMA_WAV_DECODER)   += adpcm.o
+OBJS-$(CONFIG_ADPCM_IMA_WAV_ENCODER)   += adpcm.o
+OBJS-$(CONFIG_ADPCM_IMA_WS_DECODER)    += adpcm.o
+OBJS-$(CONFIG_ADPCM_IMA_WS_ENCODER)    += adpcm.o
+OBJS-$(CONFIG_ADPCM_MS_DECODER)        += adpcm.o
+OBJS-$(CONFIG_ADPCM_MS_ENCODER)        += adpcm.o
+OBJS-$(CONFIG_ADPCM_SBPRO_2_DECODER)   += adpcm.o
+OBJS-$(CONFIG_ADPCM_SBPRO_2_ENCODER)   += adpcm.o
+OBJS-$(CONFIG_ADPCM_SBPRO_3_DECODER)   += adpcm.o
+OBJS-$(CONFIG_ADPCM_SBPRO_3_ENCODER)   += adpcm.o
+OBJS-$(CONFIG_ADPCM_SBPRO_4_DECODER)   += adpcm.o
+OBJS-$(CONFIG_ADPCM_SBPRO_4_ENCODER)   += adpcm.o
+OBJS-$(CONFIG_ADPCM_SWF_DECODER)       += adpcm.o
+OBJS-$(CONFIG_ADPCM_SWF_ENCODER)       += adpcm.o
+OBJS-$(CONFIG_ADPCM_XA_DECODER)        += adpcm.o
+OBJS-$(CONFIG_ADPCM_XA_ENCODER)        += adpcm.o
+OBJS-$(CONFIG_ADPCM_YAMAHA_DECODER)    += adpcm.o
+OBJS-$(CONFIG_ADPCM_YAMAHA_ENCODER)    += adpcm.o
+
+OBJS-$(CONFIG_FAAD)                    += faad.o
+OBJS-$(CONFIG_FAAC)                    += faac.o
+OBJS-$(CONFIG_XVID)                    += xvidff.o xvid_rc.o
+OBJS-$(CONFIG_X264)                    += x264.o
+OBJS-$(CONFIG_MP3LAME)                 += mp3lameaudio.o
+OBJS-$(CONFIG_LIBVORBIS)               += oggvorbis.o
+OBJS-$(CONFIG_LIBGSM)                  += libgsm.o
+
+# currently using liba52 for ac3 decoding
+OBJS-$(CONFIG_A52)                     += a52dec.o
+
+# using builtin liba52 or runtime linked liba52.so.0
+OBJS-$(CONFIG_A52)$(CONFIG_A52BIN)     += liba52/bit_allocate.o \
+                                          liba52/bitstream.o    \
+                                          liba52/downmix.o      \
+                                          liba52/imdct.o        \
+                                          liba52/parse.o        \
+                                          liba52/crc.o          \
+                                          liba52/resample.o
+
+# currently using libdts for dts decoding
+OBJS-$(CONFIG_DTS)                     += dtsdec.o
+
+OBJS-$(CONFIG_AMR)                     += amr.o
+OBJS-$(CONFIG_AMR_NB)                  += amr_float/sp_dec.o     \
+                                          amr_float/sp_enc.o     \
+                                          amr_float/interf_dec.o \
+                                          amr_float/interf_enc.o
+
+ifeq ($(CONFIG_AMR_NB_FIXED),yes)
+EXTRAOBJS += amr/*.o
+EXTRADEPS=amrlibs
+endif
+
+OBJS-$(CONFIG_AMR_WB)                  += amrwb_float/dec_acelp.o \
+                                          amrwb_float/dec_dtx.o   \
+                                          amrwb_float/dec_gain.o  \
+                                          amrwb_float/dec_if.o    \
+                                          amrwb_float/dec_lpc.o   \
+                                          amrwb_float/dec_main.o  \
+                                          amrwb_float/dec_rom.o   \
+                                          amrwb_float/dec_util.o  \
+                                          amrwb_float/enc_acelp.o \
+                                          amrwb_float/enc_dtx.o   \
+                                          amrwb_float/enc_gain.o  \
+                                          amrwb_float/enc_if.o    \
+                                          amrwb_float/enc_lpc.o   \
+                                          amrwb_float/enc_main.o  \
+                                          amrwb_float/enc_rom.o   \
+                                          amrwb_float/enc_util.o  \
+                                          amrwb_float/if_rom.o
+
+OBJS-$(CONFIG_AAC_PARSER)              += parser.o
+OBJS-$(CONFIG_AC3_PARSER)              += parser.o
+OBJS-$(CONFIG_CAVSVIDEO_PARSER)        += cavs.o parser.o
+OBJS-$(CONFIG_DVBSUB_PARSER)           += dvbsubdec.o
+OBJS-$(CONFIG_DVDSUB_PARSER)           += dvdsubdec.o
+OBJS-$(CONFIG_H261_PARSER)             += h261.o
+OBJS-$(CONFIG_H263_PARSER)             += h263dec.o
+OBJS-$(CONFIG_H264_PARSER)             += h264.o
+OBJS-$(CONFIG_MJPEG_PARSER)            += mjpeg.o
+OBJS-$(CONFIG_MPEG4VIDEO_PARSER)       += parser.o
+OBJS-$(CONFIG_MPEGAUDIO_PARSER)        += parser.o
+OBJS-$(CONFIG_MPEGVIDEO_PARSER)        += parser.o
+OBJS-$(CONFIG_PNM_PARSER)              += pnm.o
+
+OBJS-$(HAVE_PTHREADS)                  += pthread.o
+OBJS-$(HAVE_W32THREADS)                += w32thread.o
+OBJS-$(HAVE_OS2THREADS)                += os2thread.o
+OBJS-$(HAVE_BEOSTHREADS)               += beosthread.o
+
+OBJS-$(HAVE_XVMC_ACCEL)                += xvmcvideo.o
+
+ifneq ($(CONFIG_SWSCALER),yes)
+OBJS += imgresample.o
+endif
+
+# i386 mmx specific stuff
+ifeq ($(TARGET_MMX),yes)
+OBJS += i386/fdct_mmx.o \
+        i386/cputest.o \
+        i386/dsputil_mmx.o \
+        i386/mpegvideo_mmx.o \
+        i386/motion_est_mmx.o \
+        i386/simple_idct_mmx.o \
+        i386/idct_mmx_xvid.o \
+        i386/fft_sse.o \
+        i386/vp3dsp_mmx.o \
+        i386/vp3dsp_sse2.o \
+        i386/fft_3dn.o \
+        i386/fft_3dn2.o \
+        i386/snowdsp_mmx.o \
+
+ifeq ($(CONFIG_GPL),yes)
+OBJS += i386/idct_mmx.o
+endif
+ifeq ($(CONFIG_CAVS_DECODER),yes)
+OBJS += i386/cavsdsp_mmx.o
+endif
+endif
+
+# armv4l specific stuff
+ASM_OBJS-$(TARGET_ARCH_ARMV4L)         += armv4l/jrevdct_arm.o     \
+                                          armv4l/simple_idct_arm.o \
+                                          armv4l/dsputil_arm_s.o   \
+
+OBJS-$(TARGET_ARCH_ARMV4L)             += armv4l/dsputil_arm.o   \
+                                          armv4l/mpegvideo_arm.o \
+
+OBJS-$(TARGET_IWMMXT)                  += armv4l/dsputil_iwmmxt.o   \
+                                          armv4l/mpegvideo_iwmmxt.o \
+
+ASM_OBJS-$(TARGET_ARMV5TE)             += armv4l/simple_idct_armv5te.o \
+
+# sun sparc
+OBJS-$(TARGET_ARCH_SPARC)              += sparc/dsputil_vis.o \
+
+sparc/dsputil_vis.o: CFLAGS += -mcpu=ultrasparc -mtune=ultrasparc
+
+# sun mediaLib specific stuff
+OBJS-$(HAVE_MLIB)                      += mlib/dsputil_mlib.o \
+
+# alpha specific stuff
+OBJS-$(TARGET_ARCH_ALPHA)              += alpha/dsputil_alpha.o     \
+                                          alpha/mpegvideo_alpha.o   \
+                                          alpha/simple_idct_alpha.o \
+                                          alpha/motion_est_alpha.o  \
+
+ASM_OBJS-$(TARGET_ARCH_ALPHA)          += alpha/dsputil_alpha_asm.o  \
+                                          alpha/motion_est_mvi_asm.o \
+
+OBJS-$(TARGET_ARCH_POWERPC)            += ppc/dsputil_ppc.o   \
+                                          ppc/mpegvideo_ppc.o \
+
+OBJS-$(TARGET_MMI)                     += ps2/dsputil_mmi.o   \
+                                          ps2/idct_mmi.o      \
+                                          ps2/mpegvideo_mmi.o \
+
+OBJS-$(TARGET_ARCH_SH4)                += sh4/idct_sh4.o      \
+                                          sh4/dsputil_sh4.o   \
+                                          sh4/dsputil_align.o \
+
+OBJS-$(TARGET_ALTIVEC)                 += ppc/dsputil_altivec.o      \
+                                          ppc/mpegvideo_altivec.o    \
+                                          ppc/idct_altivec.o         \
+                                          ppc/fft_altivec.o          \
+                                          ppc/gmc_altivec.o          \
+                                          ppc/fdct_altivec.o         \
+                                          ppc/float_altivec.o        \
+
+ifeq ($(TARGET_ALTIVEC),yes)
+OBJS-$(CONFIG_H264_DECODER)            += ppc/h264_altivec.o
+OBJS-$(CONFIG_SNOW_DECODER)            += ppc/snow_altivec.o
+OBJS-$(CONFIG_VC1_DECODER)             += ppc/vc1dsp_altivec.o
+OBJS-$(CONFIG_WMV3_DECODER)            += ppc/vc1dsp_altivec.o
+endif
+
+OBJS-$(TARGET_ARCH_BFIN)               += bfin/dsputil_bfin.o \
+
+CFLAGS += $(CFLAGS-yes)
+OBJS += $(OBJS-yes)
+ASM_OBJS += $(ASM_OBJS-yes)
+
+EXTRALIBS := -L$(BUILD_ROOT)/libavutil -lavutil$(BUILDSUF) $(EXTRALIBS)
+
+NAME=avcodec
+ifeq ($(BUILD_SHARED),yes)
+LIBVERSION=$(LAVCVERSION)
+LIBMAJOR=$(LAVCMAJOR)
+endif
+
+TESTS= imgresample-test fft-test
+ifeq ($(TARGET_ARCH_X86),yes)
+TESTS+= cpuid_test dct-test motion-test
+endif
+
+include ../common.mak
+
+amrlibs:
+	$(MAKE) -C amr spclib fipoplib
+
+tests: apiexample $(TESTS)
+
+clean::
+	rm -f \
+	   i386/*.o i386/*~ \
+	   armv4l/*.o armv4l/*~ \
+	   mlib/*.o mlib/*~ \
+	   alpha/*.o alpha/*~ \
+	   ppc/*.o ppc/*~ \
+	   ps2/*.o ps2/*~ \
+	   sh4/*.o sh4/*~ \
+	   sparc/*.o sparc/*~ \
+	   liba52/*.o liba52/*~ \
+	   amr_float/*.o \
+	   apiexample $(TESTS)
+	-$(MAKE) -C amr clean
+	-$(MAKE) -C amrwb_float -f makefile.gcc clean
+
+apiexample: apiexample.o $(LIB)
+
+cpuid_test: i386/cputest.c
+	$(CC) $(CFLAGS) -D__TEST__ -o $@ $<
+
+imgresample-test: imgresample.c $(LIB)
+	$(CC) $(CFLAGS) -DTEST -o $@ $^ $(EXTRALIBS)
+
+dct-test: dct-test.o fdctref.o $(LIB)
+
+motion-test: motion_test.o $(LIB)
+
+fft-test: fft-test.o $(LIB)
+
+.PHONY: amrlibs tests
diff --git a/contrib/ffmpeg/libavcodec/a52dec.c b/contrib/ffmpeg/libavcodec/a52dec.c
new file mode 100644
index 000000000..dec25138e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/a52dec.c
@@ -0,0 +1,257 @@
+/*
+ * A52 decoder
+ * Copyright (c) 2001 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file a52dec.c
+ * A52 decoder.
+ */
+
+#include "avcodec.h"
+#include "liba52/a52.h"
+
+#ifdef CONFIG_A52BIN
+#include <dlfcn.h>
+static const char* liba52name = "liba52.so.0";
+#endif
+
+/**
+ * liba52 - Copyright (C) Aaron Holtzman
+ * released under the GPL license.
+ */
+typedef struct AC3DecodeState {
+    uint8_t inbuf[4096]; /* input buffer */
+    uint8_t *inbuf_ptr;
+    int frame_size;
+    int flags;
+    int channels;
+    a52_state_t* state;
+    sample_t* samples;
+
+    /*
+     * virtual method table
+     *
+     * using this function table so the liba52 doesn't
+     * have to be really linked together with ffmpeg
+     * and might be linked in runtime - this allows binary
+     * distribution of ffmpeg library which doens't depend
+     * on liba52 library - but if user has it installed
+     * it will be used - user might install such library
+     * separately
+     */
+    void* handle;
+    a52_state_t* (*a52_init)(uint32_t mm_accel);
+    sample_t* (*a52_samples)(a52_state_t * state);
+    int (*a52_syncinfo)(uint8_t * buf, int * flags,
+                          int * sample_rate, int * bit_rate);
+    int (*a52_frame)(a52_state_t * state, uint8_t * buf, int * flags,
+                       sample_t * level, sample_t bias);
+    void (*a52_dynrng)(a52_state_t * state,
+                         sample_t (* call) (sample_t, void *), void * data);
+    int (*a52_block)(a52_state_t * state);
+    void (*a52_free)(a52_state_t * state);
+
+} AC3DecodeState;
+
+#ifdef CONFIG_A52BIN
+static void* dlsymm(void* handle, const char* symbol)
+{
+    void* f = dlsym(handle, symbol);
+    if (!f)
+        av_log( NULL, AV_LOG_ERROR, "A52 Decoder - function '%s' can't be resolved\n", symbol);
+    return f;
+}
+#endif
+
+static int a52_decode_init(AVCodecContext *avctx)
+{
+    AC3DecodeState *s = avctx->priv_data;
+
+#ifdef CONFIG_A52BIN
+    s->handle = dlopen(liba52name, RTLD_LAZY);
+    if (!s->handle)
+    {
+        av_log( avctx, AV_LOG_ERROR, "A52 library %s could not be opened! \n%s\n", liba52name, dlerror());
+        return -1;
+    }
+    s->a52_init = (a52_state_t* (*)(uint32_t)) dlsymm(s->handle, "a52_init");
+    s->a52_samples = (sample_t* (*)(a52_state_t*)) dlsymm(s->handle, "a52_samples");
+    s->a52_syncinfo = (int (*)(uint8_t*, int*, int*, int*)) dlsymm(s->handle, "a52_syncinfo");
+    s->a52_frame = (int (*)(a52_state_t*, uint8_t*, int*, sample_t*, sample_t)) dlsymm(s->handle, "a52_frame");
+    s->a52_block = (int (*)(a52_state_t*)) dlsymm(s->handle, "a52_block");
+    s->a52_free = (void (*)(a52_state_t*)) dlsymm(s->handle, "a52_free");
+    if (!s->a52_init || !s->a52_samples || !s->a52_syncinfo
+        || !s->a52_frame || !s->a52_block || !s->a52_free)
+    {
+        dlclose(s->handle);
+        return -1;
+    }
+#else
+    /* static linked version */
+    s->handle = 0;
+    s->a52_init = a52_init;
+    s->a52_samples = a52_samples;
+    s->a52_syncinfo = a52_syncinfo;
+    s->a52_frame = a52_frame;
+    s->a52_block = a52_block;
+    s->a52_free = a52_free;
+#endif
+    s->state = s->a52_init(0); /* later use CPU flags */
+    s->samples = s->a52_samples(s->state);
+    s->inbuf_ptr = s->inbuf;
+    s->frame_size = 0;
+
+    return 0;
+}
+
+/**** the following two functions comes from a52dec */
+static inline int blah (int32_t i)
+{
+    if (i > 0x43c07fff)
+        return 32767;
+    else if (i < 0x43bf8000)
+        return -32768;
+    return i - 0x43c00000;
+}
+
+static inline void float_to_int (float * _f, int16_t * s16, int nchannels)
+{
+    int i, j, c;
+    int32_t * f = (int32_t *) _f;       // XXX assumes IEEE float format
+
+    j = 0;
+    nchannels *= 256;
+    for (i = 0; i < 256; i++) {
+        for (c = 0; c < nchannels; c += 256)
+            s16[j++] = blah (f[i + c]);
+    }
+}
+
+/**** end */
+
+#define HEADER_SIZE 7
+
+static int a52_decode_frame(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    AC3DecodeState *s = avctx->priv_data;
+    uint8_t *buf_ptr;
+    int flags, i, len;
+    int sample_rate, bit_rate;
+    short *out_samples = data;
+    float level;
+    static const int ac3_channels[8] = {
+        2, 1, 2, 3, 3, 4, 4, 5
+    };
+
+    buf_ptr = buf;
+    while (buf_size > 0) {
+        len = s->inbuf_ptr - s->inbuf;
+        if (s->frame_size == 0) {
+            /* no header seen : find one. We need at least 7 bytes to parse it */
+            len = HEADER_SIZE - len;
+            if (len > buf_size)
+                len = buf_size;
+            memcpy(s->inbuf_ptr, buf_ptr, len);
+            buf_ptr += len;
+            s->inbuf_ptr += len;
+            buf_size -= len;
+            if ((s->inbuf_ptr - s->inbuf) == HEADER_SIZE) {
+                len = s->a52_syncinfo(s->inbuf, &s->flags, &sample_rate, &bit_rate);
+                if (len == 0) {
+                    /* no sync found : move by one byte (inefficient, but simple!) */
+                    memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1);
+                    s->inbuf_ptr--;
+                } else {
+                    s->frame_size = len;
+                    /* update codec info */
+                    avctx->sample_rate = sample_rate;
+                    s->channels = ac3_channels[s->flags & 7];
+                    if (s->flags & A52_LFE)
+                        s->channels++;
+                    if (avctx->channels == 0)
+                        /* No specific number of channel requested */
+                        avctx->channels = s->channels;
+                    else if (s->channels < avctx->channels) {
+                        av_log(avctx, AV_LOG_ERROR, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len);
+                        avctx->channels = s->channels;
+                    }
+                    avctx->bit_rate = bit_rate;
+                }
+            }
+        } else if (len < s->frame_size) {
+            len = s->frame_size - len;
+            if (len > buf_size)
+                len = buf_size;
+
+            memcpy(s->inbuf_ptr, buf_ptr, len);
+            buf_ptr += len;
+            s->inbuf_ptr += len;
+            buf_size -= len;
+        } else {
+            flags = s->flags;
+            if (avctx->channels == 1)
+                flags = A52_MONO;
+            else if (avctx->channels == 2)
+                flags = A52_STEREO;
+            else
+                flags |= A52_ADJUST_LEVEL;
+            level = 1;
+            if (s->a52_frame(s->state, s->inbuf, &flags, &level, 384)) {
+            fail:
+                s->inbuf_ptr = s->inbuf;
+                s->frame_size = 0;
+                continue;
+            }
+            for (i = 0; i < 6; i++) {
+                if (s->a52_block(s->state))
+                    goto fail;
+                float_to_int(s->samples, out_samples + i * 256 * avctx->channels, avctx->channels);
+            }
+            s->inbuf_ptr = s->inbuf;
+            s->frame_size = 0;
+            *data_size = 6 * avctx->channels * 256 * sizeof(int16_t);
+            break;
+        }
+    }
+    return buf_ptr - buf;
+}
+
+static int a52_decode_end(AVCodecContext *avctx)
+{
+    AC3DecodeState *s = avctx->priv_data;
+    s->a52_free(s->state);
+#ifdef CONFIG_A52BIN
+    dlclose(s->handle);
+#endif
+    return 0;
+}
+
+AVCodec ac3_decoder = {
+    "ac3",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_AC3,
+    sizeof(AC3DecodeState),
+    a52_decode_init,
+    NULL,
+    a52_decode_end,
+    a52_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/aasc.c b/contrib/ffmpeg/libavcodec/aasc.c
new file mode 100644
index 000000000..6c8e3166e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/aasc.c
@@ -0,0 +1,176 @@
+/*
+ * Autodesc RLE Decoder
+ * Copyright (C) 2005 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file aasc.c
+ * Autodesc RLE Video Decoder by Konstantin Shishkov
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+
+typedef struct AascContext {
+    AVCodecContext *avctx;
+    AVFrame frame;
+} AascContext;
+
+#define FETCH_NEXT_STREAM_BYTE() \
+    if (stream_ptr >= buf_size) \
+    { \
+      av_log(s->avctx, AV_LOG_ERROR, " AASC: stream ptr just went out of bounds (fetch)\n"); \
+      break; \
+    } \
+    stream_byte = buf[stream_ptr++];
+
+static int aasc_decode_init(AVCodecContext *avctx)
+{
+    AascContext *s = (AascContext *)avctx->priv_data;
+
+    s->avctx = avctx;
+
+    avctx->pix_fmt = PIX_FMT_BGR24;
+    avctx->has_b_frames = 0;
+    s->frame.data[0] = NULL;
+
+    return 0;
+}
+
+static int aasc_decode_frame(AVCodecContext *avctx,
+                              void *data, int *data_size,
+                              uint8_t *buf, int buf_size)
+{
+    AascContext *s = (AascContext *)avctx->priv_data;
+    int stream_ptr = 4;
+    unsigned char rle_code;
+    unsigned char stream_byte;
+    int pixel_ptr = 0;
+    int row_dec, row_ptr;
+    int frame_size;
+    int i;
+
+    s->frame.reference = 1;
+    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if (avctx->reget_buffer(avctx, &s->frame)) {
+        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+        return -1;
+    }
+
+    row_dec = s->frame.linesize[0];
+    row_ptr = (s->avctx->height - 1) * row_dec;
+    frame_size = row_dec * s->avctx->height;
+
+    while (row_ptr >= 0) {
+        FETCH_NEXT_STREAM_BYTE();
+        rle_code = stream_byte;
+        if (rle_code == 0) {
+            /* fetch the next byte to see how to handle escape code */
+            FETCH_NEXT_STREAM_BYTE();
+            if (stream_byte == 0) {
+                /* line is done, goto the next one */
+                row_ptr -= row_dec;
+                pixel_ptr = 0;
+            } else if (stream_byte == 1) {
+                /* decode is done */
+                break;
+            } else if (stream_byte == 2) {
+                /* reposition frame decode coordinates */
+                FETCH_NEXT_STREAM_BYTE();
+                pixel_ptr += stream_byte;
+                FETCH_NEXT_STREAM_BYTE();
+                row_ptr -= stream_byte * row_dec;
+            } else {
+                /* copy pixels from encoded stream */
+                if ((pixel_ptr + stream_byte > avctx->width * 3) ||
+                    (row_ptr < 0)) {
+                    av_log(s->avctx, AV_LOG_ERROR, " AASC: frame ptr just went out of bounds (copy1)\n");
+                    break;
+                }
+
+                rle_code = stream_byte;
+                if (stream_ptr + rle_code > buf_size) {
+                    av_log(s->avctx, AV_LOG_ERROR, " AASC: stream ptr just went out of bounds (copy2)\n");
+                    break;
+                }
+
+                for (i = 0; i < rle_code; i++) {
+                    FETCH_NEXT_STREAM_BYTE();
+                    s->frame.data[0][row_ptr + pixel_ptr] = stream_byte;
+                    pixel_ptr++;
+                }
+                if (rle_code & 1)
+                    stream_ptr++;
+            }
+        } else {
+            /* decode a run of data */
+            if ((pixel_ptr + rle_code > avctx->width * 3) ||
+                (row_ptr < 0)) {
+                av_log(s->avctx, AV_LOG_ERROR, " AASC: frame ptr just went out of bounds (run1)\n");
+                break;
+            }
+
+            FETCH_NEXT_STREAM_BYTE();
+
+            while(rle_code--) {
+                s->frame.data[0][row_ptr + pixel_ptr] = stream_byte;
+                pixel_ptr++;
+            }
+        }
+    }
+
+    /* one last sanity check on the way out */
+    if (stream_ptr < buf_size)
+        av_log(s->avctx, AV_LOG_ERROR, " AASC: ended frame decode with bytes left over (%d < %d)\n",
+            stream_ptr, buf_size);
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->frame;
+
+    /* report that the buffer was completely consumed */
+    return buf_size;
+}
+
+static int aasc_decode_end(AVCodecContext *avctx)
+{
+    AascContext *s = (AascContext *)avctx->priv_data;
+
+    /* release the last frame */
+    if (s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    return 0;
+}
+
+AVCodec aasc_decoder = {
+    "aasc",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_AASC,
+    sizeof(AascContext),
+    aasc_decode_init,
+    NULL,
+    aasc_decode_end,
+    aasc_decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/ac3.h b/contrib/ffmpeg/libavcodec/ac3.h
new file mode 100644
index 000000000..5daa9750f
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ac3.h
@@ -0,0 +1,65 @@
+/*
+ * Common code between AC3 encoder and decoder
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file ac3.h
+ * Common code between AC3 encoder and decoder.
+ */
+
+#define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */
+#define AC3_MAX_CHANNELS 6 /* including LFE channel */
+
+#define NB_BLOCKS 6 /* number of PCM blocks inside an AC3 frame */
+#define AC3_FRAME_SIZE (NB_BLOCKS * 256)
+
+/* exponent encoding strategy */
+#define EXP_REUSE 0
+#define EXP_NEW   1
+
+#define EXP_D15   1
+#define EXP_D25   2
+#define EXP_D45   3
+
+typedef struct AC3BitAllocParameters {
+    int fscod; /* frequency */
+    int halfratecod;
+    int sgain, sdecay, fdecay, dbknee, floor;
+    int cplfleak, cplsleak;
+} AC3BitAllocParameters;
+
+#if 0
+extern const uint16_t ac3_freqs[3];
+extern const uint16_t ac3_bitratetab[19];
+extern const int16_t ac3_window[256];
+extern const uint8_t sdecaytab[4];
+extern const uint8_t fdecaytab[4];
+extern const uint16_t sgaintab[4];
+extern const uint16_t dbkneetab[4];
+extern const uint16_t floortab[8];
+extern const uint16_t fgaintab[8];
+#endif
+
+void ac3_common_init(void);
+void ac3_parametric_bit_allocation(AC3BitAllocParameters *s, uint8_t *bap,
+                                   int8_t *exp, int start, int end,
+                                   int snroffset, int fgain, int is_lfe,
+                                   int deltbae,int deltnseg,
+                                   uint8_t *deltoffst, uint8_t *deltlen, uint8_t *deltba);
diff --git a/contrib/ffmpeg/libavcodec/ac3dec.c b/contrib/ffmpeg/libavcodec/ac3dec.c
new file mode 100644
index 000000000..b6bebfb59
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ac3dec.c
@@ -0,0 +1,184 @@
+/*
+ * AC3 decoder
+ * Copyright (c) 2001 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file ac3dec.c
+ * AC3 decoder.
+ */
+
+//#define DEBUG
+
+#include "avcodec.h"
+#include "libac3/ac3.h"
+
+/* currently, I use libac3 which is Copyright (C) Aaron Holtzman and
+   released under the GPL license. I may reimplement it someday... */
+typedef struct AC3DecodeState {
+    uint8_t inbuf[4096]; /* input buffer */
+    uint8_t *inbuf_ptr;
+    int frame_size;
+    int flags;
+    int channels;
+    ac3_state_t state;
+} AC3DecodeState;
+
+static int ac3_decode_init(AVCodecContext *avctx)
+{
+    AC3DecodeState *s = avctx->priv_data;
+
+    ac3_init ();
+    s->inbuf_ptr = s->inbuf;
+    s->frame_size = 0;
+    return 0;
+}
+
+stream_samples_t samples;
+
+/**** the following two functions comes from ac3dec */
+static inline int blah (int32_t i)
+{
+    if (i > 0x43c07fff)
+        return 32767;
+    else if (i < 0x43bf8000)
+        return -32768;
+    else
+        return i - 0x43c00000;
+}
+
+static inline void float_to_int (float * _f, int16_t * s16, int nchannels)
+{
+    int i, j, c;
+    int32_t * f = (int32_t *) _f;       // XXX assumes IEEE float format
+
+    j = 0;
+    nchannels *= 256;
+    for (i = 0; i < 256; i++) {
+        for (c = 0; c < nchannels; c += 256)
+            s16[j++] = blah (f[i + c]);
+    }
+}
+
+/**** end */
+
+#define HEADER_SIZE 7
+
+static int ac3_decode_frame(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    AC3DecodeState *s = avctx->priv_data;
+    uint8_t *buf_ptr;
+    int flags, i, len;
+    int sample_rate, bit_rate;
+    short *out_samples = data;
+    float level;
+    static const int ac3_channels[8] = {
+        2, 1, 2, 3, 3, 4, 4, 5
+    };
+
+    buf_ptr = buf;
+    while (buf_size > 0) {
+        len = s->inbuf_ptr - s->inbuf;
+        if (s->frame_size == 0) {
+            /* no header seen : find one. We need at least 7 bytes to parse it */
+            len = HEADER_SIZE - len;
+            if (len > buf_size)
+                len = buf_size;
+            memcpy(s->inbuf_ptr, buf_ptr, len);
+            buf_ptr += len;
+            s->inbuf_ptr += len;
+            buf_size -= len;
+            if ((s->inbuf_ptr - s->inbuf) == HEADER_SIZE) {
+                len = ac3_syncinfo (s->inbuf, &s->flags, &sample_rate, &bit_rate);
+                if (len == 0) {
+                    /* no sync found : move by one byte (inefficient, but simple!) */
+                    memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1);
+                    s->inbuf_ptr--;
+                } else {
+                    s->frame_size = len;
+                    /* update codec info */
+                    avctx->sample_rate = sample_rate;
+                    s->channels = ac3_channels[s->flags & 7];
+                    if (s->flags & AC3_LFE)
+                        s->channels++;
+                    if (avctx->channels == 0)
+                        /* No specific number of channel requested */
+                        avctx->channels = s->channels;
+                    else if (s->channels < avctx->channels) {
+                        av_log( avctx, AV_LOG_INFO, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len);
+                        avctx->channels = s->channels;
+                    }
+                    avctx->bit_rate = bit_rate;
+                }
+            }
+        } else if (len < s->frame_size) {
+            len = s->frame_size - len;
+            if (len > buf_size)
+                len = buf_size;
+
+            memcpy(s->inbuf_ptr, buf_ptr, len);
+            buf_ptr += len;
+            s->inbuf_ptr += len;
+            buf_size -= len;
+        } else {
+            flags = s->flags;
+            if (avctx->channels == 1)
+                flags = AC3_MONO;
+            else if (avctx->channels == 2)
+                flags = AC3_STEREO;
+            else
+                flags |= AC3_ADJUST_LEVEL;
+            level = 1;
+            if (ac3_frame (&s->state, s->inbuf, &flags, &level, 384)) {
+            fail:
+                s->inbuf_ptr = s->inbuf;
+                s->frame_size = 0;
+                continue;
+            }
+            for (i = 0; i < 6; i++) {
+                if (ac3_block (&s->state))
+                    goto fail;
+                float_to_int (*samples, out_samples + i * 256 * avctx->channels, avctx->channels);
+            }
+            s->inbuf_ptr = s->inbuf;
+            s->frame_size = 0;
+            *data_size = 6 * avctx->channels * 256 * sizeof(int16_t);
+            break;
+        }
+    }
+    return buf_ptr - buf;
+}
+
+static int ac3_decode_end(AVCodecContext *s)
+{
+    return 0;
+}
+
+AVCodec ac3_decoder = {
+    "ac3",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_AC3,
+    sizeof(AC3DecodeState),
+    ac3_decode_init,
+    NULL,
+    ac3_decode_end,
+    ac3_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/ac3enc.c b/contrib/ffmpeg/libavcodec/ac3enc.c
new file mode 100644
index 000000000..c8c8920ed
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ac3enc.c
@@ -0,0 +1,1557 @@
+/*
+ * The simplest AC3 encoder
+ * Copyright (c) 2000 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file ac3enc.c
+ * The simplest AC3 encoder.
+ */
+//#define DEBUG
+//#define DEBUG_BITALLOC
+#include "avcodec.h"
+#include "bitstream.h"
+#include "crc.h"
+#include "ac3.h"
+
+typedef struct AC3EncodeContext {
+    PutBitContext pb;
+    int nb_channels;
+    int nb_all_channels;
+    int lfe_channel;
+    int bit_rate;
+    unsigned int sample_rate;
+    unsigned int bsid;
+    unsigned int frame_size_min; /* minimum frame size in case rounding is necessary */
+    unsigned int frame_size; /* current frame size in words */
+    unsigned int bits_written;
+    unsigned int samples_written;
+    int halfratecod;
+    unsigned int frmsizecod;
+    unsigned int fscod; /* frequency */
+    unsigned int acmod;
+    int lfe;
+    unsigned int bsmod;
+    short last_samples[AC3_MAX_CHANNELS][256];
+    unsigned int chbwcod[AC3_MAX_CHANNELS];
+    int nb_coefs[AC3_MAX_CHANNELS];
+
+    /* bitrate allocation control */
+    int sgaincod, sdecaycod, fdecaycod, dbkneecod, floorcod;
+    AC3BitAllocParameters bit_alloc;
+    int csnroffst;
+    int fgaincod[AC3_MAX_CHANNELS];
+    int fsnroffst[AC3_MAX_CHANNELS];
+    /* mantissa encoding */
+    int mant1_cnt, mant2_cnt, mant4_cnt;
+} AC3EncodeContext;
+
+#include "ac3tab.h"
+
+#define MDCT_NBITS 9
+#define N         (1 << MDCT_NBITS)
+
+/* new exponents are sent if their Norm 1 exceed this number */
+#define EXP_DIFF_THRESHOLD 1000
+
+static void fft_init(int ln);
+
+static inline int16_t fix15(float a)
+{
+    int v;
+    v = (int)(a * (float)(1 << 15));
+    if (v < -32767)
+        v = -32767;
+    else if (v > 32767)
+        v = 32767;
+    return v;
+}
+
+static inline int calc_lowcomp1(int a, int b0, int b1)
+{
+    if ((b0 + 256) == b1) {
+        a = 384 ;
+    } else if (b0 > b1) {
+        a = a - 64;
+        if (a < 0) a=0;
+    }
+    return a;
+}
+
+static inline int calc_lowcomp(int a, int b0, int b1, int bin)
+{
+    if (bin < 7) {
+        if ((b0 + 256) == b1) {
+            a = 384 ;
+        } else if (b0 > b1) {
+            a = a - 64;
+            if (a < 0) a=0;
+        }
+    } else if (bin < 20) {
+        if ((b0 + 256) == b1) {
+            a = 320 ;
+        } else if (b0 > b1) {
+            a= a - 64;
+            if (a < 0) a=0;
+        }
+    } else {
+        a = a - 128;
+        if (a < 0) a=0;
+    }
+    return a;
+}
+
+/* AC3 bit allocation. The algorithm is the one described in the AC3
+   spec. */
+void ac3_parametric_bit_allocation(AC3BitAllocParameters *s, uint8_t *bap,
+                                   int8_t *exp, int start, int end,
+                                   int snroffset, int fgain, int is_lfe,
+                                   int deltbae,int deltnseg,
+                                   uint8_t *deltoffst, uint8_t *deltlen, uint8_t *deltba)
+{
+    int bin,i,j,k,end1,v,v1,bndstrt,bndend,lowcomp,begin;
+    int fastleak,slowleak,address,tmp;
+    int16_t psd[256]; /* scaled exponents */
+    int16_t bndpsd[50]; /* interpolated exponents */
+    int16_t excite[50]; /* excitation */
+    int16_t mask[50];   /* masking value */
+
+    /* exponent mapping to PSD */
+    for(bin=start;bin<end;bin++) {
+        psd[bin]=(3072 - (exp[bin] << 7));
+    }
+
+    /* PSD integration */
+    j=start;
+    k=masktab[start];
+    do {
+        v=psd[j];
+        j++;
+        end1=bndtab[k+1];
+        if (end1 > end) end1=end;
+        for(i=j;i<end1;i++) {
+            int c,adr;
+            /* logadd */
+            v1=psd[j];
+            c=v-v1;
+            if (c >= 0) {
+                adr=c >> 1;
+                if (adr > 255) adr=255;
+                v=v + latab[adr];
+            } else {
+                adr=(-c) >> 1;
+                if (adr > 255) adr=255;
+                v=v1 + latab[adr];
+            }
+            j++;
+        }
+        bndpsd[k]=v;
+        k++;
+    } while (end > bndtab[k]);
+
+    /* excitation function */
+    bndstrt = masktab[start];
+    bndend = masktab[end-1] + 1;
+
+    if (bndstrt == 0) {
+        lowcomp = 0;
+        lowcomp = calc_lowcomp1(lowcomp, bndpsd[0], bndpsd[1]) ;
+        excite[0] = bndpsd[0] - fgain - lowcomp ;
+        lowcomp = calc_lowcomp1(lowcomp, bndpsd[1], bndpsd[2]) ;
+        excite[1] = bndpsd[1] - fgain - lowcomp ;
+        begin = 7 ;
+        for (bin = 2; bin < 7; bin++) {
+            if (!(is_lfe && bin == 6))
+                lowcomp = calc_lowcomp1(lowcomp, bndpsd[bin], bndpsd[bin+1]) ;
+            fastleak = bndpsd[bin] - fgain ;
+            slowleak = bndpsd[bin] - s->sgain ;
+            excite[bin] = fastleak - lowcomp ;
+            if (!(is_lfe && bin == 6)) {
+                if (bndpsd[bin] <= bndpsd[bin+1]) {
+                    begin = bin + 1 ;
+                    break ;
+                }
+            }
+        }
+
+        end1=bndend;
+        if (end1 > 22) end1=22;
+
+        for (bin = begin; bin < end1; bin++) {
+            if (!(is_lfe && bin == 6))
+                lowcomp = calc_lowcomp(lowcomp, bndpsd[bin], bndpsd[bin+1], bin) ;
+
+            fastleak -= s->fdecay ;
+            v = bndpsd[bin] - fgain;
+            if (fastleak < v) fastleak = v;
+
+            slowleak -= s->sdecay ;
+            v = bndpsd[bin] - s->sgain;
+            if (slowleak < v) slowleak = v;
+
+            v=fastleak - lowcomp;
+            if (slowleak > v) v=slowleak;
+
+            excite[bin] = v;
+        }
+        begin = 22;
+    } else {
+        /* coupling channel */
+        begin = bndstrt;
+
+        fastleak = (s->cplfleak << 8) + 768;
+        slowleak = (s->cplsleak << 8) + 768;
+    }
+
+    for (bin = begin; bin < bndend; bin++) {
+        fastleak -= s->fdecay ;
+        v = bndpsd[bin] - fgain;
+        if (fastleak < v) fastleak = v;
+        slowleak -= s->sdecay ;
+        v = bndpsd[bin] - s->sgain;
+        if (slowleak < v) slowleak = v;
+
+        v=fastleak;
+        if (slowleak > v) v = slowleak;
+        excite[bin] = v;
+    }
+
+    /* compute masking curve */
+
+    for (bin = bndstrt; bin < bndend; bin++) {
+        v1 = excite[bin];
+        tmp = s->dbknee - bndpsd[bin];
+        if (tmp > 0) {
+            v1 += tmp >> 2;
+        }
+        v=hth[bin >> s->halfratecod][s->fscod];
+        if (v1 > v) v=v1;
+        mask[bin] = v;
+    }
+
+    /* delta bit allocation */
+
+    if (deltbae == 0 || deltbae == 1) {
+        int band, seg, delta;
+        band = 0 ;
+        for (seg = 0; seg < deltnseg; seg++) {
+            band += deltoffst[seg] ;
+            if (deltba[seg] >= 4) {
+                delta = (deltba[seg] - 3) << 7;
+            } else {
+                delta = (deltba[seg] - 4) << 7;
+            }
+            for (k = 0; k < deltlen[seg]; k++) {
+                mask[band] += delta ;
+                band++ ;
+            }
+        }
+    }
+
+    /* compute bit allocation */
+
+    i = start ;
+    j = masktab[start] ;
+    do {
+        v=mask[j];
+        v -= snroffset ;
+        v -= s->floor ;
+        if (v < 0) v = 0;
+        v &= 0x1fe0 ;
+        v += s->floor ;
+
+        end1=bndtab[j] + bndsz[j];
+        if (end1 > end) end1=end;
+
+        for (k = i; k < end1; k++) {
+            address = (psd[i] - v) >> 5 ;
+            if (address < 0) address=0;
+            else if (address > 63) address=63;
+            bap[i] = baptab[address];
+            i++;
+        }
+    } while (end > bndtab[j++]) ;
+}
+
+typedef struct IComplex {
+    short re,im;
+} IComplex;
+
+static void fft_init(int ln)
+{
+    int i, j, m, n;
+    float alpha;
+
+    n = 1 << ln;
+
+    for(i=0;i<(n/2);i++) {
+        alpha = 2 * M_PI * (float)i / (float)n;
+        costab[i] = fix15(cos(alpha));
+        sintab[i] = fix15(sin(alpha));
+    }
+
+    for(i=0;i<n;i++) {
+        m=0;
+        for(j=0;j<ln;j++) {
+            m |= ((i >> j) & 1) << (ln-j-1);
+        }
+        fft_rev[i]=m;
+    }
+}
+
+/* butter fly op */
+#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
+{\
+  int ax, ay, bx, by;\
+  bx=pre1;\
+  by=pim1;\
+  ax=qre1;\
+  ay=qim1;\
+  pre = (bx + ax) >> 1;\
+  pim = (by + ay) >> 1;\
+  qre = (bx - ax) >> 1;\
+  qim = (by - ay) >> 1;\
+}
+
+#define MUL16(a,b) ((a) * (b))
+
+#define CMUL(pre, pim, are, aim, bre, bim) \
+{\
+   pre = (MUL16(are, bre) - MUL16(aim, bim)) >> 15;\
+   pim = (MUL16(are, bim) + MUL16(bre, aim)) >> 15;\
+}
+
+
+/* do a 2^n point complex fft on 2^ln points. */
+static void fft(IComplex *z, int ln)
+{
+    int        j, l, np, np2;
+    int        nblocks, nloops;
+    register IComplex *p,*q;
+    int tmp_re, tmp_im;
+
+    np = 1 << ln;
+
+    /* reverse */
+    for(j=0;j<np;j++) {
+        int k;
+        IComplex tmp;
+        k = fft_rev[j];
+        if (k < j) {
+            tmp = z[k];
+            z[k] = z[j];
+            z[j] = tmp;
+        }
+    }
+
+    /* pass 0 */
+
+    p=&z[0];
+    j=(np >> 1);
+    do {
+        BF(p[0].re, p[0].im, p[1].re, p[1].im,
+           p[0].re, p[0].im, p[1].re, p[1].im);
+        p+=2;
+    } while (--j != 0);
+
+    /* pass 1 */
+
+    p=&z[0];
+    j=np >> 2;
+    do {
+        BF(p[0].re, p[0].im, p[2].re, p[2].im,
+           p[0].re, p[0].im, p[2].re, p[2].im);
+        BF(p[1].re, p[1].im, p[3].re, p[3].im,
+           p[1].re, p[1].im, p[3].im, -p[3].re);
+        p+=4;
+    } while (--j != 0);
+
+    /* pass 2 .. ln-1 */
+
+    nblocks = np >> 3;
+    nloops = 1 << 2;
+    np2 = np >> 1;
+    do {
+        p = z;
+        q = z + nloops;
+        for (j = 0; j < nblocks; ++j) {
+
+            BF(p->re, p->im, q->re, q->im,
+               p->re, p->im, q->re, q->im);
+
+            p++;
+            q++;
+            for(l = nblocks; l < np2; l += nblocks) {
+                CMUL(tmp_re, tmp_im, costab[l], -sintab[l], q->re, q->im);
+                BF(p->re, p->im, q->re, q->im,
+                   p->re, p->im, tmp_re, tmp_im);
+                p++;
+                q++;
+            }
+            p += nloops;
+            q += nloops;
+        }
+        nblocks = nblocks >> 1;
+        nloops = nloops << 1;
+    } while (nblocks != 0);
+}
+
+/* do a 512 point mdct */
+static void mdct512(int32_t *out, int16_t *in)
+{
+    int i, re, im, re1, im1;
+    int16_t rot[N];
+    IComplex x[N/4];
+
+    /* shift to simplify computations */
+    for(i=0;i<N/4;i++)
+        rot[i] = -in[i + 3*N/4];
+    for(i=N/4;i<N;i++)
+        rot[i] = in[i - N/4];
+
+    /* pre rotation */
+    for(i=0;i<N/4;i++) {
+        re = ((int)rot[2*i] - (int)rot[N-1-2*i]) >> 1;
+        im = -((int)rot[N/2+2*i] - (int)rot[N/2-1-2*i]) >> 1;
+        CMUL(x[i].re, x[i].im, re, im, -xcos1[i], xsin1[i]);
+    }
+
+    fft(x, MDCT_NBITS - 2);
+
+    /* post rotation */
+    for(i=0;i<N/4;i++) {
+        re = x[i].re;
+        im = x[i].im;
+        CMUL(re1, im1, re, im, xsin1[i], xcos1[i]);
+        out[2*i] = im1;
+        out[N/2-1-2*i] = re1;
+    }
+}
+
+/* XXX: use another norm ? */
+static int calc_exp_diff(uint8_t *exp1, uint8_t *exp2, int n)
+{
+    int sum, i;
+    sum = 0;
+    for(i=0;i<n;i++) {
+        sum += abs(exp1[i] - exp2[i]);
+    }
+    return sum;
+}
+
+static void compute_exp_strategy(uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS],
+                                 uint8_t exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2],
+                                 int ch, int is_lfe)
+{
+    int i, j;
+    int exp_diff;
+
+    /* estimate if the exponent variation & decide if they should be
+       reused in the next frame */
+    exp_strategy[0][ch] = EXP_NEW;
+    for(i=1;i<NB_BLOCKS;i++) {
+        exp_diff = calc_exp_diff(exp[i][ch], exp[i-1][ch], N/2);
+#ifdef DEBUG
+        av_log(NULL, AV_LOG_DEBUG, "exp_diff=%d\n", exp_diff);
+#endif
+        if (exp_diff > EXP_DIFF_THRESHOLD)
+            exp_strategy[i][ch] = EXP_NEW;
+        else
+            exp_strategy[i][ch] = EXP_REUSE;
+    }
+    if (is_lfe)
+        return;
+
+    /* now select the encoding strategy type : if exponents are often
+       recoded, we use a coarse encoding */
+    i = 0;
+    while (i < NB_BLOCKS) {
+        j = i + 1;
+        while (j < NB_BLOCKS && exp_strategy[j][ch] == EXP_REUSE)
+            j++;
+        switch(j - i) {
+        case 1:
+            exp_strategy[i][ch] = EXP_D45;
+            break;
+        case 2:
+        case 3:
+            exp_strategy[i][ch] = EXP_D25;
+            break;
+        default:
+            exp_strategy[i][ch] = EXP_D15;
+            break;
+        }
+        i = j;
+    }
+}
+
+/* set exp[i] to min(exp[i], exp1[i]) */
+static void exponent_min(uint8_t exp[N/2], uint8_t exp1[N/2], int n)
+{
+    int i;
+
+    for(i=0;i<n;i++) {
+        if (exp1[i] < exp[i])
+            exp[i] = exp1[i];
+    }
+}
+
+/* update the exponents so that they are the ones the decoder will
+   decode. Return the number of bits used to code the exponents */
+static int encode_exp(uint8_t encoded_exp[N/2],
+                      uint8_t exp[N/2],
+                      int nb_exps,
+                      int exp_strategy)
+{
+    int group_size, nb_groups, i, j, k, exp_min;
+    uint8_t exp1[N/2];
+
+    switch(exp_strategy) {
+    case EXP_D15:
+        group_size = 1;
+        break;
+    case EXP_D25:
+        group_size = 2;
+        break;
+    default:
+    case EXP_D45:
+        group_size = 4;
+        break;
+    }
+    nb_groups = ((nb_exps + (group_size * 3) - 4) / (3 * group_size)) * 3;
+
+    /* for each group, compute the minimum exponent */
+    exp1[0] = exp[0]; /* DC exponent is handled separately */
+    k = 1;
+    for(i=1;i<=nb_groups;i++) {
+        exp_min = exp[k];
+        assert(exp_min >= 0 && exp_min <= 24);
+        for(j=1;j<group_size;j++) {
+            if (exp[k+j] < exp_min)
+                exp_min = exp[k+j];
+        }
+        exp1[i] = exp_min;
+        k += group_size;
+    }
+
+    /* constraint for DC exponent */
+    if (exp1[0] > 15)
+        exp1[0] = 15;
+
+    /* Decrease the delta between each groups to within 2
+     * so that they can be differentially encoded */
+    for (i=1;i<=nb_groups;i++)
+        exp1[i] = FFMIN(exp1[i], exp1[i-1] + 2);
+    for (i=nb_groups-1;i>=0;i--)
+        exp1[i] = FFMIN(exp1[i], exp1[i+1] + 2);
+
+    /* now we have the exponent values the decoder will see */
+    encoded_exp[0] = exp1[0];
+    k = 1;
+    for(i=1;i<=nb_groups;i++) {
+        for(j=0;j<group_size;j++) {
+            encoded_exp[k+j] = exp1[i];
+        }
+        k += group_size;
+    }
+
+#if defined(DEBUG)
+    av_log(NULL, AV_LOG_DEBUG, "exponents: strategy=%d\n", exp_strategy);
+    for(i=0;i<=nb_groups * group_size;i++) {
+        av_log(NULL, AV_LOG_DEBUG, "%d ", encoded_exp[i]);
+    }
+    av_log(NULL, AV_LOG_DEBUG, "\n");
+#endif
+
+    return 4 + (nb_groups / 3) * 7;
+}
+
+/* return the size in bits taken by the mantissa */
+static int compute_mantissa_size(AC3EncodeContext *s, uint8_t *m, int nb_coefs)
+{
+    int bits, mant, i;
+
+    bits = 0;
+    for(i=0;i<nb_coefs;i++) {
+        mant = m[i];
+        switch(mant) {
+        case 0:
+            /* nothing */
+            break;
+        case 1:
+            /* 3 mantissa in 5 bits */
+            if (s->mant1_cnt == 0)
+                bits += 5;
+            if (++s->mant1_cnt == 3)
+                s->mant1_cnt = 0;
+            break;
+        case 2:
+            /* 3 mantissa in 7 bits */
+            if (s->mant2_cnt == 0)
+                bits += 7;
+            if (++s->mant2_cnt == 3)
+                s->mant2_cnt = 0;
+            break;
+        case 3:
+            bits += 3;
+            break;
+        case 4:
+            /* 2 mantissa in 7 bits */
+            if (s->mant4_cnt == 0)
+                bits += 7;
+            if (++s->mant4_cnt == 2)
+                s->mant4_cnt = 0;
+            break;
+        case 14:
+            bits += 14;
+            break;
+        case 15:
+            bits += 16;
+            break;
+        default:
+            bits += mant - 1;
+            break;
+        }
+    }
+    return bits;
+}
+
+
+static int bit_alloc(AC3EncodeContext *s,
+                     uint8_t bap[NB_BLOCKS][AC3_MAX_CHANNELS][N/2],
+                     uint8_t encoded_exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2],
+                     uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS],
+                     int frame_bits, int csnroffst, int fsnroffst)
+{
+    int i, ch;
+
+    /* compute size */
+    for(i=0;i<NB_BLOCKS;i++) {
+        s->mant1_cnt = 0;
+        s->mant2_cnt = 0;
+        s->mant4_cnt = 0;
+        for(ch=0;ch<s->nb_all_channels;ch++) {
+            ac3_parametric_bit_allocation(&s->bit_alloc,
+                                          bap[i][ch], (int8_t *)encoded_exp[i][ch],
+                                          0, s->nb_coefs[ch],
+                                          (((csnroffst-15) << 4) +
+                                           fsnroffst) << 2,
+                                          fgaintab[s->fgaincod[ch]],
+                                          ch == s->lfe_channel,
+                                          2, 0, NULL, NULL, NULL);
+            frame_bits += compute_mantissa_size(s, bap[i][ch],
+                                                 s->nb_coefs[ch]);
+        }
+    }
+#if 0
+    printf("csnr=%d fsnr=%d frame_bits=%d diff=%d\n",
+           csnroffst, fsnroffst, frame_bits,
+           16 * s->frame_size - ((frame_bits + 7) & ~7));
+#endif
+    return 16 * s->frame_size - frame_bits;
+}
+
+#define SNR_INC1 4
+
+static int compute_bit_allocation(AC3EncodeContext *s,
+                                  uint8_t bap[NB_BLOCKS][AC3_MAX_CHANNELS][N/2],
+                                  uint8_t encoded_exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2],
+                                  uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS],
+                                  int frame_bits)
+{
+    int i, ch;
+    int csnroffst, fsnroffst;
+    uint8_t bap1[NB_BLOCKS][AC3_MAX_CHANNELS][N/2];
+    static int frame_bits_inc[8] = { 0, 0, 2, 2, 2, 4, 2, 4 };
+
+    /* init default parameters */
+    s->sdecaycod = 2;
+    s->fdecaycod = 1;
+    s->sgaincod = 1;
+    s->dbkneecod = 2;
+    s->floorcod = 4;
+    for(ch=0;ch<s->nb_all_channels;ch++)
+        s->fgaincod[ch] = 4;
+
+    /* compute real values */
+    s->bit_alloc.fscod = s->fscod;
+    s->bit_alloc.halfratecod = s->halfratecod;
+    s->bit_alloc.sdecay = sdecaytab[s->sdecaycod] >> s->halfratecod;
+    s->bit_alloc.fdecay = fdecaytab[s->fdecaycod] >> s->halfratecod;
+    s->bit_alloc.sgain = sgaintab[s->sgaincod];
+    s->bit_alloc.dbknee = dbkneetab[s->dbkneecod];
+    s->bit_alloc.floor = floortab[s->floorcod];
+
+    /* header size */
+    frame_bits += 65;
+    // if (s->acmod == 2)
+    //    frame_bits += 2;
+    frame_bits += frame_bits_inc[s->acmod];
+
+    /* audio blocks */
+    for(i=0;i<NB_BLOCKS;i++) {
+        frame_bits += s->nb_channels * 2 + 2; /* blksw * c, dithflag * c, dynrnge, cplstre */
+        if (s->acmod == 2) {
+            frame_bits++; /* rematstr */
+            if(i==0) frame_bits += 4;
+        }
+        frame_bits += 2 * s->nb_channels; /* chexpstr[2] * c */
+        if (s->lfe)
+            frame_bits++; /* lfeexpstr */
+        for(ch=0;ch<s->nb_channels;ch++) {
+            if (exp_strategy[i][ch] != EXP_REUSE)
+                frame_bits += 6 + 2; /* chbwcod[6], gainrng[2] */
+        }
+        frame_bits++; /* baie */
+        frame_bits++; /* snr */
+        frame_bits += 2; /* delta / skip */
+    }
+    frame_bits++; /* cplinu for block 0 */
+    /* bit alloc info */
+    /* sdcycod[2], fdcycod[2], sgaincod[2], dbpbcod[2], floorcod[3] */
+    /* csnroffset[6] */
+    /* (fsnoffset[4] + fgaincod[4]) * c */
+    frame_bits += 2*4 + 3 + 6 + s->nb_all_channels * (4 + 3);
+
+    /* auxdatae, crcrsv */
+    frame_bits += 2;
+
+    /* CRC */
+    frame_bits += 16;
+
+    /* now the big work begins : do the bit allocation. Modify the snr
+       offset until we can pack everything in the requested frame size */
+
+    csnroffst = s->csnroffst;
+    while (csnroffst >= 0 &&
+           bit_alloc(s, bap, encoded_exp, exp_strategy, frame_bits, csnroffst, 0) < 0)
+        csnroffst -= SNR_INC1;
+    if (csnroffst < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Bit allocation failed, try increasing the bitrate, -ab 384 for example!\n");
+        return -1;
+    }
+    while ((csnroffst + SNR_INC1) <= 63 &&
+           bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits,
+                     csnroffst + SNR_INC1, 0) >= 0) {
+        csnroffst += SNR_INC1;
+        memcpy(bap, bap1, sizeof(bap1));
+    }
+    while ((csnroffst + 1) <= 63 &&
+           bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, csnroffst + 1, 0) >= 0) {
+        csnroffst++;
+        memcpy(bap, bap1, sizeof(bap1));
+    }
+
+    fsnroffst = 0;
+    while ((fsnroffst + SNR_INC1) <= 15 &&
+           bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits,
+                     csnroffst, fsnroffst + SNR_INC1) >= 0) {
+        fsnroffst += SNR_INC1;
+        memcpy(bap, bap1, sizeof(bap1));
+    }
+    while ((fsnroffst + 1) <= 15 &&
+           bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits,
+                     csnroffst, fsnroffst + 1) >= 0) {
+        fsnroffst++;
+        memcpy(bap, bap1, sizeof(bap1));
+    }
+
+    s->csnroffst = csnroffst;
+    for(ch=0;ch<s->nb_all_channels;ch++)
+        s->fsnroffst[ch] = fsnroffst;
+#if defined(DEBUG_BITALLOC)
+    {
+        int j;
+
+        for(i=0;i<6;i++) {
+            for(ch=0;ch<s->nb_all_channels;ch++) {
+                printf("Block #%d Ch%d:\n", i, ch);
+                printf("bap=");
+                for(j=0;j<s->nb_coefs[ch];j++) {
+                    printf("%d ",bap[i][ch][j]);
+                }
+                printf("\n");
+            }
+        }
+    }
+#endif
+    return 0;
+}
+
+void ac3_common_init(void)
+{
+    int i, j, k, l, v;
+    /* compute bndtab and masktab from bandsz */
+    k = 0;
+    l = 0;
+    for(i=0;i<50;i++) {
+        bndtab[i] = l;
+        v = bndsz[i];
+        for(j=0;j<v;j++) masktab[k++]=i;
+        l += v;
+    }
+    bndtab[50] = l;
+}
+
+
+static int AC3_encode_init(AVCodecContext *avctx)
+{
+    int freq = avctx->sample_rate;
+    int bitrate = avctx->bit_rate;
+    int channels = avctx->channels;
+    AC3EncodeContext *s = avctx->priv_data;
+    int i, j, ch;
+    float alpha;
+    static const uint8_t acmod_defs[6] = {
+        0x01, /* C */
+        0x02, /* L R */
+        0x03, /* L C R */
+        0x06, /* L R SL SR */
+        0x07, /* L C R SL SR */
+        0x07, /* L C R SL SR (+LFE) */
+    };
+
+    avctx->frame_size = AC3_FRAME_SIZE;
+
+    /* number of channels */
+    if (channels < 1 || channels > 6)
+        return -1;
+    s->acmod = acmod_defs[channels - 1];
+    s->lfe = (channels == 6) ? 1 : 0;
+    s->nb_all_channels = channels;
+    s->nb_channels = channels > 5 ? 5 : channels;
+    s->lfe_channel = s->lfe ? 5 : -1;
+
+    /* frequency */
+    for(i=0;i<3;i++) {
+        for(j=0;j<3;j++)
+            if ((ac3_freqs[j] >> i) == freq)
+                goto found;
+    }
+    return -1;
+ found:
+    s->sample_rate = freq;
+    s->halfratecod = i;
+    s->fscod = j;
+    s->bsid = 8 + s->halfratecod;
+    s->bsmod = 0; /* complete main audio service */
+
+    /* bitrate & frame size */
+    bitrate /= 1000;
+    for(i=0;i<19;i++) {
+        if ((ac3_bitratetab[i] >> s->halfratecod) == bitrate)
+            break;
+    }
+    if (i == 19)
+        return -1;
+    s->bit_rate = bitrate;
+    s->frmsizecod = i << 1;
+    s->frame_size_min = (bitrate * 1000 * AC3_FRAME_SIZE) / (freq * 16);
+    s->bits_written = 0;
+    s->samples_written = 0;
+    s->frame_size = s->frame_size_min;
+
+    /* bit allocation init */
+    for(ch=0;ch<s->nb_channels;ch++) {
+        /* bandwidth for each channel */
+        /* XXX: should compute the bandwidth according to the frame
+           size, so that we avoid anoying high freq artefacts */
+        s->chbwcod[ch] = 50; /* sample bandwidth as mpeg audio layer 2 table 0 */
+        s->nb_coefs[ch] = ((s->chbwcod[ch] + 12) * 3) + 37;
+    }
+    if (s->lfe) {
+        s->nb_coefs[s->lfe_channel] = 7; /* fixed */
+    }
+    /* initial snr offset */
+    s->csnroffst = 40;
+
+    ac3_common_init();
+
+    /* mdct init */
+    fft_init(MDCT_NBITS - 2);
+    for(i=0;i<N/4;i++) {
+        alpha = 2 * M_PI * (i + 1.0 / 8.0) / (float)N;
+        xcos1[i] = fix15(-cos(alpha));
+        xsin1[i] = fix15(-sin(alpha));
+    }
+
+    avctx->coded_frame= avcodec_alloc_frame();
+    avctx->coded_frame->key_frame= 1;
+
+    return 0;
+}
+
+/* output the AC3 frame header */
+static void output_frame_header(AC3EncodeContext *s, unsigned char *frame)
+{
+    init_put_bits(&s->pb, frame, AC3_MAX_CODED_FRAME_SIZE);
+
+    put_bits(&s->pb, 16, 0x0b77); /* frame header */
+    put_bits(&s->pb, 16, 0); /* crc1: will be filled later */
+    put_bits(&s->pb, 2, s->fscod);
+    put_bits(&s->pb, 6, s->frmsizecod + (s->frame_size - s->frame_size_min));
+    put_bits(&s->pb, 5, s->bsid);
+    put_bits(&s->pb, 3, s->bsmod);
+    put_bits(&s->pb, 3, s->acmod);
+    if ((s->acmod & 0x01) && s->acmod != 0x01)
+        put_bits(&s->pb, 2, 1); /* XXX -4.5 dB */
+    if (s->acmod & 0x04)
+        put_bits(&s->pb, 2, 1); /* XXX -6 dB */
+    if (s->acmod == 0x02)
+        put_bits(&s->pb, 2, 0); /* surround not indicated */
+    put_bits(&s->pb, 1, s->lfe); /* LFE */
+    put_bits(&s->pb, 5, 31); /* dialog norm: -31 db */
+    put_bits(&s->pb, 1, 0); /* no compression control word */
+    put_bits(&s->pb, 1, 0); /* no lang code */
+    put_bits(&s->pb, 1, 0); /* no audio production info */
+    put_bits(&s->pb, 1, 0); /* no copyright */
+    put_bits(&s->pb, 1, 1); /* original bitstream */
+    put_bits(&s->pb, 1, 0); /* no time code 1 */
+    put_bits(&s->pb, 1, 0); /* no time code 2 */
+    put_bits(&s->pb, 1, 0); /* no addtional bit stream info */
+}
+
+/* symetric quantization on 'levels' levels */
+static inline int sym_quant(int c, int e, int levels)
+{
+    int v;
+
+    if (c >= 0) {
+        v = (levels * (c << e)) >> 24;
+        v = (v + 1) >> 1;
+        v = (levels >> 1) + v;
+    } else {
+        v = (levels * ((-c) << e)) >> 24;
+        v = (v + 1) >> 1;
+        v = (levels >> 1) - v;
+    }
+    assert (v >= 0 && v < levels);
+    return v;
+}
+
+/* asymetric quantization on 2^qbits levels */
+static inline int asym_quant(int c, int e, int qbits)
+{
+    int lshift, m, v;
+
+    lshift = e + qbits - 24;
+    if (lshift >= 0)
+        v = c << lshift;
+    else
+        v = c >> (-lshift);
+    /* rounding */
+    v = (v + 1) >> 1;
+    m = (1 << (qbits-1));
+    if (v >= m)
+        v = m - 1;
+    assert(v >= -m);
+    return v & ((1 << qbits)-1);
+}
+
+/* Output one audio block. There are NB_BLOCKS audio blocks in one AC3
+   frame */
+static void output_audio_block(AC3EncodeContext *s,
+                               uint8_t exp_strategy[AC3_MAX_CHANNELS],
+                               uint8_t encoded_exp[AC3_MAX_CHANNELS][N/2],
+                               uint8_t bap[AC3_MAX_CHANNELS][N/2],
+                               int32_t mdct_coefs[AC3_MAX_CHANNELS][N/2],
+                               int8_t global_exp[AC3_MAX_CHANNELS],
+                               int block_num)
+{
+    int ch, nb_groups, group_size, i, baie, rbnd;
+    uint8_t *p;
+    uint16_t qmant[AC3_MAX_CHANNELS][N/2];
+    int exp0, exp1;
+    int mant1_cnt, mant2_cnt, mant4_cnt;
+    uint16_t *qmant1_ptr, *qmant2_ptr, *qmant4_ptr;
+    int delta0, delta1, delta2;
+
+    for(ch=0;ch<s->nb_channels;ch++)
+        put_bits(&s->pb, 1, 0); /* 512 point MDCT */
+    for(ch=0;ch<s->nb_channels;ch++)
+        put_bits(&s->pb, 1, 1); /* no dither */
+    put_bits(&s->pb, 1, 0); /* no dynamic range */
+    if (block_num == 0) {
+        /* for block 0, even if no coupling, we must say it. This is a
+           waste of bit :-) */
+        put_bits(&s->pb, 1, 1); /* coupling strategy present */
+        put_bits(&s->pb, 1, 0); /* no coupling strategy */
+    } else {
+        put_bits(&s->pb, 1, 0); /* no new coupling strategy */
+    }
+
+    if (s->acmod == 2)
+      {
+        if(block_num==0)
+          {
+            /* first block must define rematrixing (rematstr)  */
+            put_bits(&s->pb, 1, 1);
+
+            /* dummy rematrixing rematflg(1:4)=0 */
+            for (rbnd=0;rbnd<4;rbnd++)
+              put_bits(&s->pb, 1, 0);
+          }
+        else
+          {
+            /* no matrixing (but should be used in the future) */
+            put_bits(&s->pb, 1, 0);
+          }
+      }
+
+#if defined(DEBUG)
+    {
+      static int count = 0;
+      av_log(NULL, AV_LOG_DEBUG, "Block #%d (%d)\n", block_num, count++);
+    }
+#endif
+    /* exponent strategy */
+    for(ch=0;ch<s->nb_channels;ch++) {
+        put_bits(&s->pb, 2, exp_strategy[ch]);
+    }
+
+    if (s->lfe) {
+        put_bits(&s->pb, 1, exp_strategy[s->lfe_channel]);
+    }
+
+    for(ch=0;ch<s->nb_channels;ch++) {
+        if (exp_strategy[ch] != EXP_REUSE)
+            put_bits(&s->pb, 6, s->chbwcod[ch]);
+    }
+
+    /* exponents */
+    for (ch = 0; ch < s->nb_all_channels; ch++) {
+        switch(exp_strategy[ch]) {
+        case EXP_REUSE:
+            continue;
+        case EXP_D15:
+            group_size = 1;
+            break;
+        case EXP_D25:
+            group_size = 2;
+            break;
+        default:
+        case EXP_D45:
+            group_size = 4;
+            break;
+        }
+        nb_groups = (s->nb_coefs[ch] + (group_size * 3) - 4) / (3 * group_size);
+        p = encoded_exp[ch];
+
+        /* first exponent */
+        exp1 = *p++;
+        put_bits(&s->pb, 4, exp1);
+
+        /* next ones are delta encoded */
+        for(i=0;i<nb_groups;i++) {
+            /* merge three delta in one code */
+            exp0 = exp1;
+            exp1 = p[0];
+            p += group_size;
+            delta0 = exp1 - exp0 + 2;
+
+            exp0 = exp1;
+            exp1 = p[0];
+            p += group_size;
+            delta1 = exp1 - exp0 + 2;
+
+            exp0 = exp1;
+            exp1 = p[0];
+            p += group_size;
+            delta2 = exp1 - exp0 + 2;
+
+            put_bits(&s->pb, 7, ((delta0 * 5 + delta1) * 5) + delta2);
+        }
+
+        if (ch != s->lfe_channel)
+            put_bits(&s->pb, 2, 0); /* no gain range info */
+    }
+
+    /* bit allocation info */
+    baie = (block_num == 0);
+    put_bits(&s->pb, 1, baie);
+    if (baie) {
+        put_bits(&s->pb, 2, s->sdecaycod);
+        put_bits(&s->pb, 2, s->fdecaycod);
+        put_bits(&s->pb, 2, s->sgaincod);
+        put_bits(&s->pb, 2, s->dbkneecod);
+        put_bits(&s->pb, 3, s->floorcod);
+    }
+
+    /* snr offset */
+    put_bits(&s->pb, 1, baie); /* always present with bai */
+    if (baie) {
+        put_bits(&s->pb, 6, s->csnroffst);
+        for(ch=0;ch<s->nb_all_channels;ch++) {
+            put_bits(&s->pb, 4, s->fsnroffst[ch]);
+            put_bits(&s->pb, 3, s->fgaincod[ch]);
+        }
+    }
+
+    put_bits(&s->pb, 1, 0); /* no delta bit allocation */
+    put_bits(&s->pb, 1, 0); /* no data to skip */
+
+    /* mantissa encoding : we use two passes to handle the grouping. A
+       one pass method may be faster, but it would necessitate to
+       modify the output stream. */
+
+    /* first pass: quantize */
+    mant1_cnt = mant2_cnt = mant4_cnt = 0;
+    qmant1_ptr = qmant2_ptr = qmant4_ptr = NULL;
+
+    for (ch = 0; ch < s->nb_all_channels; ch++) {
+        int b, c, e, v;
+
+        for(i=0;i<s->nb_coefs[ch];i++) {
+            c = mdct_coefs[ch][i];
+            e = encoded_exp[ch][i] - global_exp[ch];
+            b = bap[ch][i];
+            switch(b) {
+            case 0:
+                v = 0;
+                break;
+            case 1:
+                v = sym_quant(c, e, 3);
+                switch(mant1_cnt) {
+                case 0:
+                    qmant1_ptr = &qmant[ch][i];
+                    v = 9 * v;
+                    mant1_cnt = 1;
+                    break;
+                case 1:
+                    *qmant1_ptr += 3 * v;
+                    mant1_cnt = 2;
+                    v = 128;
+                    break;
+                default:
+                    *qmant1_ptr += v;
+                    mant1_cnt = 0;
+                    v = 128;
+                    break;
+                }
+                break;
+            case 2:
+                v = sym_quant(c, e, 5);
+                switch(mant2_cnt) {
+                case 0:
+                    qmant2_ptr = &qmant[ch][i];
+                    v = 25 * v;
+                    mant2_cnt = 1;
+                    break;
+                case 1:
+                    *qmant2_ptr += 5 * v;
+                    mant2_cnt = 2;
+                    v = 128;
+                    break;
+                default:
+                    *qmant2_ptr += v;
+                    mant2_cnt = 0;
+                    v = 128;
+                    break;
+                }
+                break;
+            case 3:
+                v = sym_quant(c, e, 7);
+                break;
+            case 4:
+                v = sym_quant(c, e, 11);
+                switch(mant4_cnt) {
+                case 0:
+                    qmant4_ptr = &qmant[ch][i];
+                    v = 11 * v;
+                    mant4_cnt = 1;
+                    break;
+                default:
+                    *qmant4_ptr += v;
+                    mant4_cnt = 0;
+                    v = 128;
+                    break;
+                }
+                break;
+            case 5:
+                v = sym_quant(c, e, 15);
+                break;
+            case 14:
+                v = asym_quant(c, e, 14);
+                break;
+            case 15:
+                v = asym_quant(c, e, 16);
+                break;
+            default:
+                v = asym_quant(c, e, b - 1);
+                break;
+            }
+            qmant[ch][i] = v;
+        }
+    }
+
+    /* second pass : output the values */
+    for (ch = 0; ch < s->nb_all_channels; ch++) {
+        int b, q;
+
+        for(i=0;i<s->nb_coefs[ch];i++) {
+            q = qmant[ch][i];
+            b = bap[ch][i];
+            switch(b) {
+            case 0:
+                break;
+            case 1:
+                if (q != 128)
+                    put_bits(&s->pb, 5, q);
+                break;
+            case 2:
+                if (q != 128)
+                    put_bits(&s->pb, 7, q);
+                break;
+            case 3:
+                put_bits(&s->pb, 3, q);
+                break;
+            case 4:
+                if (q != 128)
+                    put_bits(&s->pb, 7, q);
+                break;
+            case 14:
+                put_bits(&s->pb, 14, q);
+                break;
+            case 15:
+                put_bits(&s->pb, 16, q);
+                break;
+            default:
+                put_bits(&s->pb, b - 1, q);
+                break;
+            }
+        }
+    }
+}
+
+#define CRC16_POLY ((1 << 0) | (1 << 2) | (1 << 15) | (1 << 16))
+
+static unsigned int mul_poly(unsigned int a, unsigned int b, unsigned int poly)
+{
+    unsigned int c;
+
+    c = 0;
+    while (a) {
+        if (a & 1)
+            c ^= b;
+        a = a >> 1;
+        b = b << 1;
+        if (b & (1 << 16))
+            b ^= poly;
+    }
+    return c;
+}
+
+static unsigned int pow_poly(unsigned int a, unsigned int n, unsigned int poly)
+{
+    unsigned int r;
+    r = 1;
+    while (n) {
+        if (n & 1)
+            r = mul_poly(r, a, poly);
+        a = mul_poly(a, a, poly);
+        n >>= 1;
+    }
+    return r;
+}
+
+
+/* compute log2(max(abs(tab[]))) */
+static int log2_tab(int16_t *tab, int n)
+{
+    int i, v;
+
+    v = 0;
+    for(i=0;i<n;i++) {
+        v |= abs(tab[i]);
+    }
+    return av_log2(v);
+}
+
+static void lshift_tab(int16_t *tab, int n, int lshift)
+{
+    int i;
+
+    if (lshift > 0) {
+        for(i=0;i<n;i++) {
+            tab[i] <<= lshift;
+        }
+    } else if (lshift < 0) {
+        lshift = -lshift;
+        for(i=0;i<n;i++) {
+            tab[i] >>= lshift;
+        }
+    }
+}
+
+/* fill the end of the frame and compute the two crcs */
+static int output_frame_end(AC3EncodeContext *s)
+{
+    int frame_size, frame_size_58, n, crc1, crc2, crc_inv;
+    uint8_t *frame;
+
+    frame_size = s->frame_size; /* frame size in words */
+    /* align to 8 bits */
+    flush_put_bits(&s->pb);
+    /* add zero bytes to reach the frame size */
+    frame = s->pb.buf;
+    n = 2 * s->frame_size - (pbBufPtr(&s->pb) - frame) - 2;
+    assert(n >= 0);
+    if(n>0)
+      memset(pbBufPtr(&s->pb), 0, n);
+
+    /* Now we must compute both crcs : this is not so easy for crc1
+       because it is at the beginning of the data... */
+    frame_size_58 = (frame_size >> 1) + (frame_size >> 3);
+    crc1 = bswap_16(av_crc(av_crc8005, 0, frame + 4, 2 * frame_size_58 - 4));
+    /* XXX: could precompute crc_inv */
+    crc_inv = pow_poly((CRC16_POLY >> 1), (16 * frame_size_58) - 16, CRC16_POLY);
+    crc1 = mul_poly(crc_inv, crc1, CRC16_POLY);
+    frame[2] = crc1 >> 8;
+    frame[3] = crc1;
+
+    crc2 = bswap_16(av_crc(av_crc8005, 0, frame + 2 * frame_size_58, (frame_size - frame_size_58) * 2 - 2));
+    frame[2*frame_size - 2] = crc2 >> 8;
+    frame[2*frame_size - 1] = crc2;
+
+    //    printf("n=%d frame_size=%d\n", n, frame_size);
+    return frame_size * 2;
+}
+
+static int AC3_encode_frame(AVCodecContext *avctx,
+                            unsigned char *frame, int buf_size, void *data)
+{
+    AC3EncodeContext *s = avctx->priv_data;
+    int16_t *samples = data;
+    int i, j, k, v, ch;
+    int16_t input_samples[N];
+    int32_t mdct_coef[NB_BLOCKS][AC3_MAX_CHANNELS][N/2];
+    uint8_t exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2];
+    uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS];
+    uint8_t encoded_exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2];
+    uint8_t bap[NB_BLOCKS][AC3_MAX_CHANNELS][N/2];
+    int8_t exp_samples[NB_BLOCKS][AC3_MAX_CHANNELS];
+    int frame_bits;
+
+    frame_bits = 0;
+    for(ch=0;ch<s->nb_all_channels;ch++) {
+        /* fixed mdct to the six sub blocks & exponent computation */
+        for(i=0;i<NB_BLOCKS;i++) {
+            int16_t *sptr;
+            int sinc;
+
+            /* compute input samples */
+            memcpy(input_samples, s->last_samples[ch], N/2 * sizeof(int16_t));
+            sinc = s->nb_all_channels;
+            sptr = samples + (sinc * (N/2) * i) + ch;
+            for(j=0;j<N/2;j++) {
+                v = *sptr;
+                input_samples[j + N/2] = v;
+                s->last_samples[ch][j] = v;
+                sptr += sinc;
+            }
+
+            /* apply the MDCT window */
+            for(j=0;j<N/2;j++) {
+                input_samples[j] = MUL16(input_samples[j],
+                                         ac3_window[j]) >> 15;
+                input_samples[N-j-1] = MUL16(input_samples[N-j-1],
+                                             ac3_window[j]) >> 15;
+            }
+
+            /* Normalize the samples to use the maximum available
+               precision */
+            v = 14 - log2_tab(input_samples, N);
+            if (v < 0)
+                v = 0;
+            exp_samples[i][ch] = v - 10;
+            lshift_tab(input_samples, N, v);
+
+            /* do the MDCT */
+            mdct512(mdct_coef[i][ch], input_samples);
+
+            /* compute "exponents". We take into account the
+               normalization there */
+            for(j=0;j<N/2;j++) {
+                int e;
+                v = abs(mdct_coef[i][ch][j]);
+                if (v == 0)
+                    e = 24;
+                else {
+                    e = 23 - av_log2(v) + exp_samples[i][ch];
+                    if (e >= 24) {
+                        e = 24;
+                        mdct_coef[i][ch][j] = 0;
+                    }
+                }
+                exp[i][ch][j] = e;
+            }
+        }
+
+        compute_exp_strategy(exp_strategy, exp, ch, ch == s->lfe_channel);
+
+        /* compute the exponents as the decoder will see them. The
+           EXP_REUSE case must be handled carefully : we select the
+           min of the exponents */
+        i = 0;
+        while (i < NB_BLOCKS) {
+            j = i + 1;
+            while (j < NB_BLOCKS && exp_strategy[j][ch] == EXP_REUSE) {
+                exponent_min(exp[i][ch], exp[j][ch], s->nb_coefs[ch]);
+                j++;
+            }
+            frame_bits += encode_exp(encoded_exp[i][ch],
+                                     exp[i][ch], s->nb_coefs[ch],
+                                     exp_strategy[i][ch]);
+            /* copy encoded exponents for reuse case */
+            for(k=i+1;k<j;k++) {
+                memcpy(encoded_exp[k][ch], encoded_exp[i][ch],
+                       s->nb_coefs[ch] * sizeof(uint8_t));
+            }
+            i = j;
+        }
+    }
+
+    /* adjust for fractional frame sizes */
+    while(s->bits_written >= s->bit_rate*1000 && s->samples_written >= s->sample_rate) {
+        s->bits_written -= s->bit_rate*1000;
+        s->samples_written -= s->sample_rate;
+    }
+    s->frame_size = s->frame_size_min + (s->bits_written * s->sample_rate < s->samples_written * s->bit_rate*1000);
+    s->bits_written += s->frame_size * 16;
+    s->samples_written += AC3_FRAME_SIZE;
+
+    compute_bit_allocation(s, bap, encoded_exp, exp_strategy, frame_bits);
+    /* everything is known... let's output the frame */
+    output_frame_header(s, frame);
+
+    for(i=0;i<NB_BLOCKS;i++) {
+        output_audio_block(s, exp_strategy[i], encoded_exp[i],
+                           bap[i], mdct_coef[i], exp_samples[i], i);
+    }
+    return output_frame_end(s);
+}
+
+static int AC3_encode_close(AVCodecContext *avctx)
+{
+    av_freep(&avctx->coded_frame);
+    return 0;
+}
+
+#if 0
+/*************************************************************************/
+/* TEST */
+
+#define FN (N/4)
+
+void fft_test(void)
+{
+    IComplex in[FN], in1[FN];
+    int k, n, i;
+    float sum_re, sum_im, a;
+
+    /* FFT test */
+
+    for(i=0;i<FN;i++) {
+        in[i].re = random() % 65535 - 32767;
+        in[i].im = random() % 65535 - 32767;
+        in1[i] = in[i];
+    }
+    fft(in, 7);
+
+    /* do it by hand */
+    for(k=0;k<FN;k++) {
+        sum_re = 0;
+        sum_im = 0;
+        for(n=0;n<FN;n++) {
+            a = -2 * M_PI * (n * k) / FN;
+            sum_re += in1[n].re * cos(a) - in1[n].im * sin(a);
+            sum_im += in1[n].re * sin(a) + in1[n].im * cos(a);
+        }
+        printf("%3d: %6d,%6d %6.0f,%6.0f\n",
+               k, in[k].re, in[k].im, sum_re / FN, sum_im / FN);
+    }
+}
+
+void mdct_test(void)
+{
+    int16_t input[N];
+    int32_t output[N/2];
+    float input1[N];
+    float output1[N/2];
+    float s, a, err, e, emax;
+    int i, k, n;
+
+    for(i=0;i<N;i++) {
+        input[i] = (random() % 65535 - 32767) * 9 / 10;
+        input1[i] = input[i];
+    }
+
+    mdct512(output, input);
+
+    /* do it by hand */
+    for(k=0;k<N/2;k++) {
+        s = 0;
+        for(n=0;n<N;n++) {
+            a = (2*M_PI*(2*n+1+N/2)*(2*k+1) / (4 * N));
+            s += input1[n] * cos(a);
+        }
+        output1[k] = -2 * s / N;
+    }
+
+    err = 0;
+    emax = 0;
+    for(i=0;i<N/2;i++) {
+        printf("%3d: %7d %7.0f\n", i, output[i], output1[i]);
+        e = output[i] - output1[i];
+        if (e > emax)
+            emax = e;
+        err += e * e;
+    }
+    printf("err2=%f emax=%f\n", err / (N/2), emax);
+}
+
+void test_ac3(void)
+{
+    AC3EncodeContext ctx;
+    unsigned char frame[AC3_MAX_CODED_FRAME_SIZE];
+    short samples[AC3_FRAME_SIZE];
+    int ret, i;
+
+    AC3_encode_init(&ctx, 44100, 64000, 1);
+
+    fft_test();
+    mdct_test();
+
+    for(i=0;i<AC3_FRAME_SIZE;i++)
+        samples[i] = (int)(sin(2*M_PI*i*1000.0/44100) * 10000);
+    ret = AC3_encode_frame(&ctx, frame, samples);
+    printf("ret=%d\n", ret);
+}
+#endif
+
+AVCodec ac3_encoder = {
+    "ac3",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_AC3,
+    sizeof(AC3EncodeContext),
+    AC3_encode_init,
+    AC3_encode_frame,
+    AC3_encode_close,
+    NULL,
+};
diff --git a/contrib/ffmpeg/libavcodec/ac3tab.h b/contrib/ffmpeg/libavcodec/ac3tab.h
new file mode 100644
index 000000000..9f90ae95c
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ac3tab.h
@@ -0,0 +1,205 @@
+/*
+ * AC3 tables
+ * copyright (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file ac3tab.h
+ * tables taken directly from AC3 spec.
+ */
+
+/* possible frequencies */
+static const uint16_t ac3_freqs[3] = { 48000, 44100, 32000 };
+
+/* possible bitrates */
+static const uint16_t ac3_bitratetab[19] = {
+    32, 40, 48, 56, 64, 80, 96, 112, 128,
+    160, 192, 224, 256, 320, 384, 448, 512, 576, 640
+};
+
+/* AC3 MDCT window */
+
+/* MDCT window */
+static const int16_t ac3_window[256] = {
+    4,    7,   12,   16,   21,   28,   34,   42,
+   51,   61,   72,   84,   97,  111,  127,  145,
+  164,  184,  207,  231,  257,  285,  315,  347,
+  382,  419,  458,  500,  544,  591,  641,  694,
+  750,  810,  872,  937, 1007, 1079, 1155, 1235,
+ 1318, 1406, 1497, 1593, 1692, 1796, 1903, 2016,
+ 2132, 2253, 2379, 2509, 2644, 2783, 2927, 3076,
+ 3230, 3389, 3552, 3721, 3894, 4072, 4255, 4444,
+ 4637, 4835, 5038, 5246, 5459, 5677, 5899, 6127,
+ 6359, 6596, 6837, 7083, 7334, 7589, 7848, 8112,
+ 8380, 8652, 8927, 9207, 9491, 9778,10069,10363,
+10660,10960,11264,11570,11879,12190,12504,12820,
+13138,13458,13780,14103,14427,14753,15079,15407,
+15735,16063,16392,16720,17049,17377,17705,18032,
+18358,18683,19007,19330,19651,19970,20287,20602,
+20914,21225,21532,21837,22139,22438,22733,23025,
+23314,23599,23880,24157,24430,24699,24964,25225,
+25481,25732,25979,26221,26459,26691,26919,27142,
+27359,27572,27780,27983,28180,28373,28560,28742,
+28919,29091,29258,29420,29577,29729,29876,30018,
+30155,30288,30415,30538,30657,30771,30880,30985,
+31086,31182,31274,31363,31447,31528,31605,31678,
+31747,31814,31877,31936,31993,32046,32097,32145,
+32190,32232,32272,32310,32345,32378,32409,32438,
+32465,32490,32513,32535,32556,32574,32592,32608,
+32623,32636,32649,32661,32671,32681,32690,32698,
+32705,32712,32718,32724,32729,32733,32737,32741,
+32744,32747,32750,32752,32754,32756,32757,32759,
+32760,32761,32762,32763,32764,32764,32765,32765,
+32766,32766,32766,32766,32767,32767,32767,32767,
+32767,32767,32767,32767,32767,32767,32767,32767,
+32767,32767,32767,32767,32767,32767,32767,32767,
+};
+
+static uint8_t masktab[253];
+
+static const uint8_t latab[260]= {
+0x0040,0x003f,0x003e,0x003d,0x003c,0x003b,0x003a,0x0039,0x0038,0x0037,
+0x0036,0x0035,0x0034,0x0034,0x0033,0x0032,0x0031,0x0030,0x002f,0x002f,
+0x002e,0x002d,0x002c,0x002c,0x002b,0x002a,0x0029,0x0029,0x0028,0x0027,
+0x0026,0x0026,0x0025,0x0024,0x0024,0x0023,0x0023,0x0022,0x0021,0x0021,
+0x0020,0x0020,0x001f,0x001e,0x001e,0x001d,0x001d,0x001c,0x001c,0x001b,
+0x001b,0x001a,0x001a,0x0019,0x0019,0x0018,0x0018,0x0017,0x0017,0x0016,
+0x0016,0x0015,0x0015,0x0015,0x0014,0x0014,0x0013,0x0013,0x0013,0x0012,
+0x0012,0x0012,0x0011,0x0011,0x0011,0x0010,0x0010,0x0010,0x000f,0x000f,
+0x000f,0x000e,0x000e,0x000e,0x000d,0x000d,0x000d,0x000d,0x000c,0x000c,
+0x000c,0x000c,0x000b,0x000b,0x000b,0x000b,0x000a,0x000a,0x000a,0x000a,
+0x000a,0x0009,0x0009,0x0009,0x0009,0x0009,0x0008,0x0008,0x0008,0x0008,
+0x0008,0x0008,0x0007,0x0007,0x0007,0x0007,0x0007,0x0007,0x0006,0x0006,
+0x0006,0x0006,0x0006,0x0006,0x0006,0x0006,0x0005,0x0005,0x0005,0x0005,
+0x0005,0x0005,0x0005,0x0005,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,
+0x0004,0x0004,0x0004,0x0004,0x0004,0x0003,0x0003,0x0003,0x0003,0x0003,
+0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0002,
+0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,
+0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0001,0x0001,
+0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,
+0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,
+0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,
+0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+};
+
+static const uint16_t hth[50][3]= {
+{ 0x04d0,0x04f0,0x0580 },
+{ 0x04d0,0x04f0,0x0580 },
+{ 0x0440,0x0460,0x04b0 },
+{ 0x0400,0x0410,0x0450 },
+{ 0x03e0,0x03e0,0x0420 },
+{ 0x03c0,0x03d0,0x03f0 },
+{ 0x03b0,0x03c0,0x03e0 },
+{ 0x03b0,0x03b0,0x03d0 },
+{ 0x03a0,0x03b0,0x03c0 },
+{ 0x03a0,0x03a0,0x03b0 },
+{ 0x03a0,0x03a0,0x03b0 },
+{ 0x03a0,0x03a0,0x03b0 },
+{ 0x03a0,0x03a0,0x03a0 },
+{ 0x0390,0x03a0,0x03a0 },
+{ 0x0390,0x0390,0x03a0 },
+{ 0x0390,0x0390,0x03a0 },
+{ 0x0380,0x0390,0x03a0 },
+{ 0x0380,0x0380,0x03a0 },
+{ 0x0370,0x0380,0x03a0 },
+{ 0x0370,0x0380,0x03a0 },
+{ 0x0360,0x0370,0x0390 },
+{ 0x0360,0x0370,0x0390 },
+{ 0x0350,0x0360,0x0390 },
+{ 0x0350,0x0360,0x0390 },
+{ 0x0340,0x0350,0x0380 },
+{ 0x0340,0x0350,0x0380 },
+{ 0x0330,0x0340,0x0380 },
+{ 0x0320,0x0340,0x0370 },
+{ 0x0310,0x0320,0x0360 },
+{ 0x0300,0x0310,0x0350 },
+{ 0x02f0,0x0300,0x0340 },
+{ 0x02f0,0x02f0,0x0330 },
+{ 0x02f0,0x02f0,0x0320 },
+{ 0x02f0,0x02f0,0x0310 },
+{ 0x0300,0x02f0,0x0300 },
+{ 0x0310,0x0300,0x02f0 },
+{ 0x0340,0x0320,0x02f0 },
+{ 0x0390,0x0350,0x02f0 },
+{ 0x03e0,0x0390,0x0300 },
+{ 0x0420,0x03e0,0x0310 },
+{ 0x0460,0x0420,0x0330 },
+{ 0x0490,0x0450,0x0350 },
+{ 0x04a0,0x04a0,0x03c0 },
+{ 0x0460,0x0490,0x0410 },
+{ 0x0440,0x0460,0x0470 },
+{ 0x0440,0x0440,0x04a0 },
+{ 0x0520,0x0480,0x0460 },
+{ 0x0800,0x0630,0x0440 },
+{ 0x0840,0x0840,0x0450 },
+{ 0x0840,0x0840,0x04e0 },
+};
+
+static const uint8_t baptab[64]= {
+    0, 1, 1, 1, 1, 1, 2, 2, 3, 3,
+    3, 4, 4, 5, 5, 6, 6, 6, 6, 7,
+    7, 7, 7, 8, 8, 8, 8, 9, 9, 9,
+    9, 10, 10, 10, 10, 11, 11, 11, 11, 12,
+    12, 12, 12, 13, 13, 13, 13, 14, 14, 14,
+    14, 14, 14, 14, 14, 15, 15, 15, 15, 15,
+    15, 15, 15, 15,
+};
+
+static const uint8_t sdecaytab[4]={
+    0x0f, 0x11, 0x13, 0x15,
+};
+
+static const uint8_t fdecaytab[4]={
+    0x3f, 0x53, 0x67, 0x7b,
+};
+
+static const uint16_t sgaintab[4]= {
+    0x540, 0x4d8, 0x478, 0x410,
+};
+
+static const uint16_t dbkneetab[4]= {
+    0x000, 0x700, 0x900, 0xb00,
+};
+
+static const int16_t floortab[8]= {
+    0x2f0, 0x2b0, 0x270, 0x230, 0x1f0, 0x170, 0x0f0, 0xf800,
+};
+
+static const uint16_t fgaintab[8]= {
+    0x080, 0x100, 0x180, 0x200, 0x280, 0x300, 0x380, 0x400,
+};
+
+static const uint8_t bndsz[50]={
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3,
+    3, 6, 6, 6, 6, 6, 6, 12, 12, 12, 12, 24, 24, 24, 24, 24
+};
+
+static uint8_t bndtab[51];
+
+/* fft & mdct sin cos tables */
+static int16_t costab[64];
+static int16_t sintab[64];
+static int16_t fft_rev[512];
+static int16_t xcos1[128];
+static int16_t xsin1[128];
diff --git a/contrib/ffmpeg/libavcodec/adpcm.c b/contrib/ffmpeg/libavcodec/adpcm.c
new file mode 100644
index 000000000..ec3fe6f6e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/adpcm.c
@@ -0,0 +1,1370 @@
+/*
+ * ADPCM codecs
+ * Copyright (c) 2001-2003 The ffmpeg Project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+#include "bitstream.h"
+
+/**
+ * @file adpcm.c
+ * ADPCM codecs.
+ * First version by Francois Revol (revol@free.fr)
+ * Fringe ADPCM codecs (e.g., DK3, DK4, Westwood)
+ *   by Mike Melanson (melanson@pcisys.net)
+ * CD-ROM XA ADPCM codec by BERO
+ * EA ADPCM decoder by Robin Kay (komadori@myrealbox.com)
+ *
+ * Features and limitations:
+ *
+ * Reference documents:
+ * http://www.pcisys.net/~melanson/codecs/simpleaudio.html
+ * http://www.geocities.com/SiliconValley/8682/aud3.txt
+ * http://openquicktime.sourceforge.net/plugins.htm
+ * XAnim sources (xa_codec.c) http://www.rasnaimaging.com/people/lapus/download.html
+ * http://www.cs.ucla.edu/~leec/mediabench/applications.html
+ * SoX source code http://home.sprynet.com/~cbagwell/sox.html
+ *
+ * CD-ROM XA:
+ * http://ku-www.ss.titech.ac.jp/~yatsushi/xaadpcm.html
+ * vagpack & depack http://homepages.compuserve.de/bITmASTER32/psx-index.html
+ * readstr http://www.geocities.co.jp/Playtown/2004/
+ */
+
+#define BLKSIZE 1024
+
+#define CLAMP_TO_SHORT(value) \
+if (value > 32767) \
+    value = 32767; \
+else if (value < -32768) \
+    value = -32768; \
+
+/* step_table[] and index_table[] are from the ADPCM reference source */
+/* This is the index table: */
+static const int index_table[16] = {
+    -1, -1, -1, -1, 2, 4, 6, 8,
+    -1, -1, -1, -1, 2, 4, 6, 8,
+};
+
+/**
+ * This is the step table. Note that many programs use slight deviations from
+ * this table, but such deviations are negligible:
+ */
+static const int step_table[89] = {
+    7, 8, 9, 10, 11, 12, 13, 14, 16, 17,
+    19, 21, 23, 25, 28, 31, 34, 37, 41, 45,
+    50, 55, 60, 66, 73, 80, 88, 97, 107, 118,
+    130, 143, 157, 173, 190, 209, 230, 253, 279, 307,
+    337, 371, 408, 449, 494, 544, 598, 658, 724, 796,
+    876, 963, 1060, 1166, 1282, 1411, 1552, 1707, 1878, 2066,
+    2272, 2499, 2749, 3024, 3327, 3660, 4026, 4428, 4871, 5358,
+    5894, 6484, 7132, 7845, 8630, 9493, 10442, 11487, 12635, 13899,
+    15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767
+};
+
+/* These are for MS-ADPCM */
+/* AdaptationTable[], AdaptCoeff1[], and AdaptCoeff2[] are from libsndfile */
+static const int AdaptationTable[] = {
+        230, 230, 230, 230, 307, 409, 512, 614,
+        768, 614, 512, 409, 307, 230, 230, 230
+};
+
+static const int AdaptCoeff1[] = {
+        256, 512, 0, 192, 240, 460, 392
+};
+
+static const int AdaptCoeff2[] = {
+        0, -256, 0, 64, 0, -208, -232
+};
+
+/* These are for CD-ROM XA ADPCM */
+static const int xa_adpcm_table[5][2] = {
+   {   0,   0 },
+   {  60,   0 },
+   { 115, -52 },
+   {  98, -55 },
+   { 122, -60 }
+};
+
+static const int ea_adpcm_table[] = {
+    0, 240, 460, 392, 0, 0, -208, -220, 0, 1,
+    3, 4, 7, 8, 10, 11, 0, -1, -3, -4
+};
+
+static const int ct_adpcm_table[8] = {
+    0x00E6, 0x00E6, 0x00E6, 0x00E6,
+    0x0133, 0x0199, 0x0200, 0x0266
+};
+
+// padded to zero where table size is less then 16
+static const int swf_index_tables[4][16] = {
+    /*2*/ { -1, 2 },
+    /*3*/ { -1, -1, 2, 4 },
+    /*4*/ { -1, -1, -1, -1, 2, 4, 6, 8 },
+    /*5*/ { -1, -1, -1, -1, -1, -1, -1, -1, 1, 2, 4, 6, 8, 10, 13, 16 }
+};
+
+static const int yamaha_indexscale[] = {
+    230, 230, 230, 230, 307, 409, 512, 614,
+    230, 230, 230, 230, 307, 409, 512, 614
+};
+
+static const int yamaha_difflookup[] = {
+    1, 3, 5, 7, 9, 11, 13, 15,
+    -1, -3, -5, -7, -9, -11, -13, -15
+};
+
+/* end of tables */
+
+typedef struct ADPCMChannelStatus {
+    int predictor;
+    short int step_index;
+    int step;
+    /* for encoding */
+    int prev_sample;
+
+    /* MS version */
+    short sample1;
+    short sample2;
+    int coeff1;
+    int coeff2;
+    int idelta;
+} ADPCMChannelStatus;
+
+typedef struct ADPCMContext {
+    int channel; /* for stereo MOVs, decode left, then decode right, then tell it's decoded */
+    ADPCMChannelStatus status[2];
+    short sample_buffer[32]; /* hold left samples while waiting for right samples */
+
+    /* SWF only */
+    int nb_bits;
+    int nb_samples;
+} ADPCMContext;
+
+/* XXX: implement encoding */
+
+#ifdef CONFIG_ENCODERS
+static int adpcm_encode_init(AVCodecContext *avctx)
+{
+    if (avctx->channels > 2)
+        return -1; /* only stereo or mono =) */
+    switch(avctx->codec->id) {
+    case CODEC_ID_ADPCM_IMA_QT:
+        av_log(avctx, AV_LOG_ERROR, "ADPCM: codec adpcm_ima_qt unsupported for encoding !\n");
+        avctx->frame_size = 64; /* XXX: can multiple of avctx->channels * 64 (left and right blocks are interleaved) */
+        return -1;
+        break;
+    case CODEC_ID_ADPCM_IMA_WAV:
+        avctx->frame_size = (BLKSIZE - 4 * avctx->channels) * 8 / (4 * avctx->channels) + 1; /* each 16 bits sample gives one nibble */
+                                                             /* and we have 4 bytes per channel overhead */
+        avctx->block_align = BLKSIZE;
+        /* seems frame_size isn't taken into account... have to buffer the samples :-( */
+        break;
+    case CODEC_ID_ADPCM_MS:
+        avctx->frame_size = (BLKSIZE - 7 * avctx->channels) * 2 / avctx->channels + 2; /* each 16 bits sample gives one nibble */
+                                                             /* and we have 7 bytes per channel overhead */
+        avctx->block_align = BLKSIZE;
+        break;
+    case CODEC_ID_ADPCM_YAMAHA:
+        avctx->frame_size = BLKSIZE * avctx->channels;
+        avctx->block_align = BLKSIZE;
+        break;
+    default:
+        return -1;
+        break;
+    }
+
+    avctx->coded_frame= avcodec_alloc_frame();
+    avctx->coded_frame->key_frame= 1;
+
+    return 0;
+}
+
+static int adpcm_encode_close(AVCodecContext *avctx)
+{
+    av_freep(&avctx->coded_frame);
+
+    return 0;
+}
+
+
+static inline unsigned char adpcm_ima_compress_sample(ADPCMChannelStatus *c, short sample)
+{
+    int delta = sample - c->prev_sample;
+    int nibble = FFMIN(7, abs(delta)*4/step_table[c->step_index]) + (delta<0)*8;
+    c->prev_sample = c->prev_sample + ((step_table[c->step_index] * yamaha_difflookup[nibble]) / 8);
+    CLAMP_TO_SHORT(c->prev_sample);
+    c->step_index = clip(c->step_index + index_table[nibble], 0, 88);
+    return nibble;
+}
+
+static inline unsigned char adpcm_ms_compress_sample(ADPCMChannelStatus *c, short sample)
+{
+    int predictor, nibble, bias;
+
+    predictor = (((c->sample1) * (c->coeff1)) + ((c->sample2) * (c->coeff2))) / 256;
+
+    nibble= sample - predictor;
+    if(nibble>=0) bias= c->idelta/2;
+    else          bias=-c->idelta/2;
+
+    nibble= (nibble + bias) / c->idelta;
+    nibble= clip(nibble, -8, 7)&0x0F;
+
+    predictor += (signed)((nibble & 0x08)?(nibble - 0x10):(nibble)) * c->idelta;
+    CLAMP_TO_SHORT(predictor);
+
+    c->sample2 = c->sample1;
+    c->sample1 = predictor;
+
+    c->idelta = (AdaptationTable[(int)nibble] * c->idelta) >> 8;
+    if (c->idelta < 16) c->idelta = 16;
+
+    return nibble;
+}
+
+static inline unsigned char adpcm_yamaha_compress_sample(ADPCMChannelStatus *c, short sample)
+{
+    int nibble, delta;
+
+    if(!c->step) {
+        c->predictor = 0;
+        c->step = 127;
+    }
+
+    delta = sample - c->predictor;
+
+    nibble = FFMIN(7, abs(delta)*4/c->step) + (delta<0)*8;
+
+    c->predictor = c->predictor + ((c->step * yamaha_difflookup[nibble]) / 8);
+    CLAMP_TO_SHORT(c->predictor);
+    c->step = (c->step * yamaha_indexscale[nibble]) >> 8;
+    c->step = clip(c->step, 127, 24567);
+
+    return nibble;
+}
+
+typedef struct TrellisPath {
+    int nibble;
+    int prev;
+} TrellisPath;
+
+typedef struct TrellisNode {
+    uint32_t ssd;
+    int path;
+    int sample1;
+    int sample2;
+    int step;
+} TrellisNode;
+
+static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples,
+                                   uint8_t *dst, ADPCMChannelStatus *c, int n)
+{
+#define FREEZE_INTERVAL 128
+    //FIXME 6% faster if frontier is a compile-time constant
+    const int frontier = 1 << avctx->trellis;
+    const int stride = avctx->channels;
+    const int version = avctx->codec->id;
+    const int max_paths = frontier*FREEZE_INTERVAL;
+    TrellisPath paths[max_paths], *p;
+    TrellisNode node_buf[2][frontier];
+    TrellisNode *nodep_buf[2][frontier];
+    TrellisNode **nodes = nodep_buf[0]; // nodes[] is always sorted by .ssd
+    TrellisNode **nodes_next = nodep_buf[1];
+    int pathn = 0, froze = -1, i, j, k;
+
+    assert(!(max_paths&(max_paths-1)));
+
+    memset(nodep_buf, 0, sizeof(nodep_buf));
+    nodes[0] = &node_buf[1][0];
+    nodes[0]->ssd = 0;
+    nodes[0]->path = 0;
+    nodes[0]->step = c->step_index;
+    nodes[0]->sample1 = c->sample1;
+    nodes[0]->sample2 = c->sample2;
+    if(version == CODEC_ID_ADPCM_IMA_WAV)
+        nodes[0]->sample1 = c->prev_sample;
+    if(version == CODEC_ID_ADPCM_MS)
+        nodes[0]->step = c->idelta;
+    if(version == CODEC_ID_ADPCM_YAMAHA) {
+        if(c->step == 0) {
+            nodes[0]->step = 127;
+            nodes[0]->sample1 = 0;
+        } else {
+            nodes[0]->step = c->step;
+            nodes[0]->sample1 = c->predictor;
+        }
+    }
+
+    for(i=0; i<n; i++) {
+        TrellisNode *t = node_buf[i&1];
+        TrellisNode **u;
+        int sample = samples[i*stride];
+        memset(nodes_next, 0, frontier*sizeof(TrellisNode*));
+        for(j=0; j<frontier && nodes[j]; j++) {
+            // higher j have higher ssd already, so they're unlikely to use a suboptimal next sample too
+            const int range = (j < frontier/2) ? 1 : 0;
+            const int step = nodes[j]->step;
+            int nidx;
+            if(version == CODEC_ID_ADPCM_MS) {
+                const int predictor = ((nodes[j]->sample1 * c->coeff1) + (nodes[j]->sample2 * c->coeff2)) / 256;
+                const int div = (sample - predictor) / step;
+                const int nmin = clip(div-range, -8, 6);
+                const int nmax = clip(div+range, -7, 7);
+                for(nidx=nmin; nidx<=nmax; nidx++) {
+                    const int nibble = nidx & 0xf;
+                    int dec_sample = predictor + nidx * step;
+#define STORE_NODE(NAME, STEP_INDEX)\
+                    int d;\
+                    uint32_t ssd;\
+                    CLAMP_TO_SHORT(dec_sample);\
+                    d = sample - dec_sample;\
+                    ssd = nodes[j]->ssd + d*d;\
+                    if(nodes_next[frontier-1] && ssd >= nodes_next[frontier-1]->ssd)\
+                        continue;\
+                    /* Collapse any two states with the same previous sample value. \
+                     * One could also distinguish states by step and by 2nd to last
+                     * sample, but the effects of that are negligible. */\
+                    for(k=0; k<frontier && nodes_next[k]; k++) {\
+                        if(dec_sample == nodes_next[k]->sample1) {\
+                            assert(ssd >= nodes_next[k]->ssd);\
+                            goto next_##NAME;\
+                        }\
+                    }\
+                    for(k=0; k<frontier; k++) {\
+                        if(!nodes_next[k] || ssd < nodes_next[k]->ssd) {\
+                            TrellisNode *u = nodes_next[frontier-1];\
+                            if(!u) {\
+                                assert(pathn < max_paths);\
+                                u = t++;\
+                                u->path = pathn++;\
+                            }\
+                            u->ssd = ssd;\
+                            u->step = STEP_INDEX;\
+                            u->sample2 = nodes[j]->sample1;\
+                            u->sample1 = dec_sample;\
+                            paths[u->path].nibble = nibble;\
+                            paths[u->path].prev = nodes[j]->path;\
+                            memmove(&nodes_next[k+1], &nodes_next[k], (frontier-k-1)*sizeof(TrellisNode*));\
+                            nodes_next[k] = u;\
+                            break;\
+                        }\
+                    }\
+                    next_##NAME:;
+                    STORE_NODE(ms, FFMAX(16, (AdaptationTable[nibble] * step) >> 8));
+                }
+            } else if(version == CODEC_ID_ADPCM_IMA_WAV) {
+#define LOOP_NODES(NAME, STEP_TABLE, STEP_INDEX)\
+                const int predictor = nodes[j]->sample1;\
+                const int div = (sample - predictor) * 4 / STEP_TABLE;\
+                int nmin = clip(div-range, -7, 6);\
+                int nmax = clip(div+range, -6, 7);\
+                if(nmin<=0) nmin--; /* distinguish -0 from +0 */\
+                if(nmax<0) nmax--;\
+                for(nidx=nmin; nidx<=nmax; nidx++) {\
+                    const int nibble = nidx<0 ? 7-nidx : nidx;\
+                    int dec_sample = predictor + (STEP_TABLE * yamaha_difflookup[nibble]) / 8;\
+                    STORE_NODE(NAME, STEP_INDEX);\
+                }
+                LOOP_NODES(ima, step_table[step], clip(step + index_table[nibble], 0, 88));
+            } else { //CODEC_ID_ADPCM_YAMAHA
+                LOOP_NODES(yamaha, step, clip((step * yamaha_indexscale[nibble]) >> 8, 127, 24567));
+#undef LOOP_NODES
+#undef STORE_NODE
+            }
+        }
+
+        u = nodes;
+        nodes = nodes_next;
+        nodes_next = u;
+
+        // prevent overflow
+        if(nodes[0]->ssd > (1<<28)) {
+            for(j=1; j<frontier && nodes[j]; j++)
+                nodes[j]->ssd -= nodes[0]->ssd;
+            nodes[0]->ssd = 0;
+        }
+
+        // merge old paths to save memory
+        if(i == froze + FREEZE_INTERVAL) {
+            p = &paths[nodes[0]->path];
+            for(k=i; k>froze; k--) {
+                dst[k] = p->nibble;
+                p = &paths[p->prev];
+            }
+            froze = i;
+            pathn = 0;
+            // other nodes might use paths that don't coincide with the frozen one.
+            // checking which nodes do so is too slow, so just kill them all.
+            // this also slightly improves quality, but I don't know why.
+            memset(nodes+1, 0, (frontier-1)*sizeof(TrellisNode*));
+        }
+    }
+
+    p = &paths[nodes[0]->path];
+    for(i=n-1; i>froze; i--) {
+        dst[i] = p->nibble;
+        p = &paths[p->prev];
+    }
+
+    c->predictor = nodes[0]->sample1;
+    c->sample1 = nodes[0]->sample1;
+    c->sample2 = nodes[0]->sample2;
+    c->step_index = nodes[0]->step;
+    c->step = nodes[0]->step;
+    c->idelta = nodes[0]->step;
+}
+
+static int adpcm_encode_frame(AVCodecContext *avctx,
+                            unsigned char *frame, int buf_size, void *data)
+{
+    int n, i, st;
+    short *samples;
+    unsigned char *dst;
+    ADPCMContext *c = avctx->priv_data;
+
+    dst = frame;
+    samples = (short *)data;
+    st= avctx->channels == 2;
+/*    n = (BLKSIZE - 4 * avctx->channels) / (2 * 8 * avctx->channels); */
+
+    switch(avctx->codec->id) {
+    case CODEC_ID_ADPCM_IMA_QT: /* XXX: can't test until we get .mov writer */
+        break;
+    case CODEC_ID_ADPCM_IMA_WAV:
+        n = avctx->frame_size / 8;
+            c->status[0].prev_sample = (signed short)samples[0]; /* XXX */
+/*            c->status[0].step_index = 0; *//* XXX: not sure how to init the state machine */
+            *dst++ = (c->status[0].prev_sample) & 0xFF; /* little endian */
+            *dst++ = (c->status[0].prev_sample >> 8) & 0xFF;
+            *dst++ = (unsigned char)c->status[0].step_index;
+            *dst++ = 0; /* unknown */
+            samples++;
+            if (avctx->channels == 2) {
+                c->status[1].prev_sample = (signed short)samples[1];
+/*                c->status[1].step_index = 0; */
+                *dst++ = (c->status[1].prev_sample) & 0xFF;
+                *dst++ = (c->status[1].prev_sample >> 8) & 0xFF;
+                *dst++ = (unsigned char)c->status[1].step_index;
+                *dst++ = 0;
+                samples++;
+            }
+
+            /* stereo: 4 bytes (8 samples) for left, 4 bytes for right, 4 bytes left, ... */
+            if(avctx->trellis > 0) {
+                uint8_t buf[2][n*8];
+                adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n*8);
+                if(avctx->channels == 2)
+                    adpcm_compress_trellis(avctx, samples+1, buf[1], &c->status[1], n*8);
+                for(i=0; i<n; i++) {
+                    *dst++ = buf[0][8*i+0] | (buf[0][8*i+1] << 4);
+                    *dst++ = buf[0][8*i+2] | (buf[0][8*i+3] << 4);
+                    *dst++ = buf[0][8*i+4] | (buf[0][8*i+5] << 4);
+                    *dst++ = buf[0][8*i+6] | (buf[0][8*i+7] << 4);
+                    if (avctx->channels == 2) {
+                        *dst++ = buf[1][8*i+0] | (buf[1][8*i+1] << 4);
+                        *dst++ = buf[1][8*i+2] | (buf[1][8*i+3] << 4);
+                        *dst++ = buf[1][8*i+4] | (buf[1][8*i+5] << 4);
+                        *dst++ = buf[1][8*i+6] | (buf[1][8*i+7] << 4);
+                    }
+                }
+            } else
+            for (; n>0; n--) {
+                *dst = adpcm_ima_compress_sample(&c->status[0], samples[0]) & 0x0F;
+                *dst |= (adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels]) << 4) & 0xF0;
+                dst++;
+                *dst = adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels * 2]) & 0x0F;
+                *dst |= (adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels * 3]) << 4) & 0xF0;
+                dst++;
+                *dst = adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels * 4]) & 0x0F;
+                *dst |= (adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels * 5]) << 4) & 0xF0;
+                dst++;
+                *dst = adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels * 6]) & 0x0F;
+                *dst |= (adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels * 7]) << 4) & 0xF0;
+                dst++;
+                /* right channel */
+                if (avctx->channels == 2) {
+                    *dst = adpcm_ima_compress_sample(&c->status[1], samples[1]);
+                    *dst |= adpcm_ima_compress_sample(&c->status[1], samples[3]) << 4;
+                    dst++;
+                    *dst = adpcm_ima_compress_sample(&c->status[1], samples[5]);
+                    *dst |= adpcm_ima_compress_sample(&c->status[1], samples[7]) << 4;
+                    dst++;
+                    *dst = adpcm_ima_compress_sample(&c->status[1], samples[9]);
+                    *dst |= adpcm_ima_compress_sample(&c->status[1], samples[11]) << 4;
+                    dst++;
+                    *dst = adpcm_ima_compress_sample(&c->status[1], samples[13]);
+                    *dst |= adpcm_ima_compress_sample(&c->status[1], samples[15]) << 4;
+                    dst++;
+                }
+                samples += 8 * avctx->channels;
+            }
+        break;
+    case CODEC_ID_ADPCM_MS:
+        for(i=0; i<avctx->channels; i++){
+            int predictor=0;
+
+            *dst++ = predictor;
+            c->status[i].coeff1 = AdaptCoeff1[predictor];
+            c->status[i].coeff2 = AdaptCoeff2[predictor];
+        }
+        for(i=0; i<avctx->channels; i++){
+            if (c->status[i].idelta < 16)
+                c->status[i].idelta = 16;
+
+            *dst++ = c->status[i].idelta & 0xFF;
+            *dst++ = c->status[i].idelta >> 8;
+        }
+        for(i=0; i<avctx->channels; i++){
+            c->status[i].sample1= *samples++;
+
+            *dst++ = c->status[i].sample1 & 0xFF;
+            *dst++ = c->status[i].sample1 >> 8;
+        }
+        for(i=0; i<avctx->channels; i++){
+            c->status[i].sample2= *samples++;
+
+            *dst++ = c->status[i].sample2 & 0xFF;
+            *dst++ = c->status[i].sample2 >> 8;
+        }
+
+        if(avctx->trellis > 0) {
+            int n = avctx->block_align - 7*avctx->channels;
+            uint8_t buf[2][n];
+            if(avctx->channels == 1) {
+                n *= 2;
+                adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n);
+                for(i=0; i<n; i+=2)
+                    *dst++ = (buf[0][i] << 4) | buf[0][i+1];
+            } else {
+                adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n);
+                adpcm_compress_trellis(avctx, samples+1, buf[1], &c->status[1], n);
+                for(i=0; i<n; i++)
+                    *dst++ = (buf[0][i] << 4) | buf[1][i];
+            }
+        } else
+        for(i=7*avctx->channels; i<avctx->block_align; i++) {
+            int nibble;
+            nibble = adpcm_ms_compress_sample(&c->status[ 0], *samples++)<<4;
+            nibble|= adpcm_ms_compress_sample(&c->status[st], *samples++);
+            *dst++ = nibble;
+        }
+        break;
+    case CODEC_ID_ADPCM_YAMAHA:
+        n = avctx->frame_size / 2;
+        if(avctx->trellis > 0) {
+            uint8_t buf[2][n*2];
+            n *= 2;
+            if(avctx->channels == 1) {
+                adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n);
+                for(i=0; i<n; i+=2)
+                    *dst++ = buf[0][i] | (buf[0][i+1] << 4);
+            } else {
+                adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n);
+                adpcm_compress_trellis(avctx, samples+1, buf[1], &c->status[1], n);
+                for(i=0; i<n; i++)
+                    *dst++ = buf[0][i] | (buf[1][i] << 4);
+            }
+        } else
+        for (; n>0; n--) {
+            for(i = 0; i < avctx->channels; i++) {
+                int nibble;
+                nibble  = adpcm_yamaha_compress_sample(&c->status[i], samples[i]);
+                nibble |= adpcm_yamaha_compress_sample(&c->status[i], samples[i+avctx->channels]) << 4;
+                *dst++ = nibble;
+            }
+            samples += 2 * avctx->channels;
+        }
+        break;
+    default:
+        return -1;
+    }
+    return dst - frame;
+}
+#endif //CONFIG_ENCODERS
+
+static int adpcm_decode_init(AVCodecContext * avctx)
+{
+    ADPCMContext *c = avctx->priv_data;
+
+    c->channel = 0;
+    c->status[0].predictor = c->status[1].predictor = 0;
+    c->status[0].step_index = c->status[1].step_index = 0;
+    c->status[0].step = c->status[1].step = 0;
+
+    switch(avctx->codec->id) {
+    case CODEC_ID_ADPCM_CT:
+        c->status[0].step = c->status[1].step = 511;
+        break;
+    default:
+        break;
+    }
+    return 0;
+}
+
+static inline short adpcm_ima_expand_nibble(ADPCMChannelStatus *c, char nibble, int shift)
+{
+    int step_index;
+    int predictor;
+    int sign, delta, diff, step;
+
+    step = step_table[c->step_index];
+    step_index = c->step_index + index_table[(unsigned)nibble];
+    if (step_index < 0) step_index = 0;
+    else if (step_index > 88) step_index = 88;
+
+    sign = nibble & 8;
+    delta = nibble & 7;
+    /* perform direct multiplication instead of series of jumps proposed by
+     * the reference ADPCM implementation since modern CPUs can do the mults
+     * quickly enough */
+    diff = ((2 * delta + 1) * step) >> shift;
+    predictor = c->predictor;
+    if (sign) predictor -= diff;
+    else predictor += diff;
+
+    CLAMP_TO_SHORT(predictor);
+    c->predictor = predictor;
+    c->step_index = step_index;
+
+    return (short)predictor;
+}
+
+static inline short adpcm_ms_expand_nibble(ADPCMChannelStatus *c, char nibble)
+{
+    int predictor;
+
+    predictor = (((c->sample1) * (c->coeff1)) + ((c->sample2) * (c->coeff2))) / 256;
+    predictor += (signed)((nibble & 0x08)?(nibble - 0x10):(nibble)) * c->idelta;
+    CLAMP_TO_SHORT(predictor);
+
+    c->sample2 = c->sample1;
+    c->sample1 = predictor;
+    c->idelta = (AdaptationTable[(int)nibble] * c->idelta) >> 8;
+    if (c->idelta < 16) c->idelta = 16;
+
+    return (short)predictor;
+}
+
+static inline short adpcm_ct_expand_nibble(ADPCMChannelStatus *c, char nibble)
+{
+    int predictor;
+    int sign, delta, diff;
+    int new_step;
+
+    sign = nibble & 8;
+    delta = nibble & 7;
+    /* perform direct multiplication instead of series of jumps proposed by
+     * the reference ADPCM implementation since modern CPUs can do the mults
+     * quickly enough */
+    diff = ((2 * delta + 1) * c->step) >> 3;
+    predictor = c->predictor;
+    /* predictor update is not so trivial: predictor is multiplied on 254/256 before updating */
+    if(sign)
+        predictor = ((predictor * 254) >> 8) - diff;
+    else
+            predictor = ((predictor * 254) >> 8) + diff;
+    /* calculate new step and clamp it to range 511..32767 */
+    new_step = (ct_adpcm_table[nibble & 7] * c->step) >> 8;
+    c->step = new_step;
+    if(c->step < 511)
+        c->step = 511;
+    if(c->step > 32767)
+        c->step = 32767;
+
+    CLAMP_TO_SHORT(predictor);
+    c->predictor = predictor;
+    return (short)predictor;
+}
+
+static inline short adpcm_sbpro_expand_nibble(ADPCMChannelStatus *c, char nibble, int size, int shift)
+{
+    int sign, delta, diff;
+
+    sign = nibble & (1<<(size-1));
+    delta = nibble & ((1<<(size-1))-1);
+    diff = delta << (7 + c->step + shift);
+
+    if (sign)
+        c->predictor -= diff;
+    else
+        c->predictor += diff;
+
+    /* clamp result */
+    if (c->predictor > 16256)
+        c->predictor = 16256;
+    else if (c->predictor < -16384)
+        c->predictor = -16384;
+
+    /* calculate new step */
+    if (delta >= (2*size - 3) && c->step < 3)
+        c->step++;
+    else if (delta == 0 && c->step > 0)
+        c->step--;
+
+    return (short) c->predictor;
+}
+
+static inline short adpcm_yamaha_expand_nibble(ADPCMChannelStatus *c, unsigned char nibble)
+{
+    if(!c->step) {
+        c->predictor = 0;
+        c->step = 127;
+    }
+
+    c->predictor += (c->step * yamaha_difflookup[nibble]) / 8;
+    CLAMP_TO_SHORT(c->predictor);
+    c->step = (c->step * yamaha_indexscale[nibble]) >> 8;
+    c->step = clip(c->step, 127, 24567);
+    return c->predictor;
+}
+
+static void xa_decode(short *out, const unsigned char *in,
+    ADPCMChannelStatus *left, ADPCMChannelStatus *right, int inc)
+{
+    int i, j;
+    int shift,filter,f0,f1;
+    int s_1,s_2;
+    int d,s,t;
+
+    for(i=0;i<4;i++) {
+
+        shift  = 12 - (in[4+i*2] & 15);
+        filter = in[4+i*2] >> 4;
+        f0 = xa_adpcm_table[filter][0];
+        f1 = xa_adpcm_table[filter][1];
+
+        s_1 = left->sample1;
+        s_2 = left->sample2;
+
+        for(j=0;j<28;j++) {
+            d = in[16+i+j*4];
+
+            t = (signed char)(d<<4)>>4;
+            s = ( t<<shift ) + ((s_1*f0 + s_2*f1+32)>>6);
+            CLAMP_TO_SHORT(s);
+            *out = s;
+            out += inc;
+            s_2 = s_1;
+            s_1 = s;
+        }
+
+        if (inc==2) { /* stereo */
+            left->sample1 = s_1;
+            left->sample2 = s_2;
+            s_1 = right->sample1;
+            s_2 = right->sample2;
+            out = out + 1 - 28*2;
+        }
+
+        shift  = 12 - (in[5+i*2] & 15);
+        filter = in[5+i*2] >> 4;
+
+        f0 = xa_adpcm_table[filter][0];
+        f1 = xa_adpcm_table[filter][1];
+
+        for(j=0;j<28;j++) {
+            d = in[16+i+j*4];
+
+            t = (signed char)d >> 4;
+            s = ( t<<shift ) + ((s_1*f0 + s_2*f1+32)>>6);
+            CLAMP_TO_SHORT(s);
+            *out = s;
+            out += inc;
+            s_2 = s_1;
+            s_1 = s;
+        }
+
+        if (inc==2) { /* stereo */
+            right->sample1 = s_1;
+            right->sample2 = s_2;
+            out -= 1;
+        } else {
+            left->sample1 = s_1;
+            left->sample2 = s_2;
+        }
+    }
+}
+
+
+/* DK3 ADPCM support macro */
+#define DK3_GET_NEXT_NIBBLE() \
+    if (decode_top_nibble_next) \
+    { \
+        nibble = (last_byte >> 4) & 0x0F; \
+        decode_top_nibble_next = 0; \
+    } \
+    else \
+    { \
+        last_byte = *src++; \
+        if (src >= buf + buf_size) break; \
+        nibble = last_byte & 0x0F; \
+        decode_top_nibble_next = 1; \
+    }
+
+static int adpcm_decode_frame(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    ADPCMContext *c = avctx->priv_data;
+    ADPCMChannelStatus *cs;
+    int n, m, channel, i;
+    int block_predictor[2];
+    short *samples;
+    uint8_t *src;
+    int st; /* stereo */
+
+    /* DK3 ADPCM accounting variables */
+    unsigned char last_byte = 0;
+    unsigned char nibble;
+    int decode_top_nibble_next = 0;
+    int diff_channel;
+
+    /* EA ADPCM state variables */
+    uint32_t samples_in_chunk;
+    int32_t previous_left_sample, previous_right_sample;
+    int32_t current_left_sample, current_right_sample;
+    int32_t next_left_sample, next_right_sample;
+    int32_t coeff1l, coeff2l, coeff1r, coeff2r;
+    uint8_t shift_left, shift_right;
+    int count1, count2;
+
+    if (!buf_size)
+        return 0;
+
+    samples = data;
+    src = buf;
+
+    st = avctx->channels == 2 ? 1 : 0;
+
+    switch(avctx->codec->id) {
+    case CODEC_ID_ADPCM_IMA_QT:
+        n = (buf_size - 2);/* >> 2*avctx->channels;*/
+        channel = c->channel;
+        cs = &(c->status[channel]);
+        /* (pppppp) (piiiiiii) */
+
+        /* Bits 15-7 are the _top_ 9 bits of the 16-bit initial predictor value */
+        cs->predictor = (*src++) << 8;
+        cs->predictor |= (*src & 0x80);
+        cs->predictor &= 0xFF80;
+
+        /* sign extension */
+        if(cs->predictor & 0x8000)
+            cs->predictor -= 0x10000;
+
+        CLAMP_TO_SHORT(cs->predictor);
+
+        cs->step_index = (*src++) & 0x7F;
+
+        if (cs->step_index > 88){
+            av_log(avctx, AV_LOG_ERROR, "ERROR: step_index = %i\n", cs->step_index);
+            cs->step_index = 88;
+        }
+
+        cs->step = step_table[cs->step_index];
+
+        if (st && channel)
+            samples++;
+
+        for(m=32; n>0 && m>0; n--, m--) { /* in QuickTime, IMA is encoded by chuncks of 34 bytes (=64 samples) */
+            *samples = adpcm_ima_expand_nibble(cs, src[0] & 0x0F, 3);
+            samples += avctx->channels;
+            *samples = adpcm_ima_expand_nibble(cs, (src[0] >> 4) & 0x0F, 3);
+            samples += avctx->channels;
+            src ++;
+        }
+
+        if(st) { /* handle stereo interlacing */
+            c->channel = (channel + 1) % 2; /* we get one packet for left, then one for right data */
+            if(channel == 1) { /* wait for the other packet before outputing anything */
+                return src - buf;
+            }
+        }
+        break;
+    case CODEC_ID_ADPCM_IMA_WAV:
+        if (avctx->block_align != 0 && buf_size > avctx->block_align)
+            buf_size = avctx->block_align;
+
+//        samples_per_block= (block_align-4*chanels)*8 / (bits_per_sample * chanels) + 1;
+
+        for(i=0; i<avctx->channels; i++){
+            cs = &(c->status[i]);
+            cs->predictor = (int16_t)(src[0] + (src[1]<<8));
+            src+=2;
+
+        // XXX: is this correct ??: *samples++ = cs->predictor;
+
+            cs->step_index = *src++;
+            if (cs->step_index > 88){
+                av_log(avctx, AV_LOG_ERROR, "ERROR: step_index = %i\n", cs->step_index);
+                cs->step_index = 88;
+            }
+            if (*src++) av_log(avctx, AV_LOG_ERROR, "unused byte should be null but is %d!!\n", src[-1]); /* unused */
+        }
+
+        while(src < buf + buf_size){
+            for(m=0; m<4; m++){
+                for(i=0; i<=st; i++)
+                    *samples++ = adpcm_ima_expand_nibble(&c->status[i], src[4*i] & 0x0F, 3);
+                for(i=0; i<=st; i++)
+                    *samples++ = adpcm_ima_expand_nibble(&c->status[i], src[4*i] >> 4  , 3);
+                src++;
+            }
+            src += 4*st;
+        }
+        break;
+    case CODEC_ID_ADPCM_4XM:
+        cs = &(c->status[0]);
+        c->status[0].predictor= (int16_t)(src[0] + (src[1]<<8)); src+=2;
+        if(st){
+            c->status[1].predictor= (int16_t)(src[0] + (src[1]<<8)); src+=2;
+        }
+        c->status[0].step_index= (int16_t)(src[0] + (src[1]<<8)); src+=2;
+        if(st){
+            c->status[1].step_index= (int16_t)(src[0] + (src[1]<<8)); src+=2;
+        }
+        if (cs->step_index < 0) cs->step_index = 0;
+        if (cs->step_index > 88) cs->step_index = 88;
+
+        m= (buf_size - (src - buf))>>st;
+        for(i=0; i<m; i++) {
+            *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] & 0x0F, 4);
+            if (st)
+                *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] & 0x0F, 4);
+            *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] >> 4, 4);
+            if (st)
+                *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] >> 4, 4);
+        }
+
+        src += m<<st;
+
+        break;
+    case CODEC_ID_ADPCM_MS:
+        if (avctx->block_align != 0 && buf_size > avctx->block_align)
+            buf_size = avctx->block_align;
+        n = buf_size - 7 * avctx->channels;
+        if (n < 0)
+            return -1;
+        block_predictor[0] = clip(*src++, 0, 7);
+        block_predictor[1] = 0;
+        if (st)
+            block_predictor[1] = clip(*src++, 0, 7);
+        c->status[0].idelta = (int16_t)((*src & 0xFF) | ((src[1] << 8) & 0xFF00));
+        src+=2;
+        if (st){
+            c->status[1].idelta = (int16_t)((*src & 0xFF) | ((src[1] << 8) & 0xFF00));
+            src+=2;
+        }
+        c->status[0].coeff1 = AdaptCoeff1[block_predictor[0]];
+        c->status[0].coeff2 = AdaptCoeff2[block_predictor[0]];
+        c->status[1].coeff1 = AdaptCoeff1[block_predictor[1]];
+        c->status[1].coeff2 = AdaptCoeff2[block_predictor[1]];
+
+        c->status[0].sample1 = ((*src & 0xFF) | ((src[1] << 8) & 0xFF00));
+        src+=2;
+        if (st) c->status[1].sample1 = ((*src & 0xFF) | ((src[1] << 8) & 0xFF00));
+        if (st) src+=2;
+        c->status[0].sample2 = ((*src & 0xFF) | ((src[1] << 8) & 0xFF00));
+        src+=2;
+        if (st) c->status[1].sample2 = ((*src & 0xFF) | ((src[1] << 8) & 0xFF00));
+        if (st) src+=2;
+
+        *samples++ = c->status[0].sample1;
+        if (st) *samples++ = c->status[1].sample1;
+        *samples++ = c->status[0].sample2;
+        if (st) *samples++ = c->status[1].sample2;
+        for(;n>0;n--) {
+            *samples++ = adpcm_ms_expand_nibble(&c->status[0], (src[0] >> 4) & 0x0F);
+            *samples++ = adpcm_ms_expand_nibble(&c->status[st], src[0] & 0x0F);
+            src ++;
+        }
+        break;
+    case CODEC_ID_ADPCM_IMA_DK4:
+        if (avctx->block_align != 0 && buf_size > avctx->block_align)
+            buf_size = avctx->block_align;
+
+        c->status[0].predictor = (int16_t)(src[0] | (src[1] << 8));
+        c->status[0].step_index = src[2];
+        src += 4;
+        *samples++ = c->status[0].predictor;
+        if (st) {
+            c->status[1].predictor = (int16_t)(src[0] | (src[1] << 8));
+            c->status[1].step_index = src[2];
+            src += 4;
+            *samples++ = c->status[1].predictor;
+        }
+        while (src < buf + buf_size) {
+
+            /* take care of the top nibble (always left or mono channel) */
+            *samples++ = adpcm_ima_expand_nibble(&c->status[0],
+                (src[0] >> 4) & 0x0F, 3);
+
+            /* take care of the bottom nibble, which is right sample for
+             * stereo, or another mono sample */
+            if (st)
+                *samples++ = adpcm_ima_expand_nibble(&c->status[1],
+                    src[0] & 0x0F, 3);
+            else
+                *samples++ = adpcm_ima_expand_nibble(&c->status[0],
+                    src[0] & 0x0F, 3);
+
+            src++;
+        }
+        break;
+    case CODEC_ID_ADPCM_IMA_DK3:
+        if (avctx->block_align != 0 && buf_size > avctx->block_align)
+            buf_size = avctx->block_align;
+
+        c->status[0].predictor = (int16_t)(src[10] | (src[11] << 8));
+        c->status[1].predictor = (int16_t)(src[12] | (src[13] << 8));
+        c->status[0].step_index = src[14];
+        c->status[1].step_index = src[15];
+        /* sign extend the predictors */
+        src += 16;
+        diff_channel = c->status[1].predictor;
+
+        /* the DK3_GET_NEXT_NIBBLE macro issues the break statement when
+         * the buffer is consumed */
+        while (1) {
+
+            /* for this algorithm, c->status[0] is the sum channel and
+             * c->status[1] is the diff channel */
+
+            /* process the first predictor of the sum channel */
+            DK3_GET_NEXT_NIBBLE();
+            adpcm_ima_expand_nibble(&c->status[0], nibble, 3);
+
+            /* process the diff channel predictor */
+            DK3_GET_NEXT_NIBBLE();
+            adpcm_ima_expand_nibble(&c->status[1], nibble, 3);
+
+            /* process the first pair of stereo PCM samples */
+            diff_channel = (diff_channel + c->status[1].predictor) / 2;
+            *samples++ = c->status[0].predictor + c->status[1].predictor;
+            *samples++ = c->status[0].predictor - c->status[1].predictor;
+
+            /* process the second predictor of the sum channel */
+            DK3_GET_NEXT_NIBBLE();
+            adpcm_ima_expand_nibble(&c->status[0], nibble, 3);
+
+            /* process the second pair of stereo PCM samples */
+            diff_channel = (diff_channel + c->status[1].predictor) / 2;
+            *samples++ = c->status[0].predictor + c->status[1].predictor;
+            *samples++ = c->status[0].predictor - c->status[1].predictor;
+        }
+        break;
+    case CODEC_ID_ADPCM_IMA_WS:
+        /* no per-block initialization; just start decoding the data */
+        while (src < buf + buf_size) {
+
+            if (st) {
+                *samples++ = adpcm_ima_expand_nibble(&c->status[0],
+                    (src[0] >> 4) & 0x0F, 3);
+                *samples++ = adpcm_ima_expand_nibble(&c->status[1],
+                    src[0] & 0x0F, 3);
+            } else {
+                *samples++ = adpcm_ima_expand_nibble(&c->status[0],
+                    (src[0] >> 4) & 0x0F, 3);
+                *samples++ = adpcm_ima_expand_nibble(&c->status[0],
+                    src[0] & 0x0F, 3);
+            }
+
+            src++;
+        }
+        break;
+    case CODEC_ID_ADPCM_XA:
+        c->status[0].sample1 = c->status[0].sample2 =
+        c->status[1].sample1 = c->status[1].sample2 = 0;
+        while (buf_size >= 128) {
+            xa_decode(samples, src, &c->status[0], &c->status[1],
+                avctx->channels);
+            src += 128;
+            samples += 28 * 8;
+            buf_size -= 128;
+        }
+        break;
+    case CODEC_ID_ADPCM_EA:
+        samples_in_chunk = LE_32(src);
+        if (samples_in_chunk >= ((buf_size - 12) * 2)) {
+            src += buf_size;
+            break;
+        }
+        src += 4;
+        current_left_sample = (int16_t)LE_16(src);
+        src += 2;
+        previous_left_sample = (int16_t)LE_16(src);
+        src += 2;
+        current_right_sample = (int16_t)LE_16(src);
+        src += 2;
+        previous_right_sample = (int16_t)LE_16(src);
+        src += 2;
+
+        for (count1 = 0; count1 < samples_in_chunk/28;count1++) {
+            coeff1l = ea_adpcm_table[(*src >> 4) & 0x0F];
+            coeff2l = ea_adpcm_table[((*src >> 4) & 0x0F) + 4];
+            coeff1r = ea_adpcm_table[*src & 0x0F];
+            coeff2r = ea_adpcm_table[(*src & 0x0F) + 4];
+            src++;
+
+            shift_left = ((*src >> 4) & 0x0F) + 8;
+            shift_right = (*src & 0x0F) + 8;
+            src++;
+
+            for (count2 = 0; count2 < 28; count2++) {
+                next_left_sample = (((*src & 0xF0) << 24) >> shift_left);
+                next_right_sample = (((*src & 0x0F) << 28) >> shift_right);
+                src++;
+
+                next_left_sample = (next_left_sample +
+                    (current_left_sample * coeff1l) +
+                    (previous_left_sample * coeff2l) + 0x80) >> 8;
+                next_right_sample = (next_right_sample +
+                    (current_right_sample * coeff1r) +
+                    (previous_right_sample * coeff2r) + 0x80) >> 8;
+                CLAMP_TO_SHORT(next_left_sample);
+                CLAMP_TO_SHORT(next_right_sample);
+
+                previous_left_sample = current_left_sample;
+                current_left_sample = next_left_sample;
+                previous_right_sample = current_right_sample;
+                current_right_sample = next_right_sample;
+                *samples++ = (unsigned short)current_left_sample;
+                *samples++ = (unsigned short)current_right_sample;
+            }
+        }
+        break;
+    case CODEC_ID_ADPCM_IMA_SMJPEG:
+        c->status[0].predictor = *src;
+        src += 2;
+        c->status[0].step_index = *src++;
+        src++;  /* skip another byte before getting to the meat */
+        while (src < buf + buf_size) {
+            *samples++ = adpcm_ima_expand_nibble(&c->status[0],
+                *src & 0x0F, 3);
+            *samples++ = adpcm_ima_expand_nibble(&c->status[0],
+                (*src >> 4) & 0x0F, 3);
+            src++;
+        }
+        break;
+    case CODEC_ID_ADPCM_CT:
+        while (src < buf + buf_size) {
+            if (st) {
+                *samples++ = adpcm_ct_expand_nibble(&c->status[0],
+                    (src[0] >> 4) & 0x0F);
+                *samples++ = adpcm_ct_expand_nibble(&c->status[1],
+                    src[0] & 0x0F);
+            } else {
+                *samples++ = adpcm_ct_expand_nibble(&c->status[0],
+                    (src[0] >> 4) & 0x0F);
+                *samples++ = adpcm_ct_expand_nibble(&c->status[0],
+                    src[0] & 0x0F);
+            }
+            src++;
+        }
+        break;
+    case CODEC_ID_ADPCM_SBPRO_4:
+    case CODEC_ID_ADPCM_SBPRO_3:
+    case CODEC_ID_ADPCM_SBPRO_2:
+        if (!c->status[0].step_index) {
+            /* the first byte is a raw sample */
+            *samples++ = 128 * (*src++ - 0x80);
+            if (st)
+              *samples++ = 128 * (*src++ - 0x80);
+            c->status[0].step_index = 1;
+        }
+        if (avctx->codec->id == CODEC_ID_ADPCM_SBPRO_4) {
+            while (src < buf + buf_size) {
+                *samples++ = adpcm_sbpro_expand_nibble(&c->status[0],
+                    (src[0] >> 4) & 0x0F, 4, 0);
+                *samples++ = adpcm_sbpro_expand_nibble(&c->status[st],
+                    src[0] & 0x0F, 4, 0);
+                src++;
+            }
+        } else if (avctx->codec->id == CODEC_ID_ADPCM_SBPRO_3) {
+            while (src < buf + buf_size) {
+                *samples++ = adpcm_sbpro_expand_nibble(&c->status[0],
+                    (src[0] >> 5) & 0x07, 3, 0);
+                *samples++ = adpcm_sbpro_expand_nibble(&c->status[0],
+                    (src[0] >> 2) & 0x07, 3, 0);
+                *samples++ = adpcm_sbpro_expand_nibble(&c->status[0],
+                    src[0] & 0x03, 2, 0);
+                src++;
+            }
+        } else {
+            while (src < buf + buf_size) {
+                *samples++ = adpcm_sbpro_expand_nibble(&c->status[0],
+                    (src[0] >> 6) & 0x03, 2, 2);
+                *samples++ = adpcm_sbpro_expand_nibble(&c->status[st],
+                    (src[0] >> 4) & 0x03, 2, 2);
+                *samples++ = adpcm_sbpro_expand_nibble(&c->status[0],
+                    (src[0] >> 2) & 0x03, 2, 2);
+                *samples++ = adpcm_sbpro_expand_nibble(&c->status[st],
+                    src[0] & 0x03, 2, 2);
+                src++;
+            }
+        }
+        break;
+    case CODEC_ID_ADPCM_SWF:
+    {
+        GetBitContext gb;
+        const int *table;
+        int k0, signmask;
+        int size = buf_size*8;
+
+        init_get_bits(&gb, buf, size);
+
+        // first frame, read bits & inital values
+        if (!c->nb_bits)
+        {
+            c->nb_bits = get_bits(&gb, 2)+2;
+//            av_log(NULL,AV_LOG_INFO,"nb_bits: %d\n", c->nb_bits);
+        }
+
+        table = swf_index_tables[c->nb_bits-2];
+        k0 = 1 << (c->nb_bits-2);
+        signmask = 1 << (c->nb_bits-1);
+
+        while (get_bits_count(&gb) <= size)
+        {
+            int i;
+
+            c->nb_samples++;
+            // wrap around at every 4096 samples...
+            if ((c->nb_samples & 0xfff) == 1)
+            {
+                for (i = 0; i <= st; i++)
+                {
+                    *samples++ = c->status[i].predictor = get_sbits(&gb, 16);
+                    c->status[i].step_index = get_bits(&gb, 6);
+                }
+            }
+
+            // similar to IMA adpcm
+            for (i = 0; i <= st; i++)
+            {
+                int delta = get_bits(&gb, c->nb_bits);
+                int step = step_table[c->status[i].step_index];
+                long vpdiff = 0; // vpdiff = (delta+0.5)*step/4
+                int k = k0;
+
+                do {
+                    if (delta & k)
+                        vpdiff += step;
+                    step >>= 1;
+                    k >>= 1;
+                } while(k);
+                vpdiff += step;
+
+                if (delta & signmask)
+                    c->status[i].predictor -= vpdiff;
+                else
+                    c->status[i].predictor += vpdiff;
+
+                c->status[i].step_index += table[delta & (~signmask)];
+
+                c->status[i].step_index = clip(c->status[i].step_index, 0, 88);
+                c->status[i].predictor = clip(c->status[i].predictor, -32768, 32767);
+
+                *samples++ = c->status[i].predictor;
+            }
+        }
+
+//        src += get_bits_count(&gb)*8;
+        src += size;
+
+        break;
+    }
+    case CODEC_ID_ADPCM_YAMAHA:
+        while (src < buf + buf_size) {
+            if (st) {
+                *samples++ = adpcm_yamaha_expand_nibble(&c->status[0],
+                        src[0] & 0x0F);
+                *samples++ = adpcm_yamaha_expand_nibble(&c->status[1],
+                        (src[0] >> 4) & 0x0F);
+            } else {
+                *samples++ = adpcm_yamaha_expand_nibble(&c->status[0],
+                        src[0] & 0x0F);
+                *samples++ = adpcm_yamaha_expand_nibble(&c->status[0],
+                        (src[0] >> 4) & 0x0F);
+            }
+            src++;
+        }
+        break;
+    default:
+        return -1;
+    }
+    *data_size = (uint8_t *)samples - (uint8_t *)data;
+    return src - buf;
+}
+
+
+
+#ifdef CONFIG_ENCODERS
+#define ADPCM_ENCODER(id,name)                  \
+AVCodec name ## _encoder = {                    \
+    #name,                                      \
+    CODEC_TYPE_AUDIO,                           \
+    id,                                         \
+    sizeof(ADPCMContext),                       \
+    adpcm_encode_init,                          \
+    adpcm_encode_frame,                         \
+    adpcm_encode_close,                         \
+    NULL,                                       \
+};
+#else
+#define ADPCM_ENCODER(id,name)
+#endif
+
+#ifdef CONFIG_DECODERS
+#define ADPCM_DECODER(id,name)                  \
+AVCodec name ## _decoder = {                    \
+    #name,                                      \
+    CODEC_TYPE_AUDIO,                           \
+    id,                                         \
+    sizeof(ADPCMContext),                       \
+    adpcm_decode_init,                          \
+    NULL,                                       \
+    NULL,                                       \
+    adpcm_decode_frame,                         \
+};
+#else
+#define ADPCM_DECODER(id,name)
+#endif
+
+#define ADPCM_CODEC(id, name)                   \
+ADPCM_ENCODER(id,name) ADPCM_DECODER(id,name)
+
+ADPCM_CODEC(CODEC_ID_ADPCM_IMA_QT, adpcm_ima_qt);
+ADPCM_CODEC(CODEC_ID_ADPCM_IMA_WAV, adpcm_ima_wav);
+ADPCM_CODEC(CODEC_ID_ADPCM_IMA_DK3, adpcm_ima_dk3);
+ADPCM_CODEC(CODEC_ID_ADPCM_IMA_DK4, adpcm_ima_dk4);
+ADPCM_CODEC(CODEC_ID_ADPCM_IMA_WS, adpcm_ima_ws);
+ADPCM_CODEC(CODEC_ID_ADPCM_IMA_SMJPEG, adpcm_ima_smjpeg);
+ADPCM_CODEC(CODEC_ID_ADPCM_MS, adpcm_ms);
+ADPCM_CODEC(CODEC_ID_ADPCM_4XM, adpcm_4xm);
+ADPCM_CODEC(CODEC_ID_ADPCM_XA, adpcm_xa);
+ADPCM_CODEC(CODEC_ID_ADPCM_EA, adpcm_ea);
+ADPCM_CODEC(CODEC_ID_ADPCM_CT, adpcm_ct);
+ADPCM_CODEC(CODEC_ID_ADPCM_SWF, adpcm_swf);
+ADPCM_CODEC(CODEC_ID_ADPCM_YAMAHA, adpcm_yamaha);
+ADPCM_CODEC(CODEC_ID_ADPCM_SBPRO_4, adpcm_sbpro_4);
+ADPCM_CODEC(CODEC_ID_ADPCM_SBPRO_3, adpcm_sbpro_3);
+ADPCM_CODEC(CODEC_ID_ADPCM_SBPRO_2, adpcm_sbpro_2);
+
+#undef ADPCM_CODEC
diff --git a/contrib/ffmpeg/libavcodec/adx.c b/contrib/ffmpeg/libavcodec/adx.c
new file mode 100644
index 000000000..b449c9124
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/adx.c
@@ -0,0 +1,412 @@
+/*
+ * ADX ADPCM codecs
+ * Copyright (c) 2001,2003 BERO
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+
+/**
+ * @file adx.c
+ * SEGA CRI adx codecs.
+ *
+ * Reference documents:
+ * http://ku-www.ss.titech.ac.jp/~yatsushi/adx.html
+ * adx2wav & wav2adx http://www.geocities.co.jp/Playtown/2004/
+ */
+
+typedef struct {
+    int s1,s2;
+} PREV;
+
+typedef struct {
+    PREV prev[2];
+    int header_parsed;
+    unsigned char dec_temp[18*2];
+    unsigned short enc_temp[32*2];
+    int in_temp;
+} ADXContext;
+
+//#define    BASEVOL    0x11e0
+#define    BASEVOL   0x4000
+#define    SCALE1    0x7298
+#define    SCALE2    0x3350
+
+#define    CLIP(s)    if (s>32767) s=32767; else if (s<-32768) s=-32768
+
+/* 18 bytes <-> 32 samples */
+
+#ifdef CONFIG_ENCODERS
+static void adx_encode(unsigned char *adx,const short *wav,PREV *prev)
+{
+    int scale;
+    int i;
+    int s0,s1,s2,d;
+    int max=0;
+    int min=0;
+    int data[32];
+
+    s1 = prev->s1;
+    s2 = prev->s2;
+    for(i=0;i<32;i++) {
+        s0 = wav[i];
+        d = ((s0<<14) - SCALE1*s1 + SCALE2*s2)/BASEVOL;
+        data[i]=d;
+        if (max<d) max=d;
+        if (min>d) min=d;
+        s2 = s1;
+        s1 = s0;
+    }
+    prev->s1 = s1;
+    prev->s2 = s2;
+
+    /* -8..+7 */
+
+    if (max==0 && min==0) {
+        memset(adx,0,18);
+        return;
+    }
+
+    if (max/7>-min/8) scale = max/7;
+    else scale = -min/8;
+
+    if (scale==0) scale=1;
+
+    adx[0] = scale>>8;
+    adx[1] = scale;
+
+    for(i=0;i<16;i++) {
+        adx[i+2] = ((data[i*2]/scale)<<4) | ((data[i*2+1]/scale)&0xf);
+    }
+}
+#endif //CONFIG_ENCODERS
+
+static void adx_decode(short *out,const unsigned char *in,PREV *prev)
+{
+    int scale = ((in[0]<<8)|(in[1]));
+    int i;
+    int s0,s1,s2,d;
+
+//    printf("%x ",scale);
+
+    in+=2;
+    s1 = prev->s1;
+    s2 = prev->s2;
+    for(i=0;i<16;i++) {
+        d = in[i];
+        // d>>=4; if (d&8) d-=16;
+        d = ((signed char)d >> 4);
+        s0 = (BASEVOL*d*scale + SCALE1*s1 - SCALE2*s2)>>14;
+        CLIP(s0);
+        *out++=s0;
+        s2 = s1;
+        s1 = s0;
+
+        d = in[i];
+        //d&=15; if (d&8) d-=16;
+        d = ((signed char)(d<<4) >> 4);
+        s0 = (BASEVOL*d*scale + SCALE1*s1 - SCALE2*s2)>>14;
+        CLIP(s0);
+        *out++=s0;
+        s2 = s1;
+        s1 = s0;
+    }
+    prev->s1 = s1;
+    prev->s2 = s2;
+
+}
+
+static void adx_decode_stereo(short *out,const unsigned char *in,PREV *prev)
+{
+    short tmp[32*2];
+    int i;
+
+    adx_decode(tmp   ,in   ,prev);
+    adx_decode(tmp+32,in+18,prev+1);
+    for(i=0;i<32;i++) {
+        out[i*2]   = tmp[i];
+        out[i*2+1] = tmp[i+32];
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+
+static void write_long(unsigned char *p,uint32_t v)
+{
+    p[0] = v>>24;
+    p[1] = v>>16;
+    p[2] = v>>8;
+    p[3] = v;
+}
+
+static int adx_encode_header(AVCodecContext *avctx,unsigned char *buf,size_t bufsize)
+{
+#if 0
+    struct {
+        uint32_t offset; /* 0x80000000 + sample start - 4 */
+        unsigned char unknown1[3]; /* 03 12 04 */
+        unsigned char channel; /* 1 or 2 */
+        uint32_t freq;
+        uint32_t size;
+        uint32_t unknown2; /* 01 f4 03 00 */
+        uint32_t unknown3; /* 00 00 00 00 */
+        uint32_t unknown4; /* 00 00 00 00 */
+
+    /* if loop
+        unknown3 00 15 00 01
+        unknown4 00 00 00 01
+        long loop_start_sample;
+        long loop_start_byte;
+        long loop_end_sample;
+        long loop_end_byte;
+        long
+    */
+    } adxhdr; /* big endian */
+    /* offset-6 "(c)CRI" */
+#endif
+    write_long(buf+0x00,0x80000000|0x20);
+    write_long(buf+0x04,0x03120400|avctx->channels);
+    write_long(buf+0x08,avctx->sample_rate);
+    write_long(buf+0x0c,0); /* FIXME: set after */
+    write_long(buf+0x10,0x01040300);
+    write_long(buf+0x14,0x00000000);
+    write_long(buf+0x18,0x00000000);
+    memcpy(buf+0x1c,"\0\0(c)CRI",8);
+    return 0x20+4;
+}
+
+static int adx_decode_init(AVCodecContext *avctx);
+static int adx_encode_init(AVCodecContext *avctx)
+{
+    if (avctx->channels > 2)
+        return -1; /* only stereo or mono =) */
+    avctx->frame_size = 32;
+
+    avctx->coded_frame= avcodec_alloc_frame();
+    avctx->coded_frame->key_frame= 1;
+
+//    avctx->bit_rate = avctx->sample_rate*avctx->channels*18*8/32;
+
+    av_log(avctx, AV_LOG_DEBUG, "adx encode init\n");
+    adx_decode_init(avctx);
+
+    return 0;
+}
+
+static int adx_encode_close(AVCodecContext *avctx)
+{
+    av_freep(&avctx->coded_frame);
+
+    return 0;
+}
+
+static int adx_encode_frame(AVCodecContext *avctx,
+                uint8_t *frame, int buf_size, void *data)
+{
+    ADXContext *c = avctx->priv_data;
+    const short *samples = data;
+    unsigned char *dst = frame;
+    int rest = avctx->frame_size;
+
+/*
+    input data size =
+    ffmpeg.c: do_audio_out()
+    frame_bytes = enc->frame_size * 2 * enc->channels;
+*/
+
+//    printf("sz=%d ",buf_size); fflush(stdout);
+    if (!c->header_parsed) {
+        int hdrsize = adx_encode_header(avctx,dst,buf_size);
+        dst+=hdrsize;
+        c->header_parsed = 1;
+    }
+
+    if (avctx->channels==1) {
+        while(rest>=32) {
+            adx_encode(dst,samples,c->prev);
+            dst+=18;
+            samples+=32;
+            rest-=32;
+        }
+    } else {
+        while(rest>=32*2) {
+            short tmpbuf[32*2];
+            int i;
+
+            for(i=0;i<32;i++) {
+                tmpbuf[i] = samples[i*2];
+                tmpbuf[i+32] = samples[i*2+1];
+            }
+
+            adx_encode(dst,tmpbuf,c->prev);
+            adx_encode(dst+18,tmpbuf+32,c->prev+1);
+            dst+=18*2;
+            samples+=32*2;
+            rest-=32*2;
+        }
+    }
+    return dst-frame;
+}
+
+#endif //CONFIG_ENCODERS
+
+static uint32_t read_long(const unsigned char *p)
+{
+    return (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3];
+}
+
+static int is_adx(const unsigned char *buf,size_t bufsize)
+{
+    int    offset;
+
+    if (buf[0]!=0x80) return 0;
+    offset = (read_long(buf)^0x80000000)+4;
+    if (bufsize<offset || memcmp(buf+offset-6,"(c)CRI",6)) return 0;
+    return offset;
+}
+
+/* return data offset or 6 */
+static int adx_decode_header(AVCodecContext *avctx,const unsigned char *buf,size_t bufsize)
+{
+    int offset;
+    int channels,freq,size;
+
+    offset = is_adx(buf,bufsize);
+    if (offset==0) return 0;
+
+    channels = buf[7];
+    freq = read_long(buf+8);
+    size = read_long(buf+12);
+
+//    printf("freq=%d ch=%d\n",freq,channels);
+
+    avctx->sample_rate = freq;
+    avctx->channels = channels;
+    avctx->bit_rate = freq*channels*18*8/32;
+//    avctx->frame_size = 18*channels;
+
+    return offset;
+}
+
+static int adx_decode_init(AVCodecContext * avctx)
+{
+    ADXContext *c = avctx->priv_data;
+
+//    printf("adx_decode_init\n"); fflush(stdout);
+    c->prev[0].s1 = 0;
+    c->prev[0].s2 = 0;
+    c->prev[1].s1 = 0;
+    c->prev[1].s2 = 0;
+    c->header_parsed = 0;
+    c->in_temp = 0;
+    return 0;
+}
+
+#if 0
+static void dump(unsigned char *buf,size_t len)
+{
+    int i;
+    for(i=0;i<len;i++) {
+        if ((i&15)==0) av_log(NULL, AV_LOG_DEBUG, "%04x  ",i);
+        av_log(NULL, AV_LOG_DEBUG, "%02x ",buf[i]);
+        if ((i&15)==15) av_log(NULL, AV_LOG_DEBUG, "\n");
+    }
+    av_log(NULL, AV_LOG_ERROR, "\n");
+}
+#endif
+
+static int adx_decode_frame(AVCodecContext *avctx,
+                void *data, int *data_size,
+                uint8_t *buf0, int buf_size)
+{
+    ADXContext *c = avctx->priv_data;
+    short *samples = data;
+    const uint8_t *buf = buf0;
+    int rest = buf_size;
+
+    if (!c->header_parsed) {
+        int hdrsize = adx_decode_header(avctx,buf,rest);
+        if (hdrsize==0) return -1;
+        c->header_parsed = 1;
+        buf  += hdrsize;
+        rest -= hdrsize;
+    }
+
+    if (c->in_temp) {
+        int copysize = 18*avctx->channels - c->in_temp;
+        memcpy(c->dec_temp+c->in_temp,buf,copysize);
+        rest -= copysize;
+        buf  += copysize;
+        if (avctx->channels==1) {
+            adx_decode(samples,c->dec_temp,c->prev);
+            samples += 32;
+        } else {
+            adx_decode_stereo(samples,c->dec_temp,c->prev);
+            samples += 32*2;
+        }
+    }
+    //
+    if (avctx->channels==1) {
+        while(rest>=18) {
+            adx_decode(samples,buf,c->prev);
+            rest-=18;
+            buf+=18;
+            samples+=32;
+        }
+    } else {
+        while(rest>=18*2) {
+            adx_decode_stereo(samples,buf,c->prev);
+            rest-=18*2;
+            buf+=18*2;
+            samples+=32*2;
+        }
+    }
+    //
+    c->in_temp = rest;
+    if (rest) {
+        memcpy(c->dec_temp,buf,rest);
+        buf+=rest;
+    }
+    *data_size = (uint8_t*)samples - (uint8_t*)data;
+//    printf("%d:%d ",buf-buf0,*data_size); fflush(stdout);
+    return buf-buf0;
+}
+
+#ifdef CONFIG_ENCODERS
+AVCodec adpcm_adx_encoder = {
+    "adpcm_adx",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_ADPCM_ADX,
+    sizeof(ADXContext),
+    adx_encode_init,
+    adx_encode_frame,
+    adx_encode_close,
+    NULL,
+};
+#endif //CONFIG_ENCODERS
+
+AVCodec adpcm_adx_decoder = {
+    "adpcm_adx",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_ADPCM_ADX,
+    sizeof(ADXContext),
+    adx_decode_init,
+    NULL,
+    NULL,
+    adx_decode_frame,
+};
+
diff --git a/contrib/ffmpeg/libavcodec/alac.c b/contrib/ffmpeg/libavcodec/alac.c
new file mode 100644
index 000000000..793f71a11
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/alac.c
@@ -0,0 +1,859 @@
+/*
+ * ALAC (Apple Lossless Audio Codec) decoder
+ * Copyright (c) 2005 David Hammerton
+ * All rights reserved.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file alac.c
+ * ALAC (Apple Lossless Audio Codec) decoder
+ * @author 2005 David Hammerton
+ *
+ * For more information on the ALAC format, visit:
+ *  http://crazney.net/programs/itunes/alac.html
+ *
+ * Note: This decoder expects a 36- (0x24-)byte QuickTime atom to be
+ * passed through the extradata[_size] fields. This atom is tacked onto
+ * the end of an 'alac' stsd atom and has the following format:
+ *  bytes 0-3   atom size (0x24), big-endian
+ *  bytes 4-7   atom type ('alac', not the 'alac' tag from start of stsd)
+ *  bytes 8-35  data bytes needed by decoder
+ *
+ * Extradata:
+ * 32bit  size
+ * 32bit  tag (=alac)
+ * 32bit  zero?
+ * 32bit  max sample per frame
+ *  8bit  ?? (zero?)
+ *  8bit  sample size
+ *  8bit  history mult
+ *  8bit  initial history
+ *  8bit  kmodifier
+ *  8bit  channels?
+ * 16bit  ??
+ * 32bit  max coded frame size
+ * 32bit  bitrate?
+ * 32bit  samplerate
+ */
+
+
+#include "avcodec.h"
+#include "bitstream.h"
+
+#define ALAC_EXTRADATA_SIZE 36
+
+typedef struct {
+
+    AVCodecContext *avctx;
+    GetBitContext gb;
+    /* init to 0; first frame decode should initialize from extradata and
+     * set this to 1 */
+    int context_initialized;
+
+    int samplesize;
+    int numchannels;
+    int bytespersample;
+
+    /* buffers */
+    int32_t *predicterror_buffer_a;
+    int32_t *predicterror_buffer_b;
+
+    int32_t *outputsamples_buffer_a;
+    int32_t *outputsamples_buffer_b;
+
+    /* stuff from setinfo */
+    uint32_t setinfo_max_samples_per_frame; /* 0x1000 = 4096 */    /* max samples per frame? */
+    uint8_t setinfo_7a; /* 0x00 */
+    uint8_t setinfo_sample_size; /* 0x10 */
+    uint8_t setinfo_rice_historymult; /* 0x28 */
+    uint8_t setinfo_rice_initialhistory; /* 0x0a */
+    uint8_t setinfo_rice_kmodifier; /* 0x0e */
+    uint8_t setinfo_7f; /* 0x02 */
+    uint16_t setinfo_80; /* 0x00ff */
+    uint32_t setinfo_82; /* 0x000020e7 */
+    uint32_t setinfo_86; /* 0x00069fe4 */
+    uint32_t setinfo_8a_rate; /* 0x0000ac44 */
+    /* end setinfo stuff */
+
+} ALACContext;
+
+static void allocate_buffers(ALACContext *alac)
+{
+    alac->predicterror_buffer_a = av_malloc(alac->setinfo_max_samples_per_frame * 4);
+    alac->predicterror_buffer_b = av_malloc(alac->setinfo_max_samples_per_frame * 4);
+
+    alac->outputsamples_buffer_a = av_malloc(alac->setinfo_max_samples_per_frame * 4);
+    alac->outputsamples_buffer_b = av_malloc(alac->setinfo_max_samples_per_frame * 4);
+}
+
+static int alac_set_info(ALACContext *alac)
+{
+    unsigned char *ptr = alac->avctx->extradata;
+
+    ptr += 4; /* size */
+    ptr += 4; /* alac */
+    ptr += 4; /* 0 ? */
+
+    if(BE_32(ptr) >= UINT_MAX/4){
+        av_log(alac->avctx, AV_LOG_ERROR, "setinfo_max_samples_per_frame too large\n");
+        return -1;
+    }
+    alac->setinfo_max_samples_per_frame = BE_32(ptr); /* buffer size / 2 ? */
+    ptr += 4;
+    alac->setinfo_7a = *ptr++;
+    alac->setinfo_sample_size = *ptr++;
+    alac->setinfo_rice_historymult = *ptr++;
+    alac->setinfo_rice_initialhistory = *ptr++;
+    alac->setinfo_rice_kmodifier = *ptr++;
+    alac->setinfo_7f = *ptr++; // channels?
+    alac->setinfo_80 = BE_16(ptr);
+    ptr += 2;
+    alac->setinfo_82 = BE_32(ptr); // max coded frame size
+    ptr += 4;
+    alac->setinfo_86 = BE_32(ptr); // bitrate ?
+    ptr += 4;
+    alac->setinfo_8a_rate = BE_32(ptr); // samplerate
+    ptr += 4;
+
+    allocate_buffers(alac);
+
+    return 0;
+}
+
+/* hideously inefficient. could use a bitmask search,
+ * alternatively bsr on x86,
+ */
+static int count_leading_zeros(int32_t input)
+{
+    int i = 0;
+    while (!(0x80000000 & input) && i < 32) {
+        i++;
+        input = input << 1;
+    }
+    return i;
+}
+
+static void bastardized_rice_decompress(ALACContext *alac,
+                                 int32_t *output_buffer,
+                                 int output_size,
+                                 int readsamplesize, /* arg_10 */
+                                 int rice_initialhistory, /* arg424->b */
+                                 int rice_kmodifier, /* arg424->d */
+                                 int rice_historymult, /* arg424->c */
+                                 int rice_kmodifier_mask /* arg424->e */
+        )
+{
+    int output_count;
+    unsigned int history = rice_initialhistory;
+    int sign_modifier = 0;
+
+    for (output_count = 0; output_count < output_size; output_count++) {
+        int32_t x = 0;
+        int32_t x_modified;
+        int32_t final_val;
+
+        /* read x - number of 1s before 0 represent the rice */
+        while (x <= 8 && get_bits1(&alac->gb)) {
+            x++;
+        }
+
+
+        if (x > 8) { /* RICE THRESHOLD */
+          /* use alternative encoding */
+            int32_t value;
+
+            value = get_bits(&alac->gb, readsamplesize);
+
+            /* mask value to readsamplesize size */
+            if (readsamplesize != 32)
+                value &= (0xffffffff >> (32 - readsamplesize));
+
+            x = value;
+        } else {
+          /* standard rice encoding */
+            int extrabits;
+            int k; /* size of extra bits */
+
+            /* read k, that is bits as is */
+            k = 31 - rice_kmodifier - count_leading_zeros((history >> 9) + 3);
+
+            if (k < 0)
+                k += rice_kmodifier;
+            else
+                k = rice_kmodifier;
+
+            if (k != 1) {
+                extrabits = show_bits(&alac->gb, k);
+
+                /* multiply x by 2^k - 1, as part of their strange algorithm */
+                x = (x << k) - x;
+
+                if (extrabits > 1) {
+                    x += extrabits - 1;
+                    get_bits(&alac->gb, k);
+                } else {
+                    get_bits(&alac->gb, k - 1);
+                }
+            }
+        }
+
+        x_modified = sign_modifier + x;
+        final_val = (x_modified + 1) / 2;
+        if (x_modified & 1) final_val *= -1;
+
+        output_buffer[output_count] = final_val;
+
+        sign_modifier = 0;
+
+        /* now update the history */
+        history += (x_modified * rice_historymult)
+                 - ((history * rice_historymult) >> 9);
+
+        if (x_modified > 0xffff)
+            history = 0xffff;
+
+        /* special case: there may be compressed blocks of 0 */
+        if ((history < 128) && (output_count+1 < output_size)) {
+            int block_size;
+
+            sign_modifier = 1;
+
+            x = 0;
+            while (x <= 8 && get_bits1(&alac->gb)) {
+                x++;
+            }
+
+            if (x > 8) {
+                block_size = get_bits(&alac->gb, 16);
+                block_size &= 0xffff;
+            } else {
+                int k;
+                int extrabits;
+
+                k = count_leading_zeros(history) + ((history + 16) >> 6 /* / 64 */) - 24;
+
+                extrabits = show_bits(&alac->gb, k);
+
+                block_size = (((1 << k) - 1) & rice_kmodifier_mask) * x
+                           + extrabits - 1;
+
+                if (extrabits < 2) {
+                    x = 1 - extrabits;
+                    block_size += x;
+                    get_bits(&alac->gb, k - 1);
+                } else {
+                    get_bits(&alac->gb, k);
+                }
+            }
+
+            if (block_size > 0) {
+                memset(&output_buffer[output_count+1], 0, block_size * 4);
+                output_count += block_size;
+
+            }
+
+            if (block_size > 0xffff)
+                sign_modifier = 0;
+
+            history = 0;
+        }
+    }
+}
+
+#define SIGN_EXTENDED32(val, bits) ((val << (32 - bits)) >> (32 - bits))
+
+#define SIGN_ONLY(v) \
+                     ((v < 0) ? (-1) : \
+                                ((v > 0) ? (1) : \
+                                           (0)))
+
+static void predictor_decompress_fir_adapt(int32_t *error_buffer,
+                                           int32_t *buffer_out,
+                                           int output_size,
+                                           int readsamplesize,
+                                           int16_t *predictor_coef_table,
+                                           int predictor_coef_num,
+                                           int predictor_quantitization)
+{
+    int i;
+
+    /* first sample always copies */
+    *buffer_out = *error_buffer;
+
+    if (!predictor_coef_num) {
+        if (output_size <= 1) return;
+        memcpy(buffer_out+1, error_buffer+1, (output_size-1) * 4);
+        return;
+    }
+
+    if (predictor_coef_num == 0x1f) { /* 11111 - max value of predictor_coef_num */
+      /* second-best case scenario for fir decompression,
+       * error describes a small difference from the previous sample only
+       */
+        if (output_size <= 1) return;
+        for (i = 0; i < output_size - 1; i++) {
+            int32_t prev_value;
+            int32_t error_value;
+
+            prev_value = buffer_out[i];
+            error_value = error_buffer[i+1];
+            buffer_out[i+1] = SIGN_EXTENDED32((prev_value + error_value), readsamplesize);
+        }
+        return;
+    }
+
+    /* read warm-up samples */
+    if (predictor_coef_num > 0) {
+        int i;
+        for (i = 0; i < predictor_coef_num; i++) {
+            int32_t val;
+
+            val = buffer_out[i] + error_buffer[i+1];
+
+            val = SIGN_EXTENDED32(val, readsamplesize);
+
+            buffer_out[i+1] = val;
+        }
+    }
+
+#if 0
+    /* 4 and 8 are very common cases (the only ones i've seen). these
+     * should be unrolled and optimised
+     */
+    if (predictor_coef_num == 4) {
+        /* FIXME: optimised general case */
+        return;
+    }
+
+    if (predictor_coef_table == 8) {
+        /* FIXME: optimised general case */
+        return;
+    }
+#endif
+
+
+    /* general case */
+    if (predictor_coef_num > 0) {
+        for (i = predictor_coef_num + 1;
+             i < output_size;
+             i++) {
+            int j;
+            int sum = 0;
+            int outval;
+            int error_val = error_buffer[i];
+
+            for (j = 0; j < predictor_coef_num; j++) {
+                sum += (buffer_out[predictor_coef_num-j] - buffer_out[0]) *
+                       predictor_coef_table[j];
+            }
+
+            outval = (1 << (predictor_quantitization-1)) + sum;
+            outval = outval >> predictor_quantitization;
+            outval = outval + buffer_out[0] + error_val;
+            outval = SIGN_EXTENDED32(outval, readsamplesize);
+
+            buffer_out[predictor_coef_num+1] = outval;
+
+            if (error_val > 0) {
+                int predictor_num = predictor_coef_num - 1;
+
+                while (predictor_num >= 0 && error_val > 0) {
+                    int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num];
+                    int sign = SIGN_ONLY(val);
+
+                    predictor_coef_table[predictor_num] -= sign;
+
+                    val *= sign; /* absolute value */
+
+                    error_val -= ((val >> predictor_quantitization) *
+                                  (predictor_coef_num - predictor_num));
+
+                    predictor_num--;
+                }
+            } else if (error_val < 0) {
+                int predictor_num = predictor_coef_num - 1;
+
+                while (predictor_num >= 0 && error_val < 0) {
+                    int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num];
+                    int sign = - SIGN_ONLY(val);
+
+                    predictor_coef_table[predictor_num] -= sign;
+
+                    val *= sign; /* neg value */
+
+                    error_val -= ((val >> predictor_quantitization) *
+                                  (predictor_coef_num - predictor_num));
+
+                    predictor_num--;
+                }
+            }
+
+            buffer_out++;
+        }
+    }
+}
+
+static void deinterlace_16(int32_t *buffer_a, int32_t *buffer_b,
+                    int16_t *buffer_out,
+                    int numchannels, int numsamples,
+                    uint8_t interlacing_shift,
+                    uint8_t interlacing_leftweight)
+{
+    int i;
+    if (numsamples <= 0) return;
+
+    /* weighted interlacing */
+    if (interlacing_leftweight) {
+        for (i = 0; i < numsamples; i++) {
+            int32_t difference, midright;
+            int16_t left;
+            int16_t right;
+
+            midright = buffer_a[i];
+            difference = buffer_b[i];
+
+
+            right = midright - ((difference * interlacing_leftweight) >> interlacing_shift);
+            left = (midright - ((difference * interlacing_leftweight) >> interlacing_shift))
+                 + difference;
+
+            buffer_out[i*numchannels] = left;
+            buffer_out[i*numchannels + 1] = right;
+        }
+
+        return;
+    }
+
+    /* otherwise basic interlacing took place */
+    for (i = 0; i < numsamples; i++) {
+        int16_t left, right;
+
+        left = buffer_a[i];
+        right = buffer_b[i];
+
+        buffer_out[i*numchannels] = left;
+        buffer_out[i*numchannels + 1] = right;
+    }
+}
+
+static int alac_decode_frame(AVCodecContext *avctx,
+                             void *outbuffer, int *outputsize,
+                             uint8_t *inbuffer, int input_buffer_size)
+{
+    ALACContext *alac = avctx->priv_data;
+
+    int channels;
+    int32_t outputsamples;
+
+    /* short-circuit null buffers */
+    if (!inbuffer || !input_buffer_size)
+        return input_buffer_size;
+
+    /* initialize from the extradata */
+    if (!alac->context_initialized) {
+        if (alac->avctx->extradata_size != ALAC_EXTRADATA_SIZE) {
+            av_log(avctx, AV_LOG_ERROR, "alac: expected %d extradata bytes\n",
+                ALAC_EXTRADATA_SIZE);
+            return input_buffer_size;
+        }
+        if (alac_set_info(alac)) {
+            av_log(avctx, AV_LOG_ERROR, "alac: set_info failed\n");
+            return input_buffer_size;
+        }
+        alac->context_initialized = 1;
+    }
+
+    outputsamples = alac->setinfo_max_samples_per_frame;
+
+    init_get_bits(&alac->gb, inbuffer, input_buffer_size * 8);
+
+    channels = get_bits(&alac->gb, 3);
+
+    *outputsize = outputsamples * alac->bytespersample;
+
+    switch(channels) {
+    case 0: { /* 1 channel */
+        int hassize;
+        int isnotcompressed;
+        int readsamplesize;
+
+        int wasted_bytes;
+        int ricemodifier;
+
+
+        /* 2^result = something to do with output waiting.
+         * perhaps matters if we read > 1 frame in a pass?
+         */
+        get_bits(&alac->gb, 4);
+
+        get_bits(&alac->gb, 12); /* unknown, skip 12 bits */
+
+        hassize = get_bits(&alac->gb, 1); /* the output sample size is stored soon */
+
+        wasted_bytes = get_bits(&alac->gb, 2); /* unknown ? */
+
+        isnotcompressed = get_bits(&alac->gb, 1); /* whether the frame is compressed */
+
+        if (hassize) {
+            /* now read the number of samples,
+             * as a 32bit integer */
+            outputsamples = get_bits(&alac->gb, 32);
+            *outputsize = outputsamples * alac->bytespersample;
+        }
+
+        readsamplesize = alac->setinfo_sample_size - (wasted_bytes * 8);
+
+        if (!isnotcompressed) {
+         /* so it is compressed */
+            int16_t predictor_coef_table[32];
+            int predictor_coef_num;
+            int prediction_type;
+            int prediction_quantitization;
+            int i;
+
+            /* FIXME: skip 16 bits, not sure what they are. seem to be used in
+             * two channel case */
+            get_bits(&alac->gb, 8);
+            get_bits(&alac->gb, 8);
+
+            prediction_type = get_bits(&alac->gb, 4);
+            prediction_quantitization = get_bits(&alac->gb, 4);
+
+            ricemodifier = get_bits(&alac->gb, 3);
+            predictor_coef_num = get_bits(&alac->gb, 5);
+
+            /* read the predictor table */
+            for (i = 0; i < predictor_coef_num; i++) {
+                predictor_coef_table[i] = (int16_t)get_bits(&alac->gb, 16);
+            }
+
+            if (wasted_bytes) {
+                /* these bytes seem to have something to do with
+                 * > 2 channel files.
+                 */
+                av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented, unhandling of wasted_bytes\n");
+            }
+
+            bastardized_rice_decompress(alac,
+                                        alac->predicterror_buffer_a,
+                                        outputsamples,
+                                        readsamplesize,
+                                        alac->setinfo_rice_initialhistory,
+                                        alac->setinfo_rice_kmodifier,
+                                        ricemodifier * alac->setinfo_rice_historymult / 4,
+                                        (1 << alac->setinfo_rice_kmodifier) - 1);
+
+            if (prediction_type == 0) {
+              /* adaptive fir */
+                predictor_decompress_fir_adapt(alac->predicterror_buffer_a,
+                                               alac->outputsamples_buffer_a,
+                                               outputsamples,
+                                               readsamplesize,
+                                               predictor_coef_table,
+                                               predictor_coef_num,
+                                               prediction_quantitization);
+            } else {
+                av_log(avctx, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type);
+                /* i think the only other prediction type (or perhaps this is just a
+                 * boolean?) runs adaptive fir twice.. like:
+                 * predictor_decompress_fir_adapt(predictor_error, tempout, ...)
+                 * predictor_decompress_fir_adapt(predictor_error, outputsamples ...)
+                 * little strange..
+                 */
+            }
+
+        } else {
+          /* not compressed, easy case */
+            if (readsamplesize <= 16) {
+                int i;
+                for (i = 0; i < outputsamples; i++) {
+                    int32_t audiobits = get_bits(&alac->gb, readsamplesize);
+
+                    audiobits = SIGN_EXTENDED32(audiobits, readsamplesize);
+
+                    alac->outputsamples_buffer_a[i] = audiobits;
+                }
+            } else {
+                int i;
+                for (i = 0; i < outputsamples; i++) {
+                    int32_t audiobits;
+
+                    audiobits = get_bits(&alac->gb, 16);
+                    /* special case of sign extension..
+                     * as we'll be ORing the low 16bits into this */
+                    audiobits = audiobits << 16;
+                    audiobits = audiobits >> (32 - readsamplesize);
+
+                    audiobits |= get_bits(&alac->gb, readsamplesize - 16);
+
+                    alac->outputsamples_buffer_a[i] = audiobits;
+                }
+            }
+            /* wasted_bytes = 0; // unused */
+        }
+
+        switch(alac->setinfo_sample_size) {
+        case 16: {
+            int i;
+            for (i = 0; i < outputsamples; i++) {
+                int16_t sample = alac->outputsamples_buffer_a[i];
+                ((int16_t*)outbuffer)[i * alac->numchannels] = sample;
+            }
+            break;
+        }
+        case 20:
+        case 24:
+        case 32:
+            av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented sample size %i\n", alac->setinfo_sample_size);
+            break;
+        default:
+            break;
+        }
+        break;
+    }
+    case 1: { /* 2 channels */
+        int hassize;
+        int isnotcompressed;
+        int readsamplesize;
+
+        int wasted_bytes;
+
+        uint8_t interlacing_shift;
+        uint8_t interlacing_leftweight;
+
+        /* 2^result = something to do with output waiting.
+         * perhaps matters if we read > 1 frame in a pass?
+         */
+        get_bits(&alac->gb, 4);
+
+        get_bits(&alac->gb, 12); /* unknown, skip 12 bits */
+
+        hassize = get_bits(&alac->gb, 1); /* the output sample size is stored soon */
+
+        wasted_bytes = get_bits(&alac->gb, 2); /* unknown ? */
+
+        isnotcompressed = get_bits(&alac->gb, 1); /* whether the frame is compressed */
+
+        if (hassize) {
+            /* now read the number of samples,
+             * as a 32bit integer */
+            outputsamples = get_bits(&alac->gb, 32);
+            *outputsize = outputsamples * alac->bytespersample;
+        }
+
+        readsamplesize = alac->setinfo_sample_size - (wasted_bytes * 8) + 1;
+
+        if (!isnotcompressed) {
+         /* compressed */
+            int16_t predictor_coef_table_a[32];
+            int predictor_coef_num_a;
+            int prediction_type_a;
+            int prediction_quantitization_a;
+            int ricemodifier_a;
+
+            int16_t predictor_coef_table_b[32];
+            int predictor_coef_num_b;
+            int prediction_type_b;
+            int prediction_quantitization_b;
+            int ricemodifier_b;
+
+            int i;
+
+            interlacing_shift = get_bits(&alac->gb, 8);
+            interlacing_leftweight = get_bits(&alac->gb, 8);
+
+            /******** channel 1 ***********/
+            prediction_type_a = get_bits(&alac->gb, 4);
+            prediction_quantitization_a = get_bits(&alac->gb, 4);
+
+            ricemodifier_a = get_bits(&alac->gb, 3);
+            predictor_coef_num_a = get_bits(&alac->gb, 5);
+
+            /* read the predictor table */
+            for (i = 0; i < predictor_coef_num_a; i++) {
+                predictor_coef_table_a[i] = (int16_t)get_bits(&alac->gb, 16);
+            }
+
+            /******** channel 2 *********/
+            prediction_type_b = get_bits(&alac->gb, 4);
+            prediction_quantitization_b = get_bits(&alac->gb, 4);
+
+            ricemodifier_b = get_bits(&alac->gb, 3);
+            predictor_coef_num_b = get_bits(&alac->gb, 5);
+
+            /* read the predictor table */
+            for (i = 0; i < predictor_coef_num_b; i++) {
+                predictor_coef_table_b[i] = (int16_t)get_bits(&alac->gb, 16);
+            }
+
+            /*********************/
+            if (wasted_bytes) {
+              /* see mono case */
+                av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented, unhandling of wasted_bytes\n");
+            }
+
+            /* channel 1 */
+            bastardized_rice_decompress(alac,
+                                        alac->predicterror_buffer_a,
+                                        outputsamples,
+                                        readsamplesize,
+                                        alac->setinfo_rice_initialhistory,
+                                        alac->setinfo_rice_kmodifier,
+                                        ricemodifier_a * alac->setinfo_rice_historymult / 4,
+                                        (1 << alac->setinfo_rice_kmodifier) - 1);
+
+            if (prediction_type_a == 0) {
+              /* adaptive fir */
+                predictor_decompress_fir_adapt(alac->predicterror_buffer_a,
+                                               alac->outputsamples_buffer_a,
+                                               outputsamples,
+                                               readsamplesize,
+                                               predictor_coef_table_a,
+                                               predictor_coef_num_a,
+                                               prediction_quantitization_a);
+            } else {
+              /* see mono case */
+                av_log(avctx, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type_a);
+            }
+
+            /* channel 2 */
+            bastardized_rice_decompress(alac,
+                                        alac->predicterror_buffer_b,
+                                        outputsamples,
+                                        readsamplesize,
+                                        alac->setinfo_rice_initialhistory,
+                                        alac->setinfo_rice_kmodifier,
+                                        ricemodifier_b * alac->setinfo_rice_historymult / 4,
+                                        (1 << alac->setinfo_rice_kmodifier) - 1);
+
+            if (prediction_type_b == 0) {
+              /* adaptive fir */
+                predictor_decompress_fir_adapt(alac->predicterror_buffer_b,
+                                               alac->outputsamples_buffer_b,
+                                               outputsamples,
+                                               readsamplesize,
+                                               predictor_coef_table_b,
+                                               predictor_coef_num_b,
+                                               prediction_quantitization_b);
+            } else {
+                av_log(avctx, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type_b);
+            }
+        } else {
+         /* not compressed, easy case */
+            if (alac->setinfo_sample_size <= 16) {
+                int i;
+                for (i = 0; i < outputsamples; i++) {
+                    int32_t audiobits_a, audiobits_b;
+
+                    audiobits_a = get_bits(&alac->gb, alac->setinfo_sample_size);
+                    audiobits_b = get_bits(&alac->gb, alac->setinfo_sample_size);
+
+                    audiobits_a = SIGN_EXTENDED32(audiobits_a, alac->setinfo_sample_size);
+                    audiobits_b = SIGN_EXTENDED32(audiobits_b, alac->setinfo_sample_size);
+
+                    alac->outputsamples_buffer_a[i] = audiobits_a;
+                    alac->outputsamples_buffer_b[i] = audiobits_b;
+                }
+            } else {
+                int i;
+                for (i = 0; i < outputsamples; i++) {
+                    int32_t audiobits_a, audiobits_b;
+
+                    audiobits_a = get_bits(&alac->gb, 16);
+                    audiobits_a = audiobits_a << 16;
+                    audiobits_a = audiobits_a >> (32 - alac->setinfo_sample_size);
+                    audiobits_a |= get_bits(&alac->gb, alac->setinfo_sample_size - 16);
+
+                    audiobits_b = get_bits(&alac->gb, 16);
+                    audiobits_b = audiobits_b << 16;
+                    audiobits_b = audiobits_b >> (32 - alac->setinfo_sample_size);
+                    audiobits_b |= get_bits(&alac->gb, alac->setinfo_sample_size - 16);
+
+                    alac->outputsamples_buffer_a[i] = audiobits_a;
+                    alac->outputsamples_buffer_b[i] = audiobits_b;
+                }
+            }
+            /* wasted_bytes = 0; */
+            interlacing_shift = 0;
+            interlacing_leftweight = 0;
+        }
+
+        switch(alac->setinfo_sample_size) {
+        case 16: {
+            deinterlace_16(alac->outputsamples_buffer_a,
+                           alac->outputsamples_buffer_b,
+                           (int16_t*)outbuffer,
+                           alac->numchannels,
+                           outputsamples,
+                           interlacing_shift,
+                           interlacing_leftweight);
+            break;
+        }
+        case 20:
+        case 24:
+        case 32:
+            av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented sample size %i\n", alac->setinfo_sample_size);
+            break;
+        default:
+            break;
+        }
+
+        break;
+    }
+    }
+
+    return input_buffer_size;
+}
+
+static int alac_decode_init(AVCodecContext * avctx)
+{
+    ALACContext *alac = avctx->priv_data;
+    alac->avctx = avctx;
+    alac->context_initialized = 0;
+
+    alac->samplesize = alac->avctx->bits_per_sample;
+    alac->numchannels = alac->avctx->channels;
+    alac->bytespersample = (alac->samplesize / 8) * alac->numchannels;
+
+    return 0;
+}
+
+static int alac_decode_close(AVCodecContext *avctx)
+{
+    ALACContext *alac = avctx->priv_data;
+
+    av_free(alac->predicterror_buffer_a);
+    av_free(alac->predicterror_buffer_b);
+
+    av_free(alac->outputsamples_buffer_a);
+    av_free(alac->outputsamples_buffer_b);
+
+    return 0;
+}
+
+AVCodec alac_decoder = {
+    "alac",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_ALAC,
+    sizeof(ALACContext),
+    alac_decode_init,
+    NULL,
+    alac_decode_close,
+    alac_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/allcodecs.c b/contrib/ffmpeg/libavcodec/allcodecs.c
new file mode 100644
index 000000000..9678f6bee
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/allcodecs.c
@@ -0,0 +1,289 @@
+/*
+ * Utils for libavcodec
+ * Copyright (c) 2002 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file allcodecs.c
+ * Utils for libavcodec.
+ */
+
+#include "avcodec.h"
+
+#define REGISTER_ENCODER(X,x) \
+          if(ENABLE_##X##_ENCODER)  register_avcodec(&x##_encoder)
+#define REGISTER_DECODER(X,x) \
+          if(ENABLE_##X##_DECODER)  register_avcodec(&x##_decoder)
+#define REGISTER_ENCDEC(X,x)  REGISTER_ENCODER(X,x); REGISTER_DECODER(X,x)
+
+#define REGISTER_PARSER(X,x) \
+          if(ENABLE_##X##_PARSER)  av_register_codec_parser(&x##_parser)
+
+/* If you do not call this function, then you can select exactly which
+   formats you want to support */
+
+/**
+ * simple call to register all the codecs.
+ */
+void avcodec_register_all(void)
+{
+    static int inited = 0;
+
+    if (inited != 0)
+        return;
+    inited = 1;
+
+    /* video codecs */
+    REGISTER_DECODER(AASC, aasc);
+    REGISTER_ENCDEC (ASV1, asv1);
+    REGISTER_ENCDEC (ASV2, asv2);
+    REGISTER_DECODER(AVS, avs);
+    REGISTER_DECODER(BMP, bmp);
+    REGISTER_DECODER(CAVS, cavs);
+    REGISTER_DECODER(CINEPAK, cinepak);
+    REGISTER_DECODER(CLJR, cljr);
+    REGISTER_DECODER(CSCD, cscd);
+    REGISTER_DECODER(CYUV, cyuv);
+    REGISTER_DECODER(DSICINVIDEO, dsicinvideo);
+    REGISTER_ENCDEC (DVVIDEO, dvvideo);
+    REGISTER_DECODER(EIGHTBPS, eightbps);
+    REGISTER_ENCDEC (FFV1, ffv1);
+    REGISTER_ENCDEC (FFVHUFF, ffvhuff);
+    REGISTER_DECODER(FLASHSV, flashsv);
+    REGISTER_DECODER(FLIC, flic);
+    REGISTER_ENCDEC (FLV, flv);
+    REGISTER_DECODER(FOURXM, fourxm);
+    REGISTER_DECODER(FRAPS, fraps);
+    REGISTER_ENCDEC (GIF, gif);
+    REGISTER_ENCDEC (H261, h261);
+    REGISTER_ENCDEC (H263, h263);
+    REGISTER_DECODER(H263I, h263i);
+    REGISTER_ENCODER(H263P, h263p);
+    REGISTER_DECODER(H264, h264);
+    REGISTER_ENCDEC (HUFFYUV, huffyuv);
+    REGISTER_DECODER(IDCIN, idcin);
+    REGISTER_DECODER(INDEO2, indeo2);
+    REGISTER_DECODER(INDEO3, indeo3);
+    REGISTER_DECODER(INTERPLAY_VIDEO, interplay_video);
+    REGISTER_ENCODER(JPEGLS, jpegls);
+    REGISTER_DECODER(KMVC, kmvc);
+    REGISTER_ENCODER(LJPEG, ljpeg);
+    REGISTER_DECODER(LOCO, loco);
+    REGISTER_DECODER(MDEC, mdec);
+    REGISTER_ENCDEC (MJPEG, mjpeg);
+    REGISTER_DECODER(MJPEGB, mjpegb);
+    REGISTER_DECODER(MMVIDEO, mmvideo);
+#ifdef HAVE_XVMC
+    REGISTER_DECODER(MPEG_XVMC, mpeg_xvmc);
+#endif
+    REGISTER_ENCDEC (MPEG1VIDEO, mpeg1video);
+    REGISTER_ENCDEC (MPEG2VIDEO, mpeg2video);
+    REGISTER_ENCDEC (MPEG4, mpeg4);
+    REGISTER_DECODER(MPEGVIDEO, mpegvideo);
+    REGISTER_ENCDEC (MSMPEG4V1, msmpeg4v1);
+    REGISTER_ENCDEC (MSMPEG4V2, msmpeg4v2);
+    REGISTER_ENCDEC (MSMPEG4V3, msmpeg4v3);
+    REGISTER_DECODER(MSRLE, msrle);
+    REGISTER_DECODER(MSVIDEO1, msvideo1);
+    REGISTER_DECODER(MSZH, mszh);
+    REGISTER_DECODER(NUV, nuv);
+    REGISTER_ENCODER(PAM, pam);
+    REGISTER_ENCODER(PBM, pbm);
+    REGISTER_ENCODER(PGM, pgm);
+    REGISTER_ENCODER(PGMYUV, pgmyuv);
+#ifdef CONFIG_ZLIB
+    REGISTER_ENCDEC (PNG, png);
+#endif
+    REGISTER_ENCODER(PPM, ppm);
+    REGISTER_DECODER(QDRAW, qdraw);
+    REGISTER_DECODER(QPEG, qpeg);
+    REGISTER_DECODER(QTRLE, qtrle);
+    REGISTER_ENCDEC (RAWVIDEO, rawvideo);
+    REGISTER_DECODER(ROQ, roq);
+    REGISTER_DECODER(RPZA, rpza);
+    REGISTER_ENCDEC (RV10, rv10);
+    REGISTER_ENCDEC (RV20, rv20);
+    REGISTER_DECODER(SMACKER, smacker);
+    REGISTER_DECODER(SMC, smc);
+    REGISTER_ENCDEC (SNOW, snow);
+    REGISTER_DECODER(SP5X, sp5x);
+    REGISTER_ENCDEC (SVQ1, svq1);
+    REGISTER_DECODER(SVQ3, svq3);
+    REGISTER_DECODER(TARGA, targa);
+    REGISTER_DECODER(THEORA, theora);
+    REGISTER_DECODER(TIERTEXSEQVIDEO, tiertexseqvideo);
+    REGISTER_DECODER(TIFF, tiff);
+    REGISTER_DECODER(TRUEMOTION1, truemotion1);
+    REGISTER_DECODER(TRUEMOTION2, truemotion2);
+    REGISTER_DECODER(TSCC, tscc);
+    REGISTER_DECODER(ULTI, ulti);
+    REGISTER_DECODER(VC1, vc1);
+    REGISTER_DECODER(VCR1, vcr1);
+    REGISTER_DECODER(VMDVIDEO, vmdvideo);
+    REGISTER_DECODER(VMNC, vmnc);
+    REGISTER_DECODER(VP3, vp3);
+    REGISTER_DECODER(VP5, vp5);
+    REGISTER_DECODER(VP6, vp6);
+    REGISTER_DECODER(VP6F, vp6f);
+    REGISTER_DECODER(VQA, vqa);
+    REGISTER_ENCDEC (WMV1, wmv1);
+    REGISTER_ENCDEC (WMV2, wmv2);
+    REGISTER_DECODER(WMV3, wmv3);
+    REGISTER_DECODER(WNV1, wnv1);
+#ifdef CONFIG_X264
+    REGISTER_ENCODER(X264, x264);
+#endif
+    REGISTER_DECODER(XAN_WC3, xan_wc3);
+    REGISTER_DECODER(XL, xl);
+#ifdef CONFIG_XVID
+    REGISTER_ENCODER(XVID, xvid);
+#endif
+    REGISTER_ENCDEC (ZLIB, zlib);
+    REGISTER_DECODER(ZMBV, zmbv);
+
+    /* audio codecs */
+#ifdef CONFIG_FAAD
+    REGISTER_DECODER(AAC, aac);
+    REGISTER_DECODER(MPEG4AAC, mpeg4aac);
+#endif
+#ifdef CONFIG_A52
+    REGISTER_DECODER(AC3, ac3);
+#endif
+    REGISTER_ENCODER(AC3, ac3);
+    REGISTER_DECODER(ALAC, alac);
+#if defined(CONFIG_AMR_NB) || defined(CONFIG_AMR_NB_FIXED)
+    REGISTER_ENCDEC (AMR_NB, amr_nb);
+#endif
+#ifdef CONFIG_AMR_WB
+    REGISTER_ENCDEC (AMR_WB, amr_wb);
+#endif
+    REGISTER_DECODER(COOK, cook);
+    REGISTER_DECODER(DSICINAUDIO, dsicinaudio);
+#ifdef CONFIG_DTS
+    REGISTER_DECODER(DTS, dts);
+#endif
+#ifdef CONFIG_FAAC
+    REGISTER_ENCODER(FAAC, faac);
+#endif
+    REGISTER_ENCDEC (FLAC, flac);
+    REGISTER_DECODER(IMC, imc);
+#ifdef CONFIG_LIBGSM
+    REGISTER_ENCDEC (LIBGSM, libgsm);
+#endif
+    REGISTER_DECODER(MACE3, mace3);
+    REGISTER_DECODER(MACE6, mace6);
+    REGISTER_ENCDEC (MP2, mp2);
+    REGISTER_DECODER(MP3, mp3);
+    REGISTER_DECODER(MP3ADU, mp3adu);
+#ifdef CONFIG_MP3LAME
+    REGISTER_ENCODER(MP3LAME, mp3lame);
+#endif
+    REGISTER_DECODER(MP3ON4, mp3on4);
+#ifdef CONFIG_LIBVORBIS
+    if (!ENABLE_VORBIS_ENCODER)  REGISTER_ENCODER(OGGVORBIS, oggvorbis);
+    if (!ENABLE_VORBIS_DECODER)  REGISTER_DECODER(OGGVORBIS, oggvorbis);
+#endif
+    REGISTER_DECODER(QDM2, qdm2);
+    REGISTER_DECODER(RA_144, ra_144);
+    REGISTER_DECODER(RA_288, ra_288);
+    REGISTER_DECODER(SHORTEN, shorten);
+    REGISTER_DECODER(SMACKAUD, smackaud);
+    REGISTER_ENCDEC (SONIC, sonic);
+    REGISTER_ENCODER(SONIC_LS, sonic_ls);
+    REGISTER_DECODER(TRUESPEECH, truespeech);
+    REGISTER_DECODER(TTA, tta);
+    REGISTER_DECODER(VMDAUDIO, vmdaudio);
+    REGISTER_ENCDEC (VORBIS, vorbis);
+    REGISTER_DECODER(WAVPACK, wavpack);
+    REGISTER_DECODER(WMAV1, wmav1);
+    REGISTER_DECODER(WMAV2, wmav2);
+    REGISTER_DECODER(WS_SND1, ws_snd1);
+
+    /* pcm codecs */
+    REGISTER_ENCDEC (PCM_ALAW, pcm_alaw);
+    REGISTER_ENCDEC (PCM_MULAW, pcm_mulaw);
+    REGISTER_ENCDEC (PCM_S8, pcm_s8);
+    REGISTER_ENCDEC (PCM_S16BE, pcm_s16be);
+    REGISTER_ENCDEC (PCM_S16LE, pcm_s16le);
+    REGISTER_ENCDEC (PCM_S24BE, pcm_s24be);
+    REGISTER_ENCDEC (PCM_S24DAUD, pcm_s24daud);
+    REGISTER_ENCDEC (PCM_S24LE, pcm_s24le);
+    REGISTER_ENCDEC (PCM_S32BE, pcm_s32be);
+    REGISTER_ENCDEC (PCM_S32LE, pcm_s32le);
+    REGISTER_ENCDEC (PCM_U8, pcm_u8);
+    REGISTER_ENCDEC (PCM_U16BE, pcm_u16be);
+    REGISTER_ENCDEC (PCM_U16LE, pcm_u16le);
+    REGISTER_ENCDEC (PCM_U24BE, pcm_u24be);
+    REGISTER_ENCDEC (PCM_U24LE, pcm_u24le);
+    REGISTER_ENCDEC (PCM_U32BE, pcm_u32be);
+    REGISTER_ENCDEC (PCM_U32LE, pcm_u32le);
+
+    /* dpcm codecs */
+    REGISTER_DECODER(INTERPLAY_DPCM, interplay_dpcm);
+    REGISTER_DECODER(ROQ_DPCM, roq_dpcm);
+    REGISTER_DECODER(SOL_DPCM, sol_dpcm);
+    REGISTER_DECODER(XAN_DPCM, xan_dpcm);
+
+    /* adpcm codecs */
+    REGISTER_ENCDEC (ADPCM_4XM, adpcm_4xm);
+    REGISTER_ENCDEC (ADPCM_ADX, adpcm_adx);
+    REGISTER_ENCDEC (ADPCM_CT, adpcm_ct);
+    REGISTER_ENCDEC (ADPCM_EA, adpcm_ea);
+    REGISTER_ENCDEC (ADPCM_G726, adpcm_g726);
+    REGISTER_ENCDEC (ADPCM_IMA_DK3, adpcm_ima_dk3);
+    REGISTER_ENCDEC (ADPCM_IMA_DK4, adpcm_ima_dk4);
+    REGISTER_ENCDEC (ADPCM_IMA_QT, adpcm_ima_qt);
+    REGISTER_ENCDEC (ADPCM_IMA_SMJPEG, adpcm_ima_smjpeg);
+    REGISTER_ENCDEC (ADPCM_IMA_WAV, adpcm_ima_wav);
+    REGISTER_ENCDEC (ADPCM_IMA_WS, adpcm_ima_ws);
+    REGISTER_ENCDEC (ADPCM_MS, adpcm_ms);
+    REGISTER_ENCDEC (ADPCM_SBPRO_2, adpcm_sbpro_2);
+    REGISTER_ENCDEC (ADPCM_SBPRO_3, adpcm_sbpro_3);
+    REGISTER_ENCDEC (ADPCM_SBPRO_4, adpcm_sbpro_4);
+    REGISTER_ENCDEC (ADPCM_SWF, adpcm_swf);
+    REGISTER_ENCDEC (ADPCM_XA, adpcm_xa);
+    REGISTER_ENCDEC (ADPCM_YAMAHA, adpcm_yamaha);
+
+    /* subtitles */
+    REGISTER_ENCDEC (DVBSUB, dvbsub);
+    REGISTER_ENCDEC (DVDSUB, dvdsub);
+
+    /* parsers */
+    REGISTER_PARSER (AAC, aac);
+    REGISTER_PARSER (AC3, ac3);
+    REGISTER_PARSER (CAVSVIDEO, cavsvideo);
+    REGISTER_PARSER (DVBSUB, dvbsub);
+    REGISTER_PARSER (DVDSUB, dvdsub);
+    REGISTER_PARSER (H261, h261);
+    REGISTER_PARSER (H263, h263);
+    REGISTER_PARSER (H264, h264);
+    REGISTER_PARSER (MJPEG, mjpeg);
+    REGISTER_PARSER (MPEG4VIDEO, mpeg4video);
+    REGISTER_PARSER (MPEGAUDIO, mpegaudio);
+    REGISTER_PARSER (MPEGVIDEO, mpegvideo);
+    REGISTER_PARSER (PNM, pnm);
+
+    av_register_bitstream_filter(&dump_extradata_bsf);
+    av_register_bitstream_filter(&remove_extradata_bsf);
+    av_register_bitstream_filter(&noise_bsf);
+    av_register_bitstream_filter(&mp3_header_compress_bsf);
+    av_register_bitstream_filter(&mp3_header_decompress_bsf);
+    av_register_bitstream_filter(&mjpega_dump_header_bsf);
+}
+
diff --git a/contrib/ffmpeg/libavcodec/alpha/asm.h b/contrib/ffmpeg/libavcodec/alpha/asm.h
new file mode 100644
index 000000000..c0ddde528
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/alpha/asm.h
@@ -0,0 +1,191 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef LIBAVCODEC_ALPHA_ASM_H
+#define LIBAVCODEC_ALPHA_ASM_H
+
+#include <inttypes.h>
+
+#if defined __GNUC__
+# define GNUC_PREREQ(maj, min) \
+        ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#else
+# define GNUC_PREREQ(maj, min) 0
+#endif
+
+#if GNUC_PREREQ(2,96)
+# define likely(x)      __builtin_expect((x) != 0, 1)
+# define unlikely(x)    __builtin_expect((x) != 0, 0)
+#else
+# define likely(x)      (x)
+# define unlikely(x)    (x)
+#endif
+
+#define AMASK_BWX (1 << 0)
+#define AMASK_FIX (1 << 1)
+#define AMASK_CIX (1 << 2)
+#define AMASK_MVI (1 << 8)
+
+static inline uint64_t BYTE_VEC(uint64_t x)
+{
+    x |= x <<  8;
+    x |= x << 16;
+    x |= x << 32;
+    return x;
+}
+static inline uint64_t WORD_VEC(uint64_t x)
+{
+    x |= x << 16;
+    x |= x << 32;
+    return x;
+}
+
+#define sextw(x) ((int16_t) (x))
+
+#ifdef __GNUC__
+#define ldq(p)                                                  \
+    (((union {                                                  \
+        uint64_t __l;                                           \
+        __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)];  \
+    } *) (p))->__l)
+#define ldl(p)                                                  \
+    (((union {                                                  \
+        int32_t __l;                                            \
+        __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)];   \
+    } *) (p))->__l)
+#define stq(l, p)                                                       \
+    do {                                                                \
+        (((union {                                                      \
+            uint64_t __l;                                               \
+            __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)];      \
+        } *) (p))->__l) = l;                                            \
+    } while (0)
+#define stl(l, p)                                                       \
+    do {                                                                \
+        (((union {                                                      \
+            int32_t __l;                                                \
+            __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)];       \
+        } *) (p))->__l) = l;                                            \
+    } while (0)
+struct unaligned_long { uint64_t l; } __attribute__((packed));
+#define ldq_u(p)        (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
+#define uldq(a)         (((const struct unaligned_long *) (a))->l)
+
+#if GNUC_PREREQ(3,3)
+#define prefetch(p)     __builtin_prefetch((p), 0, 1)
+#define prefetch_en(p)  __builtin_prefetch((p), 0, 0)
+#define prefetch_m(p)   __builtin_prefetch((p), 1, 1)
+#define prefetch_men(p) __builtin_prefetch((p), 1, 0)
+#define cmpbge          __builtin_alpha_cmpbge
+/* Avoid warnings.  */
+#define extql(a, b)     __builtin_alpha_extql(a, (uint64_t) (b))
+#define extwl(a, b)     __builtin_alpha_extwl(a, (uint64_t) (b))
+#define extqh(a, b)     __builtin_alpha_extqh(a, (uint64_t) (b))
+#define zap             __builtin_alpha_zap
+#define zapnot          __builtin_alpha_zapnot
+#define amask           __builtin_alpha_amask
+#define implver         __builtin_alpha_implver
+#define rpcc            __builtin_alpha_rpcc
+#else
+#define prefetch(p)     asm volatile("ldl $31,%0"  : : "m"(*(const char *) (p)) : "memory")
+#define prefetch_en(p)  asm volatile("ldq $31,%0"  : : "m"(*(const char *) (p)) : "memory")
+#define prefetch_m(p)   asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory")
+#define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory")
+#define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define extql(a, b)  ({ uint64_t __r; asm ("extql   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define extwl(a, b)  ({ uint64_t __r; asm ("extwl   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define extqh(a, b)  ({ uint64_t __r; asm ("extqh   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define zap(a, b)    ({ uint64_t __r; asm ("zap     %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define amask(a)     ({ uint64_t __r; asm ("amask   %1,%0"      : "=r" (__r) : "rI"  (a));           __r; })
+#define implver()    ({ uint64_t __r; asm ("implver %0"         : "=r" (__r));                       __r; })
+#define rpcc()       ({ uint64_t __r; asm volatile ("rpcc %0"   : "=r" (__r));                       __r; })
+#endif
+#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory")
+
+#if GNUC_PREREQ(3,3) && defined(__alpha_max__)
+#define minub8  __builtin_alpha_minub8
+#define minsb8  __builtin_alpha_minsb8
+#define minuw4  __builtin_alpha_minuw4
+#define minsw4  __builtin_alpha_minsw4
+#define maxub8  __builtin_alpha_maxub8
+#define maxsb8  __builtin_alpha_maxsb8
+#define maxuw4  __builtin_alpha_maxuw4
+#define maxsw4  __builtin_alpha_maxsw4
+#define perr    __builtin_alpha_perr
+#define pklb    __builtin_alpha_pklb
+#define pkwb    __builtin_alpha_pkwb
+#define unpkbl  __builtin_alpha_unpkbl
+#define unpkbw  __builtin_alpha_unpkbw
+#else
+#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define minuw4(a, b) ({ uint64_t __r; asm (".arch ev6; minuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define minsw4(a, b) ({ uint64_t __r; asm (".arch ev6; minsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxub8(a, b) ({ uint64_t __r; asm (".arch ev6; maxub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxsb8(a, b) ({ uint64_t __r; asm (".arch ev6; maxsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define perr(a, b)   ({ uint64_t __r; asm (".arch ev6; perr    %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
+#define pklb(a)      ({ uint64_t __r; asm (".arch ev6; pklb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
+#define pkwb(a)      ({ uint64_t __r; asm (".arch ev6; pkwb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
+#define unpkbl(a)    ({ uint64_t __r; asm (".arch ev6; unpkbl  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
+#define unpkbw(a)    ({ uint64_t __r; asm (".arch ev6; unpkbw  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
+#endif
+
+#elif defined(__DECC)           /* Digital/Compaq/hp "ccc" compiler */
+
+#include <c_asm.h>
+#define ldq(p) (*(const uint64_t *) (p))
+#define ldl(p) (*(const int32_t *)  (p))
+#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
+#define stl(l, p) do { *(int32_t *)  (p) = (l); } while (0)
+#define ldq_u(a)     asm ("ldq_u   %v0,0(%a0)", a)
+#define uldq(a)      (*(const __unaligned uint64_t *) (a))
+#define cmpbge(a, b) asm ("cmpbge  %a0,%a1,%v0", a, b)
+#define extql(a, b)  asm ("extql   %a0,%a1,%v0", a, b)
+#define extwl(a, b)  asm ("extwl   %a0,%a1,%v0", a, b)
+#define extqh(a, b)  asm ("extqh   %a0,%a1,%v0", a, b)
+#define zap(a, b)    asm ("zap     %a0,%a1,%v0", a, b)
+#define zapnot(a, b) asm ("zapnot  %a0,%a1,%v0", a, b)
+#define amask(a)     asm ("amask   %a0,%v0", a)
+#define implver()    asm ("implver %v0")
+#define rpcc()       asm ("rpcc           %v0")
+#define minub8(a, b) asm ("minub8  %a0,%a1,%v0", a, b)
+#define minsb8(a, b) asm ("minsb8  %a0,%a1,%v0", a, b)
+#define minuw4(a, b) asm ("minuw4  %a0,%a1,%v0", a, b)
+#define minsw4(a, b) asm ("minsw4  %a0,%a1,%v0", a, b)
+#define maxub8(a, b) asm ("maxub8  %a0,%a1,%v0", a, b)
+#define maxsb8(a, b) asm ("maxsb8  %a0,%a1,%v0", a, b)
+#define maxuw4(a, b) asm ("maxuw4  %a0,%a1,%v0", a, b)
+#define maxsw4(a, b) asm ("maxsw4  %a0,%a1,%v0", a, b)
+#define perr(a, b)   asm ("perr    %a0,%a1,%v0", a, b)
+#define pklb(a)      asm ("pklb    %a0,%v0", a)
+#define pkwb(a)      asm ("pkwb    %a0,%v0", a)
+#define unpkbl(a)    asm ("unpkbl  %a0,%v0", a)
+#define unpkbw(a)    asm ("unpkbw  %a0,%v0", a)
+#define wh64(a)      asm ("wh64    %a0", a)
+
+#else
+#error "Unknown compiler!"
+#endif
+
+#endif /* LIBAVCODEC_ALPHA_ASM_H */
diff --git a/contrib/ffmpeg/libavcodec/alpha/dsputil_alpha.c b/contrib/ffmpeg/libavcodec/alpha/dsputil_alpha.c
new file mode 100644
index 000000000..c98d6f7ff
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/alpha/dsputil_alpha.c
@@ -0,0 +1,362 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.h"
+#include "../dsputil.h"
+
+extern void simple_idct_axp(DCTELEM *block);
+extern void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block);
+extern void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block);
+
+void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+                        int line_size, int h);
+void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
+                                int line_size);
+void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
+                                int line_size);
+void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
+                                 int line_size);
+void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
+                                 int line_size);
+
+void get_pixels_mvi(DCTELEM *restrict block,
+                    const uint8_t *restrict pixels, int line_size);
+void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
+                     int stride);
+int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
+int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size);
+int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
+int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
+int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
+
+#if 0
+/* These functions were the base for the optimized assembler routines,
+   and remain here for documentation purposes.  */
+static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
+                                   int line_size)
+{
+    int i = 8;
+    uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
+
+    do {
+        uint64_t shorts0, shorts1;
+
+        shorts0 = ldq(block);
+        shorts0 = maxsw4(shorts0, 0);
+        shorts0 = minsw4(shorts0, clampmask);
+        stl(pkwb(shorts0), pixels);
+
+        shorts1 = ldq(block + 4);
+        shorts1 = maxsw4(shorts1, 0);
+        shorts1 = minsw4(shorts1, clampmask);
+        stl(pkwb(shorts1), pixels + 4);
+
+        pixels += line_size;
+        block += 8;
+    } while (--i);
+}
+
+void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
+                            int line_size)
+{
+    int h = 8;
+    /* Keep this function a leaf function by generating the constants
+       manually (mainly for the hack value ;-).  */
+    uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
+    uint64_t signmask  = zap(-1, 0x33);
+    signmask ^= signmask >> 1;  /* 0x8000800080008000 */
+
+    do {
+        uint64_t shorts0, pix0, signs0;
+        uint64_t shorts1, pix1, signs1;
+
+        shorts0 = ldq(block);
+        shorts1 = ldq(block + 4);
+
+        pix0    = unpkbw(ldl(pixels));
+        /* Signed subword add (MMX paddw).  */
+        signs0  = shorts0 & signmask;
+        shorts0 &= ~signmask;
+        shorts0 += pix0;
+        shorts0 ^= signs0;
+        /* Clamp. */
+        shorts0 = maxsw4(shorts0, 0);
+        shorts0 = minsw4(shorts0, clampmask);
+
+        /* Next 4.  */
+        pix1    = unpkbw(ldl(pixels + 4));
+        signs1  = shorts1 & signmask;
+        shorts1 &= ~signmask;
+        shorts1 += pix1;
+        shorts1 ^= signs1;
+        shorts1 = maxsw4(shorts1, 0);
+        shorts1 = minsw4(shorts1, clampmask);
+
+        stl(pkwb(shorts0), pixels);
+        stl(pkwb(shorts1), pixels + 4);
+
+        pixels += line_size;
+        block += 8;
+    } while (--h);
+}
+#endif
+
+static void clear_blocks_axp(DCTELEM *blocks) {
+    uint64_t *p = (uint64_t *) blocks;
+    int n = sizeof(DCTELEM) * 6 * 64;
+
+    do {
+        p[0] = 0;
+        p[1] = 0;
+        p[2] = 0;
+        p[3] = 0;
+        p[4] = 0;
+        p[5] = 0;
+        p[6] = 0;
+        p[7] = 0;
+        p += 8;
+        n -= 8 * 8;
+    } while (n);
+}
+
+static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
+{
+    return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
+}
+
+static inline uint64_t avg2(uint64_t a, uint64_t b)
+{
+    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
+}
+
+#if 0
+/* The XY2 routines basically utilize this scheme, but reuse parts in
+   each iteration.  */
+static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
+{
+    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
+    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
+                    + (l2 & BYTE_VEC(0x03))
+                    + (l3 & BYTE_VEC(0x03))
+                    + (l4 & BYTE_VEC(0x03))
+                    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
+    return r1 + r2;
+}
+#endif
+
+#define OP(LOAD, STORE)                         \
+    do {                                        \
+        STORE(LOAD(pixels), block);             \
+        pixels += line_size;                    \
+        block += line_size;                     \
+    } while (--h)
+
+#define OP_X2(LOAD, STORE)                                      \
+    do {                                                        \
+        uint64_t pix1, pix2;                                    \
+                                                                \
+        pix1 = LOAD(pixels);                                    \
+        pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);        \
+        STORE(AVG2(pix1, pix2), block);                         \
+        pixels += line_size;                                    \
+        block += line_size;                                     \
+    } while (--h)
+
+#define OP_Y2(LOAD, STORE)                      \
+    do {                                        \
+        uint64_t pix = LOAD(pixels);            \
+        do {                                    \
+            uint64_t next_pix;                  \
+                                                \
+            pixels += line_size;                \
+            next_pix = LOAD(pixels);            \
+            STORE(AVG2(pix, next_pix), block);  \
+            block += line_size;                 \
+            pix = next_pix;                     \
+        } while (--h);                          \
+    } while (0)
+
+#define OP_XY2(LOAD, STORE)                                                 \
+    do {                                                                    \
+        uint64_t pix1 = LOAD(pixels);                                       \
+        uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);           \
+        uint64_t pix_l = (pix1 & BYTE_VEC(0x03))                            \
+                       + (pix2 & BYTE_VEC(0x03));                           \
+        uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2)                    \
+                       + ((pix2 & ~BYTE_VEC(0x03)) >> 2);                   \
+                                                                            \
+        do {                                                                \
+            uint64_t npix1, npix2;                                          \
+            uint64_t npix_l, npix_h;                                        \
+            uint64_t avg;                                                   \
+                                                                            \
+            pixels += line_size;                                            \
+            npix1 = LOAD(pixels);                                           \
+            npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56);              \
+            npix_l = (npix1 & BYTE_VEC(0x03))                               \
+                   + (npix2 & BYTE_VEC(0x03));                              \
+            npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2)                       \
+                   + ((npix2 & ~BYTE_VEC(0x03)) >> 2);                      \
+            avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
+                + pix_h + npix_h;                                           \
+            STORE(avg, block);                                              \
+                                                                            \
+            block += line_size;                                             \
+            pix_l = npix_l;                                                 \
+            pix_h = npix_h;                                                 \
+        } while (--h);                                                      \
+    } while (0)
+
+#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE)                                \
+static void OPNAME ## _pixels ## SUFF ## _axp                               \
+        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
+         int line_size, int h)                                              \
+{                                                                           \
+    if ((size_t) pixels & 0x7) {                                            \
+        OPKIND(uldq, STORE);                                                \
+    } else {                                                                \
+        OPKIND(ldq, STORE);                                                 \
+    }                                                                       \
+}                                                                           \
+                                                                            \
+static void OPNAME ## _pixels16 ## SUFF ## _axp                             \
+        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
+         int line_size, int h)                                              \
+{                                                                           \
+    OPNAME ## _pixels ## SUFF ## _axp(block,     pixels,     line_size, h); \
+    OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
+}
+
+#define PIXOP(OPNAME, STORE)                    \
+    MAKE_OP(OPNAME, ,     OP,     STORE)        \
+    MAKE_OP(OPNAME, _x2,  OP_X2,  STORE)        \
+    MAKE_OP(OPNAME, _y2,  OP_Y2,  STORE)        \
+    MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
+
+/* Rounding primitives.  */
+#define AVG2 avg2
+#define AVG4 avg4
+#define AVG4_ROUNDER BYTE_VEC(0x02)
+#define STORE(l, b) stq(l, b)
+PIXOP(put, STORE);
+
+#undef STORE
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
+PIXOP(avg, STORE);
+
+/* Not rounding primitives.  */
+#undef AVG2
+#undef AVG4
+#undef AVG4_ROUNDER
+#undef STORE
+#define AVG2 avg2_no_rnd
+#define AVG4 avg4_no_rnd
+#define AVG4_ROUNDER BYTE_VEC(0x01)
+#define STORE(l, b) stq(l, b)
+PIXOP(put_no_rnd, STORE);
+
+#undef STORE
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
+PIXOP(avg_no_rnd, STORE);
+
+void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
+                          int line_size, int h)
+{
+    put_pixels_axp_asm(block,     pixels,     line_size, h);
+    put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
+}
+
+static int sad16x16_mvi(void *s, uint8_t *a, uint8_t *b, int stride)
+{
+    return pix_abs16x16_mvi_asm(a, b, stride);
+}
+
+void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
+{
+    c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
+    c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
+    c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
+    c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
+
+    c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
+    c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
+    c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
+    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
+
+    c->avg_pixels_tab[0][0] = avg_pixels16_axp;
+    c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
+    c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
+    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
+
+    c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_axp;
+    c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_axp;
+    c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_axp;
+    c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_axp;
+
+    c->put_pixels_tab[1][0] = put_pixels_axp_asm;
+    c->put_pixels_tab[1][1] = put_pixels_x2_axp;
+    c->put_pixels_tab[1][2] = put_pixels_y2_axp;
+    c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
+
+    c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
+    c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
+    c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
+    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
+
+    c->avg_pixels_tab[1][0] = avg_pixels_axp;
+    c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
+    c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
+    c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
+
+    c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels_axp;
+    c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels_x2_axp;
+    c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels_y2_axp;
+    c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp;
+
+    c->clear_blocks = clear_blocks_axp;
+
+    /* amask clears all bits that correspond to present features.  */
+    if (amask(AMASK_MVI) == 0) {
+        c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
+        c->add_pixels_clamped = add_pixels_clamped_mvi_asm;
+
+        c->get_pixels       = get_pixels_mvi;
+        c->diff_pixels      = diff_pixels_mvi;
+        c->sad[0]           = sad16x16_mvi;
+        c->sad[1]           = pix_abs8x8_mvi;
+//        c->pix_abs[0][0]    = pix_abs16x16_mvi_asm; //FIXME function arguments for the asm must be fixed
+        c->pix_abs[0][0]    = sad16x16_mvi;
+        c->pix_abs[1][0]    = pix_abs8x8_mvi;
+        c->pix_abs[0][1]    = pix_abs16x16_x2_mvi;
+        c->pix_abs[0][2]    = pix_abs16x16_y2_mvi;
+        c->pix_abs[0][3]    = pix_abs16x16_xy2_mvi;
+    }
+
+    put_pixels_clamped_axp_p = c->put_pixels_clamped;
+    add_pixels_clamped_axp_p = c->add_pixels_clamped;
+
+    c->idct_put = simple_idct_put_axp;
+    c->idct_add = simple_idct_add_axp;
+    c->idct = simple_idct_axp;
+}
diff --git a/contrib/ffmpeg/libavcodec/alpha/dsputil_alpha_asm.S b/contrib/ffmpeg/libavcodec/alpha/dsputil_alpha_asm.S
new file mode 100644
index 000000000..367f2d743
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/alpha/dsputil_alpha_asm.S
@@ -0,0 +1,285 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * These functions are scheduled for pca56. They should work
+ * reasonably on ev6, though.
+ */
+
+#include "regdef.h"
+
+/* Some nicer register names.  */
+#define ta t10
+#define tb t11
+#define tc t12
+#define td AT
+/* Danger: these overlap with the argument list and the return value */
+#define te a5
+#define tf a4
+#define tg a3
+#define th v0
+
+        .set noat
+        .set noreorder
+        .arch pca56
+        .text
+
+/************************************************************************
+ * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+ *                         int line_size, int h)
+ */
+        .align 6
+        .globl put_pixels_axp_asm
+        .ent put_pixels_axp_asm
+put_pixels_axp_asm:
+        .frame sp, 0, ra
+        .prologue 0
+
+#ifdef HAVE_GPROF
+        lda     AT, _mcount
+        jsr     AT, (AT), _mcount
+#endif
+
+        and     a1, 7, t0
+        beq     t0, $aligned
+
+        .align 4
+$unaligned:
+        ldq_u   t0, 0(a1)
+        ldq_u   t1, 8(a1)
+        addq    a1, a2, a1
+        nop
+
+        ldq_u   t2, 0(a1)
+        ldq_u   t3, 8(a1)
+        addq    a1, a2, a1
+        nop
+
+        ldq_u   t4, 0(a1)
+        ldq_u   t5, 8(a1)
+        addq    a1, a2, a1
+        nop
+
+        ldq_u   t6, 0(a1)
+        ldq_u   t7, 8(a1)
+        extql   t0, a1, t0
+        addq    a1, a2, a1
+
+        extqh   t1, a1, t1
+        addq    a0, a2, t8
+        extql   t2, a1, t2
+        addq    t8, a2, t9
+
+        extqh   t3, a1, t3
+        addq    t9, a2, ta
+        extql   t4, a1, t4
+        or      t0, t1, t0
+
+        extqh   t5, a1, t5
+        or      t2, t3, t2
+        extql   t6, a1, t6
+        or      t4, t5, t4
+
+        extqh   t7, a1, t7
+        or      t6, t7, t6
+        stq     t0, 0(a0)
+        stq     t2, 0(t8)
+
+        stq     t4, 0(t9)
+        subq    a3, 4, a3
+        stq     t6, 0(ta)
+        addq    ta, a2, a0
+
+        bne     a3, $unaligned
+        ret
+
+        .align 4
+$aligned:
+        ldq     t0, 0(a1)
+        addq    a1, a2, a1
+        ldq     t1, 0(a1)
+        addq    a1, a2, a1
+
+        ldq     t2, 0(a1)
+        addq    a1, a2, a1
+        ldq     t3, 0(a1)
+
+        addq    a0, a2, t4
+        addq    a1, a2, a1
+        addq    t4, a2, t5
+        subq    a3, 4, a3
+
+        stq     t0, 0(a0)
+        addq    t5, a2, t6
+        stq     t1, 0(t4)
+        addq    t6, a2, a0
+
+        stq     t2, 0(t5)
+        stq     t3, 0(t6)
+
+        bne     a3, $aligned
+        ret
+        .end put_pixels_axp_asm
+
+/************************************************************************
+ * void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
+ *                                 int line_size)
+ */
+        .align 6
+        .globl put_pixels_clamped_mvi_asm
+        .ent put_pixels_clamped_mvi_asm
+put_pixels_clamped_mvi_asm:
+        .frame sp, 0, ra
+        .prologue 0
+
+#ifdef HAVE_GPROF
+        lda     AT, _mcount
+        jsr     AT, (AT), _mcount
+#endif
+
+        lda     t8, -1
+        lda     t9, 8           # loop counter
+        zap     t8, 0xaa, t8    # 00ff00ff00ff00ff
+
+        .align 4
+1:      ldq     t0,  0(a0)
+        ldq     t1,  8(a0)
+        ldq     t2, 16(a0)
+        ldq     t3, 24(a0)
+
+        maxsw4  t0, zero, t0
+        subq    t9, 2, t9
+        maxsw4  t1, zero, t1
+        lda     a0, 32(a0)
+
+        maxsw4  t2, zero, t2
+        addq    a1, a2, ta
+        maxsw4  t3, zero, t3
+        minsw4  t0, t8, t0
+
+        minsw4  t1, t8, t1
+        minsw4  t2, t8, t2
+        minsw4  t3, t8, t3
+        pkwb    t0, t0
+
+        pkwb    t1, t1
+        pkwb    t2, t2
+        pkwb    t3, t3
+        stl     t0, 0(a1)
+
+        stl     t1, 4(a1)
+        addq    ta, a2, a1
+        stl     t2, 0(ta)
+        stl     t3, 4(ta)
+
+        bne     t9, 1b
+        ret
+        .end put_pixels_clamped_mvi_asm
+
+/************************************************************************
+ * void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
+ *                                 int line_size)
+ */
+        .align 6
+        .globl add_pixels_clamped_mvi_asm
+        .ent add_pixels_clamped_mvi_asm
+add_pixels_clamped_mvi_asm:
+        .frame sp, 0, ra
+        .prologue 0
+
+#ifdef HAVE_GPROF
+        lda     AT, _mcount
+        jsr     AT, (AT), _mcount
+#endif
+
+        lda     t1, -1
+        lda     th, 8
+        zap     t1, 0x33, tg
+        nop
+
+        srl     tg, 1, t0
+        xor     tg, t0, tg      # 0x8000800080008000
+        zap     t1, 0xaa, tf    # 0x00ff00ff00ff00ff
+
+        .align 4
+1:      ldl     t1, 0(a1)       # pix0 (try to hit cache line soon)
+        ldl     t4, 4(a1)       # pix1
+        addq    a1, a2, te      # pixels += line_size
+        ldq     t0, 0(a0)       # shorts0
+
+        ldl     t7, 0(te)       # pix2 (try to hit cache line soon)
+        ldl     ta, 4(te)       # pix3
+        ldq     t3, 8(a0)       # shorts1
+        ldq     t6, 16(a0)      # shorts2
+
+        ldq     t9, 24(a0)      # shorts3
+        unpkbw  t1, t1          # 0 0 (quarter/op no.)
+        and     t0, tg, t2      # 0 1
+        unpkbw  t4, t4          # 1 0
+
+        bic     t0, tg, t0      # 0 2
+        unpkbw  t7, t7          # 2 0
+        and     t3, tg, t5      # 1 1
+        addq    t0, t1, t0      # 0 3
+
+        xor     t0, t2, t0      # 0 4
+        unpkbw  ta, ta          # 3 0
+        and     t6, tg, t8      # 2 1
+        maxsw4  t0, zero, t0    # 0 5
+
+        bic     t3, tg, t3      # 1 2
+        bic     t6, tg, t6      # 2 2
+        minsw4  t0, tf, t0      # 0 6
+        addq    t3, t4, t3      # 1 3
+
+        pkwb    t0, t0          # 0 7
+        xor     t3, t5, t3      # 1 4
+        maxsw4  t3, zero, t3    # 1 5
+        addq    t6, t7, t6      # 2 3
+
+        xor     t6, t8, t6      # 2 4
+        and     t9, tg, tb      # 3 1
+        minsw4  t3, tf, t3      # 1 6
+        bic     t9, tg, t9      # 3 2
+
+        maxsw4  t6, zero, t6    # 2 5
+        addq    t9, ta, t9      # 3 3
+        stl     t0, 0(a1)       # 0 8
+        minsw4  t6, tf, t6      # 2 6
+
+        xor     t9, tb, t9      # 3 4
+        maxsw4  t9, zero, t9    # 3 5
+        lda     a0, 32(a0)      # block += 16;
+        pkwb    t3, t3          # 1 7
+
+        minsw4  t9, tf, t9      # 3 6
+        subq    th, 2, th
+        pkwb    t6, t6          # 2 7
+        pkwb    t9, t9          # 3 7
+
+        stl     t3, 4(a1)       # 1 8
+        addq    te, a2, a1      # pixels += line_size
+        stl     t6, 0(te)       # 2 8
+        stl     t9, 4(te)       # 3 8
+
+        bne     th, 1b
+        ret
+        .end add_pixels_clamped_mvi_asm
diff --git a/contrib/ffmpeg/libavcodec/alpha/motion_est_alpha.c b/contrib/ffmpeg/libavcodec/alpha/motion_est_alpha.c
new file mode 100644
index 000000000..337ffb38e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/alpha/motion_est_alpha.c
@@ -0,0 +1,345 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.h"
+#include "../dsputil.h"
+
+void get_pixels_mvi(DCTELEM *restrict block,
+                    const uint8_t *restrict pixels, int line_size)
+{
+    int h = 8;
+
+    do {
+        uint64_t p;
+
+        p = ldq(pixels);
+        stq(unpkbw(p),       block);
+        stq(unpkbw(p >> 32), block + 4);
+
+        pixels += line_size;
+        block += 8;
+    } while (--h);
+}
+
+void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
+                     int stride) {
+    int h = 8;
+    uint64_t mask = 0x4040;
+
+    mask |= mask << 16;
+    mask |= mask << 32;
+    do {
+        uint64_t x, y, c, d, a;
+        uint64_t signs;
+
+        x = ldq(s1);
+        y = ldq(s2);
+        c = cmpbge(x, y);
+        d = x - y;
+        a = zap(mask, c);       /* We use 0x4040404040404040 here...  */
+        d += 4 * a;             /* ...so we can use s4addq here.      */
+        signs = zap(-1, c);
+
+        stq(unpkbw(d)       | (unpkbw(signs)       << 8), block);
+        stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4);
+
+        s1 += stride;
+        s2 += stride;
+        block += 8;
+    } while (--h);
+}
+
+static inline uint64_t avg2(uint64_t a, uint64_t b)
+{
+    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
+}
+
+static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
+{
+    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
+    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
+                    + (l2 & BYTE_VEC(0x03))
+                    + (l3 & BYTE_VEC(0x03))
+                    + (l4 & BYTE_VEC(0x03))
+                    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
+    return r1 + r2;
+}
+
+int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int result = 0;
+
+    if ((size_t) pix2 & 0x7) {
+        /* works only when pix2 is actually unaligned */
+        do {                    /* do 8 pixel a time */
+            uint64_t p1, p2;
+
+            p1  = ldq(pix1);
+            p2  = uldq(pix2);
+            result += perr(p1, p2);
+
+            pix1 += line_size;
+            pix2 += line_size;
+        } while (--h);
+    } else {
+        do {
+            uint64_t p1, p2;
+
+            p1 = ldq(pix1);
+            p2 = ldq(pix2);
+            result += perr(p1, p2);
+
+            pix1 += line_size;
+            pix2 += line_size;
+        } while (--h);
+    }
+
+    return result;
+}
+
+#if 0                           /* now done in assembly */
+int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
+{
+    int result = 0;
+    int h = 16;
+
+    if ((size_t) pix2 & 0x7) {
+        /* works only when pix2 is actually unaligned */
+        do {                    /* do 16 pixel a time */
+            uint64_t p1_l, p1_r, p2_l, p2_r;
+            uint64_t t;
+
+            p1_l  = ldq(pix1);
+            p1_r  = ldq(pix1 + 8);
+            t     = ldq_u(pix2 + 8);
+            p2_l  = extql(ldq_u(pix2), pix2) | extqh(t, pix2);
+            p2_r  = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2);
+            pix1 += line_size;
+            pix2 += line_size;
+
+            result += perr(p1_l, p2_l)
+                    + perr(p1_r, p2_r);
+        } while (--h);
+    } else {
+        do {
+            uint64_t p1_l, p1_r, p2_l, p2_r;
+
+            p1_l = ldq(pix1);
+            p1_r = ldq(pix1 + 8);
+            p2_l = ldq(pix2);
+            p2_r = ldq(pix2 + 8);
+            pix1 += line_size;
+            pix2 += line_size;
+
+            result += perr(p1_l, p2_l)
+                    + perr(p1_r, p2_r);
+        } while (--h);
+    }
+
+    return result;
+}
+#endif
+
+int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int result = 0;
+    uint64_t disalign = (size_t) pix2 & 0x7;
+
+    switch (disalign) {
+    case 0:
+        do {
+            uint64_t p1_l, p1_r, p2_l, p2_r;
+            uint64_t l, r;
+
+            p1_l = ldq(pix1);
+            p1_r = ldq(pix1 + 8);
+            l    = ldq(pix2);
+            r    = ldq(pix2 + 8);
+            p2_l = avg2(l, (l >> 8) | ((uint64_t) r << 56));
+            p2_r = avg2(r, (r >> 8) | ((uint64_t) pix2[16] << 56));
+            pix1 += line_size;
+            pix2 += line_size;
+
+            result += perr(p1_l, p2_l)
+                    + perr(p1_r, p2_r);
+        } while (--h);
+        break;
+    case 7:
+        /* |.......l|lllllllr|rrrrrrr*|
+           This case is special because disalign1 would be 8, which
+           gets treated as 0 by extqh.  At least it is a bit faster
+           that way :)  */
+        do {
+            uint64_t p1_l, p1_r, p2_l, p2_r;
+            uint64_t l, m, r;
+
+            p1_l = ldq(pix1);
+            p1_r = ldq(pix1 + 8);
+            l     = ldq_u(pix2);
+            m     = ldq_u(pix2 + 8);
+            r     = ldq_u(pix2 + 16);
+            p2_l  = avg2(extql(l, disalign) | extqh(m, disalign), m);
+            p2_r  = avg2(extql(m, disalign) | extqh(r, disalign), r);
+            pix1 += line_size;
+            pix2 += line_size;
+
+            result += perr(p1_l, p2_l)
+                    + perr(p1_r, p2_r);
+        } while (--h);
+        break;
+    default:
+        do {
+            uint64_t disalign1 = disalign + 1;
+            uint64_t p1_l, p1_r, p2_l, p2_r;
+            uint64_t l, m, r;
+
+            p1_l  = ldq(pix1);
+            p1_r  = ldq(pix1 + 8);
+            l     = ldq_u(pix2);
+            m     = ldq_u(pix2 + 8);
+            r     = ldq_u(pix2 + 16);
+            p2_l  = avg2(extql(l, disalign) | extqh(m, disalign),
+                         extql(l, disalign1) | extqh(m, disalign1));
+            p2_r  = avg2(extql(m, disalign) | extqh(r, disalign),
+                         extql(m, disalign1) | extqh(r, disalign1));
+            pix1 += line_size;
+            pix2 += line_size;
+
+            result += perr(p1_l, p2_l)
+                    + perr(p1_r, p2_r);
+        } while (--h);
+        break;
+    }
+    return result;
+}
+
+int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int result = 0;
+
+    if ((size_t) pix2 & 0x7) {
+        uint64_t t, p2_l, p2_r;
+        t     = ldq_u(pix2 + 8);
+        p2_l  = extql(ldq_u(pix2), pix2) | extqh(t, pix2);
+        p2_r  = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2);
+
+        do {
+            uint64_t p1_l, p1_r, np2_l, np2_r;
+            uint64_t t;
+
+            p1_l  = ldq(pix1);
+            p1_r  = ldq(pix1 + 8);
+            pix2 += line_size;
+            t     = ldq_u(pix2 + 8);
+            np2_l = extql(ldq_u(pix2), pix2) | extqh(t, pix2);
+            np2_r = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2);
+
+            result += perr(p1_l, avg2(p2_l, np2_l))
+                    + perr(p1_r, avg2(p2_r, np2_r));
+
+            pix1 += line_size;
+            p2_l  = np2_l;
+            p2_r  = np2_r;
+
+        } while (--h);
+    } else {
+        uint64_t p2_l, p2_r;
+        p2_l = ldq(pix2);
+        p2_r = ldq(pix2 + 8);
+        do {
+            uint64_t p1_l, p1_r, np2_l, np2_r;
+
+            p1_l = ldq(pix1);
+            p1_r = ldq(pix1 + 8);
+            pix2 += line_size;
+            np2_l = ldq(pix2);
+            np2_r = ldq(pix2 + 8);
+
+            result += perr(p1_l, avg2(p2_l, np2_l))
+                    + perr(p1_r, avg2(p2_r, np2_r));
+
+            pix1 += line_size;
+            p2_l  = np2_l;
+            p2_r  = np2_r;
+        } while (--h);
+    }
+    return result;
+}
+
+int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int result = 0;
+
+    uint64_t p1_l, p1_r;
+    uint64_t p2_l, p2_r, p2_x;
+
+    p1_l = ldq(pix1);
+    p1_r = ldq(pix1 + 8);
+
+    if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
+        p2_l = uldq(pix2);
+        p2_r = uldq(pix2 + 8);
+        p2_x = (uint64_t) pix2[16] << 56;
+    } else {
+        p2_l = ldq(pix2);
+        p2_r = ldq(pix2 + 8);
+        p2_x = ldq(pix2 + 16) << 56;
+    }
+
+    do {
+        uint64_t np1_l, np1_r;
+        uint64_t np2_l, np2_r, np2_x;
+
+        pix1 += line_size;
+        pix2 += line_size;
+
+        np1_l = ldq(pix1);
+        np1_r = ldq(pix1 + 8);
+
+        if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
+            np2_l = uldq(pix2);
+            np2_r = uldq(pix2 + 8);
+            np2_x = (uint64_t) pix2[16] << 56;
+        } else {
+            np2_l = ldq(pix2);
+            np2_r = ldq(pix2 + 8);
+            np2_x = ldq(pix2 + 16) << 56;
+        }
+
+        result += perr(p1_l,
+                       avg4( p2_l, ( p2_l >> 8) | ((uint64_t)  p2_r << 56),
+                            np2_l, (np2_l >> 8) | ((uint64_t) np2_r << 56)))
+                + perr(p1_r,
+                       avg4( p2_r, ( p2_r >> 8) | ((uint64_t)  p2_x),
+                            np2_r, (np2_r >> 8) | ((uint64_t) np2_x)));
+
+        p1_l = np1_l;
+        p1_r = np1_r;
+        p2_l = np2_l;
+        p2_r = np2_r;
+        p2_x = np2_x;
+    } while (--h);
+
+    return result;
+}
diff --git a/contrib/ffmpeg/libavcodec/alpha/motion_est_mvi_asm.S b/contrib/ffmpeg/libavcodec/alpha/motion_est_mvi_asm.S
new file mode 100644
index 000000000..6015a7824
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/alpha/motion_est_mvi_asm.S
@@ -0,0 +1,185 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "regdef.h"
+
+/* Some nicer register names.  */
+#define ta t10
+#define tb t11
+#define tc t12
+#define td AT
+/* Danger: these overlap with the argument list and the return value */
+#define te a5
+#define tf a4
+#define tg a3
+#define th v0
+
+        .set noat
+        .set noreorder
+        .arch pca56
+        .text
+
+/*****************************************************************************
+ * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
+ *
+ * This code is written with a pca56 in mind. For ev6, one should
+ * really take the increased latency of 3 cycles for MVI instructions
+ * into account.
+ *
+ * It is important to keep the loading and first use of a register as
+ * far apart as possible, because if a register is accessed before it
+ * has been fetched from memory, the CPU will stall.
+ */
+        .align 4
+        .globl pix_abs16x16_mvi_asm
+        .ent pix_abs16x16_mvi_asm
+pix_abs16x16_mvi_asm:
+        .frame sp, 0, ra, 0
+        .prologue 0
+
+#ifdef HAVE_GPROF
+        lda     AT, _mcount
+        jsr     AT, (AT), _mcount
+#endif
+
+        and     a1, 7, t0
+        clr     v0
+        lda     a3, 16
+        beq     t0, $aligned
+        .align 4
+$unaligned:
+        /* Registers:
+           line 0:
+           t0:  left_u -> left lo -> left
+           t1:  mid
+           t2:  right_u -> right hi -> right
+           t3:  ref left
+           t4:  ref right
+           line 1:
+           t5:  left_u -> left lo -> left
+           t6:  mid
+           t7:  right_u -> right hi -> right
+           t8:  ref left
+           t9:  ref right
+           temp:
+           ta:  left hi
+           tb:  right lo
+           tc:  error left
+           td:  error right  */
+
+        /* load line 0 */
+        ldq_u   t0, 0(a1)       # left_u
+        ldq_u   t1, 8(a1)       # mid
+        ldq_u   t2, 16(a1)      # right_u
+        ldq     t3, 0(a0)       # ref left
+        ldq     t4, 8(a0)       # ref right
+        addq    a0, a2, a0      # pix1
+        addq    a1, a2, a1      # pix2
+        /* load line 1 */
+        ldq_u   t5, 0(a1)       # left_u
+        ldq_u   t6, 8(a1)       # mid
+        ldq_u   t7, 16(a1)      # right_u
+        ldq     t8, 0(a0)       # ref left
+        ldq     t9, 8(a0)       # ref right
+        addq    a0, a2, a0      # pix1
+        addq    a1, a2, a1      # pix2
+        /* calc line 0 */
+        extql   t0, a1, t0      # left lo
+        extqh   t1, a1, ta      # left hi
+        extql   t1, a1, tb      # right lo
+        or      t0, ta, t0      # left
+        extqh   t2, a1, t2      # right hi
+        perr    t3, t0, tc      # error left
+        or      t2, tb, t2      # right
+        perr    t4, t2, td      # error right
+        addq    v0, tc, v0      # add error left
+        addq    v0, td, v0      # add error left
+        /* calc line 1 */
+        extql   t5, a1, t5      # left lo
+        extqh   t6, a1, ta      # left hi
+        extql   t6, a1, tb      # right lo
+        or      t5, ta, t5      # left
+        extqh   t7, a1, t7      # right hi
+        perr    t8, t5, tc      # error left
+        or      t7, tb, t7      # right
+        perr    t9, t7, td      # error right
+        addq    v0, tc, v0      # add error left
+        addq    v0, td, v0      # add error left
+        /* loop */
+        subq    a3,  2, a3      # h -= 2
+        bne     a3, $unaligned
+        ret
+
+        .align 4
+$aligned:
+        /* load line 0 */
+        ldq     t0, 0(a1)       # left
+        ldq     t1, 8(a1)       # right
+        addq    a1, a2, a1      # pix2
+        ldq     t2, 0(a0)       # ref left
+        ldq     t3, 8(a0)       # ref right
+        addq    a0, a2, a0      # pix1
+        /* load line 1 */
+        ldq     t4, 0(a1)       # left
+        ldq     t5, 8(a1)       # right
+        addq    a1, a2, a1      # pix2
+        ldq     t6, 0(a0)       # ref left
+        ldq     t7, 8(a0)       # ref right
+        addq    a0, a2, a0      # pix1
+        /* load line 2 */
+        ldq     t8, 0(a1)       # left
+        ldq     t9, 8(a1)       # right
+        addq    a1, a2, a1      # pix2
+        ldq     ta, 0(a0)       # ref left
+        ldq     tb, 8(a0)       # ref right
+        addq    a0, a2, a0      # pix1
+        /* load line 3 */
+        ldq     tc, 0(a1)       # left
+        ldq     td, 8(a1)       # right
+        addq    a1, a2, a1      # pix2
+        ldq     te, 0(a0)       # ref left
+        ldq     tf, 8(a0)       # ref right
+        /* calc line 0 */
+        perr    t0, t2, t0      # error left
+        addq    a0, a2, a0      # pix1
+        perr    t1, t3, t1      # error right
+        addq    v0, t0, v0      # add error left
+        /* calc line 1 */
+        perr    t4, t6, t0      # error left
+        addq    v0, t1, v0      # add error right
+        perr    t5, t7, t1      # error right
+        addq    v0, t0, v0      # add error left
+        /* calc line 2 */
+        perr    t8, ta, t0      # error left
+        addq    v0, t1, v0      # add error right
+        perr    t9, tb, t1      # error right
+        addq    v0, t0, v0      # add error left
+        /* calc line 3 */
+        perr    tc, te, t0      # error left
+        addq    v0, t1, v0      # add error right
+        perr    td, tf, t1      # error right
+        addq    v0, t0, v0      # add error left
+        addq    v0, t1, v0      # add error right
+        /* loop */
+        subq    a3,  4, a3      # h -= 4
+        bne     a3, $aligned
+        ret
+        .end pix_abs16x16_mvi_asm
diff --git a/contrib/ffmpeg/libavcodec/alpha/mpegvideo_alpha.c b/contrib/ffmpeg/libavcodec/alpha/mpegvideo_alpha.c
new file mode 100644
index 000000000..8ad264b06
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/alpha/mpegvideo_alpha.c
@@ -0,0 +1,147 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.h"
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+
+static void dct_unquantize_h263_intra_axp(MpegEncContext *s, DCTELEM *block,
+                                    int n, int qscale)
+{
+    int i, n_coeffs;
+    uint64_t qmul, qadd;
+    uint64_t correction;
+    DCTELEM *orig_block = block;
+    DCTELEM block0;             /* might not be used uninitialized */
+
+    qadd = WORD_VEC((qscale - 1) | 1);
+    qmul = qscale << 1;
+    /* This mask kills spill from negative subwords to the next subword.  */
+    correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */
+
+    if (!s->h263_aic) {
+        if (n < 4)
+            block0 = block[0] * s->y_dc_scale;
+        else
+            block0 = block[0] * s->c_dc_scale;
+    } else {
+        qadd = 0;
+    }
+    n_coeffs = 63; // does not always use zigzag table
+
+    for(i = 0; i <= n_coeffs; block += 4, i += 4) {
+        uint64_t levels, negmask, zeros, add;
+
+        levels = ldq(block);
+        if (levels == 0)
+            continue;
+
+#ifdef __alpha_max__
+        /* I don't think the speed difference justifies runtime
+           detection.  */
+        negmask = maxsw4(levels, -1); /* negative -> ffff (-1) */
+        negmask = minsw4(negmask, 0); /* positive -> 0000 (0) */
+#else
+        negmask = cmpbge(WORD_VEC(0x7fff), levels);
+        negmask &= (negmask >> 1) | (1 << 7);
+        negmask = zap(-1, negmask);
+#endif
+
+        zeros = cmpbge(0, levels);
+        zeros &= zeros >> 1;
+        /* zeros |= zeros << 1 is not needed since qadd <= 255, so
+           zapping the lower byte suffices.  */
+
+        levels *= qmul;
+        levels -= correction & (negmask << 16);
+
+        /* Negate qadd for negative levels.  */
+        add = qadd ^ negmask;
+        add += WORD_VEC(0x0001) & negmask;
+        /* Set qadd to 0 for levels == 0.  */
+        add = zap(add, zeros);
+
+        levels += add;
+
+        stq(levels, block);
+    }
+
+    if (s->mb_intra && !s->h263_aic)
+        orig_block[0] = block0;
+}
+
+static void dct_unquantize_h263_inter_axp(MpegEncContext *s, DCTELEM *block,
+                                    int n, int qscale)
+{
+    int i, n_coeffs;
+    uint64_t qmul, qadd;
+    uint64_t correction;
+
+    qadd = WORD_VEC((qscale - 1) | 1);
+    qmul = qscale << 1;
+    /* This mask kills spill from negative subwords to the next subword.  */
+    correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */
+
+    n_coeffs = s->intra_scantable.raster_end[s->block_last_index[n]];
+
+    for(i = 0; i <= n_coeffs; block += 4, i += 4) {
+        uint64_t levels, negmask, zeros, add;
+
+        levels = ldq(block);
+        if (levels == 0)
+            continue;
+
+#ifdef __alpha_max__
+        /* I don't think the speed difference justifies runtime
+           detection.  */
+        negmask = maxsw4(levels, -1); /* negative -> ffff (-1) */
+        negmask = minsw4(negmask, 0); /* positive -> 0000 (0) */
+#else
+        negmask = cmpbge(WORD_VEC(0x7fff), levels);
+        negmask &= (negmask >> 1) | (1 << 7);
+        negmask = zap(-1, negmask);
+#endif
+
+        zeros = cmpbge(0, levels);
+        zeros &= zeros >> 1;
+        /* zeros |= zeros << 1 is not needed since qadd <= 255, so
+           zapping the lower byte suffices.  */
+
+        levels *= qmul;
+        levels -= correction & (negmask << 16);
+
+        /* Negate qadd for negative levels.  */
+        add = qadd ^ negmask;
+        add += WORD_VEC(0x0001) & negmask;
+        /* Set qadd to 0 for levels == 0.  */
+        add = zap(add, zeros);
+
+        levels += add;
+
+        stq(levels, block);
+    }
+}
+
+void MPV_common_init_axp(MpegEncContext *s)
+{
+    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_axp;
+    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_axp;
+}
diff --git a/contrib/ffmpeg/libavcodec/alpha/regdef.h b/contrib/ffmpeg/libavcodec/alpha/regdef.h
new file mode 100644
index 000000000..01e263bac
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/alpha/regdef.h
@@ -0,0 +1,66 @@
+/*
+ * Alpha optimized DSP utils
+ * copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Some BSDs don't seem to have regdef.h... sigh  */
+#ifndef alpha_regdef_h
+#define alpha_regdef_h
+
+#define v0      $0      /* function return value */
+
+#define t0      $1      /* temporary registers (caller-saved) */
+#define t1      $2
+#define t2      $3
+#define t3      $4
+#define t4      $5
+#define t5      $6
+#define t6      $7
+#define t7      $8
+
+#define s0      $9      /* saved-registers (callee-saved registers) */
+#define s1      $10
+#define s2      $11
+#define s3      $12
+#define s4      $13
+#define s5      $14
+#define s6      $15
+#define fp      s6      /* frame-pointer (s6 in frame-less procedures) */
+
+#define a0      $16     /* argument registers (caller-saved) */
+#define a1      $17
+#define a2      $18
+#define a3      $19
+#define a4      $20
+#define a5      $21
+
+#define t8      $22     /* more temps (caller-saved) */
+#define t9      $23
+#define t10     $24
+#define t11     $25
+#define ra      $26     /* return address register */
+#define t12     $27
+
+#define pv      t12     /* procedure-variable register */
+#define AT      $at     /* assembler temporary */
+#define gp      $29     /* global pointer */
+#define sp      $30     /* stack pointer */
+#define zero    $31     /* reads as zero, writes are noops */
+
+#endif /* alpha_regdef_h */
diff --git a/contrib/ffmpeg/libavcodec/alpha/simple_idct_alpha.c b/contrib/ffmpeg/libavcodec/alpha/simple_idct_alpha.c
new file mode 100644
index 000000000..adadd3ab0
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/alpha/simple_idct_alpha.c
@@ -0,0 +1,308 @@
+/*
+ * Simple IDCT (Alpha optimized)
+ *
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * based upon some outcommented c code from mpeg2dec (idct_mmx.c
+ * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
+ *
+ * Alpha optimiziations by M�ns Rullg�rd <mru@users.sourceforge.net>
+ *                     and Falk Hueffner <falk@debian.org>
+ */
+
+#include "asm.h"
+#include "../dsputil.h"
+
+extern void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
+                                        int line_size);
+extern void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
+                                        int line_size);
+
+// cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
+// W4 is actually exactly 16384, but using 16383 works around
+// accumulating rounding errors for some encoders
+#define W1 ((int_fast32_t) 22725)
+#define W2 ((int_fast32_t) 21407)
+#define W3 ((int_fast32_t) 19266)
+#define W4 ((int_fast32_t) 16383)
+#define W5 ((int_fast32_t) 12873)
+#define W6 ((int_fast32_t)  8867)
+#define W7 ((int_fast32_t)  4520)
+#define ROW_SHIFT 11
+#define COL_SHIFT 20
+
+/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise  */
+static inline int idct_row(DCTELEM *row)
+{
+    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3, t;
+    uint64_t l, r, t2;
+    l = ldq(row);
+    r = ldq(row + 4);
+
+    if (l == 0 && r == 0)
+        return 0;
+
+    a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));
+
+    if (((l & ~0xffffUL) | r) == 0) {
+        a0 >>= ROW_SHIFT;
+        t2 = (uint16_t) a0;
+        t2 |= t2 << 16;
+        t2 |= t2 << 32;
+
+        stq(t2, row);
+        stq(t2, row + 4);
+        return 1;
+    }
+
+    a1 = a0;
+    a2 = a0;
+    a3 = a0;
+
+    t = extwl(l, 4);            /* row[2] */
+    if (t != 0) {
+        t = sextw(t);
+        a0 += W2 * t;
+        a1 += W6 * t;
+        a2 -= W6 * t;
+        a3 -= W2 * t;
+    }
+
+    t = extwl(r, 0);            /* row[4] */
+    if (t != 0) {
+        t = sextw(t);
+        a0 += W4 * t;
+        a1 -= W4 * t;
+        a2 -= W4 * t;
+        a3 += W4 * t;
+    }
+
+    t = extwl(r, 4);            /* row[6] */
+    if (t != 0) {
+        t = sextw(t);
+        a0 += W6 * t;
+        a1 -= W2 * t;
+        a2 += W2 * t;
+        a3 -= W6 * t;
+    }
+
+    t = extwl(l, 2);            /* row[1] */
+    if (t != 0) {
+        t = sextw(t);
+        b0 = W1 * t;
+        b1 = W3 * t;
+        b2 = W5 * t;
+        b3 = W7 * t;
+    } else {
+        b0 = 0;
+        b1 = 0;
+        b2 = 0;
+        b3 = 0;
+    }
+
+    t = extwl(l, 6);            /* row[3] */
+    if (t) {
+        t = sextw(t);
+        b0 += W3 * t;
+        b1 -= W7 * t;
+        b2 -= W1 * t;
+        b3 -= W5 * t;
+    }
+
+
+    t = extwl(r, 2);            /* row[5] */
+    if (t) {
+        t = sextw(t);
+        b0 += W5 * t;
+        b1 -= W1 * t;
+        b2 += W7 * t;
+        b3 += W3 * t;
+    }
+
+    t = extwl(r, 6);            /* row[7] */
+    if (t) {
+        t = sextw(t);
+        b0 += W7 * t;
+        b1 -= W5 * t;
+        b2 += W3 * t;
+        b3 -= W1 * t;
+    }
+
+    row[0] = (a0 + b0) >> ROW_SHIFT;
+    row[1] = (a1 + b1) >> ROW_SHIFT;
+    row[2] = (a2 + b2) >> ROW_SHIFT;
+    row[3] = (a3 + b3) >> ROW_SHIFT;
+    row[4] = (a3 - b3) >> ROW_SHIFT;
+    row[5] = (a2 - b2) >> ROW_SHIFT;
+    row[6] = (a1 - b1) >> ROW_SHIFT;
+    row[7] = (a0 - b0) >> ROW_SHIFT;
+
+    return 2;
+}
+
+static inline void idct_col(DCTELEM *col)
+{
+    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
+
+    col[0] += (1 << (COL_SHIFT - 1)) / W4;
+
+    a0 = W4 * col[8 * 0];
+    a1 = W4 * col[8 * 0];
+    a2 = W4 * col[8 * 0];
+    a3 = W4 * col[8 * 0];
+
+    if (col[8 * 2]) {
+        a0 += W2 * col[8 * 2];
+        a1 += W6 * col[8 * 2];
+        a2 -= W6 * col[8 * 2];
+        a3 -= W2 * col[8 * 2];
+    }
+
+    if (col[8 * 4]) {
+        a0 += W4 * col[8 * 4];
+        a1 -= W4 * col[8 * 4];
+        a2 -= W4 * col[8 * 4];
+        a3 += W4 * col[8 * 4];
+    }
+
+    if (col[8 * 6]) {
+        a0 += W6 * col[8 * 6];
+        a1 -= W2 * col[8 * 6];
+        a2 += W2 * col[8 * 6];
+        a3 -= W6 * col[8 * 6];
+    }
+
+    if (col[8 * 1]) {
+        b0 = W1 * col[8 * 1];
+        b1 = W3 * col[8 * 1];
+        b2 = W5 * col[8 * 1];
+        b3 = W7 * col[8 * 1];
+    } else {
+        b0 = 0;
+        b1 = 0;
+        b2 = 0;
+        b3 = 0;
+    }
+
+    if (col[8 * 3]) {
+        b0 += W3 * col[8 * 3];
+        b1 -= W7 * col[8 * 3];
+        b2 -= W1 * col[8 * 3];
+        b3 -= W5 * col[8 * 3];
+    }
+
+    if (col[8 * 5]) {
+        b0 += W5 * col[8 * 5];
+        b1 -= W1 * col[8 * 5];
+        b2 += W7 * col[8 * 5];
+        b3 += W3 * col[8 * 5];
+    }
+
+    if (col[8 * 7]) {
+        b0 += W7 * col[8 * 7];
+        b1 -= W5 * col[8 * 7];
+        b2 += W3 * col[8 * 7];
+        b3 -= W1 * col[8 * 7];
+    }
+
+    col[8 * 0] = (a0 + b0) >> COL_SHIFT;
+    col[8 * 7] = (a0 - b0) >> COL_SHIFT;
+    col[8 * 1] = (a1 + b1) >> COL_SHIFT;
+    col[8 * 6] = (a1 - b1) >> COL_SHIFT;
+    col[8 * 2] = (a2 + b2) >> COL_SHIFT;
+    col[8 * 5] = (a2 - b2) >> COL_SHIFT;
+    col[8 * 3] = (a3 + b3) >> COL_SHIFT;
+    col[8 * 4] = (a3 - b3) >> COL_SHIFT;
+}
+
+/* If all rows but the first one are zero after row transformation,
+   all rows will be identical after column transformation.  */
+static inline void idct_col2(DCTELEM *col)
+{
+    int i;
+    uint64_t l, r;
+
+    for (i = 0; i < 8; ++i) {
+        int_fast32_t a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;
+
+        a0 *= W4;
+        col[i] = a0 >> COL_SHIFT;
+    }
+
+    l = ldq(col + 0 * 4); r = ldq(col + 1 * 4);
+    stq(l, col +  2 * 4); stq(r, col +  3 * 4);
+    stq(l, col +  4 * 4); stq(r, col +  5 * 4);
+    stq(l, col +  6 * 4); stq(r, col +  7 * 4);
+    stq(l, col +  8 * 4); stq(r, col +  9 * 4);
+    stq(l, col + 10 * 4); stq(r, col + 11 * 4);
+    stq(l, col + 12 * 4); stq(r, col + 13 * 4);
+    stq(l, col + 14 * 4); stq(r, col + 15 * 4);
+}
+
+void simple_idct_axp(DCTELEM *block)
+{
+
+    int i;
+    int rowsZero = 1;           /* all rows except row 0 zero */
+    int rowsConstant = 1;       /* all rows consist of a constant value */
+
+    for (i = 0; i < 8; i++) {
+        int sparseness = idct_row(block + 8 * i);
+
+        if (i > 0 && sparseness > 0)
+            rowsZero = 0;
+        if (sparseness == 2)
+            rowsConstant = 0;
+    }
+
+    if (rowsZero) {
+        idct_col2(block);
+    } else if (rowsConstant) {
+        idct_col(block);
+        for (i = 0; i < 8; i += 2) {
+            uint64_t v = (uint16_t) block[0];
+            uint64_t w = (uint16_t) block[8];
+
+            v |= v << 16;
+            w |= w << 16;
+            v |= v << 32;
+            w |= w << 32;
+            stq(v, block + 0 * 4);
+            stq(v, block + 1 * 4);
+            stq(w, block + 2 * 4);
+            stq(w, block + 3 * 4);
+            block += 4 * 4;
+        }
+    } else {
+        for (i = 0; i < 8; i++)
+            idct_col(block + i);
+    }
+}
+
+void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    simple_idct_axp(block);
+    put_pixels_clamped_axp_p(block, dest, line_size);
+}
+
+void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    simple_idct_axp(block);
+    add_pixels_clamped_axp_p(block, dest, line_size);
+}
diff --git a/contrib/ffmpeg/libavcodec/amr.c b/contrib/ffmpeg/libavcodec/amr.c
new file mode 100644
index 000000000..2d1877b22
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/amr.c
@@ -0,0 +1,715 @@
+/*
+ * AMR Audio decoder stub
+ * Copyright (c) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+ /*
+    This code implements amr-nb and amr-wb audio encoder/decoder through external reference
+    code from www.3gpp.org. The licence of the code from 3gpp is unclear so you
+    have to download the code separately. Two versions exists: One fixed-point
+    and one with floats. For some reason the float-encoder is significant faster
+    atleast on a P4 1.5GHz (0.9s instead of 9.9s on a 30s audio clip at MR102).
+    Both float and fixed point is supported for amr-nb, but only float for
+    amr-wb.
+
+    --AMR-NB--
+    The fixed-point (TS26.073) can be downloaded from:
+    http://www.3gpp.org/ftp/Specs/archive/26_series/26.073/26073-510.zip
+    Extract the soure into ffmpeg/libavcodec/amr
+    To use the fixed version run "./configure" with "--enable-amr_nb-fixed"
+
+    The float version (default) can be downloaded from:
+    http://www.3gpp.org/ftp/Specs/archive/26_series/26.104/26104-510.zip
+    Extract the soure into ffmpeg/libavcodec/amr_float
+
+    The specification for amr-nb can be found in TS 26.071
+    (http://www.3gpp.org/ftp/Specs/html-info/26071.htm) and some other
+    info at http://www.3gpp.org/ftp/Specs/html-info/26-series.htm
+
+    --AMR-WB--
+    The reference code can be downloaded from:
+    http://www.3gpp.org/ftp/Specs/archive/26_series/26.204/26204-510.zip
+    It should be extracted to "libavcodec/amrwb_float". Enable it with
+    "--enable-amr_wb".
+
+    The specification for amr-wb can be downloaded from:
+    http://www.3gpp.org/ftp/Specs/archive/26_series/26.171/26171-500.zip
+
+    If someone want to use the fixed point version it can be downloaded
+    from: http://www.3gpp.org/ftp/Specs/archive/26_series/26.173/26173-571.zip
+
+ */
+
+#include "avcodec.h"
+
+#ifdef CONFIG_AMR_NB_FIXED
+
+#define MMS_IO
+
+#include "amr/sp_dec.h"
+#include "amr/d_homing.h"
+#include "amr/typedef.h"
+#include "amr/sp_enc.h"
+#include "amr/sid_sync.h"
+#include "amr/e_homing.h"
+
+#else
+#include "amr_float/interf_dec.h"
+#include "amr_float/interf_enc.h"
+#endif
+
+/* Common code for fixed and float version*/
+typedef struct AMR_bitrates
+{
+    int startrate;
+    int stoprate;
+    enum Mode mode;
+
+} AMR_bitrates;
+
+/* Match desired bitrate with closest one*/
+static enum Mode getBitrateMode(int bitrate)
+{
+    /* Adjusted so that all bitrates can be used from commandline where
+       only a multiple of 1000 can be specified*/
+    AMR_bitrates rates[]={ {0,4999,MR475}, //4
+                           {5000,5899,MR515},//5
+                           {5900,6699,MR59},//6
+                           {6700,7000,MR67},//7
+                           {7001,7949,MR74},//8
+                           {7950,9999,MR795},//9
+                           {10000,11999,MR102},//10
+                           {12000,64000,MR122},//12
+
+                         };
+    int i;
+    for(i=0;i<8;i++)
+    {
+        if(rates[i].startrate<=bitrate && rates[i].stoprate>=bitrate)
+        {
+            return(rates[i].mode);
+        }
+    }
+    /*Return highest possible*/
+    return(MR122);
+}
+
+static void amr_decode_fix_avctx(AVCodecContext * avctx)
+{
+    const int is_amr_wb = 1 + (avctx->codec_id == CODEC_ID_AMR_WB);
+
+    if(avctx->sample_rate == 0)
+    {
+        avctx->sample_rate = 8000 * is_amr_wb;
+    }
+
+    if(avctx->channels == 0)
+    {
+        avctx->channels = 1;
+    }
+
+    avctx->frame_size = 160 * is_amr_wb;
+}
+
+#ifdef CONFIG_AMR_NB_FIXED
+/* fixed point version*/
+/* frame size in serial bitstream file (frame type + serial stream + flags) */
+#define SERIAL_FRAMESIZE (1+MAX_SERIAL_SIZE+5)
+
+typedef struct AMRContext {
+    int frameCount;
+    Speech_Decode_FrameState *speech_decoder_state;
+    enum RXFrameType rx_type;
+    enum Mode mode;
+    Word16 reset_flag;
+    Word16 reset_flag_old;
+
+    enum Mode enc_bitrate;
+    Speech_Encode_FrameState *enstate;
+    sid_syncState *sidstate;
+    enum TXFrameType tx_frametype;
+
+
+} AMRContext;
+
+static int amr_nb_decode_init(AVCodecContext * avctx)
+{
+    AMRContext *s = avctx->priv_data;
+    s->frameCount=0;
+    s->speech_decoder_state=NULL;
+    s->rx_type = (enum RXFrameType)0;
+    s->mode= (enum Mode)0;
+    s->reset_flag=0;
+    s->reset_flag_old=1;
+
+    if(Speech_Decode_Frame_init(&s->speech_decoder_state, "Decoder"))
+    {
+        av_log(avctx, AV_LOG_ERROR, "Speech_Decode_Frame_init error\n");
+        return -1;
+    }
+
+    amr_decode_fix_avctx(avctx);
+
+    if(avctx->channels > 1)
+    {
+        av_log(avctx, AV_LOG_ERROR, "amr_nb: multichannel decoding not supported\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int amr_nb_encode_init(AVCodecContext * avctx)
+{
+    AMRContext *s = avctx->priv_data;
+    s->frameCount=0;
+    s->speech_decoder_state=NULL;
+    s->rx_type = (enum RXFrameType)0;
+    s->mode= (enum Mode)0;
+    s->reset_flag=0;
+    s->reset_flag_old=1;
+
+    if(avctx->sample_rate!=8000)
+    {
+        if(avctx->debug)
+        {
+            av_log(avctx, AV_LOG_DEBUG, "Only 8000Hz sample rate supported\n");
+        }
+        return -1;
+    }
+
+    if(avctx->channels!=1)
+    {
+        if(avctx->debug)
+        {
+            av_log(avctx, AV_LOG_DEBUG, "Only mono supported\n");
+        }
+        return -1;
+    }
+
+    avctx->frame_size=160;
+    avctx->coded_frame= avcodec_alloc_frame();
+
+    if(Speech_Encode_Frame_init(&s->enstate, 0, "encoder") || sid_sync_init (&s->sidstate))
+    {
+        if(avctx->debug)
+        {
+            av_log(avctx, AV_LOG_DEBUG, "Speech_Encode_Frame_init error\n");
+        }
+        return -1;
+    }
+
+    s->enc_bitrate=getBitrateMode(avctx->bit_rate);
+
+    return 0;
+}
+
+static int amr_nb_encode_close(AVCodecContext * avctx)
+{
+    AMRContext *s = avctx->priv_data;
+    Speech_Encode_Frame_exit(&s->enstate);
+    sid_sync_exit (&s->sidstate);
+    av_freep(&avctx->coded_frame);
+    return 0;
+}
+
+static int amr_nb_decode_close(AVCodecContext * avctx)
+{
+    AMRContext *s = avctx->priv_data;
+    Speech_Decode_Frame_exit(&s->speech_decoder_state);
+    return 0;
+}
+
+static int amr_nb_decode_frame(AVCodecContext * avctx,
+            void *data, int *data_size,
+            uint8_t * buf, int buf_size)
+{
+    AMRContext *s = avctx->priv_data;
+
+    uint8_t*amrData=buf;
+    int offset=0;
+
+    UWord8 toc, q, ft;
+
+    Word16 serial[SERIAL_FRAMESIZE];   /* coded bits */
+    Word16 *synth;
+    UWord8 *packed_bits;
+
+    static Word16 packed_size[16] = {12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0};
+    int i;
+
+    //printf("amr_decode_frame data_size=%i buf=0x%X buf_size=%d frameCount=%d!!\n",*data_size,buf,buf_size,s->frameCount);
+
+    synth=data;
+
+//    while(offset<buf_size)
+    {
+        toc=amrData[offset];
+        /* read rest of the frame based on ToC byte */
+        q  = (toc >> 2) & 0x01;
+        ft = (toc >> 3) & 0x0F;
+
+        //printf("offset=%d, packet_size=%d amrData= 0x%X %X %X %X\n",offset,packed_size[ft],amrData[offset],amrData[offset+1],amrData[offset+2],amrData[offset+3]);
+
+        offset++;
+
+        packed_bits=amrData+offset;
+
+        offset+=packed_size[ft];
+
+        //Unsort and unpack bits
+        s->rx_type = UnpackBits(q, ft, packed_bits, &s->mode, &serial[1]);
+
+        //We have a new frame
+        s->frameCount++;
+
+        if (s->rx_type == RX_NO_DATA)
+        {
+            s->mode = s->speech_decoder_state->prev_mode;
+        }
+        else {
+            s->speech_decoder_state->prev_mode = s->mode;
+        }
+
+        /* if homed: check if this frame is another homing frame */
+        if (s->reset_flag_old == 1)
+        {
+            /* only check until end of first subframe */
+            s->reset_flag = decoder_homing_frame_test_first(&serial[1], s->mode);
+        }
+        /* produce encoder homing frame if homed & input=decoder homing frame */
+        if ((s->reset_flag != 0) && (s->reset_flag_old != 0))
+        {
+            for (i = 0; i < L_FRAME; i++)
+            {
+                synth[i] = EHF_MASK;
+            }
+        }
+        else
+        {
+            /* decode frame */
+            Speech_Decode_Frame(s->speech_decoder_state, s->mode, &serial[1], s->rx_type, synth);
+        }
+
+        //Each AMR-frame results in 160 16-bit samples
+        *data_size+=160*2;
+        synth+=160;
+
+        /* if not homed: check whether current frame is a homing frame */
+        if (s->reset_flag_old == 0)
+        {
+            /* check whole frame */
+            s->reset_flag = decoder_homing_frame_test(&serial[1], s->mode);
+        }
+        /* reset decoder if current frame is a homing frame */
+        if (s->reset_flag != 0)
+        {
+            Speech_Decode_Frame_reset(s->speech_decoder_state);
+        }
+        s->reset_flag_old = s->reset_flag;
+
+    }
+    return offset;
+}
+
+
+static int amr_nb_encode_frame(AVCodecContext *avctx,
+                            unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
+{
+    short serial_data[250] = {0};
+
+    AMRContext *s = avctx->priv_data;
+    int written;
+
+    s->reset_flag = encoder_homing_frame_test(data);
+
+    Speech_Encode_Frame(s->enstate, s->enc_bitrate, data, &serial_data[1], &s->mode);
+
+    /* add frame type and mode */
+    sid_sync (s->sidstate, s->mode, &s->tx_frametype);
+
+    written = PackBits(s->mode, s->enc_bitrate, s->tx_frametype, &serial_data[1], frame);
+
+    if (s->reset_flag != 0)
+    {
+        Speech_Encode_Frame_reset(s->enstate);
+        sid_sync_reset(s->sidstate);
+    }
+    return written;
+}
+
+
+#elif defined(CONFIG_AMR_NB) /* Float point version*/
+
+typedef struct AMRContext {
+    int frameCount;
+    void * decState;
+    int *enstate;
+    enum Mode enc_bitrate;
+} AMRContext;
+
+static int amr_nb_decode_init(AVCodecContext * avctx)
+{
+    AMRContext *s = avctx->priv_data;
+    s->frameCount=0;
+    s->decState=Decoder_Interface_init();
+    if(!s->decState)
+    {
+        av_log(avctx, AV_LOG_ERROR, "Decoder_Interface_init error\r\n");
+        return -1;
+    }
+
+    amr_decode_fix_avctx(avctx);
+
+    if(avctx->channels > 1)
+    {
+        av_log(avctx, AV_LOG_ERROR, "amr_nb: multichannel decoding not supported\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int amr_nb_encode_init(AVCodecContext * avctx)
+{
+    AMRContext *s = avctx->priv_data;
+    s->frameCount=0;
+
+    if(avctx->sample_rate!=8000)
+    {
+        if(avctx->debug)
+        {
+            av_log(avctx, AV_LOG_DEBUG, "Only 8000Hz sample rate supported\n");
+        }
+        return -1;
+    }
+
+    if(avctx->channels!=1)
+    {
+        if(avctx->debug)
+        {
+            av_log(avctx, AV_LOG_DEBUG, "Only mono supported\n");
+        }
+        return -1;
+    }
+
+    avctx->frame_size=160;
+    avctx->coded_frame= avcodec_alloc_frame();
+
+    s->enstate=Encoder_Interface_init(0);
+    if(!s->enstate)
+    {
+        if(avctx->debug)
+        {
+            av_log(avctx, AV_LOG_DEBUG, "Encoder_Interface_init error\n");
+        }
+        return -1;
+    }
+
+    s->enc_bitrate=getBitrateMode(avctx->bit_rate);
+
+    return 0;
+}
+
+static int amr_nb_decode_close(AVCodecContext * avctx)
+{
+    AMRContext *s = avctx->priv_data;
+    Decoder_Interface_exit(s->decState);
+    return 0;
+}
+
+static int amr_nb_encode_close(AVCodecContext * avctx)
+{
+    AMRContext *s = avctx->priv_data;
+    Encoder_Interface_exit(s->enstate);
+    av_freep(&avctx->coded_frame);
+    return 0;
+}
+
+static int amr_nb_decode_frame(AVCodecContext * avctx,
+            void *data, int *data_size,
+            uint8_t * buf, int buf_size)
+{
+    AMRContext *s = (AMRContext*)avctx->priv_data;
+
+    uint8_t*amrData=buf;
+    static short block_size[16]={ 12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0 };
+    enum Mode dec_mode;
+    int packet_size;
+
+    /* av_log(NULL,AV_LOG_DEBUG,"amr_decode_frame buf=%p buf_size=%d frameCount=%d!!\n",buf,buf_size,s->frameCount); */
+
+    if(buf_size==0) {
+        /* nothing to do */
+        return 0;
+    }
+
+    dec_mode = (buf[0] >> 3) & 0x000F;
+    packet_size = block_size[dec_mode]+1;
+
+    if(packet_size > buf_size) {
+        av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size);
+        return -1;
+    }
+
+    s->frameCount++;
+    /* av_log(NULL,AV_LOG_DEBUG,"packet_size=%d amrData= 0x%X %X %X %X\n",packet_size,amrData[0],amrData[1],amrData[2],amrData[3]); */
+    /* call decoder */
+    Decoder_Interface_Decode(s->decState, amrData, data, 0);
+    *data_size=160*2;
+
+    return packet_size;
+}
+
+static int amr_nb_encode_frame(AVCodecContext *avctx,
+                            unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
+{
+    AMRContext *s = (AMRContext*)avctx->priv_data;
+    int written;
+
+    s->enc_bitrate=getBitrateMode(avctx->bit_rate);
+
+    written = Encoder_Interface_Encode(s->enstate,
+        s->enc_bitrate,
+        data,
+        frame,
+        0);
+    /* av_log(NULL,AV_LOG_DEBUG,"amr_nb_encode_frame encoded %u bytes, bitrate %u, first byte was %#02x\n",written, s->enc_bitrate, frame[0] ); */
+
+    return written;
+}
+
+#endif
+
+#if defined(CONFIG_AMR_NB) || defined(CONFIG_AMR_NB_FIXED)
+
+AVCodec amr_nb_decoder =
+{
+    "amr_nb",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_AMR_NB,
+    sizeof(AMRContext),
+    amr_nb_decode_init,
+    NULL,
+    amr_nb_decode_close,
+    amr_nb_decode_frame,
+};
+
+AVCodec amr_nb_encoder =
+{
+    "amr_nb",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_AMR_NB,
+    sizeof(AMRContext),
+    amr_nb_encode_init,
+    amr_nb_encode_frame,
+    amr_nb_encode_close,
+    NULL,
+};
+
+#endif
+
+/* -----------AMR wideband ------------*/
+#ifdef CONFIG_AMR_WB
+
+#ifdef _TYPEDEF_H
+//To avoid duplicate typedefs from typdef in amr-nb
+#define typedef_h
+#endif
+
+#include "amrwb_float/enc_if.h"
+#include "amrwb_float/dec_if.h"
+
+/* Common code for fixed and float version*/
+typedef struct AMRWB_bitrates
+{
+    int startrate;
+    int stoprate;
+    int mode;
+
+} AMRWB_bitrates;
+
+static int getWBBitrateMode(int bitrate)
+{
+    /* Adjusted so that all bitrates can be used from commandline where
+       only a multiple of 1000 can be specified*/
+    AMRWB_bitrates rates[]={ {0,7999,0}, //6.6kHz
+                           {8000,9999,1},//8.85
+                           {10000,13000,2},//12.65
+                           {13001,14999,3},//14.25
+                           {15000,17000,4},//15.85
+                           {17001,18000,5},//18.25
+                           {18001,22000,6},//19.85
+                           {22001,23000,7},//23.05
+                           {23001,24000,8},//23.85
+
+                         };
+    int i;
+
+    for(i=0;i<9;i++)
+    {
+        if(rates[i].startrate<=bitrate && rates[i].stoprate>=bitrate)
+        {
+            return(rates[i].mode);
+        }
+    }
+    /*Return highest possible*/
+    return(8);
+}
+
+
+typedef struct AMRWBContext {
+    int frameCount;
+    void *state;
+    int mode;
+    Word16 allow_dtx;
+} AMRWBContext;
+
+static int amr_wb_encode_init(AVCodecContext * avctx)
+{
+    AMRWBContext *s = (AMRWBContext*)avctx->priv_data;
+    s->frameCount=0;
+
+    if(avctx->sample_rate!=16000)
+    {
+        if(avctx->debug)
+        {
+            av_log(avctx, AV_LOG_DEBUG, "Only 16000Hz sample rate supported\n");
+        }
+        return -1;
+    }
+
+    if(avctx->channels!=1)
+    {
+        if(avctx->debug)
+        {
+            av_log(avctx, AV_LOG_DEBUG, "Only mono supported\n");
+        }
+        return -1;
+    }
+
+    avctx->frame_size=320;
+    avctx->coded_frame= avcodec_alloc_frame();
+
+    s->state = E_IF_init();
+    s->mode=getWBBitrateMode(avctx->bit_rate);
+    s->allow_dtx=0;
+
+    return 0;
+}
+
+static int amr_wb_encode_close(AVCodecContext * avctx)
+{
+    AMRWBContext *s = (AMRWBContext*) avctx->priv_data;
+    E_IF_exit(s->state);
+    av_freep(&avctx->coded_frame);
+    s->frameCount++;
+    return 0;
+}
+
+static int amr_wb_encode_frame(AVCodecContext *avctx,
+                            unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
+{
+    AMRWBContext *s;
+    int size;
+    s = (AMRWBContext*) avctx->priv_data;
+    s->mode=getWBBitrateMode(avctx->bit_rate);
+    size = E_IF_encode(s->state, s->mode, data, frame, s->allow_dtx);
+    return size;
+}
+
+static int amr_wb_decode_init(AVCodecContext * avctx)
+{
+    AMRWBContext *s = (AMRWBContext *)avctx->priv_data;
+    s->frameCount=0;
+    s->state = D_IF_init();
+
+    amr_decode_fix_avctx(avctx);
+
+    if(avctx->channels > 1)
+    {
+        av_log(avctx, AV_LOG_ERROR, "amr_wb: multichannel decoding not supported\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+extern const UWord8 block_size[];
+
+static int amr_wb_decode_frame(AVCodecContext * avctx,
+            void *data, int *data_size,
+            uint8_t * buf, int buf_size)
+{
+    AMRWBContext *s = (AMRWBContext*)avctx->priv_data;
+
+    uint8_t*amrData=buf;
+    int mode;
+    int packet_size;
+
+    if(buf_size==0) {
+        /* nothing to do */
+        return 0;
+    }
+
+    mode = (amrData[0] >> 3) & 0x000F;
+    packet_size = block_size[mode];
+
+    if(packet_size > buf_size) {
+        av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size+1);
+        return -1;
+    }
+
+    s->frameCount++;
+    D_IF_decode( s->state, amrData, data, _good_frame);
+    *data_size=320*2;
+    return packet_size;
+}
+
+static int amr_wb_decode_close(AVCodecContext * avctx)
+{
+    AMRWBContext *s = (AMRWBContext *)avctx->priv_data;
+    D_IF_exit(s->state);
+    return 0;
+}
+
+AVCodec amr_wb_decoder =
+{
+    "amr_wb",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_AMR_WB,
+    sizeof(AMRWBContext),
+    amr_wb_decode_init,
+    NULL,
+    amr_wb_decode_close,
+    amr_wb_decode_frame,
+};
+
+AVCodec amr_wb_encoder =
+{
+    "amr_wb",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_AMR_WB,
+    sizeof(AMRWBContext),
+    amr_wb_encode_init,
+    amr_wb_encode_frame,
+    amr_wb_encode_close,
+    NULL,
+};
+
+#endif //CONFIG_AMR_WB
diff --git a/contrib/ffmpeg/libavcodec/apiexample.c b/contrib/ffmpeg/libavcodec/apiexample.c
new file mode 100644
index 000000000..484c77876
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/apiexample.c
@@ -0,0 +1,457 @@
+/*
+ * copyright (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file apiexample.c
+ * avcodec API use example.
+ *
+ * Note that this library only handles codecs (mpeg, mpeg4, etc...),
+ * not file formats (avi, vob, etc...). See library 'libavformat' for the
+ * format handling
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#ifdef HAVE_AV_CONFIG_H
+#undef HAVE_AV_CONFIG_H
+#endif
+
+#include "avcodec.h"
+
+#define INBUF_SIZE 4096
+
+/*
+ * Audio encoding example
+ */
+void audio_encode_example(const char *filename)
+{
+    AVCodec *codec;
+    AVCodecContext *c= NULL;
+    int frame_size, i, j, out_size, outbuf_size;
+    FILE *f;
+    short *samples;
+    float t, tincr;
+    uint8_t *outbuf;
+
+    printf("Audio encoding\n");
+
+    /* find the MP2 encoder */
+    codec = avcodec_find_encoder(CODEC_ID_MP2);
+    if (!codec) {
+        fprintf(stderr, "codec not found\n");
+        exit(1);
+    }
+
+    c= avcodec_alloc_context();
+
+    /* put sample parameters */
+    c->bit_rate = 64000;
+    c->sample_rate = 44100;
+    c->channels = 2;
+
+    /* open it */
+    if (avcodec_open(c, codec) < 0) {
+        fprintf(stderr, "could not open codec\n");
+        exit(1);
+    }
+
+    /* the codec gives us the frame size, in samples */
+    frame_size = c->frame_size;
+    samples = malloc(frame_size * 2 * c->channels);
+    outbuf_size = 10000;
+    outbuf = malloc(outbuf_size);
+
+    f = fopen(filename, "wb");
+    if (!f) {
+        fprintf(stderr, "could not open %s\n", filename);
+        exit(1);
+    }
+
+    /* encode a single tone sound */
+    t = 0;
+    tincr = 2 * M_PI * 440.0 / c->sample_rate;
+    for(i=0;i<200;i++) {
+        for(j=0;j<frame_size;j++) {
+            samples[2*j] = (int)(sin(t) * 10000);
+            samples[2*j+1] = samples[2*j];
+            t += tincr;
+        }
+        /* encode the samples */
+        out_size = avcodec_encode_audio(c, outbuf, outbuf_size, samples);
+        fwrite(outbuf, 1, out_size, f);
+    }
+    fclose(f);
+    free(outbuf);
+    free(samples);
+
+    avcodec_close(c);
+    av_free(c);
+}
+
+/*
+ * Audio decoding.
+ */
+void audio_decode_example(const char *outfilename, const char *filename)
+{
+    AVCodec *codec;
+    AVCodecContext *c= NULL;
+    int out_size, size, len;
+    FILE *f, *outfile;
+    uint8_t *outbuf;
+    uint8_t inbuf[INBUF_SIZE + FF_INPUT_BUFFER_PADDING_SIZE], *inbuf_ptr;
+
+    printf("Audio decoding\n");
+
+    /* set end of buffer to 0 (this ensures that no overreading happens for damaged mpeg streams) */
+    memset(inbuf + INBUF_SIZE, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+
+    /* find the mpeg audio decoder */
+    codec = avcodec_find_decoder(CODEC_ID_MP2);
+    if (!codec) {
+        fprintf(stderr, "codec not found\n");
+        exit(1);
+    }
+
+    c= avcodec_alloc_context();
+
+    /* open it */
+    if (avcodec_open(c, codec) < 0) {
+        fprintf(stderr, "could not open codec\n");
+        exit(1);
+    }
+
+    outbuf = malloc(AVCODEC_MAX_AUDIO_FRAME_SIZE);
+
+    f = fopen(filename, "rb");
+    if (!f) {
+        fprintf(stderr, "could not open %s\n", filename);
+        exit(1);
+    }
+    outfile = fopen(outfilename, "wb");
+    if (!outfile) {
+        av_free(c);
+        exit(1);
+    }
+
+    /* decode until eof */
+    inbuf_ptr = inbuf;
+    for(;;) {
+        size = fread(inbuf, 1, INBUF_SIZE, f);
+        if (size == 0)
+            break;
+
+        inbuf_ptr = inbuf;
+        while (size > 0) {
+            len = avcodec_decode_audio(c, (short *)outbuf, &out_size,
+                                       inbuf_ptr, size);
+            if (len < 0) {
+                fprintf(stderr, "Error while decoding\n");
+                exit(1);
+            }
+            if (out_size > 0) {
+                /* if a frame has been decoded, output it */
+                fwrite(outbuf, 1, out_size, outfile);
+            }
+            size -= len;
+            inbuf_ptr += len;
+        }
+    }
+
+    fclose(outfile);
+    fclose(f);
+    free(outbuf);
+
+    avcodec_close(c);
+    av_free(c);
+}
+
+/*
+ * Video encoding example
+ */
+void video_encode_example(const char *filename)
+{
+    AVCodec *codec;
+    AVCodecContext *c= NULL;
+    int i, out_size, size, x, y, outbuf_size;
+    FILE *f;
+    AVFrame *picture;
+    uint8_t *outbuf, *picture_buf;
+
+    printf("Video encoding\n");
+
+    /* find the mpeg1 video encoder */
+    codec = avcodec_find_encoder(CODEC_ID_MPEG1VIDEO);
+    if (!codec) {
+        fprintf(stderr, "codec not found\n");
+        exit(1);
+    }
+
+    c= avcodec_alloc_context();
+    picture= avcodec_alloc_frame();
+
+    /* put sample parameters */
+    c->bit_rate = 400000;
+    /* resolution must be a multiple of two */
+    c->width = 352;
+    c->height = 288;
+    /* frames per second */
+    c->time_base= (AVRational){1,25};
+    c->gop_size = 10; /* emit one intra frame every ten frames */
+    c->max_b_frames=1;
+    c->pix_fmt = PIX_FMT_YUV420P;
+
+    /* open it */
+    if (avcodec_open(c, codec) < 0) {
+        fprintf(stderr, "could not open codec\n");
+        exit(1);
+    }
+
+    /* the codec gives us the frame size, in samples */
+
+    f = fopen(filename, "wb");
+    if (!f) {
+        fprintf(stderr, "could not open %s\n", filename);
+        exit(1);
+    }
+
+    /* alloc image and output buffer */
+    outbuf_size = 100000;
+    outbuf = malloc(outbuf_size);
+    size = c->width * c->height;
+    picture_buf = malloc((size * 3) / 2); /* size for YUV 420 */
+
+    picture->data[0] = picture_buf;
+    picture->data[1] = picture->data[0] + size;
+    picture->data[2] = picture->data[1] + size / 4;
+    picture->linesize[0] = c->width;
+    picture->linesize[1] = c->width / 2;
+    picture->linesize[2] = c->width / 2;
+
+    /* encode 1 second of video */
+    for(i=0;i<25;i++) {
+        fflush(stdout);
+        /* prepare a dummy image */
+        /* Y */
+        for(y=0;y<c->height;y++) {
+            for(x=0;x<c->width;x++) {
+                picture->data[0][y * picture->linesize[0] + x] = x + y + i * 3;
+            }
+        }
+
+        /* Cb and Cr */
+        for(y=0;y<c->height/2;y++) {
+            for(x=0;x<c->width/2;x++) {
+                picture->data[1][y * picture->linesize[1] + x] = 128 + y + i * 2;
+                picture->data[2][y * picture->linesize[2] + x] = 64 + x + i * 5;
+            }
+        }
+
+        /* encode the image */
+        out_size = avcodec_encode_video(c, outbuf, outbuf_size, picture);
+        printf("encoding frame %3d (size=%5d)\n", i, out_size);
+        fwrite(outbuf, 1, out_size, f);
+    }
+
+    /* get the delayed frames */
+    for(; out_size; i++) {
+        fflush(stdout);
+
+        out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
+        printf("write frame %3d (size=%5d)\n", i, out_size);
+        fwrite(outbuf, 1, out_size, f);
+    }
+
+    /* add sequence end code to have a real mpeg file */
+    outbuf[0] = 0x00;
+    outbuf[1] = 0x00;
+    outbuf[2] = 0x01;
+    outbuf[3] = 0xb7;
+    fwrite(outbuf, 1, 4, f);
+    fclose(f);
+    free(picture_buf);
+    free(outbuf);
+
+    avcodec_close(c);
+    av_free(c);
+    av_free(picture);
+    printf("\n");
+}
+
+/*
+ * Video decoding example
+ */
+
+void pgm_save(unsigned char *buf,int wrap, int xsize,int ysize,char *filename)
+{
+    FILE *f;
+    int i;
+
+    f=fopen(filename,"w");
+    fprintf(f,"P5\n%d %d\n%d\n",xsize,ysize,255);
+    for(i=0;i<ysize;i++)
+        fwrite(buf + i * wrap,1,xsize,f);
+    fclose(f);
+}
+
+void video_decode_example(const char *outfilename, const char *filename)
+{
+    AVCodec *codec;
+    AVCodecContext *c= NULL;
+    int frame, size, got_picture, len;
+    FILE *f;
+    AVFrame *picture;
+    uint8_t inbuf[INBUF_SIZE + FF_INPUT_BUFFER_PADDING_SIZE], *inbuf_ptr;
+    char buf[1024];
+
+    /* set end of buffer to 0 (this ensures that no overreading happens for damaged mpeg streams) */
+    memset(inbuf + INBUF_SIZE, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+
+    printf("Video decoding\n");
+
+    /* find the mpeg1 video decoder */
+    codec = avcodec_find_decoder(CODEC_ID_MPEG1VIDEO);
+    if (!codec) {
+        fprintf(stderr, "codec not found\n");
+        exit(1);
+    }
+
+    c= avcodec_alloc_context();
+    picture= avcodec_alloc_frame();
+
+    if(codec->capabilities&CODEC_CAP_TRUNCATED)
+        c->flags|= CODEC_FLAG_TRUNCATED; /* we dont send complete frames */
+
+    /* for some codecs, such as msmpeg4 and mpeg4, width and height
+       MUST be initialized there because these info are not available
+       in the bitstream */
+
+    /* open it */
+    if (avcodec_open(c, codec) < 0) {
+        fprintf(stderr, "could not open codec\n");
+        exit(1);
+    }
+
+    /* the codec gives us the frame size, in samples */
+
+    f = fopen(filename, "rb");
+    if (!f) {
+        fprintf(stderr, "could not open %s\n", filename);
+        exit(1);
+    }
+
+    frame = 0;
+    for(;;) {
+        size = fread(inbuf, 1, INBUF_SIZE, f);
+        if (size == 0)
+            break;
+
+        /* NOTE1: some codecs are stream based (mpegvideo, mpegaudio)
+           and this is the only method to use them because you cannot
+           know the compressed data size before analysing it.
+
+           BUT some other codecs (msmpeg4, mpeg4) are inherently frame
+           based, so you must call them with all the data for one
+           frame exactly. You must also initialize 'width' and
+           'height' before initializing them. */
+
+        /* NOTE2: some codecs allow the raw parameters (frame size,
+           sample rate) to be changed at any frame. We handle this, so
+           you should also take care of it */
+
+        /* here, we use a stream based decoder (mpeg1video), so we
+           feed decoder and see if it could decode a frame */
+        inbuf_ptr = inbuf;
+        while (size > 0) {
+            len = avcodec_decode_video(c, picture, &got_picture,
+                                       inbuf_ptr, size);
+            if (len < 0) {
+                fprintf(stderr, "Error while decoding frame %d\n", frame);
+                exit(1);
+            }
+            if (got_picture) {
+                printf("saving frame %3d\n", frame);
+                fflush(stdout);
+
+                /* the picture is allocated by the decoder. no need to
+                   free it */
+                snprintf(buf, sizeof(buf), outfilename, frame);
+                pgm_save(picture->data[0], picture->linesize[0],
+                         c->width, c->height, buf);
+                frame++;
+            }
+            size -= len;
+            inbuf_ptr += len;
+        }
+    }
+
+    /* some codecs, such as MPEG, transmit the I and P frame with a
+       latency of one frame. You must do the following to have a
+       chance to get the last frame of the video */
+    len = avcodec_decode_video(c, picture, &got_picture,
+                               NULL, 0);
+    if (got_picture) {
+        printf("saving last frame %3d\n", frame);
+        fflush(stdout);
+
+        /* the picture is allocated by the decoder. no need to
+           free it */
+        snprintf(buf, sizeof(buf), outfilename, frame);
+        pgm_save(picture->data[0], picture->linesize[0],
+                 c->width, c->height, buf);
+        frame++;
+    }
+
+    fclose(f);
+
+    avcodec_close(c);
+    av_free(c);
+    av_free(picture);
+    printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+    const char *filename;
+
+    /* must be called before using avcodec lib */
+    avcodec_init();
+
+    /* register all the codecs (you can also register only the codec
+       you wish to have smaller code */
+    avcodec_register_all();
+
+    if (argc <= 1) {
+        audio_encode_example("/tmp/test.mp2");
+        audio_decode_example("/tmp/test.sw", "/tmp/test.mp2");
+
+        video_encode_example("/tmp/test.mpg");
+        filename = "/tmp/test.mpg";
+    } else {
+        filename = argv[1];
+    }
+
+    //    audio_decode_example("/tmp/test.sw", filename);
+    video_decode_example("/tmp/test%d.pgm", filename);
+
+    return 0;
+}
diff --git a/contrib/ffmpeg/libavcodec/armv4l/dsputil_arm.c b/contrib/ffmpeg/libavcodec/armv4l/dsputil_arm.c
new file mode 100644
index 000000000..9f0bfa2af
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/armv4l/dsputil_arm.c
@@ -0,0 +1,262 @@
+/*
+ * ARMv4L optimized DSP utils
+ * Copyright (c) 2001 Lionel Ulmer.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+#ifdef HAVE_IPP
+#include "ipp.h"
+#endif
+
+extern void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
+
+extern void j_rev_dct_ARM(DCTELEM *data);
+extern void simple_idct_ARM(DCTELEM *data);
+
+extern void simple_idct_armv5te(DCTELEM *data);
+extern void simple_idct_put_armv5te(uint8_t *dest, int line_size,
+                                    DCTELEM *data);
+extern void simple_idct_add_armv5te(uint8_t *dest, int line_size,
+                                    DCTELEM *data);
+
+/* XXX: local hack */
+static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
+static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
+
+void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+
+void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+
+void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+
+CALL_2X_PIXELS(put_pixels16_x2_arm , put_pixels8_x2_arm , 8)
+CALL_2X_PIXELS(put_pixels16_y2_arm , put_pixels8_y2_arm , 8)
+CALL_2X_PIXELS(put_pixels16_xy2_arm, put_pixels8_xy2_arm, 8)
+CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8)
+CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8)
+CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8)
+
+static void add_pixels_clamped_ARM(short *block, unsigned char *dest, int line_size)
+{
+    asm volatile (
+                  "mov r10, #8 \n\t"
+
+                  "1: \n\t"
+
+                  /* load dest */
+                  "ldr r4, [%1] \n\t"
+                  /* block[0] and block[1]*/
+                  "ldrsh r5, [%0] \n\t"
+                  "ldrsh r7, [%0, #2] \n\t"
+                  "and r6, r4, #0xFF \n\t"
+                  "and r8, r4, #0xFF00 \n\t"
+                  "add r6, r5, r6 \n\t"
+                  "add r8, r7, r8, lsr #8 \n\t"
+                  "mvn r5, r5 \n\t"
+                  "mvn r7, r7 \n\t"
+                  "tst r6, #0x100 \n\t"
+                  "movne r6, r5, lsr #24 \n\t"
+                  "tst r8, #0x100 \n\t"
+                  "movne r8, r7, lsr #24 \n\t"
+                  "mov r9, r6 \n\t"
+                  "ldrsh r5, [%0, #4] \n\t" /* moved form [A] */
+                  "orr r9, r9, r8, lsl #8 \n\t"
+                  /* block[2] and block[3] */
+                  /* [A] */
+                  "ldrsh r7, [%0, #6] \n\t"
+                  "and r6, r4, #0xFF0000 \n\t"
+                  "and r8, r4, #0xFF000000 \n\t"
+                  "add r6, r5, r6, lsr #16 \n\t"
+                  "add r8, r7, r8, lsr #24 \n\t"
+                  "mvn r5, r5 \n\t"
+                  "mvn r7, r7 \n\t"
+                  "tst r6, #0x100 \n\t"
+                  "movne r6, r5, lsr #24 \n\t"
+                  "tst r8, #0x100 \n\t"
+                  "movne r8, r7, lsr #24 \n\t"
+                  "orr r9, r9, r6, lsl #16 \n\t"
+                  "ldr r4, [%1, #4] \n\t"       /* moved form [B] */
+                  "orr r9, r9, r8, lsl #24 \n\t"
+                  /* store dest */
+                  "ldrsh r5, [%0, #8] \n\t" /* moved form [C] */
+                  "str r9, [%1] \n\t"
+
+                  /* load dest */
+                  /* [B] */
+                  /* block[4] and block[5] */
+                  /* [C] */
+                  "ldrsh r7, [%0, #10] \n\t"
+                  "and r6, r4, #0xFF \n\t"
+                  "and r8, r4, #0xFF00 \n\t"
+                  "add r6, r5, r6 \n\t"
+                  "add r8, r7, r8, lsr #8 \n\t"
+                  "mvn r5, r5 \n\t"
+                  "mvn r7, r7 \n\t"
+                  "tst r6, #0x100 \n\t"
+                  "movne r6, r5, lsr #24 \n\t"
+                  "tst r8, #0x100 \n\t"
+                  "movne r8, r7, lsr #24 \n\t"
+                  "mov r9, r6 \n\t"
+                  "ldrsh r5, [%0, #12] \n\t" /* moved from [D] */
+                  "orr r9, r9, r8, lsl #8 \n\t"
+                  /* block[6] and block[7] */
+                  /* [D] */
+                  "ldrsh r7, [%0, #14] \n\t"
+                  "and r6, r4, #0xFF0000 \n\t"
+                  "and r8, r4, #0xFF000000 \n\t"
+                  "add r6, r5, r6, lsr #16 \n\t"
+                  "add r8, r7, r8, lsr #24 \n\t"
+                  "mvn r5, r5 \n\t"
+                  "mvn r7, r7 \n\t"
+                  "tst r6, #0x100 \n\t"
+                  "movne r6, r5, lsr #24 \n\t"
+                  "tst r8, #0x100 \n\t"
+                  "movne r8, r7, lsr #24 \n\t"
+                  "orr r9, r9, r6, lsl #16 \n\t"
+                  "add %0, %0, #16 \n\t" /* moved from [E] */
+                  "orr r9, r9, r8, lsl #24 \n\t"
+                  "subs r10, r10, #1 \n\t" /* moved from [F] */
+                  /* store dest */
+                  "str r9, [%1, #4] \n\t"
+
+                  /* [E] */
+                  /* [F] */
+                  "add %1, %1, %2 \n\t"
+                  "bne 1b \n\t"
+                  : "+r"(block),
+                    "+r"(dest)
+                  : "r"(line_size)
+                  : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" );
+}
+
+/* XXX: those functions should be suppressed ASAP when all IDCTs are
+   converted */
+static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct_ARM (block);
+    ff_put_pixels_clamped(block, dest, line_size);
+}
+static void j_rev_dct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct_ARM (block);
+    ff_add_pixels_clamped(block, dest, line_size);
+}
+static void simple_idct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    simple_idct_ARM (block);
+    ff_put_pixels_clamped(block, dest, line_size);
+}
+static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    simple_idct_ARM (block);
+    ff_add_pixels_clamped(block, dest, line_size);
+}
+
+#ifdef HAVE_IPP
+static void simple_idct_ipp(DCTELEM *block)
+{
+    ippiDCT8x8Inv_Video_16s_C1I(block);
+}
+static void simple_idct_ipp_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ippiDCT8x8Inv_Video_16s8u_C1R(block, dest, line_size);
+}
+
+void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size);
+
+static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ippiDCT8x8Inv_Video_16s_C1I(block);
+#ifdef HAVE_IWMMXT
+    add_pixels_clamped_iwmmxt(block, dest, line_size);
+#else
+    add_pixels_clamped_ARM(block, dest, line_size);
+#endif
+}
+#endif
+
+void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
+{
+    int idct_algo= avctx->idct_algo;
+
+    ff_put_pixels_clamped = c->put_pixels_clamped;
+    ff_add_pixels_clamped = c->add_pixels_clamped;
+
+    if(idct_algo == FF_IDCT_AUTO){
+#if defined(HAVE_IPP)
+        idct_algo = FF_IDCT_IPP;
+#elif defined(HAVE_ARMV5TE)
+        idct_algo = FF_IDCT_SIMPLEARMV5TE;
+#else
+        idct_algo = FF_IDCT_ARM;
+#endif
+    }
+
+    if(idct_algo==FF_IDCT_ARM){
+        c->idct_put= j_rev_dct_ARM_put;
+        c->idct_add= j_rev_dct_ARM_add;
+        c->idct    = j_rev_dct_ARM;
+        c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
+    } else if (idct_algo==FF_IDCT_SIMPLEARM){
+        c->idct_put= simple_idct_ARM_put;
+        c->idct_add= simple_idct_ARM_add;
+        c->idct    = simple_idct_ARM;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+#ifdef HAVE_ARMV5TE
+    } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){
+        c->idct_put= simple_idct_put_armv5te;
+        c->idct_add= simple_idct_add_armv5te;
+        c->idct    = simple_idct_armv5te;
+        c->idct_permutation_type = FF_NO_IDCT_PERM;
+#endif
+#ifdef HAVE_IPP
+    } else if (idct_algo==FF_IDCT_IPP){
+        c->idct_put= simple_idct_ipp_put;
+        c->idct_add= simple_idct_ipp_add;
+        c->idct    = simple_idct_ipp;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+#endif
+    }
+
+/*     c->put_pixels_tab[0][0] = put_pixels16_arm; */ // NG!
+    c->put_pixels_tab[0][1] = put_pixels16_x2_arm; //OK!
+    c->put_pixels_tab[0][2] = put_pixels16_y2_arm; //OK!
+/*     c->put_pixels_tab[0][3] = put_pixels16_xy2_arm; /\* NG *\/ */
+/*     c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm; */
+    c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_arm; // OK
+    c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_arm; //OK
+/*     c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm; //NG */
+    c->put_pixels_tab[1][0] = put_pixels8_arm; //OK
+    c->put_pixels_tab[1][1] = put_pixels8_x2_arm; //OK
+/*     c->put_pixels_tab[1][2] = put_pixels8_y2_arm; //NG */
+/*     c->put_pixels_tab[1][3] = put_pixels8_xy2_arm; //NG */
+    c->put_no_rnd_pixels_tab[1][0] = put_pixels8_arm;//OK
+    c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_arm; //OK
+    c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm; //OK
+/*     c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm;//NG */
+
+#ifdef HAVE_IWMMXT
+    dsputil_init_iwmmxt(c, avctx);
+#endif
+}
diff --git a/contrib/ffmpeg/libavcodec/armv4l/dsputil_arm_s.S b/contrib/ffmpeg/libavcodec/armv4l/dsputil_arm_s.S
new file mode 100644
index 000000000..2a3ee9c50
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/armv4l/dsputil_arm_s.S
@@ -0,0 +1,696 @@
+@
+@ ARMv4L optimized DSP utils
+@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
+@
+@ This file is part of FFmpeg.
+@
+@ FFmpeg is free software; you can redistribute it and/or
+@ modify it under the terms of the GNU Lesser General Public
+@ License as published by the Free Software Foundation; either
+@ version 2.1 of the License, or (at your option) any later version.
+@
+@ FFmpeg is distributed in the hope that it will be useful,
+@ but WITHOUT ANY WARRANTY; without even the implied warranty of
+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+@ Lesser General Public License for more details.
+@
+@ You should have received a copy of the GNU Lesser General Public
+@ License along with FFmpeg; if not, write to the Free Software
+@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+@
+
+.macro  ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
+        mov \Rd0, \Rn0, lsr #(\shift * 8)
+        mov \Rd1, \Rn1, lsr #(\shift * 8)
+        mov \Rd2, \Rn2, lsr #(\shift * 8)
+        mov \Rd3, \Rn3, lsr #(\shift * 8)
+        orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
+        orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
+        orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
+        orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
+.endm
+.macro  ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
+        mov \R0, \R0, lsr #(\shift * 8)
+        orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
+        mov \R1, \R1, lsr #(\shift * 8)
+        orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
+.endm
+.macro  ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
+        mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
+        mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
+        orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
+        orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
+.endm
+
+.macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
+        @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
+        @ Rmask = 0xFEFEFEFE
+        @ Rn = destroy
+        eor \Rd0, \Rn0, \Rm0
+        eor \Rd1, \Rn1, \Rm1
+        orr \Rn0, \Rn0, \Rm0
+        orr \Rn1, \Rn1, \Rm1
+        and \Rd0, \Rd0, \Rmask
+        and \Rd1, \Rd1, \Rmask
+        sub \Rd0, \Rn0, \Rd0, lsr #1
+        sub \Rd1, \Rn1, \Rd1, lsr #1
+.endm
+
+.macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
+        @ Rd = (Rn & Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
+        @ Rmask = 0xFEFEFEFE
+        @ Rn = destroy
+        eor \Rd0, \Rn0, \Rm0
+        eor \Rd1, \Rn1, \Rm1
+        and \Rn0, \Rn0, \Rm0
+        and \Rn1, \Rn1, \Rm1
+        and \Rd0, \Rd0, \Rmask
+        and \Rd1, \Rd1, \Rmask
+        add \Rd0, \Rn0, \Rd0, lsr #1
+        add \Rd1, \Rn1, \Rd1, lsr #1
+.endm
+
+@ ----------------------------------------------------------------
+        .align 8
+        .global put_pixels16_arm
+put_pixels16_arm:
+        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+        @ block = word aligned, pixles = unaligned
+        pld [r1]
+        stmfd sp!, {r4-r11, lr} @ R14 is also called LR
+        adr r5, 5f
+        ands r4, r1, #3
+        bic r1, r1, #3
+        add r5, r5, r4, lsl #2
+        ldrne pc, [r5]
+1:
+        ldmia r1, {r4-r7}
+        add r1, r1, r2
+        stmia r0, {r4-r7}
+        pld [r1]
+        subs r3, r3, #1
+        add r0, r0, r2
+        bne 1b
+        ldmfd sp!, {r4-r11, pc}
+        .align 8
+2:
+        ldmia r1, {r4-r8}
+        add r1, r1, r2
+        ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
+        pld [r1]
+        subs r3, r3, #1
+        stmia r0, {r9-r12}
+        add r0, r0, r2
+        bne 2b
+        ldmfd sp!, {r4-r11, pc}
+        .align 8
+3:
+        ldmia r1, {r4-r8}
+        add r1, r1, r2
+        ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
+        pld [r1]
+        subs r3, r3, #1
+        stmia r0, {r9-r12}
+        add r0, r0, r2
+        bne 3b
+        ldmfd sp!, {r4-r11, pc}
+        .align 8
+4:
+        ldmia r1, {r4-r8}
+        add r1, r1, r2
+        ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
+        pld [r1]
+        subs r3, r3, #1
+        stmia r0, {r9-r12}
+        add r0, r0, r2
+        bne 4b
+        ldmfd sp!, {r4-r11,pc}
+        .align 8
+5:
+        .word 1b
+        .word 2b
+        .word 3b
+        .word 4b
+
+@ ----------------------------------------------------------------
+        .align 8
+        .global put_pixels8_arm
+put_pixels8_arm:
+        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+        @ block = word aligned, pixles = unaligned
+        pld [r1]
+        stmfd sp!, {r4-r5,lr} @ R14 is also called LR
+        adr r5, 5f
+        ands r4, r1, #3
+        bic r1, r1, #3
+        add r5, r5, r4, lsl #2
+        ldrne pc, [r5]
+1:
+        ldmia r1, {r4-r5}
+        add r1, r1, r2
+        subs r3, r3, #1
+        pld [r1]
+        stmia r0, {r4-r5}
+        add r0, r0, r2
+        bne 1b
+        ldmfd sp!, {r4-r5,pc}
+        .align 8
+2:
+        ldmia r1, {r4-r5, r12}
+        add r1, r1, r2
+        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
+        pld [r1]
+        subs r3, r3, #1
+        stmia r0, {r4-r5}
+        add r0, r0, r2
+        bne 2b
+        ldmfd sp!, {r4-r5,pc}
+        .align 8
+3:
+        ldmia r1, {r4-r5, r12}
+        add r1, r1, r2
+        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
+        pld [r1]
+        subs r3, r3, #1
+        stmia r0, {r4-r5}
+        add r0, r0, r2
+        bne 3b
+        ldmfd sp!, {r4-r5,pc}
+        .align 8
+4:
+        ldmia r1, {r4-r5, r12}
+        add r1, r1, r2
+        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
+        pld [r1]
+        subs r3, r3, #1
+        stmia r0, {r4-r5}
+        add r0, r0, r2
+        bne 4b
+        ldmfd sp!, {r4-r5,pc}
+        .align 8
+5:
+        .word 1b
+        .word 2b
+        .word 3b
+        .word 4b
+
+@ ----------------------------------------------------------------
+        .align 8
+        .global put_pixels8_x2_arm
+put_pixels8_x2_arm:
+        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+        @ block = word aligned, pixles = unaligned
+        pld [r1]
+        stmfd sp!, {r4-r10,lr} @ R14 is also called LR
+        adr r5, 5f
+        ands r4, r1, #3
+        ldr r12, [r5]
+        add r5, r5, r4, lsl #2
+        bic r1, r1, #3
+        ldrne pc, [r5]
+1:
+        ldmia r1, {r4-r5, r10}
+        add r1, r1, r2
+        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+        pld [r1]
+        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+        subs r3, r3, #1
+        stmia r0, {r8-r9}
+        add r0, r0, r2
+        bne 1b
+        ldmfd sp!, {r4-r10,pc}
+        .align 8
+2:
+        ldmia r1, {r4-r5, r10}
+        add r1, r1, r2
+        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
+        pld [r1]
+        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+        subs r3, r3, #1
+        stmia r0, {r4-r5}
+        add r0, r0, r2
+        bne 2b
+        ldmfd sp!, {r4-r10,pc}
+        .align 8
+3:
+        ldmia r1, {r4-r5, r10}
+        add r1, r1, r2
+        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
+        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
+        pld [r1]
+        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+        subs r3, r3, #1
+        stmia r0, {r4-r5}
+        add r0, r0, r2
+        bne 3b
+        ldmfd sp!, {r4-r10,pc}
+        .align 8
+4:
+        ldmia r1, {r4-r5, r10}
+        add r1, r1, r2
+        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
+        pld [r1]
+        RND_AVG32 r8, r9, r6, r7, r5, r10, r12
+        subs r3, r3, #1
+        stmia r0, {r8-r9}
+        add r0, r0, r2
+        bne 4b
+        ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
+        .align 8
+5:
+        .word 0xFEFEFEFE
+        .word 2b
+        .word 3b
+        .word 4b
+
+        .align 8
+        .global put_no_rnd_pixels8_x2_arm
+put_no_rnd_pixels8_x2_arm:
+        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+        @ block = word aligned, pixles = unaligned
+        pld [r1]
+        stmfd sp!, {r4-r10,lr} @ R14 is also called LR
+        adr r5, 5f
+        ands r4, r1, #3
+        ldr r12, [r5]
+        add r5, r5, r4, lsl #2
+        bic r1, r1, #3
+        ldrne pc, [r5]
+1:
+        ldmia r1, {r4-r5, r10}
+        add r1, r1, r2
+        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+        pld [r1]
+        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+        subs r3, r3, #1
+        stmia r0, {r8-r9}
+        add r0, r0, r2
+        bne 1b
+        ldmfd sp!, {r4-r10,pc}
+        .align 8
+2:
+        ldmia r1, {r4-r5, r10}
+        add r1, r1, r2
+        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
+        pld [r1]
+        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+        subs r3, r3, #1
+        stmia r0, {r4-r5}
+        add r0, r0, r2
+        bne 2b
+        ldmfd sp!, {r4-r10,pc}
+        .align 8
+3:
+        ldmia r1, {r4-r5, r10}
+        add r1, r1, r2
+        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
+        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
+        pld [r1]
+        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+        subs r3, r3, #1
+        stmia r0, {r4-r5}
+        add r0, r0, r2
+        bne 3b
+        ldmfd sp!, {r4-r10,pc}
+        .align 8
+4:
+        ldmia r1, {r4-r5, r10}
+        add r1, r1, r2
+        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
+        pld [r1]
+        NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
+        subs r3, r3, #1
+        stmia r0, {r8-r9}
+        add r0, r0, r2
+        bne 4b
+        ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
+        .align 8
+5:
+        .word 0xFEFEFEFE
+        .word 2b
+        .word 3b
+        .word 4b
+
+
+@ ----------------------------------------------------------------
+        .align 8
+        .global put_pixels8_y2_arm
+put_pixels8_y2_arm:
+        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+        @ block = word aligned, pixles = unaligned
+        pld [r1]
+        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+        adr r5, 5f
+        ands r4, r1, #3
+        mov r3, r3, lsr #1
+        ldr r12, [r5]
+        add r5, r5, r4, lsl #2
+        bic r1, r1, #3
+        ldrne pc, [r5]
+1:
+        ldmia r1, {r4-r5}
+        add r1, r1, r2
+6:      ldmia r1, {r6-r7}
+        add r1, r1, r2
+        pld [r1]
+        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+        ldmia r1, {r4-r5}
+        add r1, r1, r2
+        stmia r0, {r8-r9}
+        add r0, r0, r2
+        pld [r1]
+        RND_AVG32 r8, r9, r6, r7, r4, r5, r12
+        subs r3, r3, #1
+        stmia r0, {r8-r9}
+        add r0, r0, r2
+        bne 6b
+        ldmfd sp!, {r4-r11,pc}
+        .align 8
+2:
+        ldmia r1, {r4-r6}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+6:      ldmia r1, {r7-r9}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
+        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+        stmia r0, {r10-r11}
+        add r0, r0, r2
+        ldmia r1, {r4-r6}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+        subs r3, r3, #1
+        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+        stmia r0, {r10-r11}
+        add r0, r0, r2
+        bne 6b
+        ldmfd sp!, {r4-r11,pc}
+        .align 8
+3:
+        ldmia r1, {r4-r6}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+6:      ldmia r1, {r7-r9}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
+        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+        stmia r0, {r10-r11}
+        add r0, r0, r2
+        ldmia r1, {r4-r6}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+        subs r3, r3, #1
+        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+        stmia r0, {r10-r11}
+        add r0, r0, r2
+        bne 6b
+        ldmfd sp!, {r4-r11,pc}
+        .align 8
+4:
+        ldmia r1, {r4-r6}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+6:      ldmia r1, {r7-r9}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
+        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+        stmia r0, {r10-r11}
+        add r0, r0, r2
+        ldmia r1, {r4-r6}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+        subs r3, r3, #1
+        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+        stmia r0, {r10-r11}
+        add r0, r0, r2
+        bne 6b
+        ldmfd sp!, {r4-r11,pc}
+
+        .align 8
+5:
+        .word 0xFEFEFEFE
+        .word 2b
+        .word 3b
+        .word 4b
+
+        .align 8
+        .global put_no_rnd_pixels8_y2_arm
+put_no_rnd_pixels8_y2_arm:
+        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+        @ block = word aligned, pixles = unaligned
+        pld [r1]
+        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+        adr r5, 5f
+        ands r4, r1, #3
+        mov r3, r3, lsr #1
+        ldr r12, [r5]
+        add r5, r5, r4, lsl #2
+        bic r1, r1, #3
+        ldrne pc, [r5]
+1:
+        ldmia r1, {r4-r5}
+        add r1, r1, r2
+6:      ldmia r1, {r6-r7}
+        add r1, r1, r2
+        pld [r1]
+        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+        ldmia r1, {r4-r5}
+        add r1, r1, r2
+        stmia r0, {r8-r9}
+        add r0, r0, r2
+        pld [r1]
+        NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
+        subs r3, r3, #1
+        stmia r0, {r8-r9}
+        add r0, r0, r2
+        bne 6b
+        ldmfd sp!, {r4-r11,pc}
+        .align 8
+2:
+        ldmia r1, {r4-r6}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+6:      ldmia r1, {r7-r9}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
+        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+        stmia r0, {r10-r11}
+        add r0, r0, r2
+        ldmia r1, {r4-r6}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+        subs r3, r3, #1
+        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+        stmia r0, {r10-r11}
+        add r0, r0, r2
+        bne 6b
+        ldmfd sp!, {r4-r11,pc}
+        .align 8
+3:
+        ldmia r1, {r4-r6}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+6:      ldmia r1, {r7-r9}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
+        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+        stmia r0, {r10-r11}
+        add r0, r0, r2
+        ldmia r1, {r4-r6}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+        subs r3, r3, #1
+        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+        stmia r0, {r10-r11}
+        add r0, r0, r2
+        bne 6b
+        ldmfd sp!, {r4-r11,pc}
+        .align 8
+4:
+        ldmia r1, {r4-r6}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+6:      ldmia r1, {r7-r9}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
+        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+        stmia r0, {r10-r11}
+        add r0, r0, r2
+        ldmia r1, {r4-r6}
+        add r1, r1, r2
+        pld [r1]
+        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+        subs r3, r3, #1
+        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+        stmia r0, {r10-r11}
+        add r0, r0, r2
+        bne 6b
+        ldmfd sp!, {r4-r11,pc}
+        .align 8
+5:
+        .word 0xFEFEFEFE
+        .word 2b
+        .word 3b
+        .word 4b
+
+@ ----------------------------------------------------------------
+.macro  RND_XY2_IT align, rnd
+        @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
+        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
+.if \align == 0
+        ldmia r1, {r6-r8}
+.elseif \align == 3
+        ldmia r1, {r5-r7}
+.else
+        ldmia r1, {r8-r10}
+.endif
+        add r1, r1, r2
+        pld [r1]
+.if \align == 0
+        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
+.elseif \align == 1
+        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
+        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
+.elseif \align == 2
+        ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
+        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
+.elseif \align == 3
+        ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
+.endif
+        ldr r14, [r12, #0]      @ 0x03030303
+        tst r3, #1
+        and r8, r4, r14
+        and r9, r5, r14
+        and r10, r6, r14
+        and r11, r7, r14
+.if \rnd == 1
+        ldreq r14, [r12, #16]   @ 0x02020202
+.else
+        ldreq r14, [r12, #28]   @ 0x01010101
+.endif
+        add r8, r8, r10
+        add r9, r9, r11
+        addeq r8, r8, r14
+        addeq r9, r9, r14
+        ldr r14, [r12, #20]     @ 0xFCFCFCFC >> 2
+        and r4, r14, r4, lsr #2
+        and r5, r14, r5, lsr #2
+        and r6, r14, r6, lsr #2
+        and r7, r14, r7, lsr #2
+        add r10, r4, r6
+        add r11, r5, r7
+.endm
+
+.macro RND_XY2_EXPAND align, rnd
+        RND_XY2_IT \align, \rnd
+6:      stmfd sp!, {r8-r11}
+        RND_XY2_IT \align, \rnd
+        ldmfd sp!, {r4-r7}
+        add r4, r4, r8
+        add r5, r5, r9
+        add r6, r6, r10
+        add r7, r7, r11
+        ldr r14, [r12, #24]     @ 0x0F0F0F0F
+        and r4, r14, r4, lsr #2
+        and r5, r14, r5, lsr #2
+        add r4, r4, r6
+        add r5, r5, r7
+        subs r3, r3, #1
+        stmia r0, {r4-r5}
+        add r0, r0, r2
+        bne 6b
+        ldmfd sp!, {r4-r11,pc}
+.endm
+
+        .align 8
+        .global put_pixels8_xy2_arm
+put_pixels8_xy2_arm:
+        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+        @ block = word aligned, pixles = unaligned
+        pld [r1]
+        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+        adrl r12, 5f
+        ands r4, r1, #3
+        add r5, r12, r4, lsl #2
+        bic r1, r1, #3
+        ldrne pc, [r5]
+1:
+        RND_XY2_EXPAND 0, 1
+
+        .align 8
+2:
+        RND_XY2_EXPAND 1, 1
+
+        .align 8
+3:
+        RND_XY2_EXPAND 2, 1
+
+        .align 8
+4:
+        RND_XY2_EXPAND 3, 1
+
+5:
+        .word 0x03030303
+        .word 2b
+        .word 3b
+        .word 4b
+        .word 0x02020202
+        .word 0xFCFCFCFC >> 2
+        .word 0x0F0F0F0F
+        .word 0x01010101
+
+        .align 8
+        .global put_no_rnd_pixels8_xy2_arm
+put_no_rnd_pixels8_xy2_arm:
+        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+        @ block = word aligned, pixles = unaligned
+        pld [r1]
+        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+        adrl r12, 5f
+        ands r4, r1, #3
+        add r5, r12, r4, lsl #2
+        bic r1, r1, #3
+        ldrne pc, [r5]
+1:
+        RND_XY2_EXPAND 0, 0
+
+        .align 8
+2:
+        RND_XY2_EXPAND 1, 0
+
+        .align 8
+3:
+        RND_XY2_EXPAND 2, 0
+
+        .align 8
+4:
+        RND_XY2_EXPAND 3, 0
+
+5:
+        .word 0x03030303
+        .word 2b
+        .word 3b
+        .word 4b
+        .word 0x02020202
+        .word 0xFCFCFCFC >> 2
+        .word 0x0F0F0F0F
+        .word 0x01010101
diff --git a/contrib/ffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c b/contrib/ffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c
new file mode 100644
index 000000000..d7401e760
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/armv4l/dsputil_iwmmxt.c
@@ -0,0 +1,188 @@
+/*
+ * iWMMXt optimized DSP utils
+ * Copyright (c) 2004 AGAWA Koji
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+
+#define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt
+#define SET_RND(regd)  __asm__ __volatile__ ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
+#define WAVG2B "wavg2b"
+#include "dsputil_iwmmxt_rnd.h"
+#undef DEF
+#undef SET_RND
+#undef WAVG2B
+
+#define DEF(x, y) x ## _ ## y ##_iwmmxt
+#define SET_RND(regd)  __asm__ __volatile__ ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
+#define WAVG2B "wavg2br"
+#include "dsputil_iwmmxt_rnd.h"
+#undef DEF
+#undef SET_RND
+#undef WAVG2BR
+
+// need scheduling
+#define OP(AVG)                                         \
+    asm volatile (                                      \
+        /* alignment */                                 \
+        "and r12, %[pixels], #7 \n\t"                   \
+        "bic %[pixels], %[pixels], #7 \n\t"             \
+        "tmcr wcgr1, r12 \n\t"                          \
+                                                        \
+        "wldrd wr0, [%[pixels]] \n\t"                   \
+        "wldrd wr1, [%[pixels], #8] \n\t"               \
+        "add %[pixels], %[pixels], %[line_size] \n\t"   \
+        "walignr1 wr4, wr0, wr1 \n\t"                   \
+                                                        \
+        "1: \n\t"                                       \
+                                                        \
+        "wldrd wr2, [%[pixels]] \n\t"                   \
+        "wldrd wr3, [%[pixels], #8] \n\t"               \
+        "add %[pixels], %[pixels], %[line_size] \n\t"   \
+        "pld [%[pixels]] \n\t"                          \
+        "walignr1 wr5, wr2, wr3 \n\t"                   \
+        AVG " wr6, wr4, wr5 \n\t"                       \
+        "wstrd wr6, [%[block]] \n\t"                    \
+        "add %[block], %[block], %[line_size] \n\t"     \
+                                                        \
+        "wldrd wr0, [%[pixels]] \n\t"                   \
+        "wldrd wr1, [%[pixels], #8] \n\t"               \
+        "add %[pixels], %[pixels], %[line_size] \n\t"   \
+        "walignr1 wr4, wr0, wr1 \n\t"                   \
+        "pld [%[pixels]] \n\t"                          \
+        AVG " wr6, wr4, wr5 \n\t"                       \
+        "wstrd wr6, [%[block]] \n\t"                    \
+        "add %[block], %[block], %[line_size] \n\t"     \
+                                                        \
+        "subs %[h], %[h], #2 \n\t"                      \
+        "bne 1b \n\t"                                   \
+        : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h)  \
+        : [line_size]"r"(line_size) \
+        : "memory", "r12");
+void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    OP("wavg2br");
+}
+void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    OP("wavg2b");
+}
+#undef OP
+
+void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size)
+{
+    uint8_t *pixels2 = pixels + line_size;
+
+    __asm__ __volatile__ (
+        "mov            r12, #4                 \n\t"
+        "1:                                     \n\t"
+        "pld            [%[pixels], %[line_size2]]              \n\t"
+        "pld            [%[pixels2], %[line_size2]]             \n\t"
+        "wldrd          wr4, [%[pixels]]        \n\t"
+        "wldrd          wr5, [%[pixels2]]       \n\t"
+        "pld            [%[block], #32]         \n\t"
+        "wunpckelub     wr6, wr4                \n\t"
+        "wldrd          wr0, [%[block]]         \n\t"
+        "wunpckehub     wr7, wr4                \n\t"
+        "wldrd          wr1, [%[block], #8]     \n\t"
+        "wunpckelub     wr8, wr5                \n\t"
+        "wldrd          wr2, [%[block], #16]    \n\t"
+        "wunpckehub     wr9, wr5                \n\t"
+        "wldrd          wr3, [%[block], #24]    \n\t"
+        "add            %[block], %[block], #32 \n\t"
+        "waddhss        wr10, wr0, wr6          \n\t"
+        "waddhss        wr11, wr1, wr7          \n\t"
+        "waddhss        wr12, wr2, wr8          \n\t"
+        "waddhss        wr13, wr3, wr9          \n\t"
+        "wpackhus       wr14, wr10, wr11        \n\t"
+        "wpackhus       wr15, wr12, wr13        \n\t"
+        "wstrd          wr14, [%[pixels]]       \n\t"
+        "add            %[pixels], %[pixels], %[line_size2]     \n\t"
+        "subs           r12, r12, #1            \n\t"
+        "wstrd          wr15, [%[pixels2]]      \n\t"
+        "add            %[pixels2], %[pixels2], %[line_size2]   \n\t"
+        "bne            1b                      \n\t"
+        : [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2)
+        : [line_size2]"r"(line_size << 1)
+        : "cc", "memory", "r12");
+}
+
+static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    return;
+}
+
+int mm_flags; /* multimedia extension flags */
+
+int mm_support(void)
+{
+    return 0; /* TODO, implement proper detection */
+}
+
+void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
+{
+    mm_flags = mm_support();
+
+    if (avctx->dsp_mask) {
+        if (avctx->dsp_mask & FF_MM_FORCE)
+            mm_flags |= (avctx->dsp_mask & 0xffff);
+        else
+            mm_flags &= ~(avctx->dsp_mask & 0xffff);
+    }
+
+    if (!(mm_flags & MM_IWMMXT)) return;
+
+    c->add_pixels_clamped = add_pixels_clamped_iwmmxt;
+
+    c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
+    c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt;
+    c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt;
+    c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt;
+    c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt;
+    c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt;
+    c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt;
+    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt;
+
+    c->put_pixels_tab[1][0] = put_pixels8_iwmmxt;
+    c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt;
+    c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt;
+    c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt;
+    c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt;
+    c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt;
+    c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt;
+    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt;
+
+    c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt;
+    c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt;
+    c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt;
+    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt;
+    c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt;
+    c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt;
+    c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt;
+    c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt;
+
+    c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt;
+    c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt;
+    c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt;
+    c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt;
+    c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt;
+    c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt;
+    c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt;
+    c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt;
+}
diff --git a/contrib/ffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h b/contrib/ffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h
new file mode 100644
index 000000000..51ba61c47
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/armv4l/dsputil_iwmmxt_rnd.h
@@ -0,0 +1,1114 @@
+/*
+ * iWMMXt optimized DSP utils
+ * copyright (c) 2004 AGAWA Koji
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    int stride = line_size;
+    __asm__ __volatile__ (
+        "and r12, %[pixels], #7 \n\t"
+        "bic %[pixels], %[pixels], #7 \n\t"
+        "tmcr wcgr1, r12 \n\t"
+        "add r4, %[pixels], %[line_size] \n\t"
+        "add r5, %[block], %[line_size] \n\t"
+        "mov %[line_size], %[line_size], lsl #1 \n\t"
+        "1: \n\t"
+        "wldrd wr0, [%[pixels]] \n\t"
+        "subs %[h], %[h], #2 \n\t"
+        "wldrd wr1, [%[pixels], #8] \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "wldrd wr3, [r4] \n\t"
+        "pld [%[pixels]] \n\t"
+        "pld [%[pixels], #32] \n\t"
+        "wldrd wr4, [r4, #8] \n\t"
+        "add r4, r4, %[line_size] \n\t"
+        "walignr1 wr8, wr0, wr1 \n\t"
+        "pld [r4] \n\t"
+        "pld [r4, #32] \n\t"
+        "walignr1 wr10, wr3, wr4 \n\t"
+        "wstrd wr8, [%[block]] \n\t"
+        "add %[block], %[block], %[line_size] \n\t"
+        "wstrd wr10, [r5] \n\t"
+        "add r5, r5, %[line_size] \n\t"
+        "bne 1b \n\t"
+        : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+        :
+        : "memory", "r4", "r5", "r12");
+}
+
+void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    int stride = line_size;
+    __asm__ __volatile__ (
+        "and r12, %[pixels], #7 \n\t"
+        "bic %[pixels], %[pixels], #7 \n\t"
+        "tmcr wcgr1, r12 \n\t"
+        "add r4, %[pixels], %[line_size] \n\t"
+        "add r5, %[block], %[line_size] \n\t"
+        "mov %[line_size], %[line_size], lsl #1 \n\t"
+        "1: \n\t"
+        "wldrd wr0, [%[pixels]] \n\t"
+        "subs %[h], %[h], #2 \n\t"
+        "wldrd wr1, [%[pixels], #8] \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "wldrd wr3, [r4] \n\t"
+        "pld [%[pixels]] \n\t"
+        "pld [%[pixels], #32] \n\t"
+        "wldrd wr4, [r4, #8] \n\t"
+        "add r4, r4, %[line_size] \n\t"
+        "walignr1 wr8, wr0, wr1 \n\t"
+        "wldrd wr0, [%[block]] \n\t"
+        "wldrd wr2, [r5] \n\t"
+        "pld [r4] \n\t"
+        "pld [r4, #32] \n\t"
+        "walignr1 wr10, wr3, wr4 \n\t"
+        WAVG2B" wr8, wr8, wr0 \n\t"
+        WAVG2B" wr10, wr10, wr2 \n\t"
+        "wstrd wr8, [%[block]] \n\t"
+        "add %[block], %[block], %[line_size] \n\t"
+        "wstrd wr10, [r5] \n\t"
+        "pld [%[block]] \n\t"
+        "pld [%[block], #32] \n\t"
+        "add r5, r5, %[line_size] \n\t"
+        "pld [r5] \n\t"
+        "pld [r5, #32] \n\t"
+        "bne 1b \n\t"
+        : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+        :
+        : "memory", "r4", "r5", "r12");
+}
+
+void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    int stride = line_size;
+    __asm__ __volatile__ (
+        "and r12, %[pixels], #7 \n\t"
+        "bic %[pixels], %[pixels], #7 \n\t"
+        "tmcr wcgr1, r12 \n\t"
+        "add r4, %[pixels], %[line_size] \n\t"
+        "add r5, %[block], %[line_size] \n\t"
+        "mov %[line_size], %[line_size], lsl #1 \n\t"
+        "1: \n\t"
+        "wldrd wr0, [%[pixels]] \n\t"
+        "wldrd wr1, [%[pixels], #8] \n\t"
+        "subs %[h], %[h], #2 \n\t"
+        "wldrd wr2, [%[pixels], #16] \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "wldrd wr3, [r4] \n\t"
+        "pld [%[pixels]] \n\t"
+        "pld [%[pixels], #32] \n\t"
+        "walignr1 wr8, wr0, wr1 \n\t"
+        "wldrd wr4, [r4, #8] \n\t"
+        "walignr1 wr9, wr1, wr2 \n\t"
+        "wldrd wr5, [r4, #16] \n\t"
+        "add r4, r4, %[line_size] \n\t"
+        "pld [r4] \n\t"
+        "pld [r4, #32] \n\t"
+        "walignr1 wr10, wr3, wr4 \n\t"
+        "wstrd wr8, [%[block]] \n\t"
+        "walignr1 wr11, wr4, wr5 \n\t"
+        "wstrd wr9, [%[block], #8] \n\t"
+        "add %[block], %[block], %[line_size] \n\t"
+        "wstrd wr10, [r5] \n\t"
+        "wstrd wr11, [r5, #8] \n\t"
+        "add r5, r5, %[line_size] \n\t"
+        "bne 1b \n\t"
+        : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+        :
+        : "memory", "r4", "r5", "r12");
+}
+
+void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    int stride = line_size;
+    __asm__ __volatile__ (
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "pld [%[block]]                 \n\t"
+        "pld [%[block], #32]            \n\t"
+        "and r12, %[pixels], #7         \n\t"
+        "bic %[pixels], %[pixels], #7   \n\t"
+        "tmcr wcgr1, r12                \n\t"
+        "add r4, %[pixels], %[line_size]\n\t"
+        "add r5, %[block], %[line_size] \n\t"
+        "mov %[line_size], %[line_size], lsl #1 \n\t"
+        "1:                             \n\t"
+        "wldrd wr0, [%[pixels]]         \n\t"
+        "wldrd wr1, [%[pixels], #8]     \n\t"
+        "subs %[h], %[h], #2            \n\t"
+        "wldrd wr2, [%[pixels], #16]    \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "wldrd wr3, [r4]                \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "walignr1 wr8, wr0, wr1         \n\t"
+        "wldrd wr4, [r4, #8]            \n\t"
+        "walignr1 wr9, wr1, wr2         \n\t"
+        "wldrd wr5, [r4, #16]           \n\t"
+        "add r4, r4, %[line_size]       \n\t"
+        "wldrd wr0, [%[block]]          \n\t"
+        "pld [r4]                       \n\t"
+        "wldrd wr1, [%[block], #8]      \n\t"
+        "pld [r4, #32]                  \n\t"
+        "wldrd wr2, [r5]                \n\t"
+        "walignr1 wr10, wr3, wr4        \n\t"
+        "wldrd wr3, [r5, #8]            \n\t"
+        WAVG2B" wr8, wr8, wr0           \n\t"
+        WAVG2B" wr9, wr9, wr1           \n\t"
+        WAVG2B" wr10, wr10, wr2         \n\t"
+        "wstrd wr8, [%[block]]          \n\t"
+        "walignr1 wr11, wr4, wr5        \n\t"
+        WAVG2B" wr11, wr11, wr3         \n\t"
+        "wstrd wr9, [%[block], #8]      \n\t"
+        "add %[block], %[block], %[line_size] \n\t"
+        "wstrd wr10, [r5]               \n\t"
+        "pld [%[block]]                 \n\t"
+        "pld [%[block], #32]            \n\t"
+        "wstrd wr11, [r5, #8]           \n\t"
+        "add r5, r5, %[line_size]       \n\t"
+        "pld [r5]                       \n\t"
+        "pld [r5, #32]                  \n\t"
+        "bne 1b \n\t"
+        : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+        :
+        : "memory", "r4", "r5", "r12");
+}
+
+void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    int stride = line_size;
+    // [wr0 wr1 wr2 wr3] for previous line
+    // [wr4 wr5 wr6 wr7] for current line
+    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+    __asm__ __volatile__(
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "and r12, %[pixels], #7         \n\t"
+        "bic %[pixels], %[pixels], #7   \n\t"
+        "tmcr wcgr1, r12                \n\t"
+        "add r12, r12, #1               \n\t"
+        "add r4, %[pixels], %[line_size]\n\t"
+        "tmcr wcgr2, r12                \n\t"
+        "add r5, %[block], %[line_size] \n\t"
+        "mov %[line_size], %[line_size], lsl #1 \n\t"
+
+        "1:                             \n\t"
+        "wldrd wr10, [%[pixels]]        \n\t"
+        "cmp r12, #8                    \n\t"
+        "wldrd wr11, [%[pixels], #8]    \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "wldrd wr13, [r4]               \n\t"
+        "pld [%[pixels]]                \n\t"
+        "wldrd wr14, [r4, #8]           \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "add r4, r4, %[line_size]       \n\t"
+        "walignr1 wr0, wr10, wr11       \n\t"
+        "pld [r4]                       \n\t"
+        "pld [r4, #32]                  \n\t"
+        "walignr1 wr2, wr13, wr14       \n\t"
+        "wmoveq wr4, wr11               \n\t"
+        "wmoveq wr6, wr14               \n\t"
+        "walignr2ne wr4, wr10, wr11     \n\t"
+        "walignr2ne wr6, wr13, wr14     \n\t"
+        WAVG2B" wr0, wr0, wr4           \n\t"
+        WAVG2B" wr2, wr2, wr6           \n\t"
+        "wstrd wr0, [%[block]]          \n\t"
+        "subs %[h], %[h], #2            \n\t"
+        "wstrd wr2, [r5]                \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+        "add r5, r5, %[line_size]       \n\t"
+        "bne 1b                         \n\t"
+        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+        :
+        : "r4", "r5", "r12", "memory");
+}
+
+void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    int stride = line_size;
+    // [wr0 wr1 wr2 wr3] for previous line
+    // [wr4 wr5 wr6 wr7] for current line
+    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+    __asm__ __volatile__(
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "and r12, %[pixels], #7         \n\t"
+        "bic %[pixels], %[pixels], #7   \n\t"
+        "tmcr wcgr1, r12                \n\t"
+        "add r12, r12, #1               \n\t"
+        "add r4, %[pixels], %[line_size]\n\t"
+        "tmcr wcgr2, r12                \n\t"
+        "add r5, %[block], %[line_size] \n\t"
+        "mov %[line_size], %[line_size], lsl #1 \n\t"
+
+        "1:                             \n\t"
+        "wldrd wr10, [%[pixels]]        \n\t"
+        "cmp r12, #8                    \n\t"
+        "wldrd wr11, [%[pixels], #8]    \n\t"
+        "wldrd wr12, [%[pixels], #16]   \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "wldrd wr13, [r4]               \n\t"
+        "pld [%[pixels]]                \n\t"
+        "wldrd wr14, [r4, #8]           \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "wldrd wr15, [r4, #16]          \n\t"
+        "add r4, r4, %[line_size]       \n\t"
+        "walignr1 wr0, wr10, wr11       \n\t"
+        "pld [r4]                       \n\t"
+        "pld [r4, #32]                  \n\t"
+        "walignr1 wr1, wr11, wr12       \n\t"
+        "walignr1 wr2, wr13, wr14       \n\t"
+        "walignr1 wr3, wr14, wr15       \n\t"
+        "wmoveq wr4, wr11               \n\t"
+        "wmoveq wr5, wr12               \n\t"
+        "wmoveq wr6, wr14               \n\t"
+        "wmoveq wr7, wr15               \n\t"
+        "walignr2ne wr4, wr10, wr11     \n\t"
+        "walignr2ne wr5, wr11, wr12     \n\t"
+        "walignr2ne wr6, wr13, wr14     \n\t"
+        "walignr2ne wr7, wr14, wr15     \n\t"
+        WAVG2B" wr0, wr0, wr4           \n\t"
+        WAVG2B" wr1, wr1, wr5           \n\t"
+        "wstrd wr0, [%[block]]          \n\t"
+        WAVG2B" wr2, wr2, wr6           \n\t"
+        "wstrd wr1, [%[block], #8]      \n\t"
+        WAVG2B" wr3, wr3, wr7           \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+        "wstrd wr2, [r5]                \n\t"
+        "subs %[h], %[h], #2            \n\t"
+        "wstrd wr3, [r5, #8]            \n\t"
+        "add r5, r5, %[line_size]       \n\t"
+        "bne 1b                         \n\t"
+        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+        :
+        : "r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    int stride = line_size;
+    // [wr0 wr1 wr2 wr3] for previous line
+    // [wr4 wr5 wr6 wr7] for current line
+    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+    __asm__ __volatile__(
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "pld [%[block]]                 \n\t"
+        "pld [%[block], #32]            \n\t"
+        "and r12, %[pixels], #7         \n\t"
+        "bic %[pixels], %[pixels], #7   \n\t"
+        "tmcr wcgr1, r12                \n\t"
+        "add r12, r12, #1               \n\t"
+        "add r4, %[pixels], %[line_size]\n\t"
+        "tmcr wcgr2, r12                \n\t"
+        "add r5, %[block], %[line_size] \n\t"
+        "mov %[line_size], %[line_size], lsl #1 \n\t"
+        "pld [r5]                       \n\t"
+        "pld [r5, #32]                  \n\t"
+
+        "1:                             \n\t"
+        "wldrd wr10, [%[pixels]]        \n\t"
+        "cmp r12, #8                    \n\t"
+        "wldrd wr11, [%[pixels], #8]    \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "wldrd wr13, [r4]               \n\t"
+        "pld [%[pixels]]                \n\t"
+        "wldrd wr14, [r4, #8]           \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "add r4, r4, %[line_size]       \n\t"
+        "walignr1 wr0, wr10, wr11       \n\t"
+        "pld [r4]                       \n\t"
+        "pld [r4, #32]                  \n\t"
+        "walignr1 wr2, wr13, wr14       \n\t"
+        "wmoveq wr4, wr11               \n\t"
+        "wmoveq wr6, wr14               \n\t"
+        "walignr2ne wr4, wr10, wr11     \n\t"
+        "wldrd wr10, [%[block]]         \n\t"
+        "walignr2ne wr6, wr13, wr14     \n\t"
+        "wldrd wr12, [r5]               \n\t"
+        WAVG2B" wr0, wr0, wr4           \n\t"
+        WAVG2B" wr2, wr2, wr6           \n\t"
+        WAVG2B" wr0, wr0, wr10          \n\t"
+        WAVG2B" wr2, wr2, wr12          \n\t"
+        "wstrd wr0, [%[block]]          \n\t"
+        "subs %[h], %[h], #2            \n\t"
+        "wstrd wr2, [r5]                \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+        "add r5, r5, %[line_size]       \n\t"
+        "pld [%[block]]                 \n\t"
+        "pld [%[block], #32]            \n\t"
+        "pld [r5]                       \n\t"
+        "pld [r5, #32]                  \n\t"
+        "bne 1b                         \n\t"
+        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+        :
+        : "r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    int stride = line_size;
+    // [wr0 wr1 wr2 wr3] for previous line
+    // [wr4 wr5 wr6 wr7] for current line
+    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+    __asm__ __volatile__(
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "pld [%[block]]                 \n\t"
+        "pld [%[block], #32]            \n\t"
+        "and r12, %[pixels], #7         \n\t"
+        "bic %[pixels], %[pixels], #7   \n\t"
+        "tmcr wcgr1, r12                \n\t"
+        "add r12, r12, #1               \n\t"
+        "add r4, %[pixels], %[line_size]\n\t"
+        "tmcr wcgr2, r12                \n\t"
+        "add r5, %[block], %[line_size] \n\t"
+        "mov %[line_size], %[line_size], lsl #1 \n\t"
+        "pld [r5]                       \n\t"
+        "pld [r5, #32]                  \n\t"
+
+        "1:                             \n\t"
+        "wldrd wr10, [%[pixels]]        \n\t"
+        "cmp r12, #8                    \n\t"
+        "wldrd wr11, [%[pixels], #8]    \n\t"
+        "wldrd wr12, [%[pixels], #16]   \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "wldrd wr13, [r4]               \n\t"
+        "pld [%[pixels]]                \n\t"
+        "wldrd wr14, [r4, #8]           \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "wldrd wr15, [r4, #16]          \n\t"
+        "add r4, r4, %[line_size]       \n\t"
+        "walignr1 wr0, wr10, wr11       \n\t"
+        "pld [r4]                       \n\t"
+        "pld [r4, #32]                  \n\t"
+        "walignr1 wr1, wr11, wr12       \n\t"
+        "walignr1 wr2, wr13, wr14       \n\t"
+        "walignr1 wr3, wr14, wr15       \n\t"
+        "wmoveq wr4, wr11               \n\t"
+        "wmoveq wr5, wr12               \n\t"
+        "wmoveq wr6, wr14               \n\t"
+        "wmoveq wr7, wr15               \n\t"
+        "walignr2ne wr4, wr10, wr11     \n\t"
+        "walignr2ne wr5, wr11, wr12     \n\t"
+        "walignr2ne wr6, wr13, wr14     \n\t"
+        "walignr2ne wr7, wr14, wr15     \n\t"
+        "wldrd wr10, [%[block]]         \n\t"
+        WAVG2B" wr0, wr0, wr4           \n\t"
+        "wldrd wr11, [%[block], #8]     \n\t"
+        WAVG2B" wr1, wr1, wr5           \n\t"
+        "wldrd wr12, [r5]               \n\t"
+        WAVG2B" wr2, wr2, wr6           \n\t"
+        "wldrd wr13, [r5, #8]           \n\t"
+        WAVG2B" wr3, wr3, wr7           \n\t"
+        WAVG2B" wr0, wr0, wr10          \n\t"
+        WAVG2B" wr1, wr1, wr11          \n\t"
+        WAVG2B" wr2, wr2, wr12          \n\t"
+        WAVG2B" wr3, wr3, wr13          \n\t"
+        "wstrd wr0, [%[block]]          \n\t"
+        "subs %[h], %[h], #2            \n\t"
+        "wstrd wr1, [%[block], #8]      \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+        "wstrd wr2, [r5]                \n\t"
+        "pld [%[block]]                 \n\t"
+        "wstrd wr3, [r5, #8]            \n\t"
+        "add r5, r5, %[line_size]       \n\t"
+        "pld [%[block], #32]            \n\t"
+        "pld [r5]                       \n\t"
+        "pld [r5, #32]                  \n\t"
+        "bne 1b                         \n\t"
+        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+        :
+        :"r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    int stride = line_size;
+    // [wr0 wr1 wr2 wr3] for previous line
+    // [wr4 wr5 wr6 wr7] for current line
+    __asm__ __volatile__(
+        "pld            [%[pixels]]                             \n\t"
+        "pld            [%[pixels], #32]                        \n\t"
+        "and            r12, %[pixels], #7                      \n\t"
+        "tmcr           wcgr1, r12                              \n\t"
+        "bic            %[pixels], %[pixels], #7                \n\t"
+
+        "wldrd          wr10, [%[pixels]]                       \n\t"
+        "wldrd          wr11, [%[pixels], #8]                   \n\t"
+        "pld            [%[block]]                              \n\t"
+        "add            %[pixels], %[pixels], %[line_size]      \n\t"
+        "walignr1       wr0, wr10, wr11                         \n\t"
+        "pld            [%[pixels]]                             \n\t"
+        "pld            [%[pixels], #32]                        \n\t"
+
+      "1:                                                       \n\t"
+        "wldrd          wr10, [%[pixels]]                       \n\t"
+        "wldrd          wr11, [%[pixels], #8]                   \n\t"
+        "add            %[pixels], %[pixels], %[line_size]      \n\t"
+        "pld            [%[pixels]]                             \n\t"
+        "pld            [%[pixels], #32]                        \n\t"
+        "walignr1       wr4, wr10, wr11                         \n\t"
+        "wldrd          wr10, [%[block]]                        \n\t"
+         WAVG2B"        wr8, wr0, wr4                           \n\t"
+         WAVG2B"        wr8, wr8, wr10                          \n\t"
+        "wstrd          wr8, [%[block]]                         \n\t"
+        "add            %[block], %[block], %[line_size]        \n\t"
+
+        "wldrd          wr10, [%[pixels]]                       \n\t"
+        "wldrd          wr11, [%[pixels], #8]                   \n\t"
+        "pld            [%[block]]                              \n\t"
+        "add            %[pixels], %[pixels], %[line_size]      \n\t"
+        "pld            [%[pixels]]                             \n\t"
+        "pld            [%[pixels], #32]                        \n\t"
+        "walignr1       wr0, wr10, wr11                         \n\t"
+        "wldrd          wr10, [%[block]]                        \n\t"
+         WAVG2B"        wr8, wr0, wr4                           \n\t"
+         WAVG2B"        wr8, wr8, wr10                          \n\t"
+        "wstrd          wr8, [%[block]]                         \n\t"
+        "add            %[block], %[block], %[line_size]        \n\t"
+
+        "subs           %[h], %[h], #2                          \n\t"
+        "pld            [%[block]]                              \n\t"
+        "bne            1b                                      \n\t"
+        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+        :
+        : "cc", "memory", "r12");
+}
+
+void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    int stride = line_size;
+    // [wr0 wr1 wr2 wr3] for previous line
+    // [wr4 wr5 wr6 wr7] for current line
+    __asm__ __volatile__(
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "and r12, %[pixels], #7         \n\t"
+        "tmcr wcgr1, r12                \n\t"
+        "bic %[pixels], %[pixels], #7   \n\t"
+
+        "wldrd wr10, [%[pixels]]        \n\t"
+        "wldrd wr11, [%[pixels], #8]    \n\t"
+        "wldrd wr12, [%[pixels], #16]   \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "walignr1 wr0, wr10, wr11       \n\t"
+        "walignr1 wr1, wr11, wr12       \n\t"
+
+        "1:                             \n\t"
+        "wldrd wr10, [%[pixels]]        \n\t"
+        "wldrd wr11, [%[pixels], #8]    \n\t"
+        "wldrd wr12, [%[pixels], #16]   \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "walignr1 wr4, wr10, wr11       \n\t"
+        "walignr1 wr5, wr11, wr12       \n\t"
+        WAVG2B" wr8, wr0, wr4           \n\t"
+        WAVG2B" wr9, wr1, wr5           \n\t"
+        "wstrd wr8, [%[block]]          \n\t"
+        "wstrd wr9, [%[block], #8]      \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+
+        "wldrd wr10, [%[pixels]]        \n\t"
+        "wldrd wr11, [%[pixels], #8]    \n\t"
+        "wldrd wr12, [%[pixels], #16]   \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "walignr1 wr0, wr10, wr11       \n\t"
+        "walignr1 wr1, wr11, wr12       \n\t"
+        WAVG2B" wr8, wr0, wr4           \n\t"
+        WAVG2B" wr9, wr1, wr5           \n\t"
+        "wstrd wr8, [%[block]]          \n\t"
+        "wstrd wr9, [%[block], #8]      \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+
+        "subs %[h], %[h], #2            \n\t"
+        "bne 1b                         \n\t"
+        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+        :
+        : "r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    int stride = line_size;
+    // [wr0 wr1 wr2 wr3] for previous line
+    // [wr4 wr5 wr6 wr7] for current line
+    __asm__ __volatile__(
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "and r12, %[pixels], #7         \n\t"
+        "tmcr wcgr1, r12                \n\t"
+        "bic %[pixels], %[pixels], #7   \n\t"
+
+        "wldrd wr10, [%[pixels]]        \n\t"
+        "wldrd wr11, [%[pixels], #8]    \n\t"
+        "pld [%[block]]                 \n\t"
+        "wldrd wr12, [%[pixels], #16]   \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "walignr1 wr0, wr10, wr11       \n\t"
+        "walignr1 wr1, wr11, wr12       \n\t"
+
+        "1:                             \n\t"
+        "wldrd wr10, [%[pixels]]        \n\t"
+        "wldrd wr11, [%[pixels], #8]    \n\t"
+        "wldrd wr12, [%[pixels], #16]   \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "walignr1 wr4, wr10, wr11       \n\t"
+        "walignr1 wr5, wr11, wr12       \n\t"
+        "wldrd wr10, [%[block]]         \n\t"
+        "wldrd wr11, [%[block], #8]     \n\t"
+        WAVG2B" wr8, wr0, wr4           \n\t"
+        WAVG2B" wr9, wr1, wr5           \n\t"
+        WAVG2B" wr8, wr8, wr10          \n\t"
+        WAVG2B" wr9, wr9, wr11          \n\t"
+        "wstrd wr8, [%[block]]          \n\t"
+        "wstrd wr9, [%[block], #8]      \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+
+        "wldrd wr10, [%[pixels]]        \n\t"
+        "wldrd wr11, [%[pixels], #8]    \n\t"
+        "pld [%[block]]                 \n\t"
+        "wldrd wr12, [%[pixels], #16]   \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "walignr1 wr0, wr10, wr11       \n\t"
+        "walignr1 wr1, wr11, wr12       \n\t"
+        "wldrd wr10, [%[block]]         \n\t"
+        "wldrd wr11, [%[block], #8]     \n\t"
+        WAVG2B" wr8, wr0, wr4           \n\t"
+        WAVG2B" wr9, wr1, wr5           \n\t"
+        WAVG2B" wr8, wr8, wr10          \n\t"
+        WAVG2B" wr9, wr9, wr11          \n\t"
+        "wstrd wr8, [%[block]]          \n\t"
+        "wstrd wr9, [%[block], #8]      \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+
+        "subs %[h], %[h], #2            \n\t"
+        "pld [%[block]]                 \n\t"
+        "bne 1b                         \n\t"
+        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+        :
+        : "r4", "r5", "r12", "memory");
+}
+
+void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    // [wr0 wr1 wr2 wr3] for previous line
+    // [wr4 wr5 wr6 wr7] for current line
+    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+    __asm__ __volatile__(
+        "pld [%[pixels]]                \n\t"
+        "mov r12, #2                    \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "tmcr wcgr0, r12                \n\t" /* for shift value */
+        "and r12, %[pixels], #7         \n\t"
+        "bic %[pixels], %[pixels], #7   \n\t"
+        "tmcr wcgr1, r12                \n\t"
+
+        // [wr0 wr1 wr2 wr3] <= *
+        // [wr4 wr5 wr6 wr7]
+        "wldrd wr12, [%[pixels]]        \n\t"
+        "add r12, r12, #1               \n\t"
+        "wldrd wr13, [%[pixels], #8]    \n\t"
+        "tmcr wcgr2, r12                \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "cmp r12, #8                    \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "walignr1 wr2, wr12, wr13       \n\t"
+        "wmoveq wr10, wr13              \n\t"
+        "walignr2ne wr10, wr12, wr13    \n\t"
+        "wunpckelub wr0, wr2            \n\t"
+        "wunpckehub wr1, wr2            \n\t"
+        "wunpckelub wr8, wr10           \n\t"
+        "wunpckehub wr9, wr10           \n\t"
+        "waddhus wr0, wr0, wr8          \n\t"
+        "waddhus wr1, wr1, wr9          \n\t"
+
+        "1:                             \n\t"
+        // [wr0 wr1 wr2 wr3]
+        // [wr4 wr5 wr6 wr7] <= *
+        "wldrd wr12, [%[pixels]]        \n\t"
+        "cmp r12, #8                    \n\t"
+        "wldrd wr13, [%[pixels], #8]    \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "walignr1 wr6, wr12, wr13       \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "wmoveq wr10, wr13              \n\t"
+        "walignr2ne wr10, wr12, wr13    \n\t"
+        "wunpckelub wr4, wr6            \n\t"
+        "wunpckehub wr5, wr6            \n\t"
+        "wunpckelub wr8, wr10           \n\t"
+        "wunpckehub wr9, wr10           \n\t"
+        "waddhus wr4, wr4, wr8          \n\t"
+        "waddhus wr5, wr5, wr9          \n\t"
+        "waddhus wr8, wr0, wr4          \n\t"
+        "waddhus wr9, wr1, wr5          \n\t"
+        "waddhus wr8, wr8, wr15         \n\t"
+        "waddhus wr9, wr9, wr15         \n\t"
+        "wsrlhg wr8, wr8, wcgr0         \n\t"
+        "wsrlhg wr9, wr9, wcgr0         \n\t"
+        "wpackhus wr8, wr8, wr9         \n\t"
+        "wstrd wr8, [%[block]]          \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+
+        // [wr0 wr1 wr2 wr3] <= *
+        // [wr4 wr5 wr6 wr7]
+        "wldrd wr12, [%[pixels]]        \n\t"
+        "wldrd wr13, [%[pixels], #8]    \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "walignr1 wr2, wr12, wr13       \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "wmoveq wr10, wr13              \n\t"
+        "walignr2ne wr10, wr12, wr13    \n\t"
+        "wunpckelub wr0, wr2            \n\t"
+        "wunpckehub wr1, wr2            \n\t"
+        "wunpckelub wr8, wr10           \n\t"
+        "wunpckehub wr9, wr10           \n\t"
+        "waddhus wr0, wr0, wr8          \n\t"
+        "waddhus wr1, wr1, wr9          \n\t"
+        "waddhus wr8, wr0, wr4          \n\t"
+        "waddhus wr9, wr1, wr5          \n\t"
+        "waddhus wr8, wr8, wr15         \n\t"
+        "waddhus wr9, wr9, wr15         \n\t"
+        "wsrlhg wr8, wr8, wcgr0         \n\t"
+        "wsrlhg wr9, wr9, wcgr0         \n\t"
+        "wpackhus wr8, wr8, wr9         \n\t"
+        "subs %[h], %[h], #2            \n\t"
+        "wstrd wr8, [%[block]]          \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+        "bne 1b                         \n\t"
+        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+        : [line_size]"r"(line_size)
+        : "r12", "memory");
+}
+
+void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    // [wr0 wr1 wr2 wr3] for previous line
+    // [wr4 wr5 wr6 wr7] for current line
+    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+    __asm__ __volatile__(
+        "pld [%[pixels]]                \n\t"
+        "mov r12, #2                    \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "tmcr wcgr0, r12                \n\t" /* for shift value */
+        /* alignment */
+        "and r12, %[pixels], #7         \n\t"
+        "bic %[pixels], %[pixels], #7   \n\t"
+        "tmcr wcgr1, r12                \n\t"
+        "add r12, r12, #1               \n\t"
+        "tmcr wcgr2, r12                \n\t"
+
+        // [wr0 wr1 wr2 wr3] <= *
+        // [wr4 wr5 wr6 wr7]
+        "wldrd wr12, [%[pixels]]        \n\t"
+        "cmp r12, #8                    \n\t"
+        "wldrd wr13, [%[pixels], #8]    \n\t"
+        "wldrd wr14, [%[pixels], #16]   \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "pld [%[pixels]]                \n\t"
+        "walignr1 wr2, wr12, wr13       \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "walignr1 wr3, wr13, wr14       \n\t"
+        "wmoveq wr10, wr13              \n\t"
+        "wmoveq wr11, wr14              \n\t"
+        "walignr2ne wr10, wr12, wr13    \n\t"
+        "walignr2ne wr11, wr13, wr14    \n\t"
+        "wunpckelub wr0, wr2            \n\t"
+        "wunpckehub wr1, wr2            \n\t"
+        "wunpckelub wr2, wr3            \n\t"
+        "wunpckehub wr3, wr3            \n\t"
+        "wunpckelub wr8, wr10           \n\t"
+        "wunpckehub wr9, wr10           \n\t"
+        "wunpckelub wr10, wr11          \n\t"
+        "wunpckehub wr11, wr11          \n\t"
+        "waddhus wr0, wr0, wr8          \n\t"
+        "waddhus wr1, wr1, wr9          \n\t"
+        "waddhus wr2, wr2, wr10         \n\t"
+        "waddhus wr3, wr3, wr11         \n\t"
+
+        "1:                             \n\t"
+        // [wr0 wr1 wr2 wr3]
+        // [wr4 wr5 wr6 wr7] <= *
+        "wldrd wr12, [%[pixels]]        \n\t"
+        "cmp r12, #8                    \n\t"
+        "wldrd wr13, [%[pixels], #8]    \n\t"
+        "wldrd wr14, [%[pixels], #16]   \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "walignr1 wr6, wr12, wr13       \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "walignr1 wr7, wr13, wr14       \n\t"
+        "wmoveq wr10, wr13              \n\t"
+        "wmoveq wr11, wr14              \n\t"
+        "walignr2ne wr10, wr12, wr13    \n\t"
+        "walignr2ne wr11, wr13, wr14    \n\t"
+        "wunpckelub wr4, wr6            \n\t"
+        "wunpckehub wr5, wr6            \n\t"
+        "wunpckelub wr6, wr7            \n\t"
+        "wunpckehub wr7, wr7            \n\t"
+        "wunpckelub wr8, wr10           \n\t"
+        "wunpckehub wr9, wr10           \n\t"
+        "wunpckelub wr10, wr11          \n\t"
+        "wunpckehub wr11, wr11          \n\t"
+        "waddhus wr4, wr4, wr8          \n\t"
+        "waddhus wr5, wr5, wr9          \n\t"
+        "waddhus wr6, wr6, wr10         \n\t"
+        "waddhus wr7, wr7, wr11         \n\t"
+        "waddhus wr8, wr0, wr4          \n\t"
+        "waddhus wr9, wr1, wr5          \n\t"
+        "waddhus wr10, wr2, wr6         \n\t"
+        "waddhus wr11, wr3, wr7         \n\t"
+        "waddhus wr8, wr8, wr15         \n\t"
+        "waddhus wr9, wr9, wr15         \n\t"
+        "waddhus wr10, wr10, wr15       \n\t"
+        "waddhus wr11, wr11, wr15       \n\t"
+        "wsrlhg wr8, wr8, wcgr0         \n\t"
+        "wsrlhg wr9, wr9, wcgr0         \n\t"
+        "wsrlhg wr10, wr10, wcgr0       \n\t"
+        "wsrlhg wr11, wr11, wcgr0       \n\t"
+        "wpackhus wr8, wr8, wr9         \n\t"
+        "wpackhus wr9, wr10, wr11       \n\t"
+        "wstrd wr8, [%[block]]          \n\t"
+        "wstrd wr9, [%[block], #8]      \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+
+        // [wr0 wr1 wr2 wr3] <= *
+        // [wr4 wr5 wr6 wr7]
+        "wldrd wr12, [%[pixels]]        \n\t"
+        "wldrd wr13, [%[pixels], #8]    \n\t"
+        "wldrd wr14, [%[pixels], #16]   \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "walignr1 wr2, wr12, wr13       \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "walignr1 wr3, wr13, wr14       \n\t"
+        "wmoveq wr10, wr13              \n\t"
+        "wmoveq wr11, wr14              \n\t"
+        "walignr2ne wr10, wr12, wr13    \n\t"
+        "walignr2ne wr11, wr13, wr14    \n\t"
+        "wunpckelub wr0, wr2            \n\t"
+        "wunpckehub wr1, wr2            \n\t"
+        "wunpckelub wr2, wr3            \n\t"
+        "wunpckehub wr3, wr3            \n\t"
+        "wunpckelub wr8, wr10           \n\t"
+        "wunpckehub wr9, wr10           \n\t"
+        "wunpckelub wr10, wr11          \n\t"
+        "wunpckehub wr11, wr11          \n\t"
+        "waddhus wr0, wr0, wr8          \n\t"
+        "waddhus wr1, wr1, wr9          \n\t"
+        "waddhus wr2, wr2, wr10         \n\t"
+        "waddhus wr3, wr3, wr11         \n\t"
+        "waddhus wr8, wr0, wr4          \n\t"
+        "waddhus wr9, wr1, wr5          \n\t"
+        "waddhus wr10, wr2, wr6         \n\t"
+        "waddhus wr11, wr3, wr7         \n\t"
+        "waddhus wr8, wr8, wr15         \n\t"
+        "waddhus wr9, wr9, wr15         \n\t"
+        "waddhus wr10, wr10, wr15       \n\t"
+        "waddhus wr11, wr11, wr15       \n\t"
+        "wsrlhg wr8, wr8, wcgr0         \n\t"
+        "wsrlhg wr9, wr9, wcgr0         \n\t"
+        "wsrlhg wr10, wr10, wcgr0       \n\t"
+        "wsrlhg wr11, wr11, wcgr0       \n\t"
+        "wpackhus wr8, wr8, wr9         \n\t"
+        "wpackhus wr9, wr10, wr11       \n\t"
+        "wstrd wr8, [%[block]]          \n\t"
+        "wstrd wr9, [%[block], #8]      \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+
+        "subs %[h], %[h], #2            \n\t"
+        "bne 1b                         \n\t"
+        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+        : [line_size]"r"(line_size)
+        : "r12", "memory");
+}
+
+void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    // [wr0 wr1 wr2 wr3] for previous line
+    // [wr4 wr5 wr6 wr7] for current line
+    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+    __asm__ __volatile__(
+        "pld [%[block]]                 \n\t"
+        "pld [%[block], #32]            \n\t"
+        "pld [%[pixels]]                \n\t"
+        "mov r12, #2                    \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "tmcr wcgr0, r12                \n\t" /* for shift value */
+        "and r12, %[pixels], #7         \n\t"
+        "bic %[pixels], %[pixels], #7   \n\t"
+        "tmcr wcgr1, r12                \n\t"
+
+        // [wr0 wr1 wr2 wr3] <= *
+        // [wr4 wr5 wr6 wr7]
+        "wldrd wr12, [%[pixels]]        \n\t"
+        "add r12, r12, #1               \n\t"
+        "wldrd wr13, [%[pixels], #8]    \n\t"
+        "tmcr wcgr2, r12                \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "cmp r12, #8                    \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "walignr1 wr2, wr12, wr13       \n\t"
+        "wmoveq wr10, wr13              \n\t"
+        "walignr2ne wr10, wr12, wr13    \n\t"
+        "wunpckelub wr0, wr2            \n\t"
+        "wunpckehub wr1, wr2            \n\t"
+        "wunpckelub wr8, wr10           \n\t"
+        "wunpckehub wr9, wr10           \n\t"
+        "waddhus wr0, wr0, wr8          \n\t"
+        "waddhus wr1, wr1, wr9          \n\t"
+
+        "1:                             \n\t"
+        // [wr0 wr1 wr2 wr3]
+        // [wr4 wr5 wr6 wr7] <= *
+        "wldrd wr12, [%[pixels]]        \n\t"
+        "cmp r12, #8                    \n\t"
+        "wldrd wr13, [%[pixels], #8]    \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "walignr1 wr6, wr12, wr13       \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "wmoveq wr10, wr13              \n\t"
+        "walignr2ne wr10, wr12, wr13    \n\t"
+        "wunpckelub wr4, wr6            \n\t"
+        "wunpckehub wr5, wr6            \n\t"
+        "wunpckelub wr8, wr10           \n\t"
+        "wunpckehub wr9, wr10           \n\t"
+        "waddhus wr4, wr4, wr8          \n\t"
+        "waddhus wr5, wr5, wr9          \n\t"
+        "waddhus wr8, wr0, wr4          \n\t"
+        "waddhus wr9, wr1, wr5          \n\t"
+        "waddhus wr8, wr8, wr15         \n\t"
+        "waddhus wr9, wr9, wr15         \n\t"
+        "wldrd wr12, [%[block]]         \n\t"
+        "wsrlhg wr8, wr8, wcgr0         \n\t"
+        "wsrlhg wr9, wr9, wcgr0         \n\t"
+        "wpackhus wr8, wr8, wr9         \n\t"
+        WAVG2B" wr8, wr8, wr12          \n\t"
+        "wstrd wr8, [%[block]]          \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+        "wldrd wr12, [%[pixels]]        \n\t"
+        "pld [%[block]]                 \n\t"
+        "pld [%[block], #32]            \n\t"
+
+        // [wr0 wr1 wr2 wr3] <= *
+        // [wr4 wr5 wr6 wr7]
+        "wldrd wr13, [%[pixels], #8]    \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "walignr1 wr2, wr12, wr13       \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "wmoveq wr10, wr13              \n\t"
+        "walignr2ne wr10, wr12, wr13    \n\t"
+        "wunpckelub wr0, wr2            \n\t"
+        "wunpckehub wr1, wr2            \n\t"
+        "wunpckelub wr8, wr10           \n\t"
+        "wunpckehub wr9, wr10           \n\t"
+        "waddhus wr0, wr0, wr8          \n\t"
+        "waddhus wr1, wr1, wr9          \n\t"
+        "waddhus wr8, wr0, wr4          \n\t"
+        "waddhus wr9, wr1, wr5          \n\t"
+        "waddhus wr8, wr8, wr15         \n\t"
+        "waddhus wr9, wr9, wr15         \n\t"
+        "wldrd wr12, [%[block]]         \n\t"
+        "wsrlhg wr8, wr8, wcgr0         \n\t"
+        "wsrlhg wr9, wr9, wcgr0         \n\t"
+        "wpackhus wr8, wr8, wr9         \n\t"
+        "subs %[h], %[h], #2            \n\t"
+        WAVG2B" wr8, wr8, wr12          \n\t"
+        "wstrd wr8, [%[block]]          \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+        "pld [%[block]]                 \n\t"
+        "pld [%[block], #32]            \n\t"
+        "bne 1b                         \n\t"
+        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+        : [line_size]"r"(line_size)
+        : "r12", "memory");
+}
+
+void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+    // [wr0 wr1 wr2 wr3] for previous line
+    // [wr4 wr5 wr6 wr7] for current line
+    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
+    __asm__ __volatile__(
+        "pld [%[block]]                 \n\t"
+        "pld [%[block], #32]            \n\t"
+        "pld [%[pixels]]                \n\t"
+        "mov r12, #2                    \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "tmcr wcgr0, r12                \n\t" /* for shift value */
+        /* alignment */
+        "and r12, %[pixels], #7         \n\t"
+        "bic %[pixels], %[pixels], #7           \n\t"
+        "tmcr wcgr1, r12                \n\t"
+        "add r12, r12, #1               \n\t"
+        "tmcr wcgr2, r12                \n\t"
+
+        // [wr0 wr1 wr2 wr3] <= *
+        // [wr4 wr5 wr6 wr7]
+        "wldrd wr12, [%[pixels]]        \n\t"
+        "cmp r12, #8                    \n\t"
+        "wldrd wr13, [%[pixels], #8]    \n\t"
+        "wldrd wr14, [%[pixels], #16]   \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "pld [%[pixels]]                \n\t"
+        "walignr1 wr2, wr12, wr13       \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "walignr1 wr3, wr13, wr14       \n\t"
+        "wmoveq wr10, wr13              \n\t"
+        "wmoveq wr11, wr14              \n\t"
+        "walignr2ne wr10, wr12, wr13    \n\t"
+        "walignr2ne wr11, wr13, wr14    \n\t"
+        "wunpckelub wr0, wr2            \n\t"
+        "wunpckehub wr1, wr2            \n\t"
+        "wunpckelub wr2, wr3            \n\t"
+        "wunpckehub wr3, wr3            \n\t"
+        "wunpckelub wr8, wr10           \n\t"
+        "wunpckehub wr9, wr10           \n\t"
+        "wunpckelub wr10, wr11          \n\t"
+        "wunpckehub wr11, wr11          \n\t"
+        "waddhus wr0, wr0, wr8          \n\t"
+        "waddhus wr1, wr1, wr9          \n\t"
+        "waddhus wr2, wr2, wr10         \n\t"
+        "waddhus wr3, wr3, wr11         \n\t"
+
+        "1:                             \n\t"
+        // [wr0 wr1 wr2 wr3]
+        // [wr4 wr5 wr6 wr7] <= *
+        "wldrd wr12, [%[pixels]]        \n\t"
+        "cmp r12, #8                    \n\t"
+        "wldrd wr13, [%[pixels], #8]    \n\t"
+        "wldrd wr14, [%[pixels], #16]   \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "walignr1 wr6, wr12, wr13       \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "walignr1 wr7, wr13, wr14       \n\t"
+        "wmoveq wr10, wr13              \n\t"
+        "wmoveq wr11, wr14              \n\t"
+        "walignr2ne wr10, wr12, wr13    \n\t"
+        "walignr2ne wr11, wr13, wr14    \n\t"
+        "wunpckelub wr4, wr6            \n\t"
+        "wunpckehub wr5, wr6            \n\t"
+        "wunpckelub wr6, wr7            \n\t"
+        "wunpckehub wr7, wr7            \n\t"
+        "wunpckelub wr8, wr10           \n\t"
+        "wunpckehub wr9, wr10           \n\t"
+        "wunpckelub wr10, wr11          \n\t"
+        "wunpckehub wr11, wr11          \n\t"
+        "waddhus wr4, wr4, wr8          \n\t"
+        "waddhus wr5, wr5, wr9          \n\t"
+        "waddhus wr6, wr6, wr10         \n\t"
+        "waddhus wr7, wr7, wr11         \n\t"
+        "waddhus wr8, wr0, wr4          \n\t"
+        "waddhus wr9, wr1, wr5          \n\t"
+        "waddhus wr10, wr2, wr6         \n\t"
+        "waddhus wr11, wr3, wr7         \n\t"
+        "waddhus wr8, wr8, wr15         \n\t"
+        "waddhus wr9, wr9, wr15         \n\t"
+        "waddhus wr10, wr10, wr15       \n\t"
+        "waddhus wr11, wr11, wr15       \n\t"
+        "wsrlhg wr8, wr8, wcgr0         \n\t"
+        "wsrlhg wr9, wr9, wcgr0         \n\t"
+        "wldrd wr12, [%[block]]         \n\t"
+        "wldrd wr13, [%[block], #8]     \n\t"
+        "wsrlhg wr10, wr10, wcgr0       \n\t"
+        "wsrlhg wr11, wr11, wcgr0       \n\t"
+        "wpackhus wr8, wr8, wr9         \n\t"
+        "wpackhus wr9, wr10, wr11       \n\t"
+        WAVG2B" wr8, wr8, wr12          \n\t"
+        WAVG2B" wr9, wr9, wr13          \n\t"
+        "wstrd wr8, [%[block]]          \n\t"
+        "wstrd wr9, [%[block], #8]      \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+
+        // [wr0 wr1 wr2 wr3] <= *
+        // [wr4 wr5 wr6 wr7]
+        "wldrd wr12, [%[pixels]]        \n\t"
+        "pld [%[block]]                 \n\t"
+        "wldrd wr13, [%[pixels], #8]    \n\t"
+        "pld [%[block], #32]            \n\t"
+        "wldrd wr14, [%[pixels], #16]   \n\t"
+        "add %[pixels], %[pixels], %[line_size] \n\t"
+        "walignr1 wr2, wr12, wr13       \n\t"
+        "pld [%[pixels]]                \n\t"
+        "pld [%[pixels], #32]           \n\t"
+        "walignr1 wr3, wr13, wr14       \n\t"
+        "wmoveq wr10, wr13              \n\t"
+        "wmoveq wr11, wr14              \n\t"
+        "walignr2ne wr10, wr12, wr13    \n\t"
+        "walignr2ne wr11, wr13, wr14    \n\t"
+        "wunpckelub wr0, wr2            \n\t"
+        "wunpckehub wr1, wr2            \n\t"
+        "wunpckelub wr2, wr3            \n\t"
+        "wunpckehub wr3, wr3            \n\t"
+        "wunpckelub wr8, wr10           \n\t"
+        "wunpckehub wr9, wr10           \n\t"
+        "wunpckelub wr10, wr11          \n\t"
+        "wunpckehub wr11, wr11          \n\t"
+        "waddhus wr0, wr0, wr8          \n\t"
+        "waddhus wr1, wr1, wr9          \n\t"
+        "waddhus wr2, wr2, wr10         \n\t"
+        "waddhus wr3, wr3, wr11         \n\t"
+        "waddhus wr8, wr0, wr4          \n\t"
+        "waddhus wr9, wr1, wr5          \n\t"
+        "waddhus wr10, wr2, wr6         \n\t"
+        "waddhus wr11, wr3, wr7         \n\t"
+        "waddhus wr8, wr8, wr15         \n\t"
+        "waddhus wr9, wr9, wr15         \n\t"
+        "waddhus wr10, wr10, wr15       \n\t"
+        "waddhus wr11, wr11, wr15       \n\t"
+        "wsrlhg wr8, wr8, wcgr0         \n\t"
+        "wsrlhg wr9, wr9, wcgr0         \n\t"
+        "wldrd wr12, [%[block]]         \n\t"
+        "wldrd wr13, [%[block], #8]     \n\t"
+        "wsrlhg wr10, wr10, wcgr0       \n\t"
+        "wsrlhg wr11, wr11, wcgr0       \n\t"
+        "wpackhus wr8, wr8, wr9         \n\t"
+        "wpackhus wr9, wr10, wr11       \n\t"
+        WAVG2B" wr8, wr8, wr12          \n\t"
+        WAVG2B" wr9, wr9, wr13          \n\t"
+        "wstrd wr8, [%[block]]          \n\t"
+        "wstrd wr9, [%[block], #8]      \n\t"
+        "add %[block], %[block], %[line_size]   \n\t"
+        "subs %[h], %[h], #2            \n\t"
+        "pld [%[block]]                 \n\t"
+        "pld [%[block], #32]            \n\t"
+        "bne 1b                         \n\t"
+        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+        : [line_size]"r"(line_size)
+        : "r12", "memory");
+}
diff --git a/contrib/ffmpeg/libavcodec/armv4l/jrevdct_arm.S b/contrib/ffmpeg/libavcodec/armv4l/jrevdct_arm.S
new file mode 100644
index 000000000..294ea4750
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/armv4l/jrevdct_arm.S
@@ -0,0 +1,386 @@
+/*
+   C-like prototype :
+        void j_rev_dct_ARM(DCTBLOCK data)
+
+   With DCTBLOCK being a pointer to an array of 64 'signed shorts'
+
+   Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+   COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+   IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+#define FIX_0_298631336 2446
+#define FIX_0_541196100 4433
+#define FIX_0_765366865 6270
+#define FIX_1_175875602 9633
+#define FIX_1_501321110 12299
+#define FIX_2_053119869 16819
+#define FIX_3_072711026 25172
+#define FIX_M_0_390180644 -3196
+#define FIX_M_0_899976223 -7373
+#define FIX_M_1_847759065 -15137
+#define FIX_M_1_961570560 -16069
+#define FIX_M_2_562915447 -20995
+#define FIX_0xFFFF 0xFFFF
+
+#define FIX_0_298631336_ID      0
+#define FIX_0_541196100_ID      4
+#define FIX_0_765366865_ID      8
+#define FIX_1_175875602_ID     12
+#define FIX_1_501321110_ID     16
+#define FIX_2_053119869_ID     20
+#define FIX_3_072711026_ID     24
+#define FIX_M_0_390180644_ID   28
+#define FIX_M_0_899976223_ID   32
+#define FIX_M_1_847759065_ID   36
+#define FIX_M_1_961570560_ID   40
+#define FIX_M_2_562915447_ID   44
+#define FIX_0xFFFF_ID          48
+        .text
+        .align
+
+        .global j_rev_dct_ARM
+j_rev_dct_ARM:
+        stmdb   sp!, { r4 - r12, lr }   @ all callee saved regs
+
+        sub sp, sp, #4                  @ reserve some space on the stack
+        str r0, [ sp ]                  @ save the DCT pointer to the stack
+
+        mov lr, r0                      @ lr = pointer to the current row
+        mov r12, #8                     @ r12 = row-counter
+        add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
+row_loop:
+        ldrsh r0, [lr, # 0]             @ r0 = 'd0'
+        ldrsh r1, [lr, # 8]             @ r1 = 'd1'
+
+        @ Optimization for row that have all items except the first set to 0
+        @ (this works as the DCTELEMS are always 4-byte aligned)
+        ldr r5, [lr, # 0]
+        ldr r2, [lr, # 4]
+        ldr r3, [lr, # 8]
+        ldr r4, [lr, #12]
+        orr r3, r3, r4
+        orr r3, r3, r2
+        orrs r5, r3, r5
+        beq end_of_row_loop             @ nothing to be done as ALL of them are '0'
+        orrs r2, r3, r1
+        beq empty_row
+
+        ldrsh r2, [lr, # 2]             @ r2 = 'd2'
+        ldrsh r4, [lr, # 4]             @ r4 = 'd4'
+        ldrsh r6, [lr, # 6]             @ r6 = 'd6'
+
+        ldr r3, [r11, #FIX_0_541196100_ID]
+        add r7, r2, r6
+        ldr r5, [r11, #FIX_M_1_847759065_ID]
+        mul r7, r3, r7                      @ r7 = z1
+        ldr r3, [r11, #FIX_0_765366865_ID]
+        mla r6, r5, r6, r7                  @ r6 = tmp2
+        add r5, r0, r4                      @ r5 = tmp0
+        mla r2, r3, r2, r7                  @ r2 = tmp3
+        sub r3, r0, r4                      @ r3 = tmp1
+
+        add r0, r2, r5, lsl #13             @ r0 = tmp10
+        rsb r2, r2, r5, lsl #13             @ r2 = tmp13
+        add r4, r6, r3, lsl #13             @ r4 = tmp11
+        rsb r3, r6, r3, lsl #13             @ r3 = tmp12
+
+        stmdb   sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
+
+        ldrsh r3, [lr, #10]             @ r3 = 'd3'
+        ldrsh r5, [lr, #12]             @ r5 = 'd5'
+        ldrsh r7, [lr, #14]             @ r7 = 'd7'
+
+        add r0, r3, r5                        @ r0 = 'z2'
+        add r2, r1, r7                  @ r2 = 'z1'
+        add r4, r3, r7                  @ r4 = 'z3'
+        add r6, r1, r5                  @ r6 = 'z4'
+        ldr r9, [r11, #FIX_1_175875602_ID]
+        add r8, r4, r6                  @ r8 = z3 + z4
+        ldr r10, [r11, #FIX_M_0_899976223_ID]
+        mul r8, r9, r8                  @ r8 = 'z5'
+        ldr r9, [r11, #FIX_M_2_562915447_ID]
+        mul r2, r10, r2                 @ r2 = 'z1'
+        ldr r10, [r11, #FIX_M_1_961570560_ID]
+        mul r0, r9, r0                  @ r0 = 'z2'
+        ldr r9, [r11, #FIX_M_0_390180644_ID]
+        mla r4, r10, r4, r8             @ r4 = 'z3'
+        ldr r10, [r11, #FIX_0_298631336_ID]
+        mla r6, r9, r6, r8              @ r6 = 'z4'
+        ldr r9, [r11, #FIX_2_053119869_ID]
+        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
+        ldr r10, [r11, #FIX_3_072711026_ID]
+        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
+        ldr r9, [r11, #FIX_1_501321110_ID]
+        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
+        add r7, r7, r4                  @ r7 = tmp0
+        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
+        add r5,        r5, r6                  @ r5 = tmp1
+        add r3, r3, r4                  @ r3 = tmp2
+        add r1, r1, r6                  @ r1 = tmp3
+
+        ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
+                                      @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
+
+        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
+        add r8, r0, r1
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 0]
+
+        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
+        sub r8, r0, r1
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, #14]
+
+        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
+        add r8, r6, r3
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 2]
+
+        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
+        sub r8, r6, r3
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, #12]
+
+        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
+        add r8, r4, r5
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 4]
+
+        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
+        sub r8, r4, r5
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, #10]
+
+        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
+        add r8, r2, r7
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 6]
+
+        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
+        sub r8, r2, r7
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 8]
+
+        @ End of row loop
+        add lr, lr, #16
+        subs r12, r12, #1
+        bne row_loop
+        beq start_column_loop
+
+empty_row:
+        ldr r1, [r11, #FIX_0xFFFF_ID]
+        mov r0, r0, lsl #2
+        and r0, r0, r1
+        add r0, r0, r0, lsl #16
+        str r0, [lr, # 0]
+        str r0, [lr, # 4]
+        str r0, [lr, # 8]
+        str r0, [lr, #12]
+
+end_of_row_loop:
+        @ End of loop
+        add lr, lr, #16
+        subs r12, r12, #1
+        bne row_loop
+
+start_column_loop:
+        @ Start of column loop
+        ldr lr, [ sp ]
+        mov r12, #8
+column_loop:
+        ldrsh r0, [lr, #( 0*8)]             @ r0 = 'd0'
+        ldrsh r2, [lr, #( 4*8)]             @ r2 = 'd2'
+        ldrsh r4, [lr, #( 8*8)]             @ r4 = 'd4'
+        ldrsh r6, [lr, #(12*8)]             @ r6 = 'd6'
+
+        ldr r3, [r11, #FIX_0_541196100_ID]
+        add r1, r2, r6
+        ldr r5, [r11, #FIX_M_1_847759065_ID]
+        mul r1, r3, r1                      @ r1 = z1
+        ldr r3, [r11, #FIX_0_765366865_ID]
+        mla r6, r5, r6, r1                  @ r6 = tmp2
+        add r5, r0, r4                      @ r5 = tmp0
+        mla r2, r3, r2, r1                  @ r2 = tmp3
+        sub r3, r0, r4                      @ r3 = tmp1
+
+        add r0, r2, r5, lsl #13             @ r0 = tmp10
+        rsb r2, r2, r5, lsl #13             @ r2 = tmp13
+        add r4, r6, r3, lsl #13             @ r4 = tmp11
+        rsb r6, r6, r3, lsl #13             @ r6 = tmp12
+
+        ldrsh r1, [lr, #( 2*8)]             @ r1 = 'd1'
+        ldrsh r3, [lr, #( 6*8)]             @ r3 = 'd3'
+        ldrsh r5, [lr, #(10*8)]             @ r5 = 'd5'
+        ldrsh r7, [lr, #(14*8)]             @ r7 = 'd7'
+
+        @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
+        orr r9, r1, r3
+        orr r10, r5, r7
+        orrs r10, r9, r10
+        beq empty_odd_column
+
+        stmdb   sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11
+
+        add r0, r3, r5                  @ r0 = 'z2'
+        add r2, r1, r7                  @ r2 = 'z1'
+        add r4, r3, r7                  @ r4 = 'z3'
+        add r6, r1, r5                  @ r6 = 'z4'
+        ldr r9, [r11, #FIX_1_175875602_ID]
+        add r8, r4, r6
+        ldr r10, [r11, #FIX_M_0_899976223_ID]
+        mul r8, r9, r8                  @ r8 = 'z5'
+        ldr r9, [r11, #FIX_M_2_562915447_ID]
+        mul r2, r10, r2                 @ r2 = 'z1'
+        ldr r10, [r11, #FIX_M_1_961570560_ID]
+        mul r0, r9, r0                  @ r0 = 'z2'
+        ldr r9, [r11, #FIX_M_0_390180644_ID]
+        mla r4, r10, r4, r8             @ r4 = 'z3'
+        ldr r10, [r11, #FIX_0_298631336_ID]
+        mla r6, r9, r6, r8              @ r6 = 'z4'
+        ldr r9, [r11, #FIX_2_053119869_ID]
+        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
+        ldr r10, [r11, #FIX_3_072711026_ID]
+        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
+        ldr r9, [r11, #FIX_1_501321110_ID]
+        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
+        add r7, r7, r4                  @ r7 = tmp0
+        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
+        add r5,        r5, r6                  @ r5 = tmp1
+        add r3, r3, r4                  @ r3 = tmp2
+        add r1, r1, r6                  @ r1 = tmp3
+
+        ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
+                                      @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
+
+        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
+        add r8, r0, r1
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 0*8)]
+
+        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
+        sub r8, r0, r1
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #(14*8)]
+
+        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
+        add r8, r4, r3
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 2*8)]
+
+        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
+        sub r8, r4, r3
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #(12*8)]
+
+        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
+        add r8, r6, r5
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 4*8)]
+
+        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
+        sub r8, r6, r5
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #(10*8)]
+
+        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
+        add r8, r2, r7
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 6*8)]
+
+        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
+        sub r8, r2, r7
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 8*8)]
+
+        @ End of row loop
+        add lr, lr, #2
+        subs r12, r12, #1
+        bne column_loop
+        beq the_end
+
+empty_odd_column:
+        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
+        add r0, r0, #(1<<17)
+        mov r0, r0, asr #18
+        strh r0, [lr, #( 0*8)]
+        strh r0, [lr, #(14*8)]
+
+        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
+        add r4, r4, #(1<<17)
+        mov r4, r4, asr #18
+        strh r4, [lr, #( 2*8)]
+        strh r4, [lr, #(12*8)]
+
+        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
+        add r6, r6, #(1<<17)
+        mov r6, r6, asr #18
+        strh r6, [lr, #( 4*8)]
+        strh r6, [lr, #(10*8)]
+
+        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
+        add r2, r2, #(1<<17)
+        mov r2, r2, asr #18
+        strh r2, [lr, #( 6*8)]
+        strh r2, [lr, #( 8*8)]
+
+        @ End of row loop
+        add lr, lr, #2
+        subs r12, r12, #1
+        bne column_loop
+
+the_end:
+        @ The end....
+        add sp, sp, #4
+        ldmia   sp!, { r4 - r12, pc }   @ restore callee saved regs and return
+
+const_array:
+        .align
+        .word FIX_0_298631336
+        .word FIX_0_541196100
+        .word FIX_0_765366865
+        .word FIX_1_175875602
+        .word FIX_1_501321110
+        .word FIX_2_053119869
+        .word FIX_3_072711026
+        .word FIX_M_0_390180644
+        .word FIX_M_0_899976223
+        .word FIX_M_1_847759065
+        .word FIX_M_1_961570560
+        .word FIX_M_2_562915447
+        .word FIX_0xFFFF
diff --git a/contrib/ffmpeg/libavcodec/armv4l/mathops.h b/contrib/ffmpeg/libavcodec/armv4l/mathops.h
new file mode 100644
index 000000000..7ddd0ec6e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/armv4l/mathops.h
@@ -0,0 +1,49 @@
+/*
+ * simple math operations
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef FRAC_BITS
+#   define MULL(a, b) \
+        ({  int lo, hi;\
+         asm("smull %0, %1, %2, %3     \n\t"\
+             "mov   %0, %0,     lsr %4\n\t"\
+             "add   %1, %0, %1, lsl %5\n\t"\
+             : "=&r"(lo), "=&r"(hi)\
+             : "r"(b), "r"(a), "i"(FRAC_BITS), "i"(32-FRAC_BITS));\
+         hi; })
+#endif
+
+#define MULH(a, b) \
+    ({ int lo, hi;\
+     asm ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));\
+     hi; })
+
+#if defined(HAVE_ARMV5TE)
+
+/* signed 16x16 -> 32 multiply add accumulate */
+#   define MAC16(rt, ra, rb) \
+        asm ("smlabb %0, %2, %3, %0" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
+/* signed 16x16 -> 32 multiply */
+#   define MUL16(ra, rb)                                                \
+        ({ int __rt;                                                    \
+         asm ("smulbb %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb));  \
+         __rt; })
+
+#endif
diff --git a/contrib/ffmpeg/libavcodec/armv4l/mpegvideo_arm.c b/contrib/ffmpeg/libavcodec/armv4l/mpegvideo_arm.c
new file mode 100644
index 000000000..10a005cd3
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/armv4l/mpegvideo_arm.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2002 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+#include "../avcodec.h"
+
+extern void MPV_common_init_iwmmxt(MpegEncContext *s);
+
+void MPV_common_init_armv4l(MpegEncContext *s)
+{
+#ifdef HAVE_IWMMXT
+    MPV_common_init_iwmmxt(s);
+#endif
+}
diff --git a/contrib/ffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c b/contrib/ffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c
new file mode 100644
index 000000000..1336ac5f8
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/armv4l/mpegvideo_iwmmxt.c
@@ -0,0 +1,119 @@
+/*
+ * copyright (c) 2004 AGAWA Koji
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+#include "../avcodec.h"
+
+static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s,
+                                             DCTELEM *block, int n, int qscale)
+{
+    int level, qmul, qadd;
+    int nCoeffs;
+    DCTELEM *block_orig = block;
+
+    assert(s->block_last_index[n]>=0);
+
+    qmul = qscale << 1;
+
+    if (!s->h263_aic) {
+        if (n < 4)
+            level = block[0] * s->y_dc_scale;
+        else
+            level = block[0] * s->c_dc_scale;
+        qadd = (qscale - 1) | 1;
+    }else{
+        qadd = 0;
+        level = block[0];
+    }
+    if(s->ac_pred)
+        nCoeffs=63;
+    else
+        nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+    __asm__ __volatile__ (
+/*      "movd %1, %%mm6                 \n\t" //qmul */
+/*      "packssdw %%mm6, %%mm6          \n\t" */
+/*      "packssdw %%mm6, %%mm6          \n\t" */
+        "tbcsth wr6, %[qmul]            \n\t"
+/*      "movd %2, %%mm5                 \n\t" //qadd */
+/*      "packssdw %%mm5, %%mm5          \n\t" */
+/*      "packssdw %%mm5, %%mm5          \n\t" */
+        "tbcsth wr5, %[qadd]            \n\t"
+        "wzero wr7                      \n\t" /* "pxor %%mm7, %%mm7             \n\t" */
+        "wzero wr4                      \n\t" /* "pxor %%mm4, %%mm4             \n\t" */
+        "wsubh wr7, wr5, wr7            \n\t" /* "psubw %%mm5, %%mm7            \n\t" */
+        "1:                             \n\t"
+        "wldrd wr2, [%[block]]          \n\t" /* "movq (%0, %3), %%mm0          \n\t" */
+        "wldrd wr3, [%[block], #8]      \n\t" /* "movq 8(%0, %3), %%mm1         \n\t" */
+        "wmulsl wr0, wr6, wr2           \n\t" /* "pmullw %%mm6, %%mm0           \n\t" */
+        "wmulsl wr1, wr6, wr3           \n\t" /* "pmullw %%mm6, %%mm1           \n\t" */
+/*      "movq (%0, %3), %%mm2           \n\t" */
+/*      "movq 8(%0, %3), %%mm3          \n\t" */
+        "wcmpgtsh wr2, wr4, wr2         \n\t" /* "pcmpgtw %%mm4, %%mm2          \n\t" // block[i] < 0 ? -1 : 0 */
+        "wcmpgtsh wr3, wr4, wr2         \n\t" /* "pcmpgtw %%mm4, %%mm3          \n\t" // block[i] < 0 ? -1 : 0 */
+        "wxor wr0, wr2, wr0             \n\t" /* "pxor %%mm2, %%mm0             \n\t" */
+        "wxor wr1, wr3, wr1             \n\t" /* "pxor %%mm3, %%mm1             \n\t" */
+        "waddh wr0, wr7, wr0            \n\t" /* "paddw %%mm7, %%mm0            \n\t" */
+        "waddh wr1, wr7, wr1            \n\t" /* "paddw %%mm7, %%mm1            \n\t" */
+        "wxor wr2, wr0, wr2             \n\t" /* "pxor %%mm0, %%mm2             \n\t" */
+        "wxor wr3, wr1, wr3             \n\t" /* "pxor %%mm1, %%mm3             \n\t" */
+        "wcmpeqh wr0, wr7, wr0          \n\t" /* "pcmpeqw %%mm7, %%mm0          \n\t" // block[i] == 0 ? -1 : 0 */
+        "wcmpeqh wr1, wr7, wr1          \n\t" /* "pcmpeqw %%mm7, %%mm1          \n\t" // block[i] == 0 ? -1 : 0 */
+        "wandn wr0, wr2, wr0            \n\t" /* "pandn %%mm2, %%mm0            \n\t" */
+        "wandn wr1, wr3, wr1            \n\t" /* "pandn %%mm3, %%mm1            \n\t" */
+        "wstrd wr0, [%[block]]          \n\t" /* "movq %%mm0, (%0, %3)          \n\t" */
+        "wstrd wr1, [%[block], #8]      \n\t" /* "movq %%mm1, 8(%0, %3)         \n\t" */
+        "add %[block], %[block], #16    \n\t" /* "addl $16, %3                  \n\t" */
+        "subs %[i], %[i], #1            \n\t"
+        "bne 1b                         \n\t" /* "jng 1b                                \n\t" */
+        :[block]"+r"(block)
+        :[i]"r"((nCoeffs + 8) / 8), [qmul]"r"(qmul), [qadd]"r"(qadd)
+        :"memory");
+
+    block_orig[0] = level;
+}
+
+#if 0
+static void dct_unquantize_h263_inter_iwmmxt(MpegEncContext *s,
+                                             DCTELEM *block, int n, int qscale)
+{
+    int nCoeffs;
+
+    assert(s->block_last_index[n]>=0);
+
+    if(s->ac_pred)
+        nCoeffs=63;
+    else
+        nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+    ippiQuantInvInter_Compact_H263_16s_I(block, nCoeffs+1, qscale);
+}
+#endif
+
+void MPV_common_init_iwmmxt(MpegEncContext *s)
+{
+    if (!(mm_flags & MM_IWMMXT)) return;
+
+    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_iwmmxt;
+#if 0
+    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_iwmmxt;
+#endif
+}
diff --git a/contrib/ffmpeg/libavcodec/armv4l/simple_idct_arm.S b/contrib/ffmpeg/libavcodec/armv4l/simple_idct_arm.S
new file mode 100644
index 000000000..b5a20f6da
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/armv4l/simple_idct_arm.S
@@ -0,0 +1,487 @@
+/*
+ * simple_idct_arm.S
+ * Copyright (C) 2002 Frederic 'dilb' Boulay.
+ * All Rights Reserved.
+ *
+ * Author: Frederic Boulay <dilb@handhelds.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * The function defined in this file, is derived from the simple_idct function
+ * from the libavcodec library part of the ffmpeg project.
+ */
+
+/* useful constants for the algorithm, they are save in __constant_ptr__ at */
+/* the end of the source code.*/
+#define W1  22725
+#define W2  21407
+#define W3  19266
+#define W4  16383
+#define W5  12873
+#define W6  8867
+#define W7  4520
+#define MASK_MSHW 0xFFFF0000
+
+/* offsets of the constants in the vector */
+#define offW1  0
+#define offW2  4
+#define offW3  8
+#define offW4  12
+#define offW5  16
+#define offW6  20
+#define offW7  24
+#define offMASK_MSHW 28
+
+#define ROW_SHIFT 11
+#define ROW_SHIFT2MSHW (16-11)
+#define COL_SHIFT 20
+#define ROW_SHIFTED_1 1024 /* 1<< (ROW_SHIFT-1) */
+#define COL_SHIFTED_1 524288 /* 1<< (COL_SHIFT-1) */
+
+
+        .text
+        .align
+        .global simple_idct_ARM
+
+simple_idct_ARM:
+        @@ void simple_idct_ARM(int16_t *block)
+        @@ save stack for reg needed (take all of them),
+        @@ R0-R3 are scratch regs, so no need to save them, but R0 contains the pointer to block
+        @@ so it must not be overwritten, if it is not saved!!
+        @@ R12 is another scratch register, so it should not be saved too
+        @@ save all registers
+        stmfd sp!, {r4-r11, r14} @ R14 is also called LR
+        @@ at this point, R0=block, other registers are free.
+        add r14, r0, #112        @ R14=&block[8*7], better start from the last row, and decrease the value until row=0, i.e. R12=block.
+        add r12, pc, #(__constant_ptr__-.-8) @ R12=__constant_ptr__, the vector containing the constants, probably not necessary to reserve a register for it
+        @@ add 2 temporary variables in the stack: R0 and R14
+        sub sp, sp, #8          @ allow 2 local variables
+        str r0, [sp, #0]        @ save block in sp[0]
+        @@ stack status
+        @@ sp+4   free
+        @@ sp+0   R0  (block)
+
+
+        @@ at this point, R0=block, R14=&block[56], R12=__const_ptr_, R1-R11 free
+
+
+__row_loop:
+        @@ read the row and check if it is null, almost null, or not, according to strongarm specs, it is not necessary to optimise ldr accesses (i.e. split 32bits in 2 16bits words), at least it gives more usable registers :)
+        ldr r1, [r14, #0]        @ R1=(int32)(R12)[0]=ROWr32[0] (relative row cast to a 32b pointer)
+        ldr r2, [r14, #4]        @ R2=(int32)(R12)[1]=ROWr32[1]
+        ldr r3, [r14, #8]        @ R3=ROWr32[2]
+        ldr r4, [r14, #12]       @ R4=ROWr32[3]
+        @@ check if the words are null, if all of them are null, then proceed with next row (branch __end_row_loop),
+        @@ if ROWr16[0] is the only one not null, then proceed with this special case (branch __almost_empty_row)
+        @@ else follow the complete algorithm.
+        @@ at this point, R0=block, R14=&block[n], R12=__const_ptr_, R1=ROWr32[0], R2=ROWr32[1],
+        @@                R3=ROWr32[2], R4=ROWr32[3], R5-R11 free
+        orr r5, r4, r3           @ R5=R4 | R3
+        orr r5, r5, r2           @ R5=R4 | R3 | R2
+        orrs r6, r5, r1          @ Test R5 | R1 (the aim is to check if everything is null)
+        beq __end_row_loop
+        mov r7, r1, asr #16      @ R7=R1>>16=ROWr16[1] (evaluate it now, as it could be useful later)
+        ldrsh r6, [r14, #0]      @ R6=ROWr16[0]
+        orrs r5, r5, r7          @ R5=R4 | R3 | R2 | R7
+        beq __almost_empty_row
+
+__b_evaluation:
+        @@ at this point, R0=block (temp),  R1(free), R2=ROWr32[1], R3=ROWr32[2], R4=ROWr32[3],
+        @@     R5=(temp), R6=ROWr16[0], R7=ROWr16[1], R8-R11 free,
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ to save some registers/calls, proceed with b0-b3 first, followed by a0-a3
+
+        @@ MUL16(b0, W1, row[1]);
+        @@ MUL16(b1, W3, row[1]);
+        @@ MUL16(b2, W5, row[1]);
+        @@ MUL16(b3, W7, row[1]);
+        @@ MAC16(b0, W3, row[3]);
+        @@ MAC16(b1, -W7, row[3]);
+        @@ MAC16(b2, -W1, row[3]);
+        @@ MAC16(b3, -W5, row[3]);
+        ldr r8, [r12, #offW1]    @ R8=W1
+        mov r2, r2, asr #16      @ R2=ROWr16[3]
+        mul r0, r8, r7           @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        ldr r9, [r12, #offW3]    @ R9=W3
+        ldr r10, [r12, #offW5]   @ R10=W5
+        mul r1, r9, r7           @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        ldr r11, [r12, #offW7]   @ R11=W7
+        mul r5, r10, r7          @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        mul r7, r11, r7          @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+                teq r2, #0               @ if null avoid muls
+                mlane r0, r9, r2, r0     @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        rsbne r2, r2, #0         @ R2=-ROWr16[3]
+        mlane r1, r11, r2, r1    @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        mlane r5, r8, r2, r5     @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        mlane r7, r10, r2, r7    @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+
+        @@ at this point, R0=b0,  R1=b1, R2 (free), R3=ROWr32[2], R4=ROWr32[3],
+        @@     R5=b2, R6=ROWr16[0], R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
+        @@ if (temp != 0) {}
+        orrs r2, r3, r4          @ R2=ROWr32[2] | ROWr32[3]
+        beq __end_b_evaluation
+
+        @@ at this point, R0=b0,  R1=b1, R2 (free), R3=ROWr32[2], R4=ROWr32[3],
+        @@     R5=b2, R6=ROWr16[0], R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ MAC16(b0, W5, row[5]);
+        @@ MAC16(b2, W7, row[5]);
+        @@ MAC16(b3, W3, row[5]);
+        @@ MAC16(b1, -W1, row[5]);
+        @@ MAC16(b0, W7, row[7]);
+        @@ MAC16(b2, W3, row[7]);
+        @@ MAC16(b3, -W1, row[7]);
+        @@ MAC16(b1, -W5, row[7]);
+        mov r3, r3, asr #16      @ R3=ROWr16[5]
+                teq r3, #0               @ if null avoid muls
+        mlane r0, r10, r3, r0    @ R0+=W5*ROWr16[5]=b0
+        mov r4, r4, asr #16      @ R4=ROWr16[7]
+        mlane r5, r11, r3, r5    @ R5+=W7*ROWr16[5]=b2
+        mlane r7, r9, r3, r7     @ R7+=W3*ROWr16[5]=b3
+        rsbne r3, r3, #0         @ R3=-ROWr16[5]
+        mlane r1, r8, r3, r1     @ R7-=W1*ROWr16[5]=b1
+        @@ R3 is free now
+                teq r4, #0               @ if null avoid muls
+        mlane r0, r11, r4, r0    @ R0+=W7*ROWr16[7]=b0
+        mlane r5, r9, r4, r5     @ R5+=W3*ROWr16[7]=b2
+        rsbne r4, r4, #0         @ R4=-ROWr16[7]
+        mlane r7, r8, r4, r7     @ R7-=W1*ROWr16[7]=b3
+        mlane r1, r10, r4, r1    @ R1-=W5*ROWr16[7]=b1
+        @@ R4 is free now
+__end_b_evaluation:
+        @@ at this point, R0=b0,  R1=b1, R2=ROWr32[2] | ROWr32[3] (tmp), R3 (free), R4 (free),
+        @@     R5=b2, R6=ROWr16[0], R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
+        @@     R12=__const_ptr_, R14=&block[n]
+
+__a_evaluation:
+        @@ a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
+        @@ a1 = a0 + W6 * row[2];
+        @@ a2 = a0 - W6 * row[2];
+        @@ a3 = a0 - W2 * row[2];
+        @@ a0 = a0 + W2 * row[2];
+        ldr r9, [r12, #offW4]    @ R9=W4
+        mul r6, r9, r6           @ R6=W4*ROWr16[0]
+        ldr r10, [r12, #offW6]   @ R10=W6
+        ldrsh r4, [r14, #4]      @ R4=ROWr16[2] (a3 not defined yet)
+        add r6, r6, #ROW_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(ROW_SHIFT-1) (a0)
+
+        mul r11, r10, r4         @ R11=W6*ROWr16[2]
+        ldr r8, [r12, #offW2]    @ R8=W2
+        sub r3, r6, r11          @ R3=a0-W6*ROWr16[2] (a2)
+        @@ temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
+        @@ if (temp != 0) {}
+        teq r2, #0
+        beq __end_bef_a_evaluation
+
+        add r2, r6, r11          @ R2=a0+W6*ROWr16[2] (a1)
+        mul r11, r8, r4          @ R11=W2*ROWr16[2]
+        sub r4, r6, r11          @ R4=a0-W2*ROWr16[2] (a3)
+        add r6, r6, r11          @ R6=a0+W2*ROWr16[2] (a0)
+
+
+        @@ at this point, R0=b0,  R1=b1, R2=a1, R3=a2, R4=a3,
+        @@     R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free),
+        @@     R12=__const_ptr_, R14=&block[n]
+
+
+        @@ a0 += W4*row[4]
+        @@ a1 -= W4*row[4]
+        @@ a2 -= W4*row[4]
+        @@ a3 += W4*row[4]
+        ldrsh r11, [r14, #8]     @ R11=ROWr16[4]
+                teq r11, #0              @ if null avoid muls
+        mulne r11, r9, r11       @ R11=W4*ROWr16[4]
+        @@ R9 is free now
+        ldrsh r9, [r14, #12]     @ R9=ROWr16[6]
+        addne r6, r6, r11        @ R6+=W4*ROWr16[4] (a0)
+        subne r2, r2, r11        @ R2-=W4*ROWr16[4] (a1)
+        subne r3, r3, r11        @ R3-=W4*ROWr16[4] (a2)
+        addne r4, r4, r11        @ R4+=W4*ROWr16[4] (a3)
+        @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
+                teq r9, #0               @ if null avoid muls
+        mulne r11, r10, r9       @ R11=W6*ROWr16[6]
+        addne r6, r6, r11        @ R6+=W6*ROWr16[6] (a0)
+        mulne r10, r8, r9        @ R10=W2*ROWr16[6]
+        @@ a0 += W6*row[6];
+        @@ a3 -= W6*row[6];
+        @@ a1 -= W2*row[6];
+        @@ a2 += W2*row[6];
+        subne r4, r4, r11        @ R4-=W6*ROWr16[6] (a3)
+        subne r2, r2, r10        @ R2-=W2*ROWr16[6] (a1)
+        addne r3, r3, r10        @ R3+=W2*ROWr16[6] (a2)
+
+__end_a_evaluation:
+        @@ at this point, R0=b0,  R1=b1, R2=a1, R3=a2, R4=a3,
+        @@     R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ row[0] = (a0 + b0) >> ROW_SHIFT;
+        @@ row[1] = (a1 + b1) >> ROW_SHIFT;
+        @@ row[2] = (a2 + b2) >> ROW_SHIFT;
+        @@ row[3] = (a3 + b3) >> ROW_SHIFT;
+        @@ row[4] = (a3 - b3) >> ROW_SHIFT;
+        @@ row[5] = (a2 - b2) >> ROW_SHIFT;
+        @@ row[6] = (a1 - b1) >> ROW_SHIFT;
+        @@ row[7] = (a0 - b0) >> ROW_SHIFT;
+        add r8, r6, r0           @ R8=a0+b0
+        add r9, r2, r1           @ R9=a1+b1
+        @@ put 2 16 bits half-words in a 32bits word
+        @@ ROWr32[0]=ROWr16[0] | (ROWr16[1]<<16) (only Little Endian compliant then!!!)
+        ldr r10, [r12, #offMASK_MSHW] @ R10=0xFFFF0000
+        and r9, r10, r9, lsl #ROW_SHIFT2MSHW @ R9=0xFFFF0000 & ((a1+b1)<<5)
+        mvn r11, r10             @ R11= NOT R10= 0x0000FFFF
+        and r8, r11, r8, asr #ROW_SHIFT @ R8=0x0000FFFF & ((a0+b0)>>11)
+        orr r8, r8, r9
+        str r8, [r14, #0]
+
+        add r8, r3, r5           @ R8=a2+b2
+        add r9, r4, r7           @ R9=a3+b3
+        and r9, r10, r9, lsl #ROW_SHIFT2MSHW @ R9=0xFFFF0000 & ((a3+b3)<<5)
+        and r8, r11, r8, asr #ROW_SHIFT @ R8=0x0000FFFF & ((a2+b2)>>11)
+        orr r8, r8, r9
+        str r8, [r14, #4]
+
+        sub r8, r4, r7           @ R8=a3-b3
+        sub r9, r3, r5           @ R9=a2-b2
+        and r9, r10, r9, lsl #ROW_SHIFT2MSHW @ R9=0xFFFF0000 & ((a2-b2)<<5)
+        and r8, r11, r8, asr #ROW_SHIFT @ R8=0x0000FFFF & ((a3-b3)>>11)
+        orr r8, r8, r9
+        str r8, [r14, #8]
+
+        sub r8, r2, r1           @ R8=a1-b1
+        sub r9, r6, r0           @ R9=a0-b0
+        and r9, r10, r9, lsl #ROW_SHIFT2MSHW @ R9=0xFFFF0000 & ((a0-b0)<<5)
+        and r8, r11, r8, asr #ROW_SHIFT @ R8=0x0000FFFF & ((a1-b1)>>11)
+        orr r8, r8, r9
+        str r8, [r14, #12]
+
+        bal __end_row_loop
+
+__almost_empty_row:
+        @@ the row was empty, except ROWr16[0], now, management of this special case
+        @@ at this point, R0=block, R14=&block[n], R12=__const_ptr_, R1=ROWr32[0], R2=ROWr32[1],
+        @@                R3=ROWr32[2], R4=ROWr32[3], R5=(temp), R6=ROWr16[0], R7=ROWr16[1],
+        @@                R8=0xFFFF (temp), R9-R11 free
+        mov r8, #0x10000         @ R8=0xFFFF (2 steps needed!) it saves a ldr call (because of delay run).
+        sub r8, r8, #1           @ R8 is now ready.
+        and r5, r8, r6, lsl #3   @ R5=R8 & (R6<<3)= (ROWr16[0]<<3) & 0xFFFF
+        orr r5, r5, r5, lsl #16  @ R5=R5 | (R5<<16)
+        str r5, [r14, #0]        @ R14[0]=ROWr32[0]=R5
+        str r5, [r14, #4]        @ R14[4]=ROWr32[1]=R5
+        str r5, [r14, #8]        @ R14[8]=ROWr32[2]=R5
+        str r5, [r14, #12]       @ R14[12]=ROWr32[3]=R5
+
+__end_row_loop:
+        @@ at this point, R0-R11 (free)
+        @@     R12=__const_ptr_, R14=&block[n]
+        ldr r0, [sp, #0]         @ R0=block
+        teq r0, r14              @ compare current &block[8*n] to block, when block is reached, the loop is finished.
+        sub r14, r14, #16
+        bne __row_loop
+
+
+
+        @@ at this point, R0=block, R1-R11 (free)
+        @@     R12=__const_ptr_, R14=&block[n]
+        add r14, r0, #14        @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block.
+__col_loop:
+
+__b_evaluation2:
+        @@ at this point, R0=block (temp),  R1-R11 (free)
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ proceed with b0-b3 first, followed by a0-a3
+        @@ MUL16(b0, W1, col[8x1]);
+        @@ MUL16(b1, W3, col[8x1]);
+        @@ MUL16(b2, W5, col[8x1]);
+        @@ MUL16(b3, W7, col[8x1]);
+        @@ MAC16(b0, W3, col[8x3]);
+        @@ MAC16(b1, -W7, col[8x3]);
+        @@ MAC16(b2, -W1, col[8x3]);
+        @@ MAC16(b3, -W5, col[8x3]);
+        ldr r8, [r12, #offW1]    @ R8=W1
+        ldrsh r7, [r14, #16]
+        mul r0, r8, r7           @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        ldr r9, [r12, #offW3]    @ R9=W3
+        ldr r10, [r12, #offW5]   @ R10=W5
+        mul r1, r9, r7           @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        ldr r11, [r12, #offW7]   @ R11=W7
+        mul r5, r10, r7          @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        ldrsh r2, [r14, #48]
+        mul r7, r11, r7          @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        teq r2, #0               @ if 0, then avoid muls
+        mlane r0, r9, r2, r0     @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        rsbne r2, r2, #0         @ R2=-ROWr16[3]
+        mlane r1, r11, r2, r1    @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        mlane r5, r8, r2, r5     @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        mlane r7, r10, r2, r7    @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+
+        @@ at this point, R0=b0,  R1=b1, R2 (free), R3 (free), R4 (free),
+        @@     R5=b2, R6 (free), R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ MAC16(b0, W5, col[5x8]);
+        @@ MAC16(b2, W7, col[5x8]);
+        @@ MAC16(b3, W3, col[5x8]);
+        @@ MAC16(b1, -W1, col[5x8]);
+        @@ MAC16(b0, W7, col[7x8]);
+        @@ MAC16(b2, W3, col[7x8]);
+        @@ MAC16(b3, -W1, col[7x8]);
+        @@ MAC16(b1, -W5, col[7x8]);
+        ldrsh r3, [r14, #80]     @ R3=COLr16[5x8]
+        teq r3, #0               @ if 0 then avoid muls
+        mlane r0, r10, r3, r0    @ R0+=W5*ROWr16[5x8]=b0
+        mlane r5, r11, r3, r5    @ R5+=W7*ROWr16[5x8]=b2
+        mlane r7, r9, r3, r7     @ R7+=W3*ROWr16[5x8]=b3
+        rsbne r3, r3, #0         @ R3=-ROWr16[5x8]
+        ldrsh r4, [r14, #112]    @ R4=COLr16[7x8]
+        mlane r1, r8, r3, r1     @ R7-=W1*ROWr16[5x8]=b1
+        @@ R3 is free now
+        teq r4, #0               @ if 0 then avoid muls
+        mlane r0, r11, r4, r0    @ R0+=W7*ROWr16[7x8]=b0
+        mlane r5, r9, r4, r5     @ R5+=W3*ROWr16[7x8]=b2
+        rsbne r4, r4, #0         @ R4=-ROWr16[7x8]
+        mlane r7, r8, r4, r7     @ R7-=W1*ROWr16[7x8]=b3
+        mlane r1, r10, r4, r1    @ R1-=W5*ROWr16[7x8]=b1
+        @@ R4 is free now
+__end_b_evaluation2:
+        @@ at this point, R0=b0,  R1=b1, R2 (free), R3 (free), R4 (free),
+        @@     R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
+        @@     R12=__const_ptr_, R14=&block[n]
+
+__a_evaluation2:
+        @@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1));
+        @@ a1 = a0 + W6 * row[2];
+        @@ a2 = a0 - W6 * row[2];
+        @@ a3 = a0 - W2 * row[2];
+        @@ a0 = a0 + W2 * row[2];
+        ldrsh r6, [r14, #0]
+        ldr r9, [r12, #offW4]    @ R9=W4
+        mul r6, r9, r6           @ R6=W4*ROWr16[0]
+        ldr r10, [r12, #offW6]   @ R10=W6
+        ldrsh r4, [r14, #32]     @ R4=ROWr16[2] (a3 not defined yet)
+        add r6, r6, #COL_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(COL_SHIFT-1) (a0)
+        mul r11, r10, r4         @ R11=W6*ROWr16[2]
+        ldr r8, [r12, #offW2]    @ R8=W2
+        add r2, r6, r11          @ R2=a0+W6*ROWr16[2] (a1)
+        sub r3, r6, r11          @ R3=a0-W6*ROWr16[2] (a2)
+        mul r11, r8, r4          @ R11=W2*ROWr16[2]
+        sub r4, r6, r11          @ R4=a0-W2*ROWr16[2] (a3)
+        add r6, r6, r11          @ R6=a0+W2*ROWr16[2] (a0)
+
+        @@ at this point, R0=b0,  R1=b1, R2=a1, R3=a2, R4=a3,
+        @@     R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free),
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ a0 += W4*row[4]
+        @@ a1 -= W4*row[4]
+        @@ a2 -= W4*row[4]
+        @@ a3 += W4*row[4]
+        ldrsh r11, [r14, #64]    @ R11=ROWr16[4]
+        teq r11, #0              @ if null avoid muls
+        mulne r11, r9, r11       @ R11=W4*ROWr16[4]
+        @@ R9 is free now
+        addne r6, r6, r11        @ R6+=W4*ROWr16[4] (a0)
+        subne r2, r2, r11        @ R2-=W4*ROWr16[4] (a1)
+        subne r3, r3, r11        @ R3-=W4*ROWr16[4] (a2)
+        ldrsh r9, [r14, #96]     @ R9=ROWr16[6]
+        addne r4, r4, r11        @ R4+=W4*ROWr16[4] (a3)
+        @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
+        teq r9, #0               @ if null avoid muls
+        mulne r11, r10, r9       @ R11=W6*ROWr16[6]
+        addne r6, r6, r11        @ R6+=W6*ROWr16[6] (a0)
+        mulne r10, r8, r9        @ R10=W2*ROWr16[6]
+        @@ a0 += W6*row[6];
+        @@ a3 -= W6*row[6];
+        @@ a1 -= W2*row[6];
+        @@ a2 += W2*row[6];
+        subne r4, r4, r11        @ R4-=W6*ROWr16[6] (a3)
+        subne r2, r2, r10        @ R2-=W2*ROWr16[6] (a1)
+        addne r3, r3, r10        @ R3+=W2*ROWr16[6] (a2)
+__end_a_evaluation2:
+        @@ at this point, R0=b0,  R1=b1, R2=a1, R3=a2, R4=a3,
+        @@     R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ col[0 ] = ((a0 + b0) >> COL_SHIFT);
+        @@ col[8 ] = ((a1 + b1) >> COL_SHIFT);
+        @@ col[16] = ((a2 + b2) >> COL_SHIFT);
+        @@ col[24] = ((a3 + b3) >> COL_SHIFT);
+        @@ col[32] = ((a3 - b3) >> COL_SHIFT);
+        @@ col[40] = ((a2 - b2) >> COL_SHIFT);
+        @@ col[48] = ((a1 - b1) >> COL_SHIFT);
+        @@ col[56] = ((a0 - b0) >> COL_SHIFT);
+        @@@@@ no optimisation here @@@@@
+        add r8, r6, r0           @ R8=a0+b0
+        add r9, r2, r1           @ R9=a1+b1
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #0]
+        strh r9, [r14, #16]
+        add r8, r3, r5           @ R8=a2+b2
+        add r9, r4, r7           @ R9=a3+b3
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #32]
+        strh r9, [r14, #48]
+        sub r8, r4, r7           @ R8=a3-b3
+        sub r9, r3, r5           @ R9=a2-b2
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #64]
+        strh r9, [r14, #80]
+        sub r8, r2, r1           @ R8=a1-b1
+        sub r9, r6, r0           @ R9=a0-b0
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #96]
+        strh r9, [r14, #112]
+
+__end_col_loop:
+        @@ at this point, R0-R11 (free)
+        @@     R12=__const_ptr_, R14=&block[n]
+        ldr r0, [sp, #0]         @ R0=block
+        teq r0, r14              @ compare current &block[n] to block, when block is reached, the loop is finished.
+        sub r14, r14, #2
+        bne __col_loop
+
+
+
+
+__end_simple_idct_ARM:
+        @@ restore registers to previous status!
+        add sp, sp, #8 @@ the local variables!
+        ldmfd sp!, {r4-r11, r15} @@ update PC with LR content.
+
+
+
+@@ kind of sub-function, here not to overload the common case.
+__end_bef_a_evaluation:
+        add r2, r6, r11          @ R2=a0+W6*ROWr16[2] (a1)
+        mul r11, r8, r4          @ R11=W2*ROWr16[2]
+        sub r4, r6, r11          @ R4=a0-W2*ROWr16[2] (a3)
+        add r6, r6, r11          @ R6=a0+W2*ROWr16[2] (a0)
+        bal __end_a_evaluation
+
+
+__constant_ptr__:  @@ see #defines at the beginning of the source code for values.
+        .align
+        .word   W1
+        .word   W2
+        .word   W3
+        .word   W4
+        .word   W5
+        .word   W6
+        .word   W7
+        .word   MASK_MSHW
diff --git a/contrib/ffmpeg/libavcodec/armv4l/simple_idct_armv5te.S b/contrib/ffmpeg/libavcodec/armv4l/simple_idct_armv5te.S
new file mode 100644
index 000000000..28bee0643
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/armv4l/simple_idct_armv5te.S
@@ -0,0 +1,718 @@
+/*
+ * Simple IDCT
+ *
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2006 Mans Rullgard <mru@inprovide.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define W1  22725   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W2  21407   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W3  19266   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W4  16383   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W5  12873   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W6  8867    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define W7  4520    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
+#define ROW_SHIFT 11
+#define COL_SHIFT 20
+
+#define W13 (W1 | (W3 << 16))
+#define W26 (W2 | (W6 << 16))
+#define W57 (W5 | (W7 << 16))
+
+        .text
+        .align
+w13:    .long W13
+w26:    .long W26
+w57:    .long W57
+
+        .align
+        .func idct_row_armv5te
+idct_row_armv5te:
+        str    lr, [sp, #-4]!
+
+        ldrd   v1, [a1, #8]
+        ldrd   a3, [a1]              /* a3 = row[1:0], a4 = row[3:2] */
+        orrs   v1, v1, v2
+        cmpeq  v1, a4
+        cmpeq  v1, a3, lsr #16
+        beq    row_dc_only
+
+        mov    v1, #(1<<(ROW_SHIFT-1))
+        mov    ip, #16384
+        sub    ip, ip, #1            /* ip = W4 */
+        smlabb v1, ip, a3, v1        /* v1 = W4*row[0]+(1<<(RS-1)) */
+        ldr    ip, [pc, #(w26-.-8)]  /* ip = W2 | (W6 << 16) */
+        smultb a2, ip, a4
+        smulbb lr, ip, a4
+        add    v2, v1, a2
+        sub    v3, v1, a2
+        sub    v4, v1, lr
+        add    v1, v1, lr
+
+        ldr    ip, [pc, #(w13-.-8)]  /* ip = W1 | (W3 << 16) */
+        ldr    lr, [pc, #(w57-.-8)]  /* lr = W5 | (W7 << 16) */
+        smulbt v5, ip, a3
+        smultt v6, lr, a4
+        smlatt v5, ip, a4, v5
+        smultt a2, ip, a3
+        smulbt v7, lr, a3
+        sub    v6, v6, a2
+        smulbt a2, ip, a4
+        smultt fp, lr, a3
+        sub    v7, v7, a2
+        smulbt a2, lr, a4
+        ldrd   a3, [a1, #8]          /* a3=row[5:4] a4=row[7:6] */
+        sub    fp, fp, a2
+
+        orrs   a2, a3, a4
+        beq    1f
+
+        smlabt v5, lr, a3, v5
+        smlabt v6, ip, a3, v6
+        smlatt v5, lr, a4, v5
+        smlabt v6, lr, a4, v6
+        smlatt v7, lr, a3, v7
+        smlatt fp, ip, a3, fp
+        smulbt a2, ip, a4
+        smlatt v7, ip, a4, v7
+        sub    fp, fp, a2
+
+        ldr    ip, [pc, #(w26-.-8)]  /* ip = W2 | (W6 << 16) */
+        mov    a2, #16384
+        sub    a2, a2, #1            /* a2 =  W4 */
+        smulbb a2, a2, a3            /* a2 =  W4*row[4] */
+        smultb lr, ip, a4            /* lr =  W6*row[6] */
+        add    v1, v1, a2            /* v1 += W4*row[4] */
+        add    v1, v1, lr            /* v1 += W6*row[6] */
+        add    v4, v4, a2            /* v4 += W4*row[4] */
+        sub    v4, v4, lr            /* v4 -= W6*row[6] */
+        smulbb lr, ip, a4            /* lr =  W2*row[6] */
+        sub    v2, v2, a2            /* v2 -= W4*row[4] */
+        sub    v2, v2, lr            /* v2 -= W2*row[6] */
+        sub    v3, v3, a2            /* v3 -= W4*row[4] */
+        add    v3, v3, lr            /* v3 += W2*row[6] */
+
+1:      add    a2, v1, v5
+        mov    a3, a2, lsr #11
+        bic    a3, a3, #0x1f0000
+        sub    a2, v2, v6
+        mov    a2, a2, lsr #11
+        add    a3, a3, a2, lsl #16
+        add    a2, v3, v7
+        mov    a4, a2, lsr #11
+        bic    a4, a4, #0x1f0000
+        add    a2, v4, fp
+        mov    a2, a2, lsr #11
+        add    a4, a4, a2, lsl #16
+        strd   a3, [a1]
+
+        sub    a2, v4, fp
+        mov    a3, a2, lsr #11
+        bic    a3, a3, #0x1f0000
+        sub    a2, v3, v7
+        mov    a2, a2, lsr #11
+        add    a3, a3, a2, lsl #16
+        add    a2, v2, v6
+        mov    a4, a2, lsr #11
+        bic    a4, a4, #0x1f0000
+        sub    a2, v1, v5
+        mov    a2, a2, lsr #11
+        add    a4, a4, a2, lsl #16
+        strd   a3, [a1, #8]
+
+        ldr    pc, [sp], #4
+
+row_dc_only:
+        orr    a3, a3, a3, lsl #16
+        bic    a3, a3, #0xe000
+        mov    a3, a3, lsl #3
+        mov    a4, a3
+        strd   a3, [a1]
+        strd   a3, [a1, #8]
+
+        ldr    pc, [sp], #4
+        .endfunc
+
+        .macro idct_col
+        ldr    a4, [a1]              /* a4 = col[1:0] */
+        mov    ip, #16384
+        sub    ip, ip, #1            /* ip = W4 */
+#if 0
+        mov    v1, #(1<<(COL_SHIFT-1))
+        smlabt v2, ip, a4, v1        /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */
+        smlabb v1, ip, a4, v1        /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */
+        ldr    a4, [a1, #(16*4)]
+#else
+        mov    v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
+        add    v2, v1, a4, asr #16
+        rsb    v2, v2, v2, lsl #14
+        mov    a4, a4, lsl #16
+        add    v1, v1, a4, asr #16
+        ldr    a4, [a1, #(16*4)]
+        rsb    v1, v1, v1, lsl #14
+#endif
+
+        smulbb lr, ip, a4
+        smulbt a3, ip, a4
+        sub    v3, v1, lr
+        sub    v5, v1, lr
+        add    v7, v1, lr
+        add    v1, v1, lr
+        sub    v4, v2, a3
+        sub    v6, v2, a3
+        add    fp, v2, a3
+        ldr    ip, [pc, #(w26-.-8)]
+        ldr    a4, [a1, #(16*2)]
+        add    v2, v2, a3
+
+        smulbb lr, ip, a4
+        smultb a3, ip, a4
+        add    v1, v1, lr
+        sub    v7, v7, lr
+        add    v3, v3, a3
+        sub    v5, v5, a3
+        smulbt lr, ip, a4
+        smultt a3, ip, a4
+        add    v2, v2, lr
+        sub    fp, fp, lr
+        add    v4, v4, a3
+        ldr    a4, [a1, #(16*6)]
+        sub    v6, v6, a3
+
+        smultb lr, ip, a4
+        smulbb a3, ip, a4
+        add    v1, v1, lr
+        sub    v7, v7, lr
+        sub    v3, v3, a3
+        add    v5, v5, a3
+        smultt lr, ip, a4
+        smulbt a3, ip, a4
+        add    v2, v2, lr
+        sub    fp, fp, lr
+        sub    v4, v4, a3
+        add    v6, v6, a3
+
+        stmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp}
+
+        ldr    ip, [pc, #(w13-.-8)]
+        ldr    a4, [a1, #(16*1)]
+        ldr    lr, [pc, #(w57-.-8)]
+        smulbb v1, ip, a4
+        smultb v3, ip, a4
+        smulbb v5, lr, a4
+        smultb v7, lr, a4
+        smulbt v2, ip, a4
+        smultt v4, ip, a4
+        smulbt v6, lr, a4
+        smultt fp, lr, a4
+        rsb    v4, v4, #0
+        ldr    a4, [a1, #(16*3)]
+        rsb    v3, v3, #0
+
+        smlatb v1, ip, a4, v1
+        smlatb v3, lr, a4, v3
+        smulbb a3, ip, a4
+        smulbb a2, lr, a4
+        sub    v5, v5, a3
+        sub    v7, v7, a2
+        smlatt v2, ip, a4, v2
+        smlatt v4, lr, a4, v4
+        smulbt a3, ip, a4
+        smulbt a2, lr, a4
+        sub    v6, v6, a3
+        ldr    a4, [a1, #(16*5)]
+        sub    fp, fp, a2
+
+        smlabb v1, lr, a4, v1
+        smlabb v3, ip, a4, v3
+        smlatb v5, lr, a4, v5
+        smlatb v7, ip, a4, v7
+        smlabt v2, lr, a4, v2
+        smlabt v4, ip, a4, v4
+        smlatt v6, lr, a4, v6
+        ldr    a3, [a1, #(16*7)]
+        smlatt fp, ip, a4, fp
+
+        smlatb v1, lr, a3, v1
+        smlabb v3, lr, a3, v3
+        smlatb v5, ip, a3, v5
+        smulbb a4, ip, a3
+        smlatt v2, lr, a3, v2
+        sub    v7, v7, a4
+        smlabt v4, lr, a3, v4
+        smulbt a4, ip, a3
+        smlatt v6, ip, a3, v6
+        sub    fp, fp, a4
+        .endm
+
+        .align
+        .func idct_col_armv5te
+idct_col_armv5te:
+        str    lr, [sp, #-4]!
+
+        idct_col
+
+        ldmfd  sp!, {a3, a4}
+        adds   a2, a3, v1
+        mov    a2, a2, lsr #20
+        orrmi  a2, a2, #0xf000
+        add    ip, a4, v2
+        mov    ip, ip, asr #20
+        orr    a2, a2, ip, lsl #16
+        str    a2, [a1]
+        subs   a3, a3, v1
+        mov    a2, a3, lsr #20
+        orrmi  a2, a2, #0xf000
+        sub    a4, a4, v2
+        mov    a4, a4, asr #20
+        orr    a2, a2, a4, lsl #16
+        ldmfd  sp!, {a3, a4}
+        str    a2, [a1, #(16*7)]
+
+        subs   a2, a3, v3
+        mov    a2, a2, lsr #20
+        orrmi  a2, a2, #0xf000
+        sub    ip, a4, v4
+        mov    ip, ip, asr #20
+        orr    a2, a2, ip, lsl #16
+        str    a2, [a1, #(16*1)]
+        adds   a3, a3, v3
+        mov    a2, a3, lsr #20
+        orrmi  a2, a2, #0xf000
+        add    a4, a4, v4
+        mov    a4, a4, asr #20
+        orr    a2, a2, a4, lsl #16
+        ldmfd  sp!, {a3, a4}
+        str    a2, [a1, #(16*6)]
+
+        adds   a2, a3, v5
+        mov    a2, a2, lsr #20
+        orrmi  a2, a2, #0xf000
+        add    ip, a4, v6
+        mov    ip, ip, asr #20
+        orr    a2, a2, ip, lsl #16
+        str    a2, [a1, #(16*2)]
+        subs   a3, a3, v5
+        mov    a2, a3, lsr #20
+        orrmi  a2, a2, #0xf000
+        sub    a4, a4, v6
+        mov    a4, a4, asr #20
+        orr    a2, a2, a4, lsl #16
+        ldmfd  sp!, {a3, a4}
+        str    a2, [a1, #(16*5)]
+
+        adds   a2, a3, v7
+        mov    a2, a2, lsr #20
+        orrmi  a2, a2, #0xf000
+        add    ip, a4, fp
+        mov    ip, ip, asr #20
+        orr    a2, a2, ip, lsl #16
+        str    a2, [a1, #(16*3)]
+        subs   a3, a3, v7
+        mov    a2, a3, lsr #20
+        orrmi  a2, a2, #0xf000
+        sub    a4, a4, fp
+        mov    a4, a4, asr #20
+        orr    a2, a2, a4, lsl #16
+        str    a2, [a1, #(16*4)]
+
+        ldr    pc, [sp], #4
+        .endfunc
+
+        .align
+        .func idct_col_put_armv5te
+idct_col_put_armv5te:
+        str    lr, [sp, #-4]!
+
+        idct_col
+
+        ldmfd  sp!, {a3, a4}
+        ldr    lr, [sp, #32]
+        add    a2, a3, v1
+        movs   a2, a2, asr #20
+        movmi  a2, #0
+        cmp    a2, #255
+        movgt  a2, #255
+        add    ip, a4, v2
+        movs   ip, ip, asr #20
+        movmi  ip, #0
+        cmp    ip, #255
+        movgt  ip, #255
+        orr    a2, a2, ip, lsl #8
+        sub    a3, a3, v1
+        movs   a3, a3, asr #20
+        movmi  a3, #0
+        cmp    a3, #255
+        movgt  a3, #255
+        sub    a4, a4, v2
+        movs   a4, a4, asr #20
+        movmi  a4, #0
+        cmp    a4, #255
+        ldr    v1, [sp, #28]
+        movgt  a4, #255
+        strh   a2, [v1]
+        add    a2, v1, #2
+        str    a2, [sp, #28]
+        orr    a2, a3, a4, lsl #8
+        rsb    v2, lr, lr, lsl #3
+        ldmfd  sp!, {a3, a4}
+        strh   a2, [v2, v1]!
+
+        sub    a2, a3, v3
+        movs   a2, a2, asr #20
+        movmi  a2, #0
+        cmp    a2, #255
+        movgt  a2, #255
+        sub    ip, a4, v4
+        movs   ip, ip, asr #20
+        movmi  ip, #0
+        cmp    ip, #255
+        movgt  ip, #255
+        orr    a2, a2, ip, lsl #8
+        strh   a2, [v1, lr]!
+        add    a3, a3, v3
+        movs   a2, a3, asr #20
+        movmi  a2, #0
+        cmp    a2, #255
+        movgt  a2, #255
+        add    a4, a4, v4
+        movs   a4, a4, asr #20
+        movmi  a4, #0
+        cmp    a4, #255
+        movgt  a4, #255
+        orr    a2, a2, a4, lsl #8
+        ldmfd  sp!, {a3, a4}
+        strh   a2, [v2, -lr]!
+
+        add    a2, a3, v5
+        movs   a2, a2, asr #20
+        movmi  a2, #0
+        cmp    a2, #255
+        movgt  a2, #255
+        add    ip, a4, v6
+        movs   ip, ip, asr #20
+        movmi  ip, #0
+        cmp    ip, #255
+        movgt  ip, #255
+        orr    a2, a2, ip, lsl #8
+        strh   a2, [v1, lr]!
+        sub    a3, a3, v5
+        movs   a2, a3, asr #20
+        movmi  a2, #0
+        cmp    a2, #255
+        movgt  a2, #255
+        sub    a4, a4, v6
+        movs   a4, a4, asr #20
+        movmi  a4, #0
+        cmp    a4, #255
+        movgt  a4, #255
+        orr    a2, a2, a4, lsl #8
+        ldmfd  sp!, {a3, a4}
+        strh   a2, [v2, -lr]!
+
+        add    a2, a3, v7
+        movs   a2, a2, asr #20
+        movmi  a2, #0
+        cmp    a2, #255
+        movgt  a2, #255
+        add    ip, a4, fp
+        movs   ip, ip, asr #20
+        movmi  ip, #0
+        cmp    ip, #255
+        movgt  ip, #255
+        orr    a2, a2, ip, lsl #8
+        strh   a2, [v1, lr]
+        sub    a3, a3, v7
+        movs   a2, a3, asr #20
+        movmi  a2, #0
+        cmp    a2, #255
+        movgt  a2, #255
+        sub    a4, a4, fp
+        movs   a4, a4, asr #20
+        movmi  a4, #0
+        cmp    a4, #255
+        movgt  a4, #255
+        orr    a2, a2, a4, lsl #8
+        strh   a2, [v2, -lr]
+
+        ldr    pc, [sp], #4
+        .endfunc
+
+        .align
+        .func idct_col_add_armv5te
+idct_col_add_armv5te:
+        str    lr, [sp, #-4]!
+
+        idct_col
+
+        ldr    lr, [sp, #36]
+
+        ldmfd  sp!, {a3, a4}
+        ldrh   ip, [lr]
+        add    a2, a3, v1
+        mov    a2, a2, asr #20
+        sub    a3, a3, v1
+        and    v1, ip, #255
+        adds   a2, a2, v1
+        movmi  a2, #0
+        cmp    a2, #255
+        movgt  a2, #255
+        add    v1, a4, v2
+        mov    v1, v1, asr #20
+        adds   v1, v1, ip, lsr #8
+        movmi  v1, #0
+        cmp    v1, #255
+        movgt  v1, #255
+        orr    a2, a2, v1, lsl #8
+        ldr    v1, [sp, #32]
+        sub    a4, a4, v2
+        rsb    v2, v1, v1, lsl #3
+        ldrh   ip, [v2, lr]!
+        strh   a2, [lr]
+        mov    a3, a3, asr #20
+        and    a2, ip, #255
+        adds   a3, a3, a2
+        movmi  a3, #0
+        cmp    a3, #255
+        movgt  a3, #255
+        mov    a4, a4, asr #20
+        adds   a4, a4, ip, lsr #8
+        movmi  a4, #0
+        cmp    a4, #255
+        movgt  a4, #255
+        add    a2, lr, #2
+        str    a2, [sp, #28]
+        orr    a2, a3, a4, lsl #8
+        strh   a2, [v2]
+
+        ldmfd  sp!, {a3, a4}
+        ldrh   ip, [lr, v1]!
+        sub    a2, a3, v3
+        mov    a2, a2, asr #20
+        add    a3, a3, v3
+        and    v3, ip, #255
+        adds   a2, a2, v3
+        movmi  a2, #0
+        cmp    a2, #255
+        movgt  a2, #255
+        sub    v3, a4, v4
+        mov    v3, v3, asr #20
+        adds   v3, v3, ip, lsr #8
+        movmi  v3, #0
+        cmp    v3, #255
+        movgt  v3, #255
+        orr    a2, a2, v3, lsl #8
+        add    a4, a4, v4
+        ldrh   ip, [v2, -v1]!
+        strh   a2, [lr]
+        mov    a3, a3, asr #20
+        and    a2, ip, #255
+        adds   a3, a3, a2
+        movmi  a3, #0
+        cmp    a3, #255
+        movgt  a3, #255
+        mov    a4, a4, asr #20
+        adds   a4, a4, ip, lsr #8
+        movmi  a4, #0
+        cmp    a4, #255
+        movgt  a4, #255
+        orr    a2, a3, a4, lsl #8
+        strh   a2, [v2]
+
+        ldmfd  sp!, {a3, a4}
+        ldrh   ip, [lr, v1]!
+        add    a2, a3, v5
+        mov    a2, a2, asr #20
+        sub    a3, a3, v5
+        and    v3, ip, #255
+        adds   a2, a2, v3
+        movmi  a2, #0
+        cmp    a2, #255
+        movgt  a2, #255
+        add    v3, a4, v6
+        mov    v3, v3, asr #20
+        adds   v3, v3, ip, lsr #8
+        movmi  v3, #0
+        cmp    v3, #255
+        movgt  v3, #255
+        orr    a2, a2, v3, lsl #8
+        sub    a4, a4, v6
+        ldrh   ip, [v2, -v1]!
+        strh   a2, [lr]
+        mov    a3, a3, asr #20
+        and    a2, ip, #255
+        adds   a3, a3, a2
+        movmi  a3, #0
+        cmp    a3, #255
+        movgt  a3, #255
+        mov    a4, a4, asr #20
+        adds   a4, a4, ip, lsr #8
+        movmi  a4, #0
+        cmp    a4, #255
+        movgt  a4, #255
+        orr    a2, a3, a4, lsl #8
+        strh   a2, [v2]
+
+        ldmfd  sp!, {a3, a4}
+        ldrh   ip, [lr, v1]!
+        add    a2, a3, v7
+        mov    a2, a2, asr #20
+        sub    a3, a3, v7
+        and    v3, ip, #255
+        adds   a2, a2, v3
+        movmi  a2, #0
+        cmp    a2, #255
+        movgt  a2, #255
+        add    v3, a4, fp
+        mov    v3, v3, asr #20
+        adds   v3, v3, ip, lsr #8
+        movmi  v3, #0
+        cmp    v3, #255
+        movgt  v3, #255
+        orr    a2, a2, v3, lsl #8
+        sub    a4, a4, fp
+        ldrh   ip, [v2, -v1]!
+        strh   a2, [lr]
+        mov    a3, a3, asr #20
+        and    a2, ip, #255
+        adds   a3, a3, a2
+        movmi  a3, #0
+        cmp    a3, #255
+        movgt  a3, #255
+        mov    a4, a4, asr #20
+        adds   a4, a4, ip, lsr #8
+        movmi  a4, #0
+        cmp    a4, #255
+        movgt  a4, #255
+        orr    a2, a3, a4, lsl #8
+        strh   a2, [v2]
+
+        ldr    pc, [sp], #4
+        .endfunc
+
+        .align
+        .global simple_idct_armv5te
+        .func simple_idct_armv5te
+simple_idct_armv5te:
+        stmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}
+
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+
+        sub    a1, a1, #(16*7)
+
+        bl     idct_col_armv5te
+        add    a1, a1, #4
+        bl     idct_col_armv5te
+        add    a1, a1, #4
+        bl     idct_col_armv5te
+        add    a1, a1, #4
+        bl     idct_col_armv5te
+
+        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
+        .endfunc
+
+        .align
+        .global simple_idct_add_armv5te
+        .func simple_idct_add_armv5te
+simple_idct_add_armv5te:
+        stmfd  sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
+
+        mov    a1, a3
+
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+
+        sub    a1, a1, #(16*7)
+
+        bl     idct_col_add_armv5te
+        add    a1, a1, #4
+        bl     idct_col_add_armv5te
+        add    a1, a1, #4
+        bl     idct_col_add_armv5te
+        add    a1, a1, #4
+        bl     idct_col_add_armv5te
+
+        add    sp, sp, #8
+        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
+        .endfunc
+
+        .align
+        .global simple_idct_put_armv5te
+        .func simple_idct_put_armv5te
+simple_idct_put_armv5te:
+        stmfd  sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
+
+        mov    a1, a3
+
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+        add    a1, a1, #16
+        bl     idct_row_armv5te
+
+        sub    a1, a1, #(16*7)
+
+        bl     idct_col_put_armv5te
+        add    a1, a1, #4
+        bl     idct_col_put_armv5te
+        add    a1, a1, #4
+        bl     idct_col_put_armv5te
+        add    a1, a1, #4
+        bl     idct_col_put_armv5te
+
+        add    sp, sp, #8
+        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
+        .endfunc
diff --git a/contrib/ffmpeg/libavcodec/asv1.c b/contrib/ffmpeg/libavcodec/asv1.c
new file mode 100644
index 000000000..ec6bbb9ba
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/asv1.c
@@ -0,0 +1,655 @@
+/*
+ * ASUS V1/V2 codec
+ * Copyright (c) 2003 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file asv1.c
+ * ASUS V1/V2 codec.
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+//#undef NDEBUG
+//#include <assert.h>
+
+#define VLC_BITS 6
+#define ASV2_LEVEL_VLC_BITS 10
+
+typedef struct ASV1Context{
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    AVFrame picture;
+    PutBitContext pb;
+    GetBitContext gb;
+    ScanTable scantable;
+    int inv_qscale;
+    int mb_width;
+    int mb_height;
+    int mb_width2;
+    int mb_height2;
+    DECLARE_ALIGNED_8(DCTELEM, block[6][64]);
+    DECLARE_ALIGNED_8(uint16_t, intra_matrix[64]);
+    DECLARE_ALIGNED_8(int, q_intra_matrix[64]);
+    uint8_t *bitstream_buffer;
+    unsigned int bitstream_buffer_size;
+} ASV1Context;
+
+static const uint8_t scantab[64]={
+    0x00,0x08,0x01,0x09,0x10,0x18,0x11,0x19,
+    0x02,0x0A,0x03,0x0B,0x12,0x1A,0x13,0x1B,
+    0x04,0x0C,0x05,0x0D,0x20,0x28,0x21,0x29,
+    0x06,0x0E,0x07,0x0F,0x14,0x1C,0x15,0x1D,
+    0x22,0x2A,0x23,0x2B,0x30,0x38,0x31,0x39,
+    0x16,0x1E,0x17,0x1F,0x24,0x2C,0x25,0x2D,
+    0x32,0x3A,0x33,0x3B,0x26,0x2E,0x27,0x2F,
+    0x34,0x3C,0x35,0x3D,0x36,0x3E,0x37,0x3F,
+};
+
+
+static const uint8_t ccp_tab[17][2]={
+    {0x2,2}, {0x7,5}, {0xB,5}, {0x3,5},
+    {0xD,5}, {0x5,5}, {0x9,5}, {0x1,5},
+    {0xE,5}, {0x6,5}, {0xA,5}, {0x2,5},
+    {0xC,5}, {0x4,5}, {0x8,5}, {0x3,2},
+    {0xF,5}, //EOB
+};
+
+static const uint8_t level_tab[7][2]={
+    {3,4}, {3,3}, {3,2}, {0,3}, {2,2}, {2,3}, {2,4}
+};
+
+static const uint8_t dc_ccp_tab[8][2]={
+    {0x1,2}, {0xD,4}, {0xF,4}, {0xC,4},
+    {0x5,3}, {0xE,4}, {0x4,3}, {0x0,2},
+};
+
+static const uint8_t ac_ccp_tab[16][2]={
+    {0x00,2}, {0x3B,6}, {0x0A,4}, {0x3A,6},
+    {0x02,3}, {0x39,6}, {0x3C,6}, {0x38,6},
+    {0x03,3}, {0x3D,6}, {0x08,4}, {0x1F,5},
+    {0x09,4}, {0x0B,4}, {0x0D,4}, {0x0C,4},
+};
+
+static const uint8_t asv2_level_tab[63][2]={
+    {0x3F,10},{0x2F,10},{0x37,10},{0x27,10},{0x3B,10},{0x2B,10},{0x33,10},{0x23,10},
+    {0x3D,10},{0x2D,10},{0x35,10},{0x25,10},{0x39,10},{0x29,10},{0x31,10},{0x21,10},
+    {0x1F, 8},{0x17, 8},{0x1B, 8},{0x13, 8},{0x1D, 8},{0x15, 8},{0x19, 8},{0x11, 8},
+    {0x0F, 6},{0x0B, 6},{0x0D, 6},{0x09, 6},
+    {0x07, 4},{0x05, 4},
+    {0x03, 2},
+    {0x00, 5},
+    {0x02, 2},
+    {0x04, 4},{0x06, 4},
+    {0x08, 6},{0x0C, 6},{0x0A, 6},{0x0E, 6},
+    {0x10, 8},{0x18, 8},{0x14, 8},{0x1C, 8},{0x12, 8},{0x1A, 8},{0x16, 8},{0x1E, 8},
+    {0x20,10},{0x30,10},{0x28,10},{0x38,10},{0x24,10},{0x34,10},{0x2C,10},{0x3C,10},
+    {0x22,10},{0x32,10},{0x2A,10},{0x3A,10},{0x26,10},{0x36,10},{0x2E,10},{0x3E,10},
+};
+
+
+static VLC ccp_vlc;
+static VLC level_vlc;
+static VLC dc_ccp_vlc;
+static VLC ac_ccp_vlc;
+static VLC asv2_level_vlc;
+
+static void init_vlcs(ASV1Context *a){
+    static int done = 0;
+
+    if (!done) {
+        done = 1;
+
+        init_vlc(&ccp_vlc, VLC_BITS, 17,
+                 &ccp_tab[0][1], 2, 1,
+                 &ccp_tab[0][0], 2, 1, 1);
+        init_vlc(&dc_ccp_vlc, VLC_BITS, 8,
+                 &dc_ccp_tab[0][1], 2, 1,
+                 &dc_ccp_tab[0][0], 2, 1, 1);
+        init_vlc(&ac_ccp_vlc, VLC_BITS, 16,
+                 &ac_ccp_tab[0][1], 2, 1,
+                 &ac_ccp_tab[0][0], 2, 1, 1);
+        init_vlc(&level_vlc,  VLC_BITS, 7,
+                 &level_tab[0][1], 2, 1,
+                 &level_tab[0][0], 2, 1, 1);
+        init_vlc(&asv2_level_vlc, ASV2_LEVEL_VLC_BITS, 63,
+                 &asv2_level_tab[0][1], 2, 1,
+                 &asv2_level_tab[0][0], 2, 1, 1);
+    }
+}
+
+//FIXME write a reversed bitstream reader to avoid the double reverse
+static inline int asv2_get_bits(GetBitContext *gb, int n){
+    return ff_reverse[ get_bits(gb, n) << (8-n) ];
+}
+
+static inline void asv2_put_bits(PutBitContext *pb, int n, int v){
+    put_bits(pb, n, ff_reverse[ v << (8-n) ]);
+}
+
+static inline int asv1_get_level(GetBitContext *gb){
+    int code= get_vlc2(gb, level_vlc.table, VLC_BITS, 1);
+
+    if(code==3) return get_sbits(gb, 8);
+    else        return code - 3;
+}
+
+static inline int asv2_get_level(GetBitContext *gb){
+    int code= get_vlc2(gb, asv2_level_vlc.table, ASV2_LEVEL_VLC_BITS, 1);
+
+    if(code==31) return (int8_t)asv2_get_bits(gb, 8);
+    else         return code - 31;
+}
+
+static inline void asv1_put_level(PutBitContext *pb, int level){
+    unsigned int index= level + 3;
+
+    if(index <= 6) put_bits(pb, level_tab[index][1], level_tab[index][0]);
+    else{
+        put_bits(pb, level_tab[3][1], level_tab[3][0]);
+        put_bits(pb, 8, level&0xFF);
+    }
+}
+
+static inline void asv2_put_level(PutBitContext *pb, int level){
+    unsigned int index= level + 31;
+
+    if(index <= 62) put_bits(pb, asv2_level_tab[index][1], asv2_level_tab[index][0]);
+    else{
+        put_bits(pb, asv2_level_tab[31][1], asv2_level_tab[31][0]);
+        asv2_put_bits(pb, 8, level&0xFF);
+    }
+}
+
+static inline int asv1_decode_block(ASV1Context *a, DCTELEM block[64]){
+    int i;
+
+    block[0]= 8*get_bits(&a->gb, 8);
+
+    for(i=0; i<11; i++){
+        const int ccp= get_vlc2(&a->gb, ccp_vlc.table, VLC_BITS, 1);
+
+        if(ccp){
+            if(ccp == 16) break;
+            if(ccp < 0 || i>=10){
+                av_log(a->avctx, AV_LOG_ERROR, "coded coeff pattern damaged\n");
+                return -1;
+            }
+
+            if(ccp&8) block[a->scantable.permutated[4*i+0]]= (asv1_get_level(&a->gb) * a->intra_matrix[4*i+0])>>4;
+            if(ccp&4) block[a->scantable.permutated[4*i+1]]= (asv1_get_level(&a->gb) * a->intra_matrix[4*i+1])>>4;
+            if(ccp&2) block[a->scantable.permutated[4*i+2]]= (asv1_get_level(&a->gb) * a->intra_matrix[4*i+2])>>4;
+            if(ccp&1) block[a->scantable.permutated[4*i+3]]= (asv1_get_level(&a->gb) * a->intra_matrix[4*i+3])>>4;
+        }
+    }
+
+    return 0;
+}
+
+static inline int asv2_decode_block(ASV1Context *a, DCTELEM block[64]){
+    int i, count, ccp;
+
+    count= asv2_get_bits(&a->gb, 4);
+
+    block[0]= 8*asv2_get_bits(&a->gb, 8);
+
+    ccp= get_vlc2(&a->gb, dc_ccp_vlc.table, VLC_BITS, 1);
+    if(ccp){
+        if(ccp&4) block[a->scantable.permutated[1]]= (asv2_get_level(&a->gb) * a->intra_matrix[1])>>4;
+        if(ccp&2) block[a->scantable.permutated[2]]= (asv2_get_level(&a->gb) * a->intra_matrix[2])>>4;
+        if(ccp&1) block[a->scantable.permutated[3]]= (asv2_get_level(&a->gb) * a->intra_matrix[3])>>4;
+    }
+
+    for(i=1; i<count+1; i++){
+        const int ccp= get_vlc2(&a->gb, ac_ccp_vlc.table, VLC_BITS, 1);
+
+        if(ccp){
+            if(ccp&8) block[a->scantable.permutated[4*i+0]]= (asv2_get_level(&a->gb) * a->intra_matrix[4*i+0])>>4;
+            if(ccp&4) block[a->scantable.permutated[4*i+1]]= (asv2_get_level(&a->gb) * a->intra_matrix[4*i+1])>>4;
+            if(ccp&2) block[a->scantable.permutated[4*i+2]]= (asv2_get_level(&a->gb) * a->intra_matrix[4*i+2])>>4;
+            if(ccp&1) block[a->scantable.permutated[4*i+3]]= (asv2_get_level(&a->gb) * a->intra_matrix[4*i+3])>>4;
+        }
+    }
+
+    return 0;
+}
+
+static inline void asv1_encode_block(ASV1Context *a, DCTELEM block[64]){
+    int i;
+    int nc_count=0;
+
+    put_bits(&a->pb, 8, (block[0] + 32)>>6);
+    block[0]= 0;
+
+    for(i=0; i<10; i++){
+        const int index= scantab[4*i];
+        int ccp=0;
+
+        if( (block[index + 0] = (block[index + 0]*a->q_intra_matrix[index + 0] + (1<<15))>>16) ) ccp |= 8;
+        if( (block[index + 8] = (block[index + 8]*a->q_intra_matrix[index + 8] + (1<<15))>>16) ) ccp |= 4;
+        if( (block[index + 1] = (block[index + 1]*a->q_intra_matrix[index + 1] + (1<<15))>>16) ) ccp |= 2;
+        if( (block[index + 9] = (block[index + 9]*a->q_intra_matrix[index + 9] + (1<<15))>>16) ) ccp |= 1;
+
+        if(ccp){
+            for(;nc_count; nc_count--)
+                put_bits(&a->pb, ccp_tab[0][1], ccp_tab[0][0]);
+
+            put_bits(&a->pb, ccp_tab[ccp][1], ccp_tab[ccp][0]);
+
+            if(ccp&8) asv1_put_level(&a->pb, block[index + 0]);
+            if(ccp&4) asv1_put_level(&a->pb, block[index + 8]);
+            if(ccp&2) asv1_put_level(&a->pb, block[index + 1]);
+            if(ccp&1) asv1_put_level(&a->pb, block[index + 9]);
+        }else{
+            nc_count++;
+        }
+    }
+    put_bits(&a->pb, ccp_tab[16][1], ccp_tab[16][0]);
+}
+
+static inline void asv2_encode_block(ASV1Context *a, DCTELEM block[64]){
+    int i;
+    int count=0;
+
+    for(count=63; count>3; count--){
+        const int index= scantab[count];
+
+        if( (block[index]*a->q_intra_matrix[index] + (1<<15))>>16 )
+            break;
+    }
+
+    count >>= 2;
+
+    asv2_put_bits(&a->pb, 4, count);
+    asv2_put_bits(&a->pb, 8, (block[0] + 32)>>6);
+    block[0]= 0;
+
+    for(i=0; i<=count; i++){
+        const int index= scantab[4*i];
+        int ccp=0;
+
+        if( (block[index + 0] = (block[index + 0]*a->q_intra_matrix[index + 0] + (1<<15))>>16) ) ccp |= 8;
+        if( (block[index + 8] = (block[index + 8]*a->q_intra_matrix[index + 8] + (1<<15))>>16) ) ccp |= 4;
+        if( (block[index + 1] = (block[index + 1]*a->q_intra_matrix[index + 1] + (1<<15))>>16) ) ccp |= 2;
+        if( (block[index + 9] = (block[index + 9]*a->q_intra_matrix[index + 9] + (1<<15))>>16) ) ccp |= 1;
+
+        assert(i || ccp<8);
+        if(i) put_bits(&a->pb, ac_ccp_tab[ccp][1], ac_ccp_tab[ccp][0]);
+        else  put_bits(&a->pb, dc_ccp_tab[ccp][1], dc_ccp_tab[ccp][0]);
+
+        if(ccp){
+            if(ccp&8) asv2_put_level(&a->pb, block[index + 0]);
+            if(ccp&4) asv2_put_level(&a->pb, block[index + 8]);
+            if(ccp&2) asv2_put_level(&a->pb, block[index + 1]);
+            if(ccp&1) asv2_put_level(&a->pb, block[index + 9]);
+        }
+    }
+}
+
+static inline int decode_mb(ASV1Context *a, DCTELEM block[6][64]){
+    int i;
+
+    a->dsp.clear_blocks(block[0]);
+
+    if(a->avctx->codec_id == CODEC_ID_ASV1){
+        for(i=0; i<6; i++){
+            if( asv1_decode_block(a, block[i]) < 0)
+                return -1;
+        }
+    }else{
+        for(i=0; i<6; i++){
+            if( asv2_decode_block(a, block[i]) < 0)
+                return -1;
+        }
+    }
+    return 0;
+}
+
+static inline int encode_mb(ASV1Context *a, DCTELEM block[6][64]){
+    int i;
+
+    if(a->pb.buf_end - a->pb.buf - (put_bits_count(&a->pb)>>3) < 30*16*16*3/2/8){
+        av_log(a->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+        return -1;
+    }
+
+    if(a->avctx->codec_id == CODEC_ID_ASV1){
+        for(i=0; i<6; i++)
+            asv1_encode_block(a, block[i]);
+    }else{
+        for(i=0; i<6; i++)
+            asv2_encode_block(a, block[i]);
+    }
+    return 0;
+}
+
+static inline void idct_put(ASV1Context *a, int mb_x, int mb_y){
+    DCTELEM (*block)[64]= a->block;
+    int linesize= a->picture.linesize[0];
+
+    uint8_t *dest_y  = a->picture.data[0] + (mb_y * 16* linesize              ) + mb_x * 16;
+    uint8_t *dest_cb = a->picture.data[1] + (mb_y * 8 * a->picture.linesize[1]) + mb_x * 8;
+    uint8_t *dest_cr = a->picture.data[2] + (mb_y * 8 * a->picture.linesize[2]) + mb_x * 8;
+
+    a->dsp.idct_put(dest_y                 , linesize, block[0]);
+    a->dsp.idct_put(dest_y              + 8, linesize, block[1]);
+    a->dsp.idct_put(dest_y + 8*linesize    , linesize, block[2]);
+    a->dsp.idct_put(dest_y + 8*linesize + 8, linesize, block[3]);
+
+    if(!(a->avctx->flags&CODEC_FLAG_GRAY)){
+        a->dsp.idct_put(dest_cb, a->picture.linesize[1], block[4]);
+        a->dsp.idct_put(dest_cr, a->picture.linesize[2], block[5]);
+    }
+}
+
+static inline void dct_get(ASV1Context *a, int mb_x, int mb_y){
+    DCTELEM (*block)[64]= a->block;
+    int linesize= a->picture.linesize[0];
+    int i;
+
+    uint8_t *ptr_y  = a->picture.data[0] + (mb_y * 16* linesize              ) + mb_x * 16;
+    uint8_t *ptr_cb = a->picture.data[1] + (mb_y * 8 * a->picture.linesize[1]) + mb_x * 8;
+    uint8_t *ptr_cr = a->picture.data[2] + (mb_y * 8 * a->picture.linesize[2]) + mb_x * 8;
+
+    a->dsp.get_pixels(block[0], ptr_y                 , linesize);
+    a->dsp.get_pixels(block[1], ptr_y              + 8, linesize);
+    a->dsp.get_pixels(block[2], ptr_y + 8*linesize    , linesize);
+    a->dsp.get_pixels(block[3], ptr_y + 8*linesize + 8, linesize);
+    for(i=0; i<4; i++)
+        a->dsp.fdct(block[i]);
+
+    if(!(a->avctx->flags&CODEC_FLAG_GRAY)){
+        a->dsp.get_pixels(block[4], ptr_cb, a->picture.linesize[1]);
+        a->dsp.get_pixels(block[5], ptr_cr, a->picture.linesize[2]);
+        for(i=4; i<6; i++)
+            a->dsp.fdct(block[i]);
+    }
+}
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    ASV1Context * const a = avctx->priv_data;
+    AVFrame *picture = data;
+    AVFrame * const p= (AVFrame*)&a->picture;
+    int mb_x, mb_y;
+
+    if(p->data[0])
+        avctx->release_buffer(avctx, p);
+
+    p->reference= 0;
+    if(avctx->get_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    p->pict_type= I_TYPE;
+    p->key_frame= 1;
+
+    a->bitstream_buffer= av_fast_realloc(a->bitstream_buffer, &a->bitstream_buffer_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+
+    if(avctx->codec_id == CODEC_ID_ASV1)
+        a->dsp.bswap_buf((uint32_t*)a->bitstream_buffer, (uint32_t*)buf, buf_size/4);
+    else{
+        int i;
+        for(i=0; i<buf_size; i++)
+            a->bitstream_buffer[i]= ff_reverse[ buf[i] ];
+    }
+
+    init_get_bits(&a->gb, a->bitstream_buffer, buf_size*8);
+
+    for(mb_y=0; mb_y<a->mb_height2; mb_y++){
+        for(mb_x=0; mb_x<a->mb_width2; mb_x++){
+            if( decode_mb(a, a->block) <0)
+                return -1;
+
+            idct_put(a, mb_x, mb_y);
+        }
+    }
+
+    if(a->mb_width2 != a->mb_width){
+        mb_x= a->mb_width2;
+        for(mb_y=0; mb_y<a->mb_height2; mb_y++){
+            if( decode_mb(a, a->block) <0)
+                return -1;
+
+            idct_put(a, mb_x, mb_y);
+        }
+    }
+
+    if(a->mb_height2 != a->mb_height){
+        mb_y= a->mb_height2;
+        for(mb_x=0; mb_x<a->mb_width; mb_x++){
+            if( decode_mb(a, a->block) <0)
+                return -1;
+
+            idct_put(a, mb_x, mb_y);
+        }
+    }
+#if 0
+int i;
+printf("%d %d\n", 8*buf_size, get_bits_count(&a->gb));
+for(i=get_bits_count(&a->gb); i<8*buf_size; i++){
+    printf("%d", get_bits1(&a->gb));
+}
+
+for(i=0; i<s->avctx->extradata_size; i++){
+    printf("%c\n", ((uint8_t*)s->avctx->extradata)[i]);
+}
+#endif
+
+    *picture= *(AVFrame*)&a->picture;
+    *data_size = sizeof(AVPicture);
+
+    emms_c();
+
+    return (get_bits_count(&a->gb)+31)/32*4;
+}
+
+#ifdef CONFIG_ENCODERS
+static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
+    ASV1Context * const a = avctx->priv_data;
+    AVFrame *pict = data;
+    AVFrame * const p= (AVFrame*)&a->picture;
+    int size;
+    int mb_x, mb_y;
+
+    init_put_bits(&a->pb, buf, buf_size);
+
+    *p = *pict;
+    p->pict_type= I_TYPE;
+    p->key_frame= 1;
+
+    for(mb_y=0; mb_y<a->mb_height2; mb_y++){
+        for(mb_x=0; mb_x<a->mb_width2; mb_x++){
+            dct_get(a, mb_x, mb_y);
+            encode_mb(a, a->block);
+        }
+    }
+
+    if(a->mb_width2 != a->mb_width){
+        mb_x= a->mb_width2;
+        for(mb_y=0; mb_y<a->mb_height2; mb_y++){
+            dct_get(a, mb_x, mb_y);
+            encode_mb(a, a->block);
+        }
+    }
+
+    if(a->mb_height2 != a->mb_height){
+        mb_y= a->mb_height2;
+        for(mb_x=0; mb_x<a->mb_width; mb_x++){
+            dct_get(a, mb_x, mb_y);
+            encode_mb(a, a->block);
+        }
+    }
+    emms_c();
+
+    align_put_bits(&a->pb);
+    while(put_bits_count(&a->pb)&31)
+        put_bits(&a->pb, 8, 0);
+
+    size= put_bits_count(&a->pb)/32;
+
+    if(avctx->codec_id == CODEC_ID_ASV1)
+        a->dsp.bswap_buf((uint32_t*)buf, (uint32_t*)buf, size);
+    else{
+        int i;
+        for(i=0; i<4*size; i++)
+            buf[i]= ff_reverse[ buf[i] ];
+    }
+
+    return size*4;
+}
+#endif /* CONFIG_ENCODERS */
+
+static void common_init(AVCodecContext *avctx){
+    ASV1Context * const a = avctx->priv_data;
+
+    dsputil_init(&a->dsp, avctx);
+
+    a->mb_width   = (avctx->width  + 15) / 16;
+    a->mb_height  = (avctx->height + 15) / 16;
+    a->mb_width2  = (avctx->width  + 0) / 16;
+    a->mb_height2 = (avctx->height + 0) / 16;
+
+    avctx->coded_frame= (AVFrame*)&a->picture;
+    a->avctx= avctx;
+}
+
+static int decode_init(AVCodecContext *avctx){
+    ASV1Context * const a = avctx->priv_data;
+    AVFrame *p= (AVFrame*)&a->picture;
+    int i;
+    const int scale= avctx->codec_id == CODEC_ID_ASV1 ? 1 : 2;
+
+    common_init(avctx);
+    init_vlcs(a);
+    ff_init_scantable(a->dsp.idct_permutation, &a->scantable, scantab);
+    avctx->pix_fmt= PIX_FMT_YUV420P;
+
+    a->inv_qscale= ((uint8_t*)avctx->extradata)[0];
+    if(a->inv_qscale == 0){
+        av_log(avctx, AV_LOG_ERROR, "illegal qscale 0\n");
+        if(avctx->codec_id == CODEC_ID_ASV1)
+            a->inv_qscale= 6;
+        else
+            a->inv_qscale= 10;
+    }
+
+    for(i=0; i<64; i++){
+        int index= scantab[i];
+
+        a->intra_matrix[i]= 64*scale*ff_mpeg1_default_intra_matrix[index] / a->inv_qscale;
+    }
+
+    p->qstride= a->mb_width;
+    p->qscale_table= av_malloc( p->qstride * a->mb_height);
+    p->quality= (32*scale + a->inv_qscale/2)/a->inv_qscale;
+    memset(p->qscale_table, p->quality, p->qstride*a->mb_height);
+
+    return 0;
+}
+
+#ifdef CONFIG_ENCODERS
+static int encode_init(AVCodecContext *avctx){
+    ASV1Context * const a = avctx->priv_data;
+    int i;
+    const int scale= avctx->codec_id == CODEC_ID_ASV1 ? 1 : 2;
+
+    common_init(avctx);
+
+    if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE;
+
+    a->inv_qscale= (32*scale*FF_QUALITY_SCALE +  avctx->global_quality/2) / avctx->global_quality;
+
+    avctx->extradata= av_mallocz(8);
+    avctx->extradata_size=8;
+    ((uint32_t*)avctx->extradata)[0]= le2me_32(a->inv_qscale);
+    ((uint32_t*)avctx->extradata)[1]= le2me_32(ff_get_fourcc("ASUS"));
+
+    for(i=0; i<64; i++){
+        int q= 32*scale*ff_mpeg1_default_intra_matrix[i];
+        a->q_intra_matrix[i]= ((a->inv_qscale<<16) + q/2) / q;
+    }
+
+    return 0;
+}
+#endif
+
+static int decode_end(AVCodecContext *avctx){
+    ASV1Context * const a = avctx->priv_data;
+
+    av_freep(&a->bitstream_buffer);
+    av_freep(&a->picture.qscale_table);
+    a->bitstream_buffer_size=0;
+
+    return 0;
+}
+
+AVCodec asv1_decoder = {
+    "asv1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_ASV1,
+    sizeof(ASV1Context),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame,
+    CODEC_CAP_DR1,
+};
+
+AVCodec asv2_decoder = {
+    "asv2",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_ASV2,
+    sizeof(ASV1Context),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame,
+    CODEC_CAP_DR1,
+};
+
+#ifdef CONFIG_ENCODERS
+
+AVCodec asv1_encoder = {
+    "asv1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_ASV1,
+    sizeof(ASV1Context),
+    encode_init,
+    encode_frame,
+    //encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+};
+
+AVCodec asv2_encoder = {
+    "asv2",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_ASV2,
+    sizeof(ASV1Context),
+    encode_init,
+    encode_frame,
+    //encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+};
+
+#endif //CONFIG_ENCODERS
diff --git a/contrib/ffmpeg/libavcodec/audioconvert.c b/contrib/ffmpeg/libavcodec/audioconvert.c
new file mode 100644
index 000000000..e6291ac6d
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/audioconvert.c
@@ -0,0 +1,79 @@
+/*
+ * audio conversion
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file audioconvert.c
+ * audio conversion
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "avcodec.h"
+
+int av_audio_convert(void *maybe_dspcontext_or_something_av_convert_specific,
+                     void *out[6], int out_stride[6], enum SampleFormat out_fmt,
+                     void * in[6], int  in_stride[6], enum SampleFormat  in_fmt, int len){
+    int ch;
+    const int isize= FFMIN( in_fmt+1, 4);
+    const int osize= FFMIN(out_fmt+1, 4);
+    const int fmt_pair= out_fmt + 5*in_fmt;
+
+    //FIXME optimize common cases
+
+    for(ch=0; ch<6; ch++){
+        const int is=  in_stride[ch] * isize;
+        const int os= out_stride[ch] * osize;
+        uint8_t *pi=  in[ch];
+        uint8_t *po= out[ch];
+        uint8_t *end= po + os;
+        if(!out[ch])
+            continue;
+
+#define CONV(ofmt, otype, ifmt, expr)\
+if(fmt_pair == ofmt + 5*ifmt){\
+    do{\
+        *(otype*)po = expr; pi += is; po += os;\
+    }while(po < end);\
+}
+
+//FIXME put things below under ifdefs so we dont waste space for cases no codec will need
+//FIXME rounding and cliping ?
+
+             CONV(SAMPLE_FMT_U8 , uint8_t, SAMPLE_FMT_U8 ,  *(uint8_t*)pi)
+        else CONV(SAMPLE_FMT_S16, int16_t, SAMPLE_FMT_U8 , (*(uint8_t*)pi - 0x80)<<8)
+        else CONV(SAMPLE_FMT_S32, int32_t, SAMPLE_FMT_U8 , (*(uint8_t*)pi - 0x80)<<24)
+        else CONV(SAMPLE_FMT_FLT, float  , SAMPLE_FMT_U8 , (*(uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
+        else CONV(SAMPLE_FMT_U8 , uint8_t, SAMPLE_FMT_S16, (*(int16_t*)pi>>8) + 0x80)
+        else CONV(SAMPLE_FMT_S16, int16_t, SAMPLE_FMT_S16,  *(int16_t*)pi)
+        else CONV(SAMPLE_FMT_S32, int32_t, SAMPLE_FMT_S16,  *(int16_t*)pi<<16)
+        else CONV(SAMPLE_FMT_FLT, float  , SAMPLE_FMT_S16,  *(int16_t*)pi*(1.0 / (1<<15)))
+        else CONV(SAMPLE_FMT_U8 , uint8_t, SAMPLE_FMT_S32, (*(int32_t*)pi>>24) + 0x80)
+        else CONV(SAMPLE_FMT_S16, int16_t, SAMPLE_FMT_S32,  *(int32_t*)pi>>16)
+        else CONV(SAMPLE_FMT_S32, int32_t, SAMPLE_FMT_S32,  *(int32_t*)pi)
+        else CONV(SAMPLE_FMT_FLT, float  , SAMPLE_FMT_S32,  *(int32_t*)pi*(1.0 / (1<<31)))
+        else CONV(SAMPLE_FMT_U8 , uint8_t, SAMPLE_FMT_FLT, lrintf(*(float*)pi * (1<<7)) + 0x80)
+        else CONV(SAMPLE_FMT_S16, int16_t, SAMPLE_FMT_FLT, lrintf(*(float*)pi * (1<<15)))
+        else CONV(SAMPLE_FMT_S32, int32_t, SAMPLE_FMT_FLT, lrintf(*(float*)pi * (1<<31)))
+        else CONV(SAMPLE_FMT_FLT, float  , SAMPLE_FMT_FLT, *(float*)pi)
+        else return -1;
+    }
+    return 0;
+}
diff --git a/contrib/ffmpeg/libavcodec/avcodec.h b/contrib/ffmpeg/libavcodec/avcodec.h
new file mode 100644
index 000000000..da063b562
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/avcodec.h
@@ -0,0 +1,2707 @@
+/*
+ * copyright (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef AVCODEC_H
+#define AVCODEC_H
+
+/**
+ * @file avcodec.h
+ * external api header.
+ */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "avutil.h"
+#include <sys/types.h> /* size_t */
+
+#define AV_STRINGIFY(s)         AV_TOSTRING(s)
+#define AV_TOSTRING(s) #s
+
+#define LIBAVCODEC_VERSION_INT  ((51<<16)+(25<<8)+0)
+#define LIBAVCODEC_VERSION      51.25.0
+#define LIBAVCODEC_BUILD        LIBAVCODEC_VERSION_INT
+
+#define LIBAVCODEC_IDENT        "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
+
+#define AV_NOPTS_VALUE          int64_t_C(0x8000000000000000)
+#define AV_TIME_BASE            1000000
+#define AV_TIME_BASE_Q          (AVRational){1, AV_TIME_BASE}
+
+enum CodecID {
+    CODEC_ID_NONE,
+    CODEC_ID_MPEG1VIDEO,
+    CODEC_ID_MPEG2VIDEO, /* prefered ID for MPEG Video 1 or 2 decoding */
+    CODEC_ID_MPEG2VIDEO_XVMC,
+    CODEC_ID_H261,
+    CODEC_ID_H263,
+    CODEC_ID_RV10,
+    CODEC_ID_RV20,
+    CODEC_ID_MJPEG,
+    CODEC_ID_MJPEGB,
+    CODEC_ID_LJPEG,
+    CODEC_ID_SP5X,
+    CODEC_ID_JPEGLS,
+    CODEC_ID_MPEG4,
+    CODEC_ID_RAWVIDEO,
+    CODEC_ID_MSMPEG4V1,
+    CODEC_ID_MSMPEG4V2,
+    CODEC_ID_MSMPEG4V3,
+    CODEC_ID_WMV1,
+    CODEC_ID_WMV2,
+    CODEC_ID_H263P,
+    CODEC_ID_H263I,
+    CODEC_ID_FLV1,
+    CODEC_ID_SVQ1,
+    CODEC_ID_SVQ3,
+    CODEC_ID_DVVIDEO,
+    CODEC_ID_HUFFYUV,
+    CODEC_ID_CYUV,
+    CODEC_ID_H264,
+    CODEC_ID_INDEO3,
+    CODEC_ID_VP3,
+    CODEC_ID_THEORA,
+    CODEC_ID_ASV1,
+    CODEC_ID_ASV2,
+    CODEC_ID_FFV1,
+    CODEC_ID_4XM,
+    CODEC_ID_VCR1,
+    CODEC_ID_CLJR,
+    CODEC_ID_MDEC,
+    CODEC_ID_ROQ,
+    CODEC_ID_INTERPLAY_VIDEO,
+    CODEC_ID_XAN_WC3,
+    CODEC_ID_XAN_WC4,
+    CODEC_ID_RPZA,
+    CODEC_ID_CINEPAK,
+    CODEC_ID_WS_VQA,
+    CODEC_ID_MSRLE,
+    CODEC_ID_MSVIDEO1,
+    CODEC_ID_IDCIN,
+    CODEC_ID_8BPS,
+    CODEC_ID_SMC,
+    CODEC_ID_FLIC,
+    CODEC_ID_TRUEMOTION1,
+    CODEC_ID_VMDVIDEO,
+    CODEC_ID_MSZH,
+    CODEC_ID_ZLIB,
+    CODEC_ID_QTRLE,
+    CODEC_ID_SNOW,
+    CODEC_ID_TSCC,
+    CODEC_ID_ULTI,
+    CODEC_ID_QDRAW,
+    CODEC_ID_VIXL,
+    CODEC_ID_QPEG,
+    CODEC_ID_XVID,
+    CODEC_ID_PNG,
+    CODEC_ID_PPM,
+    CODEC_ID_PBM,
+    CODEC_ID_PGM,
+    CODEC_ID_PGMYUV,
+    CODEC_ID_PAM,
+    CODEC_ID_FFVHUFF,
+    CODEC_ID_RV30,
+    CODEC_ID_RV40,
+    CODEC_ID_VC1,
+    CODEC_ID_WMV3,
+    CODEC_ID_LOCO,
+    CODEC_ID_WNV1,
+    CODEC_ID_AASC,
+    CODEC_ID_INDEO2,
+    CODEC_ID_FRAPS,
+    CODEC_ID_TRUEMOTION2,
+    CODEC_ID_BMP,
+    CODEC_ID_CSCD,
+    CODEC_ID_MMVIDEO,
+    CODEC_ID_ZMBV,
+    CODEC_ID_AVS,
+    CODEC_ID_SMACKVIDEO,
+    CODEC_ID_NUV,
+    CODEC_ID_KMVC,
+    CODEC_ID_FLASHSV,
+    CODEC_ID_CAVS,
+    CODEC_ID_JPEG2000,
+    CODEC_ID_VMNC,
+    CODEC_ID_VP5,
+    CODEC_ID_VP6,
+    CODEC_ID_VP6F,
+    CODEC_ID_TARGA,
+    CODEC_ID_DSICINVIDEO,
+    CODEC_ID_TIERTEXSEQVIDEO,
+    CODEC_ID_TIFF,
+    CODEC_ID_GIF,
+
+    /* various pcm "codecs" */
+    CODEC_ID_PCM_S16LE= 0x10000,
+    CODEC_ID_PCM_S16BE,
+    CODEC_ID_PCM_U16LE,
+    CODEC_ID_PCM_U16BE,
+    CODEC_ID_PCM_S8,
+    CODEC_ID_PCM_U8,
+    CODEC_ID_PCM_MULAW,
+    CODEC_ID_PCM_ALAW,
+    CODEC_ID_PCM_S32LE,
+    CODEC_ID_PCM_S32BE,
+    CODEC_ID_PCM_U32LE,
+    CODEC_ID_PCM_U32BE,
+    CODEC_ID_PCM_S24LE,
+    CODEC_ID_PCM_S24BE,
+    CODEC_ID_PCM_U24LE,
+    CODEC_ID_PCM_U24BE,
+    CODEC_ID_PCM_S24DAUD,
+
+    /* various adpcm codecs */
+    CODEC_ID_ADPCM_IMA_QT= 0x11000,
+    CODEC_ID_ADPCM_IMA_WAV,
+    CODEC_ID_ADPCM_IMA_DK3,
+    CODEC_ID_ADPCM_IMA_DK4,
+    CODEC_ID_ADPCM_IMA_WS,
+    CODEC_ID_ADPCM_IMA_SMJPEG,
+    CODEC_ID_ADPCM_MS,
+    CODEC_ID_ADPCM_4XM,
+    CODEC_ID_ADPCM_XA,
+    CODEC_ID_ADPCM_ADX,
+    CODEC_ID_ADPCM_EA,
+    CODEC_ID_ADPCM_G726,
+    CODEC_ID_ADPCM_CT,
+    CODEC_ID_ADPCM_SWF,
+    CODEC_ID_ADPCM_YAMAHA,
+    CODEC_ID_ADPCM_SBPRO_4,
+    CODEC_ID_ADPCM_SBPRO_3,
+    CODEC_ID_ADPCM_SBPRO_2,
+
+    /* AMR */
+    CODEC_ID_AMR_NB= 0x12000,
+    CODEC_ID_AMR_WB,
+
+    /* RealAudio codecs*/
+    CODEC_ID_RA_144= 0x13000,
+    CODEC_ID_RA_288,
+
+    /* various DPCM codecs */
+    CODEC_ID_ROQ_DPCM= 0x14000,
+    CODEC_ID_INTERPLAY_DPCM,
+    CODEC_ID_XAN_DPCM,
+    CODEC_ID_SOL_DPCM,
+
+    CODEC_ID_MP2= 0x15000,
+    CODEC_ID_MP3, /* prefered ID for MPEG Audio layer 1, 2 or3 decoding */
+    CODEC_ID_AAC,
+#if LIBAVCODEC_VERSION_INT < ((52<<16)+(0<<8)+0)
+    CODEC_ID_MPEG4AAC,
+#endif
+    CODEC_ID_AC3,
+    CODEC_ID_DTS,
+    CODEC_ID_VORBIS,
+    CODEC_ID_DVAUDIO,
+    CODEC_ID_WMAV1,
+    CODEC_ID_WMAV2,
+    CODEC_ID_MACE3,
+    CODEC_ID_MACE6,
+    CODEC_ID_VMDAUDIO,
+    CODEC_ID_SONIC,
+    CODEC_ID_SONIC_LS,
+    CODEC_ID_FLAC,
+    CODEC_ID_MP3ADU,
+    CODEC_ID_MP3ON4,
+    CODEC_ID_SHORTEN,
+    CODEC_ID_ALAC,
+    CODEC_ID_WESTWOOD_SND1,
+    CODEC_ID_GSM,
+    CODEC_ID_QDM2,
+    CODEC_ID_COOK,
+    CODEC_ID_TRUESPEECH,
+    CODEC_ID_TTA,
+    CODEC_ID_SMACKAUDIO,
+    CODEC_ID_QCELP,
+    CODEC_ID_WAVPACK,
+    CODEC_ID_DSICINAUDIO,
+    CODEC_ID_IMC,
+
+    /* subtitle codecs */
+    CODEC_ID_DVD_SUBTITLE= 0x17000,
+    CODEC_ID_DVB_SUBTITLE,
+
+    CODEC_ID_MPEG2TS= 0x20000, /* _FAKE_ codec to indicate a raw MPEG2 transport
+                         stream (only used by libavformat) */
+};
+
+/* CODEC_ID_MP3LAME is absolete */
+#define CODEC_ID_MP3LAME CODEC_ID_MP3
+
+enum CodecType {
+    CODEC_TYPE_UNKNOWN = -1,
+    CODEC_TYPE_VIDEO,
+    CODEC_TYPE_AUDIO,
+    CODEC_TYPE_DATA,
+    CODEC_TYPE_SUBTITLE,
+};
+
+/* currently unused, may be used if 24/32 bits samples ever supported */
+/* all in native endian */
+enum SampleFormat {
+    SAMPLE_FMT_NONE = -1,
+    SAMPLE_FMT_U8,              ///< unsigned 8 bits
+    SAMPLE_FMT_S16,             ///< signed 16 bits
+    SAMPLE_FMT_S24,             ///< signed 24 bits
+    SAMPLE_FMT_S32,             ///< signed 32 bits
+    SAMPLE_FMT_FLT,             ///< float
+};
+
+/* in bytes */
+#define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32bit audio
+
+/**
+ * Required number of additionally allocated bytes at the end of the input bitstream for decoding.
+ * this is mainly needed because some optimized bitstream readers read
+ * 32 or 64 bit at once and could read over the end<br>
+ * Note, if the first 23 bits of the additional bytes are not 0 then damaged
+ * MPEG bitstreams could cause overread and segfault
+ */
+#define FF_INPUT_BUFFER_PADDING_SIZE 8
+
+/**
+ * minimum encoding buffer size.
+ * used to avoid some checks during header writing
+ */
+#define FF_MIN_BUFFER_SIZE 16384
+
+/* motion estimation type, EPZS by default */
+enum Motion_Est_ID {
+    ME_ZERO = 1,
+    ME_FULL,
+    ME_LOG,
+    ME_PHODS,
+    ME_EPZS,
+    ME_X1,
+    ME_HEX,
+    ME_UMH,
+    ME_ITER,
+};
+
+enum AVDiscard{
+//we leave some space between them for extensions (drop some keyframes for intra only or drop just some bidir frames)
+    AVDISCARD_NONE   =-16, ///< discard nothing
+    AVDISCARD_DEFAULT=  0, ///< discard useless packets like 0 size packets in avi
+    AVDISCARD_NONREF =  8, ///< discard all non reference
+    AVDISCARD_BIDIR  = 16, ///< discard all bidirectional frames
+    AVDISCARD_NONKEY = 32, ///< discard all frames except keyframes
+    AVDISCARD_ALL    = 48, ///< discard all
+};
+
+typedef struct RcOverride{
+    int start_frame;
+    int end_frame;
+    int qscale; // if this is 0 then quality_factor will be used instead
+    float quality_factor;
+} RcOverride;
+
+#define FF_MAX_B_FRAMES 16
+
+/* encoding support
+   these flags can be passed in AVCodecContext.flags before initing
+   Note: not everything is supported yet.
+*/
+
+#define CODEC_FLAG_QSCALE 0x0002  ///< use fixed qscale
+#define CODEC_FLAG_4MV    0x0004  ///< 4 MV per MB allowed / Advanced prediction for H263
+#define CODEC_FLAG_QPEL   0x0010  ///< use qpel MC
+#define CODEC_FLAG_GMC    0x0020  ///< use GMC
+#define CODEC_FLAG_MV0    0x0040  ///< always try a MB with MV=<0,0>
+#define CODEC_FLAG_PART   0x0080  ///< use data partitioning
+/* parent program gurantees that the input for b-frame containing streams is not written to
+   for at least s->max_b_frames+1 frames, if this is not set than the input will be copied */
+#define CODEC_FLAG_INPUT_PRESERVED 0x0100
+#define CODEC_FLAG_PASS1 0x0200   ///< use internal 2pass ratecontrol in first  pass mode
+#define CODEC_FLAG_PASS2 0x0400   ///< use internal 2pass ratecontrol in second pass mode
+#define CODEC_FLAG_EXTERN_HUFF 0x1000 ///< use external huffman table (for mjpeg)
+#define CODEC_FLAG_GRAY  0x2000   ///< only decode/encode grayscale
+#define CODEC_FLAG_EMU_EDGE 0x4000///< don't draw edges
+#define CODEC_FLAG_PSNR           0x8000 ///< error[?] variables will be set during encoding
+#define CODEC_FLAG_TRUNCATED  0x00010000 /** input bitstream might be truncated at a random location instead
+                                            of only at frame boundaries */
+#define CODEC_FLAG_NORMALIZE_AQP  0x00020000 ///< normalize adaptive quantization
+#define CODEC_FLAG_INTERLACED_DCT 0x00040000 ///< use interlaced dct
+#define CODEC_FLAG_LOW_DELAY      0x00080000 ///< force low delay
+#define CODEC_FLAG_ALT_SCAN       0x00100000 ///< use alternate scan
+#define CODEC_FLAG_TRELLIS_QUANT  0x00200000 ///< use trellis quantization
+#define CODEC_FLAG_GLOBAL_HEADER  0x00400000 ///< place global headers in extradata instead of every keyframe
+#define CODEC_FLAG_BITEXACT       0x00800000 ///< use only bitexact stuff (except (i)dct)
+/* Fx : Flag for h263+ extra options */
+#define CODEC_FLAG_H263P_AIC      0x01000000 ///< H263 Advanced intra coding / MPEG4 AC prediction (remove this)
+#define CODEC_FLAG_AC_PRED        0x01000000 ///< H263 Advanced intra coding / MPEG4 AC prediction
+#define CODEC_FLAG_H263P_UMV      0x02000000 ///< Unlimited motion vector
+#define CODEC_FLAG_CBP_RD         0x04000000 ///< use rate distortion optimization for cbp
+#define CODEC_FLAG_QP_RD          0x08000000 ///< use rate distortion optimization for qp selectioon
+#define CODEC_FLAG_H263P_AIV      0x00000008 ///< H263 Alternative inter vlc
+#define CODEC_FLAG_OBMC           0x00000001 ///< OBMC
+#define CODEC_FLAG_LOOP_FILTER    0x00000800 ///< loop filter
+#define CODEC_FLAG_H263P_SLICE_STRUCT 0x10000000
+#define CODEC_FLAG_INTERLACED_ME  0x20000000 ///< interlaced motion estimation
+#define CODEC_FLAG_SVCD_SCAN_OFFSET 0x40000000 ///< will reserve space for SVCD scan offset user data
+#define CODEC_FLAG_CLOSED_GOP     ((int)0x80000000)
+#define CODEC_FLAG2_FAST          0x00000001 ///< allow non spec compliant speedup tricks
+#define CODEC_FLAG2_STRICT_GOP    0x00000002 ///< strictly enforce GOP size
+#define CODEC_FLAG2_NO_OUTPUT     0x00000004 ///< skip bitstream encoding
+#define CODEC_FLAG2_LOCAL_HEADER  0x00000008 ///< place global headers at every keyframe instead of in extradata
+#define CODEC_FLAG2_BPYRAMID      0x00000010 ///< H.264 allow b-frames to be used as references
+#define CODEC_FLAG2_WPRED         0x00000020 ///< H.264 weighted biprediction for b-frames
+#define CODEC_FLAG2_MIXED_REFS    0x00000040 ///< H.264 multiple references per partition
+#define CODEC_FLAG2_8X8DCT        0x00000080 ///< H.264 high profile 8x8 transform
+#define CODEC_FLAG2_FASTPSKIP     0x00000100 ///< H.264 fast pskip
+#define CODEC_FLAG2_AUD           0x00000200 ///< H.264 access unit delimiters
+#define CODEC_FLAG2_BRDO          0x00000400 ///< b-frame rate-distortion optimization
+#define CODEC_FLAG2_INTRA_VLC     0x00000800 ///< use MPEG-2 intra VLC table
+#define CODEC_FLAG2_MEMC_ONLY     0x00001000 ///< only do ME/MC (I frames -> ref, P frame -> ME+MC)
+#define CODEC_FLAG2_DROP_FRAME_TIMECODE 0x00002000 ///< timecode is in drop frame format
+
+/* Unsupported options :
+ *              Syntax Arithmetic coding (SAC)
+ *              Reference Picture Selection
+ *              Independant Segment Decoding */
+/* /Fx */
+/* codec capabilities */
+
+#define CODEC_CAP_DRAW_HORIZ_BAND 0x0001 ///< decoder can use draw_horiz_band callback
+/**
+ * Codec uses get_buffer() for allocating buffers.
+ * direct rendering method 1
+ */
+#define CODEC_CAP_DR1             0x0002
+/* if 'parse_only' field is true, then avcodec_parse_frame() can be
+   used */
+#define CODEC_CAP_PARSE_ONLY      0x0004
+#define CODEC_CAP_TRUNCATED       0x0008
+/* codec can export data for HW decoding (XvMC) */
+#define CODEC_CAP_HWACCEL         0x0010
+/**
+ * codec has a non zero delay and needs to be feeded with NULL at the end to get the delayed data.
+ * if this is not set, the codec is guranteed to never be feeded with NULL data
+ */
+#define CODEC_CAP_DELAY           0x0020
+/**
+ * Codec can be fed a final frame with a smaller size.
+ * This can be used to prevent truncation of the last audio samples.
+ */
+#define CODEC_CAP_SMALL_LAST_FRAME 0x0040
+
+//the following defines may change, don't expect compatibility if you use them
+#define MB_TYPE_INTRA4x4   0x0001
+#define MB_TYPE_INTRA16x16 0x0002 //FIXME h264 specific
+#define MB_TYPE_INTRA_PCM  0x0004 //FIXME h264 specific
+#define MB_TYPE_16x16      0x0008
+#define MB_TYPE_16x8       0x0010
+#define MB_TYPE_8x16       0x0020
+#define MB_TYPE_8x8        0x0040
+#define MB_TYPE_INTERLACED 0x0080
+#define MB_TYPE_DIRECT2     0x0100 //FIXME
+#define MB_TYPE_ACPRED     0x0200
+#define MB_TYPE_GMC        0x0400
+#define MB_TYPE_SKIP       0x0800
+#define MB_TYPE_P0L0       0x1000
+#define MB_TYPE_P1L0       0x2000
+#define MB_TYPE_P0L1       0x4000
+#define MB_TYPE_P1L1       0x8000
+#define MB_TYPE_L0         (MB_TYPE_P0L0 | MB_TYPE_P1L0)
+#define MB_TYPE_L1         (MB_TYPE_P0L1 | MB_TYPE_P1L1)
+#define MB_TYPE_L0L1       (MB_TYPE_L0   | MB_TYPE_L1)
+#define MB_TYPE_QUANT      0x00010000
+#define MB_TYPE_CBP        0x00020000
+//Note bits 24-31 are reserved for codec specific use (h264 ref0, mpeg1 0mv, ...)
+
+/**
+ * Pan Scan area.
+ * this specifies the area which should be displayed. Note there may be multiple such areas for one frame
+ */
+typedef struct AVPanScan{
+    /**
+     * id.
+     * - encoding: set by user.
+     * - decoding: set by lavc
+     */
+    int id;
+
+    /**
+     * width and height in 1/16 pel
+     * - encoding: set by user.
+     * - decoding: set by lavc
+     */
+    int width;
+    int height;
+
+    /**
+     * position of the top left corner in 1/16 pel for up to 3 fields/frames.
+     * - encoding: set by user.
+     * - decoding: set by lavc
+     */
+    int16_t position[3][2];
+}AVPanScan;
+
+#define FF_COMMON_FRAME \
+    /**\
+     * pointer to the picture planes.\
+     * this might be different from the first allocated byte\
+     * - encoding: \
+     * - decoding: \
+     */\
+    uint8_t *data[4];\
+    int linesize[4];\
+    /**\
+     * pointer to the first allocated byte of the picture. can be used in get_buffer/release_buffer\
+     * this isn't used by lavc unless the default get/release_buffer() is used\
+     * - encoding: \
+     * - decoding: \
+     */\
+    uint8_t *base[4];\
+    /**\
+     * 1 -> keyframe, 0-> not\
+     * - encoding: set by lavc\
+     * - decoding: set by lavc\
+     */\
+    int key_frame;\
+\
+    /**\
+     * picture type of the frame, see ?_TYPE below.\
+     * - encoding: set by lavc for coded_picture (and set by user for input)\
+     * - decoding: set by lavc\
+     */\
+    int pict_type;\
+\
+    /**\
+     * presentation timestamp in time_base units (time when frame should be shown to user)\
+     * if AV_NOPTS_VALUE then frame_rate = 1/time_base will be assumed\
+     * - encoding: MUST be set by user\
+     * - decoding: set by lavc\
+     */\
+    int64_t pts;\
+\
+    /**\
+     * picture number in bitstream order.\
+     * - encoding: set by\
+     * - decoding: set by lavc\
+     */\
+    int coded_picture_number;\
+    /**\
+     * picture number in display order.\
+     * - encoding: set by\
+     * - decoding: set by lavc\
+     */\
+    int display_picture_number;\
+\
+    /**\
+     * quality (between 1 (good) and FF_LAMBDA_MAX (bad)) \
+     * - encoding: set by lavc for coded_picture (and set by user for input)\
+     * - decoding: set by lavc\
+     */\
+    int quality; \
+\
+    /**\
+     * buffer age (1->was last buffer and dint change, 2->..., ...).\
+     * set to INT_MAX if the buffer has not been used yet \
+     * - encoding: unused\
+     * - decoding: MUST be set by get_buffer()\
+     */\
+    int age;\
+\
+    /**\
+     * is this picture used as reference\
+     * - encoding: unused\
+     * - decoding: set by lavc (before get_buffer() call))\
+     */\
+    int reference;\
+\
+    /**\
+     * QP table\
+     * - encoding: unused\
+     * - decoding: set by lavc\
+     */\
+    int8_t *qscale_table;\
+    /**\
+     * QP store stride\
+     * - encoding: unused\
+     * - decoding: set by lavc\
+     */\
+    int qstride;\
+\
+    /**\
+     * mbskip_table[mb]>=1 if MB didnt change\
+     * stride= mb_width = (width+15)>>4\
+     * - encoding: unused\
+     * - decoding: set by lavc\
+     */\
+    uint8_t *mbskip_table;\
+\
+    /**\
+     * Motion vector table.\
+     * @code\
+     * example:\
+     * int mv_sample_log2= 4 - motion_subsample_log2;\
+     * int mb_width= (width+15)>>4;\
+     * int mv_stride= (mb_width << mv_sample_log2) + 1;\
+     * motion_val[direction][x + y*mv_stride][0->mv_x, 1->mv_y];\
+     * @endcode\
+     * - encoding: set by user\
+     * - decoding: set by lavc\
+     */\
+    int16_t (*motion_val[2])[2];\
+\
+    /**\
+     * Macroblock type table\
+     * mb_type_base + mb_width + 2\
+     * - encoding: set by user\
+     * - decoding: set by lavc\
+     */\
+    uint32_t *mb_type;\
+\
+    /**\
+     * log2 of the size of the block which a single vector in motion_val represents: \
+     * (4->16x16, 3->8x8, 2-> 4x4, 1-> 2x2)\
+     * - encoding: unused\
+     * - decoding: set by lavc\
+     */\
+    uint8_t motion_subsample_log2;\
+\
+    /**\
+     * for some private data of the user\
+     * - encoding: unused\
+     * - decoding: set by user\
+     */\
+    void *opaque;\
+\
+    /**\
+     * error\
+     * - encoding: set by lavc if flags&CODEC_FLAG_PSNR\
+     * - decoding: unused\
+     */\
+    uint64_t error[4];\
+\
+    /**\
+     * type of the buffer (to keep track of who has to dealloc data[*])\
+     * - encoding: set by the one who allocs it\
+     * - decoding: set by the one who allocs it\
+     * Note: user allocated (direct rendering) & internal buffers can not coexist currently\
+     */\
+    int type;\
+    \
+    /**\
+     * when decoding, this signal how much the picture must be delayed.\
+     * extra_delay = repeat_pict / (2*fps)\
+     * - encoding: unused\
+     * - decoding: set by lavc\
+     */\
+    int repeat_pict;\
+    \
+    /**\
+     * \
+     */\
+    int qscale_type;\
+    \
+    /**\
+     * The content of the picture is interlaced.\
+     * - encoding: set by user\
+     * - decoding: set by lavc (default 0)\
+     */\
+    int interlaced_frame;\
+    \
+    /**\
+     * if the content is interlaced, is top field displayed first.\
+     * - encoding: set by user\
+     * - decoding: set by lavc\
+     */\
+    int top_field_first;\
+    \
+    /**\
+     * Pan scan.\
+     * - encoding: set by user\
+     * - decoding: set by lavc\
+     */\
+    AVPanScan *pan_scan;\
+    \
+    /**\
+     * tell user application that palette has changed from previous frame.\
+     * - encoding: ??? (no palette-enabled encoder yet)\
+     * - decoding: set by lavc (default 0)\
+     */\
+    int palette_has_changed;\
+    \
+    /**\
+     * Codec suggestion on buffer type if != 0\
+     * - encoding: unused\
+     * - decoding: set by lavc (before get_buffer() call))\
+     */\
+    int buffer_hints;\
+\
+    /**\
+     * DCT coeffitients\
+     * - encoding: unused\
+     * - decoding: set by lavc\
+     */\
+    short *dct_coeff;\
+\
+    /**\
+     * Motion referece frame index\
+     * - encoding: set by user\
+     * - decoding: set by lavc\
+     */\
+    int8_t *ref_index[2];
+
+#define FF_QSCALE_TYPE_MPEG1 0
+#define FF_QSCALE_TYPE_MPEG2 1
+#define FF_QSCALE_TYPE_H264  2
+
+#define FF_BUFFER_TYPE_INTERNAL 1
+#define FF_BUFFER_TYPE_USER     2 ///< Direct rendering buffers (image is (de)allocated by user)
+#define FF_BUFFER_TYPE_SHARED   4 ///< buffer from somewhere else, don't dealloc image (data/base), all other tables are not shared
+#define FF_BUFFER_TYPE_COPY     8 ///< just a (modified) copy of some other buffer, don't dealloc anything
+
+
+#define FF_I_TYPE 1 // Intra
+#define FF_P_TYPE 2 // Predicted
+#define FF_B_TYPE 3 // Bi-dir predicted
+#define FF_S_TYPE 4 // S(GMC)-VOP MPEG4
+#define FF_SI_TYPE 5
+#define FF_SP_TYPE 6
+
+#define FF_BUFFER_HINTS_VALID    0x01 // Buffer hints value is meaningful (if 0 ignore)
+#define FF_BUFFER_HINTS_READABLE 0x02 // Codec will read from buffer
+#define FF_BUFFER_HINTS_PRESERVE 0x04 // User must not alter buffer content
+#define FF_BUFFER_HINTS_REUSABLE 0x08 // Codec will reuse the buffer (update)
+
+/**
+ * Audio Video Frame.
+ */
+typedef struct AVFrame {
+    FF_COMMON_FRAME
+} AVFrame;
+
+#define DEFAULT_FRAME_RATE_BASE 1001000
+
+/**
+ * main external api structure.
+ */
+typedef struct AVCodecContext {
+    /**
+     * Info on struct for av_log
+     * - set by avcodec_alloc_context
+     */
+    AVClass *av_class;
+    /**
+     * the average bitrate.
+     * - encoding: set by user. unused for constant quantizer encoding
+     * - decoding: set by lavc. 0 or some bitrate if this info is available in the stream
+     */
+    int bit_rate;
+
+    /**
+     * number of bits the bitstream is allowed to diverge from the reference.
+     *           the reference can be CBR (for CBR pass1) or VBR (for pass2)
+     * - encoding: set by user. unused for constant quantizer encoding
+     * - decoding: unused
+     */
+    int bit_rate_tolerance;
+
+    /**
+     * CODEC_FLAG_*.
+     * - encoding: set by user.
+     * - decoding: set by user.
+     */
+    int flags;
+
+    /**
+     * some codecs needs additionnal format info. It is stored here
+     * - encoding: set by user.
+     * - decoding: set by lavc. (FIXME is this ok?)
+     */
+    int sub_id;
+
+    /**
+     * motion estimation algorithm used for video coding.
+     * 1 (zero), 2 (full), 3 (log), 4 (phods), 5 (epzs), 6 (x1), 7 (hex),
+     * 8 (umh), 9 (iter) [7, 8 are x264 specific, 9 is snow specific]
+     * - encoding: MUST be set by user.
+     * - decoding: unused
+     */
+    int me_method;
+
+    /**
+     * some codecs need / can use extra-data like huffman tables.
+     * mjpeg: huffman tables
+     * rv10: additional flags
+     * mpeg4: global headers (they can be in the bitstream or here)
+     * the allocated memory should be FF_INPUT_BUFFER_PADDING_SIZE bytes larger
+     * then extradata_size to avoid prolems if its read with the bitstream reader
+     * the bytewise contents of extradata must not depend on the architecture or cpu endianness
+     * - encoding: set/allocated/freed by lavc.
+     * - decoding: set/allocated/freed by user.
+     */
+    uint8_t *extradata;
+    int extradata_size;
+
+    /**
+     * this is the fundamental unit of time (in seconds) in terms
+     * of which frame timestamps are represented. for fixed-fps content,
+     * timebase should be 1/framerate and timestamp increments should be
+     * identically 1.
+     * - encoding: MUST be set by user
+     * - decoding: set by lavc.
+     */
+    AVRational time_base;
+
+    /* video only */
+    /**
+     * picture width / height.
+     * - encoding: MUST be set by user.
+     * - decoding: set by lavc.
+     * Note, for compatibility its possible to set this instead of
+     * coded_width/height before decoding
+     */
+    int width, height;
+
+#define FF_ASPECT_EXTENDED 15
+
+    /**
+     * the number of pictures in a group of pitures, or 0 for intra_only.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int gop_size;
+
+    /**
+     * pixel format, see PIX_FMT_xxx.
+     * - encoding: set by user.
+     * - decoding: set by lavc.
+     */
+    enum PixelFormat pix_fmt;
+
+    /**
+     * Frame rate emulation. If not zero lower layer (i.e. format handler)
+     * has to read frames at native frame rate.
+     * - encoding: set by user.
+     * - decoding: unused.
+     */
+    int rate_emu;
+
+    /**
+     * if non NULL, 'draw_horiz_band' is called by the libavcodec
+     * decoder to draw an horizontal band. It improve cache usage. Not
+     * all codecs can do that. You must check the codec capabilities
+     * before
+     * - encoding: unused
+     * - decoding: set by user.
+     * @param height the height of the slice
+     * @param y the y position of the slice
+     * @param type 1->top field, 2->bottom field, 3->frame
+     * @param offset offset into the AVFrame.data from which the slice should be read
+     */
+    void (*draw_horiz_band)(struct AVCodecContext *s,
+                            const AVFrame *src, int offset[4],
+                            int y, int type, int height);
+
+    /* audio only */
+    int sample_rate; ///< samples per sec
+    int channels;
+
+    /**
+     * audio sample format.
+     * - encoding: set by user.
+     * - decoding: set by lavc.
+     */
+    enum SampleFormat sample_fmt;  ///< sample format, currenly unused
+
+    /* the following data should not be initialized */
+    /**
+     * samples per packet. initialized when calling 'init'
+     */
+    int frame_size;
+    int frame_number;   ///< audio or video frame number
+    int real_pict_num;  ///< returns the real picture number of previous encoded frame
+
+    /**
+     * number of frames the decoded output will be delayed relative to
+     * the encoded input.
+     * - encoding: set by lavc.
+     * - decoding: unused
+     */
+    int delay;
+
+    /* - encoding parameters */
+    float qcompress;  ///< amount of qscale change between easy & hard scenes (0.0-1.0)
+    float qblur;      ///< amount of qscale smoothing over time (0.0-1.0)
+
+    /**
+     * minimum quantizer.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int qmin;
+
+    /**
+     * maximum quantizer.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int qmax;
+
+    /**
+     * maximum quantizer difference etween frames.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int max_qdiff;
+
+    /**
+     * maximum number of b frames between non b frames.
+     * note: the output will be delayed by max_b_frames+1 relative to the input
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int max_b_frames;
+
+    /**
+     * qscale factor between ip and b frames.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    float b_quant_factor;
+
+    /** obsolete FIXME remove */
+    int rc_strategy;
+#define FF_RC_STRATEGY_XVID 1
+
+    int b_frame_strategy;
+
+    /**
+     * hurry up amount.
+     * deprecated in favor of skip_idct and skip_frame
+     * - encoding: unused
+     * - decoding: set by user. 1-> skip b frames, 2-> skip idct/dequant too, 5-> skip everything except header
+     */
+    int hurry_up;
+
+    struct AVCodec *codec;
+
+    void *priv_data;
+
+    /* unused, FIXME remove*/
+    int rtp_mode;
+
+    int rtp_payload_size;   /* The size of the RTP payload: the coder will  */
+                            /* do it's best to deliver a chunk with size    */
+                            /* below rtp_payload_size, the chunk will start */
+                            /* with a start code on some codecs like H.263  */
+                            /* This doesn't take account of any particular  */
+                            /* headers inside the transmited RTP payload    */
+
+
+    /* The RTP callback: This function is called   */
+    /* every time the encoder has a packet to send */
+    /* Depends on the encoder if the data starts   */
+    /* with a Start Code (it should) H.263 does.   */
+    /* mb_nb contains the number of macroblocks    */
+    /* encoded in the RTP payload                  */
+    void (*rtp_callback)(struct AVCodecContext *avctx, void *data, int size, int mb_nb);
+
+    /* statistics, used for 2-pass encoding */
+    int mv_bits;
+    int header_bits;
+    int i_tex_bits;
+    int p_tex_bits;
+    int i_count;
+    int p_count;
+    int skip_count;
+    int misc_bits;
+
+    /**
+     * number of bits used for the previously encoded frame.
+     * - encoding: set by lavc
+     * - decoding: unused
+     */
+    int frame_bits;
+
+    /**
+     * private data of the user, can be used to carry app specific stuff.
+     * - encoding: set by user
+     * - decoding: set by user
+     */
+    void *opaque;
+
+    char codec_name[32];
+    enum CodecType codec_type; /* see CODEC_TYPE_xxx */
+    enum CodecID codec_id; /* see CODEC_ID_xxx */
+
+    /**
+     * fourcc (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A').
+     * this is used to workaround some encoder bugs
+     * - encoding: set by user, if not then the default based on codec_id will be used
+     * - decoding: set by user, will be converted to upper case by lavc during init
+     */
+    unsigned int codec_tag;
+
+    /**
+     * workaround bugs in encoders which sometimes cannot be detected automatically.
+     * - encoding: set by user
+     * - decoding: set by user
+     */
+    int workaround_bugs;
+#define FF_BUG_AUTODETECT       1  ///< autodetection
+#define FF_BUG_OLD_MSMPEG4      2
+#define FF_BUG_XVID_ILACE       4
+#define FF_BUG_UMP4             8
+#define FF_BUG_NO_PADDING       16
+#define FF_BUG_AMV              32
+#define FF_BUG_AC_VLC           0  ///< will be removed, libavcodec can now handle these non compliant files by default
+#define FF_BUG_QPEL_CHROMA      64
+#define FF_BUG_STD_QPEL         128
+#define FF_BUG_QPEL_CHROMA2     256
+#define FF_BUG_DIRECT_BLOCKSIZE 512
+#define FF_BUG_EDGE             1024
+#define FF_BUG_HPEL_CHROMA      2048
+#define FF_BUG_DC_CLIP          4096
+#define FF_BUG_MS               8192 ///< workaround various bugs in microsofts broken decoders
+//#define FF_BUG_FAKE_SCALABILITY 16 //autodetection should work 100%
+
+    /**
+     * luma single coeff elimination threshold.
+     * - encoding: set by user
+     * - decoding: unused
+     */
+    int luma_elim_threshold;
+
+    /**
+     * chroma single coeff elimination threshold.
+     * - encoding: set by user
+     * - decoding: unused
+     */
+    int chroma_elim_threshold;
+
+    /**
+     * strictly follow the std (MPEG4, ...).
+     * - encoding: set by user
+     * - decoding: unused
+     */
+    int strict_std_compliance;
+#define FF_COMPLIANCE_VERY_STRICT   2 ///< strictly conform to a older more strict version of the spec or reference software
+#define FF_COMPLIANCE_STRICT        1 ///< strictly conform to all the things in the spec no matter what consequences
+#define FF_COMPLIANCE_NORMAL        0
+#define FF_COMPLIANCE_INOFFICIAL   -1 ///< allow inofficial extensions
+#define FF_COMPLIANCE_EXPERIMENTAL -2 ///< allow non standarized experimental things
+
+    /**
+     * qscale offset between ip and b frames.
+     * if > 0 then the last p frame quantizer will be used (q= lastp_q*factor+offset)
+     * if < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset)
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    float b_quant_offset;
+
+    /**
+     * error resilience higher values will detect more errors but may missdetect
+     * some more or less valid parts as errors.
+     * - encoding: unused
+     * - decoding: set by user
+     */
+    int error_resilience;
+#define FF_ER_CAREFUL         1
+#define FF_ER_COMPLIANT       2
+#define FF_ER_AGGRESSIVE      3
+#define FF_ER_VERY_AGGRESSIVE 4
+
+    /**
+     * called at the beginning of each frame to get a buffer for it.
+     * if pic.reference is set then the frame will be read later by lavc
+     * avcodec_align_dimensions() should be used to find the required width and
+     * height, as they normally need to be rounded up to the next multiple of 16
+     * - encoding: unused
+     * - decoding: set by lavc, user can override
+     */
+    int (*get_buffer)(struct AVCodecContext *c, AVFrame *pic);
+
+    /**
+     * called to release buffers which where allocated with get_buffer.
+     * a released buffer can be reused in get_buffer()
+     * pic.data[*] must be set to NULL
+     * - encoding: unused
+     * - decoding: set by lavc, user can override
+     */
+    void (*release_buffer)(struct AVCodecContext *c, AVFrame *pic);
+
+    /**
+     * if 1 the stream has a 1 frame delay during decoding.
+     * - encoding: set by lavc
+     * - decoding: set by lavc
+     */
+    int has_b_frames;
+
+    /**
+     * number of bytes per packet if constant and known or 0
+     * used by some WAV based audio codecs
+     */
+    int block_align;
+
+    int parse_only; /* - decoding only: if true, only parsing is done
+                       (function avcodec_parse_frame()). The frame
+                       data is returned. Only MPEG codecs support this now. */
+
+    /**
+     * 0-> h263 quant 1-> mpeg quant.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int mpeg_quant;
+
+    /**
+     * pass1 encoding statistics output buffer.
+     * - encoding: set by lavc
+     * - decoding: unused
+     */
+    char *stats_out;
+
+    /**
+     * pass2 encoding statistics input buffer.
+     * concatenated stuff from stats_out of pass1 should be placed here
+     * - encoding: allocated/set/freed by user
+     * - decoding: unused
+     */
+    char *stats_in;
+
+    /**
+     * ratecontrol qmin qmax limiting method.
+     * 0-> clipping, 1-> use a nice continous function to limit qscale wthin qmin/qmax
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    float rc_qsquish;
+
+    float rc_qmod_amp;
+    int rc_qmod_freq;
+
+    /**
+     * ratecontrol override, see RcOverride.
+     * - encoding: allocated/set/freed by user.
+     * - decoding: unused
+     */
+    RcOverride *rc_override;
+    int rc_override_count;
+
+    /**
+     * rate control equation.
+     * - encoding: set by user
+     * - decoding: unused
+     */
+    char *rc_eq;
+
+    /**
+     * maximum bitrate.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int rc_max_rate;
+
+    /**
+     * minimum bitrate.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int rc_min_rate;
+
+    /**
+     * decoder bitstream buffer size.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int rc_buffer_size;
+    float rc_buffer_aggressivity;
+
+    /**
+     * qscale factor between p and i frames.
+     * if > 0 then the last p frame quantizer will be used (q= lastp_q*factor+offset)
+     * if < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset)
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    float i_quant_factor;
+
+    /**
+     * qscale offset between p and i frames.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    float i_quant_offset;
+
+    /**
+     * initial complexity for pass1 ratecontrol.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    float rc_initial_cplx;
+
+    /**
+     * dct algorithm, see FF_DCT_* below.
+     * - encoding: set by user
+     * - decoding: unused
+     */
+    int dct_algo;
+#define FF_DCT_AUTO    0
+#define FF_DCT_FASTINT 1
+#define FF_DCT_INT     2
+#define FF_DCT_MMX     3
+#define FF_DCT_MLIB    4
+#define FF_DCT_ALTIVEC 5
+#define FF_DCT_FAAN    6
+
+    /**
+     * luminance masking (0-> disabled).
+     * - encoding: set by user
+     * - decoding: unused
+     */
+    float lumi_masking;
+
+    /**
+     * temporary complexity masking (0-> disabled).
+     * - encoding: set by user
+     * - decoding: unused
+     */
+    float temporal_cplx_masking;
+
+    /**
+     * spatial complexity masking (0-> disabled).
+     * - encoding: set by user
+     * - decoding: unused
+     */
+    float spatial_cplx_masking;
+
+    /**
+     * p block masking (0-> disabled).
+     * - encoding: set by user
+     * - decoding: unused
+     */
+    float p_masking;
+
+    /**
+     * darkness masking (0-> disabled).
+     * - encoding: set by user
+     * - decoding: unused
+     */
+    float dark_masking;
+
+
+    /* for binary compatibility */
+    int unused;
+
+    /**
+     * idct algorithm, see FF_IDCT_* below.
+     * - encoding: set by user
+     * - decoding: set by user
+     */
+    int idct_algo;
+#define FF_IDCT_AUTO         0
+#define FF_IDCT_INT          1
+#define FF_IDCT_SIMPLE       2
+#define FF_IDCT_SIMPLEMMX    3
+#define FF_IDCT_LIBMPEG2MMX  4
+#define FF_IDCT_PS2          5
+#define FF_IDCT_MLIB         6
+#define FF_IDCT_ARM          7
+#define FF_IDCT_ALTIVEC      8
+#define FF_IDCT_SH4          9
+#define FF_IDCT_SIMPLEARM    10
+#define FF_IDCT_H264         11
+#define FF_IDCT_VP3          12
+#define FF_IDCT_IPP          13
+#define FF_IDCT_XVIDMMX      14
+#define FF_IDCT_CAVS         15
+#define FF_IDCT_SIMPLEARMV5TE 16
+
+    /**
+     * slice count.
+     * - encoding: set by lavc
+     * - decoding: set by user (or 0)
+     */
+    int slice_count;
+    /**
+     * slice offsets in the frame in bytes.
+     * - encoding: set/allocated by lavc
+     * - decoding: set/allocated by user (or NULL)
+     */
+    int *slice_offset;
+
+    /**
+     * error concealment flags.
+     * - encoding: unused
+     * - decoding: set by user
+     */
+    int error_concealment;
+#define FF_EC_GUESS_MVS   1
+#define FF_EC_DEBLOCK     2
+
+    /**
+     * dsp_mask could be add used to disable unwanted CPU features
+     * CPU features (i.e. MMX, SSE. ...)
+     *
+     * with FORCE flag you may instead enable given CPU features
+     * (Dangerous: usable in case of misdetection, improper usage however will
+     * result into program crash)
+     */
+    unsigned dsp_mask;
+#define FF_MM_FORCE    0x80000000 /* force usage of selected flags (OR) */
+    /* lower 16 bits - CPU features */
+#ifdef HAVE_MMX
+#define FF_MM_MMX      0x0001 /* standard MMX */
+#define FF_MM_3DNOW    0x0004 /* AMD 3DNOW */
+#define FF_MM_MMXEXT   0x0002 /* SSE integer functions or AMD MMX ext */
+#define FF_MM_SSE      0x0008 /* SSE functions */
+#define FF_MM_SSE2     0x0010 /* PIV SSE2 functions */
+#define FF_MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */
+#endif /* HAVE_MMX */
+#ifdef HAVE_IWMMXT
+#define FF_MM_IWMMXT   0x0100 /* XScale IWMMXT */
+#endif /* HAVE_IWMMXT */
+
+    /**
+     * bits per sample/pixel from the demuxer (needed for huffyuv).
+     * - encoding: set by lavc
+     * - decoding: set by user
+     */
+     int bits_per_sample;
+
+    /**
+     * prediction method (needed for huffyuv).
+     * - encoding: set by user
+     * - decoding: unused
+     */
+     int prediction_method;
+#define FF_PRED_LEFT   0
+#define FF_PRED_PLANE  1
+#define FF_PRED_MEDIAN 2
+
+    /**
+     * sample aspect ratio (0 if unknown).
+     * numerator and denominator must be relative prime and smaller then 256 for some video standards
+     * - encoding: set by user.
+     * - decoding: set by lavc.
+     */
+    AVRational sample_aspect_ratio;
+
+    /**
+     * the picture in the bitstream.
+     * - encoding: set by lavc
+     * - decoding: set by lavc
+     */
+    AVFrame *coded_frame;
+
+    /**
+     * debug.
+     * - encoding: set by user.
+     * - decoding: set by user.
+     */
+    int debug;
+#define FF_DEBUG_PICT_INFO 1
+#define FF_DEBUG_RC        2
+#define FF_DEBUG_BITSTREAM 4
+#define FF_DEBUG_MB_TYPE   8
+#define FF_DEBUG_QP        16
+#define FF_DEBUG_MV        32
+#define FF_DEBUG_DCT_COEFF 0x00000040
+#define FF_DEBUG_SKIP      0x00000080
+#define FF_DEBUG_STARTCODE 0x00000100
+#define FF_DEBUG_PTS       0x00000200
+#define FF_DEBUG_ER        0x00000400
+#define FF_DEBUG_MMCO      0x00000800
+#define FF_DEBUG_BUGS      0x00001000
+#define FF_DEBUG_VIS_QP    0x00002000
+#define FF_DEBUG_VIS_MB_TYPE 0x00004000
+
+    /**
+     * debug.
+     * - encoding: set by user.
+     * - decoding: set by user.
+     */
+    int debug_mv;
+#define FF_DEBUG_VIS_MV_P_FOR  0x00000001 //visualize forward predicted MVs of P frames
+#define FF_DEBUG_VIS_MV_B_FOR  0x00000002 //visualize forward predicted MVs of B frames
+#define FF_DEBUG_VIS_MV_B_BACK 0x00000004 //visualize backward predicted MVs of B frames
+
+    /**
+     * error.
+     * - encoding: set by lavc if flags&CODEC_FLAG_PSNR
+     * - decoding: unused
+     */
+    uint64_t error[4];
+
+    /**
+     * minimum MB quantizer.
+     * - encoding: unused
+     * - decoding: unused
+     */
+    int mb_qmin;
+
+    /**
+     * maximum MB quantizer.
+     * - encoding: unused
+     * - decoding: unused
+     */
+    int mb_qmax;
+
+    /**
+     * motion estimation compare function.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int me_cmp;
+    /**
+     * subpixel motion estimation compare function.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int me_sub_cmp;
+    /**
+     * macroblock compare function (not supported yet).
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int mb_cmp;
+    /**
+     * interlaced dct compare function
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int ildct_cmp;
+#define FF_CMP_SAD  0
+#define FF_CMP_SSE  1
+#define FF_CMP_SATD 2
+#define FF_CMP_DCT  3
+#define FF_CMP_PSNR 4
+#define FF_CMP_BIT  5
+#define FF_CMP_RD   6
+#define FF_CMP_ZERO 7
+#define FF_CMP_VSAD 8
+#define FF_CMP_VSSE 9
+#define FF_CMP_NSSE 10
+#define FF_CMP_W53  11
+#define FF_CMP_W97  12
+#define FF_CMP_DCTMAX 13
+#define FF_CMP_DCT264 14
+#define FF_CMP_CHROMA 256
+
+    /**
+     * ME diamond size & shape.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int dia_size;
+
+    /**
+     * amount of previous MV predictors (2a+1 x 2a+1 square).
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int last_predictor_count;
+
+    /**
+     * pre pass for motion estimation.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int pre_me;
+
+    /**
+     * motion estimation pre pass compare function.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int me_pre_cmp;
+
+    /**
+     * ME pre pass diamond size & shape.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int pre_dia_size;
+
+    /**
+     * subpel ME quality.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int me_subpel_quality;
+
+    /**
+     * callback to negotiate the pixelFormat.
+     * @param fmt is the list of formats which are supported by the codec,
+     * its terminated by -1 as 0 is a valid format, the formats are ordered by quality
+     * the first is allways the native one
+     * @return the choosen format
+     * - encoding: unused
+     * - decoding: set by user, if not set then the native format will always be choosen
+     */
+    enum PixelFormat (*get_format)(struct AVCodecContext *s, const enum PixelFormat * fmt);
+
+    /**
+     * DTG active format information (additionnal aspect ratio
+     * information only used in DVB MPEG2 transport streams). 0 if
+     * not set.
+     *
+     * - encoding: unused.
+     * - decoding: set by decoder
+     */
+    int dtg_active_format;
+#define FF_DTG_AFD_SAME         8
+#define FF_DTG_AFD_4_3          9
+#define FF_DTG_AFD_16_9         10
+#define FF_DTG_AFD_14_9         11
+#define FF_DTG_AFD_4_3_SP_14_9  13
+#define FF_DTG_AFD_16_9_SP_14_9 14
+#define FF_DTG_AFD_SP_4_3       15
+
+    /**
+     * Maximum motion estimation search range in subpel units.
+     * if 0 then no limit
+     *
+     * - encoding: set by user.
+     * - decoding: unused.
+     */
+    int me_range;
+
+    /**
+     * intra quantizer bias.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int intra_quant_bias;
+#define FF_DEFAULT_QUANT_BIAS 999999
+
+    /**
+     * inter quantizer bias.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int inter_quant_bias;
+
+    /**
+     * color table ID.
+     * - encoding: unused.
+     * - decoding: which clrtable should be used for 8bit RGB images
+     *             table have to be stored somewhere FIXME
+     */
+    int color_table_id;
+
+    /**
+     * internal_buffer count.
+     * Don't touch, used by lavc default_get_buffer()
+     */
+    int internal_buffer_count;
+
+    /**
+     * internal_buffers.
+     * Don't touch, used by lavc default_get_buffer()
+     */
+    void *internal_buffer;
+
+#define FF_LAMBDA_SHIFT 7
+#define FF_LAMBDA_SCALE (1<<FF_LAMBDA_SHIFT)
+#define FF_QP2LAMBDA 118 ///< factor to convert from H.263 QP to lambda
+#define FF_LAMBDA_MAX (256*128-1)
+
+#define FF_QUALITY_SCALE FF_LAMBDA_SCALE //FIXME maybe remove
+    /**
+     * global quality for codecs which cannot change it per frame.
+     * this should be proportional to MPEG1/2/4 qscale.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int global_quality;
+
+#define FF_CODER_TYPE_VLC   0
+#define FF_CODER_TYPE_AC    1
+    /**
+     * coder type
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int coder_type;
+
+    /**
+     * context model
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int context_model;
+#if 0
+    /**
+     *
+     * - encoding: unused
+     * - decoding: set by user.
+     */
+    uint8_t * (*realloc)(struct AVCodecContext *s, uint8_t *buf, int buf_size);
+#endif
+
+    /**
+     * slice flags
+     * - encoding: unused
+     * - decoding: set by user.
+     */
+    int slice_flags;
+#define SLICE_FLAG_CODED_ORDER    0x0001 ///< draw_horiz_band() is called in coded order instead of display
+#define SLICE_FLAG_ALLOW_FIELD    0x0002 ///< allow draw_horiz_band() with field slices (MPEG2 field pics)
+#define SLICE_FLAG_ALLOW_PLANE    0x0004 ///< allow draw_horiz_band() with 1 component at a time (SVQ1)
+
+    /**
+     * XVideo Motion Acceleration
+     * - encoding: forbidden
+     * - decoding: set by decoder
+     */
+    int xvmc_acceleration;
+
+    /**
+     * macroblock decision mode
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int mb_decision;
+#define FF_MB_DECISION_SIMPLE 0        ///< uses mb_cmp
+#define FF_MB_DECISION_BITS   1        ///< chooses the one which needs the fewest bits
+#define FF_MB_DECISION_RD     2        ///< rate distoration
+
+    /**
+     * custom intra quantization matrix
+     * - encoding: set by user, can be NULL
+     * - decoding: set by lavc
+     */
+    uint16_t *intra_matrix;
+
+    /**
+     * custom inter quantization matrix
+     * - encoding: set by user, can be NULL
+     * - decoding: set by lavc
+     */
+    uint16_t *inter_matrix;
+
+    /**
+     * fourcc from the AVI stream header (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A').
+     * this is used to workaround some encoder bugs
+     * - encoding: unused
+     * - decoding: set by user, will be converted to upper case by lavc during init
+     */
+    unsigned int stream_codec_tag;
+
+    /**
+     * scene change detection threshold.
+     * 0 is default, larger means fewer detected scene changes
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int scenechange_threshold;
+
+    /**
+     * minimum lagrange multipler
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int lmin;
+
+    /**
+     * maximum lagrange multipler
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int lmax;
+
+    /**
+     * Palette control structure
+     * - encoding: ??? (no palette-enabled encoder yet)
+     * - decoding: set by user.
+     */
+    struct AVPaletteControl *palctrl;
+
+    /**
+     * noise reduction strength
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int noise_reduction;
+
+    /**
+     * called at the beginning of a frame to get cr buffer for it.
+     * buffer type (size, hints) must be the same. lavc won't check it.
+     * lavc will pass previous buffer in pic, function should return
+     * same buffer or new buffer with old frame "painted" into it.
+     * if pic.data[0] == NULL must behave like get_buffer().
+     * - encoding: unused
+     * - decoding: set by lavc, user can override
+     */
+    int (*reget_buffer)(struct AVCodecContext *c, AVFrame *pic);
+
+    /**
+     * number of bits which should be loaded into the rc buffer before decoding starts
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int rc_initial_buffer_occupancy;
+
+    /**
+     *
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int inter_threshold;
+
+    /**
+     * CODEC_FLAG2_*.
+     * - encoding: set by user.
+     * - decoding: set by user.
+     */
+    int flags2;
+
+    /**
+     * simulates errors in the bitstream to test error concealment.
+     * - encoding: set by user.
+     * - decoding: unused.
+     */
+    int error_rate;
+
+    /**
+     * MP3 antialias algorithm, see FF_AA_* below.
+     * - encoding: unused
+     * - decoding: set by user
+     */
+    int antialias_algo;
+#define FF_AA_AUTO    0
+#define FF_AA_FASTINT 1 //not implemented yet
+#define FF_AA_INT     2
+#define FF_AA_FLOAT   3
+    /**
+     * Quantizer noise shaping.
+     * - encoding: set by user
+     * - decoding: unused
+     */
+    int quantizer_noise_shaping;
+
+    /**
+     * Thread count.
+     * is used to decide how many independant tasks should be passed to execute()
+     * - encoding: set by user
+     * - decoding: set by user
+     */
+    int thread_count;
+
+    /**
+     * the codec may call this to execute several independant things. it will return only after
+     * finishing all tasks, the user may replace this with some multithreaded implementation, the
+     * default implementation will execute the parts serially
+     * @param count the number of things to execute
+     * - encoding: set by lavc, user can override
+     * - decoding: set by lavc, user can override
+     */
+    int (*execute)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg), void **arg2, int *ret, int count);
+
+    /**
+     * Thread opaque.
+     * can be used by execute() to store some per AVCodecContext stuff.
+     * - encoding: set by execute()
+     * - decoding: set by execute()
+     */
+    void *thread_opaque;
+
+    /**
+     * Motion estimation threshold. under which no motion estimation is
+     * performed, but instead the user specified motion vectors are used
+     *
+     * - encoding: set by user
+     * - decoding: unused
+     */
+     int me_threshold;
+
+    /**
+     * Macroblock threshold. under which the user specified macroblock types will be used
+     * - encoding: set by user
+     * - decoding: unused
+     */
+     int mb_threshold;
+
+    /**
+     * precision of the intra dc coefficient - 8.
+     * - encoding: set by user
+     * - decoding: unused
+     */
+     int intra_dc_precision;
+
+    /**
+     * noise vs. sse weight for the nsse comparsion function.
+     * - encoding: set by user
+     * - decoding: unused
+     */
+     int nsse_weight;
+
+    /**
+     * number of macroblock rows at the top which are skipped.
+     * - encoding: unused
+     * - decoding: set by user
+     */
+     int skip_top;
+
+    /**
+     * number of macroblock rows at the bottom which are skipped.
+     * - encoding: unused
+     * - decoding: set by user
+     */
+     int skip_bottom;
+
+    /**
+     * profile
+     * - encoding: set by user
+     * - decoding: set by lavc
+     */
+     int profile;
+#define FF_PROFILE_UNKNOWN -99
+
+    /**
+     * level
+     * - encoding: set by user
+     * - decoding: set by lavc
+     */
+     int level;
+#define FF_LEVEL_UNKNOWN -99
+
+    /**
+     * low resolution decoding. 1-> 1/2 size, 2->1/4 size
+     * - encoding: unused
+     * - decoding: set by user
+     */
+     int lowres;
+
+    /**
+     * bitsream width / height. may be different from width/height if lowres
+     * or other things are used
+     * - encoding: unused
+     * - decoding: set by user before init if known, codec should override / dynamically change if needed
+     */
+    int coded_width, coded_height;
+
+    /**
+     * frame skip threshold
+     * - encoding: set by user
+     * - decoding: unused
+     */
+    int frame_skip_threshold;
+
+    /**
+     * frame skip factor
+     * - encoding: set by user
+     * - decoding: unused
+     */
+    int frame_skip_factor;
+
+    /**
+     * frame skip exponent
+     * - encoding: set by user
+     * - decoding: unused
+     */
+    int frame_skip_exp;
+
+    /**
+     * frame skip comparission function
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int frame_skip_cmp;
+
+    /**
+     * border processing masking. raises the quantizer for mbs on the borders
+     * of the picture.
+     * - encoding: set by user
+     * - decoding: unused
+     */
+    float border_masking;
+
+    /**
+     * minimum MB lagrange multipler.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int mb_lmin;
+
+    /**
+     * maximum MB lagrange multipler.
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int mb_lmax;
+
+    /**
+     *
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int me_penalty_compensation;
+
+    /**
+     *
+     * - encoding: unused
+     * - decoding: set by user.
+     */
+    enum AVDiscard skip_loop_filter;
+
+    /**
+     *
+     * - encoding: unused
+     * - decoding: set by user.
+     */
+    enum AVDiscard skip_idct;
+
+    /**
+     *
+     * - encoding: unused
+     * - decoding: set by user.
+     */
+    enum AVDiscard skip_frame;
+
+    /**
+     *
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int bidir_refine;
+
+    /**
+     *
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int brd_scale;
+
+    /**
+     * constant rate factor - quality-based VBR - values ~correspond to qps
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    float crf;
+
+    /**
+     * constant quantization parameter rate control method
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int cqp;
+
+    /**
+     * minimum gop size
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int keyint_min;
+
+    /**
+     * number of reference frames
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int refs;
+
+    /**
+     * chroma qp offset from luma
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int chromaoffset;
+
+    /**
+     * influences how often b-frames are used
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int bframebias;
+
+    /**
+     * trellis RD quantization
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int trellis;
+
+    /**
+     * reduce fluctuations in qp (before curve compression)
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    float complexityblur;
+
+    /**
+     * in-loop deblocking filter alphac0 parameter
+     * alpha is in the range -6...6
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int deblockalpha;
+
+    /**
+     * in-loop deblocking filter beta parameter
+     * beta is in the range -6...6
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int deblockbeta;
+
+    /**
+     * macroblock subpartition sizes to consider - p8x8, p4x4, b8x8, i8x8, i4x4
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int partitions;
+#define X264_PART_I4X4 0x001  /* Analyse i4x4 */
+#define X264_PART_I8X8 0x002  /* Analyse i8x8 (requires 8x8 transform) */
+#define X264_PART_P8X8 0x010  /* Analyse p16x8, p8x16 and p8x8 */
+#define X264_PART_P4X4 0x020  /* Analyse p8x4, p4x8, p4x4 */
+#define X264_PART_B8X8 0x100  /* Analyse b16x8, b8x16 and b8x8 */
+
+    /**
+     * direct mv prediction mode - 0 (none), 1 (spatial), 2 (temporal)
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int directpred;
+
+    /**
+     * audio cutoff bandwidth (0 means "automatic") . Currently used only by FAAC
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int cutoff;
+
+    /**
+     * multiplied by qscale for each frame and added to scene_change_score
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int scenechange_factor;
+
+    /**
+     *
+     * note: value depends upon the compare functin used for fullpel ME
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int mv0_threshold;
+
+    /**
+     * adjusts sensitivity of b_frame_strategy 1
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int b_sensitivity;
+
+    /**
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int compression_level;
+#define FF_COMPRESSION_DEFAULT -1
+
+    /**
+     * sets whether to use LPC mode - used by FLAC encoder
+     * - encoding: set by user.
+     * - decoding: unused.
+     */
+    int use_lpc;
+
+    /**
+     * LPC coefficient precision - used by FLAC encoder
+     * - encoding: set by user.
+     * - decoding: unused.
+     */
+    int lpc_coeff_precision;
+
+    /**
+     * - encoding: set by user.
+     * - decoding: unused.
+     */
+    int min_prediction_order;
+
+    /**
+     * - encoding: set by user.
+     * - decoding: unused.
+     */
+    int max_prediction_order;
+
+    /**
+     * search method for selecting prediction order
+     * - encoding: set by user.
+     * - decoding: unused.
+     */
+    int prediction_order_method;
+
+    /**
+     * - encoding: set by user.
+     * - decoding: unused.
+     */
+    int min_partition_order;
+
+    /**
+     * - encoding: set by user.
+     * - decoding: unused.
+     */
+    int max_partition_order;
+
+    /**
+     * GOP timecode frame start number, in non drop frame format
+     * - encoding: set by user.
+     * - decoding: unused.
+     */
+    int64_t timecode_frame_start;
+} AVCodecContext;
+
+/**
+ * AVCodec.
+ */
+typedef struct AVCodec {
+    const char *name;
+    enum CodecType type;
+    enum CodecID id;
+    int priv_data_size;
+    int (*init)(AVCodecContext *);
+    int (*encode)(AVCodecContext *, uint8_t *buf, int buf_size, void *data);
+    int (*close)(AVCodecContext *);
+    int (*decode)(AVCodecContext *, void *outdata, int *outdata_size,
+                  uint8_t *buf, int buf_size);
+    int capabilities;
+#if LIBAVCODEC_VERSION_INT < ((50<<16)+(0<<8)+0)
+    void *dummy; // FIXME remove next time we break binary compatibility
+#endif
+    struct AVCodec *next;
+    void (*flush)(AVCodecContext *);
+    const AVRational *supported_framerates; ///array of supported framerates, or NULL if any, array is terminated by {0,0}
+    const enum PixelFormat *pix_fmts;       ///array of supported pixel formats, or NULL if unknown, array is terminanted by -1
+} AVCodec;
+
+/**
+ * four components are given, that's all.
+ * the last component is alpha
+ */
+typedef struct AVPicture {
+    uint8_t *data[4];
+    int linesize[4];       ///< number of bytes per line
+} AVPicture;
+
+/**
+ * AVPaletteControl
+ * This structure defines a method for communicating palette changes
+ * between and demuxer and a decoder.
+ * this is totally broken, palette changes should be sent as AVPackets
+ */
+#define AVPALETTE_SIZE 1024
+#define AVPALETTE_COUNT 256
+typedef struct AVPaletteControl {
+
+    /* demuxer sets this to 1 to indicate the palette has changed;
+     * decoder resets to 0 */
+    int palette_changed;
+
+    /* 4-byte ARGB palette entries, stored in native byte order; note that
+     * the individual palette components should be on a 8-bit scale; if
+     * the palette data comes from a IBM VGA native format, the component
+     * data is probably 6 bits in size and needs to be scaled */
+    unsigned int palette[AVPALETTE_COUNT];
+
+} AVPaletteControl attribute_deprecated;
+
+typedef struct AVSubtitleRect {
+    uint16_t x;
+    uint16_t y;
+    uint16_t w;
+    uint16_t h;
+    uint16_t nb_colors;
+    int linesize;
+    uint32_t *rgba_palette;
+    uint8_t *bitmap;
+} AVSubtitleRect;
+
+typedef struct AVSubtitle {
+    uint16_t format; /* 0 = graphics */
+    uint32_t start_display_time; /* relative to packet pts, in ms */
+    uint32_t end_display_time; /* relative to packet pts, in ms */
+    uint32_t num_rects;
+    AVSubtitleRect *rects;
+} AVSubtitle;
+
+extern AVCodec ac3_encoder;
+extern AVCodec mp2_encoder;
+extern AVCodec mp3lame_encoder;
+extern AVCodec oggvorbis_encoder;
+extern AVCodec faac_encoder;
+extern AVCodec flac_encoder;
+extern AVCodec gif_encoder;
+extern AVCodec xvid_encoder;
+extern AVCodec mpeg1video_encoder;
+extern AVCodec mpeg2video_encoder;
+extern AVCodec h261_encoder;
+extern AVCodec h263_encoder;
+extern AVCodec h263p_encoder;
+extern AVCodec flv_encoder;
+extern AVCodec rv10_encoder;
+extern AVCodec rv20_encoder;
+extern AVCodec dvvideo_encoder;
+extern AVCodec mjpeg_encoder;
+extern AVCodec ljpeg_encoder;
+extern AVCodec jpegls_encoder;
+extern AVCodec png_encoder;
+extern AVCodec ppm_encoder;
+extern AVCodec pgm_encoder;
+extern AVCodec pgmyuv_encoder;
+extern AVCodec pbm_encoder;
+extern AVCodec pam_encoder;
+extern AVCodec mpeg4_encoder;
+extern AVCodec msmpeg4v1_encoder;
+extern AVCodec msmpeg4v2_encoder;
+extern AVCodec msmpeg4v3_encoder;
+extern AVCodec wmv1_encoder;
+extern AVCodec wmv2_encoder;
+extern AVCodec huffyuv_encoder;
+extern AVCodec ffvhuff_encoder;
+extern AVCodec h264_encoder;
+extern AVCodec asv1_encoder;
+extern AVCodec asv2_encoder;
+extern AVCodec vcr1_encoder;
+extern AVCodec ffv1_encoder;
+extern AVCodec snow_encoder;
+extern AVCodec vorbis_encoder;
+extern AVCodec mdec_encoder;
+extern AVCodec zlib_encoder;
+extern AVCodec sonic_encoder;
+extern AVCodec sonic_ls_encoder;
+extern AVCodec svq1_encoder;
+extern AVCodec x264_encoder;
+
+extern AVCodec gif_decoder;
+extern AVCodec h263_decoder;
+extern AVCodec h261_decoder;
+extern AVCodec mpeg4_decoder;
+extern AVCodec msmpeg4v1_decoder;
+extern AVCodec msmpeg4v2_decoder;
+extern AVCodec msmpeg4v3_decoder;
+extern AVCodec wmv1_decoder;
+extern AVCodec wmv2_decoder;
+extern AVCodec vc1_decoder;
+extern AVCodec wmv3_decoder;
+extern AVCodec mpeg1video_decoder;
+extern AVCodec mpeg2video_decoder;
+extern AVCodec mpegvideo_decoder;
+extern AVCodec mpeg_xvmc_decoder;
+extern AVCodec h263i_decoder;
+extern AVCodec flv_decoder;
+extern AVCodec rv10_decoder;
+extern AVCodec rv20_decoder;
+extern AVCodec rv30_decoder;
+extern AVCodec rv40_decoder;
+extern AVCodec svq1_decoder;
+extern AVCodec svq3_decoder;
+extern AVCodec dvvideo_decoder;
+extern AVCodec wmav1_decoder;
+extern AVCodec wmav2_decoder;
+extern AVCodec mjpeg_decoder;
+extern AVCodec mjpegb_decoder;
+extern AVCodec sp5x_decoder;
+extern AVCodec png_decoder;
+extern AVCodec mp2_decoder;
+extern AVCodec mp3_decoder;
+extern AVCodec mp3adu_decoder;
+extern AVCodec mp3on4_decoder;
+extern AVCodec qdm2_decoder;
+extern AVCodec cook_decoder;
+extern AVCodec truespeech_decoder;
+extern AVCodec tta_decoder;
+extern AVCodec mace3_decoder;
+extern AVCodec mace6_decoder;
+extern AVCodec huffyuv_decoder;
+extern AVCodec ffvhuff_decoder;
+extern AVCodec oggvorbis_decoder;
+extern AVCodec cyuv_decoder;
+extern AVCodec h264_decoder;
+extern AVCodec indeo3_decoder;
+extern AVCodec vp3_decoder;
+extern AVCodec theora_decoder;
+extern AVCodec vp5_decoder;
+extern AVCodec vp6_decoder;
+extern AVCodec vp6f_decoder;
+extern AVCodec amr_nb_decoder;
+extern AVCodec amr_nb_encoder;
+extern AVCodec amr_wb_encoder;
+extern AVCodec amr_wb_decoder;
+extern AVCodec aac_decoder;
+extern AVCodec mpeg4aac_decoder;
+extern AVCodec asv1_decoder;
+extern AVCodec asv2_decoder;
+extern AVCodec vcr1_decoder;
+extern AVCodec cljr_decoder;
+extern AVCodec ffv1_decoder;
+extern AVCodec snow_decoder;
+extern AVCodec fourxm_decoder;
+extern AVCodec mdec_decoder;
+extern AVCodec roq_decoder;
+extern AVCodec interplay_video_decoder;
+extern AVCodec xan_wc3_decoder;
+extern AVCodec rpza_decoder;
+extern AVCodec cinepak_decoder;
+extern AVCodec msrle_decoder;
+extern AVCodec msvideo1_decoder;
+extern AVCodec vqa_decoder;
+extern AVCodec idcin_decoder;
+extern AVCodec eightbps_decoder;
+extern AVCodec smc_decoder;
+extern AVCodec flic_decoder;
+extern AVCodec vmdvideo_decoder;
+extern AVCodec vmdaudio_decoder;
+extern AVCodec truemotion1_decoder;
+extern AVCodec truemotion2_decoder;
+extern AVCodec mszh_decoder;
+extern AVCodec zlib_decoder;
+extern AVCodec ra_144_decoder;
+extern AVCodec ra_288_decoder;
+extern AVCodec roq_dpcm_decoder;
+extern AVCodec interplay_dpcm_decoder;
+extern AVCodec xan_dpcm_decoder;
+extern AVCodec sol_dpcm_decoder;
+extern AVCodec sonic_decoder;
+extern AVCodec qtrle_decoder;
+extern AVCodec flac_decoder;
+extern AVCodec tscc_decoder;
+extern AVCodec cscd_decoder;
+extern AVCodec nuv_decoder;
+extern AVCodec ulti_decoder;
+extern AVCodec qdraw_decoder;
+extern AVCodec xl_decoder;
+extern AVCodec qpeg_decoder;
+extern AVCodec shorten_decoder;
+extern AVCodec loco_decoder;
+extern AVCodec wnv1_decoder;
+extern AVCodec aasc_decoder;
+extern AVCodec alac_decoder;
+extern AVCodec ws_snd1_decoder;
+extern AVCodec indeo2_decoder;
+extern AVCodec vorbis_decoder;
+extern AVCodec fraps_decoder;
+extern AVCodec libgsm_encoder;
+extern AVCodec libgsm_decoder;
+extern AVCodec bmp_decoder;
+extern AVCodec mmvideo_decoder;
+extern AVCodec zmbv_decoder;
+extern AVCodec avs_decoder;
+extern AVCodec smacker_decoder;
+extern AVCodec smackaud_decoder;
+extern AVCodec kmvc_decoder;
+extern AVCodec flashsv_decoder;
+extern AVCodec cavs_decoder;
+extern AVCodec vmnc_decoder;
+extern AVCodec wavpack_decoder;
+extern AVCodec targa_decoder;
+extern AVCodec dsicinvideo_decoder;
+extern AVCodec dsicinaudio_decoder;
+extern AVCodec tiertexseqvideo_decoder;
+extern AVCodec tiff_decoder;
+extern AVCodec imc_decoder;
+
+/* pcm codecs */
+#define PCM_CODEC(id, name) \
+extern AVCodec name ## _decoder; \
+extern AVCodec name ## _encoder
+
+PCM_CODEC(CODEC_ID_PCM_S32LE, pcm_s32le);
+PCM_CODEC(CODEC_ID_PCM_S32BE, pcm_s32be);
+PCM_CODEC(CODEC_ID_PCM_U32LE, pcm_u32le);
+PCM_CODEC(CODEC_ID_PCM_U32BE, pcm_u32be);
+PCM_CODEC(CODEC_ID_PCM_S24LE, pcm_s24le);
+PCM_CODEC(CODEC_ID_PCM_S24BE, pcm_s24be);
+PCM_CODEC(CODEC_ID_PCM_U24LE, pcm_u24le);
+PCM_CODEC(CODEC_ID_PCM_U24BE, pcm_u24be);
+PCM_CODEC(CODEC_ID_PCM_S24DAUD, pcm_s24daud);
+PCM_CODEC(CODEC_ID_PCM_S16LE, pcm_s16le);
+PCM_CODEC(CODEC_ID_PCM_S16BE, pcm_s16be);
+PCM_CODEC(CODEC_ID_PCM_U16LE, pcm_u16le);
+PCM_CODEC(CODEC_ID_PCM_U16BE, pcm_u16be);
+PCM_CODEC(CODEC_ID_PCM_S8, pcm_s8);
+PCM_CODEC(CODEC_ID_PCM_U8, pcm_u8);
+PCM_CODEC(CODEC_ID_PCM_ALAW, pcm_alaw);
+PCM_CODEC(CODEC_ID_PCM_MULAW, pcm_mulaw);
+
+/* adpcm codecs */
+
+PCM_CODEC(CODEC_ID_ADPCM_IMA_QT, adpcm_ima_qt);
+PCM_CODEC(CODEC_ID_ADPCM_IMA_WAV, adpcm_ima_wav);
+PCM_CODEC(CODEC_ID_ADPCM_IMA_DK3, adpcm_ima_dk3);
+PCM_CODEC(CODEC_ID_ADPCM_IMA_DK4, adpcm_ima_dk4);
+PCM_CODEC(CODEC_ID_ADPCM_IMA_WS, adpcm_ima_ws);
+PCM_CODEC(CODEC_ID_ADPCM_SMJPEG, adpcm_ima_smjpeg);
+PCM_CODEC(CODEC_ID_ADPCM_MS, adpcm_ms);
+PCM_CODEC(CODEC_ID_ADPCM_4XM, adpcm_4xm);
+PCM_CODEC(CODEC_ID_ADPCM_XA, adpcm_xa);
+PCM_CODEC(CODEC_ID_ADPCM_ADX, adpcm_adx);
+PCM_CODEC(CODEC_ID_ADPCM_EA, adpcm_ea);
+PCM_CODEC(CODEC_ID_ADPCM_G726, adpcm_g726);
+PCM_CODEC(CODEC_ID_ADPCM_CT, adpcm_ct);
+PCM_CODEC(CODEC_ID_ADPCM_SWF, adpcm_swf);
+PCM_CODEC(CODEC_ID_ADPCM_YAMAHA, adpcm_yamaha);
+PCM_CODEC(CODEC_ID_ADPCM_SBPRO_4, adpcm_sbpro_4);
+PCM_CODEC(CODEC_ID_ADPCM_SBPRO_3, adpcm_sbpro_3);
+PCM_CODEC(CODEC_ID_ADPCM_SBPRO_2, adpcm_sbpro_2);
+
+#undef PCM_CODEC
+
+/* dummy raw video codec */
+extern AVCodec rawvideo_encoder;
+extern AVCodec rawvideo_decoder;
+
+/* the following codecs use external GPL libs */
+extern AVCodec ac3_decoder;
+extern AVCodec dts_decoder;
+
+/* subtitles */
+extern AVCodec dvdsub_encoder;
+extern AVCodec dvdsub_decoder;
+extern AVCodec dvbsub_encoder;
+extern AVCodec dvbsub_decoder;
+
+/* resample.c */
+
+struct ReSampleContext;
+struct AVResampleContext;
+
+typedef struct ReSampleContext ReSampleContext;
+
+ReSampleContext *audio_resample_init(int output_channels, int input_channels,
+                                     int output_rate, int input_rate);
+int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples);
+void audio_resample_close(ReSampleContext *s);
+
+struct AVResampleContext *av_resample_init(int out_rate, int in_rate, int filter_length, int log2_phase_count, int linear, double cutoff);
+int av_resample(struct AVResampleContext *c, short *dst, short *src, int *consumed, int src_size, int dst_size, int update_ctx);
+void av_resample_compensate(struct AVResampleContext *c, int sample_delta, int compensation_distance);
+void av_resample_close(struct AVResampleContext *c);
+
+/* YUV420 format is assumed ! */
+
+struct ImgReSampleContext;
+
+typedef struct ImgReSampleContext ImgReSampleContext;
+
+ImgReSampleContext *img_resample_init(int output_width, int output_height,
+                                      int input_width, int input_height);
+
+ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
+                                      int iwidth, int iheight,
+                                      int topBand, int bottomBand,
+                                      int leftBand, int rightBand,
+                                      int padtop, int padbottom,
+                                      int padleft, int padright);
+
+
+void img_resample(ImgReSampleContext *s,
+                  AVPicture *output, const AVPicture *input);
+
+void img_resample_close(ImgReSampleContext *s);
+
+/**
+ * Allocate memory for a picture.  Call avpicture_free to free it.
+ *
+ * @param picture the picture to be filled in.
+ * @param pix_fmt the format of the picture.
+ * @param width the width of the picture.
+ * @param height the height of the picture.
+ * @return 0 if successful, -1 if not.
+ */
+int avpicture_alloc(AVPicture *picture, int pix_fmt, int width, int height);
+
+/* Free a picture previously allocated by avpicture_alloc. */
+void avpicture_free(AVPicture *picture);
+
+int avpicture_fill(AVPicture *picture, uint8_t *ptr,
+                   int pix_fmt, int width, int height);
+int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height,
+                     unsigned char *dest, int dest_size);
+int avpicture_get_size(int pix_fmt, int width, int height);
+void avcodec_get_chroma_sub_sample(int pix_fmt, int *h_shift, int *v_shift);
+const char *avcodec_get_pix_fmt_name(int pix_fmt);
+void avcodec_set_dimensions(AVCodecContext *s, int width, int height);
+enum PixelFormat avcodec_get_pix_fmt(const char* name);
+unsigned int avcodec_pix_fmt_to_codec_tag(enum PixelFormat p);
+
+#define FF_LOSS_RESOLUTION  0x0001 /* loss due to resolution change */
+#define FF_LOSS_DEPTH       0x0002 /* loss due to color depth change */
+#define FF_LOSS_COLORSPACE  0x0004 /* loss due to color space conversion */
+#define FF_LOSS_ALPHA       0x0008 /* loss of alpha bits */
+#define FF_LOSS_COLORQUANT  0x0010 /* loss due to color quantization */
+#define FF_LOSS_CHROMA      0x0020 /* loss of chroma (e.g. rgb to gray conversion) */
+
+int avcodec_get_pix_fmt_loss(int dst_pix_fmt, int src_pix_fmt,
+                             int has_alpha);
+int avcodec_find_best_pix_fmt(int pix_fmt_mask, int src_pix_fmt,
+                              int has_alpha, int *loss_ptr);
+
+#define FF_ALPHA_TRANSP       0x0001 /* image has some totally transparent pixels */
+#define FF_ALPHA_SEMI_TRANSP  0x0002 /* image has some transparent pixels */
+int img_get_alpha_info(const AVPicture *src,
+                       int pix_fmt, int width, int height);
+
+/* convert among pixel formats */
+int img_convert(AVPicture *dst, int dst_pix_fmt,
+                const AVPicture *src, int pix_fmt,
+                int width, int height);
+
+/* deinterlace a picture */
+int avpicture_deinterlace(AVPicture *dst, const AVPicture *src,
+                          int pix_fmt, int width, int height);
+
+/* external high level API */
+
+extern AVCodec *first_avcodec;
+
+/* returns LIBAVCODEC_VERSION_INT constant */
+unsigned avcodec_version(void);
+/* returns LIBAVCODEC_BUILD constant */
+unsigned avcodec_build(void);
+void avcodec_init(void);
+
+void register_avcodec(AVCodec *format);
+AVCodec *avcodec_find_encoder(enum CodecID id);
+AVCodec *avcodec_find_encoder_by_name(const char *name);
+AVCodec *avcodec_find_decoder(enum CodecID id);
+AVCodec *avcodec_find_decoder_by_name(const char *name);
+void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode);
+
+void avcodec_get_context_defaults(AVCodecContext *s);
+AVCodecContext *avcodec_alloc_context(void);
+void avcodec_get_frame_defaults(AVFrame *pic);
+AVFrame *avcodec_alloc_frame(void);
+
+int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic);
+void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic);
+int avcodec_default_reget_buffer(AVCodecContext *s, AVFrame *pic);
+void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height);
+int avcodec_check_dimensions(void *av_log_ctx, unsigned int w, unsigned int h);
+enum PixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum PixelFormat * fmt);
+
+int avcodec_thread_init(AVCodecContext *s, int thread_count);
+void avcodec_thread_free(AVCodecContext *s);
+int avcodec_thread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count);
+int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count);
+//FIXME func typedef
+
+/**
+ * opens / inits the AVCodecContext.
+ * not thread save!
+ */
+int avcodec_open(AVCodecContext *avctx, AVCodec *codec);
+
+/**
+ * Decode an audio frame.
+ *
+ * @param avctx the codec context.
+ * @param samples output buffer, 16 byte aligned
+ * @param frame_size_ptr the output buffer size in bytes, zero if no frame could be compressed
+ * @param buf input buffer, 16 byte aligned
+ * @param buf_size the input buffer size
+ * @return 0 if successful, -1 if not.
+ */
+
+int avcodec_decode_audio(AVCodecContext *avctx, int16_t *samples,
+                         int *frame_size_ptr,
+                         uint8_t *buf, int buf_size);
+int avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture,
+                         int *got_picture_ptr,
+                         uint8_t *buf, int buf_size);
+int avcodec_decode_subtitle(AVCodecContext *avctx, AVSubtitle *sub,
+                            int *got_sub_ptr,
+                            const uint8_t *buf, int buf_size);
+int avcodec_parse_frame(AVCodecContext *avctx, uint8_t **pdata,
+                        int *data_size_ptr,
+                        uint8_t *buf, int buf_size);
+int avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                         const short *samples);
+int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                         const AVFrame *pict);
+int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                            const AVSubtitle *sub);
+
+int avcodec_close(AVCodecContext *avctx);
+
+void avcodec_register_all(void);
+
+void avcodec_flush_buffers(AVCodecContext *avctx);
+
+void avcodec_default_free_buffers(AVCodecContext *s);
+
+/* misc usefull functions */
+
+/**
+ * returns a single letter to describe the picture type
+ */
+char av_get_pict_type_char(int pict_type);
+
+/**
+ * returns codec bits per sample
+ */
+int av_get_bits_per_sample(enum CodecID codec_id);
+
+/* frame parsing */
+typedef struct AVCodecParserContext {
+    void *priv_data;
+    struct AVCodecParser *parser;
+    int64_t frame_offset; /* offset of the current frame */
+    int64_t cur_offset; /* current offset
+                           (incremented by each av_parser_parse()) */
+    int64_t last_frame_offset; /* offset of the last frame */
+    /* video info */
+    int pict_type; /* XXX: put it back in AVCodecContext */
+    int repeat_pict; /* XXX: put it back in AVCodecContext */
+    int64_t pts;     /* pts of the current frame */
+    int64_t dts;     /* dts of the current frame */
+
+    /* private data */
+    int64_t last_pts;
+    int64_t last_dts;
+    int fetch_timestamp;
+
+#define AV_PARSER_PTS_NB 4
+    int cur_frame_start_index;
+    int64_t cur_frame_offset[AV_PARSER_PTS_NB];
+    int64_t cur_frame_pts[AV_PARSER_PTS_NB];
+    int64_t cur_frame_dts[AV_PARSER_PTS_NB];
+
+    int flags;
+#define PARSER_FLAG_COMPLETE_FRAMES           0x0001
+} AVCodecParserContext;
+
+typedef struct AVCodecParser {
+    int codec_ids[5]; /* several codec IDs are permitted */
+    int priv_data_size;
+    int (*parser_init)(AVCodecParserContext *s);
+    int (*parser_parse)(AVCodecParserContext *s,
+                        AVCodecContext *avctx,
+                        uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size);
+    void (*parser_close)(AVCodecParserContext *s);
+    int (*split)(AVCodecContext *avctx, const uint8_t *buf, int buf_size);
+    struct AVCodecParser *next;
+} AVCodecParser;
+
+extern AVCodecParser *av_first_parser;
+
+void av_register_codec_parser(AVCodecParser *parser);
+AVCodecParserContext *av_parser_init(int codec_id);
+int av_parser_parse(AVCodecParserContext *s,
+                    AVCodecContext *avctx,
+                    uint8_t **poutbuf, int *poutbuf_size,
+                    const uint8_t *buf, int buf_size,
+                    int64_t pts, int64_t dts);
+int av_parser_change(AVCodecParserContext *s,
+                     AVCodecContext *avctx,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size, int keyframe);
+void av_parser_close(AVCodecParserContext *s);
+
+extern AVCodecParser mpegvideo_parser;
+extern AVCodecParser mpeg4video_parser;
+extern AVCodecParser cavsvideo_parser;
+extern AVCodecParser h261_parser;
+extern AVCodecParser h263_parser;
+extern AVCodecParser h264_parser;
+extern AVCodecParser mjpeg_parser;
+extern AVCodecParser pnm_parser;
+extern AVCodecParser mpegaudio_parser;
+extern AVCodecParser ac3_parser;
+extern AVCodecParser dvdsub_parser;
+extern AVCodecParser dvbsub_parser;
+extern AVCodecParser aac_parser;
+
+
+typedef struct AVBitStreamFilterContext {
+    void *priv_data;
+    struct AVBitStreamFilter *filter;
+    AVCodecParserContext *parser;
+    struct AVBitStreamFilterContext *next;
+} AVBitStreamFilterContext;
+
+
+typedef struct AVBitStreamFilter {
+    const char *name;
+    int priv_data_size;
+    int (*filter)(AVBitStreamFilterContext *bsfc,
+                  AVCodecContext *avctx, const char *args,
+                  uint8_t **poutbuf, int *poutbuf_size,
+                  const uint8_t *buf, int buf_size, int keyframe);
+    struct AVBitStreamFilter *next;
+} AVBitStreamFilter;
+
+extern AVBitStreamFilter *av_first_bitstream_filter;
+
+void av_register_bitstream_filter(AVBitStreamFilter *bsf);
+AVBitStreamFilterContext *av_bitstream_filter_init(const char *name);
+int av_bitstream_filter_filter(AVBitStreamFilterContext *bsfc,
+                               AVCodecContext *avctx, const char *args,
+                               uint8_t **poutbuf, int *poutbuf_size,
+                               const uint8_t *buf, int buf_size, int keyframe);
+void av_bitstream_filter_close(AVBitStreamFilterContext *bsf);
+
+extern AVBitStreamFilter dump_extradata_bsf;
+extern AVBitStreamFilter remove_extradata_bsf;
+extern AVBitStreamFilter noise_bsf;
+extern AVBitStreamFilter mp3_header_compress_bsf;
+extern AVBitStreamFilter mp3_header_decompress_bsf;
+extern AVBitStreamFilter mjpega_dump_header_bsf;
+
+
+/* memory */
+void *av_fast_realloc(void *ptr, unsigned int *size, unsigned int min_size);
+/* for static data only */
+/* call av_free_static to release all staticaly allocated tables */
+void av_free_static(void);
+void *av_mallocz_static(unsigned int size);
+void *av_realloc_static(void *ptr, unsigned int size);
+
+void img_copy(AVPicture *dst, const AVPicture *src,
+              int pix_fmt, int width, int height);
+
+int img_crop(AVPicture *dst, const AVPicture *src,
+             int pix_fmt, int top_band, int left_band);
+
+int img_pad(AVPicture *dst, const AVPicture *src, int height, int width, int pix_fmt,
+            int padtop, int padbottom, int padleft, int padright, int *color);
+
+/* endian macros */
+#if !defined(BE_16) || !defined(BE_32) || !defined(LE_16) || !defined(LE_32)
+#define BE_16(x)  ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1])
+#define BE_32(x)  ((((uint8_t*)(x))[0] << 24) | \
+                   (((uint8_t*)(x))[1] << 16) | \
+                   (((uint8_t*)(x))[2] << 8) | \
+                    ((uint8_t*)(x))[3])
+#define LE_16(x)  ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0])
+#define LE_32(x)  ((((uint8_t*)(x))[3] << 24) | \
+                   (((uint8_t*)(x))[2] << 16) | \
+                   (((uint8_t*)(x))[1] << 8) | \
+                    ((uint8_t*)(x))[0])
+#endif
+
+extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* AVCODEC_H */
diff --git a/contrib/ffmpeg/libavcodec/avs.c b/contrib/ffmpeg/libavcodec/avs.c
new file mode 100644
index 000000000..953aea1be
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/avs.c
@@ -0,0 +1,160 @@
+/*
+ * AVS video decoder.
+ * Copyright (c) 2006  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "avcodec.h"
+#include "bitstream.h"
+
+
+typedef struct {
+    AVFrame picture;
+} avs_context_t;
+
+typedef enum {
+    AVS_VIDEO     = 0x01,
+    AVS_AUDIO     = 0x02,
+    AVS_PALETTE   = 0x03,
+    AVS_GAME_DATA = 0x04,
+} avs_block_type_t;
+
+typedef enum {
+    AVS_I_FRAME     = 0x00,
+    AVS_P_FRAME_3X3 = 0x01,
+    AVS_P_FRAME_2X2 = 0x02,
+    AVS_P_FRAME_2X3 = 0x03,
+} avs_video_sub_type_t;
+
+
+static int
+avs_decode_frame(AVCodecContext * avctx,
+                 void *data, int *data_size, uint8_t * buf, int buf_size)
+{
+    avs_context_t *const avs = avctx->priv_data;
+    AVFrame *picture = data;
+    AVFrame *const p = (AVFrame *) & avs->picture;
+    uint8_t *table, *vect, *out;
+    int i, j, x, y, stride, vect_w = 3, vect_h = 3;
+    int sub_type;
+    avs_block_type_t type;
+    GetBitContext change_map;
+
+    if (avctx->reget_buffer(avctx, p)) {
+        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+        return -1;
+    }
+    p->reference = 1;
+    p->pict_type = FF_P_TYPE;
+    p->key_frame = 0;
+
+    out = avs->picture.data[0];
+    stride = avs->picture.linesize[0];
+
+    sub_type = buf[0];
+    type = buf[1];
+    buf += 4;
+
+    if (type == AVS_PALETTE) {
+        int first, last;
+        uint32_t *pal = (uint32_t *) avs->picture.data[1];
+
+        first = LE_16(buf);
+        last = first + LE_16(buf + 2);
+        buf += 4;
+        for (i=first; i<last; i++, buf+=3)
+            pal[i] = (buf[0] << 18) | (buf[1] << 10) | (buf[2] << 2);
+
+        sub_type = buf[0];
+        type = buf[1];
+        buf += 4;
+    }
+
+    if (type != AVS_VIDEO)
+        return -1;
+
+    switch (sub_type) {
+    case AVS_I_FRAME:
+        p->pict_type = FF_I_TYPE;
+        p->key_frame = 1;
+    case AVS_P_FRAME_3X3:
+        vect_w = 3;
+        vect_h = 3;
+        break;
+
+    case AVS_P_FRAME_2X2:
+        vect_w = 2;
+        vect_h = 2;
+        break;
+
+    case AVS_P_FRAME_2X3:
+        vect_w = 2;
+        vect_h = 3;
+        break;
+
+    default:
+      return -1;
+    }
+
+    table = buf + (256 * vect_w * vect_h);
+    if (sub_type != AVS_I_FRAME) {
+        int map_size = ((318 / vect_w + 7) / 8) * (198 / vect_h);
+        init_get_bits(&change_map, table, map_size);
+        table += map_size;
+    }
+
+    for (y=0; y<198; y+=vect_h) {
+        for (x=0; x<318; x+=vect_w) {
+            if (sub_type == AVS_I_FRAME || get_bits1(&change_map)) {
+                vect = &buf[*table++ * (vect_w * vect_h)];
+                for (j=0; j<vect_w; j++) {
+                    out[(y + 0) * stride + x + j] = vect[(0 * vect_w) + j];
+                    out[(y + 1) * stride + x + j] = vect[(1 * vect_w) + j];
+                    if (vect_h == 3)
+                        out[(y + 2) * stride + x + j] =
+                            vect[(2 * vect_w) + j];
+                }
+            }
+        }
+        if (sub_type != AVS_I_FRAME)
+            align_get_bits(&change_map);
+    }
+
+    *picture = *(AVFrame *) & avs->picture;
+    *data_size = sizeof(AVPicture);
+
+    return buf_size;
+}
+
+static int avs_decode_init(AVCodecContext * avctx)
+{
+    avctx->pix_fmt = PIX_FMT_PAL8;
+    return 0;
+}
+
+AVCodec avs_decoder = {
+    "avs",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_AVS,
+    sizeof(avs_context_t),
+    avs_decode_init,
+    NULL,
+    NULL,
+    avs_decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/beosthread.c b/contrib/ffmpeg/libavcodec/beosthread.c
new file mode 100644
index 000000000..3d059912b
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/beosthread.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2004 François Revol <revol@free.fr>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+//#define DEBUG
+
+#include "avcodec.h"
+#include "common.h"
+
+#include <OS.h>
+
+typedef struct ThreadContext{
+    AVCodecContext *avctx;
+    thread_id thread;
+    sem_id work_sem;
+    sem_id done_sem;
+    int (*func)(AVCodecContext *c, void *arg);
+    void *arg;
+    int ret;
+}ThreadContext;
+
+// it's odd Be never patented that :D
+struct benaphore {
+        vint32 atom;
+        sem_id sem;
+};
+static inline int lock_ben(struct benaphore *ben)
+{
+        if (atomic_add(&ben->atom, 1) > 0)
+                return acquire_sem(ben->sem);
+        return B_OK;
+}
+static inline int unlock_ben(struct benaphore *ben)
+{
+        if (atomic_add(&ben->atom, -1) > 1)
+                return release_sem(ben->sem);
+        return B_OK;
+}
+
+static struct benaphore av_thread_lib_ben;
+
+static int32 ff_thread_func(void *v){
+    ThreadContext *c= v;
+
+    for(;;){
+//printf("thread_func %X enter wait\n", (int)v); fflush(stdout);
+        acquire_sem(c->work_sem);
+//printf("thread_func %X after wait (func=%X)\n", (int)v, (int)c->func); fflush(stdout);
+        if(c->func)
+            c->ret= c->func(c->avctx, c->arg);
+        else
+            return 0;
+//printf("thread_func %X signal complete\n", (int)v); fflush(stdout);
+        release_sem(c->done_sem);
+    }
+
+    return B_OK;
+}
+
+/**
+ * free what has been allocated by avcodec_thread_init().
+ * must be called after decoding has finished, especially dont call while avcodec_thread_execute() is running
+ */
+void avcodec_thread_free(AVCodecContext *s){
+    ThreadContext *c= s->thread_opaque;
+    int i;
+    int32 ret;
+
+    for(i=0; i<s->thread_count; i++){
+
+        c[i].func= NULL;
+        release_sem(c[i].work_sem);
+        wait_for_thread(c[i].thread, &ret);
+        if(c[i].work_sem > B_OK) delete_sem(c[i].work_sem);
+        if(c[i].done_sem > B_OK) delete_sem(c[i].done_sem);
+    }
+
+    av_freep(&s->thread_opaque);
+}
+
+int avcodec_thread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count){
+    ThreadContext *c= s->thread_opaque;
+    int i;
+
+    assert(s == c->avctx);
+    assert(count <= s->thread_count);
+
+    /* note, we can be certain that this is not called with the same AVCodecContext by different threads at the same time */
+
+    for(i=0; i<count; i++){
+        c[i].arg= arg[i];
+        c[i].func= func;
+        c[i].ret= 12345;
+
+        release_sem(c[i].work_sem);
+    }
+    for(i=0; i<count; i++){
+        acquire_sem(c[i].done_sem);
+
+        c[i].func= NULL;
+        if(ret) ret[i]= c[i].ret;
+    }
+    return 0;
+}
+
+int avcodec_thread_init(AVCodecContext *s, int thread_count){
+    int i;
+    ThreadContext *c;
+
+    s->thread_count= thread_count;
+
+    assert(!s->thread_opaque);
+    c= av_mallocz(sizeof(ThreadContext)*thread_count);
+    s->thread_opaque= c;
+
+    for(i=0; i<thread_count; i++){
+//printf("init semaphors %d\n", i); fflush(stdout);
+        c[i].avctx= s;
+
+        if((c[i].work_sem = create_sem(0, "ff work sem")) < B_OK)
+            goto fail;
+        if((c[i].done_sem = create_sem(0, "ff done sem")) < B_OK)
+            goto fail;
+
+//printf("create thread %d\n", i); fflush(stdout);
+        c[i].thread = spawn_thread(ff_thread_func, "libavcodec thread", B_LOW_PRIORITY, &c[i] );
+        if( c[i].thread < B_OK ) goto fail;
+        resume_thread(c[i].thread );
+    }
+//printf("init done\n"); fflush(stdout);
+
+    s->execute= avcodec_thread_execute;
+
+    return 0;
+fail:
+    avcodec_thread_free(s);
+    return -1;
+}
+
+/* provide a mean to serialize calls to avcodec_*() for thread safety. */
+
+int avcodec_thread_lock_lib(void)
+{
+        return lock_ben(&av_thread_lib_ben);
+}
+
+int avcodec_thread_unlock_lib(void)
+{
+        return unlock_ben(&av_thread_lib_ben);
+}
+
+/* our versions of _init and _fini (which are called by those actually from crt.o) */
+
+void initialize_after(void)
+{
+        av_thread_lib_ben.atom = 0;
+        av_thread_lib_ben.sem = create_sem(0, "libavcodec benaphore");
+}
+
+void uninitialize_before(void)
+{
+        delete_sem(av_thread_lib_ben.sem);
+}
+
+
+
diff --git a/contrib/ffmpeg/libavcodec/bfin/dsputil_bfin.c b/contrib/ffmpeg/libavcodec/bfin/dsputil_bfin.c
new file mode 100644
index 000000000..196ef6fea
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/bfin/dsputil_bfin.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2006 Michael Benjamin
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../avcodec.h"
+#include "../dsputil.h"
+
+static int sad8x8_bfin( void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h )
+{
+    int sum;
+    __asm__ __volatile__ (
+    "P0 = %1;" // blk1
+    "P1 = %2;" // blk2
+    "P2 = %3;\n" // h
+    "I0 = P0;"
+    "I1 = P1;\n"
+    "A0 = 0;"
+    "A1 = 0;\n"
+    "M0 = P2;\n"
+    "P3 = 32;\n"
+    "LSETUP (sad8x8LoopBegin, sad8x8LoopEnd) LC0=P3;\n"
+    "sad8x8LoopBegin:\n"
+    "  DISALGNEXCPT || R0 = [I0] || R2 = [I1];\n"
+    "  DISALGNEXCPT || R1 = [I0++] || R3 = [I1++];\n"
+    "sad8x8LoopEnd:\n"
+    "  SAA ( R1:0 , R3:2 );\n"
+    "R3 = A1.L + A1.H, R2 = A0.L + A0.H;\n"
+    "%0 = R2 + R3 (S);\n"
+    : "=&d" (sum)
+    : "m"(blk1), "m"(blk2), "m"(h)
+    : "P0","P1","P2","I0","I1","A0","A1","R0","R1","R2","R3");
+    return sum;
+}
+
+void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
+{
+    c->pix_abs[1][0] = sad8x8_bfin;
+    c->sad[1] = sad8x8_bfin;
+}
diff --git a/contrib/ffmpeg/libavcodec/bitstream.c b/contrib/ffmpeg/libavcodec/bitstream.c
new file mode 100644
index 000000000..22d256df5
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/bitstream.c
@@ -0,0 +1,254 @@
+/*
+ * Common bit i/o utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
+ */
+
+/**
+ * @file bitstream.c
+ * bitstream api.
+ */
+
+#include "avcodec.h"
+#include "bitstream.h"
+
+void align_put_bits(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    put_bits(s,(  - s->index) & 7,0);
+#else
+    put_bits(s,s->bit_left & 7,0);
+#endif
+}
+
+void ff_put_string(PutBitContext * pbc, char *s, int put_zero)
+{
+    while(*s){
+        put_bits(pbc, 8, *s);
+        s++;
+    }
+    if(put_zero)
+        put_bits(pbc, 8, 0);
+}
+
+/* VLC decoding */
+
+//#define DEBUG_VLC
+
+#define GET_DATA(v, table, i, wrap, size) \
+{\
+    const uint8_t *ptr = (const uint8_t *)table + i * wrap;\
+    switch(size) {\
+    case 1:\
+        v = *(const uint8_t *)ptr;\
+        break;\
+    case 2:\
+        v = *(const uint16_t *)ptr;\
+        break;\
+    default:\
+        v = *(const uint32_t *)ptr;\
+        break;\
+    }\
+}
+
+
+static int alloc_table(VLC *vlc, int size, int use_static)
+{
+    int index;
+    index = vlc->table_size;
+    vlc->table_size += size;
+    if (vlc->table_size > vlc->table_allocated) {
+        vlc->table_allocated += (1 << vlc->bits);
+        if(use_static)
+            vlc->table = av_realloc_static(vlc->table,
+                                           sizeof(VLC_TYPE) * 2 * vlc->table_allocated);
+        else
+            vlc->table = av_realloc(vlc->table,
+                                    sizeof(VLC_TYPE) * 2 * vlc->table_allocated);
+        if (!vlc->table)
+            return -1;
+    }
+    return index;
+}
+
+static int build_table(VLC *vlc, int table_nb_bits,
+                       int nb_codes,
+                       const void *bits, int bits_wrap, int bits_size,
+                       const void *codes, int codes_wrap, int codes_size,
+                       uint32_t code_prefix, int n_prefix, int flags)
+{
+    int i, j, k, n, table_size, table_index, nb, n1, index, code_prefix2;
+    uint32_t code;
+    VLC_TYPE (*table)[2];
+
+    table_size = 1 << table_nb_bits;
+    table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_STATIC);
+#ifdef DEBUG_VLC
+    printf("new table index=%d size=%d code_prefix=%x n=%d\n",
+           table_index, table_size, code_prefix, n_prefix);
+#endif
+    if (table_index < 0)
+        return -1;
+    table = &vlc->table[table_index];
+
+    for(i=0;i<table_size;i++) {
+        table[i][1] = 0; //bits
+        table[i][0] = -1; //codes
+    }
+
+    /* first pass: map codes and compute auxillary table sizes */
+    for(i=0;i<nb_codes;i++) {
+        GET_DATA(n, bits, i, bits_wrap, bits_size);
+        GET_DATA(code, codes, i, codes_wrap, codes_size);
+        /* we accept tables with holes */
+        if (n <= 0)
+            continue;
+#if defined(DEBUG_VLC) && 0
+        printf("i=%d n=%d code=0x%x\n", i, n, code);
+#endif
+        /* if code matches the prefix, it is in the table */
+        n -= n_prefix;
+        if(flags & INIT_VLC_LE)
+            code_prefix2= code & (n_prefix>=32 ? 0xffffffff : (1 << n_prefix)-1);
+        else
+            code_prefix2= code >> n;
+        if (n > 0 && code_prefix2 == code_prefix) {
+            if (n <= table_nb_bits) {
+                /* no need to add another table */
+                j = (code << (table_nb_bits - n)) & (table_size - 1);
+                nb = 1 << (table_nb_bits - n);
+                for(k=0;k<nb;k++) {
+                    if(flags & INIT_VLC_LE)
+                        j = (code >> n_prefix) + (k<<n);
+#ifdef DEBUG_VLC
+                    av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n",
+                           j, i, n);
+#endif
+                    if (table[j][1] /*bits*/ != 0) {
+                        av_log(NULL, AV_LOG_ERROR, "incorrect codes\n");
+                        return -1;
+                    }
+                    table[j][1] = n; //bits
+                    table[j][0] = i; //code
+                    j++;
+                }
+            } else {
+                n -= table_nb_bits;
+                j = (code >> ((flags & INIT_VLC_LE) ? n_prefix : n)) & ((1 << table_nb_bits) - 1);
+#ifdef DEBUG_VLC
+                printf("%4x: n=%d (subtable)\n",
+                       j, n);
+#endif
+                /* compute table size */
+                n1 = -table[j][1]; //bits
+                if (n > n1)
+                    n1 = n;
+                table[j][1] = -n1; //bits
+            }
+        }
+    }
+
+    /* second pass : fill auxillary tables recursively */
+    for(i=0;i<table_size;i++) {
+        n = table[i][1]; //bits
+        if (n < 0) {
+            n = -n;
+            if (n > table_nb_bits) {
+                n = table_nb_bits;
+                table[i][1] = -n; //bits
+            }
+            index = build_table(vlc, n, nb_codes,
+                                bits, bits_wrap, bits_size,
+                                codes, codes_wrap, codes_size,
+                                (flags & INIT_VLC_LE) ? (code_prefix | (i << n_prefix)) : ((code_prefix << table_nb_bits) | i),
+                                n_prefix + table_nb_bits, flags);
+            if (index < 0)
+                return -1;
+            /* note: realloc has been done, so reload tables */
+            table = &vlc->table[table_index];
+            table[i][0] = index; //code
+        }
+    }
+    return table_index;
+}
+
+
+/* Build VLC decoding tables suitable for use with get_vlc().
+
+   'nb_bits' set thee decoding table size (2^nb_bits) entries. The
+   bigger it is, the faster is the decoding. But it should not be too
+   big to save memory and L1 cache. '9' is a good compromise.
+
+   'nb_codes' : number of vlcs codes
+
+   'bits' : table which gives the size (in bits) of each vlc code.
+
+   'codes' : table which gives the bit pattern of of each vlc code.
+
+   'xxx_wrap' : give the number of bytes between each entry of the
+   'bits' or 'codes' tables.
+
+   'xxx_size' : gives the number of bytes of each entry of the 'bits'
+   or 'codes' tables.
+
+   'wrap' and 'size' allows to use any memory configuration and types
+   (byte/word/long) to store the 'bits' and 'codes' tables.
+
+   'use_static' should be set to 1 for tables, which should be freed
+   with av_free_static(), 0 if free_vlc() will be used.
+*/
+int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
+             const void *bits, int bits_wrap, int bits_size,
+             const void *codes, int codes_wrap, int codes_size,
+             int use_static)
+{
+    vlc->bits = nb_bits;
+    if(!use_static) {
+        vlc->table = NULL;
+        vlc->table_allocated = 0;
+        vlc->table_size = 0;
+    } else {
+        /* Static tables are initially always NULL, return
+           if vlc->table != NULL to avoid double allocation */
+        if(vlc->table)
+            return 0;
+    }
+
+#ifdef DEBUG_VLC
+    printf("build table nb_codes=%d\n", nb_codes);
+#endif
+
+    if (build_table(vlc, nb_bits, nb_codes,
+                    bits, bits_wrap, bits_size,
+                    codes, codes_wrap, codes_size,
+                    0, 0, use_static) < 0) {
+        av_free(vlc->table);
+        return -1;
+    }
+    return 0;
+}
+
+
+void free_vlc(VLC *vlc)
+{
+    av_free(vlc->table);
+}
+
diff --git a/contrib/ffmpeg/libavcodec/bitstream.h b/contrib/ffmpeg/libavcodec/bitstream.h
new file mode 100644
index 000000000..af25b6dcf
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/bitstream.h
@@ -0,0 +1,956 @@
+/*
+ * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file bitstream.h
+ * bitstream api header.
+ */
+
+#ifndef BITSTREAM_H
+#define BITSTREAM_H
+
+#include "log.h"
+
+#if defined(ALT_BITSTREAM_READER_LE) && !defined(ALT_BITSTREAM_READER)
+#define ALT_BITSTREAM_READER
+#endif
+
+//#define ALT_BITSTREAM_WRITER
+//#define ALIGNED_BITSTREAM_WRITER
+#if !defined(LIBMPEG2_BITSTREAM_READER) && !defined(A32_BITSTREAM_READER) && !defined(ALT_BITSTREAM_READER)
+#   ifdef ARCH_ARMV4L
+#       define A32_BITSTREAM_READER
+#   else
+#define ALT_BITSTREAM_READER
+//#define LIBMPEG2_BITSTREAM_READER
+//#define A32_BITSTREAM_READER
+#   endif
+#endif
+#define LIBMPEG2_BITSTREAM_READER_HACK //add BERO
+
+extern const uint8_t ff_reverse[256];
+
+#if defined(ARCH_X86)
+// avoid +32 for shift optimization (gcc should do that ...)
+static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
+    asm ("sarl %1, %0\n\t"
+         : "+r" (a)
+         : "ic" ((uint8_t)(-s))
+    );
+    return a;
+}
+static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
+    asm ("shrl %1, %0\n\t"
+         : "+r" (a)
+         : "ic" ((uint8_t)(-s))
+    );
+    return a;
+}
+#else
+#    define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s)))
+#    define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s)))
+#endif
+
+/* bit output */
+
+/* buf and buf_end must be present and used by every alternative writer. */
+typedef struct PutBitContext {
+#ifdef ALT_BITSTREAM_WRITER
+    uint8_t *buf, *buf_end;
+    int index;
+#else
+    uint32_t bit_buf;
+    int bit_left;
+    uint8_t *buf, *buf_ptr, *buf_end;
+#endif
+} PutBitContext;
+
+static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
+{
+    if(buffer_size < 0) {
+        buffer_size = 0;
+        buffer = NULL;
+    }
+
+    s->buf = buffer;
+    s->buf_end = s->buf + buffer_size;
+#ifdef ALT_BITSTREAM_WRITER
+    s->index=0;
+    ((uint32_t*)(s->buf))[0]=0;
+//    memset(buffer, 0, buffer_size);
+#else
+    s->buf_ptr = s->buf;
+    s->bit_left=32;
+    s->bit_buf=0;
+#endif
+}
+
+/* return the number of bits output */
+static inline int put_bits_count(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    return s->index;
+#else
+    return (s->buf_ptr - s->buf) * 8 + 32 - s->bit_left;
+#endif
+}
+
+/* pad the end of the output stream with zeros */
+static inline void flush_put_bits(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    align_put_bits(s);
+#else
+    s->bit_buf<<= s->bit_left;
+    while (s->bit_left < 32) {
+        /* XXX: should test end of buffer */
+        *s->buf_ptr++=s->bit_buf >> 24;
+        s->bit_buf<<=8;
+        s->bit_left+=8;
+    }
+    s->bit_left=32;
+    s->bit_buf=0;
+#endif
+}
+
+void align_put_bits(PutBitContext *s);
+void ff_put_string(PutBitContext * pbc, char *s, int put_zero);
+
+/* bit input */
+/* buffer, buffer_end and size_in_bits must be present and used by every reader */
+typedef struct GetBitContext {
+    const uint8_t *buffer, *buffer_end;
+#ifdef ALT_BITSTREAM_READER
+    int index;
+#elif defined LIBMPEG2_BITSTREAM_READER
+    uint8_t *buffer_ptr;
+    uint32_t cache;
+    int bit_count;
+#elif defined A32_BITSTREAM_READER
+    uint32_t *buffer_ptr;
+    uint32_t cache0;
+    uint32_t cache1;
+    int bit_count;
+#endif
+    int size_in_bits;
+} GetBitContext;
+
+#define VLC_TYPE int16_t
+
+typedef struct VLC {
+    int bits;
+    VLC_TYPE (*table)[2]; ///< code, bits
+    int table_size, table_allocated;
+} VLC;
+
+typedef struct RL_VLC_ELEM {
+    int16_t level;
+    int8_t len;
+    uint8_t run;
+} RL_VLC_ELEM;
+
+#if defined(ARCH_SPARC) || defined(ARCH_ARMV4L) || defined(ARCH_MIPS)
+#define UNALIGNED_STORES_ARE_BAD
+#endif
+
+/* used to avoid missaligned exceptions on some archs (alpha, ...) */
+#if defined(ARCH_X86)
+#    define unaligned16(a) (*(const uint16_t*)(a))
+#    define unaligned32(a) (*(const uint32_t*)(a))
+#    define unaligned64(a) (*(const uint64_t*)(a))
+#else
+#    ifdef __GNUC__
+#    define unaligned(x)                                \
+static inline uint##x##_t unaligned##x(const void *v) { \
+    struct Unaligned {                                  \
+        uint##x##_t i;                                  \
+    } __attribute__((packed));                          \
+                                                        \
+    return ((const struct Unaligned *) v)->i;           \
+}
+#    elif defined(__DECC)
+#    define unaligned(x)                                        \
+static inline uint##x##_t unaligned##x##(const void *v) {       \
+    return *(const __unaligned uint##x##_t *) v;                \
+}
+#    else
+#    define unaligned(x)                                        \
+static inline uint##x##_t unaligned##x##(const void *v) {       \
+    return *(const uint##x##_t *) v;                            \
+}
+#    endif
+unaligned(16)
+unaligned(32)
+unaligned(64)
+#undef unaligned
+#endif /* defined(ARCH_X86) */
+
+#ifndef ALT_BITSTREAM_WRITER
+static inline void put_bits(PutBitContext *s, int n, unsigned int value)
+{
+    unsigned int bit_buf;
+    int bit_left;
+
+    //    printf("put_bits=%d %x\n", n, value);
+    assert(n == 32 || value < (1U << n));
+
+    bit_buf = s->bit_buf;
+    bit_left = s->bit_left;
+
+    //    printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf);
+    /* XXX: optimize */
+    if (n < bit_left) {
+        bit_buf = (bit_buf<<n) | value;
+        bit_left-=n;
+    } else {
+        bit_buf<<=bit_left;
+        bit_buf |= value >> (n - bit_left);
+#ifdef UNALIGNED_STORES_ARE_BAD
+        if (3 & (intptr_t) s->buf_ptr) {
+            s->buf_ptr[0] = bit_buf >> 24;
+            s->buf_ptr[1] = bit_buf >> 16;
+            s->buf_ptr[2] = bit_buf >>  8;
+            s->buf_ptr[3] = bit_buf      ;
+        } else
+#endif
+        *(uint32_t *)s->buf_ptr = be2me_32(bit_buf);
+        //printf("bitbuf = %08x\n", bit_buf);
+        s->buf_ptr+=4;
+        bit_left+=32 - n;
+        bit_buf = value;
+    }
+
+    s->bit_buf = bit_buf;
+    s->bit_left = bit_left;
+}
+#endif
+
+
+#ifdef ALT_BITSTREAM_WRITER
+static inline void put_bits(PutBitContext *s, int n, unsigned int value)
+{
+#    ifdef ALIGNED_BITSTREAM_WRITER
+#        if defined(ARCH_X86)
+    asm volatile(
+        "movl %0, %%ecx                 \n\t"
+        "xorl %%eax, %%eax              \n\t"
+        "shrdl %%cl, %1, %%eax          \n\t"
+        "shrl %%cl, %1                  \n\t"
+        "movl %0, %%ecx                 \n\t"
+        "shrl $3, %%ecx                 \n\t"
+        "andl $0xFFFFFFFC, %%ecx        \n\t"
+        "bswapl %1                      \n\t"
+        "orl %1, (%2, %%ecx)            \n\t"
+        "bswapl %%eax                   \n\t"
+        "addl %3, %0                    \n\t"
+        "movl %%eax, 4(%2, %%ecx)       \n\t"
+        : "=&r" (s->index), "=&r" (value)
+        : "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
+        : "%eax", "%ecx"
+    );
+#        else
+    int index= s->index;
+    uint32_t *ptr= ((uint32_t *)s->buf)+(index>>5);
+
+    value<<= 32-n;
+
+    ptr[0] |= be2me_32(value>>(index&31));
+    ptr[1]  = be2me_32(value<<(32-(index&31)));
+//if(n>24) printf("%d %d\n", n, value);
+    index+= n;
+    s->index= index;
+#        endif
+#    else //ALIGNED_BITSTREAM_WRITER
+#        if defined(ARCH_X86)
+    asm volatile(
+        "movl $7, %%ecx                 \n\t"
+        "andl %0, %%ecx                 \n\t"
+        "addl %3, %%ecx                 \n\t"
+        "negl %%ecx                     \n\t"
+        "shll %%cl, %1                  \n\t"
+        "bswapl %1                      \n\t"
+        "movl %0, %%ecx                 \n\t"
+        "shrl $3, %%ecx                 \n\t"
+        "orl %1, (%%ecx, %2)            \n\t"
+        "addl %3, %0                    \n\t"
+        "movl $0, 4(%%ecx, %2)          \n\t"
+        : "=&r" (s->index), "=&r" (value)
+        : "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
+        : "%ecx"
+    );
+#        else
+    int index= s->index;
+    uint32_t *ptr= (uint32_t*)(((uint8_t *)s->buf)+(index>>3));
+
+    ptr[0] |= be2me_32(value<<(32-n-(index&7) ));
+    ptr[1] = 0;
+//if(n>24) printf("%d %d\n", n, value);
+    index+= n;
+    s->index= index;
+#        endif
+#    endif //!ALIGNED_BITSTREAM_WRITER
+}
+#endif
+
+
+static inline uint8_t* pbBufPtr(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+        return s->buf + (s->index>>3);
+#else
+        return s->buf_ptr;
+#endif
+}
+
+/**
+ *
+ * PutBitContext must be flushed & aligned to a byte boundary before calling this.
+ */
+static inline void skip_put_bytes(PutBitContext *s, int n){
+        assert((put_bits_count(s)&7)==0);
+#ifdef ALT_BITSTREAM_WRITER
+        FIXME may need some cleaning of the buffer
+        s->index += n<<3;
+#else
+        assert(s->bit_left==32);
+        s->buf_ptr += n;
+#endif
+}
+
+/**
+ * skips the given number of bits.
+ * must only be used if the actual values in the bitstream dont matter
+ */
+static inline void skip_put_bits(PutBitContext *s, int n){
+#ifdef ALT_BITSTREAM_WRITER
+    s->index += n;
+#else
+    s->bit_left -= n;
+    s->buf_ptr-= s->bit_left>>5;
+    s->bit_left &= 31;
+#endif
+}
+
+/**
+ * Changes the end of the buffer.
+ */
+static inline void set_put_bits_buffer_size(PutBitContext *s, int size){
+    s->buf_end= s->buf + size;
+}
+
+/* Bitstream reader API docs:
+name
+    abritary name which is used as prefix for the internal variables
+
+gb
+    getbitcontext
+
+OPEN_READER(name, gb)
+    loads gb into local variables
+
+CLOSE_READER(name, gb)
+    stores local vars in gb
+
+UPDATE_CACHE(name, gb)
+    refills the internal cache from the bitstream
+    after this call at least MIN_CACHE_BITS will be available,
+
+GET_CACHE(name, gb)
+    will output the contents of the internal cache, next bit is MSB of 32 or 64 bit (FIXME 64bit)
+
+SHOW_UBITS(name, gb, num)
+    will return the next num bits
+
+SHOW_SBITS(name, gb, num)
+    will return the next num bits and do sign extension
+
+SKIP_BITS(name, gb, num)
+    will skip over the next num bits
+    note, this is equivalent to SKIP_CACHE; SKIP_COUNTER
+
+SKIP_CACHE(name, gb, num)
+    will remove the next num bits from the cache (note SKIP_COUNTER MUST be called before UPDATE_CACHE / CLOSE_READER)
+
+SKIP_COUNTER(name, gb, num)
+    will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS)
+
+LAST_SKIP_CACHE(name, gb, num)
+    will remove the next num bits from the cache if it is needed for UPDATE_CACHE otherwise it will do nothing
+
+LAST_SKIP_BITS(name, gb, num)
+    is equivalent to SKIP_LAST_CACHE; SKIP_COUNTER
+
+for examples see get_bits, show_bits, skip_bits, get_vlc
+*/
+
+static inline int unaligned32_be(const void *v)
+{
+#ifdef CONFIG_ALIGN
+        const uint8_t *p=v;
+        return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]);
+#else
+        return be2me_32( unaligned32(v)); //original
+#endif
+}
+
+static inline int unaligned32_le(const void *v)
+{
+#ifdef CONFIG_ALIGN
+       const uint8_t *p=v;
+       return (((p[3]<<8) | p[2])<<16) | (p[1]<<8) | (p[0]);
+#else
+       return le2me_32( unaligned32(v)); //original
+#endif
+}
+
+#ifdef ALT_BITSTREAM_READER
+#   define MIN_CACHE_BITS 25
+
+#   define OPEN_READER(name, gb)\
+        int name##_index= (gb)->index;\
+        int name##_cache= 0;\
+
+#   define CLOSE_READER(name, gb)\
+        (gb)->index= name##_index;\
+
+# ifdef ALT_BITSTREAM_READER_LE
+#   define UPDATE_CACHE(name, gb)\
+        name##_cache= unaligned32_le( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) >> (name##_index&0x07);\
+
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache >>= (num);
+# else
+#   define UPDATE_CACHE(name, gb)\
+        name##_cache= unaligned32_be( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) << (name##_index&0x07);\
+
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache <<= (num);
+# endif
+
+// FIXME name?
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_index += (num);\
+
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) ;
+
+# ifdef ALT_BITSTREAM_READER_LE
+#   define SHOW_UBITS(name, gb, num)\
+        ((name##_cache) & (NEG_USR32(0xffffffff,num)))
+
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32((name##_cache)<<(32-(num)), num)
+# else
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache, num)
+
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache, num)
+# endif
+
+#   define GET_CACHE(name, gb)\
+        ((uint32_t)name##_cache)
+
+static inline int get_bits_count(GetBitContext *s){
+    return s->index;
+}
+
+static inline void skip_bits_long(GetBitContext *s, int n){
+    s->index += n;
+}
+
+#elif defined LIBMPEG2_BITSTREAM_READER
+//libmpeg2 like reader
+
+#   define MIN_CACHE_BITS 17
+
+#   define OPEN_READER(name, gb)\
+        int name##_bit_count=(gb)->bit_count;\
+        int name##_cache= (gb)->cache;\
+        uint8_t * name##_buffer_ptr=(gb)->buffer_ptr;\
+
+#   define CLOSE_READER(name, gb)\
+        (gb)->bit_count= name##_bit_count;\
+        (gb)->cache= name##_cache;\
+        (gb)->buffer_ptr= name##_buffer_ptr;\
+
+#ifdef LIBMPEG2_BITSTREAM_READER_HACK
+
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count >= 0){\
+        name##_cache+= (int)be2me_16(*(uint16_t*)name##_buffer_ptr) << name##_bit_count;\
+        name##_buffer_ptr += 2;\
+        name##_bit_count-= 16;\
+    }\
+
+#else
+
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count >= 0){\
+        name##_cache+= ((name##_buffer_ptr[0]<<8) + name##_buffer_ptr[1]) << name##_bit_count;\
+        name##_buffer_ptr+=2;\
+        name##_bit_count-= 16;\
+    }\
+
+#endif
+
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache <<= (num);\
+
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_bit_count += (num);\
+
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
+
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache, num)
+
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache, num)
+
+#   define GET_CACHE(name, gb)\
+        ((uint32_t)name##_cache)
+
+static inline int get_bits_count(GetBitContext *s){
+    return (s->buffer_ptr - s->buffer)*8 - 16 + s->bit_count;
+}
+
+static inline void skip_bits_long(GetBitContext *s, int n){
+    OPEN_READER(re, s)
+    re_bit_count += n;
+    re_buffer_ptr += 2*(re_bit_count>>4);
+    re_bit_count &= 15;
+    re_cache = ((re_buffer_ptr[-2]<<8) + re_buffer_ptr[-1]) << (16+re_bit_count);
+    UPDATE_CACHE(re, s)
+    CLOSE_READER(re, s)
+}
+
+#elif defined A32_BITSTREAM_READER
+
+#   define MIN_CACHE_BITS 32
+
+#   define OPEN_READER(name, gb)\
+        int name##_bit_count=(gb)->bit_count;\
+        uint32_t name##_cache0= (gb)->cache0;\
+        uint32_t name##_cache1= (gb)->cache1;\
+        uint32_t * name##_buffer_ptr=(gb)->buffer_ptr;\
+
+#   define CLOSE_READER(name, gb)\
+        (gb)->bit_count= name##_bit_count;\
+        (gb)->cache0= name##_cache0;\
+        (gb)->cache1= name##_cache1;\
+        (gb)->buffer_ptr= name##_buffer_ptr;\
+
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count > 0){\
+        const uint32_t next= be2me_32( *name##_buffer_ptr );\
+        name##_cache0 |= NEG_USR32(next,name##_bit_count);\
+        name##_cache1 |= next<<name##_bit_count;\
+        name##_buffer_ptr++;\
+        name##_bit_count-= 32;\
+    }\
+
+#if defined(ARCH_X86)
+#   define SKIP_CACHE(name, gb, num)\
+        asm(\
+            "shldl %2, %1, %0          \n\t"\
+            "shll %2, %1               \n\t"\
+            : "+r" (name##_cache0), "+r" (name##_cache1)\
+            : "Ic" ((uint8_t)(num))\
+           );
+#else
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache0 <<= (num);\
+        name##_cache0 |= NEG_USR32(name##_cache1,num);\
+        name##_cache1 <<= (num);
+#endif
+
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_bit_count += (num);\
+
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
+
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache0, num)
+
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache0, num)
+
+#   define GET_CACHE(name, gb)\
+        (name##_cache0)
+
+static inline int get_bits_count(GetBitContext *s){
+    return ((uint8_t*)s->buffer_ptr - s->buffer)*8 - 32 + s->bit_count;
+}
+
+static inline void skip_bits_long(GetBitContext *s, int n){
+    OPEN_READER(re, s)
+    re_bit_count += n;
+    re_buffer_ptr += re_bit_count>>5;
+    re_bit_count &= 31;
+    re_cache0 = be2me_32( re_buffer_ptr[-1] ) << re_bit_count;
+    re_cache1 = 0;
+    UPDATE_CACHE(re, s)
+    CLOSE_READER(re, s)
+}
+
+#endif
+
+/**
+ * read mpeg1 dc style vlc (sign bit + mantisse with no MSB).
+ * if MSB not set it is negative
+ * @param n length in bits
+ * @author BERO
+ */
+static inline int get_xbits(GetBitContext *s, int n){
+    register int sign;
+    register int32_t cache;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    cache = GET_CACHE(re,s);
+    sign=(~cache)>>31;
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return (NEG_USR32(sign ^ cache, n) ^ sign) - sign;
+}
+
+static inline int get_sbits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_SBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return tmp;
+}
+
+/**
+ * reads 0-17 bits.
+ * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't
+ */
+static inline unsigned int get_bits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_UBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return tmp;
+}
+
+/**
+ * shows 0-17 bits.
+ * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't
+ */
+static inline unsigned int show_bits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_UBITS(re, s, n);
+//    CLOSE_READER(re, s)
+    return tmp;
+}
+
+static inline void skip_bits(GetBitContext *s, int n){
+ //Note gcc seems to optimize this to s->index+=n for the ALT_READER :))
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+}
+
+static inline unsigned int get_bits1(GetBitContext *s){
+#ifdef ALT_BITSTREAM_READER
+    int index= s->index;
+    uint8_t result= s->buffer[ index>>3 ];
+#ifdef ALT_BITSTREAM_READER_LE
+    result>>= (index&0x07);
+    result&= 1;
+#else
+    result<<= (index&0x07);
+    result>>= 8 - 1;
+#endif
+    index++;
+    s->index= index;
+
+    return result;
+#else
+    return get_bits(s, 1);
+#endif
+}
+
+static inline unsigned int show_bits1(GetBitContext *s){
+    return show_bits(s, 1);
+}
+
+static inline void skip_bits1(GetBitContext *s){
+    skip_bits(s, 1);
+}
+
+/**
+ * reads 0-32 bits.
+ */
+static inline unsigned int get_bits_long(GetBitContext *s, int n){
+    if(n<=17) return get_bits(s, n);
+    else{
+#ifdef ALT_BITSTREAM_READER_LE
+        int ret= get_bits(s, 16);
+        return ret | (get_bits(s, n-16) << 16);
+#else
+        int ret= get_bits(s, 16) << (n-16);
+        return ret | get_bits(s, n-16);
+#endif
+    }
+}
+
+/**
+ * shows 0-32 bits.
+ */
+static inline unsigned int show_bits_long(GetBitContext *s, int n){
+    if(n<=17) return show_bits(s, n);
+    else{
+        GetBitContext gb= *s;
+        int ret= get_bits_long(s, n);
+        *s= gb;
+        return ret;
+    }
+}
+
+static inline int check_marker(GetBitContext *s, const char *msg)
+{
+    int bit= get_bits1(s);
+    if(!bit)
+        av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg);
+
+    return bit;
+}
+
+/**
+ * init GetBitContext.
+ * @param buffer bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE bytes larger then the actual read bits
+ * because some optimized bitstream readers read 32 or 64 bit at once and could read over the end
+ * @param bit_size the size of the buffer in bits
+ */
+static inline void init_get_bits(GetBitContext *s,
+                   const uint8_t *buffer, int bit_size)
+{
+    int buffer_size= (bit_size+7)>>3;
+    if(buffer_size < 0 || bit_size < 0) {
+        buffer_size = bit_size = 0;
+        buffer = NULL;
+    }
+
+    s->buffer= buffer;
+    s->size_in_bits= bit_size;
+    s->buffer_end= buffer + buffer_size;
+#ifdef ALT_BITSTREAM_READER
+    s->index=0;
+#elif defined LIBMPEG2_BITSTREAM_READER
+    s->buffer_ptr = (uint8_t*)((intptr_t)buffer&(~1));
+    s->bit_count = 16 + 8*((intptr_t)buffer&1);
+    skip_bits_long(s, 0);
+#elif defined A32_BITSTREAM_READER
+    s->buffer_ptr = (uint32_t*)((intptr_t)buffer&(~3));
+    s->bit_count = 32 + 8*((intptr_t)buffer&3);
+    skip_bits_long(s, 0);
+#endif
+}
+
+static inline void align_get_bits(GetBitContext *s)
+{
+    int n= (-get_bits_count(s)) & 7;
+    if(n) skip_bits(s, n);
+}
+
+int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
+             const void *bits, int bits_wrap, int bits_size,
+             const void *codes, int codes_wrap, int codes_size,
+             int flags);
+#define INIT_VLC_USE_STATIC 1
+#define INIT_VLC_LE         2
+void free_vlc(VLC *vlc);
+
+/**
+ *
+ * if the vlc code is invalid and max_depth=1 than no bits will be removed
+ * if the vlc code is invalid and max_depth>1 than the number of bits removed
+ * is undefined
+ */
+#define GET_VLC(code, name, gb, table, bits, max_depth)\
+{\
+    int n, index, nb_bits;\
+\
+    index= SHOW_UBITS(name, gb, bits);\
+    code = table[index][0];\
+    n    = table[index][1];\
+\
+    if(max_depth > 1 && n < 0){\
+        LAST_SKIP_BITS(name, gb, bits)\
+        UPDATE_CACHE(name, gb)\
+\
+        nb_bits = -n;\
+\
+        index= SHOW_UBITS(name, gb, nb_bits) + code;\
+        code = table[index][0];\
+        n    = table[index][1];\
+        if(max_depth > 2 && n < 0){\
+            LAST_SKIP_BITS(name, gb, nb_bits)\
+            UPDATE_CACHE(name, gb)\
+\
+            nb_bits = -n;\
+\
+            index= SHOW_UBITS(name, gb, nb_bits) + code;\
+            code = table[index][0];\
+            n    = table[index][1];\
+        }\
+    }\
+    SKIP_BITS(name, gb, n)\
+}
+
+#define GET_RL_VLC(level, run, name, gb, table, bits, max_depth, need_update)\
+{\
+    int n, index, nb_bits;\
+\
+    index= SHOW_UBITS(name, gb, bits);\
+    level = table[index].level;\
+    n     = table[index].len;\
+\
+    if(max_depth > 1 && n < 0){\
+        SKIP_BITS(name, gb, bits)\
+        if(need_update){\
+            UPDATE_CACHE(name, gb)\
+        }\
+\
+        nb_bits = -n;\
+\
+        index= SHOW_UBITS(name, gb, nb_bits) + level;\
+        level = table[index].level;\
+        n     = table[index].len;\
+    }\
+    run= table[index].run;\
+    SKIP_BITS(name, gb, n)\
+}
+
+
+/**
+ * parses a vlc code, faster then get_vlc()
+ * @param bits is the number of bits which will be read at once, must be
+ *             identical to nb_bits in init_vlc()
+ * @param max_depth is the number of times bits bits must be readed to completly
+ *                  read the longest vlc code
+ *                  = (max_vlc_length + bits - 1) / bits
+ */
+static always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
+                                  int bits, int max_depth)
+{
+    int code;
+
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+
+    GET_VLC(code, re, s, table, bits, max_depth)
+
+    CLOSE_READER(re, s)
+    return code;
+}
+
+//#define TRACE
+
+#ifdef TRACE
+static inline void print_bin(int bits, int n){
+    int i;
+
+    for(i=n-1; i>=0; i--){
+        av_log(NULL, AV_LOG_DEBUG, "%d", (bits>>i)&1);
+    }
+    for(i=n; i<24; i++)
+        av_log(NULL, AV_LOG_DEBUG, " ");
+}
+
+static inline int get_bits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
+    int r= get_bits(s, n);
+
+    print_bin(r, n);
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d bit @%5d in %s %s:%d\n", r, n, r, get_bits_count(s)-n, file, func, line);
+    return r;
+}
+static inline int get_vlc_trace(GetBitContext *s, VLC_TYPE (*table)[2], int bits, int max_depth, char *file, const char *func, int line){
+    int show= show_bits(s, 24);
+    int pos= get_bits_count(s);
+    int r= get_vlc2(s, table, bits, max_depth);
+    int len= get_bits_count(s) - pos;
+    int bits2= show>>(24-len);
+
+    print_bin(bits2, len);
+
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d vlc @%5d in %s %s:%d\n", bits2, len, r, pos, file, func, line);
+    return r;
+}
+static inline int get_xbits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
+    int show= show_bits(s, n);
+    int r= get_xbits(s, n);
+
+    print_bin(show, n);
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d xbt @%5d in %s %s:%d\n", show, n, r, get_bits_count(s)-n, file, func, line);
+    return r;
+}
+
+#define get_bits(s, n)  get_bits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_bits1(s)    get_bits_trace(s, 1, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_xbits(s, n) get_xbits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_vlc(s, vlc)            get_vlc_trace(s, (vlc)->table, (vlc)->bits, 3, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_vlc2(s, tab, bits, max) get_vlc_trace(s, tab, bits, max, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+
+#define tprintf(...) av_log(NULL, AV_LOG_DEBUG, __VA_ARGS__)
+
+#else //TRACE
+#define tprintf(...) {}
+#endif
+
+static inline int decode012(GetBitContext *gb){
+    int n;
+    n = get_bits1(gb);
+    if (n == 0)
+        return 0;
+    else
+        return get_bits1(gb) + 1;
+}
+
+#endif /* BITSTREAM_H */
diff --git a/contrib/ffmpeg/libavcodec/bitstream_filter.c b/contrib/ffmpeg/libavcodec/bitstream_filter.c
new file mode 100644
index 000000000..b52acf60a
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/bitstream_filter.c
@@ -0,0 +1,284 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "mpegaudio.h"
+
+AVBitStreamFilter *first_bitstream_filter= NULL;
+
+void av_register_bitstream_filter(AVBitStreamFilter *bsf){
+    bsf->next = first_bitstream_filter;
+    first_bitstream_filter= bsf;
+}
+
+AVBitStreamFilterContext *av_bitstream_filter_init(const char *name){
+    AVBitStreamFilter *bsf= first_bitstream_filter;
+
+    while(bsf){
+        if(!strcmp(name, bsf->name)){
+            AVBitStreamFilterContext *bsfc= av_mallocz(sizeof(AVBitStreamFilterContext));
+            bsfc->filter= bsf;
+            bsfc->priv_data= av_mallocz(bsf->priv_data_size);
+            return bsfc;
+        }
+        bsf= bsf->next;
+    }
+    return NULL;
+}
+
+void av_bitstream_filter_close(AVBitStreamFilterContext *bsfc){
+    av_freep(&bsfc->priv_data);
+    av_parser_close(bsfc->parser);
+    av_free(bsfc);
+}
+
+int av_bitstream_filter_filter(AVBitStreamFilterContext *bsfc,
+                               AVCodecContext *avctx, const char *args,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size, int keyframe){
+    *poutbuf= (uint8_t *) buf;
+    *poutbuf_size= buf_size;
+    return bsfc->filter->filter(bsfc, avctx, args, poutbuf, poutbuf_size, buf, buf_size, keyframe);
+}
+
+static int dump_extradata(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, const char *args,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size, int keyframe){
+    int cmd= args ? *args : 0;
+    /* cast to avoid warning about discarding qualifiers */
+    if(avctx->extradata){
+        if(  (keyframe && (avctx->flags2 & CODEC_FLAG2_LOCAL_HEADER) && cmd=='a')
+           ||(keyframe && (cmd=='k' || !cmd))
+           ||(cmd=='e')
+            /*||(? && (s->flags & PARSER_FLAG_DUMP_EXTRADATA_AT_BEGIN)*/){
+            int size= buf_size + avctx->extradata_size;
+            *poutbuf_size= size;
+            *poutbuf= av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE);
+
+            memcpy(*poutbuf, avctx->extradata, avctx->extradata_size);
+            memcpy((*poutbuf) + avctx->extradata_size, buf, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+            return 1;
+        }
+    }
+    return 0;
+}
+
+static int remove_extradata(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, const char *args,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size, int keyframe){
+    int cmd= args ? *args : 0;
+    AVCodecParserContext *s;
+
+    if(!bsfc->parser){
+        bsfc->parser= av_parser_init(avctx->codec_id);
+    }
+    s= bsfc->parser;
+
+    if(s && s->parser->split){
+        if(  (((avctx->flags & CODEC_FLAG_GLOBAL_HEADER) || (avctx->flags2 & CODEC_FLAG2_LOCAL_HEADER)) && cmd=='a')
+           ||(!keyframe && cmd=='k')
+           ||(cmd=='e' || !cmd)
+          ){
+            int i= s->parser->split(avctx, buf, buf_size);
+            buf += i;
+            buf_size -= i;
+        }
+    }
+    *poutbuf= (uint8_t *) buf;
+    *poutbuf_size= buf_size;
+
+    return 0;
+}
+
+static int noise(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, const char *args,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size, int keyframe){
+    int amount= args ? atoi(args) : 10000;
+    unsigned int *state= bsfc->priv_data;
+    int i;
+
+    *poutbuf= av_malloc(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+
+    memcpy(*poutbuf, buf, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    for(i=0; i<buf_size; i++){
+        (*state) += (*poutbuf)[i] + 1;
+        if(*state % amount == 0)
+            (*poutbuf)[i] = *state;
+    }
+    return 1;
+}
+
+#define MP3_MASK 0xFFFE0CCF
+
+static int mp3_header_compress(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, const char *args,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size, int keyframe){
+    uint32_t header, extraheader;
+    int mode_extension, header_size;
+
+    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
+        av_log(avctx, AV_LOG_ERROR, "not standards compliant\n");
+        return -1;
+    }
+
+    header = BE_32(buf);
+    mode_extension= (header>>4)&3;
+
+    if(ff_mpa_check_header(header) < 0 || (header&0x60000) != 0x20000){
+output_unchanged:
+        *poutbuf= (uint8_t *) buf;
+        *poutbuf_size= buf_size;
+
+        av_log(avctx, AV_LOG_INFO, "cannot compress %08X\n", header);
+        return 0;
+    }
+
+    if(avctx->extradata_size == 0){
+        avctx->extradata_size=15;
+        avctx->extradata= av_malloc(avctx->extradata_size);
+        strcpy(avctx->extradata, "FFCMP3 0.0");
+        memcpy(avctx->extradata+11, buf, 4);
+    }
+    if(avctx->extradata_size != 15){
+        av_log(avctx, AV_LOG_ERROR, "Extradata invalid\n");
+        return -1;
+    }
+    extraheader = BE_32(avctx->extradata+11);
+    if((extraheader&MP3_MASK) != (header&MP3_MASK))
+        goto output_unchanged;
+
+    header_size= (header&0x10000) ? 4 : 6;
+
+    *poutbuf_size= buf_size - header_size;
+    *poutbuf= av_malloc(buf_size - header_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    memcpy(*poutbuf, buf + header_size, buf_size - header_size + FF_INPUT_BUFFER_PADDING_SIZE);
+
+    if(avctx->channels==2){
+        if((header & (3<<19)) != 3<<19){
+            (*poutbuf)[1] &= 0x3F;
+            (*poutbuf)[1] |= mode_extension<<6;
+            FFSWAP(int, (*poutbuf)[1], (*poutbuf)[2]);
+        }else{
+            (*poutbuf)[1] &= 0x8F;
+            (*poutbuf)[1] |= mode_extension<<4;
+        }
+    }
+
+    return 1;
+}
+
+static int mp3_header_decompress(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, const char *args,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size, int keyframe){
+    uint32_t header;
+    int sample_rate= avctx->sample_rate;
+    int sample_rate_index=0;
+    int lsf, mpeg25, bitrate_index, frame_size;
+
+    header = BE_32(buf);
+    if(ff_mpa_check_header(header) >= 0){
+        *poutbuf= (uint8_t *) buf;
+        *poutbuf_size= buf_size;
+
+        return 0;
+    }
+
+    if(avctx->extradata_size != 15 || strcmp(avctx->extradata, "FFCMP3 0.0")){
+        av_log(avctx, AV_LOG_ERROR, "Extradata invalid %d\n", avctx->extradata_size);
+        return -1;
+    }
+
+    header= BE_32(avctx->extradata+11) & MP3_MASK;
+
+    lsf     = sample_rate < (24000+32000)/2;
+    mpeg25  = sample_rate < (12000+16000)/2;
+    sample_rate_index= (header>>10)&3;
+    sample_rate= mpa_freq_tab[sample_rate_index] >> (lsf + mpeg25); //in case sample rate is a little off
+
+    for(bitrate_index=2; bitrate_index<30; bitrate_index++){
+        frame_size = mpa_bitrate_tab[lsf][2][bitrate_index>>1];
+        frame_size = (frame_size * 144000) / (sample_rate << lsf) + (bitrate_index&1);
+        if(frame_size == buf_size + 4)
+            break;
+        if(frame_size == buf_size + 6)
+            break;
+    }
+    if(bitrate_index == 30){
+        av_log(avctx, AV_LOG_ERROR, "couldnt find bitrate_index\n");
+        return -1;
+    }
+
+    header |= (bitrate_index&1)<<9;
+    header |= (bitrate_index>>1)<<12;
+    header |= (frame_size == buf_size + 4)<<16; //FIXME actually set a correct crc instead of 0
+
+    *poutbuf_size= frame_size;
+    *poutbuf= av_malloc(frame_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    memcpy(*poutbuf + frame_size - buf_size, buf, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+
+    if(avctx->channels==2){
+        uint8_t *p= *poutbuf + frame_size - buf_size;
+        if(lsf){
+            FFSWAP(int, p[1], p[2]);
+            header |= (p[1] & 0xC0)>>2;
+            p[1] &= 0x3F;
+        }else{
+            header |= p[1] & 0x30;
+            p[1] &= 0xCF;
+        }
+    }
+
+    (*poutbuf)[0]= header>>24;
+    (*poutbuf)[1]= header>>16;
+    (*poutbuf)[2]= header>> 8;
+    (*poutbuf)[3]= header    ;
+
+    return 1;
+}
+
+AVBitStreamFilter dump_extradata_bsf={
+    "dump_extra",
+    0,
+    dump_extradata,
+};
+
+AVBitStreamFilter remove_extradata_bsf={
+    "remove_extra",
+    0,
+    remove_extradata,
+};
+
+AVBitStreamFilter noise_bsf={
+    "noise",
+    sizeof(int),
+    noise,
+};
+
+AVBitStreamFilter mp3_header_compress_bsf={
+    "mp3comp",
+    0,
+    mp3_header_compress,
+};
+
+AVBitStreamFilter mp3_header_decompress_bsf={
+    "mp3decomp",
+    0,
+    mp3_header_decompress,
+};
diff --git a/contrib/ffmpeg/libavcodec/bmp.c b/contrib/ffmpeg/libavcodec/bmp.c
new file mode 100644
index 000000000..2a4d83393
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/bmp.c
@@ -0,0 +1,254 @@
+/*
+ * BMP image format
+ * Copyright (c) 2005 Mans Rullgard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "bitstream.h"
+#include "bswap.h"
+
+typedef struct BMPContext {
+    AVFrame picture;
+} BMPContext;
+
+#define BMP_RGB       0
+#define BMP_RLE8      1
+#define BMP_RLE4      2
+#define BMP_BITFIELDS 3
+
+#define read16(bits) bswap_16(get_bits(bits, 16))
+#define read32(bits) bswap_32(get_bits_long(bits, 32))
+
+static int bmp_decode_init(AVCodecContext *avctx){
+    BMPContext *s = avctx->priv_data;
+
+    avcodec_get_frame_defaults((AVFrame*)&s->picture);
+    avctx->coded_frame = (AVFrame*)&s->picture;
+
+    return 0;
+}
+
+static int bmp_decode_frame(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    BMPContext *s = avctx->priv_data;
+    AVFrame *picture = data;
+    AVFrame *p = &s->picture;
+    GetBitContext bits;
+    unsigned int fsize, hsize;
+    int width, height;
+    unsigned int depth;
+    unsigned int comp;
+    unsigned int ihsize;
+    int i, j, n, linesize;
+    uint32_t rgb[3];
+    uint8_t *ptr;
+    int dsize;
+
+    if(buf_size < 14){
+        av_log(avctx, AV_LOG_ERROR, "buf size too small (%d)\n", buf_size);
+        return -1;
+    }
+
+    init_get_bits(&bits, buf, buf_size);
+
+    if(get_bits(&bits, 16) != 0x424d){ /* 'BM' */
+        av_log(avctx, AV_LOG_ERROR, "bad magic number\n");
+        return -1;
+    }
+
+    fsize = read32(&bits);
+    if(buf_size < fsize){
+        av_log(avctx, AV_LOG_ERROR, "not enough data (%d < %d)\n",
+               buf_size, fsize);
+        return -1;
+    }
+
+    skip_bits(&bits, 16);       /* reserved1 */
+    skip_bits(&bits, 16);       /* reserved2 */
+
+    hsize = read32(&bits); /* header size */
+    if(fsize <= hsize){
+        av_log(avctx, AV_LOG_ERROR, "not enough data (%d < %d)\n",
+               fsize, hsize);
+        return -1;
+    }
+
+    ihsize = read32(&bits);       /* more header size */
+    if(ihsize + 14 > hsize){
+        av_log(avctx, AV_LOG_ERROR, "invalid header size %d\n", hsize);
+        return -1;
+    }
+
+    width = read32(&bits);
+    height = read32(&bits);
+
+    if(read16(&bits) != 1){ /* planes */
+        av_log(avctx, AV_LOG_ERROR, "invalid BMP header\n");
+        return -1;
+    }
+
+    depth = read16(&bits);
+
+    if(ihsize > 16)
+        comp = read32(&bits);
+    else
+        comp = BMP_RGB;
+
+    if(comp != BMP_RGB && comp != BMP_BITFIELDS){
+        av_log(avctx, AV_LOG_ERROR, "BMP coding %d not supported\n", comp);
+        return -1;
+    }
+
+    if(comp == BMP_BITFIELDS){
+        skip_bits(&bits, 20 * 8);
+        rgb[0] = read32(&bits);
+        rgb[1] = read32(&bits);
+        rgb[2] = read32(&bits);
+    }
+
+    avctx->codec_id = CODEC_ID_BMP;
+    avctx->width = width;
+    avctx->height = height > 0? height: -height;
+
+    avctx->pix_fmt = PIX_FMT_NONE;
+
+    switch(depth){
+    case 32:
+        if(comp == BMP_BITFIELDS){
+            rgb[0] = (rgb[0] >> 15) & 3;
+            rgb[1] = (rgb[1] >> 15) & 3;
+            rgb[2] = (rgb[2] >> 15) & 3;
+
+            if(rgb[0] + rgb[1] + rgb[2] != 3 ||
+               rgb[0] == rgb[1] || rgb[0] == rgb[2] || rgb[1] == rgb[2]){
+                break;
+            }
+        } else {
+            rgb[0] = 2;
+            rgb[1] = 1;
+            rgb[2] = 0;
+        }
+
+        avctx->pix_fmt = PIX_FMT_BGR24;
+        break;
+    case 24:
+        avctx->pix_fmt = PIX_FMT_BGR24;
+        break;
+    case 16:
+        if(comp == BMP_RGB)
+            avctx->pix_fmt = PIX_FMT_RGB555;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "depth %d not supported\n", depth);
+        return -1;
+    }
+
+    if(avctx->pix_fmt == PIX_FMT_NONE){
+        av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
+        return -1;
+    }
+
+    p->reference = 0;
+    if(avctx->get_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    p->pict_type = FF_I_TYPE;
+    p->key_frame = 1;
+
+    buf += hsize;
+    dsize = buf_size - hsize;
+
+    /* Line size in file multiple of 4 */
+    n = (avctx->width * (depth / 8) + 3) & ~3;
+
+    if(n * avctx->height > dsize){
+        av_log(avctx, AV_LOG_ERROR, "not enough data (%d < %d)\n",
+               dsize, n * avctx->height);
+        return -1;
+    }
+
+    if(height > 0){
+        ptr = p->data[0] + (avctx->height - 1) * p->linesize[0];
+        linesize = -p->linesize[0];
+    } else {
+        ptr = p->data[0];
+        linesize = p->linesize[0];
+    }
+
+    switch(depth){
+    case 24:
+        for(i = 0; i < avctx->height; i++){
+            memcpy(ptr, buf, n);
+            buf += n;
+            ptr += linesize;
+        }
+        break;
+    case 16:
+        for(i = 0; i < avctx->height; i++){
+            uint16_t *src = (uint16_t *) buf;
+            uint16_t *dst = (uint16_t *) ptr;
+
+            for(j = 0; j < avctx->width; j++)
+                *dst++ = le2me_16(*src++);
+
+            buf += n;
+            ptr += linesize;
+        }
+        break;
+    case 32:
+        for(i = 0; i < avctx->height; i++){
+            uint8_t *src = buf;
+            uint8_t *dst = ptr;
+
+            for(j = 0; j < avctx->width; j++){
+                dst[0] = src[rgb[2]];
+                dst[1] = src[rgb[1]];
+                dst[2] = src[rgb[0]];
+                dst += 3;
+                src += 4;
+            }
+
+            buf += n;
+            ptr += linesize;
+        }
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "BMP decoder is broken\n");
+        return -1;
+    }
+
+    *picture = s->picture;
+    *data_size = sizeof(AVPicture);
+
+    return buf_size;
+}
+
+AVCodec bmp_decoder = {
+    "bmp",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_BMP,
+    sizeof(BMPContext),
+    bmp_decode_init,
+    NULL,
+    NULL,
+    bmp_decode_frame
+};
diff --git a/contrib/ffmpeg/libavcodec/bytestream.h b/contrib/ffmpeg/libavcodec/bytestream.h
new file mode 100644
index 000000000..25c457fe4
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/bytestream.h
@@ -0,0 +1,89 @@
+/*
+ * Bytestream functions
+ * copyright (c) 2006 Baptiste Coudurier <baptiste.coudurier@free.fr>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef FFMPEG_BYTESTREAM_H
+#define FFMPEG_BYTESTREAM_H
+
+static always_inline unsigned int bytestream_get_le32(uint8_t **b)
+{
+    (*b) += 4;
+    return LE_32(*b - 4);
+}
+
+static always_inline unsigned int bytestream_get_le16(uint8_t **b)
+{
+    (*b) += 2;
+    return LE_16(*b - 2);
+}
+
+static always_inline unsigned int bytestream_get_byte(uint8_t **b)
+{
+    (*b)++;
+    return (*b)[-1];
+}
+
+static always_inline unsigned int bytestream_get_buffer(uint8_t **b, uint8_t *dst, unsigned int size)
+{
+    memcpy(dst, *b, size);
+    (*b) += size;
+    return size;
+}
+
+static always_inline void bytestream_put_be32(uint8_t **b, const unsigned int value)
+{
+    *(*b)++ = value >> 24;
+    *(*b)++ = value >> 16;
+    *(*b)++ = value >> 8;
+    *(*b)++ = value;
+};
+
+static always_inline void bytestream_put_be16(uint8_t **b, const unsigned int value)
+{
+    *(*b)++ = value >> 8;
+    *(*b)++ = value;
+}
+
+static always_inline void bytestream_put_le32(uint8_t **b, const unsigned int value)
+{
+    *(*b)++ = value;
+    *(*b)++ = value >> 8;
+    *(*b)++ = value >> 16;
+    *(*b)++ = value >> 24;
+}
+
+static always_inline void bytestream_put_le16(uint8_t **b, const unsigned int value)
+{
+    *(*b)++ = value;
+    *(*b)++ = value >> 8;
+}
+
+static always_inline void bytestream_put_byte(uint8_t **b, const unsigned int value)
+{
+    *(*b)++ = value;
+}
+
+static always_inline void bytestream_put_buffer(uint8_t **b, const uint8_t *src, unsigned int size)
+{
+    memcpy(*b, src, size);
+    (*b) += size;
+}
+
+#endif /* FFMPEG_BYTESTREAM_H */
diff --git a/contrib/ffmpeg/libavcodec/cabac.c b/contrib/ffmpeg/libavcodec/cabac.c
new file mode 100644
index 000000000..c6da6292a
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/cabac.c
@@ -0,0 +1,265 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file cabac.c
+ * Context Adaptive Binary Arithmetic Coder.
+ */
+
+#include <string.h>
+
+#include "common.h"
+#include "bitstream.h"
+#include "cabac.h"
+
+static const uint8_t lps_range[64][4]= {
+{128,176,208,240}, {128,167,197,227}, {128,158,187,216}, {123,150,178,205},
+{116,142,169,195}, {111,135,160,185}, {105,128,152,175}, {100,122,144,166},
+{ 95,116,137,158}, { 90,110,130,150}, { 85,104,123,142}, { 81, 99,117,135},
+{ 77, 94,111,128}, { 73, 89,105,122}, { 69, 85,100,116}, { 66, 80, 95,110},
+{ 62, 76, 90,104}, { 59, 72, 86, 99}, { 56, 69, 81, 94}, { 53, 65, 77, 89},
+{ 51, 62, 73, 85}, { 48, 59, 69, 80}, { 46, 56, 66, 76}, { 43, 53, 63, 72},
+{ 41, 50, 59, 69}, { 39, 48, 56, 65}, { 37, 45, 54, 62}, { 35, 43, 51, 59},
+{ 33, 41, 48, 56}, { 32, 39, 46, 53}, { 30, 37, 43, 50}, { 29, 35, 41, 48},
+{ 27, 33, 39, 45}, { 26, 31, 37, 43}, { 24, 30, 35, 41}, { 23, 28, 33, 39},
+{ 22, 27, 32, 37}, { 21, 26, 30, 35}, { 20, 24, 29, 33}, { 19, 23, 27, 31},
+{ 18, 22, 26, 30}, { 17, 21, 25, 28}, { 16, 20, 23, 27}, { 15, 19, 22, 25},
+{ 14, 18, 21, 24}, { 14, 17, 20, 23}, { 13, 16, 19, 22}, { 12, 15, 18, 21},
+{ 12, 14, 17, 20}, { 11, 14, 16, 19}, { 11, 13, 15, 18}, { 10, 12, 15, 17},
+{ 10, 12, 14, 16}, {  9, 11, 13, 15}, {  9, 11, 12, 14}, {  8, 10, 12, 14},
+{  8,  9, 11, 13}, {  7,  9, 11, 12}, {  7,  9, 10, 12}, {  7,  8, 10, 11},
+{  6,  8,  9, 11}, {  6,  7,  9, 10}, {  6,  7,  8,  9}, {  2,  2,  2,  2},
+};
+
+uint8_t ff_h264_mlps_state[4*64];
+uint8_t ff_h264_lps_range[4*2*64];
+uint8_t ff_h264_lps_state[2*64];
+uint8_t ff_h264_mps_state[2*64];
+
+static const uint8_t mps_state[64]= {
+  1, 2, 3, 4, 5, 6, 7, 8,
+  9,10,11,12,13,14,15,16,
+ 17,18,19,20,21,22,23,24,
+ 25,26,27,28,29,30,31,32,
+ 33,34,35,36,37,38,39,40,
+ 41,42,43,44,45,46,47,48,
+ 49,50,51,52,53,54,55,56,
+ 57,58,59,60,61,62,62,63,
+};
+
+static const uint8_t lps_state[64]= {
+  0, 0, 1, 2, 2, 4, 4, 5,
+  6, 7, 8, 9, 9,11,11,12,
+ 13,13,15,15,16,16,18,18,
+ 19,19,21,21,22,22,23,24,
+ 24,25,26,26,27,27,28,29,
+ 29,30,30,30,31,32,32,33,
+ 33,33,34,34,35,35,35,36,
+ 36,36,37,37,37,38,38,63,
+};
+#if 0
+const uint8_t ff_h264_norm_shift_old[128]= {
+ 7,6,5,5,4,4,4,4,3,3,3,3,3,3,3,3,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+};
+#endif
+const uint8_t ff_h264_norm_shift[512]= {
+ 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+};
+
+/**
+ *
+ * @param buf_size size of buf in bits
+ */
+void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size){
+    init_put_bits(&c->pb, buf, buf_size);
+
+    c->low= 0;
+    c->range= 0x1FE;
+    c->outstanding_count= 0;
+#ifdef STRICT_LIMITS
+    c->sym_count =0;
+#endif
+
+    c->pb.bit_left++; //avoids firstBitFlag
+}
+
+/**
+ *
+ * @param buf_size size of buf in bits
+ */
+void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){
+    c->bytestream_start=
+    c->bytestream= buf;
+    c->bytestream_end= buf + buf_size;
+
+#if CABAC_BITS == 16
+    c->low =  (*c->bytestream++)<<18;
+    c->low+=  (*c->bytestream++)<<10;
+#else
+    c->low =  (*c->bytestream++)<<10;
+#endif
+    c->low+= ((*c->bytestream++)<<2) + 2;
+    c->range= 0x1FE;
+}
+
+void ff_init_cabac_states(CABACContext *c){
+    int i, j;
+
+    for(i=0; i<64; i++){
+        for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save
+            ff_h264_lps_range[j*2*64+2*i+0]=
+            ff_h264_lps_range[j*2*64+2*i+1]= lps_range[i][j];
+        }
+
+        ff_h264_mlps_state[128+2*i+0]=
+        ff_h264_mps_state[2*i+0]= 2*mps_state[i]+0;
+        ff_h264_mlps_state[128+2*i+1]=
+        ff_h264_mps_state[2*i+1]= 2*mps_state[i]+1;
+
+        if( i ){
+#ifdef BRANCHLESS_CABAC_DECODER
+            ff_h264_mlps_state[128-2*i-1]= 2*lps_state[i]+0;
+            ff_h264_mlps_state[128-2*i-2]= 2*lps_state[i]+1;
+        }else{
+            ff_h264_mlps_state[128-2*i-1]= 1;
+            ff_h264_mlps_state[128-2*i-2]= 0;
+#else
+            ff_h264_lps_state[2*i+0]= 2*lps_state[i]+0;
+            ff_h264_lps_state[2*i+1]= 2*lps_state[i]+1;
+        }else{
+            ff_h264_lps_state[2*i+0]= 1;
+            ff_h264_lps_state[2*i+1]= 0;
+#endif
+        }
+    }
+}
+
+#if 0 //selftest
+#define SIZE 10240
+
+#include "avcodec.h"
+
+int main(){
+    CABACContext c;
+    uint8_t b[9*SIZE];
+    uint8_t r[9*SIZE];
+    int i;
+    uint8_t state[10]= {0};
+
+    ff_init_cabac_encoder(&c, b, SIZE);
+    ff_init_cabac_states(&c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
+
+    for(i=0; i<SIZE; i++){
+        r[i]= random()%7;
+    }
+
+    for(i=0; i<SIZE; i++){
+START_TIMER
+        put_cabac_bypass(&c, r[i]&1);
+STOP_TIMER("put_cabac_bypass")
+    }
+
+    for(i=0; i<SIZE; i++){
+START_TIMER
+        put_cabac(&c, state, r[i]&1);
+STOP_TIMER("put_cabac")
+    }
+
+    for(i=0; i<SIZE; i++){
+START_TIMER
+        put_cabac_u(&c, state, r[i], 6, 3, i&1);
+STOP_TIMER("put_cabac_u")
+    }
+
+    for(i=0; i<SIZE; i++){
+START_TIMER
+        put_cabac_ueg(&c, state, r[i], 3, 0, 1, 2);
+STOP_TIMER("put_cabac_ueg")
+    }
+
+    put_cabac_terminate(&c, 1);
+
+    ff_init_cabac_decoder(&c, b, SIZE);
+
+    memset(state, 0, sizeof(state));
+
+    for(i=0; i<SIZE; i++){
+START_TIMER
+        if( (r[i]&1) != get_cabac_bypass(&c) )
+            av_log(NULL, AV_LOG_ERROR, "CABAC bypass failure at %d\n", i);
+STOP_TIMER("get_cabac_bypass")
+    }
+
+    for(i=0; i<SIZE; i++){
+START_TIMER
+        if( (r[i]&1) != get_cabac(&c, state) )
+            av_log(NULL, AV_LOG_ERROR, "CABAC failure at %d\n", i);
+STOP_TIMER("get_cabac")
+    }
+#if 0
+    for(i=0; i<SIZE; i++){
+START_TIMER
+        if( r[i] != get_cabac_u(&c, state, (i&1) ? 6 : 7, 3, i&1) )
+            av_log(NULL, AV_LOG_ERROR, "CABAC unary (truncated) binarization failure at %d\n", i);
+STOP_TIMER("get_cabac_u")
+    }
+
+    for(i=0; i<SIZE; i++){
+START_TIMER
+        if( r[i] != get_cabac_ueg(&c, state, 3, 0, 1, 2))
+            av_log(NULL, AV_LOG_ERROR, "CABAC unary (truncated) binarization failure at %d\n", i);
+STOP_TIMER("get_cabac_ueg")
+    }
+#endif
+    if(!get_cabac_terminate(&c))
+        av_log(NULL, AV_LOG_ERROR, "where's the Terminator?\n");
+
+    return 0;
+}
+
+#endif
diff --git a/contrib/ffmpeg/libavcodec/cabac.h b/contrib/ffmpeg/libavcodec/cabac.h
new file mode 100644
index 000000000..43fe78e3b
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/cabac.h
@@ -0,0 +1,859 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file cabac.h
+ * Context Adaptive Binary Arithmetic Coder.
+ */
+
+
+//#undef NDEBUG
+#include <assert.h>
+#ifdef ARCH_X86
+#include "x86_cpu.h"
+#endif
+
+#define CABAC_BITS 16
+#define CABAC_MASK ((1<<CABAC_BITS)-1)
+#define BRANCHLESS_CABAC_DECODER 1
+//#define ARCH_X86_DISABLED 1
+
+typedef struct CABACContext{
+    int low;
+    int range;
+    int outstanding_count;
+#ifdef STRICT_LIMITS
+    int symCount;
+#endif
+    const uint8_t *bytestream_start;
+    const uint8_t *bytestream;
+    const uint8_t *bytestream_end;
+    PutBitContext pb;
+}CABACContext;
+
+extern uint8_t ff_h264_mlps_state[4*64];
+extern uint8_t ff_h264_lps_range[4*2*64];  ///< rangeTabLPS
+extern uint8_t ff_h264_mps_state[2*64];     ///< transIdxMPS
+extern uint8_t ff_h264_lps_state[2*64];     ///< transIdxLPS
+extern const uint8_t ff_h264_norm_shift[512];
+
+
+void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
+void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
+void ff_init_cabac_states(CABACContext *c);
+
+
+static inline void put_cabac_bit(CABACContext *c, int b){
+    put_bits(&c->pb, 1, b);
+    for(;c->outstanding_count; c->outstanding_count--){
+        put_bits(&c->pb, 1, 1-b);
+    }
+}
+
+static inline void renorm_cabac_encoder(CABACContext *c){
+    while(c->range < 0x100){
+        //FIXME optimize
+        if(c->low<0x100){
+            put_cabac_bit(c, 0);
+        }else if(c->low<0x200){
+            c->outstanding_count++;
+            c->low -= 0x100;
+        }else{
+            put_cabac_bit(c, 1);
+            c->low -= 0x200;
+        }
+
+        c->range+= c->range;
+        c->low += c->low;
+    }
+}
+
+static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
+    int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + *state];
+
+    if(bit == ((*state)&1)){
+        c->range -= RangeLPS;
+        *state= ff_h264_mps_state[*state];
+    }else{
+        c->low += c->range - RangeLPS;
+        c->range = RangeLPS;
+        *state= ff_h264_lps_state[*state];
+    }
+
+    renorm_cabac_encoder(c);
+
+#ifdef STRICT_LIMITS
+    c->symCount++;
+#endif
+}
+
+static void put_cabac_static(CABACContext *c, int RangeLPS, int bit){
+    assert(c->range > RangeLPS);
+
+    if(!bit){
+        c->range -= RangeLPS;
+    }else{
+        c->low += c->range - RangeLPS;
+        c->range = RangeLPS;
+    }
+
+    renorm_cabac_encoder(c);
+
+#ifdef STRICT_LIMITS
+    c->symCount++;
+#endif
+}
+
+/**
+ * @param bit 0 -> write zero bit, !=0 write one bit
+ */
+static void put_cabac_bypass(CABACContext *c, int bit){
+    c->low += c->low;
+
+    if(bit){
+        c->low += c->range;
+    }
+//FIXME optimize
+    if(c->low<0x200){
+        put_cabac_bit(c, 0);
+    }else if(c->low<0x400){
+        c->outstanding_count++;
+        c->low -= 0x200;
+    }else{
+        put_cabac_bit(c, 1);
+        c->low -= 0x400;
+    }
+
+#ifdef STRICT_LIMITS
+    c->symCount++;
+#endif
+}
+
+/**
+ *
+ * @return the number of bytes written
+ */
+static int put_cabac_terminate(CABACContext *c, int bit){
+    c->range -= 2;
+
+    if(!bit){
+        renorm_cabac_encoder(c);
+    }else{
+        c->low += c->range;
+        c->range= 2;
+
+        renorm_cabac_encoder(c);
+
+        assert(c->low <= 0x1FF);
+        put_cabac_bit(c, c->low>>9);
+        put_bits(&c->pb, 2, ((c->low>>7)&3)|1);
+
+        flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
+    }
+
+#ifdef STRICT_LIMITS
+    c->symCount++;
+#endif
+
+    return (put_bits_count(&c->pb)+7)>>3;
+}
+
+/**
+ * put (truncated) unary binarization.
+ */
+static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){
+    int i;
+
+    assert(v <= max);
+
+#if 1
+    for(i=0; i<v; i++){
+        put_cabac(c, state, 1);
+        if(i < max_index) state++;
+    }
+    if(truncated==0 || v<max)
+        put_cabac(c, state, 0);
+#else
+    if(v <= max_index){
+        for(i=0; i<v; i++){
+            put_cabac(c, state+i, 1);
+        }
+        if(truncated==0 || v<max)
+            put_cabac(c, state+i, 0);
+    }else{
+        for(i=0; i<=max_index; i++){
+            put_cabac(c, state+i, 1);
+        }
+        for(; i<v; i++){
+            put_cabac(c, state+max_index, 1);
+        }
+        if(truncated==0 || v<max)
+            put_cabac(c, state+max_index, 0);
+    }
+#endif
+}
+
+/**
+ * put unary exp golomb k-th order binarization.
+ */
+static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){
+    int i;
+
+    if(v==0)
+        put_cabac(c, state, 0);
+    else{
+        const int sign= v < 0;
+
+        if(is_signed) v= FFABS(v);
+
+        if(v<max){
+            for(i=0; i<v; i++){
+                put_cabac(c, state, 1);
+                if(i < max_index) state++;
+            }
+
+            put_cabac(c, state, 0);
+        }else{
+            int m= 1<<k;
+
+            for(i=0; i<max; i++){
+                put_cabac(c, state, 1);
+                if(i < max_index) state++;
+            }
+
+            v -= max;
+            while(v >= m){ //FIXME optimize
+                put_cabac_bypass(c, 1);
+                v-= m;
+                m+= m;
+            }
+            put_cabac_bypass(c, 0);
+            while(m>>=1){
+                put_cabac_bypass(c, v&m);
+            }
+        }
+
+        if(is_signed)
+            put_cabac_bypass(c, sign);
+    }
+}
+
+static void refill(CABACContext *c){
+#if CABAC_BITS == 16
+        c->low+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
+#else
+        c->low+= c->bytestream[0]<<1;
+#endif
+    c->low -= CABAC_MASK;
+    c->bytestream+= CABAC_BITS/8;
+}
+
+static void refill2(CABACContext *c){
+    int i, x;
+
+    x= c->low ^ (c->low-1);
+    i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS-1)];
+
+    x= -CABAC_MASK;
+
+#if CABAC_BITS == 16
+        x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
+#else
+        x+= c->bytestream[0]<<1;
+#endif
+
+    c->low += x<<i;
+    c->bytestream+= CABAC_BITS/8;
+}
+
+static inline void renorm_cabac_decoder(CABACContext *c){
+    while(c->range < 0x100){
+        c->range+= c->range;
+        c->low+= c->low;
+        if(!(c->low & CABAC_MASK))
+            refill(c);
+    }
+}
+
+static inline void renorm_cabac_decoder_once(CABACContext *c){
+#ifdef ARCH_X86_DISABLED
+    int temp;
+#if 0
+    //P3:683    athlon:475
+    asm(
+        "lea -0x100(%0), %2         \n\t"
+        "shr $31, %2                \n\t"  //FIXME 31->63 for x86-64
+        "shl %%cl, %0               \n\t"
+        "shl %%cl, %1               \n\t"
+        : "+r"(c->range), "+r"(c->low), "+c"(temp)
+    );
+#elif 0
+    //P3:680    athlon:474
+    asm(
+        "cmp $0x100, %0             \n\t"
+        "setb %%cl                  \n\t"  //FIXME 31->63 for x86-64
+        "shl %%cl, %0               \n\t"
+        "shl %%cl, %1               \n\t"
+        : "+r"(c->range), "+r"(c->low), "+c"(temp)
+    );
+#elif 1
+    int temp2;
+    //P3:665    athlon:517
+    asm(
+        "lea -0x100(%0), %%eax      \n\t"
+        "cdq                        \n\t"
+        "mov %0, %%eax              \n\t"
+        "and %%edx, %0              \n\t"
+        "and %1, %%edx              \n\t"
+        "add %%eax, %0              \n\t"
+        "add %%edx, %1              \n\t"
+        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
+    );
+#elif 0
+    int temp2;
+    //P3:673    athlon:509
+    asm(
+        "cmp $0x100, %0             \n\t"
+        "sbb %%edx, %%edx           \n\t"
+        "mov %0, %%eax              \n\t"
+        "and %%edx, %0              \n\t"
+        "and %1, %%edx              \n\t"
+        "add %%eax, %0              \n\t"
+        "add %%edx, %1              \n\t"
+        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
+    );
+#else
+    int temp2;
+    //P3:677    athlon:511
+    asm(
+        "cmp $0x100, %0             \n\t"
+        "lea (%0, %0), %%eax        \n\t"
+        "lea (%1, %1), %%edx        \n\t"
+        "cmovb %%eax, %0            \n\t"
+        "cmovb %%edx, %1            \n\t"
+        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
+    );
+#endif
+#else
+    //P3:675    athlon:476
+    int shift= (uint32_t)(c->range - 0x100)>>31;
+    c->range<<= shift;
+    c->low  <<= shift;
+#endif
+    if(!(c->low & CABAC_MASK))
+        refill(c);
+}
+
+static int always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){
+    //FIXME gcc generates duplicate load/stores for c->low and c->range
+#define LOW          "0"
+#define RANGE        "4"
+#ifdef ARCH_X86_64
+#define BYTESTART   "16"
+#define BYTE        "24"
+#define BYTEEND     "32"
+#else
+#define BYTESTART   "12"
+#define BYTE        "16"
+#define BYTEEND     "20"
+#endif
+#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
+    int bit;
+
+#ifndef BRANCHLESS_CABAC_DECODER
+    asm volatile(
+        "movzbl (%1), %0                        \n\t"
+        "movl "RANGE    "(%2), %%ebx            \n\t"
+        "movl "RANGE    "(%2), %%edx            \n\t"
+        "andl $0xC0, %%ebx                      \n\t"
+        "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t"
+        "movl "LOW      "(%2), %%ebx            \n\t"
+//eax:state ebx:low, edx:range, esi:RangeLPS
+        "subl %%esi, %%edx                      \n\t"
+        "movl %%edx, %%ecx                      \n\t"
+        "shll $17, %%ecx                        \n\t"
+        "cmpl %%ecx, %%ebx                      \n\t"
+        " ja 1f                                 \n\t"
+
+#if 1
+        //athlon:4067 P3:4110
+        "lea -0x100(%%edx), %%ecx               \n\t"
+        "shr $31, %%ecx                         \n\t"
+        "shl %%cl, %%edx                        \n\t"
+        "shl %%cl, %%ebx                        \n\t"
+#else
+        //athlon:4057 P3:4130
+        "cmp $0x100, %%edx                      \n\t" //FIXME avoidable
+        "setb %%cl                              \n\t"
+        "shl %%cl, %%edx                        \n\t"
+        "shl %%cl, %%ebx                        \n\t"
+#endif
+        "movzbl "MANGLE(ff_h264_mps_state)"(%0), %%ecx   \n\t"
+        "movb %%cl, (%1)                        \n\t"
+//eax:state ebx:low, edx:range, esi:RangeLPS
+        "test %%bx, %%bx                        \n\t"
+        " jnz 2f                                \n\t"
+        "mov  "BYTE     "(%2), %%"REG_S"        \n\t"
+        "subl $0xFFFF, %%ebx                    \n\t"
+        "movzwl (%%"REG_S"), %%ecx              \n\t"
+        "bswap %%ecx                            \n\t"
+        "shrl $15, %%ecx                        \n\t"
+        "add  $2, %%"REG_S"                     \n\t"
+        "addl %%ecx, %%ebx                      \n\t"
+        "mov  %%"REG_S", "BYTE    "(%2)         \n\t"
+        "jmp 2f                                 \n\t"
+        "1:                                     \n\t"
+//eax:state ebx:low, edx:range, esi:RangeLPS
+        "subl %%ecx, %%ebx                      \n\t"
+        "movl %%esi, %%edx                      \n\t"
+        "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx   \n\t"
+        "shll %%cl, %%ebx                       \n\t"
+        "shll %%cl, %%edx                       \n\t"
+        "movzbl "MANGLE(ff_h264_lps_state)"(%0), %%ecx   \n\t"
+        "movb %%cl, (%1)                        \n\t"
+        "add  $1, %0                            \n\t"
+        "test %%bx, %%bx                        \n\t"
+        " jnz 2f                                \n\t"
+
+        "mov  "BYTE     "(%2), %%"REG_c"        \n\t"
+        "movzwl (%%"REG_c"), %%esi              \n\t"
+        "bswap %%esi                            \n\t"
+        "shrl $15, %%esi                        \n\t"
+        "subl $0xFFFF, %%esi                    \n\t"
+        "add  $2, %%"REG_c"                     \n\t"
+        "mov  %%"REG_c", "BYTE    "(%2)         \n\t"
+
+        "leal -1(%%ebx), %%ecx                  \n\t"
+        "xorl %%ebx, %%ecx                      \n\t"
+        "shrl $15, %%ecx                        \n\t"
+        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx   \n\t"
+        "neg %%ecx                              \n\t"
+        "add $7, %%ecx                          \n\t"
+
+        "shll %%cl , %%esi                      \n\t"
+        "addl %%esi, %%ebx                      \n\t"
+        "2:                                     \n\t"
+        "movl %%edx, "RANGE    "(%2)            \n\t"
+        "movl %%ebx, "LOW      "(%2)            \n\t"
+        :"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or misscompiles it (for example if "+a"(bit) or "+m"(*state) is used
+        :"r"(state), "r"(c)
+        : "%"REG_c, "%ebx", "%edx", "%"REG_S, "memory"
+    );
+    bit&=1;
+#else /* BRANCHLESS_CABAC_DECODER */
+
+
+#if defined CMOV_IS_FAST
+#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
+        "mov    "tmp"       , %%ecx                                     \n\t"\
+        "shl    $17         , "tmp"                                     \n\t"\
+        "cmp    "low"       , "tmp"                                     \n\t"\
+        "cmova  %%ecx       , "range"                                   \n\t"\
+        "sbb    %%ecx       , %%ecx                                     \n\t"\
+        "and    %%ecx       , "tmp"                                     \n\t"\
+        "sub    "tmp"       , "low"                                     \n\t"\
+        "xor    %%ecx       , "ret"                                     \n\t"
+#else /* CMOV_IS_FAST */
+#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
+        "mov    "tmp"       , %%ecx                                     \n\t"\
+        "shl    $17         , "tmp"                                     \n\t"\
+        "sub    "low"       , "tmp"                                     \n\t"\
+        "sar    $31         , "tmp"                                     \n\t" /*lps_mask*/\
+        "sub    %%ecx       , "range"                                   \n\t" /*RangeLPS - range*/\
+        "and    "tmp"       , "range"                                   \n\t" /*(RangeLPS - range)&lps_mask*/\
+        "add    %%ecx       , "range"                                   \n\t" /*new range*/\
+        "shl    $17         , %%ecx                                     \n\t"\
+        "and    "tmp"       , %%ecx                                     \n\t"\
+        "sub    %%ecx       , "low"                                     \n\t"\
+        "xor    "tmp"       , "ret"                                     \n\t"
+#endif /* CMOV_IS_FAST */
+
+
+#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
+        "movzbl "statep"    , "ret"                                     \n\t"\
+        "mov    "range"     , "tmp"                                     \n\t"\
+        "and    $0xC0       , "range"                                   \n\t"\
+        "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
+        "sub    "range"     , "tmp"                                     \n\t"\
+        BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
+        "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx          \n\t"\
+        "shl    %%cl        , "range"                                   \n\t"\
+        "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp"          \n\t"\
+        "mov    "tmpbyte"   , "statep"                                  \n\t"\
+        "shl    %%cl        , "low"                                     \n\t"\
+        "test   "lowword"   , "lowword"                                 \n\t"\
+        " jnz   1f                                                      \n\t"\
+        "mov "BYTE"("cabac"), %%"REG_c"                                 \n\t"\
+        "movzwl (%%"REG_c")     , "tmp"                                 \n\t"\
+        "bswap  "tmp"                                                   \n\t"\
+        "shr    $15         , "tmp"                                     \n\t"\
+        "sub    $0xFFFF     , "tmp"                                     \n\t"\
+        "add    $2          , %%"REG_c"                                 \n\t"\
+        "mov    %%"REG_c"   , "BYTE    "("cabac")                       \n\t"\
+        "lea    -1("low")   , %%ecx                                     \n\t"\
+        "xor    "low"       , %%ecx                                     \n\t"\
+        "shr    $15         , %%ecx                                     \n\t"\
+        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx            \n\t"\
+        "neg    %%ecx                                                   \n\t"\
+        "add    $7          , %%ecx                                     \n\t"\
+        "shl    %%cl        , "tmp"                                     \n\t"\
+        "add    "tmp"       , "low"                                     \n\t"\
+        "1:                                                             \n\t"
+
+    asm volatile(
+        "movl "RANGE    "(%2), %%esi            \n\t"
+        "movl "LOW      "(%2), %%ebx            \n\t"
+        BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
+        "movl %%esi, "RANGE    "(%2)            \n\t"
+        "movl %%ebx, "LOW      "(%2)            \n\t"
+
+        :"=&a"(bit)
+        :"r"(state), "r"(c)
+        : "%"REG_c, "%ebx", "%edx", "%esi", "memory"
+    );
+    bit&=1;
+#endif /* BRANCHLESS_CABAC_DECODER */
+#else /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
+    int s = *state;
+    int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + s];
+    int bit, lps_mask attribute_unused;
+
+    c->range -= RangeLPS;
+#ifndef BRANCHLESS_CABAC_DECODER
+    if(c->low < (c->range<<17)){
+        bit= s&1;
+        *state= ff_h264_mps_state[s];
+        renorm_cabac_decoder_once(c);
+    }else{
+        bit= ff_h264_norm_shift[RangeLPS];
+        c->low -= (c->range<<17);
+        *state= ff_h264_lps_state[s];
+        c->range = RangeLPS<<bit;
+        c->low <<= bit;
+        bit= (s&1)^1;
+
+        if(!(c->low & 0xFFFF)){
+            refill2(c);
+        }
+    }
+#else /* BRANCHLESS_CABAC_DECODER */
+    lps_mask= ((c->range<<17) - c->low)>>31;
+
+    c->low -= (c->range<<17) & lps_mask;
+    c->range += (RangeLPS - c->range) & lps_mask;
+
+    s^=lps_mask;
+    *state= (ff_h264_mlps_state+128)[s];
+    bit= s&1;
+
+    lps_mask= ff_h264_norm_shift[c->range];
+    c->range<<= lps_mask;
+    c->low  <<= lps_mask;
+    if(!(c->low & CABAC_MASK))
+        refill2(c);
+#endif /* BRANCHLESS_CABAC_DECODER */
+#endif /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
+    return bit;
+}
+
+static int __attribute((noinline)) get_cabac_noinline(CABACContext *c, uint8_t * const state){
+    return get_cabac_inline(c,state);
+}
+
+static int get_cabac(CABACContext *c, uint8_t * const state){
+    return get_cabac_inline(c,state);
+}
+
+static int get_cabac_bypass(CABACContext *c){
+#if 0 //not faster
+    int bit;
+    asm volatile(
+        "movl "RANGE    "(%1), %%ebx            \n\t"
+        "movl "LOW      "(%1), %%eax            \n\t"
+        "shl $17, %%ebx                         \n\t"
+        "add %%eax, %%eax                       \n\t"
+        "sub %%ebx, %%eax                       \n\t"
+        "cdq                                    \n\t"
+        "and %%edx, %%ebx                       \n\t"
+        "add %%ebx, %%eax                       \n\t"
+        "test %%ax, %%ax                        \n\t"
+        " jnz 1f                                \n\t"
+        "movl "BYTE     "(%1), %%"REG_b"        \n\t"
+        "subl $0xFFFF, %%eax                    \n\t"
+        "movzwl (%%"REG_b"), %%ecx              \n\t"
+        "bswap %%ecx                            \n\t"
+        "shrl $15, %%ecx                        \n\t"
+        "addl $2, %%"REG_b"                     \n\t"
+        "addl %%ecx, %%eax                      \n\t"
+        "movl %%"REG_b", "BYTE     "(%1)        \n\t"
+        "1:                                     \n\t"
+        "movl %%eax, "LOW      "(%1)            \n\t"
+
+        :"=&d"(bit)
+        :"r"(c)
+        : "%eax", "%"REG_b, "%ecx", "memory"
+    );
+    return bit+1;
+#else
+    int range;
+    c->low += c->low;
+
+    if(!(c->low & CABAC_MASK))
+        refill(c);
+
+    range= c->range<<17;
+    if(c->low < range){
+        return 0;
+    }else{
+        c->low -= range;
+        return 1;
+    }
+#endif
+}
+
+
+static always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
+#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
+    asm volatile(
+        "movl "RANGE    "(%1), %%ebx            \n\t"
+        "movl "LOW      "(%1), %%eax            \n\t"
+        "shl $17, %%ebx                         \n\t"
+        "add %%eax, %%eax                       \n\t"
+        "sub %%ebx, %%eax                       \n\t"
+        "cdq                                    \n\t"
+        "and %%edx, %%ebx                       \n\t"
+        "add %%ebx, %%eax                       \n\t"
+        "xor %%edx, %%ecx                       \n\t"
+        "sub %%edx, %%ecx                       \n\t"
+        "test %%ax, %%ax                        \n\t"
+        " jnz 1f                                \n\t"
+        "mov  "BYTE     "(%1), %%"REG_b"        \n\t"
+        "subl $0xFFFF, %%eax                    \n\t"
+        "movzwl (%%"REG_b"), %%edx              \n\t"
+        "bswap %%edx                            \n\t"
+        "shrl $15, %%edx                        \n\t"
+        "add  $2, %%"REG_b"                     \n\t"
+        "addl %%edx, %%eax                      \n\t"
+        "mov  %%"REG_b", "BYTE     "(%1)        \n\t"
+        "1:                                     \n\t"
+        "movl %%eax, "LOW      "(%1)            \n\t"
+
+        :"+c"(val)
+        :"r"(c)
+        : "%eax", "%"REG_b, "%edx", "memory"
+    );
+    return val;
+#else
+    int range, mask;
+    c->low += c->low;
+
+    if(!(c->low & CABAC_MASK))
+        refill(c);
+
+    range= c->range<<17;
+    c->low -= range;
+    mask= c->low >> 31;
+    range &= mask;
+    c->low += range;
+    return (val^mask)-mask;
+#endif
+}
+
+//FIXME the x86 code from this file should be moved into i386/h264 or cabac something.c/h (note ill kill you if you move my code away from under my fingers before iam finished with it!)
+//FIXME use some macros to avoid duplicatin get_cabac (cant be done yet as that would make optimization work hard)
+#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
+static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, int *index){
+    void *end= significant_coeff_ctx_base + max_coeff - 1;
+    int minusstart= -(int)significant_coeff_ctx_base;
+    int minusindex= 4-(int)index;
+    int coeff_count;
+    asm volatile(
+        "movl "RANGE    "(%3), %%esi            \n\t"
+        "movl "LOW      "(%3), %%ebx            \n\t"
+
+        "2:                                     \n\t"
+
+        BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
+
+        "test $1, %%edx                         \n\t"
+        " jz 3f                                 \n\t"
+
+        BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
+
+        "mov  %2, %%"REG_a"                     \n\t"
+        "movl %4, %%ecx                         \n\t"
+        "add  %1, %%"REG_c"                     \n\t"
+        "movl %%ecx, (%%"REG_a")                \n\t"
+
+        "test $1, %%edx                         \n\t"
+        " jnz 4f                                \n\t"
+
+        "add  $4, %%"REG_a"                     \n\t"
+        "mov  %%"REG_a", %2                     \n\t"
+
+        "3:                                     \n\t"
+        "add  $1, %1                            \n\t"
+        "cmp  %5, %1                            \n\t"
+        " jb 2b                                 \n\t"
+        "mov  %2, %%"REG_a"                     \n\t"
+        "movl %4, %%ecx                         \n\t"
+        "add  %1, %%"REG_c"                     \n\t"
+        "movl %%ecx, (%%"REG_a")                \n\t"
+        "4:                                     \n\t"
+        "add  %6, %%eax                         \n\t"
+        "shr $2, %%eax                          \n\t"
+
+        "movl %%esi, "RANGE    "(%3)            \n\t"
+        "movl %%ebx, "LOW      "(%3)            \n\t"
+        :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)\
+        :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)\
+        : "%"REG_c, "%ebx", "%edx", "%esi", "memory"\
+    );
+    return coeff_count;
+}
+
+static int decode_significance_8x8_x86(CABACContext *c, uint8_t *significant_coeff_ctx_base, int *index, uint8_t *sig_off){
+    int minusindex= 4-(int)index;
+    int coeff_count;
+    long last=0;
+    asm volatile(
+        "movl "RANGE    "(%3), %%esi            \n\t"
+        "movl "LOW      "(%3), %%ebx            \n\t"
+
+        "mov %1, %%"REG_D"                      \n\t"
+        "2:                                     \n\t"
+
+        "mov %6, %%"REG_a"                      \n\t"
+        "movzbl (%%"REG_a", %%"REG_D"), %%edi   \n\t"
+        "add %5, %%"REG_D"                      \n\t"
+
+        BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
+
+        "mov %1, %%edi                          \n\t"
+        "test $1, %%edx                         \n\t"
+        " jz 3f                                 \n\t"
+
+        "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
+        "add %5, %%"REG_D"                      \n\t"
+
+        BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%"REG_D")", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
+
+        "mov %2, %%"REG_a"                      \n\t"
+        "mov %1, %%edi                          \n\t"
+        "movl %%edi, (%%"REG_a")                \n\t"
+
+        "test $1, %%edx                         \n\t"
+        " jnz 4f                                \n\t"
+
+        "add $4, %%"REG_a"                      \n\t"
+        "mov %%"REG_a", %2                      \n\t"
+
+        "3:                                     \n\t"
+        "addl $1, %%edi                         \n\t"
+        "mov %%edi, %1                          \n\t"
+        "cmpl $63, %%edi                        \n\t"
+        " jb 2b                                 \n\t"
+        "mov %2, %%"REG_a"                      \n\t"
+        "movl %%edi, (%%"REG_a")                \n\t"
+        "4:                                     \n\t"
+        "addl %4, %%eax                         \n\t"
+        "shr $2, %%eax                          \n\t"
+
+        "movl %%esi, "RANGE    "(%3)            \n\t"
+        "movl %%ebx, "LOW      "(%3)            \n\t"
+        :"=&a"(coeff_count),"+m"(last), "+m"(index)\
+        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)\
+        : "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory"\
+    );
+    return coeff_count;
+}
+#endif /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
+
+/**
+ *
+ * @return the number of bytes read or 0 if no end
+ */
+static int get_cabac_terminate(CABACContext *c){
+    c->range -= 2;
+    if(c->low < c->range<<17){
+        renorm_cabac_decoder_once(c);
+        return 0;
+    }else{
+        return c->bytestream - c->bytestream_start;
+    }
+}
+
+/**
+ * get (truncated) unnary binarization.
+ */
+static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){
+    int i;
+
+    for(i=0; i<max; i++){
+        if(get_cabac(c, state)==0)
+            return i;
+
+        if(i< max_index) state++;
+    }
+
+    return truncated ? max : -1;
+}
+
+/**
+ * get unary exp golomb k-th order binarization.
+ */
+static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){
+    int i, v;
+    int m= 1<<k;
+
+    if(get_cabac(c, state)==0)
+        return 0;
+
+    if(0 < max_index) state++;
+
+    for(i=1; i<max; i++){
+        if(get_cabac(c, state)==0){
+            if(is_signed && get_cabac_bypass(c)){
+                return -i;
+            }else
+                return i;
+        }
+
+        if(i < max_index) state++;
+    }
+
+    while(get_cabac_bypass(c)){
+        i+= m;
+        m+= m;
+    }
+
+    v=0;
+    while(m>>=1){
+        v+= v + get_cabac_bypass(c);
+    }
+    i += v;
+
+    if(is_signed && get_cabac_bypass(c)){
+        return -i;
+    }else
+        return i;
+}
diff --git a/contrib/ffmpeg/libavcodec/cavs.c b/contrib/ffmpeg/libavcodec/cavs.c
new file mode 100644
index 000000000..ee862bbc7
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/cavs.c
@@ -0,0 +1,1540 @@
+/*
+ * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
+ * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/**
+ * @file cavs.c
+ * Chinese AVS video (AVS1-P2, JiZhun profile) decoder
+ * @author Stefan Gehrer <stefan.gehrer@gmx.de>
+ */
+
+#include "avcodec.h"
+#include "bitstream.h"
+#include "golomb.h"
+#include "mpegvideo.h"
+#include "cavsdata.h"
+
+#ifdef CONFIG_CAVS_DECODER
+typedef struct {
+    MpegEncContext s;
+    Picture picture; ///< currently decoded frame
+    Picture DPB[2];  ///< reference frames
+    int dist[2];     ///< temporal distances from current frame to ref frames
+    int profile, level;
+    int aspect_ratio;
+    int mb_width, mb_height;
+    int pic_type;
+    int progressive;
+    int pic_structure;
+    int skip_mode_flag; ///< select between skip_count or one skip_flag per MB
+    int loop_filter_disable;
+    int alpha_offset, beta_offset;
+    int ref_flag;
+    int mbx, mby;      ///< macroblock coordinates
+    int flags;         ///< availability flags of neighbouring macroblocks
+    int stc;           ///< last start code
+    uint8_t *cy, *cu, *cv; ///< current MB sample pointers
+    int left_qp;
+    uint8_t *top_qp;
+
+    /** mv motion vector cache
+       0:    D3  B2  B3  C2
+       4:    A1  X0  X1   -
+       8:    A3  X2  X3   -
+
+       X are the vectors in the current macroblock (5,6,9,10)
+       A is the macroblock to the left (4,8)
+       B is the macroblock to the top (1,2)
+       C is the macroblock to the top-right (3)
+       D is the macroblock to the top-left (0)
+
+       the same is repeated for backward motion vectors */
+    vector_t mv[2*4*3];
+    vector_t *top_mv[2];
+    vector_t *col_mv;
+
+    /** luma pred mode cache
+       0:    --  B2  B3
+       3:    A1  X0  X1
+       6:    A3  X2  X3   */
+    int pred_mode_Y[3*3];
+    int *top_pred_Y;
+    int l_stride, c_stride;
+    int luma_scan[4];
+    int qp;
+    int qp_fixed;
+    int cbp;
+    ScanTable scantable;
+
+    /** intra prediction is done with un-deblocked samples
+     they are saved here before deblocking the MB  */
+    uint8_t *top_border_y, *top_border_u, *top_border_v;
+    uint8_t left_border_y[26], left_border_u[10], left_border_v[10];
+    uint8_t intern_border_y[26];
+    uint8_t topleft_border_y, topleft_border_u, topleft_border_v;
+
+    void (*intra_pred_l[8])(uint8_t *d,uint8_t *top,uint8_t *left,int stride);
+    void (*intra_pred_c[7])(uint8_t *d,uint8_t *top,uint8_t *left,int stride);
+    uint8_t *col_type_base;
+    uint8_t *col_type;
+
+    /* scaling factors for MV prediction */
+    int sym_factor;    ///< for scaling in symmetrical B block
+    int direct_den[2]; ///< for scaling in direct B block
+    int scale_den[2];  ///< for scaling neighbouring MVs
+
+    int got_keyframe;
+    DCTELEM *block;
+} AVSContext;
+
+/*****************************************************************************
+ *
+ * in-loop deblocking filter
+ *
+ ****************************************************************************/
+
+static inline int get_bs(vector_t *mvP, vector_t *mvQ, int b) {
+    if((mvP->ref == REF_INTRA) || (mvQ->ref == REF_INTRA))
+        return 2;
+    if( (abs(mvP->x - mvQ->x) >= 4) ||  (abs(mvP->y - mvQ->y) >= 4) )
+        return 1;
+    if(b){
+        mvP += MV_BWD_OFFS;
+        mvQ += MV_BWD_OFFS;
+        if( (abs(mvP->x - mvQ->x) >= 4) ||  (abs(mvP->y - mvQ->y) >= 4) )
+            return 1;
+    }else{
+        if(mvP->ref != mvQ->ref)
+            return 1;
+    }
+    return 0;
+}
+
+#define SET_PARAMS                                            \
+    alpha = alpha_tab[clip(qp_avg + h->alpha_offset,0,63)];   \
+    beta  =  beta_tab[clip(qp_avg + h->beta_offset, 0,63)];   \
+    tc    =    tc_tab[clip(qp_avg + h->alpha_offset,0,63)];
+
+/**
+ * in-loop deblocking filter for a single macroblock
+ *
+ * boundary strength (bs) mapping:
+ *
+ * --4---5--
+ * 0   2   |
+ * | 6 | 7 |
+ * 1   3   |
+ * ---------
+ *
+ */
+static void filter_mb(AVSContext *h, enum mb_t mb_type) {
+    DECLARE_ALIGNED_8(uint8_t, bs[8]);
+    int qp_avg, alpha, beta, tc;
+    int i;
+
+    /* save un-deblocked lines */
+    h->topleft_border_y = h->top_border_y[h->mbx*16+15];
+    h->topleft_border_u = h->top_border_u[h->mbx*10+8];
+    h->topleft_border_v = h->top_border_v[h->mbx*10+8];
+    memcpy(&h->top_border_y[h->mbx*16], h->cy + 15* h->l_stride,16);
+    memcpy(&h->top_border_u[h->mbx*10+1], h->cu +  7* h->c_stride,8);
+    memcpy(&h->top_border_v[h->mbx*10+1], h->cv +  7* h->c_stride,8);
+    for(i=0;i<8;i++) {
+        h->left_border_y[i*2+1] = *(h->cy + 15 + (i*2+0)*h->l_stride);
+        h->left_border_y[i*2+2] = *(h->cy + 15 + (i*2+1)*h->l_stride);
+        h->left_border_u[i+1] = *(h->cu + 7 + i*h->c_stride);
+        h->left_border_v[i+1] = *(h->cv + 7 + i*h->c_stride);
+    }
+    if(!h->loop_filter_disable) {
+        /* determine bs */
+        if(mb_type == I_8X8)
+            *((uint64_t *)bs) = 0x0202020202020202ULL;
+        else{
+            *((uint64_t *)bs) = 0;
+            if(partition_flags[mb_type] & SPLITV){
+                bs[2] = get_bs(&h->mv[MV_FWD_X0], &h->mv[MV_FWD_X1], mb_type > P_8X8);
+                bs[3] = get_bs(&h->mv[MV_FWD_X2], &h->mv[MV_FWD_X3], mb_type > P_8X8);
+            }
+            if(partition_flags[mb_type] & SPLITH){
+                bs[6] = get_bs(&h->mv[MV_FWD_X0], &h->mv[MV_FWD_X2], mb_type > P_8X8);
+                bs[7] = get_bs(&h->mv[MV_FWD_X1], &h->mv[MV_FWD_X3], mb_type > P_8X8);
+            }
+            bs[0] = get_bs(&h->mv[MV_FWD_A1], &h->mv[MV_FWD_X0], mb_type > P_8X8);
+            bs[1] = get_bs(&h->mv[MV_FWD_A3], &h->mv[MV_FWD_X2], mb_type > P_8X8);
+            bs[4] = get_bs(&h->mv[MV_FWD_B2], &h->mv[MV_FWD_X0], mb_type > P_8X8);
+            bs[5] = get_bs(&h->mv[MV_FWD_B3], &h->mv[MV_FWD_X1], mb_type > P_8X8);
+        }
+        if( *((uint64_t *)bs) ) {
+            if(h->flags & A_AVAIL) {
+                qp_avg = (h->qp + h->left_qp + 1) >> 1;
+                SET_PARAMS;
+                h->s.dsp.cavs_filter_lv(h->cy,h->l_stride,alpha,beta,tc,bs[0],bs[1]);
+                h->s.dsp.cavs_filter_cv(h->cu,h->c_stride,alpha,beta,tc,bs[0],bs[1]);
+                h->s.dsp.cavs_filter_cv(h->cv,h->c_stride,alpha,beta,tc,bs[0],bs[1]);
+            }
+            qp_avg = h->qp;
+            SET_PARAMS;
+            h->s.dsp.cavs_filter_lv(h->cy + 8,h->l_stride,alpha,beta,tc,bs[2],bs[3]);
+            h->s.dsp.cavs_filter_lh(h->cy + 8*h->l_stride,h->l_stride,alpha,beta,tc,
+                           bs[6],bs[7]);
+
+            if(h->flags & B_AVAIL) {
+                qp_avg = (h->qp + h->top_qp[h->mbx] + 1) >> 1;
+                SET_PARAMS;
+                h->s.dsp.cavs_filter_lh(h->cy,h->l_stride,alpha,beta,tc,bs[4],bs[5]);
+                h->s.dsp.cavs_filter_ch(h->cu,h->c_stride,alpha,beta,tc,bs[4],bs[5]);
+                h->s.dsp.cavs_filter_ch(h->cv,h->c_stride,alpha,beta,tc,bs[4],bs[5]);
+            }
+        }
+    }
+    h->left_qp = h->qp;
+    h->top_qp[h->mbx] = h->qp;
+}
+
+#undef SET_PARAMS
+
+/*****************************************************************************
+ *
+ * spatial intra prediction
+ *
+ ****************************************************************************/
+
+static inline void load_intra_pred_luma(AVSContext *h, uint8_t *top,
+                                        uint8_t **left, int block) {
+    int i;
+
+    switch(block) {
+    case 0:
+        *left = h->left_border_y;
+        h->left_border_y[0] = h->left_border_y[1];
+        memset(&h->left_border_y[17],h->left_border_y[16],9);
+        memcpy(&top[1],&h->top_border_y[h->mbx*16],16);
+        top[17] = top[16];
+        top[0] = top[1];
+        if((h->flags & A_AVAIL) && (h->flags & B_AVAIL))
+            h->left_border_y[0] = top[0] = h->topleft_border_y;
+        break;
+    case 1:
+        *left = h->intern_border_y;
+        for(i=0;i<8;i++)
+            h->intern_border_y[i+1] = *(h->cy + 7 + i*h->l_stride);
+        memset(&h->intern_border_y[9],h->intern_border_y[8],9);
+        h->intern_border_y[0] = h->intern_border_y[1];
+        memcpy(&top[1],&h->top_border_y[h->mbx*16+8],8);
+        if(h->flags & C_AVAIL)
+            memcpy(&top[9],&h->top_border_y[(h->mbx + 1)*16],8);
+        else
+            memset(&top[9],top[8],9);
+        top[17] = top[16];
+        top[0] = top[1];
+        if(h->flags & B_AVAIL)
+            h->intern_border_y[0] = top[0] = h->top_border_y[h->mbx*16+7];
+        break;
+    case 2:
+        *left = &h->left_border_y[8];
+        memcpy(&top[1],h->cy + 7*h->l_stride,16);
+        top[17] = top[16];
+        top[0] = top[1];
+        if(h->flags & A_AVAIL)
+            top[0] = h->left_border_y[8];
+        break;
+    case 3:
+        *left = &h->intern_border_y[8];
+        for(i=0;i<8;i++)
+            h->intern_border_y[i+9] = *(h->cy + 7 + (i+8)*h->l_stride);
+        memset(&h->intern_border_y[17],h->intern_border_y[16],9);
+        memcpy(&top[0],h->cy + 7 + 7*h->l_stride,9);
+        memset(&top[9],top[8],9);
+        break;
+    }
+}
+
+static void intra_pred_vert(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
+    int y;
+    uint64_t a = unaligned64(&top[1]);
+    for(y=0;y<8;y++) {
+        *((uint64_t *)(d+y*stride)) = a;
+    }
+}
+
+static void intra_pred_horiz(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
+    int y;
+    uint64_t a;
+    for(y=0;y<8;y++) {
+        a = left[y+1] * 0x0101010101010101ULL;
+        *((uint64_t *)(d+y*stride)) = a;
+    }
+}
+
+static void intra_pred_dc_128(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
+    int y;
+    uint64_t a = 0x8080808080808080ULL;
+    for(y=0;y<8;y++)
+        *((uint64_t *)(d+y*stride)) = a;
+}
+
+static void intra_pred_plane(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
+    int x,y,ia;
+    int ih = 0;
+    int iv = 0;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    for(x=0; x<4; x++) {
+        ih += (x+1)*(top[5+x]-top[3-x]);
+        iv += (x+1)*(left[5+x]-left[3-x]);
+    }
+    ia = (top[8]+left[8])<<4;
+    ih = (17*ih+16)>>5;
+    iv = (17*iv+16)>>5;
+    for(y=0; y<8; y++)
+        for(x=0; x<8; x++)
+            d[y*stride+x] = cm[(ia+(x-3)*ih+(y-3)*iv+16)>>5];
+}
+
+#define LOWPASS(ARRAY,INDEX)                                            \
+    (( ARRAY[(INDEX)-1] + 2*ARRAY[(INDEX)] + ARRAY[(INDEX)+1] + 2) >> 2)
+
+static void intra_pred_lp(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
+    int x,y;
+    for(y=0; y<8; y++)
+        for(x=0; x<8; x++)
+            d[y*stride+x] = (LOWPASS(top,x+1) + LOWPASS(left,y+1)) >> 1;
+}
+
+static void intra_pred_down_left(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
+    int x,y;
+    for(y=0; y<8; y++)
+        for(x=0; x<8; x++)
+            d[y*stride+x] = (LOWPASS(top,x+y+2) + LOWPASS(left,x+y+2)) >> 1;
+}
+
+static void intra_pred_down_right(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
+    int x,y;
+    for(y=0; y<8; y++)
+        for(x=0; x<8; x++)
+            if(x==y)
+                d[y*stride+x] = (left[1]+2*top[0]+top[1]+2)>>2;
+            else if(x>y)
+                d[y*stride+x] = LOWPASS(top,x-y);
+            else
+                d[y*stride+x] = LOWPASS(left,y-x);
+}
+
+static void intra_pred_lp_left(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
+    int x,y;
+    for(y=0; y<8; y++)
+        for(x=0; x<8; x++)
+            d[y*stride+x] = LOWPASS(left,y+1);
+}
+
+static void intra_pred_lp_top(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
+    int x,y;
+    for(y=0; y<8; y++)
+        for(x=0; x<8; x++)
+            d[y*stride+x] = LOWPASS(top,x+1);
+}
+
+#undef LOWPASS
+
+static inline void modify_pred(const int_fast8_t *mod_table, int *mode) {
+    *mode = mod_table[*mode];
+    if(*mode < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Illegal intra prediction mode\n");
+        *mode = 0;
+    }
+}
+
+/*****************************************************************************
+ *
+ * motion compensation
+ *
+ ****************************************************************************/
+
+static inline void mc_dir_part(AVSContext *h,Picture *pic,int square,
+                        int chroma_height,int delta,int list,uint8_t *dest_y,
+                        uint8_t *dest_cb,uint8_t *dest_cr,int src_x_offset,
+                        int src_y_offset,qpel_mc_func *qpix_op,
+                        h264_chroma_mc_func chroma_op,vector_t *mv){
+    MpegEncContext * const s = &h->s;
+    const int mx= mv->x + src_x_offset*8;
+    const int my= mv->y + src_y_offset*8;
+    const int luma_xy= (mx&3) + ((my&3)<<2);
+    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->l_stride;
+    uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->c_stride;
+    uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->c_stride;
+    int extra_width= 0; //(s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
+    int extra_height= extra_width;
+    int emu=0;
+    const int full_mx= mx>>2;
+    const int full_my= my>>2;
+    const int pic_width  = 16*h->mb_width;
+    const int pic_height = 16*h->mb_height;
+
+    if(!pic->data[0])
+        return;
+    if(mx&7) extra_width -= 3;
+    if(my&7) extra_height -= 3;
+
+    if(   full_mx < 0-extra_width
+          || full_my < 0-extra_height
+          || full_mx + 16/*FIXME*/ > pic_width + extra_width
+          || full_my + 16/*FIXME*/ > pic_height + extra_height){
+        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->l_stride, h->l_stride,
+                            16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
+        src_y= s->edge_emu_buffer + 2 + 2*h->l_stride;
+        emu=1;
+    }
+
+    qpix_op[luma_xy](dest_y, src_y, h->l_stride); //FIXME try variable height perhaps?
+    if(!square){
+        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->l_stride);
+    }
+
+    if(emu){
+        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->c_stride,
+                            9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
+        src_cb= s->edge_emu_buffer;
+    }
+    chroma_op(dest_cb, src_cb, h->c_stride, chroma_height, mx&7, my&7);
+
+    if(emu){
+        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->c_stride,
+                            9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
+        src_cr= s->edge_emu_buffer;
+    }
+    chroma_op(dest_cr, src_cr, h->c_stride, chroma_height, mx&7, my&7);
+}
+
+static inline void mc_part_std(AVSContext *h,int square,int chroma_height,int delta,
+                        uint8_t *dest_y,uint8_t *dest_cb,uint8_t *dest_cr,
+                        int x_offset, int y_offset,qpel_mc_func *qpix_put,
+                        h264_chroma_mc_func chroma_put,qpel_mc_func *qpix_avg,
+                        h264_chroma_mc_func chroma_avg, vector_t *mv){
+    qpel_mc_func *qpix_op=  qpix_put;
+    h264_chroma_mc_func chroma_op= chroma_put;
+
+    dest_y  += 2*x_offset + 2*y_offset*h->l_stride;
+    dest_cb +=   x_offset +   y_offset*h->c_stride;
+    dest_cr +=   x_offset +   y_offset*h->c_stride;
+    x_offset += 8*h->mbx;
+    y_offset += 8*h->mby;
+
+    if(mv->ref >= 0){
+        Picture *ref= &h->DPB[mv->ref];
+        mc_dir_part(h, ref, square, chroma_height, delta, 0,
+                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                    qpix_op, chroma_op, mv);
+
+        qpix_op=  qpix_avg;
+        chroma_op= chroma_avg;
+    }
+
+    if((mv+MV_BWD_OFFS)->ref >= 0){
+        Picture *ref= &h->DPB[0];
+        mc_dir_part(h, ref, square, chroma_height, delta, 1,
+                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                    qpix_op, chroma_op, mv+MV_BWD_OFFS);
+    }
+}
+
+static void inter_pred(AVSContext *h, enum mb_t mb_type) {
+    if(partition_flags[mb_type] == 0){ // 16x16
+        mc_part_std(h, 1, 8, 0, h->cy, h->cu, h->cv, 0, 0,
+                h->s.dsp.put_cavs_qpel_pixels_tab[0],
+                h->s.dsp.put_h264_chroma_pixels_tab[0],
+                h->s.dsp.avg_cavs_qpel_pixels_tab[0],
+                h->s.dsp.avg_h264_chroma_pixels_tab[0],&h->mv[MV_FWD_X0]);
+    }else{
+        mc_part_std(h, 1, 4, 0, h->cy, h->cu, h->cv, 0, 0,
+                h->s.dsp.put_cavs_qpel_pixels_tab[1],
+                h->s.dsp.put_h264_chroma_pixels_tab[1],
+                h->s.dsp.avg_cavs_qpel_pixels_tab[1],
+                h->s.dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X0]);
+        mc_part_std(h, 1, 4, 0, h->cy, h->cu, h->cv, 4, 0,
+                h->s.dsp.put_cavs_qpel_pixels_tab[1],
+                h->s.dsp.put_h264_chroma_pixels_tab[1],
+                h->s.dsp.avg_cavs_qpel_pixels_tab[1],
+                h->s.dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X1]);
+        mc_part_std(h, 1, 4, 0, h->cy, h->cu, h->cv, 0, 4,
+                h->s.dsp.put_cavs_qpel_pixels_tab[1],
+                h->s.dsp.put_h264_chroma_pixels_tab[1],
+                h->s.dsp.avg_cavs_qpel_pixels_tab[1],
+                h->s.dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X2]);
+        mc_part_std(h, 1, 4, 0, h->cy, h->cu, h->cv, 4, 4,
+                h->s.dsp.put_cavs_qpel_pixels_tab[1],
+                h->s.dsp.put_h264_chroma_pixels_tab[1],
+                h->s.dsp.avg_cavs_qpel_pixels_tab[1],
+                h->s.dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X3]);
+    }
+    /* set intra prediction modes to default values */
+    h->pred_mode_Y[3] =  h->pred_mode_Y[6] = INTRA_L_LP;
+    h->top_pred_Y[h->mbx*2+0] = h->top_pred_Y[h->mbx*2+1] = INTRA_L_LP;
+}
+
+/*****************************************************************************
+ *
+ * motion vector prediction
+ *
+ ****************************************************************************/
+
+static inline void set_mvs(vector_t *mv, enum block_t size) {
+    switch(size) {
+    case BLK_16X16:
+        mv[MV_STRIDE  ] = mv[0];
+        mv[MV_STRIDE+1] = mv[0];
+    case BLK_16X8:
+        mv[1] = mv[0];
+        break;
+    case BLK_8X16:
+        mv[MV_STRIDE] = mv[0];
+        break;
+    }
+}
+
+static inline void store_mvs(AVSContext *h) {
+    h->col_mv[(h->mby*h->mb_width + h->mbx)*4 + 0] = h->mv[MV_FWD_X0];
+    h->col_mv[(h->mby*h->mb_width + h->mbx)*4 + 1] = h->mv[MV_FWD_X1];
+    h->col_mv[(h->mby*h->mb_width + h->mbx)*4 + 2] = h->mv[MV_FWD_X2];
+    h->col_mv[(h->mby*h->mb_width + h->mbx)*4 + 3] = h->mv[MV_FWD_X3];
+}
+
+static inline void scale_mv(AVSContext *h, int *d_x, int *d_y, vector_t *src, int distp) {
+    int den = h->scale_den[src->ref];
+
+    *d_x = (src->x*distp*den + 256 + (src->x>>31)) >> 9;
+    *d_y = (src->y*distp*den + 256 + (src->y>>31)) >> 9;
+}
+
+static inline void mv_pred_median(AVSContext *h, vector_t *mvP, vector_t *mvA, vector_t *mvB, vector_t *mvC) {
+    int ax, ay, bx, by, cx, cy;
+    int len_ab, len_bc, len_ca, len_mid;
+
+    /* scale candidates according to their temporal span */
+    scale_mv(h, &ax, &ay, mvA, mvP->dist);
+    scale_mv(h, &bx, &by, mvB, mvP->dist);
+    scale_mv(h, &cx, &cy, mvC, mvP->dist);
+    /* find the geometrical median of the three candidates */
+    len_ab = abs(ax - bx) + abs(ay - by);
+    len_bc = abs(bx - cx) + abs(by - cy);
+    len_ca = abs(cx - ax) + abs(cy - ay);
+    len_mid = mid_pred(len_ab, len_bc, len_ca);
+    if(len_mid == len_ab) {
+        mvP->x = cx;
+        mvP->y = cy;
+    } else if(len_mid == len_bc) {
+        mvP->x = ax;
+        mvP->y = ay;
+    } else {
+        mvP->x = bx;
+        mvP->y = by;
+    }
+}
+
+static inline void mv_pred_direct(AVSContext *h, vector_t *pmv_fw,
+                                  vector_t *col_mv) {
+    vector_t *pmv_bw = pmv_fw + MV_BWD_OFFS;
+    int den = h->direct_den[col_mv->ref];
+    int m = col_mv->x >> 31;
+
+    pmv_fw->dist = h->dist[1];
+    pmv_bw->dist = h->dist[0];
+    pmv_fw->ref = 1;
+    pmv_bw->ref = 0;
+    /* scale the co-located motion vector according to its temporal span */
+    pmv_fw->x = (((den+(den*col_mv->x*pmv_fw->dist^m)-m-1)>>14)^m)-m;
+    pmv_bw->x = m-(((den+(den*col_mv->x*pmv_bw->dist^m)-m-1)>>14)^m);
+    m = col_mv->y >> 31;
+    pmv_fw->y = (((den+(den*col_mv->y*pmv_fw->dist^m)-m-1)>>14)^m)-m;
+    pmv_bw->y = m-(((den+(den*col_mv->y*pmv_bw->dist^m)-m-1)>>14)^m);
+}
+
+static inline void mv_pred_sym(AVSContext *h, vector_t *src, enum block_t size) {
+    vector_t *dst = src + MV_BWD_OFFS;
+
+    /* backward mv is the scaled and negated forward mv */
+    dst->x = -((src->x * h->sym_factor + 256) >> 9);
+    dst->y = -((src->y * h->sym_factor + 256) >> 9);
+    dst->ref = 0;
+    dst->dist = h->dist[0];
+    set_mvs(dst, size);
+}
+
+static void mv_pred(AVSContext *h, enum mv_loc_t nP, enum mv_loc_t nC,
+                    enum mv_pred_t mode, enum block_t size, int ref) {
+    vector_t *mvP = &h->mv[nP];
+    vector_t *mvA = &h->mv[nP-1];
+    vector_t *mvB = &h->mv[nP-4];
+    vector_t *mvC = &h->mv[nC];
+    const vector_t *mvP2 = NULL;
+
+    mvP->ref = ref;
+    mvP->dist = h->dist[mvP->ref];
+    if(mvC->ref == NOT_AVAIL)
+        mvC = &h->mv[nP-5]; // set to top-left (mvD)
+    if((mode == MV_PRED_PSKIP) &&
+       ((mvA->ref == NOT_AVAIL) || (mvB->ref == NOT_AVAIL) ||
+           ((mvA->x | mvA->y | mvA->ref) == 0)  ||
+           ((mvB->x | mvB->y | mvB->ref) == 0) )) {
+        mvP2 = &un_mv;
+    /* if there is only one suitable candidate, take it */
+    } else if((mvA->ref >= 0) && (mvB->ref < 0) && (mvC->ref < 0)) {
+        mvP2= mvA;
+    } else if((mvA->ref < 0) && (mvB->ref >= 0) && (mvC->ref < 0)) {
+        mvP2= mvB;
+    } else if((mvA->ref < 0) && (mvB->ref < 0) && (mvC->ref >= 0)) {
+        mvP2= mvC;
+    } else if(mode == MV_PRED_LEFT     && mvA->ref == ref){
+        mvP2= mvA;
+    } else if(mode == MV_PRED_TOP      && mvB->ref == ref){
+        mvP2= mvB;
+    } else if(mode == MV_PRED_TOPRIGHT && mvC->ref == ref){
+        mvP2= mvC;
+    }
+    if(mvP2){
+        mvP->x = mvP2->x;
+        mvP->y = mvP2->y;
+    }else
+        mv_pred_median(h, mvP, mvA, mvB, mvC);
+
+    if(mode < MV_PRED_PSKIP) {
+        mvP->x += get_se_golomb(&h->s.gb);
+        mvP->y += get_se_golomb(&h->s.gb);
+    }
+    set_mvs(mvP,size);
+}
+
+/*****************************************************************************
+ *
+ * residual data decoding
+ *
+ ****************************************************************************/
+
+/** kth-order exponential golomb code */
+static inline int get_ue_code(GetBitContext *gb, int order) {
+    if(order) {
+        int ret = get_ue_golomb(gb) << order;
+        return ret + get_bits(gb,order);
+    }
+    return get_ue_golomb(gb);
+}
+
+/**
+ * decode coefficients from one 8x8 block, dequantize, inverse transform
+ *  and add them to sample block
+ * @param r pointer to 2D VLC table
+ * @param esc_golomb_order escape codes are k-golomb with this order k
+ * @param qp quantizer
+ * @param dst location of sample block
+ * @param stride line stride in frame buffer
+ */
+static int decode_residual_block(AVSContext *h, GetBitContext *gb,
+                                 const residual_vlc_t *r, int esc_golomb_order,
+                                 int qp, uint8_t *dst, int stride) {
+    int i,pos = -1;
+    int level_code, esc_code, level, run, mask;
+    int level_buf[64];
+    int run_buf[64];
+    int dqm = dequant_mul[qp];
+    int dqs = dequant_shift[qp];
+    int dqa = 1 << (dqs - 1);
+    const uint8_t *scantab = h->scantable.permutated;
+    DCTELEM *block = h->block;
+
+    for(i=0;i<65;i++) {
+        level_code = get_ue_code(gb,r->golomb_order);
+        if(level_code >= ESCAPE_CODE) {
+            run = ((level_code - ESCAPE_CODE) >> 1) + 1;
+            esc_code = get_ue_code(gb,esc_golomb_order);
+            level = esc_code + (run > r->max_run ? 1 : r->level_add[run]);
+            while(level > r->inc_limit)
+                r++;
+            mask = -(level_code & 1);
+            level = (level^mask) - mask;
+        } else {
+            level = r->rltab[level_code][0];
+            if(!level) //end of block signal
+                break;
+            run   = r->rltab[level_code][1];
+            r += r->rltab[level_code][2];
+        }
+        level_buf[i] = level;
+        run_buf[i] = run;
+    }
+    /* inverse scan and dequantization */
+    while(--i >= 0){
+        pos += run_buf[i];
+        if(pos > 63) {
+            av_log(h->s.avctx, AV_LOG_ERROR,
+                   "position out of block bounds at pic %d MB(%d,%d)\n",
+                   h->picture.poc, h->mbx, h->mby);
+            return -1;
+        }
+        block[scantab[pos]] = (level_buf[i]*dqm + dqa) >> dqs;
+    }
+    h->s.dsp.cavs_idct8_add(dst,block,stride);
+    return 0;
+}
+
+
+static inline void decode_residual_chroma(AVSContext *h) {
+    if(h->cbp & (1<<4))
+        decode_residual_block(h,&h->s.gb,chroma_2dvlc,0, chroma_qp[h->qp],
+                              h->cu,h->c_stride);
+    if(h->cbp & (1<<5))
+        decode_residual_block(h,&h->s.gb,chroma_2dvlc,0, chroma_qp[h->qp],
+                              h->cv,h->c_stride);
+}
+
+static inline int decode_residual_inter(AVSContext *h) {
+    int block;
+
+    /* get coded block pattern */
+    int cbp= get_ue_golomb(&h->s.gb);
+    if(cbp > 63){
+        av_log(h->s.avctx, AV_LOG_ERROR, "illegal inter cbp\n");
+        return -1;
+    }
+    h->cbp = cbp_tab[cbp][1];
+
+    /* get quantizer */
+    if(h->cbp && !h->qp_fixed)
+        h->qp = (h->qp + get_se_golomb(&h->s.gb)) & 63;
+    for(block=0;block<4;block++)
+        if(h->cbp & (1<<block))
+            decode_residual_block(h,&h->s.gb,inter_2dvlc,0,h->qp,
+                                  h->cy + h->luma_scan[block], h->l_stride);
+    decode_residual_chroma(h);
+
+    return 0;
+}
+
+/*****************************************************************************
+ *
+ * macroblock level
+ *
+ ****************************************************************************/
+
+/**
+ * initialise predictors for motion vectors and intra prediction
+ */
+static inline void init_mb(AVSContext *h) {
+    int i;
+
+    /* copy predictors from top line (MB B and C) into cache */
+    for(i=0;i<3;i++) {
+        h->mv[MV_FWD_B2+i] = h->top_mv[0][h->mbx*2+i];
+        h->mv[MV_BWD_B2+i] = h->top_mv[1][h->mbx*2+i];
+    }
+    h->pred_mode_Y[1] = h->top_pred_Y[h->mbx*2+0];
+    h->pred_mode_Y[2] = h->top_pred_Y[h->mbx*2+1];
+    /* clear top predictors if MB B is not available */
+    if(!(h->flags & B_AVAIL)) {
+        h->mv[MV_FWD_B2] = un_mv;
+        h->mv[MV_FWD_B3] = un_mv;
+        h->mv[MV_BWD_B2] = un_mv;
+        h->mv[MV_BWD_B3] = un_mv;
+        h->pred_mode_Y[1] = h->pred_mode_Y[2] = NOT_AVAIL;
+        h->flags &= ~(C_AVAIL|D_AVAIL);
+    } else if(h->mbx) {
+        h->flags |= D_AVAIL;
+    }
+    if(h->mbx == h->mb_width-1) //MB C not available
+        h->flags &= ~C_AVAIL;
+    /* clear top-right predictors if MB C is not available */
+    if(!(h->flags & C_AVAIL)) {
+        h->mv[MV_FWD_C2] = un_mv;
+        h->mv[MV_BWD_C2] = un_mv;
+    }
+    /* clear top-left predictors if MB D is not available */
+    if(!(h->flags & D_AVAIL)) {
+        h->mv[MV_FWD_D3] = un_mv;
+        h->mv[MV_BWD_D3] = un_mv;
+    }
+    /* set pointer for co-located macroblock type */
+    h->col_type = &h->col_type_base[h->mby*h->mb_width + h->mbx];
+}
+
+static inline void check_for_slice(AVSContext *h);
+
+/**
+ * save predictors for later macroblocks and increase
+ * macroblock address
+ * @returns 0 if end of frame is reached, 1 otherwise
+ */
+static inline int next_mb(AVSContext *h) {
+    int i;
+
+    h->flags |= A_AVAIL;
+    h->cy += 16;
+    h->cu += 8;
+    h->cv += 8;
+    /* copy mvs as predictors to the left */
+    for(i=0;i<=20;i+=4)
+        h->mv[i] = h->mv[i+2];
+    /* copy bottom mvs from cache to top line */
+    h->top_mv[0][h->mbx*2+0] = h->mv[MV_FWD_X2];
+    h->top_mv[0][h->mbx*2+1] = h->mv[MV_FWD_X3];
+    h->top_mv[1][h->mbx*2+0] = h->mv[MV_BWD_X2];
+    h->top_mv[1][h->mbx*2+1] = h->mv[MV_BWD_X3];
+    /* next MB address */
+    h->mbx++;
+    if(h->mbx == h->mb_width) { //new mb line
+        h->flags = B_AVAIL|C_AVAIL;
+        /* clear left pred_modes */
+        h->pred_mode_Y[3] = h->pred_mode_Y[6] = NOT_AVAIL;
+        /* clear left mv predictors */
+        for(i=0;i<=20;i+=4)
+            h->mv[i] = un_mv;
+        h->mbx = 0;
+        h->mby++;
+        /* re-calculate sample pointers */
+        h->cy = h->picture.data[0] + h->mby*16*h->l_stride;
+        h->cu = h->picture.data[1] + h->mby*8*h->c_stride;
+        h->cv = h->picture.data[2] + h->mby*8*h->c_stride;
+        if(h->mby == h->mb_height) { //frame end
+            return 0;
+        } else {
+            //check_for_slice(h);
+        }
+    }
+    return 1;
+}
+
+static int decode_mb_i(AVSContext *h, int cbp_code) {
+    GetBitContext *gb = &h->s.gb;
+    int block, pred_mode_uv;
+    uint8_t top[18];
+    uint8_t *left = NULL;
+    uint8_t *d;
+
+    init_mb(h);
+
+    /* get intra prediction modes from stream */
+    for(block=0;block<4;block++) {
+        int nA,nB,predpred;
+        int pos = scan3x3[block];
+
+        nA = h->pred_mode_Y[pos-1];
+        nB = h->pred_mode_Y[pos-3];
+        predpred = FFMIN(nA,nB);
+        if(predpred == NOT_AVAIL) // if either is not available
+            predpred = INTRA_L_LP;
+        if(!get_bits1(gb)){
+            int rem_mode= get_bits(gb, 2);
+            predpred = rem_mode + (rem_mode >= predpred);
+        }
+        h->pred_mode_Y[pos] = predpred;
+    }
+    pred_mode_uv = get_ue_golomb(gb);
+    if(pred_mode_uv > 6) {
+        av_log(h->s.avctx, AV_LOG_ERROR, "illegal intra chroma pred mode\n");
+        return -1;
+    }
+
+    /* save pred modes before they get modified */
+    h->pred_mode_Y[3] =  h->pred_mode_Y[5];
+    h->pred_mode_Y[6] =  h->pred_mode_Y[8];
+    h->top_pred_Y[h->mbx*2+0] = h->pred_mode_Y[7];
+    h->top_pred_Y[h->mbx*2+1] = h->pred_mode_Y[8];
+
+    /* modify pred modes according to availability of neighbour samples */
+    if(!(h->flags & A_AVAIL)) {
+        modify_pred(left_modifier_l, &h->pred_mode_Y[4] );
+        modify_pred(left_modifier_l, &h->pred_mode_Y[7] );
+        modify_pred(left_modifier_c, &pred_mode_uv );
+    }
+    if(!(h->flags & B_AVAIL)) {
+        modify_pred(top_modifier_l, &h->pred_mode_Y[4] );
+        modify_pred(top_modifier_l, &h->pred_mode_Y[5] );
+        modify_pred(top_modifier_c, &pred_mode_uv );
+    }
+
+    /* get coded block pattern */
+    if(h->pic_type == FF_I_TYPE)
+        cbp_code = get_ue_golomb(gb);
+    if(cbp_code > 63){
+        av_log(h->s.avctx, AV_LOG_ERROR, "illegal intra cbp\n");
+        return -1;
+    }
+    h->cbp = cbp_tab[cbp_code][0];
+    if(h->cbp && !h->qp_fixed)
+        h->qp = (h->qp + get_se_golomb(gb)) & 63; //qp_delta
+
+    /* luma intra prediction interleaved with residual decode/transform/add */
+    for(block=0;block<4;block++) {
+        d = h->cy + h->luma_scan[block];
+        load_intra_pred_luma(h, top, &left, block);
+        h->intra_pred_l[h->pred_mode_Y[scan3x3[block]]]
+            (d, top, left, h->l_stride);
+        if(h->cbp & (1<<block))
+            decode_residual_block(h,gb,intra_2dvlc,1,h->qp,d,h->l_stride);
+    }
+
+    /* chroma intra prediction */
+    /* extend borders by one pixel */
+    h->left_border_u[9] = h->left_border_u[8];
+    h->left_border_v[9] = h->left_border_v[8];
+    h->top_border_u[h->mbx*10+9] = h->top_border_u[h->mbx*10+8];
+    h->top_border_v[h->mbx*10+9] = h->top_border_v[h->mbx*10+8];
+    if(h->mbx && h->mby) {
+        h->top_border_u[h->mbx*10] = h->left_border_u[0] = h->topleft_border_u;
+        h->top_border_v[h->mbx*10] = h->left_border_v[0] = h->topleft_border_v;
+    } else {
+        h->left_border_u[0] = h->left_border_u[1];
+        h->left_border_v[0] = h->left_border_v[1];
+        h->top_border_u[h->mbx*10] = h->top_border_u[h->mbx*10+1];
+        h->top_border_v[h->mbx*10] = h->top_border_v[h->mbx*10+1];
+    }
+    h->intra_pred_c[pred_mode_uv](h->cu, &h->top_border_u[h->mbx*10],
+                                  h->left_border_u, h->c_stride);
+    h->intra_pred_c[pred_mode_uv](h->cv, &h->top_border_v[h->mbx*10],
+                                  h->left_border_v, h->c_stride);
+
+    decode_residual_chroma(h);
+    filter_mb(h,I_8X8);
+
+    /* mark motion vectors as intra */
+    h->mv[MV_FWD_X0] = intra_mv;
+    set_mvs(&h->mv[MV_FWD_X0], BLK_16X16);
+    h->mv[MV_BWD_X0] = intra_mv;
+    set_mvs(&h->mv[MV_BWD_X0], BLK_16X16);
+    if(h->pic_type != FF_B_TYPE)
+        *h->col_type = I_8X8;
+
+    return 0;
+}
+
+static void decode_mb_p(AVSContext *h, enum mb_t mb_type) {
+    GetBitContext *gb = &h->s.gb;
+    int ref[4];
+
+    init_mb(h);
+    switch(mb_type) {
+    case P_SKIP:
+        mv_pred(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_PSKIP, BLK_16X16, 0);
+        break;
+    case P_16X16:
+        ref[0] = h->ref_flag ? 0 : get_bits1(gb);
+        mv_pred(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_MEDIAN,   BLK_16X16,ref[0]);
+        break;
+    case P_16X8:
+        ref[0] = h->ref_flag ? 0 : get_bits1(gb);
+        ref[2] = h->ref_flag ? 0 : get_bits1(gb);
+        mv_pred(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_TOP,      BLK_16X8, ref[0]);
+        mv_pred(h, MV_FWD_X2, MV_FWD_A1, MV_PRED_LEFT,     BLK_16X8, ref[2]);
+        break;
+    case P_8X16:
+        ref[0] = h->ref_flag ? 0 : get_bits1(gb);
+        ref[1] = h->ref_flag ? 0 : get_bits1(gb);
+        mv_pred(h, MV_FWD_X0, MV_FWD_B3, MV_PRED_LEFT,     BLK_8X16, ref[0]);
+        mv_pred(h, MV_FWD_X1, MV_FWD_C2, MV_PRED_TOPRIGHT, BLK_8X16, ref[1]);
+        break;
+    case P_8X8:
+        ref[0] = h->ref_flag ? 0 : get_bits1(gb);
+        ref[1] = h->ref_flag ? 0 : get_bits1(gb);
+        ref[2] = h->ref_flag ? 0 : get_bits1(gb);
+        ref[3] = h->ref_flag ? 0 : get_bits1(gb);
+        mv_pred(h, MV_FWD_X0, MV_FWD_B3, MV_PRED_MEDIAN,   BLK_8X8, ref[0]);
+        mv_pred(h, MV_FWD_X1, MV_FWD_C2, MV_PRED_MEDIAN,   BLK_8X8, ref[1]);
+        mv_pred(h, MV_FWD_X2, MV_FWD_X1, MV_PRED_MEDIAN,   BLK_8X8, ref[2]);
+        mv_pred(h, MV_FWD_X3, MV_FWD_X0, MV_PRED_MEDIAN,   BLK_8X8, ref[3]);
+    }
+    inter_pred(h, mb_type);
+    store_mvs(h);
+    if(mb_type != P_SKIP)
+        decode_residual_inter(h);
+    filter_mb(h,mb_type);
+    *h->col_type = mb_type;
+}
+
+static void decode_mb_b(AVSContext *h, enum mb_t mb_type) {
+    int block;
+    enum sub_mb_t sub_type[4];
+    int flags;
+
+    init_mb(h);
+
+    /* reset all MVs */
+    h->mv[MV_FWD_X0] = dir_mv;
+    set_mvs(&h->mv[MV_FWD_X0], BLK_16X16);
+    h->mv[MV_BWD_X0] = dir_mv;
+    set_mvs(&h->mv[MV_BWD_X0], BLK_16X16);
+    switch(mb_type) {
+    case B_SKIP:
+    case B_DIRECT:
+        if(!(*h->col_type)) {
+            /* intra MB at co-location, do in-plane prediction */
+            mv_pred(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_BSKIP, BLK_16X16, 1);
+            mv_pred(h, MV_BWD_X0, MV_BWD_C2, MV_PRED_BSKIP, BLK_16X16, 0);
+        } else
+            /* direct prediction from co-located P MB, block-wise */
+            for(block=0;block<4;block++)
+                mv_pred_direct(h,&h->mv[mv_scan[block]],
+                            &h->col_mv[(h->mby*h->mb_width+h->mbx)*4 + block]);
+        break;
+    case B_FWD_16X16:
+        mv_pred(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_MEDIAN, BLK_16X16, 1);
+        break;
+    case B_SYM_16X16:
+        mv_pred(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_MEDIAN, BLK_16X16, 1);
+        mv_pred_sym(h, &h->mv[MV_FWD_X0], BLK_16X16);
+        break;
+    case B_BWD_16X16:
+        mv_pred(h, MV_BWD_X0, MV_BWD_C2, MV_PRED_MEDIAN, BLK_16X16, 0);
+        break;
+    case B_8X8:
+        for(block=0;block<4;block++)
+            sub_type[block] = get_bits(&h->s.gb,2);
+        for(block=0;block<4;block++) {
+            switch(sub_type[block]) {
+            case B_SUB_DIRECT:
+                if(!(*h->col_type)) {
+                    /* intra MB at co-location, do in-plane prediction */
+                    mv_pred(h, mv_scan[block], mv_scan[block]-3,
+                            MV_PRED_BSKIP, BLK_8X8, 1);
+                    mv_pred(h, mv_scan[block]+MV_BWD_OFFS,
+                            mv_scan[block]-3+MV_BWD_OFFS,
+                            MV_PRED_BSKIP, BLK_8X8, 0);
+                } else
+                    mv_pred_direct(h,&h->mv[mv_scan[block]],
+                                   &h->col_mv[(h->mby*h->mb_width + h->mbx)*4 + block]);
+                break;
+            case B_SUB_FWD:
+                mv_pred(h, mv_scan[block], mv_scan[block]-3,
+                        MV_PRED_MEDIAN, BLK_8X8, 1);
+                break;
+            case B_SUB_SYM:
+                mv_pred(h, mv_scan[block], mv_scan[block]-3,
+                        MV_PRED_MEDIAN, BLK_8X8, 1);
+                mv_pred_sym(h, &h->mv[mv_scan[block]], BLK_8X8);
+                break;
+            }
+        }
+        for(block=0;block<4;block++) {
+            if(sub_type[block] == B_SUB_BWD)
+                mv_pred(h, mv_scan[block]+MV_BWD_OFFS,
+                        mv_scan[block]+MV_BWD_OFFS-3,
+                        MV_PRED_MEDIAN, BLK_8X8, 0);
+        }
+        break;
+    default:
+        assert((mb_type > B_SYM_16X16) && (mb_type < B_8X8));
+        flags = partition_flags[mb_type];
+        if(mb_type & 1) { /* 16x8 macroblock types */
+            if(flags & FWD0)
+                mv_pred(h, MV_FWD_X0, MV_FWD_C2, MV_PRED_TOP,  BLK_16X8, 1);
+            if(flags & SYM0)
+                mv_pred_sym(h, &h->mv[MV_FWD_X0], BLK_16X8);
+            if(flags & FWD1)
+                mv_pred(h, MV_FWD_X2, MV_FWD_A1, MV_PRED_LEFT, BLK_16X8, 1);
+            if(flags & SYM1)
+                mv_pred_sym(h, &h->mv[MV_FWD_X2], BLK_16X8);
+            if(flags & BWD0)
+                mv_pred(h, MV_BWD_X0, MV_BWD_C2, MV_PRED_TOP,  BLK_16X8, 0);
+            if(flags & BWD1)
+                mv_pred(h, MV_BWD_X2, MV_BWD_A1, MV_PRED_LEFT, BLK_16X8, 0);
+        } else {          /* 8x16 macroblock types */
+            if(flags & FWD0)
+                mv_pred(h, MV_FWD_X0, MV_FWD_B3, MV_PRED_LEFT, BLK_8X16, 1);
+            if(flags & SYM0)
+                mv_pred_sym(h, &h->mv[MV_FWD_X0], BLK_8X16);
+            if(flags & FWD1)
+                mv_pred(h, MV_FWD_X1, MV_FWD_C2, MV_PRED_TOPRIGHT,BLK_8X16, 1);
+            if(flags & SYM1)
+                mv_pred_sym(h, &h->mv[MV_FWD_X1], BLK_8X16);
+            if(flags & BWD0)
+                mv_pred(h, MV_BWD_X0, MV_BWD_B3, MV_PRED_LEFT, BLK_8X16, 0);
+            if(flags & BWD1)
+                mv_pred(h, MV_BWD_X1, MV_BWD_C2, MV_PRED_TOPRIGHT,BLK_8X16, 0);
+        }
+    }
+    inter_pred(h, mb_type);
+    if(mb_type != B_SKIP)
+        decode_residual_inter(h);
+    filter_mb(h,mb_type);
+}
+
+/*****************************************************************************
+ *
+ * slice level
+ *
+ ****************************************************************************/
+
+static inline int decode_slice_header(AVSContext *h, GetBitContext *gb) {
+    if(h->stc > 0xAF)
+        av_log(h->s.avctx, AV_LOG_ERROR, "unexpected start code 0x%02x\n", h->stc);
+    h->mby = h->stc;
+    if((h->mby == 0) && (!h->qp_fixed)){
+        h->qp_fixed = get_bits1(gb);
+        h->qp = get_bits(gb,6);
+    }
+    /* inter frame or second slice can have weighting params */
+    if((h->pic_type != FF_I_TYPE) || (!h->pic_structure && h->mby >= h->mb_width/2))
+        if(get_bits1(gb)) { //slice_weighting_flag
+            av_log(h->s.avctx, AV_LOG_ERROR,
+                   "weighted prediction not yet supported\n");
+        }
+    return 0;
+}
+
+static inline void check_for_slice(AVSContext *h) {
+    GetBitContext *gb = &h->s.gb;
+    int align;
+    align = (-get_bits_count(gb)) & 7;
+    if((show_bits_long(gb,24+align) & 0xFFFFFF) == 0x000001) {
+        get_bits_long(gb,24+align);
+        h->stc = get_bits(gb,8);
+        decode_slice_header(h,gb);
+    }
+}
+
+/*****************************************************************************
+ *
+ * frame level
+ *
+ ****************************************************************************/
+
+static void init_pic(AVSContext *h) {
+    int i;
+
+    /* clear some predictors */
+    for(i=0;i<=20;i+=4)
+        h->mv[i] = un_mv;
+    h->mv[MV_BWD_X0] = dir_mv;
+    set_mvs(&h->mv[MV_BWD_X0], BLK_16X16);
+    h->mv[MV_FWD_X0] = dir_mv;
+    set_mvs(&h->mv[MV_FWD_X0], BLK_16X16);
+    h->pred_mode_Y[3] = h->pred_mode_Y[6] = NOT_AVAIL;
+    h->cy = h->picture.data[0];
+    h->cu = h->picture.data[1];
+    h->cv = h->picture.data[2];
+    h->l_stride = h->picture.linesize[0];
+    h->c_stride = h->picture.linesize[1];
+    h->luma_scan[2] = 8*h->l_stride;
+    h->luma_scan[3] = 8*h->l_stride+8;
+    h->mbx = h->mby = 0;
+    h->flags = 0;
+}
+
+static int decode_pic(AVSContext *h) {
+    MpegEncContext *s = &h->s;
+    int skip_count;
+    enum mb_t mb_type;
+
+    if (!s->context_initialized) {
+        s->avctx->idct_algo = FF_IDCT_CAVS;
+        if (MPV_common_init(s) < 0)
+            return -1;
+        ff_init_scantable(s->dsp.idct_permutation,&h->scantable,ff_zigzag_direct);
+    }
+    get_bits(&s->gb,16);//bbv_dwlay
+    if(h->stc == PIC_PB_START_CODE) {
+        h->pic_type = get_bits(&s->gb,2) + FF_I_TYPE;
+        if(h->pic_type > FF_B_TYPE) {
+            av_log(s->avctx, AV_LOG_ERROR, "illegal picture type\n");
+            return -1;
+        }
+        /* make sure we have the reference frames we need */
+        if(!h->DPB[0].data[0] ||
+          (!h->DPB[1].data[0] && h->pic_type == FF_B_TYPE))
+            return -1;
+    } else {
+        h->pic_type = FF_I_TYPE;
+        if(get_bits1(&s->gb))
+            get_bits(&s->gb,16);//time_code
+    }
+    /* release last B frame */
+    if(h->picture.data[0])
+        s->avctx->release_buffer(s->avctx, (AVFrame *)&h->picture);
+
+    s->avctx->get_buffer(s->avctx, (AVFrame *)&h->picture);
+    init_pic(h);
+    h->picture.poc = get_bits(&s->gb,8)*2;
+
+    /* get temporal distances and MV scaling factors */
+    if(h->pic_type != FF_B_TYPE) {
+        h->dist[0] = (h->picture.poc - h->DPB[0].poc  + 512) % 512;
+    } else {
+        h->dist[0] = (h->DPB[0].poc  - h->picture.poc + 512) % 512;
+    }
+    h->dist[1] = (h->picture.poc - h->DPB[1].poc  + 512) % 512;
+    h->scale_den[0] = h->dist[0] ? 512/h->dist[0] : 0;
+    h->scale_den[1] = h->dist[1] ? 512/h->dist[1] : 0;
+    if(h->pic_type == FF_B_TYPE) {
+        h->sym_factor = h->dist[0]*h->scale_den[1];
+    } else {
+        h->direct_den[0] = h->dist[0] ? 16384/h->dist[0] : 0;
+        h->direct_den[1] = h->dist[1] ? 16384/h->dist[1] : 0;
+    }
+
+    if(s->low_delay)
+        get_ue_golomb(&s->gb); //bbv_check_times
+    h->progressive             = get_bits1(&s->gb);
+    if(h->progressive)
+        h->pic_structure = 1;
+    else if(!(h->pic_structure = get_bits1(&s->gb) && (h->stc == PIC_PB_START_CODE)) )
+        get_bits1(&s->gb);     //advanced_pred_mode_disable
+    skip_bits1(&s->gb);        //top_field_first
+    skip_bits1(&s->gb);        //repeat_first_field
+    h->qp_fixed                = get_bits1(&s->gb);
+    h->qp                      = get_bits(&s->gb,6);
+    if(h->pic_type == FF_I_TYPE) {
+        if(!h->progressive && !h->pic_structure)
+            skip_bits1(&s->gb);//what is this?
+        skip_bits(&s->gb,4);   //reserved bits
+    } else {
+        if(!(h->pic_type == FF_B_TYPE && h->pic_structure == 1))
+            h->ref_flag        = get_bits1(&s->gb);
+        skip_bits(&s->gb,4);   //reserved bits
+        h->skip_mode_flag      = get_bits1(&s->gb);
+    }
+    h->loop_filter_disable     = get_bits1(&s->gb);
+    if(!h->loop_filter_disable && get_bits1(&s->gb)) {
+        h->alpha_offset        = get_se_golomb(&s->gb);
+        h->beta_offset         = get_se_golomb(&s->gb);
+    } else {
+        h->alpha_offset = h->beta_offset  = 0;
+    }
+    check_for_slice(h);
+    if(h->pic_type == FF_I_TYPE) {
+        do {
+            decode_mb_i(h, 0);
+        } while(next_mb(h));
+    } else if(h->pic_type == FF_P_TYPE) {
+        do {
+            if(h->skip_mode_flag) {
+                skip_count = get_ue_golomb(&s->gb);
+                while(skip_count--) {
+                    decode_mb_p(h,P_SKIP);
+                    if(!next_mb(h))
+                        goto done;
+                }
+                mb_type = get_ue_golomb(&s->gb) + P_16X16;
+            } else
+                mb_type = get_ue_golomb(&s->gb) + P_SKIP;
+            if(mb_type > P_8X8) {
+                decode_mb_i(h, mb_type - P_8X8 - 1);
+            } else
+                decode_mb_p(h,mb_type);
+        } while(next_mb(h));
+    } else { /* FF_B_TYPE */
+        do {
+            if(h->skip_mode_flag) {
+                skip_count = get_ue_golomb(&s->gb);
+                while(skip_count--) {
+                    decode_mb_b(h,B_SKIP);
+                    if(!next_mb(h))
+                        goto done;
+                }
+                mb_type = get_ue_golomb(&s->gb) + B_DIRECT;
+            } else
+                mb_type = get_ue_golomb(&s->gb) + B_SKIP;
+            if(mb_type > B_8X8) {
+                decode_mb_i(h, mb_type - B_8X8 - 1);
+            } else
+                decode_mb_b(h,mb_type);
+        } while(next_mb(h));
+    }
+ done:
+    if(h->pic_type != FF_B_TYPE) {
+        if(h->DPB[1].data[0])
+            s->avctx->release_buffer(s->avctx, (AVFrame *)&h->DPB[1]);
+        memcpy(&h->DPB[1], &h->DPB[0], sizeof(Picture));
+        memcpy(&h->DPB[0], &h->picture, sizeof(Picture));
+        memset(&h->picture,0,sizeof(Picture));
+    }
+    return 0;
+}
+
+/*****************************************************************************
+ *
+ * headers and interface
+ *
+ ****************************************************************************/
+
+/**
+ * some predictions require data from the top-neighbouring macroblock.
+ * this data has to be stored for one complete row of macroblocks
+ * and this storage space is allocated here
+ */
+static void init_top_lines(AVSContext *h) {
+    /* alloc top line of predictors */
+    h->top_qp       = av_malloc( h->mb_width);
+    h->top_mv[0]    = av_malloc((h->mb_width*2+1)*sizeof(vector_t));
+    h->top_mv[1]    = av_malloc((h->mb_width*2+1)*sizeof(vector_t));
+    h->top_pred_Y   = av_malloc( h->mb_width*2*sizeof(*h->top_pred_Y));
+    h->top_border_y = av_malloc((h->mb_width+1)*16);
+    h->top_border_u = av_malloc((h->mb_width)*10);
+    h->top_border_v = av_malloc((h->mb_width)*10);
+
+    /* alloc space for co-located MVs and types */
+    h->col_mv       = av_malloc( h->mb_width*h->mb_height*4*sizeof(vector_t));
+    h->col_type_base = av_malloc(h->mb_width*h->mb_height);
+    h->block        = av_mallocz(64*sizeof(DCTELEM));
+}
+
+static int decode_seq_header(AVSContext *h) {
+    MpegEncContext *s = &h->s;
+    extern const AVRational ff_frame_rate_tab[];
+    int frame_rate_code;
+
+    h->profile =         get_bits(&s->gb,8);
+    h->level =           get_bits(&s->gb,8);
+    skip_bits1(&s->gb); //progressive sequence
+    s->width =           get_bits(&s->gb,14);
+    s->height =          get_bits(&s->gb,14);
+    skip_bits(&s->gb,2); //chroma format
+    skip_bits(&s->gb,3); //sample_precision
+    h->aspect_ratio =    get_bits(&s->gb,4);
+    frame_rate_code =    get_bits(&s->gb,4);
+    skip_bits(&s->gb,18);//bit_rate_lower
+    skip_bits1(&s->gb);  //marker_bit
+    skip_bits(&s->gb,12);//bit_rate_upper
+    s->low_delay =       get_bits1(&s->gb);
+    h->mb_width  = (s->width  + 15) >> 4;
+    h->mb_height = (s->height + 15) >> 4;
+    h->s.avctx->time_base.den = ff_frame_rate_tab[frame_rate_code].num;
+    h->s.avctx->time_base.num = ff_frame_rate_tab[frame_rate_code].den;
+    h->s.avctx->width  = s->width;
+    h->s.avctx->height = s->height;
+    if(!h->top_qp)
+        init_top_lines(h);
+    return 0;
+}
+
+static void cavs_flush(AVCodecContext * avctx) {
+    AVSContext *h = avctx->priv_data;
+    h->got_keyframe = 0;
+}
+
+static int cavs_decode_frame(AVCodecContext * avctx,void *data, int *data_size,
+                             uint8_t * buf, int buf_size) {
+    AVSContext *h = avctx->priv_data;
+    MpegEncContext *s = &h->s;
+    int input_size;
+    const uint8_t *buf_end;
+    const uint8_t *buf_ptr;
+    AVFrame *picture = data;
+    uint32_t stc;
+
+    s->avctx = avctx;
+
+    if (buf_size == 0) {
+        if(!s->low_delay && h->DPB[0].data[0]) {
+            *data_size = sizeof(AVPicture);
+            *picture = *(AVFrame *) &h->DPB[0];
+        }
+        return 0;
+    }
+
+    buf_ptr = buf;
+    buf_end = buf + buf_size;
+    for(;;) {
+        buf_ptr = ff_find_start_code(buf_ptr,buf_end, &stc);
+        if(stc & 0xFFFFFE00)
+            return FFMAX(0, buf_ptr - buf - s->parse_context.last_index);
+        input_size = (buf_end - buf_ptr)*8;
+        switch(stc) {
+        case SEQ_START_CODE:
+            init_get_bits(&s->gb, buf_ptr, input_size);
+            decode_seq_header(h);
+            break;
+        case PIC_I_START_CODE:
+            if(!h->got_keyframe) {
+                if(h->DPB[0].data[0])
+                    avctx->release_buffer(avctx, (AVFrame *)&h->DPB[0]);
+                if(h->DPB[1].data[0])
+                    avctx->release_buffer(avctx, (AVFrame *)&h->DPB[1]);
+                h->got_keyframe = 1;
+            }
+        case PIC_PB_START_CODE:
+            *data_size = 0;
+            if(!h->got_keyframe)
+                break;
+            init_get_bits(&s->gb, buf_ptr, input_size);
+            h->stc = stc;
+            if(decode_pic(h))
+                break;
+            *data_size = sizeof(AVPicture);
+            if(h->pic_type != FF_B_TYPE) {
+                if(h->DPB[1].data[0]) {
+                    *picture = *(AVFrame *) &h->DPB[1];
+                } else {
+                    *data_size = 0;
+                }
+            } else
+                *picture = *(AVFrame *) &h->picture;
+            break;
+        case EXT_START_CODE:
+            //mpeg_decode_extension(avctx,buf_ptr, input_size);
+            break;
+        case USER_START_CODE:
+            //mpeg_decode_user_data(avctx,buf_ptr, input_size);
+            break;
+        default:
+            if (stc >= SLICE_MIN_START_CODE &&
+                stc <= SLICE_MAX_START_CODE) {
+                init_get_bits(&s->gb, buf_ptr, input_size);
+                decode_slice_header(h, &s->gb);
+            }
+            break;
+        }
+    }
+}
+
+static int cavs_decode_init(AVCodecContext * avctx) {
+    AVSContext *h = avctx->priv_data;
+    MpegEncContext * const s = &h->s;
+
+    MPV_decode_defaults(s);
+    s->avctx = avctx;
+
+    avctx->pix_fmt= PIX_FMT_YUV420P;
+
+    h->luma_scan[0] = 0;
+    h->luma_scan[1] = 8;
+    h->intra_pred_l[      INTRA_L_VERT] = intra_pred_vert;
+    h->intra_pred_l[     INTRA_L_HORIZ] = intra_pred_horiz;
+    h->intra_pred_l[        INTRA_L_LP] = intra_pred_lp;
+    h->intra_pred_l[ INTRA_L_DOWN_LEFT] = intra_pred_down_left;
+    h->intra_pred_l[INTRA_L_DOWN_RIGHT] = intra_pred_down_right;
+    h->intra_pred_l[   INTRA_L_LP_LEFT] = intra_pred_lp_left;
+    h->intra_pred_l[    INTRA_L_LP_TOP] = intra_pred_lp_top;
+    h->intra_pred_l[    INTRA_L_DC_128] = intra_pred_dc_128;
+    h->intra_pred_c[        INTRA_C_LP] = intra_pred_lp;
+    h->intra_pred_c[     INTRA_C_HORIZ] = intra_pred_horiz;
+    h->intra_pred_c[      INTRA_C_VERT] = intra_pred_vert;
+    h->intra_pred_c[     INTRA_C_PLANE] = intra_pred_plane;
+    h->intra_pred_c[   INTRA_C_LP_LEFT] = intra_pred_lp_left;
+    h->intra_pred_c[    INTRA_C_LP_TOP] = intra_pred_lp_top;
+    h->intra_pred_c[    INTRA_C_DC_128] = intra_pred_dc_128;
+    h->mv[ 7] = un_mv;
+    h->mv[19] = un_mv;
+    return 0;
+}
+
+static int cavs_decode_end(AVCodecContext * avctx) {
+    AVSContext *h = avctx->priv_data;
+
+    av_free(h->top_qp);
+    av_free(h->top_mv[0]);
+    av_free(h->top_mv[1]);
+    av_free(h->top_pred_Y);
+    av_free(h->top_border_y);
+    av_free(h->top_border_u);
+    av_free(h->top_border_v);
+    av_free(h->col_mv);
+    av_free(h->col_type_base);
+    av_free(h->block);
+    return 0;
+}
+
+AVCodec cavs_decoder = {
+    "cavs",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_CAVS,
+    sizeof(AVSContext),
+    cavs_decode_init,
+    NULL,
+    cavs_decode_end,
+    cavs_decode_frame,
+    CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .flush= cavs_flush,
+};
+#endif /* CONFIG_CAVS_DECODER */
+
+#ifdef CONFIG_CAVSVIDEO_PARSER
+/**
+ * finds the end of the current frame in the bitstream.
+ * @return the position of the first byte of the next frame, or -1
+ */
+static int cavs_find_frame_end(ParseContext *pc, const uint8_t *buf,
+                               int buf_size) {
+    int pic_found, i;
+    uint32_t state;
+
+    pic_found= pc->frame_start_found;
+    state= pc->state;
+
+    i=0;
+    if(!pic_found){
+        for(i=0; i<buf_size; i++){
+            state= (state<<8) | buf[i];
+            if(state == PIC_I_START_CODE || state == PIC_PB_START_CODE){
+                i++;
+                pic_found=1;
+                break;
+            }
+        }
+    }
+
+    if(pic_found){
+        /* EOF considered as end of frame */
+        if (buf_size == 0)
+            return 0;
+        for(; i<buf_size; i++){
+            state= (state<<8) | buf[i];
+            if((state&0xFFFFFF00) == 0x100){
+                if(state < SLICE_MIN_START_CODE || state > SLICE_MAX_START_CODE){
+                    pc->frame_start_found=0;
+                    pc->state=-1;
+                    return i-3;
+                }
+            }
+        }
+    }
+    pc->frame_start_found= pic_found;
+    pc->state= state;
+    return END_NOT_FOUND;
+}
+
+static int cavsvideo_parse(AVCodecParserContext *s,
+                           AVCodecContext *avctx,
+                           uint8_t **poutbuf, int *poutbuf_size,
+                           const uint8_t *buf, int buf_size)
+{
+    ParseContext *pc = s->priv_data;
+    int next;
+
+    if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){
+        next= buf_size;
+    }else{
+        next= cavs_find_frame_end(pc, buf, buf_size);
+
+        if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
+            *poutbuf = NULL;
+            *poutbuf_size = 0;
+            return buf_size;
+        }
+    }
+    *poutbuf = (uint8_t *)buf;
+    *poutbuf_size = buf_size;
+    return next;
+}
+
+AVCodecParser cavsvideo_parser = {
+    { CODEC_ID_CAVS },
+    sizeof(ParseContext1),
+    NULL,
+    cavsvideo_parse,
+    ff_parse1_close,
+    ff_mpeg4video_split,
+};
+#endif /* CONFIG_CAVSVIDEO_PARSER */
diff --git a/contrib/ffmpeg/libavcodec/cavsdata.h b/contrib/ffmpeg/libavcodec/cavsdata.h
new file mode 100644
index 000000000..d76985136
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/cavsdata.h
@@ -0,0 +1,643 @@
+/*
+ * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
+ * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#define SLICE_MIN_START_CODE    0x00000101
+#define SLICE_MAX_START_CODE    0x000001af
+#define EXT_START_CODE          0x000001b5
+#define USER_START_CODE         0x000001b2
+#define SEQ_START_CODE          0x000001b0
+#define PIC_I_START_CODE        0x000001b3
+#define PIC_PB_START_CODE       0x000001b6
+
+#define A_AVAIL                          1
+#define B_AVAIL                          2
+#define C_AVAIL                          4
+#define D_AVAIL                          8
+#define NOT_AVAIL                       -1
+#define REF_INTRA                       -2
+#define REF_DIR                         -3
+
+#define ESCAPE_CODE                     59
+
+#define FWD0                          0x01
+#define FWD1                          0x02
+#define BWD0                          0x04
+#define BWD1                          0x08
+#define SYM0                          0x10
+#define SYM1                          0x20
+#define SPLITH                        0x40
+#define SPLITV                        0x80
+
+#define MV_BWD_OFFS                     12
+#define MV_STRIDE                        4
+
+enum mb_t {
+  I_8X8 = 0,
+  P_SKIP,
+  P_16X16,
+  P_16X8,
+  P_8X16,
+  P_8X8,
+  B_SKIP,
+  B_DIRECT,
+  B_FWD_16X16,
+  B_BWD_16X16,
+  B_SYM_16X16,
+  B_8X8 = 29
+};
+
+enum sub_mb_t {
+  B_SUB_DIRECT,
+  B_SUB_FWD,
+  B_SUB_BWD,
+  B_SUB_SYM
+};
+
+enum intra_luma_t {
+  INTRA_L_VERT,
+  INTRA_L_HORIZ,
+  INTRA_L_LP,
+  INTRA_L_DOWN_LEFT,
+  INTRA_L_DOWN_RIGHT,
+  INTRA_L_LP_LEFT,
+  INTRA_L_LP_TOP,
+  INTRA_L_DC_128
+};
+
+enum intra_chroma_t {
+  INTRA_C_LP,
+  INTRA_C_HORIZ,
+  INTRA_C_VERT,
+  INTRA_C_PLANE,
+  INTRA_C_LP_LEFT,
+  INTRA_C_LP_TOP,
+  INTRA_C_DC_128,
+};
+
+enum mv_pred_t {
+  MV_PRED_MEDIAN,
+  MV_PRED_LEFT,
+  MV_PRED_TOP,
+  MV_PRED_TOPRIGHT,
+  MV_PRED_PSKIP,
+  MV_PRED_BSKIP
+};
+
+enum block_t {
+  BLK_16X16,
+  BLK_16X8,
+  BLK_8X16,
+  BLK_8X8
+};
+
+enum mv_loc_t {
+  MV_FWD_D3 = 0,
+  MV_FWD_B2,
+  MV_FWD_B3,
+  MV_FWD_C2,
+  MV_FWD_A1,
+  MV_FWD_X0,
+  MV_FWD_X1,
+  MV_FWD_A3 = 8,
+  MV_FWD_X2,
+  MV_FWD_X3,
+  MV_BWD_D3 = MV_BWD_OFFS,
+  MV_BWD_B2,
+  MV_BWD_B3,
+  MV_BWD_C2,
+  MV_BWD_A1,
+  MV_BWD_X0,
+  MV_BWD_X1,
+  MV_BWD_A3 = MV_BWD_OFFS+8,
+  MV_BWD_X2,
+  MV_BWD_X3
+};
+
+#ifdef CONFIG_CAVS_DECODER
+static const uint8_t partition_flags[30] = {
+  0,                                 //I_8X8
+  0,                                 //P_SKIP
+  0,                                 //P_16X16
+                      SPLITH,        //P_16X8
+                             SPLITV, //P_8X16
+                      SPLITH|SPLITV, //P_8X8
+                      SPLITH|SPLITV, //B_SKIP
+                      SPLITH|SPLITV, //B_DIRECT
+  0,                                 //B_FWD_16X16
+  0,                                 //B_BWD_16X16
+  0,                                 //B_SYM_16X16
+  FWD0|FWD1          |SPLITH,
+  FWD0|FWD1                 |SPLITV,
+  BWD0|BWD1          |SPLITH,
+  BWD0|BWD1                 |SPLITV,
+  FWD0|BWD1          |SPLITH,
+  FWD0|BWD1                 |SPLITV,
+  BWD0|FWD1          |SPLITH,
+  BWD0|FWD1                 |SPLITV,
+  FWD0|FWD1     |SYM1|SPLITH,
+  FWD0|FWD1     |SYM1       |SPLITV,
+  BWD0|FWD1     |SYM1|SPLITH,
+  BWD0|FWD1     |SYM1       |SPLITV,
+  FWD0|FWD1|SYM0     |SPLITH,
+  FWD0|FWD1|SYM0            |SPLITV,
+  FWD0|BWD1|SYM0     |SPLITH,
+  FWD0|BWD1|SYM0            |SPLITV,
+  FWD0|FWD1|SYM0|SYM1|SPLITH,
+  FWD0|FWD1|SYM0|SYM1       |SPLITV,
+                      SPLITH|SPLITV, //B_8X8 = 29
+};
+
+static const uint8_t scan3x3[4] = {4,5,7,8};
+
+static const uint8_t mv_scan[4] = {
+    MV_FWD_X0,MV_FWD_X1,
+    MV_FWD_X2,MV_FWD_X3
+};
+
+static const uint8_t cbp_tab[64][2] = {
+  {63, 0},{15,15},{31,63},{47,31},{ 0,16},{14,32},{13,47},{11,13},
+  { 7,14},{ 5,11},{10,12},{ 8, 5},{12,10},{61, 7},{ 4,48},{55, 3},
+  { 1, 2},{ 2, 8},{59, 4},{ 3, 1},{62,61},{ 9,55},{ 6,59},{29,62},
+  {45,29},{51,27},{23,23},{39,19},{27,30},{46,28},{53, 9},{30, 6},
+  {43,60},{37,21},{60,44},{16,26},{21,51},{28,35},{19,18},{35,20},
+  {42,24},{26,53},{44,17},{32,37},{58,39},{24,45},{20,58},{17,43},
+  {18,42},{48,46},{22,36},{33,33},{25,34},{49,40},{40,52},{36,49},
+  {34,50},{50,56},{52,25},{54,22},{41,54},{56,57},{38,41},{57,38}
+};
+
+static const uint8_t chroma_qp[64] = {
+  0,  1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
+  16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
+  32,33,34,35,36,37,38,39,40,41,42,42,43,43,44,44,
+  45,45,46,46,47,47,48,48,48,49,49,49,50,50,50,51
+};
+
+static const uint8_t dequant_shift[64] = {
+  14,14,14,14,14,14,14,14,
+  13,13,13,13,13,13,13,13,
+  13,12,12,12,12,12,12,12,
+  11,11,11,11,11,11,11,11,
+  11,10,10,10,10,10,10,10,
+  10, 9, 9, 9, 9, 9, 9, 9,
+  9, 8, 8, 8, 8, 8, 8, 8,
+  7, 7, 7, 7, 7, 7, 7, 7
+};
+
+static const uint16_t dequant_mul[64] = {
+  32768,36061,38968,42495,46341,50535,55437,60424,
+  32932,35734,38968,42495,46177,50535,55109,59933,
+  65535,35734,38968,42577,46341,50617,55027,60097,
+  32809,35734,38968,42454,46382,50576,55109,60056,
+  65535,35734,38968,42495,46320,50515,55109,60076,
+  65535,35744,38968,42495,46341,50535,55099,60087,
+  65535,35734,38973,42500,46341,50535,55109,60097,
+  32771,35734,38965,42497,46341,50535,55109,60099
+};
+
+DECLARE_ALIGNED_8(typedef, struct) {
+    int16_t x;
+    int16_t y;
+    int16_t dist;
+    int16_t ref;
+} vector_t;
+
+/** marks block as unavailable, i.e. out of picture
+    or not yet decoded */
+static const vector_t un_mv    = {0,0,1,NOT_AVAIL};
+
+/** marks block as "no prediction from this direction"
+    e.g. forward motion vector in BWD partition */
+static const vector_t dir_mv   = {0,0,1,REF_DIR};
+
+/** marks block as using intra prediction */
+static const vector_t intra_mv = {0,0,1,REF_INTRA};
+
+typedef struct residual_vlc_t {
+  int8_t rltab[59][3];
+  int8_t level_add[27];
+  int8_t golomb_order;
+  int inc_limit;
+  int8_t max_run;
+} residual_vlc_t;
+
+#define EOB 0,0,0
+
+static const residual_vlc_t intra_2dvlc[7] = {
+  {
+    { //level / run / table_inc
+      {  1, 1, 1},{ -1, 1, 1},{  1, 2, 1},{ -1, 2, 1},{  1, 3, 1},{ -1, 3, 1},
+      {  1, 4, 1},{ -1, 4, 1},{  1, 5, 1},{ -1, 5, 1},{  1, 6, 1},{ -1, 6, 1},
+      {  1, 7, 1},{ -1, 7, 1},{  1, 8, 1},{ -1, 8, 1},{  1, 9, 1},{ -1, 9, 1},
+      {  1,10, 1},{ -1,10, 1},{  1,11, 1},{ -1,11, 1},{  2, 1, 2},{ -2, 1, 2},
+      {  1,12, 1},{ -1,12, 1},{  1,13, 1},{ -1,13, 1},{  1,14, 1},{ -1,14, 1},
+      {  1,15, 1},{ -1,15, 1},{  2, 2, 2},{ -2, 2, 2},{  1,16, 1},{ -1,16, 1},
+      {  1,17, 1},{ -1,17, 1},{  3, 1, 3},{ -3, 1, 3},{  1,18, 1},{ -1,18, 1},
+      {  1,19, 1},{ -1,19, 1},{  2, 3, 2},{ -2, 3, 2},{  1,20, 1},{ -1,20, 1},
+      {  1,21, 1},{ -1,21, 1},{  2, 4, 2},{ -2, 4, 2},{  1,22, 1},{ -1,22, 1},
+      {  2, 5, 2},{ -2, 5, 2},{  1,23, 1},{ -1,23, 1},{   EOB   }
+    },
+    //level_add
+    { 0, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+      2, 2, 2, 2, 2, 2, 2,-1,-1,-1},
+    2, //golomb_order
+    0, //inc_limit
+    23, //max_run
+  },{
+    { //level / run
+      {  1, 1, 0},{ -1, 1, 0},{  1, 2, 0},{ -1, 2, 0},{  2, 1, 1},{ -2, 1, 1},
+      {  1, 3, 0},{ -1, 3, 0},{   EOB   },{  1, 4, 0},{ -1, 4, 0},{  1, 5, 0},
+      { -1, 5, 0},{  1, 6, 0},{ -1, 6, 0},{  3, 1, 2},{ -3, 1, 2},{  2, 2, 1},
+      { -2, 2, 1},{  1, 7, 0},{ -1, 7, 0},{  1, 8, 0},{ -1, 8, 0},{  1, 9, 0},
+      { -1, 9, 0},{  2, 3, 1},{ -2, 3, 1},{  4, 1, 2},{ -4, 1, 2},{  1,10, 0},
+      { -1,10, 0},{  1,11, 0},{ -1,11, 0},{  2, 4, 1},{ -2, 4, 1},{  3, 2, 2},
+      { -3, 2, 2},{  1,12, 0},{ -1,12, 0},{  2, 5, 1},{ -2, 5, 1},{  5, 1, 3},
+      { -5, 1, 3},{  1,13, 0},{ -1,13, 0},{  2, 6, 1},{ -2, 6, 1},{  1,14, 0},
+      { -1,14, 0},{  2, 7, 1},{ -2, 7, 1},{  2, 8, 1},{ -2, 8, 1},{  3, 3, 2},
+      { -3, 3, 2},{  6, 1, 3},{ -6, 1, 3},{  1,15, 0},{ -1,15, 0}
+    },
+    //level_add
+    { 0, 7, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,-1,
+      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    2, //golomb_order
+    1, //inc_limit
+    15, //max_run
+  },{
+    { //level / run
+      {  1, 1, 0},{ -1, 1, 0},{  2, 1, 0},{ -2, 1, 0},{  1, 2, 0},{ -1, 2, 0},
+      {  3, 1, 1},{ -3, 1, 1},{   EOB   },{  1, 3, 0},{ -1, 3, 0},{  2, 2, 0},
+      { -2, 2, 0},{  4, 1, 1},{ -4, 1, 1},{  1, 4, 0},{ -1, 4, 0},{  5, 1, 2},
+      { -5, 1, 2},{  1, 5, 0},{ -1, 5, 0},{  3, 2, 1},{ -3, 2, 1},{  2, 3, 0},
+      { -2, 3, 0},{  1, 6, 0},{ -1, 6, 0},{  6, 1, 2},{ -6, 1, 2},{  2, 4, 0},
+      { -2, 4, 0},{  1, 7, 0},{ -1, 7, 0},{  4, 2, 1},{ -4, 2, 1},{  7, 1, 2},
+      { -7, 1, 2},{  3, 3, 1},{ -3, 3, 1},{  2, 5, 0},{ -2, 5, 0},{  1, 8, 0},
+      { -1, 8, 0},{  2, 6, 0},{ -2, 6, 0},{  8, 1, 3},{ -8, 1, 3},{  1, 9, 0},
+      { -1, 9, 0},{  5, 2, 2},{ -5, 2, 2},{  3, 4, 1},{ -3, 4, 1},{  2, 7, 0},
+      { -2, 7, 0},{  9, 1, 3},{ -9, 1, 3},{  1,10, 0},{ -1,10, 0}
+    },
+    //level_add
+    { 0,10, 6, 4, 4, 3, 3, 3, 2, 2, 2,-1,-1,-1,-1,-1,-1,
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    2, //golomb_order
+    2, //inc_limit
+    10, //max_run
+  },{
+    { //level / run
+      {  1, 1, 0},{ -1, 1, 0},{  2, 1, 0},{ -2, 1, 0},{  3, 1, 0},{ -3, 1, 0},
+      {  1, 2, 0},{ -1, 2, 0},{   EOB   },{  4, 1, 0},{ -4, 1, 0},{  5, 1, 1},
+      { -5, 1, 1},{  2, 2, 0},{ -2, 2, 0},{  1, 3, 0},{ -1, 3, 0},{  6, 1, 1},
+      { -6, 1, 1},{  3, 2, 0},{ -3, 2, 0},{  7, 1, 1},{ -7, 1, 1},{  1, 4, 0},
+      { -1, 4, 0},{  8, 1, 2},{ -8, 1, 2},{  2, 3, 0},{ -2, 3, 0},{  4, 2, 0},
+      { -4, 2, 0},{  1, 5, 0},{ -1, 5, 0},{  9, 1, 2},{ -9, 1, 2},{  5, 2, 1},
+      { -5, 2, 1},{  2, 4, 0},{ -2, 4, 0},{ 10, 1, 2},{-10, 1, 2},{  3, 3, 0},
+      { -3, 3, 0},{  1, 6, 0},{ -1, 6, 0},{ 11, 1, 3},{-11, 1, 3},{  6, 2, 1},
+      { -6, 2, 1},{  1, 7, 0},{ -1, 7, 0},{  2, 5, 0},{ -2, 5, 0},{  3, 4, 0},
+      { -3, 4, 0},{ 12, 1, 3},{-12, 1, 3},{  4, 3, 0},{ -4, 3, 0}
+    },
+    //level_add
+    { 0,13, 7, 5, 4, 3, 2, 2,-1,-1,-1 -1,-1,-1,-1,-1,-1,
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    2, //golomb_order
+    4, //inc_limit
+    7, //max_run
+  },{
+    { //level / run
+      {  1, 1, 0},{ -1, 1, 0},{  2, 1, 0},{ -2, 1, 0},{  3, 1, 0},{ -3, 1, 0},
+      {   EOB   },{  4, 1, 0},{ -4, 1, 0},{  5, 1, 0},{ -5, 1, 0},{  6, 1, 0},
+      { -6, 1, 0},{  1, 2, 0},{ -1, 2, 0},{  7, 1, 0},{ -7, 1, 0},{  8, 1, 1},
+      { -8, 1, 1},{  2, 2, 0},{ -2, 2, 0},{  9, 1, 1},{ -9, 1, 1},{ 10, 1, 1},
+      {-10, 1, 1},{  1, 3, 0},{ -1, 3, 0},{  3, 2, 0},{ -3, 2, 0},{ 11, 1, 2},
+      {-11, 1, 2},{  4, 2, 0},{ -4, 2, 0},{ 12, 1, 2},{-12, 1, 2},{ 13, 1, 2},
+      {-13, 1, 2},{  5, 2, 0},{ -5, 2, 0},{  1, 4, 0},{ -1, 4, 0},{  2, 3, 0},
+      { -2, 3, 0},{ 14, 1, 2},{-14, 1, 2},{  6, 2, 0},{ -6, 2, 0},{ 15, 1, 2},
+      {-15, 1, 2},{ 16, 1, 2},{-16, 1, 2},{  3, 3, 0},{ -3, 3, 0},{  1, 5, 0},
+      { -1, 5, 0},{  7, 2, 0},{ -7, 2, 0},{ 17, 1, 2},{-17, 1, 2}
+    },
+    //level_add
+    { 0,18, 8, 4, 2, 2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    2, //golomb_order
+    7, //inc_limit
+    5, //max_run
+  },{
+    { //level / run
+      {   EOB   },{  1, 1, 0},{ -1, 1, 0},{  2, 1, 0},{ -2, 1, 0},{  3, 1, 0},
+      { -3, 1, 0},{  4, 1, 0},{ -4, 1, 0},{  5, 1, 0},{ -5, 1, 0},{  6, 1, 0},
+      { -6, 1, 0},{  7, 1, 0},{ -7, 1, 0},{  8, 1, 0},{ -8, 1, 0},{  9, 1, 0},
+      { -9, 1, 0},{ 10, 1, 0},{-10, 1, 0},{  1, 2, 0},{ -1, 2, 0},{ 11, 1, 1},
+      {-11, 1, 1},{ 12, 1, 1},{-12, 1, 1},{ 13, 1, 1},{-13, 1, 1},{  2, 2, 0},
+      { -2, 2, 0},{ 14, 1, 1},{-14, 1, 1},{ 15, 1, 1},{-15, 1, 1},{  3, 2, 0},
+      { -3, 2, 0},{ 16, 1, 1},{-16, 1, 1},{  1, 3, 0},{ -1, 3, 0},{ 17, 1, 1},
+      {-17, 1, 1},{  4, 2, 0},{ -4, 2, 0},{ 18, 1, 1},{-18, 1, 1},{  5, 2, 0},
+      { -5, 2, 0},{ 19, 1, 1},{-19, 1, 1},{ 20, 1, 1},{-20, 1, 1},{  6, 2, 0},
+      { -6, 2, 0},{ 21, 1, 1},{-21, 1, 1},{  2, 3, 0},{ -2, 3, 0}
+    },
+    //level_add
+    { 0,22, 7, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    2, //golomb_order
+    10, //inc_limit
+    3, //max_run
+  },{
+    { //level / run
+      {   EOB   },{  1, 1, 0},{ -1, 1, 0},{  2, 1, 0},{ -2, 1, 0},{  3, 1, 0},
+      { -3, 1, 0},{  4, 1, 0},{ -4, 1, 0},{  5, 1, 0},{ -5, 1, 0},{  6, 1, 0},
+      { -6, 1, 0},{  7, 1, 0},{ -7, 1, 0},{  8, 1, 0},{ -8, 1, 0},{  9, 1, 0},
+      { -9, 1, 0},{ 10, 1, 0},{-10, 1, 0},{ 11, 1, 0},{-11, 1, 0},{ 12, 1, 0},
+      {-12, 1, 0},{ 13, 1, 0},{-13, 1, 0},{ 14, 1, 0},{-14, 1, 0},{ 15, 1, 0},
+      {-15, 1, 0},{ 16, 1, 0},{-16, 1, 0},{  1, 2, 0},{ -1, 2, 0},{ 17, 1, 0},
+      {-17, 1, 0},{ 18, 1, 0},{-18, 1, 0},{ 19, 1, 0},{-19, 1, 0},{ 20, 1, 0},
+      {-20, 1, 0},{ 21, 1, 0},{-21, 1, 0},{  2, 2, 0},{ -2, 2, 0},{ 22, 1, 0},
+      {-22, 1, 0},{ 23, 1, 0},{-23, 1, 0},{ 24, 1, 0},{-24, 1, 0},{ 25, 1, 0},
+      {-25, 1, 0},{  3, 2, 0},{ -3, 2, 0},{ 26, 1, 0},{-26, 1, 0}
+    },
+    //level_add
+    { 0,27, 4,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    2, //golomb_order
+    INT_MAX, //inc_limit
+    2, //max_run
+  }
+};
+
+static const residual_vlc_t inter_2dvlc[7] = {
+  {
+    { //level / run
+      {  1, 1, 1},{ -1, 1, 1},{  1, 2, 1},{ -1, 2, 1},{  1, 3, 1},{ -1, 3, 1},
+      {  1, 4, 1},{ -1, 4, 1},{  1, 5, 1},{ -1, 5, 1},{  1, 6, 1},{ -1, 6, 1},
+      {  1, 7, 1},{ -1, 7, 1},{  1, 8, 1},{ -1, 8, 1},{  1, 9, 1},{ -1, 9, 1},
+      {  1,10, 1},{ -1,10, 1},{  1,11, 1},{ -1,11, 1},{  1,12, 1},{ -1,12, 1},
+      {  1,13, 1},{ -1,13, 1},{  2, 1, 2},{ -2, 1, 2},{  1,14, 1},{ -1,14, 1},
+      {  1,15, 1},{ -1,15, 1},{  1,16, 1},{ -1,16, 1},{  1,17, 1},{ -1,17, 1},
+      {  1,18, 1},{ -1,18, 1},{  1,19, 1},{ -1,19, 1},{  3, 1, 3},{ -3, 1, 3},
+      {  1,20, 1},{ -1,20, 1},{  1,21, 1},{ -1,21, 1},{  2, 2, 2},{ -2, 2, 2},
+      {  1,22, 1},{ -1,22, 1},{  1,23, 1},{ -1,23, 1},{  1,24, 1},{ -1,24, 1},
+      {  1,25, 1},{ -1,25, 1},{  1,26, 1},{ -1,26, 1},{   EOB   }
+    },
+    //level_add
+    { 0, 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+      2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+    3, //golomb_order
+    0, //inc_limit
+    26 //max_run
+  },{
+    { //level / run
+      {  1, 1, 0},{ -1, 1, 0},{   EOB   },{  1, 2, 0},{ -1, 2, 0},{  1, 3, 0},
+      { -1, 3, 0},{  1, 4, 0},{ -1, 4, 0},{  1, 5, 0},{ -1, 5, 0},{  1, 6, 0},
+      { -1, 6, 0},{  2, 1, 1},{ -2, 1, 1},{  1, 7, 0},{ -1, 7, 0},{  1, 8, 0},
+      { -1, 8, 0},{  1, 9, 0},{ -1, 9, 0},{  1,10, 0},{ -1,10, 0},{  2, 2, 1},
+      { -2, 2, 1},{  1,11, 0},{ -1,11, 0},{  1,12, 0},{ -1,12, 0},{  3, 1, 2},
+      { -3, 1, 2},{  1,13, 0},{ -1,13, 0},{  1,14, 0},{ -1,14, 0},{  2, 3, 1},
+      { -2, 3, 1},{  1,15, 0},{ -1,15, 0},{  2, 4, 1},{ -2, 4, 1},{  1,16, 0},
+      { -1,16, 0},{  2, 5, 1},{ -2, 5, 1},{  1,17, 0},{ -1,17, 0},{  4, 1, 3},
+      { -4, 1, 3},{  2, 6, 1},{ -2, 6, 1},{  1,18, 0},{ -1,18, 0},{  1,19, 0},
+      { -1,19, 0},{  2, 7, 1},{ -2, 7, 1},{  3, 2, 2},{ -3, 2, 2}
+    },
+    //level_add
+    { 0, 5, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+      2, 2, 2,-1,-1,-1,-1,-1,-1,-1},
+    2, //golomb_order
+    1, //inc_limit
+    19 //max_run
+  },{
+    { //level / run
+      {  1, 1, 0},{ -1, 1, 0},{   EOB   },{  1, 2, 0},{ -1, 2, 0},{  2, 1, 0},
+      { -2, 1, 0},{  1, 3, 0},{ -1, 3, 0},{  1, 4, 0},{ -1, 4, 0},{  3, 1, 1},
+      { -3, 1, 1},{  2, 2, 0},{ -2, 2, 0},{  1, 5, 0},{ -1, 5, 0},{  1, 6, 0},
+      { -1, 6, 0},{  1, 7, 0},{ -1, 7, 0},{  2, 3, 0},{ -2, 3, 0},{  4, 1, 2},
+      { -4, 1, 2},{  1, 8, 0},{ -1, 8, 0},{  3, 2, 1},{ -3, 2, 1},{  2, 4, 0},
+      { -2, 4, 0},{  1, 9, 0},{ -1, 9, 0},{  1,10, 0},{ -1,10, 0},{  5, 1, 2},
+      { -5, 1, 2},{  2, 5, 0},{ -2, 5, 0},{  1,11, 0},{ -1,11, 0},{  2, 6, 0},
+      { -2, 6, 0},{  1,12, 0},{ -1,12, 0},{  3, 3, 1},{ -3, 3, 1},{  6, 1, 2},
+      { -6, 1, 2},{  4, 2, 2},{ -4, 2, 2},{  1,13, 0},{ -1,13, 0},{  2, 7, 0},
+      { -2, 7, 0},{  3, 4, 1},{ -3, 4, 1},{  1,14, 0},{ -1,14, 0}
+    },
+    //level_add
+    { 0, 7, 5, 4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,-1,-1,
+      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    2, //golomb_order
+    2, //inc_limit
+    14 //max_run
+  },{
+    { //level / run
+      {  1, 1, 0},{ -1, 1, 0},{   EOB   },{  2, 1, 0},{ -2, 1, 0},{  1, 2, 0},
+      { -1, 2, 0},{  3, 1, 0},{ -3, 1, 0},{  1, 3, 0},{ -1, 3, 0},{  2, 2, 0},
+      { -2, 2, 0},{  4, 1, 1},{ -4, 1, 1},{  1, 4, 0},{ -1, 4, 0},{  5, 1, 1},
+      { -5, 1, 1},{  1, 5, 0},{ -1, 5, 0},{  3, 2, 0},{ -3, 2, 0},{  2, 3, 0},
+      { -2, 3, 0},{  1, 6, 0},{ -1, 6, 0},{  6, 1, 1},{ -6, 1, 1},{  2, 4, 0},
+      { -2, 4, 0},{  1, 7, 0},{ -1, 7, 0},{  4, 2, 1},{ -4, 2, 1},{  7, 1, 2},
+      { -7, 1, 2},{  3, 3, 0},{ -3, 3, 0},{  1, 8, 0},{ -1, 8, 0},{  2, 5, 0},
+      { -2, 5, 0},{  8, 1, 2},{ -8, 1, 2},{  1, 9, 0},{ -1, 9, 0},{  3, 4, 0},
+      { -3, 4, 0},{  2, 6, 0},{ -2, 6, 0},{  5, 2, 1},{ -5, 2, 1},{  1,10, 0},
+      { -1,10, 0},{  9, 1, 2},{ -9, 1, 2},{  4, 3, 1},{ -4, 3, 1}
+    },
+    //level_add
+    { 0,10, 6, 5, 4, 3, 3, 2, 2, 2, 2,-1,-1,-1,-1,-1,-1,
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    2, //golomb_order
+    3, //inc_limit
+    10 //max_run
+  },{
+    { //level / run
+      {  1, 1, 0},{ -1, 1, 0},{   EOB   },{  2, 1, 0},{ -2, 1, 0},{  3, 1, 0},
+      { -3, 1, 0},{  1, 2, 0},{ -1, 2, 0},{  4, 1, 0},{ -4, 1, 0},{  5, 1, 0},
+      { -5, 1, 0},{  2, 2, 0},{ -2, 2, 0},{  1, 3, 0},{ -1, 3, 0},{  6, 1, 0},
+      { -6, 1, 0},{  3, 2, 0},{ -3, 2, 0},{  7, 1, 1},{ -7, 1, 1},{  1, 4, 0},
+      { -1, 4, 0},{  8, 1, 1},{ -8, 1, 1},{  2, 3, 0},{ -2, 3, 0},{  4, 2, 0},
+      { -4, 2, 0},{  1, 5, 0},{ -1, 5, 0},{  9, 1, 1},{ -9, 1, 1},{  5, 2, 0},
+      { -5, 2, 0},{  2, 4, 0},{ -2, 4, 0},{  1, 6, 0},{ -1, 6, 0},{ 10, 1, 2},
+      {-10, 1, 2},{  3, 3, 0},{ -3, 3, 0},{ 11, 1, 2},{-11, 1, 2},{  1, 7, 0},
+      { -1, 7, 0},{  6, 2, 0},{ -6, 2, 0},{  3, 4, 0},{ -3, 4, 0},{  2, 5, 0},
+      { -2, 5, 0},{ 12, 1, 2},{-12, 1, 2},{  4, 3, 0},{ -4, 3, 0}
+    },
+    //level_add
+    { 0,13, 7, 5, 4, 3, 2, 2,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    2, //golomb_order
+    6, //inc_limit
+    7  //max_run
+  },{
+    { //level / run
+      {   EOB   },{  1, 1, 0},{ -1, 1, 0},{  2, 1, 0},{ -2, 1, 0},{  3, 1, 0},
+      { -3, 1, 0},{  4, 1, 0},{ -4, 1, 0},{  5, 1, 0},{ -5, 1, 0},{  1, 2, 0},
+      { -1, 2, 0},{  6, 1, 0},{ -6, 1, 0},{  7, 1, 0},{ -7, 1, 0},{  8, 1, 0},
+      { -8, 1, 0},{  2, 2, 0},{ -2, 2, 0},{  9, 1, 0},{ -9, 1, 0},{  1, 3, 0},
+      { -1, 3, 0},{ 10, 1, 1},{-10, 1, 1},{  3, 2, 0},{ -3, 2, 0},{ 11, 1, 1},
+      {-11, 1, 1},{  4, 2, 0},{ -4, 2, 0},{ 12, 1, 1},{-12, 1, 1},{  1, 4, 0},
+      { -1, 4, 0},{  2, 3, 0},{ -2, 3, 0},{ 13, 1, 1},{-13, 1, 1},{  5, 2, 0},
+      { -5, 2, 0},{ 14, 1, 1},{-14, 1, 1},{  6, 2, 0},{ -6, 2, 0},{  1, 5, 0},
+      { -1, 5, 0},{ 15, 1, 1},{-15, 1, 1},{  3, 3, 0},{ -3, 3, 0},{ 16, 1, 1},
+      {-16, 1, 1},{  2, 4, 0},{ -2, 4, 0},{  7, 2, 0},{ -7, 2, 0}
+    },
+    //level_add
+    { 0,17, 8, 4, 3, 2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    2, //golomb_order
+    9, //inc_limit
+    5  //max_run
+  },{
+    { //level / run
+      {   EOB   },{  1, 1, 0},{ -1, 1, 0},{  2, 1, 0},{ -2, 1, 0},{  3, 1, 0},
+      { -3, 1, 0},{  4, 1, 0},{ -4, 1, 0},{  5, 1, 0},{ -5, 1, 0},{  6, 1, 0},
+      { -6, 1, 0},{  7, 1, 0},{ -7, 1, 0},{  1, 2, 0},{ -1, 2, 0},{  8, 1, 0},
+      { -8, 1, 0},{  9, 1, 0},{ -9, 1, 0},{ 10, 1, 0},{-10, 1, 0},{ 11, 1, 0},
+      {-11, 1, 0},{ 12, 1, 0},{-12, 1, 0},{  2, 2, 0},{ -2, 2, 0},{ 13, 1, 0},
+      {-13, 1, 0},{  1, 3, 0},{ -1, 3, 0},{ 14, 1, 0},{-14, 1, 0},{ 15, 1, 0},
+      {-15, 1, 0},{  3, 2, 0},{ -3, 2, 0},{ 16, 1, 0},{-16, 1, 0},{ 17, 1, 0},
+      {-17, 1, 0},{ 18, 1, 0},{-18, 1, 0},{  4, 2, 0},{ -4, 2, 0},{ 19, 1, 0},
+      {-19, 1, 0},{ 20, 1, 0},{-20, 1, 0},{  2, 3, 0},{ -2, 3, 0},{  1, 4, 0},
+      { -1, 4, 0},{  5, 2, 0},{ -5, 2, 0},{ 21, 1, 0},{-21, 1, 0}
+    },
+    //level_add
+    { 0,22, 6, 3, 2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    2, //golomb_order
+    INT_MAX, //inc_limit
+    4 //max_run
+  }
+};
+
+static const residual_vlc_t chroma_2dvlc[5] = {
+  {
+    { //level / run
+      {  1, 1, 1},{ -1, 1, 1},{  1, 2, 1},{ -1, 2, 1},{  1, 3, 1},{ -1, 3, 1},
+      {  1, 4, 1},{ -1, 4, 1},{  1, 5, 1},{ -1, 5, 1},{  1, 6, 1},{ -1, 6, 1},
+      {  1, 7, 1},{ -1, 7, 1},{  2, 1, 2},{ -2, 1, 2},{  1, 8, 1},{ -1, 8, 1},
+      {  1, 9, 1},{ -1, 9, 1},{  1,10, 1},{ -1,10, 1},{  1,11, 1},{ -1,11, 1},
+      {  1,12, 1},{ -1,12, 1},{  1,13, 1},{ -1,13, 1},{  1,14, 1},{ -1,14, 1},
+      {  1,15, 1},{ -1,15, 1},{  3, 1, 3},{ -3, 1, 3},{  1,16, 1},{ -1,16, 1},
+      {  1,17, 1},{ -1,17, 1},{  1,18, 1},{ -1,18, 1},{  1,19, 1},{ -1,19, 1},
+      {  1,20, 1},{ -1,20, 1},{  1,21, 1},{ -1,21, 1},{  1,22, 1},{ -1,22, 1},
+      {  2, 2, 2},{ -2, 2, 2},{  1,23, 1},{ -1,23, 1},{  1,24, 1},{ -1,24, 1},
+      {  1,25, 1},{ -1,25, 1},{  4, 1, 3},{ -4, 1, 3},{   EOB   }
+    },
+    //level_add
+    { 0, 5, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+      2, 2, 2, 2, 2, 2, 2, 2, 2,-1},
+    2, //golomb_order
+    0, //inc_limit
+    25 //max_run
+  },{
+    { //level / run
+      {   EOB   },{  1, 1, 0},{ -1, 1, 0},{  1, 2, 0},{ -1, 2, 0},{  2, 1, 1},
+      { -2, 1, 1},{  1, 3, 0},{ -1, 3, 0},{  1, 4, 0},{ -1, 4, 0},{  1, 5, 0},
+      { -1, 5, 0},{  1, 6, 0},{ -1, 6, 0},{  3, 1, 2},{ -3, 1, 2},{  1, 7, 0},
+      { -1, 7, 0},{  1, 8, 0},{ -1, 8, 0},{  2, 2, 1},{ -2, 2, 1},{  1, 9, 0},
+      { -1, 9, 0},{  1,10, 0},{ -1,10, 0},{  1,11, 0},{ -1,11, 0},{  4, 1, 2},
+      { -4, 1, 2},{  1,12, 0},{ -1,12, 0},{  1,13, 0},{ -1,13, 0},{  1,14, 0},
+      { -1,14, 0},{  2, 3, 1},{ -2, 3, 1},{  1,15, 0},{ -1,15, 0},{  2, 4, 1},
+      { -2, 4, 1},{  5, 1, 3},{ -5, 1, 3},{  3, 2, 2},{ -3, 2, 2},{  1,16, 0},
+      { -1,16, 0},{  1,17, 0},{ -1,17, 0},{  1,18, 0},{ -1,18, 0},{  2, 5, 1},
+      { -2, 5, 1},{  1,19, 0},{ -1,19, 0},{  1,20, 0},{ -1,20, 0}
+    },
+    //level_add
+    { 0, 6, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+      2, 2, 2, 2,-1,-1,-1,-1,-1,-1},
+    0, //golomb_order
+    1, //inc_limit
+    20 //max_run
+  },{
+    { //level / run
+      {  1, 1, 0},{ -1, 1, 0},{   EOB   },{  2, 1, 0},{ -2, 1, 0},{  1, 2, 0},
+      { -1, 2, 0},{  3, 1, 1},{ -3, 1, 1},{  1, 3, 0},{ -1, 3, 0},{  4, 1, 1},
+      { -4, 1, 1},{  2, 2, 0},{ -2, 2, 0},{  1, 4, 0},{ -1, 4, 0},{  5, 1, 2},
+      { -5, 1, 2},{  1, 5, 0},{ -1, 5, 0},{  3, 2, 1},{ -3, 2, 1},{  2, 3, 0},
+      { -2, 3, 0},{  1, 6, 0},{ -1, 6, 0},{  6, 1, 2},{ -6, 1, 2},{  1, 7, 0},
+      { -1, 7, 0},{  2, 4, 0},{ -2, 4, 0},{  7, 1, 2},{ -7, 1, 2},{  1, 8, 0},
+      { -1, 8, 0},{  4, 2, 1},{ -4, 2, 1},{  1, 9, 0},{ -1, 9, 0},{  3, 3, 1},
+      { -3, 3, 1},{  2, 5, 0},{ -2, 5, 0},{  2, 6, 0},{ -2, 6, 0},{  8, 1, 2},
+      { -8, 1, 2},{  1,10, 0},{ -1,10, 0},{  1,11, 0},{ -1,11, 0},{  9, 1, 2},
+      { -9, 1, 2},{  5, 2, 2},{ -5, 2, 2},{  3, 4, 1},{ -3, 4, 1},
+    },
+    //level_add
+    { 0,10, 6, 4, 4, 3, 3, 2, 2, 2, 2, 2,-1,-1,-1,-1,-1,
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    1, //golomb_order
+    2, //inc_limit
+    11 //max_run
+  },{
+    { //level / run
+      {   EOB   },{  1, 1, 0},{ -1, 1, 0},{  2, 1, 0},{ -2, 1, 0},{  3, 1, 0},
+      { -3, 1, 0},{  4, 1, 0},{ -4, 1, 0},{  1, 2, 0},{ -1, 2, 0},{  5, 1, 1},
+      { -5, 1, 1},{  2, 2, 0},{ -2, 2, 0},{  6, 1, 1},{ -6, 1, 1},{  1, 3, 0},
+      { -1, 3, 0},{  7, 1, 1},{ -7, 1, 1},{  3, 2, 0},{ -3, 2, 0},{  8, 1, 1},
+      { -8, 1, 1},{  1, 4, 0},{ -1, 4, 0},{  2, 3, 0},{ -2, 3, 0},{  9, 1, 1},
+      { -9, 1, 1},{  4, 2, 0},{ -4, 2, 0},{  1, 5, 0},{ -1, 5, 0},{ 10, 1, 1},
+      {-10, 1, 1},{  3, 3, 0},{ -3, 3, 0},{  5, 2, 1},{ -5, 2, 1},{  2, 4, 0},
+      { -2, 4, 0},{ 11, 1, 1},{-11, 1, 1},{  1, 6, 0},{ -1, 6, 0},{ 12, 1, 1},
+      {-12, 1, 1},{  1, 7, 0},{ -1, 7, 0},{  6, 2, 1},{ -6, 2, 1},{ 13, 1, 1},
+      {-13, 1, 1},{  2, 5, 0},{ -2, 5, 0},{  1, 8, 0},{ -1, 8, 0},
+    },
+    //level_add
+    { 0,14, 7, 4, 3, 3, 2, 2, 2,-1,-1,-1,-1,-1,-1,-1,-1,
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    1, //golomb_order
+    4, //inc_limit
+    8  //max_run
+  },{
+    { //level / run
+      {   EOB   },{  1, 1, 0},{ -1, 1, 0},{  2, 1, 0},{ -2, 1, 0},{  3, 1, 0},
+      { -3, 1, 0},{  4, 1, 0},{ -4, 1, 0},{  5, 1, 0},{ -5, 1, 0},{  6, 1, 0},
+      { -6, 1, 0},{  7, 1, 0},{ -7, 1, 0},{  8, 1, 0},{ -8, 1, 0},{  1, 2, 0},
+      { -1, 2, 0},{  9, 1, 0},{ -9, 1, 0},{ 10, 1, 0},{-10, 1, 0},{ 11, 1, 0},
+      {-11, 1, 0},{  2, 2, 0},{ -2, 2, 0},{ 12, 1, 0},{-12, 1, 0},{ 13, 1, 0},
+      {-13, 1, 0},{  3, 2, 0},{ -3, 2, 0},{ 14, 1, 0},{-14, 1, 0},{  1, 3, 0},
+      { -1, 3, 0},{ 15, 1, 0},{-15, 1, 0},{  4, 2, 0},{ -4, 2, 0},{ 16, 1, 0},
+      {-16, 1, 0},{ 17, 1, 0},{-17, 1, 0},{  5, 2, 0},{ -5, 2, 0},{  1, 4, 0},
+      { -1, 4, 0},{  2, 3, 0},{ -2, 3, 0},{ 18, 1, 0},{-18, 1, 0},{  6, 2, 0},
+      { -6, 2, 0},{ 19, 1, 0},{-19, 1, 0},{  1, 5, 0},{ -1, 5, 0},
+    },
+    //level_add
+    { 0,20, 7, 3, 2, 2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    0, //golomb_order
+    INT_MAX, //inc_limit
+    5, //max_run
+  }
+};
+
+#undef EOB
+
+static const uint8_t alpha_tab[64] = {
+   0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  2,  2,  2,  3,  3,
+   4,  4,  5,  5,  6,  7,  8,  9, 10, 11, 12, 13, 15, 16, 18, 20,
+  22, 24, 26, 28, 30, 33, 33, 35, 35, 36, 37, 37, 39, 39, 42, 44,
+  46, 48, 50, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
+};
+
+static const uint8_t beta_tab[64] = {
+   0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,
+   2,  2,  3,  3,  3,  3,  4,  4,  4,  4,  5,  5,  5,  5,  6,  6,
+   6,  7,  7,  7,  8,  8,  8,  9,  9, 10, 10, 11, 11, 12, 13, 14,
+  15, 16, 17, 18, 19, 20, 21, 22, 23, 23, 24, 24, 25, 25, 26, 27
+};
+
+static const uint8_t tc_tab[64] = {
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
+  2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
+  5, 5, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9
+};
+
+static const int_fast8_t left_modifier_l[8] = { 0,-1, 6,-1,-1, 7, 6, 7};
+static const int_fast8_t top_modifier_l[8]  = {-1, 1, 5,-1,-1, 5, 7, 7};
+static const int_fast8_t left_modifier_c[7] = { 5,-1, 2,-1, 6, 5, 6};
+static const int_fast8_t top_modifier_c[7]  = { 4, 1,-1,-1, 4, 6, 6};
+#endif /* CONFIG_CAVS_DECODER */
diff --git a/contrib/ffmpeg/libavcodec/cavsdsp.c b/contrib/ffmpeg/libavcodec/cavsdsp.c
new file mode 100644
index 000000000..220dec1b8
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/cavsdsp.c
@@ -0,0 +1,546 @@
+/*
+ * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
+ *
+ * DSP functions
+ *
+ * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <stdio.h>
+#include "dsputil.h"
+
+/*****************************************************************************
+ *
+ * in-loop deblocking filter
+ *
+ ****************************************************************************/
+
+#define P2 p0_p[-3*stride]
+#define P1 p0_p[-2*stride]
+#define P0 p0_p[-1*stride]
+#define Q0 p0_p[ 0*stride]
+#define Q1 p0_p[ 1*stride]
+#define Q2 p0_p[ 2*stride]
+
+static inline void loop_filter_l2(uint8_t *p0_p,int stride,int alpha, int beta) {
+    int p0 = P0;
+    int q0 = Q0;
+
+    if(abs(p0-q0)<alpha && abs(P1-p0)<beta && abs(Q1-q0)<beta) {
+        int s = p0 + q0 + 2;
+        alpha = (alpha>>2) + 2;
+        if(abs(P2-p0) < beta && abs(p0-q0) < alpha) {
+            P0 = (P1 + p0 + s) >> 2;
+            P1 = (2*P1 + s) >> 2;
+        } else
+            P0 = (2*P1 + s) >> 2;
+        if(abs(Q2-q0) < beta && abs(q0-p0) < alpha) {
+            Q0 = (Q1 + q0 + s) >> 2;
+            Q1 = (2*Q1 + s) >> 2;
+        } else
+            Q0 = (2*Q1 + s) >> 2;
+    }
+}
+
+static inline void loop_filter_l1(uint8_t *p0_p, int stride, int alpha, int beta, int tc) {
+    int p0 = P0;
+    int q0 = Q0;
+
+    if(abs(p0-q0)<alpha && abs(P1-p0)<beta && abs(Q1-q0)<beta) {
+        int delta = clip(((q0-p0)*3+P1-Q1+4)>>3,-tc, tc);
+        P0 = clip_uint8(p0+delta);
+        Q0 = clip_uint8(q0-delta);
+        if(abs(P2-p0)<beta) {
+            delta = clip(((P0-P1)*3+P2-Q0+4)>>3, -tc, tc);
+            P1 = clip_uint8(P1+delta);
+        }
+        if(abs(Q2-q0)<beta) {
+            delta = clip(((Q1-Q0)*3+P0-Q2+4)>>3, -tc, tc);
+            Q1 = clip_uint8(Q1-delta);
+        }
+    }
+}
+
+static inline void loop_filter_c2(uint8_t *p0_p,int stride,int alpha, int beta) {
+    int p0 = P0;
+    int q0 = Q0;
+
+    if(abs(p0-q0)<alpha && abs(P1-p0)<beta && abs(Q1-q0)<beta) {
+        int s = p0 + q0 + 2;
+        alpha = (alpha>>2) + 2;
+        if(abs(P2-p0) < beta && abs(p0-q0) < alpha) {
+            P0 = (P1 + p0 + s) >> 2;
+        } else
+            P0 = (2*P1 + s) >> 2;
+        if(abs(Q2-q0) < beta && abs(q0-p0) < alpha) {
+            Q0 = (Q1 + q0 + s) >> 2;
+        } else
+            Q0 = (2*Q1 + s) >> 2;
+    }
+}
+
+static inline void loop_filter_c1(uint8_t *p0_p,int stride,int alpha, int beta,
+                                  int tc) {
+    if(abs(P0-Q0)<alpha && abs(P1-P0)<beta && abs(Q1-Q0)<beta) {
+        int delta = clip(((Q0-P0)*3+P1-Q1+4)>>3, -tc, tc);
+        P0 = clip_uint8(P0+delta);
+        Q0 = clip_uint8(Q0-delta);
+    }
+}
+
+#undef P0
+#undef P1
+#undef P2
+#undef Q0
+#undef Q1
+#undef Q2
+
+static void cavs_filter_lv_c(uint8_t *d, int stride, int alpha, int beta, int tc,
+                           int bs1, int bs2) {
+    int i;
+    if(bs1==2)
+        for(i=0;i<16;i++)
+            loop_filter_l2(d + i*stride,1,alpha,beta);
+    else {
+        if(bs1)
+            for(i=0;i<8;i++)
+                loop_filter_l1(d + i*stride,1,alpha,beta,tc);
+        if (bs2)
+            for(i=8;i<16;i++)
+                loop_filter_l1(d + i*stride,1,alpha,beta,tc);
+    }
+}
+
+static void cavs_filter_lh_c(uint8_t *d, int stride, int alpha, int beta, int tc,
+                           int bs1, int bs2) {
+    int i;
+    if(bs1==2)
+        for(i=0;i<16;i++)
+            loop_filter_l2(d + i,stride,alpha,beta);
+    else {
+        if(bs1)
+            for(i=0;i<8;i++)
+                loop_filter_l1(d + i,stride,alpha,beta,tc);
+        if (bs2)
+            for(i=8;i<16;i++)
+                loop_filter_l1(d + i,stride,alpha,beta,tc);
+    }
+}
+
+static void cavs_filter_cv_c(uint8_t *d, int stride, int alpha, int beta, int tc,
+                           int bs1, int bs2) {
+    int i;
+    if(bs1==2)
+        for(i=0;i<8;i++)
+            loop_filter_c2(d + i*stride,1,alpha,beta);
+    else {
+        if(bs1)
+            for(i=0;i<4;i++)
+                loop_filter_c1(d + i*stride,1,alpha,beta,tc);
+        if (bs2)
+            for(i=4;i<8;i++)
+                loop_filter_c1(d + i*stride,1,alpha,beta,tc);
+    }
+}
+
+static void cavs_filter_ch_c(uint8_t *d, int stride, int alpha, int beta, int tc,
+                           int bs1, int bs2) {
+    int i;
+    if(bs1==2)
+        for(i=0;i<8;i++)
+            loop_filter_c2(d + i,stride,alpha,beta);
+    else {
+        if(bs1)
+            for(i=0;i<4;i++)
+                loop_filter_c1(d + i,stride,alpha,beta,tc);
+        if (bs2)
+            for(i=4;i<8;i++)
+                loop_filter_c1(d + i,stride,alpha,beta,tc);
+    }
+}
+
+/*****************************************************************************
+ *
+ * inverse transform
+ *
+ ****************************************************************************/
+
+static void cavs_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride) {
+    int i;
+    DCTELEM (*src)[8] = (DCTELEM(*)[8])block;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    src[0][0] += 8;
+
+    for( i = 0; i < 8; i++ ) {
+        const int a0 =  3*src[i][1] - (src[i][7]<<1);
+        const int a1 =  3*src[i][3] + (src[i][5]<<1);
+        const int a2 =  (src[i][3]<<1) - 3*src[i][5];
+        const int a3 =  (src[i][1]<<1) + 3*src[i][7];
+
+        const int b4 = ((a0 + a1 + a3)<<1) + a1;
+        const int b5 = ((a0 - a1 + a2)<<1) + a0;
+        const int b6 = ((a3 - a2 - a1)<<1) + a3;
+        const int b7 = ((a0 - a2 - a3)<<1) - a2;
+
+        const int a7 = (src[i][2]<<2) - 10*src[i][6];
+        const int a6 = (src[i][6]<<2) + 10*src[i][2];
+        const int a5 = ((src[i][0] - src[i][4]) << 3) + 4;
+        const int a4 = ((src[i][0] + src[i][4]) << 3) + 4;
+
+        const int b0 = a4 + a6;
+        const int b1 = a5 + a7;
+        const int b2 = a5 - a7;
+        const int b3 = a4 - a6;
+
+        src[i][0] = (b0 + b4) >> 3;
+        src[i][1] = (b1 + b5) >> 3;
+        src[i][2] = (b2 + b6) >> 3;
+        src[i][3] = (b3 + b7) >> 3;
+        src[i][4] = (b3 - b7) >> 3;
+        src[i][5] = (b2 - b6) >> 3;
+        src[i][6] = (b1 - b5) >> 3;
+        src[i][7] = (b0 - b4) >> 3;
+    }
+    for( i = 0; i < 8; i++ ) {
+        const int a0 =  3*src[1][i] - (src[7][i]<<1);
+        const int a1 =  3*src[3][i] + (src[5][i]<<1);
+        const int a2 =  (src[3][i]<<1) - 3*src[5][i];
+        const int a3 =  (src[1][i]<<1) + 3*src[7][i];
+
+        const int b4 = ((a0 + a1 + a3)<<1) + a1;
+        const int b5 = ((a0 - a1 + a2)<<1) + a0;
+        const int b6 = ((a3 - a2 - a1)<<1) + a3;
+        const int b7 = ((a0 - a2 - a3)<<1) - a2;
+
+        const int a7 = (src[2][i]<<2) - 10*src[6][i];
+        const int a6 = (src[6][i]<<2) + 10*src[2][i];
+        const int a5 = (src[0][i] - src[4][i]) << 3;
+        const int a4 = (src[0][i] + src[4][i]) << 3;
+
+        const int b0 = a4 + a6;
+        const int b1 = a5 + a7;
+        const int b2 = a5 - a7;
+        const int b3 = a4 - a6;
+
+        dst[i + 0*stride] = cm[ dst[i + 0*stride] + ((b0 + b4) >> 7)];
+        dst[i + 1*stride] = cm[ dst[i + 1*stride] + ((b1 + b5) >> 7)];
+        dst[i + 2*stride] = cm[ dst[i + 2*stride] + ((b2 + b6) >> 7)];
+        dst[i + 3*stride] = cm[ dst[i + 3*stride] + ((b3 + b7) >> 7)];
+        dst[i + 4*stride] = cm[ dst[i + 4*stride] + ((b3 - b7) >> 7)];
+        dst[i + 5*stride] = cm[ dst[i + 5*stride] + ((b2 - b6) >> 7)];
+        dst[i + 6*stride] = cm[ dst[i + 6*stride] + ((b1 - b5) >> 7)];
+        dst[i + 7*stride] = cm[ dst[i + 7*stride] + ((b0 - b4) >> 7)];
+    }
+    memset(block,0,64*sizeof(DCTELEM));
+}
+
+/*****************************************************************************
+ *
+ * motion compensation
+ *
+ ****************************************************************************/
+
+#define CAVS_SUBPIX(OPNAME, OP, NAME, A, B, C, D, E, F) \
+static void OPNAME ## cavs_filt8_h_ ## NAME(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], A*src[-2] + B*src[-1] + C*src[0] + D*src[1] + E*src[2] + F*src[3]);\
+        OP(dst[1], A*src[-1] + B*src[ 0] + C*src[1] + D*src[2] + E*src[3] + F*src[4]);\
+        OP(dst[2], A*src[ 0] + B*src[ 1] + C*src[2] + D*src[3] + E*src[4] + F*src[5]);\
+        OP(dst[3], A*src[ 1] + B*src[ 2] + C*src[3] + D*src[4] + E*src[5] + F*src[6]);\
+        OP(dst[4], A*src[ 2] + B*src[ 3] + C*src[4] + D*src[5] + E*src[6] + F*src[7]);\
+        OP(dst[5], A*src[ 3] + B*src[ 4] + C*src[5] + D*src[6] + E*src[7] + F*src[8]);\
+        OP(dst[6], A*src[ 4] + B*src[ 5] + C*src[6] + D*src[7] + E*src[8] + F*src[9]);\
+        OP(dst[7], A*src[ 5] + B*src[ 6] + C*src[7] + D*src[8] + E*src[9] + F*src[10]);\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## cavs_filt8_v_  ## NAME(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        const int src5= src[5 *srcStride];\
+        const int src6= src[6 *srcStride];\
+        const int src7= src[7 *srcStride];\
+        const int src8= src[8 *srcStride];\
+        const int src9= src[9 *srcStride];\
+        const int src10= src[10 *srcStride];\
+        OP(dst[0*dstStride], A*srcB + B*srcA + C*src0 + D*src1 + E*src2 + F*src3);\
+        OP(dst[1*dstStride], A*srcA + B*src0 + C*src1 + D*src2 + E*src3 + F*src4);\
+        OP(dst[2*dstStride], A*src0 + B*src1 + C*src2 + D*src3 + E*src4 + F*src5);\
+        OP(dst[3*dstStride], A*src1 + B*src2 + C*src3 + D*src4 + E*src5 + F*src6);\
+        OP(dst[4*dstStride], A*src2 + B*src3 + C*src4 + D*src5 + E*src6 + F*src7);\
+        OP(dst[5*dstStride], A*src3 + B*src4 + C*src5 + D*src6 + E*src7 + F*src8);\
+        OP(dst[6*dstStride], A*src4 + B*src5 + C*src6 + D*src7 + E*src8 + F*src9);\
+        OP(dst[7*dstStride], A*src5 + B*src6 + C*src7 + D*src8 + E*src9 + F*src10);\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## cavs_filt16_v_ ## NAME(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## cavs_filt8_v_ ## NAME(dst  , src  , dstStride, srcStride);\
+    OPNAME ## cavs_filt8_v_ ## NAME(dst+8, src+8, dstStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## cavs_filt8_v_ ## NAME(dst  , src  , dstStride, srcStride);\
+    OPNAME ## cavs_filt8_v_ ## NAME(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static void OPNAME ## cavs_filt16_h_ ## NAME(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## cavs_filt8_h_ ## NAME(dst  , src  , dstStride, srcStride);\
+    OPNAME ## cavs_filt8_h_ ## NAME(dst+8, src+8, dstStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## cavs_filt8_h_ ## NAME(dst  , src  , dstStride, srcStride);\
+    OPNAME ## cavs_filt8_h_ ## NAME(dst+8, src+8, dstStride, srcStride);\
+}\
+
+#define CAVS_SUBPIX_HV(OPNAME, OP, NAME, AH, BH, CH, DH, EH, FH, AV, BV, CV, DV, EV, FV, FULL) \
+static void OPNAME ## cavs_filt8_hv_ ## NAME(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int srcStride){\
+    int16_t temp[8*(8+5)];\
+    int16_t *tmp = temp;\
+    const int h=8;\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src1 -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= AH*src1[-2] + BH*src1[-1] + CH*src1[0] + DH*src1[1] + EH*src1[2] + FH*src1[3];\
+        tmp[1]= AH*src1[-1] + BH*src1[ 0] + CH*src1[1] + DH*src1[2] + EH*src1[3] + FH*src1[4];\
+        tmp[2]= AH*src1[ 0] + BH*src1[ 1] + CH*src1[2] + DH*src1[3] + EH*src1[4] + FH*src1[5];\
+        tmp[3]= AH*src1[ 1] + BH*src1[ 2] + CH*src1[3] + DH*src1[4] + EH*src1[5] + FH*src1[6];\
+        tmp[4]= AH*src1[ 2] + BH*src1[ 3] + CH*src1[4] + DH*src1[5] + EH*src1[6] + FH*src1[7];\
+        tmp[5]= AH*src1[ 3] + BH*src1[ 4] + CH*src1[5] + DH*src1[6] + EH*src1[7] + FH*src1[8];\
+        tmp[6]= AH*src1[ 4] + BH*src1[ 5] + CH*src1[6] + DH*src1[7] + EH*src1[8] + FH*src1[9];\
+        tmp[7]= AH*src1[ 5] + BH*src1[ 6] + CH*src1[7] + DH*src1[8] + EH*src1[9] + FH*src1[10];\
+        tmp+=8;\
+        src1+=srcStride;\
+    }\
+    if(FULL) {\
+      tmp = temp+8*2;                           \
+      for(i=0; i<w; i++)                        \
+        {                                       \
+          const int tmpB= tmp[-2*8];    \
+          const int tmpA= tmp[-1*8];    \
+          const int tmp0= tmp[0 *8];    \
+          const int tmp1= tmp[1 *8];    \
+          const int tmp2= tmp[2 *8];    \
+          const int tmp3= tmp[3 *8];    \
+          const int tmp4= tmp[4 *8];    \
+          const int tmp5= tmp[5 *8];    \
+          const int tmp6= tmp[6 *8];    \
+          const int tmp7= tmp[7 *8];    \
+          const int tmp8= tmp[8 *8];    \
+          const int tmp9= tmp[9 *8];    \
+          const int tmp10=tmp[10*8];                            \
+          OP(dst[0*dstStride], AV*tmpB + BV*tmpA + CV*tmp0 + DV*tmp1 + EV*tmp2 + FV*tmp3 + 64*src2[0*srcStride]); \
+          OP(dst[1*dstStride], AV*tmpA + BV*tmp0 + CV*tmp1 + DV*tmp2 + EV*tmp3 + FV*tmp4 + 64*src2[1*srcStride]); \
+          OP(dst[2*dstStride], AV*tmp0 + BV*tmp1 + CV*tmp2 + DV*tmp3 + EV*tmp4 + FV*tmp5 + 64*src2[2*srcStride]); \
+          OP(dst[3*dstStride], AV*tmp1 + BV*tmp2 + CV*tmp3 + DV*tmp4 + EV*tmp5 + FV*tmp6 + 64*src2[3*srcStride]); \
+          OP(dst[4*dstStride], AV*tmp2 + BV*tmp3 + CV*tmp4 + DV*tmp5 + EV*tmp6 + FV*tmp7 + 64*src2[4*srcStride]); \
+          OP(dst[5*dstStride], AV*tmp3 + BV*tmp4 + CV*tmp5 + DV*tmp6 + EV*tmp7 + FV*tmp8 + 64*src2[5*srcStride]); \
+          OP(dst[6*dstStride], AV*tmp4 + BV*tmp5 + CV*tmp6 + DV*tmp7 + EV*tmp8 + FV*tmp9 + 64*src2[6*srcStride]); \
+          OP(dst[7*dstStride], AV*tmp5 + BV*tmp6 + CV*tmp7 + DV*tmp8 + EV*tmp9 + FV*tmp10 + 64*src2[7*srcStride]); \
+          dst++;                                                        \
+          tmp++;                                                        \
+          src2++;                                                       \
+        }                                                               \
+    } else {\
+      tmp = temp+8*2;                           \
+      for(i=0; i<w; i++)                        \
+        {                                       \
+          const int tmpB= tmp[-2*8];    \
+          const int tmpA= tmp[-1*8];    \
+          const int tmp0= tmp[0 *8];    \
+          const int tmp1= tmp[1 *8];    \
+          const int tmp2= tmp[2 *8];    \
+          const int tmp3= tmp[3 *8];    \
+          const int tmp4= tmp[4 *8];    \
+          const int tmp5= tmp[5 *8];    \
+          const int tmp6= tmp[6 *8];    \
+          const int tmp7= tmp[7 *8];    \
+          const int tmp8= tmp[8 *8];    \
+          const int tmp9= tmp[9 *8];    \
+          const int tmp10=tmp[10*8];                            \
+          OP(dst[0*dstStride], AV*tmpB + BV*tmpA + CV*tmp0 + DV*tmp1 + EV*tmp2 + FV*tmp3); \
+          OP(dst[1*dstStride], AV*tmpA + BV*tmp0 + CV*tmp1 + DV*tmp2 + EV*tmp3 + FV*tmp4); \
+          OP(dst[2*dstStride], AV*tmp0 + BV*tmp1 + CV*tmp2 + DV*tmp3 + EV*tmp4 + FV*tmp5); \
+          OP(dst[3*dstStride], AV*tmp1 + BV*tmp2 + CV*tmp3 + DV*tmp4 + EV*tmp5 + FV*tmp6); \
+          OP(dst[4*dstStride], AV*tmp2 + BV*tmp3 + CV*tmp4 + DV*tmp5 + EV*tmp6 + FV*tmp7); \
+          OP(dst[5*dstStride], AV*tmp3 + BV*tmp4 + CV*tmp5 + DV*tmp6 + EV*tmp7 + FV*tmp8); \
+          OP(dst[6*dstStride], AV*tmp4 + BV*tmp5 + CV*tmp6 + DV*tmp7 + EV*tmp8 + FV*tmp9); \
+          OP(dst[7*dstStride], AV*tmp5 + BV*tmp6 + CV*tmp7 + DV*tmp8 + EV*tmp9 + FV*tmp10); \
+          dst++;                                                        \
+          tmp++;                                                        \
+        }                                                               \
+    }\
+}\
+\
+static void OPNAME ## cavs_filt16_hv_ ## NAME(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int srcStride){ \
+    OPNAME ## cavs_filt8_hv_ ## NAME(dst  , src1,   src2  , dstStride, srcStride); \
+    OPNAME ## cavs_filt8_hv_ ## NAME(dst+8, src1+8, src2+8, dstStride, srcStride); \
+    src1 += 8*srcStride;\
+    src2 += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## cavs_filt8_hv_ ## NAME(dst  , src1,   src2  , dstStride, srcStride); \
+    OPNAME ## cavs_filt8_hv_ ## NAME(dst+8, src1+8, src2+8, dstStride, srcStride); \
+}\
+
+#define CAVS_MC(OPNAME, SIZE) \
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## cavs_filt ## SIZE ## _h_qpel_l(dst, src, stride, stride);\
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## cavs_filt ## SIZE ## _h_hpel(dst, src, stride, stride);\
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## cavs_filt ## SIZE ## _h_qpel_r(dst, src, stride, stride);\
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## cavs_filt ## SIZE ## _v_qpel_l(dst, src, stride, stride);\
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## cavs_filt ## SIZE ## _v_hpel(dst, src, stride, stride);\
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## cavs_filt ## SIZE ## _v_qpel_r(dst, src, stride, stride);\
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+  OPNAME ## cavs_filt ## SIZE ## _hv_jj(dst, src, NULL, stride, stride); \
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+  OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src, stride, stride); \
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+  OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src+stride, stride, stride); \
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+  OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src+1, stride, stride); \
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+  OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src+stride+1,stride, stride); \
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+  OPNAME ## cavs_filt ## SIZE ## _hv_ff(dst, src, src+stride+1,stride, stride); \
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+  OPNAME ## cavs_filt ## SIZE ## _hv_ii(dst, src, src+stride+1,stride, stride); \
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+  OPNAME ## cavs_filt ## SIZE ## _hv_kk(dst, src, src+stride+1,stride, stride); \
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+  OPNAME ## cavs_filt ## SIZE ## _hv_qq(dst, src, src+stride+1,stride, stride); \
+}\
+
+#define op_put1(a, b)  a = cm[((b)+4)>>3]
+#define op_put2(a, b)  a = cm[((b)+64)>>7]
+#define op_put3(a, b)  a = cm[((b)+32)>>6]
+#define op_put4(a, b)  a = cm[((b)+512)>>10]
+#define op_avg1(a, b)  a = ((a)+cm[((b)+4)>>3]   +1)>>1
+#define op_avg2(a, b)  a = ((a)+cm[((b)+64)>>7]  +1)>>1
+#define op_avg3(a, b)  a = ((a)+cm[((b)+32)>>6]  +1)>>1
+#define op_avg4(a, b)  a = ((a)+cm[((b)+512)>>10]+1)>>1
+CAVS_SUBPIX(put_   , op_put1, hpel,    0, -1,  5,  5, -1,  0)
+CAVS_SUBPIX(put_   , op_put2, qpel_l, -1, -2, 96, 42, -7,  0)
+CAVS_SUBPIX(put_   , op_put2, qpel_r,  0, -7, 42, 96, -2, -1)
+CAVS_SUBPIX_HV(put_, op_put3, jj,      0, -1,  5,  5, -1,  0,  0, -1,  5,  5, -1, 0, 0)
+CAVS_SUBPIX_HV(put_, op_put4, ff,      0, -1,  5,  5, -1,  0, -1, -2, 96, 42, -7, 0, 0)
+CAVS_SUBPIX_HV(put_, op_put4, ii,     -1, -2, 96, 42, -7,  0,  0, -1,  5,  5, -1, 0, 0)
+CAVS_SUBPIX_HV(put_, op_put4, kk,      0, -7, 42, 96, -2, -1,  0, -1,  5,  5, -1, 0, 0)
+CAVS_SUBPIX_HV(put_, op_put4, qq,      0, -1,  5,  5, -1,  0,  0, -7, 42, 96, -2,-1, 0)
+CAVS_SUBPIX_HV(put_, op_put2, egpr,    0, -1,  5,  5, -1,  0,  0, -1,  5,  5, -1, 0, 1)
+CAVS_SUBPIX(avg_   , op_avg1, hpel,    0, -1,  5,  5, -1,  0)
+CAVS_SUBPIX(avg_   , op_avg2, qpel_l, -1, -2, 96, 42, -7,  0)
+CAVS_SUBPIX(avg_   , op_avg2, qpel_r,  0, -7, 42, 96, -2, -1)
+CAVS_SUBPIX_HV(avg_, op_avg3, jj,      0, -1,  5,  5, -1,  0,  0, -1,  5,  5, -1, 0, 0)
+CAVS_SUBPIX_HV(avg_, op_avg4, ff,      0, -1,  5,  5, -1,  0, -1, -2, 96, 42, -7, 0, 0)
+CAVS_SUBPIX_HV(avg_, op_avg4, ii,     -1, -2, 96, 42, -7,  0,  0, -1,  5,  5, -1, 0, 0)
+CAVS_SUBPIX_HV(avg_, op_avg4, kk,      0, -7, 42, 96, -2, -1,  0, -1,  5,  5, -1, 0, 0)
+CAVS_SUBPIX_HV(avg_, op_avg4, qq,      0, -1,  5,  5, -1,  0,  0, -7, 42, 96, -2,-1, 0)
+CAVS_SUBPIX_HV(avg_, op_avg2, egpr,    0, -1,  5,  5, -1,  0,  0, -1,  5,  5, -1, 0, 1)
+CAVS_MC(put_, 8)
+CAVS_MC(put_, 16)
+CAVS_MC(avg_, 8)
+CAVS_MC(avg_, 16)
+
+void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+
+void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx) {
+#define dspfunc(PFX, IDX, NUM) \
+    c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_c; \
+    c->PFX ## _pixels_tab[IDX][ 1] = ff_ ## PFX ## NUM ## _mc10_c; \
+    c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_c; \
+    c->PFX ## _pixels_tab[IDX][ 3] = ff_ ## PFX ## NUM ## _mc30_c; \
+    c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_c; \
+    c->PFX ## _pixels_tab[IDX][ 5] = ff_ ## PFX ## NUM ## _mc11_c; \
+    c->PFX ## _pixels_tab[IDX][ 6] = ff_ ## PFX ## NUM ## _mc21_c; \
+    c->PFX ## _pixels_tab[IDX][ 7] = ff_ ## PFX ## NUM ## _mc31_c; \
+    c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_c; \
+    c->PFX ## _pixels_tab[IDX][ 9] = ff_ ## PFX ## NUM ## _mc12_c; \
+    c->PFX ## _pixels_tab[IDX][10] = ff_ ## PFX ## NUM ## _mc22_c; \
+    c->PFX ## _pixels_tab[IDX][11] = ff_ ## PFX ## NUM ## _mc32_c; \
+    c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_c; \
+    c->PFX ## _pixels_tab[IDX][13] = ff_ ## PFX ## NUM ## _mc13_c; \
+    c->PFX ## _pixels_tab[IDX][14] = ff_ ## PFX ## NUM ## _mc23_c; \
+    c->PFX ## _pixels_tab[IDX][15] = ff_ ## PFX ## NUM ## _mc33_c
+    dspfunc(put_cavs_qpel, 0, 16);
+    dspfunc(put_cavs_qpel, 1, 8);
+    dspfunc(avg_cavs_qpel, 0, 16);
+    dspfunc(avg_cavs_qpel, 1, 8);
+    c->cavs_filter_lv = cavs_filter_lv_c;
+    c->cavs_filter_lh = cavs_filter_lh_c;
+    c->cavs_filter_cv = cavs_filter_cv_c;
+    c->cavs_filter_ch = cavs_filter_ch_c;
+    c->cavs_idct8_add = cavs_idct8_add_c;
+}
diff --git a/contrib/ffmpeg/libavcodec/cinepak.c b/contrib/ffmpeg/libavcodec/cinepak.c
new file mode 100644
index 000000000..e137377e5
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/cinepak.c
@@ -0,0 +1,453 @@
+/*
+ * Cinepak Video Decoder
+ * Copyright (C) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file cinepak.c
+ * Cinepak video decoder
+ * by Ewald Snel <ewald@rambo.its.tudelft.nl>
+ * For more information on the Cinepak algorithm, visit:
+ *   http://www.csse.monash.edu.au/~timf/
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+
+
+typedef struct {
+    uint8_t  y0, y1, y2, y3;
+    uint8_t  u, v;
+} cvid_codebook_t;
+
+#define MAX_STRIPS      32
+
+typedef struct {
+    uint16_t          id;
+    uint16_t          x1, y1;
+    uint16_t          x2, y2;
+    cvid_codebook_t   v4_codebook[256];
+    cvid_codebook_t   v1_codebook[256];
+} cvid_strip_t;
+
+typedef struct CinepakContext {
+
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    AVFrame frame;
+
+    unsigned char *data;
+    int size;
+
+    int width, height;
+
+    int palette_video;
+    cvid_strip_t strips[MAX_STRIPS];
+
+} CinepakContext;
+
+static void cinepak_decode_codebook (cvid_codebook_t *codebook,
+                                     int chunk_id, int size, uint8_t *data)
+{
+    uint8_t *eod = (data + size);
+    uint32_t flag, mask;
+    int      i, n;
+
+    /* check if this chunk contains 4- or 6-element vectors */
+    n    = (chunk_id & 0x0400) ? 4 : 6;
+    flag = 0;
+    mask = 0;
+
+    for (i=0; i < 256; i++) {
+        if ((chunk_id & 0x0100) && !(mask >>= 1)) {
+            if ((data + 4) > eod)
+                break;
+
+            flag  = BE_32 (data);
+            data += 4;
+            mask  = 0x80000000;
+        }
+
+        if (!(chunk_id & 0x0100) || (flag & mask)) {
+            if ((data + n) > eod)
+                break;
+
+            if (n == 6) {
+                codebook[i].y0 = *data++;
+                codebook[i].y1 = *data++;
+                codebook[i].y2 = *data++;
+                codebook[i].y3 = *data++;
+                codebook[i].u  = 128 + *data++;
+                codebook[i].v  = 128 + *data++;
+            } else {
+                /* this codebook type indicates either greyscale or
+                 * palettized video; if palettized, U & V components will
+                 * not be used so it is safe to set them to 128 for the
+                 * benefit of greyscale rendering in YUV420P */
+                codebook[i].y0 = *data++;
+                codebook[i].y1 = *data++;
+                codebook[i].y2 = *data++;
+                codebook[i].y3 = *data++;
+                codebook[i].u  = 128;
+                codebook[i].v  = 128;
+            }
+        }
+    }
+}
+
+static int cinepak_decode_vectors (CinepakContext *s, cvid_strip_t *strip,
+                                   int chunk_id, int size, uint8_t *data)
+{
+    uint8_t         *eod = (data + size);
+    uint32_t         flag, mask;
+    cvid_codebook_t *codebook;
+    unsigned int     x, y;
+    uint32_t         iy[4];
+    uint32_t         iu[2];
+    uint32_t         iv[2];
+
+    flag = 0;
+    mask = 0;
+
+    for (y=strip->y1; y < strip->y2; y+=4) {
+
+        iy[0] = strip->x1 + (y * s->frame.linesize[0]);
+        iy[1] = iy[0] + s->frame.linesize[0];
+        iy[2] = iy[1] + s->frame.linesize[0];
+        iy[3] = iy[2] + s->frame.linesize[0];
+        iu[0] = (strip->x1/2) + ((y/2) * s->frame.linesize[1]);
+        iu[1] = iu[0] + s->frame.linesize[1];
+        iv[0] = (strip->x1/2) + ((y/2) * s->frame.linesize[2]);
+        iv[1] = iv[0] + s->frame.linesize[2];
+
+        for (x=strip->x1; x < strip->x2; x+=4) {
+            if ((chunk_id & 0x0100) && !(mask >>= 1)) {
+                if ((data + 4) > eod)
+                    return -1;
+
+                flag  = BE_32 (data);
+                data += 4;
+                mask  = 0x80000000;
+            }
+
+            if (!(chunk_id & 0x0100) || (flag & mask)) {
+                if (!(chunk_id & 0x0200) && !(mask >>= 1)) {
+                    if ((data + 4) > eod)
+                        return -1;
+
+                    flag  = BE_32 (data);
+                    data += 4;
+                    mask  = 0x80000000;
+                }
+
+                if ((chunk_id & 0x0200) || (~flag & mask)) {
+                    if (data >= eod)
+                        return -1;
+
+                    codebook = &strip->v1_codebook[*data++];
+                    s->frame.data[0][iy[0] + 0] = codebook->y0;
+                    s->frame.data[0][iy[0] + 1] = codebook->y0;
+                    s->frame.data[0][iy[1] + 0] = codebook->y0;
+                    s->frame.data[0][iy[1] + 1] = codebook->y0;
+                    if (!s->palette_video) {
+                        s->frame.data[1][iu[0]] = codebook->u;
+                        s->frame.data[2][iv[0]] = codebook->v;
+                    }
+
+                    s->frame.data[0][iy[0] + 2] = codebook->y1;
+                    s->frame.data[0][iy[0] + 3] = codebook->y1;
+                    s->frame.data[0][iy[1] + 2] = codebook->y1;
+                    s->frame.data[0][iy[1] + 3] = codebook->y1;
+                    if (!s->palette_video) {
+                        s->frame.data[1][iu[0] + 1] = codebook->u;
+                        s->frame.data[2][iv[0] + 1] = codebook->v;
+                    }
+
+                    s->frame.data[0][iy[2] + 0] = codebook->y2;
+                    s->frame.data[0][iy[2] + 1] = codebook->y2;
+                    s->frame.data[0][iy[3] + 0] = codebook->y2;
+                    s->frame.data[0][iy[3] + 1] = codebook->y2;
+                    if (!s->palette_video) {
+                        s->frame.data[1][iu[1]] = codebook->u;
+                        s->frame.data[2][iv[1]] = codebook->v;
+                    }
+
+                    s->frame.data[0][iy[2] + 2] = codebook->y3;
+                    s->frame.data[0][iy[2] + 3] = codebook->y3;
+                    s->frame.data[0][iy[3] + 2] = codebook->y3;
+                    s->frame.data[0][iy[3] + 3] = codebook->y3;
+                    if (!s->palette_video) {
+                        s->frame.data[1][iu[1] + 1] = codebook->u;
+                        s->frame.data[2][iv[1] + 1] = codebook->v;
+                    }
+
+                } else if (flag & mask) {
+                    if ((data + 4) > eod)
+                        return -1;
+
+                    codebook = &strip->v4_codebook[*data++];
+                    s->frame.data[0][iy[0] + 0] = codebook->y0;
+                    s->frame.data[0][iy[0] + 1] = codebook->y1;
+                    s->frame.data[0][iy[1] + 0] = codebook->y2;
+                    s->frame.data[0][iy[1] + 1] = codebook->y3;
+                    if (!s->palette_video) {
+                        s->frame.data[1][iu[0]] = codebook->u;
+                        s->frame.data[2][iv[0]] = codebook->v;
+                    }
+
+                    codebook = &strip->v4_codebook[*data++];
+                    s->frame.data[0][iy[0] + 2] = codebook->y0;
+                    s->frame.data[0][iy[0] + 3] = codebook->y1;
+                    s->frame.data[0][iy[1] + 2] = codebook->y2;
+                    s->frame.data[0][iy[1] + 3] = codebook->y3;
+                    if (!s->palette_video) {
+                        s->frame.data[1][iu[0] + 1] = codebook->u;
+                        s->frame.data[2][iv[0] + 1] = codebook->v;
+                    }
+
+                    codebook = &strip->v4_codebook[*data++];
+                    s->frame.data[0][iy[2] + 0] = codebook->y0;
+                    s->frame.data[0][iy[2] + 1] = codebook->y1;
+                    s->frame.data[0][iy[3] + 0] = codebook->y2;
+                    s->frame.data[0][iy[3] + 1] = codebook->y3;
+                    if (!s->palette_video) {
+                        s->frame.data[1][iu[1]] = codebook->u;
+                        s->frame.data[2][iv[1]] = codebook->v;
+                    }
+
+                    codebook = &strip->v4_codebook[*data++];
+                    s->frame.data[0][iy[2] + 2] = codebook->y0;
+                    s->frame.data[0][iy[2] + 3] = codebook->y1;
+                    s->frame.data[0][iy[3] + 2] = codebook->y2;
+                    s->frame.data[0][iy[3] + 3] = codebook->y3;
+                    if (!s->palette_video) {
+                        s->frame.data[1][iu[1] + 1] = codebook->u;
+                        s->frame.data[2][iv[1] + 1] = codebook->v;
+                    }
+
+                }
+            }
+
+            iy[0] += 4;  iy[1] += 4;
+            iy[2] += 4;  iy[3] += 4;
+            iu[0] += 2;  iu[1] += 2;
+            iv[0] += 2;  iv[1] += 2;
+        }
+    }
+
+    return 0;
+}
+
+static int cinepak_decode_strip (CinepakContext *s,
+                                 cvid_strip_t *strip, uint8_t *data, int size)
+{
+    uint8_t *eod = (data + size);
+    int      chunk_id, chunk_size;
+
+    /* coordinate sanity checks */
+    if (strip->x1 >= s->width  || strip->x2 > s->width  ||
+        strip->y1 >= s->height || strip->y2 > s->height ||
+        strip->x1 >= strip->x2 || strip->y1 >= strip->y2)
+        return -1;
+
+    while ((data + 4) <= eod) {
+        chunk_id   = BE_16 (&data[0]);
+        chunk_size = BE_16 (&data[2]) - 4;
+        if(chunk_size < 0)
+            return -1;
+
+        data      += 4;
+        chunk_size = ((data + chunk_size) > eod) ? (eod - data) : chunk_size;
+
+        switch (chunk_id) {
+
+        case 0x2000:
+        case 0x2100:
+        case 0x2400:
+        case 0x2500:
+            cinepak_decode_codebook (strip->v4_codebook, chunk_id,
+                chunk_size, data);
+            break;
+
+        case 0x2200:
+        case 0x2300:
+        case 0x2600:
+        case 0x2700:
+            cinepak_decode_codebook (strip->v1_codebook, chunk_id,
+                chunk_size, data);
+            break;
+
+        case 0x3000:
+        case 0x3100:
+        case 0x3200:
+            return cinepak_decode_vectors (s, strip, chunk_id,
+                chunk_size, data);
+        }
+
+        data += chunk_size;
+    }
+
+    return -1;
+}
+
+static int cinepak_decode (CinepakContext *s)
+{
+    uint8_t      *eod = (s->data + s->size);
+    int           i, result, strip_size, frame_flags, num_strips;
+    int           y0 = 0;
+    int           encoded_buf_size;
+    /* if true, Cinepak data is from a Sega FILM/CPK file */
+    int           sega_film_data = 0;
+
+    if (s->size < 10)
+        return -1;
+
+    frame_flags = s->data[0];
+    num_strips  = BE_16 (&s->data[8]);
+    encoded_buf_size = ((s->data[1] << 16) | BE_16 (&s->data[2]));
+    if (encoded_buf_size != s->size)
+        sega_film_data = 1;
+    if (sega_film_data)
+        s->data    += 12;
+    else
+        s->data    += 10;
+
+    if (num_strips > MAX_STRIPS)
+        num_strips = MAX_STRIPS;
+
+    for (i=0; i < num_strips; i++) {
+        if ((s->data + 12) > eod)
+            return -1;
+
+        s->strips[i].id = BE_16 (s->data);
+        s->strips[i].y1 = y0;
+        s->strips[i].x1 = 0;
+        s->strips[i].y2 = y0 + BE_16 (&s->data[8]);
+        s->strips[i].x2 = s->avctx->width;
+
+        strip_size = BE_16 (&s->data[2]) - 12;
+        s->data   += 12;
+        strip_size = ((s->data + strip_size) > eod) ? (eod - s->data) : strip_size;
+
+        if ((i > 0) && !(frame_flags & 0x01)) {
+            memcpy (s->strips[i].v4_codebook, s->strips[i-1].v4_codebook,
+                sizeof(s->strips[i].v4_codebook));
+            memcpy (s->strips[i].v1_codebook, s->strips[i-1].v1_codebook,
+                sizeof(s->strips[i].v1_codebook));
+        }
+
+        result = cinepak_decode_strip (s, &s->strips[i], s->data, strip_size);
+
+        if (result != 0)
+            return result;
+
+        s->data += strip_size;
+        y0    = s->strips[i].y2;
+    }
+    return 0;
+}
+
+static int cinepak_decode_init(AVCodecContext *avctx)
+{
+    CinepakContext *s = (CinepakContext *)avctx->priv_data;
+
+    s->avctx = avctx;
+    s->width = (avctx->width + 3) & ~3;
+    s->height = (avctx->height + 3) & ~3;
+
+    // check for paletted data
+    if ((avctx->palctrl == NULL) || (avctx->bits_per_sample == 40)) {
+        s->palette_video = 0;
+        avctx->pix_fmt = PIX_FMT_YUV420P;
+    } else {
+        s->palette_video = 1;
+        avctx->pix_fmt = PIX_FMT_PAL8;
+    }
+
+    avctx->has_b_frames = 0;
+    dsputil_init(&s->dsp, avctx);
+
+    s->frame.data[0] = NULL;
+
+    return 0;
+}
+
+static int cinepak_decode_frame(AVCodecContext *avctx,
+                                void *data, int *data_size,
+                                uint8_t *buf, int buf_size)
+{
+    CinepakContext *s = (CinepakContext *)avctx->priv_data;
+
+    s->data = buf;
+    s->size = buf_size;
+
+    s->frame.reference = 1;
+    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE |
+                            FF_BUFFER_HINTS_REUSABLE;
+    if (avctx->reget_buffer(avctx, &s->frame)) {
+        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+        return -1;
+    }
+
+    cinepak_decode(s);
+
+    if (s->palette_video) {
+        memcpy (s->frame.data[1], avctx->palctrl->palette, AVPALETTE_SIZE);
+        if (avctx->palctrl->palette_changed) {
+            s->frame.palette_has_changed = 1;
+            avctx->palctrl->palette_changed = 0;
+        } else
+            s->frame.palette_has_changed = 0;
+    }
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->frame;
+
+    /* report that the buffer was completely consumed */
+    return buf_size;
+}
+
+static int cinepak_decode_end(AVCodecContext *avctx)
+{
+    CinepakContext *s = (CinepakContext *)avctx->priv_data;
+
+    if (s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    return 0;
+}
+
+AVCodec cinepak_decoder = {
+    "cinepak",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_CINEPAK,
+    sizeof(CinepakContext),
+    cinepak_decode_init,
+    NULL,
+    cinepak_decode_end,
+    cinepak_decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/cljr.c b/contrib/ffmpeg/libavcodec/cljr.c
new file mode 100644
index 000000000..44810f5cf
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/cljr.c
@@ -0,0 +1,158 @@
+/*
+ * Cirrus Logic AccuPak (CLJR) codec
+ * Copyright (c) 2003 Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file cljr.c
+ * Cirrus Logic AccuPak codec.
+ */
+
+#include "avcodec.h"
+#include "mpegvideo.h"
+
+typedef struct CLJRContext{
+    AVCodecContext *avctx;
+    AVFrame picture;
+    int delta[16];
+    int offset[4];
+    GetBitContext gb;
+} CLJRContext;
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    CLJRContext * const a = avctx->priv_data;
+    AVFrame *picture = data;
+    AVFrame * const p= (AVFrame*)&a->picture;
+    int x, y;
+
+    if(p->data[0])
+        avctx->release_buffer(avctx, p);
+
+    p->reference= 0;
+    if(avctx->get_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    p->pict_type= I_TYPE;
+    p->key_frame= 1;
+
+    init_get_bits(&a->gb, buf, buf_size);
+
+    for(y=0; y<avctx->height; y++){
+        uint8_t *luma= &a->picture.data[0][ y*a->picture.linesize[0] ];
+        uint8_t *cb= &a->picture.data[1][ y*a->picture.linesize[1] ];
+        uint8_t *cr= &a->picture.data[2][ y*a->picture.linesize[2] ];
+        for(x=0; x<avctx->width; x+=4){
+                luma[3] = get_bits(&a->gb, 5) << 3;
+            luma[2] = get_bits(&a->gb, 5) << 3;
+            luma[1] = get_bits(&a->gb, 5) << 3;
+            luma[0] = get_bits(&a->gb, 5) << 3;
+            luma+= 4;
+            *(cb++) = get_bits(&a->gb, 6) << 2;
+            *(cr++) = get_bits(&a->gb, 6) << 2;
+        }
+    }
+
+    *picture= *(AVFrame*)&a->picture;
+    *data_size = sizeof(AVPicture);
+
+    emms_c();
+
+    return buf_size;
+}
+
+#if 0
+static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
+    CLJRContext * const a = avctx->priv_data;
+    AVFrame *pict = data;
+    AVFrame * const p= (AVFrame*)&a->picture;
+    int size;
+    int mb_x, mb_y;
+
+    *p = *pict;
+    p->pict_type= I_TYPE;
+    p->key_frame= 1;
+
+    emms_c();
+
+    align_put_bits(&a->pb);
+    while(get_bit_count(&a->pb)&31)
+        put_bits(&a->pb, 8, 0);
+
+    size= get_bit_count(&a->pb)/32;
+
+    return size*4;
+}
+#endif
+
+static void common_init(AVCodecContext *avctx){
+    CLJRContext * const a = avctx->priv_data;
+
+    avctx->coded_frame= (AVFrame*)&a->picture;
+    a->avctx= avctx;
+}
+
+static int decode_init(AVCodecContext *avctx){
+
+    common_init(avctx);
+
+    avctx->pix_fmt= PIX_FMT_YUV411P;
+
+    return 0;
+}
+
+#if 0
+static int encode_init(AVCodecContext *avctx){
+
+    common_init(avctx);
+
+    return 0;
+}
+#endif
+
+AVCodec cljr_decoder = {
+    "cljr",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_CLJR,
+    sizeof(CLJRContext),
+    decode_init,
+    NULL,
+    NULL,
+    decode_frame,
+    CODEC_CAP_DR1,
+};
+#if 0
+#ifdef CONFIG_ENCODERS
+
+AVCodec cljr_encoder = {
+    "cljr",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_cljr,
+    sizeof(CLJRContext),
+    encode_init,
+    encode_frame,
+    //encode_end,
+};
+
+#endif //CONFIG_ENCODERS
+#endif
diff --git a/contrib/ffmpeg/libavcodec/cook.c b/contrib/ffmpeg/libavcodec/cook.c
new file mode 100644
index 000000000..47d9ce2c3
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/cook.c
@@ -0,0 +1,1312 @@
+/*
+ * COOK compatible decoder
+ * Copyright (c) 2003 Sascha Sommer
+ * Copyright (c) 2005 Benjamin Larsson
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file cook.c
+ * Cook compatible decoder.
+ * This decoder handles RealNetworks, RealAudio G2 data.
+ * Cook is identified by the codec name cook in RM files.
+ *
+ * To use this decoder, a calling application must supply the extradata
+ * bytes provided from the RM container; 8+ bytes for mono streams and
+ * 16+ for stereo streams (maybe more).
+ *
+ * Codec technicalities (all this assume a buffer length of 1024):
+ * Cook works with several different techniques to achieve its compression.
+ * In the timedomain the buffer is divided into 8 pieces and quantized. If
+ * two neighboring pieces have different quantization index a smooth
+ * quantization curve is used to get a smooth overlap between the different
+ * pieces.
+ * To get to the transformdomain Cook uses a modulated lapped transform.
+ * The transform domain has 50 subbands with 20 elements each. This
+ * means only a maximum of 50*20=1000 coefficients are used out of the 1024
+ * available.
+ */
+
+#include <math.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include "avcodec.h"
+#include "bitstream.h"
+#include "dsputil.h"
+
+#include "cookdata.h"
+
+/* the different Cook versions */
+#define MONO_COOK1      0x1000001
+#define MONO_COOK2      0x1000002
+#define JOINT_STEREO    0x1000003
+#define MC_COOK         0x2000000   //multichannel Cook, not supported
+
+#define SUBBAND_SIZE    20
+//#define COOKDEBUG
+
+typedef struct {
+    int     size;
+    int     qidx_table1[8];
+    int     qidx_table2[8];
+} COOKgain;
+
+typedef struct __attribute__((__packed__)){
+    /* codec data start */
+    uint32_t cookversion;               //in network order, bigendian
+    uint16_t samples_per_frame;         //amount of samples per frame per channel, bigendian
+    uint16_t subbands;                  //amount of bands used in the frequency domain, bigendian
+    /* Mono extradata ends here. */
+    uint32_t unused;
+    uint16_t js_subband_start;          //bigendian
+    uint16_t js_vlc_bits;               //bigendian
+    /* Stereo extradata ends here. */
+} COOKextradata;
+
+
+typedef struct {
+    GetBitContext       gb;
+    /* stream data */
+    int                 nb_channels;
+    int                 joint_stereo;
+    int                 bit_rate;
+    int                 sample_rate;
+    int                 samples_per_channel;
+    int                 samples_per_frame;
+    int                 subbands;
+    int                 log2_numvector_size;
+    int                 numvector_size;                //1 << log2_numvector_size;
+    int                 js_subband_start;
+    int                 total_subbands;
+    int                 num_vectors;
+    int                 bits_per_subpacket;
+    /* states */
+    int                 random_state;
+
+    /* transform data */
+    FFTContext          fft_ctx;
+    FFTSample           mlt_tmp[1024] __attribute__((aligned(16))); /* temporary storage for imlt */
+    float*              mlt_window;
+    float*              mlt_precos;
+    float*              mlt_presin;
+    float*              mlt_postcos;
+    int                 fft_size;
+    int                 fft_order;
+    int                 mlt_size;       //modulated lapped transform size
+
+    /* gain buffers */
+    COOKgain*           gain_now_ptr;
+    COOKgain*           gain_previous_ptr;
+    COOKgain            gain_current;
+    COOKgain            gain_now;
+    COOKgain            gain_previous;
+    COOKgain            gain_channel1[2];
+    COOKgain            gain_channel2[2];
+
+    /* VLC data */
+    int                 js_vlc_bits;
+    VLC                 envelope_quant_index[13];
+    VLC                 sqvh[7];          //scalar quantization
+    VLC                 ccpl;             //channel coupling
+
+    /* generatable tables and related variables */
+    int                 gain_size_factor;
+    float               gain_table[23];
+    float               pow2tab[127];
+    float               rootpow2tab[127];
+
+    /* data buffers */
+
+    uint8_t*            decoded_bytes_buffer;
+    float               mono_mdct_output[2048] __attribute__((aligned(16)));
+    float*              previous_buffer_ptr[2];
+    float               mono_previous_buffer1[1024];
+    float               mono_previous_buffer2[1024];
+    float*              decode_buf_ptr[4];
+    float*              decode_buf_ptr2[2];
+    float               decode_buffer_1[1024];
+    float               decode_buffer_2[1024];
+    float               decode_buffer_3[1024];
+    float               decode_buffer_4[1024];
+} COOKContext;
+
+/* debug functions */
+
+#ifdef COOKDEBUG
+static void dump_float_table(float* table, int size, int delimiter) {
+    int i=0;
+    av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i);
+    for (i=0 ; i<size ; i++) {
+        av_log(NULL, AV_LOG_ERROR, "%5.1f, ", table[i]);
+        if ((i+1)%delimiter == 0) av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i+1);
+    }
+}
+
+static void dump_int_table(int* table, int size, int delimiter) {
+    int i=0;
+    av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i);
+    for (i=0 ; i<size ; i++) {
+        av_log(NULL, AV_LOG_ERROR, "%d, ", table[i]);
+        if ((i+1)%delimiter == 0) av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i+1);
+    }
+}
+
+static void dump_short_table(short* table, int size, int delimiter) {
+    int i=0;
+    av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i);
+    for (i=0 ; i<size ; i++) {
+        av_log(NULL, AV_LOG_ERROR, "%d, ", table[i]);
+        if ((i+1)%delimiter == 0) av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i+1);
+    }
+}
+
+#endif
+
+/*************** init functions ***************/
+
+/* table generator */
+static void init_pow2table(COOKContext *q){
+    int i;
+    q->pow2tab[63] = 1.0;
+    for (i=1 ; i<64 ; i++){
+        q->pow2tab[63+i]=(float)((uint64_t)1<<i);
+        q->pow2tab[63-i]=1.0/(float)((uint64_t)1<<i);
+    }
+}
+
+/* table generator */
+static void init_rootpow2table(COOKContext *q){
+    int i;
+    q->rootpow2tab[63] = 1.0;
+    for (i=1 ; i<64 ; i++){
+        q->rootpow2tab[63+i]=sqrt((float)((uint64_t)1<<i));
+        q->rootpow2tab[63-i]=sqrt(1.0/(float)((uint64_t)1<<i));
+    }
+}
+
+/* table generator */
+static void init_gain_table(COOKContext *q) {
+    int i;
+    q->gain_size_factor = q->samples_per_channel/8;
+    for (i=0 ; i<23 ; i++) {
+        q->gain_table[i] = pow((double)q->pow2tab[i+52] ,
+                               (1.0/(double)q->gain_size_factor));
+    }
+}
+
+
+static int init_cook_vlc_tables(COOKContext *q) {
+    int i, result;
+
+    result = 0;
+    for (i=0 ; i<13 ; i++) {
+        result &= init_vlc (&q->envelope_quant_index[i], 9, 24,
+            envelope_quant_index_huffbits[i], 1, 1,
+            envelope_quant_index_huffcodes[i], 2, 2, 0);
+    }
+    av_log(NULL,AV_LOG_DEBUG,"sqvh VLC init\n");
+    for (i=0 ; i<7 ; i++) {
+        result &= init_vlc (&q->sqvh[i], vhvlcsize_tab[i], vhsize_tab[i],
+            cvh_huffbits[i], 1, 1,
+            cvh_huffcodes[i], 2, 2, 0);
+    }
+
+    if (q->nb_channels==2 && q->joint_stereo==1){
+        result &= init_vlc (&q->ccpl, 6, (1<<q->js_vlc_bits)-1,
+            ccpl_huffbits[q->js_vlc_bits-2], 1, 1,
+            ccpl_huffcodes[q->js_vlc_bits-2], 2, 2, 0);
+        av_log(NULL,AV_LOG_DEBUG,"Joint-stereo VLC used.\n");
+    }
+
+    av_log(NULL,AV_LOG_DEBUG,"VLC tables initialized.\n");
+    return result;
+}
+
+static int init_cook_mlt(COOKContext *q) {
+    int j;
+    float alpha;
+
+    /* Allocate the buffers, could be replaced with a static [512]
+       array if needed. */
+    q->mlt_size = q->samples_per_channel;
+    q->mlt_window = av_malloc(sizeof(float)*q->mlt_size);
+    q->mlt_precos = av_malloc(sizeof(float)*q->mlt_size/2);
+    q->mlt_presin = av_malloc(sizeof(float)*q->mlt_size/2);
+    q->mlt_postcos = av_malloc(sizeof(float)*q->mlt_size/2);
+
+    /* Initialize the MLT window: simple sine window. */
+    alpha = M_PI / (2.0 * (float)q->mlt_size);
+    for(j=0 ; j<q->mlt_size ; j++) {
+        q->mlt_window[j] = sin((j + 512.0/(float)q->mlt_size) * alpha);
+    }
+
+    /* pre/post twiddle factors */
+    for (j=0 ; j<q->mlt_size/2 ; j++){
+        q->mlt_precos[j] = cos( ((j+0.25)*M_PI)/q->mlt_size);
+        q->mlt_presin[j] = sin( ((j+0.25)*M_PI)/q->mlt_size);
+        q->mlt_postcos[j] = (float)sqrt(2.0/(float)q->mlt_size)*cos( ((float)j*M_PI) /q->mlt_size); //sqrt(2/MLT_size) = scalefactor
+    }
+
+    /* Initialize the FFT. */
+    ff_fft_init(&q->fft_ctx, av_log2(q->mlt_size)-1, 0);
+    av_log(NULL,AV_LOG_DEBUG,"FFT initialized, order = %d.\n",
+           av_log2(q->samples_per_channel)-1);
+
+    return (int)(q->mlt_window && q->mlt_precos && q->mlt_presin && q->mlt_postcos);
+}
+
+/*************** init functions end ***********/
+
+/**
+ * Cook indata decoding, every 32 bits are XORed with 0x37c511f2.
+ * Why? No idea, some checksum/error detection method maybe.
+ * Nice way to waste CPU cycles.
+ *
+ * @param in        pointer to 32bit array of indata
+ * @param bits      amount of bits
+ * @param out       pointer to 32bit array of outdata
+ */
+
+static inline void decode_bytes(uint8_t* inbuffer, uint8_t* out, int bytes){
+    int i;
+    uint32_t* buf = (uint32_t*) inbuffer;
+    uint32_t* obuf = (uint32_t*) out;
+    /* FIXME: 64 bit platforms would be able to do 64 bits at a time.
+     * I'm too lazy though, should be something like
+     * for(i=0 ; i<bitamount/64 ; i++)
+     *     (int64_t)out[i] = 0x37c511f237c511f2^be2me_64(int64_t)in[i]);
+     * Buffer alignment needs to be checked. */
+
+
+    for(i=0 ; i<bytes/4 ; i++){
+#ifdef WORDS_BIGENDIAN
+        obuf[i] = 0x37c511f2^buf[i];
+#else
+        obuf[i] = 0xf211c537^buf[i];
+#endif
+    }
+}
+
+/**
+ * Cook uninit
+ */
+
+static int cook_decode_close(AVCodecContext *avctx)
+{
+    int i;
+    COOKContext *q = avctx->priv_data;
+    av_log(NULL,AV_LOG_DEBUG, "Deallocating memory.\n");
+
+    /* Free allocated memory buffers. */
+    av_free(q->mlt_window);
+    av_free(q->mlt_precos);
+    av_free(q->mlt_presin);
+    av_free(q->mlt_postcos);
+    av_free(q->decoded_bytes_buffer);
+
+    /* Free the transform. */
+    ff_fft_end(&q->fft_ctx);
+
+    /* Free the VLC tables. */
+    for (i=0 ; i<13 ; i++) {
+        free_vlc(&q->envelope_quant_index[i]);
+    }
+    for (i=0 ; i<7 ; i++) {
+        free_vlc(&q->sqvh[i]);
+    }
+    if(q->nb_channels==2 && q->joint_stereo==1 ){
+        free_vlc(&q->ccpl);
+    }
+
+    av_log(NULL,AV_LOG_DEBUG,"Memory deallocated.\n");
+
+    return 0;
+}
+
+/**
+ * Fill the COOKgain structure for the timedomain quantization.
+ *
+ * @param q                 pointer to the COOKContext
+ * @param gaininfo          pointer to the COOKgain
+ */
+
+static void decode_gain_info(GetBitContext *gb, COOKgain* gaininfo) {
+    int i;
+
+    while (get_bits1(gb)) {}
+
+    gaininfo->size = get_bits_count(gb) - 1;     //amount of elements*2 to update
+
+    if (get_bits_count(gb) - 1 <= 0) return;
+
+    for (i=0 ; i<gaininfo->size ; i++){
+        gaininfo->qidx_table1[i] = get_bits(gb,3);
+        if (get_bits1(gb)) {
+            gaininfo->qidx_table2[i] = get_bits(gb,4) - 7;  //convert to signed
+        } else {
+            gaininfo->qidx_table2[i] = -1;
+        }
+    }
+}
+
+/**
+ * Create the quant index table needed for the envelope.
+ *
+ * @param q                 pointer to the COOKContext
+ * @param quant_index_table pointer to the array
+ */
+
+static void decode_envelope(COOKContext *q, int* quant_index_table) {
+    int i,j, vlc_index;
+    int bitbias;
+
+    bitbias = get_bits_count(&q->gb);
+    quant_index_table[0]= get_bits(&q->gb,6) - 6;       //This is used later in categorize
+
+    for (i=1 ; i < q->total_subbands ; i++){
+        vlc_index=i;
+        if (i >= q->js_subband_start * 2) {
+            vlc_index-=q->js_subband_start;
+        } else {
+            vlc_index/=2;
+            if(vlc_index < 1) vlc_index = 1;
+        }
+        if (vlc_index>13) vlc_index = 13;           //the VLC tables >13 are identical to No. 13
+
+        j = get_vlc2(&q->gb, q->envelope_quant_index[vlc_index-1].table,
+                     q->envelope_quant_index[vlc_index-1].bits,2);
+        quant_index_table[i] = quant_index_table[i-1] + j - 12;    //differential encoding
+    }
+}
+
+/**
+ * Create the quant value table.
+ *
+ * @param q                 pointer to the COOKContext
+ * @param quant_value_table pointer to the array
+ */
+
+static void inline dequant_envelope(COOKContext *q, int* quant_index_table,
+                                    float* quant_value_table){
+
+    int i;
+    for(i=0 ; i < q->total_subbands ; i++){
+        quant_value_table[i] = q->rootpow2tab[quant_index_table[i]+63];
+    }
+}
+
+/**
+ * Calculate the category and category_index vector.
+ *
+ * @param q                     pointer to the COOKContext
+ * @param quant_index_table     pointer to the array
+ * @param category              pointer to the category array
+ * @param category_index        pointer to the category_index array
+ */
+
+static void categorize(COOKContext *q, int* quant_index_table,
+                       int* category, int* category_index){
+    int exp_idx, bias, tmpbias, bits_left, num_bits, index, v, i, j;
+    int exp_index2[102];
+    int exp_index1[102];
+
+    int tmp_categorize_array1[128];
+    int tmp_categorize_array1_idx=0;
+    int tmp_categorize_array2[128];
+    int tmp_categorize_array2_idx=0;
+    int category_index_size=0;
+
+    bits_left =  q->bits_per_subpacket - get_bits_count(&q->gb);
+
+    if(bits_left > q->samples_per_channel) {
+        bits_left = q->samples_per_channel +
+                    ((bits_left - q->samples_per_channel)*5)/8;
+        //av_log(NULL, AV_LOG_ERROR, "bits_left = %d\n",bits_left);
+    }
+
+    memset(&exp_index1,0,102*sizeof(int));
+    memset(&exp_index2,0,102*sizeof(int));
+    memset(&tmp_categorize_array1,0,128*sizeof(int));
+    memset(&tmp_categorize_array2,0,128*sizeof(int));
+
+    bias=-32;
+
+    /* Estimate bias. */
+    for (i=32 ; i>0 ; i=i/2){
+        num_bits = 0;
+        index = 0;
+        for (j=q->total_subbands ; j>0 ; j--){
+            exp_idx = (i - quant_index_table[index] + bias) / 2;
+            if (exp_idx<0){
+                exp_idx=0;
+            } else if(exp_idx >7) {
+                exp_idx=7;
+            }
+            index++;
+            num_bits+=expbits_tab[exp_idx];
+        }
+        if(num_bits >= bits_left - 32){
+            bias+=i;
+        }
+    }
+
+    /* Calculate total number of bits. */
+    num_bits=0;
+    for (i=0 ; i<q->total_subbands ; i++) {
+        exp_idx = (bias - quant_index_table[i]) / 2;
+        if (exp_idx<0) {
+            exp_idx=0;
+        } else if(exp_idx >7) {
+            exp_idx=7;
+        }
+        num_bits += expbits_tab[exp_idx];
+        exp_index1[i] = exp_idx;
+        exp_index2[i] = exp_idx;
+    }
+    tmpbias = bias = num_bits;
+
+    for (j = 1 ; j < q->numvector_size ; j++) {
+        if (tmpbias + bias > 2*bits_left) {  /* ---> */
+            int max = -999999;
+            index=-1;
+            for (i=0 ; i<q->total_subbands ; i++){
+                if (exp_index1[i] < 7) {
+                    v = (-2*exp_index1[i]) - quant_index_table[i] - 32;
+                    if ( v >= max) {
+                        max = v;
+                        index = i;
+                    }
+                }
+            }
+            if(index==-1)break;
+            tmp_categorize_array1[tmp_categorize_array1_idx++] = index;
+            tmpbias -= expbits_tab[exp_index1[index]] -
+                       expbits_tab[exp_index1[index]+1];
+            ++exp_index1[index];
+        } else {  /* <--- */
+            int min = 999999;
+            index=-1;
+            for (i=0 ; i<q->total_subbands ; i++){
+                if(exp_index2[i] > 0){
+                    v = (-2*exp_index2[i])-quant_index_table[i];
+                    if ( v < min) {
+                        min = v;
+                        index = i;
+                    }
+                }
+            }
+            if(index == -1)break;
+            tmp_categorize_array2[tmp_categorize_array2_idx++] = index;
+            tmpbias -= expbits_tab[exp_index2[index]] -
+                       expbits_tab[exp_index2[index]-1];
+            --exp_index2[index];
+        }
+    }
+
+    for(i=0 ; i<q->total_subbands ; i++)
+        category[i] = exp_index2[i];
+
+    /* Concatenate the two arrays. */
+    for(i=tmp_categorize_array2_idx-1 ; i >= 0; i--)
+        category_index[category_index_size++] =  tmp_categorize_array2[i];
+
+    for(i=0;i<tmp_categorize_array1_idx;i++)
+        category_index[category_index_size++ ] =  tmp_categorize_array1[i];
+
+    /* FIXME: mc_sich_ra8_20.rm triggers this, not sure with what we
+       should fill the remaining bytes. */
+    for(i=category_index_size;i<q->numvector_size;i++)
+        category_index[i]=0;
+
+}
+
+
+/**
+ * Expand the category vector.
+ *
+ * @param q                     pointer to the COOKContext
+ * @param category              pointer to the category array
+ * @param category_index        pointer to the category_index array
+ */
+
+static void inline expand_category(COOKContext *q, int* category,
+                                   int* category_index){
+    int i;
+    for(i=0 ; i<q->num_vectors ; i++){
+        ++category[category_index[i]];
+    }
+}
+
+/**
+ * The real requantization of the mltcoefs
+ *
+ * @param q                     pointer to the COOKContext
+ * @param index                 index
+ * @param band                  current subband
+ * @param quant_value_table     pointer to the array
+ * @param subband_coef_index    array of indexes to quant_centroid_tab
+ * @param subband_coef_noise    use random noise instead of predetermined value
+ * @param mlt_buffer            pointer to the mlt buffer
+ */
+
+
+static void scalar_dequant(COOKContext *q, int index, int band,
+                           float* quant_value_table, int* subband_coef_index,
+                           int* subband_coef_noise, float* mlt_buffer){
+    int i;
+    float f1;
+
+    for(i=0 ; i<SUBBAND_SIZE ; i++) {
+        if (subband_coef_index[i]) {
+            if (subband_coef_noise[i]) {
+                f1 = -quant_centroid_tab[index][subband_coef_index[i]];
+            } else {
+                f1 = quant_centroid_tab[index][subband_coef_index[i]];
+            }
+        } else {
+            /* noise coding if subband_coef_noise[i] == 0 */
+            q->random_state = q->random_state * 214013 + 2531011;    //typical RNG numbers
+            f1 = randsign[(q->random_state/0x1000000)&1] * dither_tab[index]; //>>31
+        }
+        mlt_buffer[band*20+ i] = f1 * quant_value_table[band];
+    }
+}
+/**
+ * Unpack the subband_coef_index and subband_coef_noise vectors.
+ *
+ * @param q                     pointer to the COOKContext
+ * @param category              pointer to the category array
+ * @param subband_coef_index    array of indexes to quant_centroid_tab
+ * @param subband_coef_noise    use random noise instead of predetermined value
+ */
+
+static int unpack_SQVH(COOKContext *q, int category, int* subband_coef_index,
+                       int* subband_coef_noise) {
+    int i,j;
+    int vlc, vd ,tmp, result;
+    int ub;
+    int cb;
+
+    vd = vd_tab[category];
+    result = 0;
+    for(i=0 ; i<vpr_tab[category] ; i++){
+        ub = get_bits_count(&q->gb);
+        vlc = get_vlc2(&q->gb, q->sqvh[category].table, q->sqvh[category].bits, 3);
+        cb = get_bits_count(&q->gb);
+        if (q->bits_per_subpacket < get_bits_count(&q->gb)){
+            vlc = 0;
+            result = 1;
+        }
+        for(j=vd-1 ; j>=0 ; j--){
+            tmp = (vlc * invradix_tab[category])/0x100000;
+            subband_coef_index[vd*i+j] = vlc - tmp * (kmax_tab[category]+1);
+            vlc = tmp;
+        }
+        for(j=0 ; j<vd ; j++){
+            if (subband_coef_index[i*vd + j]) {
+                if(get_bits_count(&q->gb) < q->bits_per_subpacket){
+                    subband_coef_noise[i*vd+j] = get_bits1(&q->gb);
+                } else {
+                    result=1;
+                    subband_coef_noise[i*vd+j]=0;
+                }
+            } else {
+                subband_coef_noise[i*vd+j]=0;
+            }
+        }
+    }
+    return result;
+}
+
+
+/**
+ * Fill the mlt_buffer with mlt coefficients.
+ *
+ * @param q                 pointer to the COOKContext
+ * @param category          pointer to the category array
+ * @param quant_value_table pointer to the array
+ * @param mlt_buffer        pointer to mlt coefficients
+ */
+
+
+static void decode_vectors(COOKContext* q, int* category,
+                           float* quant_value_table, float* mlt_buffer){
+    /* A zero in this table means that the subband coefficient is
+       random noise coded. */
+    int subband_coef_noise[SUBBAND_SIZE];
+    /* A zero in this table means that the subband coefficient is a
+       positive multiplicator. */
+    int subband_coef_index[SUBBAND_SIZE];
+    int band, j;
+    int index=0;
+
+    for(band=0 ; band<q->total_subbands ; band++){
+        index = category[band];
+        if(category[band] < 7){
+            if(unpack_SQVH(q, category[band], subband_coef_index, subband_coef_noise)){
+                index=7;
+                for(j=0 ; j<q->total_subbands ; j++) category[band+j]=7;
+            }
+        }
+        if(index==7) {
+            memset(subband_coef_index, 0, sizeof(subband_coef_index));
+            memset(subband_coef_noise, 0, sizeof(subband_coef_noise));
+        }
+        scalar_dequant(q, index, band, quant_value_table, subband_coef_index,
+                       subband_coef_noise, mlt_buffer);
+    }
+
+    if(q->total_subbands*SUBBAND_SIZE >= q->samples_per_channel){
+        return;
+    }
+}
+
+
+/**
+ * function for decoding mono data
+ *
+ * @param q                 pointer to the COOKContext
+ * @param mlt_buffer1       pointer to left channel mlt coefficients
+ * @param mlt_buffer2       pointer to right channel mlt coefficients
+ */
+
+static void mono_decode(COOKContext *q, float* mlt_buffer) {
+
+    int category_index[128];
+    float quant_value_table[102];
+    int quant_index_table[102];
+    int category[128];
+
+    memset(&category, 0, 128*sizeof(int));
+    memset(&quant_value_table, 0, 102*sizeof(int));
+    memset(&category_index, 0, 128*sizeof(int));
+
+    decode_envelope(q, quant_index_table);
+    q->num_vectors = get_bits(&q->gb,q->log2_numvector_size);
+    dequant_envelope(q, quant_index_table, quant_value_table);
+    categorize(q, quant_index_table, category, category_index);
+    expand_category(q, category, category_index);
+    decode_vectors(q, category, quant_value_table, mlt_buffer);
+}
+
+
+/**
+ * The modulated lapped transform, this takes transform coefficients
+ * and transforms them into timedomain samples. This is done through
+ * an FFT-based algorithm with pre- and postrotation steps.
+ * A window and reorder step is also included.
+ *
+ * @param q                 pointer to the COOKContext
+ * @param inbuffer          pointer to the mltcoefficients
+ * @param outbuffer         pointer to the timedomain buffer
+ * @param mlt_tmp           pointer to temporary storage space
+ */
+
+static void cook_imlt(COOKContext *q, float* inbuffer, float* outbuffer,
+                      float* mlt_tmp){
+    int i;
+
+    /* prerotation */
+    for(i=0 ; i<q->mlt_size ; i+=2){
+        outbuffer[i] = (q->mlt_presin[i/2] * inbuffer[q->mlt_size-1-i]) +
+                       (q->mlt_precos[i/2] * inbuffer[i]);
+        outbuffer[i+1] = (q->mlt_precos[i/2] * inbuffer[q->mlt_size-1-i]) -
+                         (q->mlt_presin[i/2] * inbuffer[i]);
+    }
+
+    /* FFT */
+    ff_fft_permute(&q->fft_ctx, (FFTComplex *) outbuffer);
+    ff_fft_calc (&q->fft_ctx, (FFTComplex *) outbuffer);
+
+    /* postrotation */
+    for(i=0 ; i<q->mlt_size ; i+=2){
+        mlt_tmp[i] =               (q->mlt_postcos[(q->mlt_size-1-i)/2] * outbuffer[i+1]) +
+                                   (q->mlt_postcos[i/2] * outbuffer[i]);
+        mlt_tmp[q->mlt_size-1-i] = (q->mlt_postcos[(q->mlt_size-1-i)/2] * outbuffer[i]) -
+                                   (q->mlt_postcos[i/2] * outbuffer[i+1]);
+    }
+
+    /* window and reorder */
+    for(i=0 ; i<q->mlt_size/2 ; i++){
+        outbuffer[i] = mlt_tmp[q->mlt_size/2-1-i] * q->mlt_window[i];
+        outbuffer[q->mlt_size-1-i]= mlt_tmp[q->mlt_size/2-1-i] *
+                                    q->mlt_window[q->mlt_size-1-i];
+        outbuffer[q->mlt_size+i]= mlt_tmp[q->mlt_size/2+i] *
+                                  q->mlt_window[q->mlt_size-1-i];
+        outbuffer[2*q->mlt_size-1-i]= -(mlt_tmp[q->mlt_size/2+i] *
+                                      q->mlt_window[i]);
+    }
+}
+
+
+/**
+ * the actual requantization of the timedomain samples
+ *
+ * @param q                 pointer to the COOKContext
+ * @param buffer            pointer to the timedomain buffer
+ * @param gain_index        index for the block multiplier
+ * @param gain_index_next   index for the next block multiplier
+ */
+
+static void interpolate(COOKContext *q, float* buffer,
+                        int gain_index, int gain_index_next){
+    int i;
+    float fc1, fc2;
+    fc1 = q->pow2tab[gain_index+63];
+
+    if(gain_index == gain_index_next){              //static gain
+        for(i=0 ; i<q->gain_size_factor ; i++){
+            buffer[i]*=fc1;
+        }
+        return;
+    } else {                                        //smooth gain
+        fc2 = q->gain_table[11 + (gain_index_next-gain_index)];
+        for(i=0 ; i<q->gain_size_factor ; i++){
+            buffer[i]*=fc1;
+            fc1*=fc2;
+        }
+        return;
+    }
+}
+
+/**
+ * timedomain requantization of the timedomain samples
+ *
+ * @param q                 pointer to the COOKContext
+ * @param buffer            pointer to the timedomain buffer
+ * @param gain_now          current gain structure
+ * @param gain_previous     previous gain structure
+ */
+
+static void gain_window(COOKContext *q, float* buffer, COOKgain* gain_now,
+                        COOKgain* gain_previous){
+    int i, index;
+    int gain_index[9];
+    int tmp_gain_index;
+
+    gain_index[8]=0;
+    index = gain_previous->size;
+    for (i=7 ; i>=0 ; i--) {
+        if(index && gain_previous->qidx_table1[index-1]==i) {
+            gain_index[i] = gain_previous->qidx_table2[index-1];
+            index--;
+        } else {
+            gain_index[i]=gain_index[i+1];
+        }
+    }
+    /* This is applied to the to be previous data buffer. */
+    for(i=0;i<8;i++){
+        interpolate(q, &buffer[q->samples_per_channel+q->gain_size_factor*i],
+                    gain_index[i], gain_index[i+1]);
+    }
+
+    tmp_gain_index = gain_index[0];
+    index = gain_now->size;
+    for (i=7 ; i>=0 ; i--) {
+        if(index && gain_now->qidx_table1[index-1]==i) {
+            gain_index[i]= gain_now->qidx_table2[index-1];
+            index--;
+        } else {
+            gain_index[i]=gain_index[i+1];
+        }
+    }
+
+    /* This is applied to the to be current block. */
+    for(i=0;i<8;i++){
+        interpolate(q, &buffer[i*q->gain_size_factor],
+                    tmp_gain_index+gain_index[i],
+                    tmp_gain_index+gain_index[i+1]);
+    }
+}
+
+
+/**
+ * mlt overlapping and buffer management
+ *
+ * @param q                 pointer to the COOKContext
+ * @param buffer            pointer to the timedomain buffer
+ * @param gain_now          current gain structure
+ * @param gain_previous     previous gain structure
+ * @param previous_buffer   pointer to the previous buffer to be used for overlapping
+ *
+ */
+
+static void gain_compensate(COOKContext *q, float* buffer, COOKgain* gain_now,
+                            COOKgain* gain_previous, float* previous_buffer) {
+    int i;
+    if((gain_now->size  || gain_previous->size)) {
+        gain_window(q, buffer, gain_now, gain_previous);
+    }
+
+    /* Overlap with the previous block. */
+    for(i=0 ; i<q->samples_per_channel ; i++) buffer[i]+=previous_buffer[i];
+
+    /* Save away the current to be previous block. */
+    memcpy(previous_buffer, buffer+q->samples_per_channel,
+           sizeof(float)*q->samples_per_channel);
+}
+
+
+/**
+ * function for getting the jointstereo coupling information
+ *
+ * @param q                 pointer to the COOKContext
+ * @param decouple_tab      decoupling array
+ *
+ */
+
+static void decouple_info(COOKContext *q, int* decouple_tab){
+    int length, i;
+
+    if(get_bits1(&q->gb)) {
+        if(cplband[q->js_subband_start] > cplband[q->subbands-1]) return;
+
+        length = cplband[q->subbands-1] - cplband[q->js_subband_start] + 1;
+        for (i=0 ; i<length ; i++) {
+            decouple_tab[cplband[q->js_subband_start] + i] = get_vlc2(&q->gb, q->ccpl.table, q->ccpl.bits, 2);
+        }
+        return;
+    }
+
+    if(cplband[q->js_subband_start] > cplband[q->subbands-1]) return;
+
+    length = cplband[q->subbands-1] - cplband[q->js_subband_start] + 1;
+    for (i=0 ; i<length ; i++) {
+       decouple_tab[cplband[q->js_subband_start] + i] = get_bits(&q->gb, q->js_vlc_bits);
+    }
+    return;
+}
+
+
+/**
+ * function for decoding joint stereo data
+ *
+ * @param q                 pointer to the COOKContext
+ * @param mlt_buffer1       pointer to left channel mlt coefficients
+ * @param mlt_buffer2       pointer to right channel mlt coefficients
+ */
+
+static void joint_decode(COOKContext *q, float* mlt_buffer1,
+                         float* mlt_buffer2) {
+    int i,j;
+    int decouple_tab[SUBBAND_SIZE];
+    float decode_buffer[1060];
+    int idx, cpl_tmp,tmp_idx;
+    float f1,f2;
+    float* cplscale;
+
+    memset(decouple_tab, 0, sizeof(decouple_tab));
+    memset(decode_buffer, 0, sizeof(decode_buffer));
+
+    /* Make sure the buffers are zeroed out. */
+    memset(mlt_buffer1,0, 1024*sizeof(float));
+    memset(mlt_buffer2,0, 1024*sizeof(float));
+    decouple_info(q, decouple_tab);
+    mono_decode(q, decode_buffer);
+
+    /* The two channels are stored interleaved in decode_buffer. */
+    for (i=0 ; i<q->js_subband_start ; i++) {
+        for (j=0 ; j<SUBBAND_SIZE ; j++) {
+            mlt_buffer1[i*20+j] = decode_buffer[i*40+j];
+            mlt_buffer2[i*20+j] = decode_buffer[i*40+20+j];
+        }
+    }
+
+    /* When we reach js_subband_start (the higher frequencies)
+       the coefficients are stored in a coupling scheme. */
+    idx = (1 << q->js_vlc_bits) - 1;
+    for (i=q->js_subband_start ; i<q->subbands ; i++) {
+        cpl_tmp = cplband[i];
+        idx -=decouple_tab[cpl_tmp];
+        cplscale = (float*)cplscales[q->js_vlc_bits-2];  //choose decoupler table
+        f1 = cplscale[decouple_tab[cpl_tmp]];
+        f2 = cplscale[idx-1];
+        for (j=0 ; j<SUBBAND_SIZE ; j++) {
+            tmp_idx = ((q->js_subband_start + i)*20)+j;
+            mlt_buffer1[20*i + j] = f1 * decode_buffer[tmp_idx];
+            mlt_buffer2[20*i + j] = f2 * decode_buffer[tmp_idx];
+        }
+        idx = (1 << q->js_vlc_bits) - 1;
+    }
+}
+
+/**
+ * Cook subpacket decoding. This function returns one decoded subpacket,
+ * usually 1024 samples per channel.
+ *
+ * @param q                 pointer to the COOKContext
+ * @param inbuffer          pointer to the inbuffer
+ * @param sub_packet_size   subpacket size
+ * @param outbuffer         pointer to the outbuffer
+ */
+
+
+static int decode_subpacket(COOKContext *q, uint8_t *inbuffer,
+                            int sub_packet_size, int16_t *outbuffer) {
+    int i,j;
+    int value;
+    float* tmp_ptr;
+
+    /* packet dump */
+//    for (i=0 ; i<sub_packet_size ; i++) {
+//        av_log(NULL, AV_LOG_ERROR, "%02x", inbuffer[i]);
+//    }
+//    av_log(NULL, AV_LOG_ERROR, "\n");
+
+    decode_bytes(inbuffer, q->decoded_bytes_buffer, sub_packet_size);
+    init_get_bits(&q->gb, q->decoded_bytes_buffer, sub_packet_size*8);
+    decode_gain_info(&q->gb, &q->gain_current);
+
+    if(q->nb_channels==2 && q->joint_stereo==1){
+        joint_decode(q, q->decode_buf_ptr[0], q->decode_buf_ptr[2]);
+
+        /* Swap buffer pointers. */
+        tmp_ptr = q->decode_buf_ptr[1];
+        q->decode_buf_ptr[1] = q->decode_buf_ptr[0];
+        q->decode_buf_ptr[0] = tmp_ptr;
+        tmp_ptr = q->decode_buf_ptr[3];
+        q->decode_buf_ptr[3] = q->decode_buf_ptr[2];
+        q->decode_buf_ptr[2] = tmp_ptr;
+
+        /* FIXME: Rethink the gainbuffer handling, maybe a rename?
+           now/previous swap */
+        q->gain_now_ptr = &q->gain_now;
+        q->gain_previous_ptr = &q->gain_previous;
+        for (i=0 ; i<q->nb_channels ; i++){
+
+            cook_imlt(q, q->decode_buf_ptr[i*2], q->mono_mdct_output, q->mlt_tmp);
+            gain_compensate(q, q->mono_mdct_output, q->gain_now_ptr,
+                            q->gain_previous_ptr, q->previous_buffer_ptr[0]);
+
+            /* Swap out the previous buffer. */
+            tmp_ptr = q->previous_buffer_ptr[0];
+            q->previous_buffer_ptr[0] = q->previous_buffer_ptr[1];
+            q->previous_buffer_ptr[1] = tmp_ptr;
+
+            /* Clip and convert the floats to 16 bits. */
+            for (j=0 ; j<q->samples_per_frame ; j++){
+                value = lrintf(q->mono_mdct_output[j]);
+                if(value < -32768) value = -32768;
+                else if(value > 32767) value = 32767;
+                outbuffer[2*j+i] = value;
+            }
+        }
+
+        memcpy(&q->gain_now, &q->gain_previous, sizeof(COOKgain));
+        memcpy(&q->gain_previous, &q->gain_current, sizeof(COOKgain));
+
+    } else if (q->nb_channels==2 && q->joint_stereo==0) {
+            /* channel 0 */
+            mono_decode(q, q->decode_buf_ptr2[0]);
+
+            tmp_ptr = q->decode_buf_ptr2[0];
+            q->decode_buf_ptr2[0] = q->decode_buf_ptr2[1];
+            q->decode_buf_ptr2[1] = tmp_ptr;
+
+            memcpy(&q->gain_channel1[0], &q->gain_current ,sizeof(COOKgain));
+            q->gain_now_ptr = &q->gain_channel1[0];
+            q->gain_previous_ptr = &q->gain_channel1[1];
+
+            cook_imlt(q, q->decode_buf_ptr2[0], q->mono_mdct_output,q->mlt_tmp);
+            gain_compensate(q, q->mono_mdct_output, q->gain_now_ptr,
+                            q->gain_previous_ptr, q->mono_previous_buffer1);
+
+            memcpy(&q->gain_channel1[1], &q->gain_channel1[0],sizeof(COOKgain));
+
+
+            for (j=0 ; j<q->samples_per_frame ; j++){
+                value = lrintf(q->mono_mdct_output[j]);
+                if(value < -32768) value = -32768;
+                else if(value > 32767) value = 32767;
+                outbuffer[2*j+1] = value;
+            }
+
+            /* channel 1 */
+            //av_log(NULL,AV_LOG_ERROR,"bits = %d\n",get_bits_count(&q->gb));
+            init_get_bits(&q->gb, q->decoded_bytes_buffer, sub_packet_size*8+q->bits_per_subpacket);
+
+            q->gain_now_ptr = &q->gain_channel2[0];
+            q->gain_previous_ptr = &q->gain_channel2[1];
+
+            decode_gain_info(&q->gb, &q->gain_channel2[0]);
+            mono_decode(q, q->decode_buf_ptr[0]);
+
+            tmp_ptr = q->decode_buf_ptr[0];
+            q->decode_buf_ptr[0] = q->decode_buf_ptr[1];
+            q->decode_buf_ptr[1] = tmp_ptr;
+
+            cook_imlt(q, q->decode_buf_ptr[0], q->mono_mdct_output,q->mlt_tmp);
+            gain_compensate(q, q->mono_mdct_output, q->gain_now_ptr,
+                            q->gain_previous_ptr, q->mono_previous_buffer2);
+
+            /* Swap out the previous buffer. */
+            tmp_ptr = q->previous_buffer_ptr[0];
+            q->previous_buffer_ptr[0] = q->previous_buffer_ptr[1];
+            q->previous_buffer_ptr[1] = tmp_ptr;
+
+            memcpy(&q->gain_channel2[1], &q->gain_channel2[0] ,sizeof(COOKgain));
+
+            for (j=0 ; j<q->samples_per_frame ; j++){
+                value = lrintf(q->mono_mdct_output[j]);
+                if(value < -32768) value = -32768;
+                else if(value > 32767) value = 32767;
+                outbuffer[2*j] = value;
+            }
+
+    } else {
+        mono_decode(q, q->decode_buf_ptr[0]);
+
+        /* Swap buffer pointers. */
+        tmp_ptr = q->decode_buf_ptr[1];
+        q->decode_buf_ptr[1] = q->decode_buf_ptr[0];
+        q->decode_buf_ptr[0] = tmp_ptr;
+
+        /* FIXME: Rethink the gainbuffer handling, maybe a rename?
+           now/previous swap */
+        q->gain_now_ptr = &q->gain_now;
+        q->gain_previous_ptr = &q->gain_previous;
+
+        cook_imlt(q, q->decode_buf_ptr[0], q->mono_mdct_output,q->mlt_tmp);
+        gain_compensate(q, q->mono_mdct_output, q->gain_now_ptr,
+                        q->gain_previous_ptr, q->mono_previous_buffer1);
+
+        /* Clip and convert the floats to 16 bits */
+        for (j=0 ; j<q->samples_per_frame ; j++){
+            value = lrintf(q->mono_mdct_output[j]);
+            if(value < -32768) value = -32768;
+            else if(value > 32767) value = 32767;
+            outbuffer[j] = value;
+        }
+        memcpy(&q->gain_now, &q->gain_previous, sizeof(COOKgain));
+        memcpy(&q->gain_previous, &q->gain_current, sizeof(COOKgain));
+    }
+    return q->samples_per_frame * sizeof(int16_t);
+}
+
+
+/**
+ * Cook frame decoding
+ *
+ * @param avctx     pointer to the AVCodecContext
+ */
+
+static int cook_decode_frame(AVCodecContext *avctx,
+            void *data, int *data_size,
+            uint8_t *buf, int buf_size) {
+    COOKContext *q = avctx->priv_data;
+
+    if (buf_size < avctx->block_align)
+        return buf_size;
+
+    *data_size = decode_subpacket(q, buf, avctx->block_align, data);
+
+    return avctx->block_align;
+}
+
+#ifdef COOKDEBUG
+static void dump_cook_context(COOKContext *q, COOKextradata *e)
+{
+    //int i=0;
+#define PRINT(a,b) av_log(NULL,AV_LOG_ERROR," %s = %d\n", a, b);
+    av_log(NULL,AV_LOG_ERROR,"COOKextradata\n");
+    av_log(NULL,AV_LOG_ERROR,"cookversion=%x\n",e->cookversion);
+    if (e->cookversion > MONO_COOK2) {
+        PRINT("js_subband_start",e->js_subband_start);
+        PRINT("js_vlc_bits",e->js_vlc_bits);
+    }
+    av_log(NULL,AV_LOG_ERROR,"COOKContext\n");
+    PRINT("nb_channels",q->nb_channels);
+    PRINT("bit_rate",q->bit_rate);
+    PRINT("sample_rate",q->sample_rate);
+    PRINT("samples_per_channel",q->samples_per_channel);
+    PRINT("samples_per_frame",q->samples_per_frame);
+    PRINT("subbands",q->subbands);
+    PRINT("random_state",q->random_state);
+    PRINT("mlt_size",q->mlt_size);
+    PRINT("js_subband_start",q->js_subband_start);
+    PRINT("log2_numvector_size",q->log2_numvector_size);
+    PRINT("numvector_size",q->numvector_size);
+    PRINT("total_subbands",q->total_subbands);
+}
+#endif
+
+/**
+ * Cook initialization
+ *
+ * @param avctx     pointer to the AVCodecContext
+ */
+
+static int cook_decode_init(AVCodecContext *avctx)
+{
+    COOKextradata *e = avctx->extradata;
+    COOKContext *q = avctx->priv_data;
+
+    /* Take care of the codec specific extradata. */
+    if (avctx->extradata_size <= 0) {
+        av_log(NULL,AV_LOG_ERROR,"Necessary extradata missing!\n");
+        return -1;
+    } else {
+        /* 8 for mono, 16 for stereo, ? for multichannel
+           Swap to right endianness so we don't need to care later on. */
+        av_log(NULL,AV_LOG_DEBUG,"codecdata_length=%d\n",avctx->extradata_size);
+        if (avctx->extradata_size >= 8){
+            e->cookversion = be2me_32(e->cookversion);
+            e->samples_per_frame = be2me_16(e->samples_per_frame);
+            e->subbands = be2me_16(e->subbands);
+        }
+        if (avctx->extradata_size >= 16){
+            e->js_subband_start = be2me_16(e->js_subband_start);
+            e->js_vlc_bits = be2me_16(e->js_vlc_bits);
+        }
+    }
+
+    /* Take data from the AVCodecContext (RM container). */
+    q->sample_rate = avctx->sample_rate;
+    q->nb_channels = avctx->channels;
+    q->bit_rate = avctx->bit_rate;
+
+    /* Initialize state. */
+    q->random_state = 1;
+
+    /* Initialize extradata related variables. */
+    q->samples_per_channel = e->samples_per_frame / q->nb_channels;
+    q->samples_per_frame = e->samples_per_frame;
+    q->subbands = e->subbands;
+    q->bits_per_subpacket = avctx->block_align * 8;
+
+    /* Initialize default data states. */
+    q->js_subband_start = 0;
+    q->log2_numvector_size = 5;
+    q->total_subbands = q->subbands;
+
+    /* Initialize version-dependent variables */
+    av_log(NULL,AV_LOG_DEBUG,"e->cookversion=%x\n",e->cookversion);
+    switch (e->cookversion) {
+        case MONO_COOK1:
+            if (q->nb_channels != 1) {
+                av_log(NULL,AV_LOG_ERROR,"Container channels != 1, report sample!\n");
+                return -1;
+            }
+            av_log(NULL,AV_LOG_DEBUG,"MONO_COOK1\n");
+            break;
+        case MONO_COOK2:
+            if (q->nb_channels != 1) {
+                q->joint_stereo = 0;
+                q->bits_per_subpacket = q->bits_per_subpacket/2;
+            }
+            av_log(NULL,AV_LOG_DEBUG,"MONO_COOK2\n");
+            break;
+        case JOINT_STEREO:
+            if (q->nb_channels != 2) {
+                av_log(NULL,AV_LOG_ERROR,"Container channels != 2, report sample!\n");
+                return -1;
+            }
+            av_log(NULL,AV_LOG_DEBUG,"JOINT_STEREO\n");
+            if (avctx->extradata_size >= 16){
+                q->total_subbands = q->subbands + e->js_subband_start;
+                q->js_subband_start = e->js_subband_start;
+                q->joint_stereo = 1;
+                q->js_vlc_bits = e->js_vlc_bits;
+            }
+            if (q->samples_per_channel > 256) {
+                q->log2_numvector_size  = 6;
+            }
+            if (q->samples_per_channel > 512) {
+                q->log2_numvector_size  = 7;
+            }
+            break;
+        case MC_COOK:
+            av_log(NULL,AV_LOG_ERROR,"MC_COOK not supported!\n");
+            return -1;
+            break;
+        default:
+            av_log(NULL,AV_LOG_ERROR,"Unknown Cook version, report sample!\n");
+            return -1;
+            break;
+    }
+
+    /* Initialize variable relations */
+    q->mlt_size = q->samples_per_channel;
+    q->numvector_size = (1 << q->log2_numvector_size);
+
+    /* Generate tables */
+    init_rootpow2table(q);
+    init_pow2table(q);
+    init_gain_table(q);
+
+    if (init_cook_vlc_tables(q) != 0)
+        return -1;
+
+
+    if(avctx->block_align >= UINT_MAX/2)
+        return -1;
+
+    /* Pad the databuffer with FF_INPUT_BUFFER_PADDING_SIZE,
+       this is for the bitstreamreader. */
+    if ((q->decoded_bytes_buffer = av_mallocz((avctx->block_align+(4-avctx->block_align%4) + FF_INPUT_BUFFER_PADDING_SIZE)*sizeof(uint8_t)))  == NULL)
+        return -1;
+
+    q->decode_buf_ptr[0] = q->decode_buffer_1;
+    q->decode_buf_ptr[1] = q->decode_buffer_2;
+    q->decode_buf_ptr[2] = q->decode_buffer_3;
+    q->decode_buf_ptr[3] = q->decode_buffer_4;
+
+    q->decode_buf_ptr2[0] = q->decode_buffer_3;
+    q->decode_buf_ptr2[1] = q->decode_buffer_4;
+
+    q->previous_buffer_ptr[0] = q->mono_previous_buffer1;
+    q->previous_buffer_ptr[1] = q->mono_previous_buffer2;
+
+    /* Initialize transform. */
+    if ( init_cook_mlt(q) == 0 )
+        return -1;
+
+    /* Try to catch some obviously faulty streams, othervise it might be exploitable */
+    if (q->total_subbands > 53) {
+        av_log(NULL,AV_LOG_ERROR,"total_subbands > 53, report sample!\n");
+        return -1;
+    }
+    if (q->subbands > 50) {
+        av_log(NULL,AV_LOG_ERROR,"subbands > 50, report sample!\n");
+        return -1;
+    }
+    if ((q->samples_per_channel == 256) || (q->samples_per_channel == 512) || (q->samples_per_channel == 1024)) {
+    } else {
+        av_log(NULL,AV_LOG_ERROR,"unknown amount of samples_per_channel = %d, report sample!\n",q->samples_per_channel);
+        return -1;
+    }
+
+#ifdef COOKDEBUG
+    dump_cook_context(q,e);
+#endif
+    return 0;
+}
+
+
+AVCodec cook_decoder =
+{
+    .name = "cook",
+    .type = CODEC_TYPE_AUDIO,
+    .id = CODEC_ID_COOK,
+    .priv_data_size = sizeof(COOKContext),
+    .init = cook_decode_init,
+    .close = cook_decode_close,
+    .decode = cook_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/cookdata.h b/contrib/ffmpeg/libavcodec/cookdata.h
new file mode 100644
index 000000000..395c9a7dd
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/cookdata.h
@@ -0,0 +1,559 @@
+/*
+ * COOK compatible decoder data
+ * Copyright (c) 2003 Sascha Sommer
+ * Copyright (c) 2005 Benjamin Larsson
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file cookdata.h
+ * Cook AKA RealAudio G2 compatible decoderdata
+ */
+
+/* various data tables */
+
+static const int expbits_tab[8] = {
+    52,47,43,37,29,22,16,0,
+};
+
+static const float dither_tab[8] = {
+  0.0, 0.0, 0.0, 0.0, 0.0, 0.176777, 0.25, 0.707107,
+};
+
+static const float randsign[2] = {1.0, -1.0};
+
+static const float quant_centroid_tab[7][14] = {
+  { 0.000, 0.392, 0.761, 1.120, 1.477, 1.832, 2.183, 2.541, 2.893, 3.245, 3.598, 3.942, 4.288, 4.724 },
+  { 0.000, 0.544, 1.060, 1.563, 2.068, 2.571, 3.072, 3.562, 4.070, 4.620, 0.000, 0.000, 0.000, 0.000 },
+  { 0.000, 0.746, 1.464, 2.180, 2.882, 3.584, 4.316, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 },
+  { 0.000, 1.006, 2.000, 2.993, 3.985, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 },
+  { 0.000, 1.321, 2.703, 3.983, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 },
+  { 0.000, 1.657, 3.491, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 },
+  { 0.000, 1.964, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }
+};
+
+static const int invradix_tab[7] = {
+    74899, 104858, 149797, 209716, 262144, 349526, 524288,
+};
+
+static const int kmax_tab[7] = {
+    13, 9, 6, 4, 3, 2, 1,
+};
+
+static const int vd_tab[7] = {
+    2, 2, 2, 4, 4, 5, 5,
+};
+
+static const int vpr_tab[7] = {
+    10, 10, 10, 5, 5, 4, 4,
+};
+
+
+
+/* VLC data */
+
+static const int vhsize_tab[7] = {
+    191, 97, 48, 607, 246, 230, 32,
+};
+
+static const int vhvlcsize_tab[7] = {
+    8, 7, 7, 10, 9, 9, 6,
+};
+
+static const uint8_t envelope_quant_index_huffbits[13][24] = {
+    {  4,  6,  5,  5,  4, 4, 4, 4, 4, 4, 3, 3, 3, 4, 5, 7,  8,  9, 11, 11, 12, 12, 12, 12 },
+    { 10,  8,  6,  5,  5, 4, 3, 3, 3, 3, 3, 3, 4, 5, 7, 9, 11, 12, 13, 15, 15, 15, 16, 16 },
+    { 12, 10,  8,  6,  5, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4, 4,  5,  5,  7,  9, 11, 13, 14, 14 },
+    { 13, 10,  9,  9,  7, 7, 5, 5, 4, 3, 3, 3, 3, 3, 4, 4,  4,  5,  7,  9, 11, 13, 13, 13 },
+    { 12, 13, 10,  8,  6, 6, 5, 5, 4, 4, 3, 3, 3, 3, 3, 4,  5,  5,  6,  7,  9, 11, 14, 14 },
+    { 12, 11,  9,  8,  8, 7, 5, 4, 4, 3, 3, 3, 3, 3, 4, 4,  5,  5,  7,  8, 10, 13, 14, 14 },
+    { 15, 16, 15, 12, 10, 8, 6, 5, 4, 3, 3, 3, 2, 3, 4, 5,  5,  7,  9, 11, 13, 16, 16, 16 },
+    { 14, 14, 11, 10,  9, 7, 7, 5, 5, 4, 3, 3, 2, 3, 3, 4,  5,  7,  9,  9, 12, 14, 15, 15 },
+    {  9,  9,  9,  8,  7, 6, 5, 4, 3, 3, 3, 3, 3, 3, 4, 5,  6,  7,  8, 10, 11, 12, 13, 13 },
+    { 14, 12, 10,  8,  6, 6, 5, 4, 3, 3, 3, 3, 3, 3, 4, 5,  6,  8,  8,  9, 11, 14, 14, 14 },
+    { 13, 10,  9,  8,  6, 6, 5, 4, 4, 4, 3, 3, 2, 3, 4, 5,  6,  8,  9,  9, 11, 12, 14, 14 },
+    { 16, 13, 12, 11,  9, 6, 5, 5, 4, 4, 4, 3, 2, 3, 3, 4,  5,  7,  8, 10, 14, 16, 16, 16 },
+    { 13, 14, 14, 14, 10, 8, 7, 7, 5, 4, 3, 3, 2, 3, 3, 4,  5,  5,  7,  9, 11, 14, 14, 14 },
+};
+
+static const uint16_t envelope_quant_index_huffcodes[13][24] = {
+    {0x0006, 0x003e, 0x001c, 0x001d, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x0000, 0x0001,
+     0x0002, 0x000d, 0x001e, 0x007e, 0x00fe, 0x01fe, 0x07fc, 0x07fd, 0x0ffc, 0x0ffd, 0x0ffe, 0x0fff},
+    {0x03fe, 0x00fe, 0x003e, 0x001c, 0x001d, 0x000c, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005,
+     0x000d, 0x001e, 0x007e, 0x01fe, 0x07fe, 0x0ffe, 0x1ffe, 0x7ffc, 0x7ffd, 0x7ffe, 0xfffe, 0xffff},
+    {0x0ffe, 0x03fe, 0x00fe, 0x003e, 0x001c, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x0000,
+     0x0001, 0x0002, 0x000c, 0x000d, 0x001d, 0x001e, 0x007e, 0x01fe, 0x07fe, 0x1ffe, 0x3ffe, 0x3fff},
+    {0x1ffc, 0x03fe, 0x01fc, 0x01fd, 0x007c, 0x007d, 0x001c, 0x001d, 0x000a, 0x0000, 0x0001, 0x0002,
+     0x0003, 0x0004, 0x000b, 0x000c, 0x000d, 0x001e, 0x007e, 0x01fe, 0x07fe, 0x1ffd, 0x1ffe, 0x1fff},
+    {0x0ffe, 0x1ffe, 0x03fe, 0x00fe, 0x003c, 0x003d, 0x001a, 0x001b, 0x000a, 0x000b, 0x0000, 0x0001,
+     0x0002, 0x0003, 0x0004, 0x000c, 0x001c, 0x001d, 0x003e, 0x007e, 0x01fe, 0x07fe, 0x3ffe, 0x3fff},
+    {0x0ffe, 0x07fe, 0x01fe, 0x00fc, 0x00fd, 0x007c, 0x001c, 0x000a, 0x000b, 0x0000, 0x0001, 0x0002,
+     0x0003, 0x0004, 0x000c, 0x000d, 0x001d, 0x001e, 0x007d, 0x00fe, 0x03fe, 0x1ffe, 0x3ffe, 0x3fff},
+    {0x7ffc, 0xfffc, 0x7ffd, 0x0ffe, 0x03fe, 0x00fe, 0x003e, 0x001c, 0x000c, 0x0002, 0x0003, 0x0004,
+     0x0000, 0x0005, 0x000d, 0x001d, 0x001e, 0x007e, 0x01fe, 0x07fe, 0x1ffe, 0xfffd, 0xfffe, 0xffff},
+    {0x3ffc, 0x3ffd, 0x07fe, 0x03fe, 0x01fc, 0x007c, 0x007d, 0x001c, 0x001d, 0x000c, 0x0002, 0x0003,
+     0x0000, 0x0004, 0x0005, 0x000d, 0x001e, 0x007e, 0x01fd, 0x01fe, 0x0ffe, 0x3ffe, 0x7ffe, 0x7fff},
+    {0x01fc, 0x01fd, 0x01fe, 0x00fc, 0x007c, 0x003c, 0x001c, 0x000c, 0x0000, 0x0001, 0x0002, 0x0003,
+     0x0004, 0x0005, 0x000d, 0x001d, 0x003d, 0x007d, 0x00fd, 0x03fe, 0x07fe, 0x0ffe, 0x1ffe, 0x1fff},
+    {0x3ffc, 0x0ffe, 0x03fe, 0x00fc, 0x003c, 0x003d, 0x001c, 0x000c, 0x0000, 0x0001, 0x0002, 0x0003,
+     0x0004, 0x0005, 0x000d, 0x001d, 0x003e, 0x00fd, 0x00fe, 0x01fe, 0x07fe, 0x3ffd, 0x3ffe, 0x3fff},
+    {0x1ffe, 0x03fe, 0x01fc, 0x00fc, 0x003c, 0x003d, 0x001c, 0x000a, 0x000b, 0x000c, 0x0002, 0x0003,
+     0x0000, 0x0004, 0x000d, 0x001d, 0x003e, 0x00fd, 0x01fd, 0x01fe, 0x07fe, 0x0ffe, 0x3ffe, 0x3fff},
+    {0xfffc, 0x1ffe, 0x0ffe, 0x07fe, 0x01fe, 0x003e, 0x001c, 0x001d, 0x000a, 0x000b, 0x000c, 0x0002,
+     0x0000, 0x0003, 0x0004, 0x000d, 0x001e, 0x007e, 0x00fe, 0x03fe, 0x3ffe, 0xfffd, 0xfffe, 0xffff},
+    {0x1ffc, 0x3ffa, 0x3ffb, 0x3ffc, 0x03fe, 0x00fe, 0x007c, 0x007d, 0x001c, 0x000c, 0x0002, 0x0003,
+     0x0000, 0x0004, 0x0005, 0x000d, 0x001d, 0x001e, 0x007e, 0x01fe, 0x07fe, 0x3ffd, 0x3ffe, 0x3fff},
+};
+
+
+static const uint8_t cvh_huffbits0[191] = {
+    1, 4, 6, 6, 7, 7, 8, 8, 8, 9, 9, 10,
+    11, 11, 4, 5, 6, 7, 7, 8, 8, 9, 9, 9,
+    9, 10, 11, 11, 5, 6, 7, 8, 8, 9, 9, 9,
+    9, 10, 10, 10, 11, 12, 6, 7, 8, 9, 9, 9,
+    9, 10, 10, 10, 10, 11, 12, 13, 7, 7, 8, 9,
+    9, 9, 10, 10, 10, 10, 11, 11, 12, 13, 8, 8,
+    9, 9, 9, 10, 10, 10, 10, 11, 11, 12, 13, 14,
+    8, 8, 9, 9, 10, 10, 11, 11, 11, 12, 12, 13,
+    13, 15, 8, 8, 9, 9, 10, 10, 11, 11, 11, 12,
+    12, 13, 14, 15, 9, 9, 9, 10, 10, 10, 11, 11,
+    12, 13, 12, 14, 15, 16, 9, 9, 10, 10, 10, 10,
+    11, 12, 12, 14, 14, 16, 16, 0, 9, 9, 10, 10,
+    11, 11, 12, 13, 13, 14, 14, 15, 0, 0, 10, 10,
+    10, 11, 11, 12, 12, 13, 15, 15, 16, 0, 0, 0,
+    11, 11, 11, 12, 13, 13, 13, 15, 16, 16, 0, 0,
+    0, 0, 11, 11, 12, 13, 13, 14, 15, 16, 16,
+};
+
+static const uint16_t cvh_huffcodes0[191] = {
+    0x0000,0x0008,0x002c,0x002d,0x0062,0x0063,0x00d4,0x00d5,0x00d6,0x01c6,0x01c7,0x03ca,
+    0x07d6,0x07d7,0x0009,0x0014,0x002e,0x0064,0x0065,0x00d7,0x00d8,0x01c8,0x01c9,0x01ca,
+    0x01cb,0x03cb,0x07d8,0x07d9,0x0015,0x002f,0x0066,0x00d9,0x00da,0x01cc,0x01cd,0x01ce,
+    0x01cf,0x03cc,0x03cd,0x03ce,0x07da,0x0fe4,0x0030,0x0067,0x00db,0x01d0,0x01d1,0x01d2,
+    0x01d3,0x03cf,0x03d0,0x03d1,0x03d2,0x07db,0x0fe5,0x1fea,0x0068,0x0069,0x00dc,0x01d4,
+    0x01d5,0x01d6,0x03d3,0x03d4,0x03d5,0x03d6,0x07dc,0x07dd,0x0fe6,0x1feb,0x00dd,0x00de,
+    0x01d7,0x01d8,0x01d9,0x03d7,0x03d8,0x03d9,0x03da,0x07de,0x07df,0x0fe7,0x1fec,0x3ff2,
+    0x00df,0x00e0,0x01da,0x01db,0x03db,0x03dc,0x07e0,0x07e1,0x07e2,0x0fe8,0x0fe9,0x1fed,
+    0x1fee,0x7ff4,0x00e1,0x00e2,0x01dc,0x01dd,0x03dd,0x03de,0x07e3,0x07e4,0x07e5,0x0fea,
+    0x0feb,0x1fef,0x3ff3,0x7ff5,0x01de,0x01df,0x01e0,0x03df,0x03e0,0x03e1,0x07e6,0x07e7,
+    0x0fec,0x1ff0,0x0fed,0x3ff4,0x7ff6,0xfff8,0x01e1,0x01e2,0x03e2,0x03e3,0x03e4,0x03e5,
+    0x07e8,0x0fee,0x0fef,0x3ff5,0x3ff6,0xfff9,0xfffa,0xfffa,0x01e3,0x01e4,0x03e6,0x03e7,
+    0x07e9,0x07ea,0x0ff0,0x1ff1,0x1ff2,0x3ff7,0x3ff8,0x7ff7,0x7ff7,0xfffa,0x03e8,0x03e9,
+    0x03ea,0x07eb,0x07ec,0x0ff1,0x0ff2,0x1ff3,0x7ff8,0x7ff9,0xfffb,0x3ff8,0x7ff7,0x7ff7,
+    0x07ed,0x07ee,0x07ef,0x0ff3,0x1ff4,0x1ff5,0x1ff6,0x7ffa,0xfffc,0xfffd,0xfffb,0xfffb,
+    0x3ff8,0x7ff7,0x07f0,0x07f1,0x0ff4,0x1ff7,0x1ff8,0x3ff9,0x7ffb,0xfffe,0xffff,
+};
+
+
+static const uint8_t cvh_huffbits1[97] = {
+    1, 4, 5, 6, 7, 8, 8, 9, 10, 10, 4, 5,
+    6, 7, 7, 8, 8, 9, 9, 11, 5, 5, 6, 7,
+    8, 8, 9, 9, 10, 11, 6, 6, 7, 8, 8, 9,
+    9, 10, 11, 12, 7, 7, 8, 8, 9, 9, 10, 11,
+    11, 13, 8, 8, 8, 9, 9, 10, 10, 11, 12, 14,
+    8, 8, 8, 9, 10, 11, 11, 12, 13, 15, 9, 9,
+    9, 10, 11, 12, 12, 14, 14, 0, 9, 9, 9, 10,
+    11, 12, 14, 16, 0, 0, 10, 10, 11, 12, 13, 14,
+    16,
+};
+
+
+static const uint16_t cvh_huffcodes1[97] = {
+    0x0000,0x0008,0x0014,0x0030,0x006a,0x00e2,0x00e3,0x01e4,0x03ec,0x03ed,0x0009,0x0015,
+    0x0031,0x006b,0x006c,0x00e4,0x00e5,0x01e5,0x01e6,0x07f0,0x0016,0x0017,0x0032,0x006d,
+    0x00e6,0x00e7,0x01e7,0x01e8,0x03ee,0x07f1,0x0033,0x0034,0x006e,0x00e8,0x00e9,0x01e9,
+    0x01ea,0x03ef,0x07f2,0x0ff6,0x006f,0x0070,0x00ea,0x00eb,0x01eb,0x01ec,0x03f0,0x07f3,
+    0x07f4,0x1ffa,0x00ec,0x00ed,0x00ee,0x01ed,0x01ee,0x03f1,0x03f2,0x07f5,0x0ff7,0x3ffa,
+    0x00ef,0x00f0,0x00f1,0x01ef,0x03f3,0x07f6,0x07f7,0x0ff8,0x1ffb,0x7ffe,0x01f0,0x01f1,
+    0x01f2,0x03f4,0x07f8,0x0ff9,0x0ffa,0x3ffb,0x3ffc,0x0000,0x01f3,0x01f4,0x01f5,0x03f5,
+    0x07f9,0x0ffb,0x3ffd,0xfffe,0x0000,0x0000,0x03f6,0x03f7,0x07fa,0x0ffc,0x1ffc,0x3ffe,
+    0xffff,
+};
+
+static const uint8_t cvh_huffbits2[48] = {
+    1, 4, 5, 7, 8, 9, 10, 3, 4, 5, 7, 8,
+    9, 10, 5, 5, 6, 7, 8, 10, 10, 7, 6, 7,
+    8, 9, 10, 12, 8, 8, 8, 9, 10, 12, 14, 8,
+    9, 9, 10, 11, 15, 16, 9, 10, 11, 12, 13, 16,
+};
+
+static const uint16_t cvh_huffcodes2[48] = {
+    0x0000,0x000a,0x0018,0x0074,0x00f2,0x01f4,0x03f6,0x0004,0x000b,0x0019,0x0075,0x00f3,
+    0x01f5,0x03f7,0x001a,0x001b,0x0038,0x0076,0x00f4,0x03f8,0x03f9,0x0077,0x0039,0x0078,
+    0x00f5,0x01f6,0x03fa,0x0ffc,0x00f6,0x00f7,0x00f8,0x01f7,0x03fb,0x0ffd,0x3ffe,0x00f9,
+    0x01f8,0x01f9,0x03fc,0x07fc,0x7ffe,0xfffe,0x01fa,0x03fd,0x07fd,0x0ffe,0x1ffe,0xffff,
+};
+
+static const uint8_t cvh_huffbits3[607] = {
+    2, 4, 6, 8, 10, 5, 5, 6, 8, 10, 7, 8,
+    8, 10, 12, 9, 9, 10, 12, 15, 10, 11, 13, 16,
+    16, 5, 6, 8, 10, 11, 5, 6, 8, 10, 12, 7,
+    7, 8, 10, 13, 9, 9, 10, 12, 15, 12, 11, 13,
+    16, 16, 7, 9, 10, 12, 15, 7, 8, 10, 12, 13,
+    9, 9, 11, 13, 16, 11, 11, 12, 14, 16, 12, 12,
+    14, 16, 0, 9, 11, 12, 16, 16, 9, 10, 13, 15,
+    16, 10, 11, 12, 16, 16, 13, 13, 16, 16, 16, 16,
+    16, 15, 16, 0, 11, 13, 16, 16, 15, 11, 13, 15,
+    16, 16, 13, 13, 16, 16, 0, 14, 16, 16, 16, 0,
+    16, 16, 0, 0, 0, 4, 6, 8, 10, 13, 6, 6,
+    8, 10, 13, 9, 8, 10, 12, 16, 10, 10, 11, 15,
+    16, 13, 12, 14, 16, 16, 5, 6, 8, 11, 13, 6,
+    6, 8, 10, 13, 8, 8, 9, 11, 14, 10, 10, 12,
+    12, 16, 13, 12, 13, 15, 16, 7, 8, 9, 12, 16,
+    7, 8, 10, 12, 14, 9, 9, 10, 13, 16, 11, 10,
+    12, 15, 16, 13, 13, 16, 16, 0, 9, 11, 13, 16,
+    16, 9, 10, 12, 15, 16, 10, 11, 13, 16, 16, 13,
+    12, 16, 16, 16, 16, 16, 16, 16, 0, 11, 13, 16,
+    16, 16, 11, 13, 16, 16, 16, 12, 13, 15, 16, 0,
+    16, 16, 16, 16, 0, 16, 16, 0, 0, 0, 6, 8,
+    11, 13, 16, 8, 8, 10, 12, 16, 11, 10, 11, 13,
+    16, 12, 13, 13, 15, 16, 16, 16, 14, 16, 0, 6,
+    8, 10, 13, 16, 8, 8, 10, 12, 16, 10, 10, 11,
+    13, 16, 13, 12, 13, 16, 16, 14, 14, 14, 16, 0,
+    8, 9, 11, 13, 16, 8, 9, 11, 16, 14, 10, 10,
+    12, 15, 16, 12, 12, 13, 16, 16, 15, 16, 16, 16,
+    0, 10, 12, 15, 16, 16, 10, 12, 12, 14, 16, 12,
+    12, 13, 16, 16, 14, 15, 16, 16, 0, 16, 16, 16,
+    0, 0, 12, 15, 15, 16, 0, 13, 13, 16, 16, 0,
+    14, 16, 16, 16, 0, 16, 16, 16, 0, 0, 0, 0,
+    0, 0, 0, 8, 10, 13, 15, 16, 10, 11, 13, 16,
+    16, 13, 13, 14, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 0, 8, 10, 11, 15, 16, 9, 10, 12,
+    16, 16, 12, 12, 15, 16, 16, 16, 14, 16, 16, 16,
+    16, 16, 16, 16, 0, 9, 11, 14, 16, 16, 10, 11,
+    13, 16, 16, 14, 13, 14, 16, 16, 16, 15, 15, 16,
+    0, 16, 16, 16, 0, 0, 11, 13, 16, 16, 16, 11,
+    13, 15, 16, 16, 13, 16, 16, 16, 0, 16, 16, 16,
+    16, 0, 16, 16, 0, 0, 0, 15, 16, 16, 16, 0,
+    14, 16, 16, 16, 0, 16, 16, 16, 0, 0, 16, 16,
+    0, 0, 0, 0, 0, 0, 0, 0, 9, 13, 16, 16,
+    16, 11, 13, 16, 16, 16, 14, 15, 16, 16, 0, 15,
+    16, 16, 16, 0, 16, 16, 0, 0, 0, 9, 13, 15,
+    15, 16, 12, 13, 14, 16, 16, 16, 15, 16, 16, 0,
+    16, 16, 16, 16, 0, 16, 16, 0, 0, 0, 11, 13,
+    15, 16, 0, 12, 14, 16, 16, 0, 16, 16, 16, 16,
+    0, 16, 16, 16, 0, 0, 0, 0, 0, 0, 0, 16,
+    16, 16, 16, 0, 16, 16, 16, 16, 0, 16, 16, 16,
+    0, 0, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0,
+    16, 16, 0, 0, 0, 16, 16,
+};
+
+
+static const uint16_t cvh_huffcodes3[607] = {
+    0x0000,0x0004,0x0022,0x00c6,0x03b0,0x000c,0x000d,0x0023,0x00c7,0x03b1,0x005c,0x00c8,
+    0x00c9,0x03b2,0x0fa4,0x01c2,0x01c3,0x03b3,0x0fa5,0x7f72,0x03b4,0x07b2,0x1f9a,0xff24,
+    0xff25,0x000e,0x0024,0x00ca,0x03b5,0x07b3,0x000f,0x0025,0x00cb,0x03b6,0x0fa6,0x005d,
+    0x005e,0x00cc,0x03b7,0x1f9b,0x01c4,0x01c5,0x03b8,0x0fa7,0x7f73,0x0fa8,0x07b4,0x1f9c,
+    0xff26,0xff27,0x005f,0x01c6,0x03b9,0x0fa9,0x7f74,0x0060,0x00cd,0x03ba,0x0faa,0x1f9d,
+    0x01c7,0x01c8,0x07b5,0x1f9e,0xff28,0x07b6,0x07b7,0x0fab,0x3fa2,0xff29,0x0fac,0x0fad,
+    0x3fa3,0xff2a,0x3fa2,0x01c9,0x07b8,0x0fae,0xff2b,0xff2c,0x01ca,0x03bb,0x1f9f,0x7f75,
+    0xff2d,0x03bc,0x07b9,0x0faf,0xff2e,0xff2f,0x1fa0,0x1fa1,0xff30,0xff31,0xff32,0xff33,
+    0xff34,0x7f76,0xff35,0xff31,0x07ba,0x1fa2,0xff36,0xff37,0x7f77,0x07bb,0x1fa3,0x7f78,
+    0xff38,0xff39,0x1fa4,0x1fa5,0xff3a,0xff3b,0xff2e,0x3fa4,0xff3c,0xff3d,0xff3e,0xff31,
+    0xff3f,0xff40,0xff30,0xff31,0xff31,0x0005,0x0026,0x00ce,0x03bd,0x1fa6,0x0027,0x0028,
+    0x00cf,0x03be,0x1fa7,0x01cb,0x00d0,0x03bf,0x0fb0,0xff41,0x03c0,0x03c1,0x07bc,0x7f79,
+    0xff42,0x1fa8,0x0fb1,0x3fa5,0xff43,0xff44,0x0010,0x0029,0x00d1,0x07bd,0x1fa9,0x002a,
+    0x002b,0x00d2,0x03c2,0x1faa,0x00d3,0x00d4,0x01cc,0x07be,0x3fa6,0x03c3,0x03c4,0x0fb2,
+    0x0fb3,0xff45,0x1fab,0x0fb4,0x1fac,0x7f7a,0xff46,0x0061,0x00d5,0x01cd,0x0fb5,0xff47,
+    0x0062,0x00d6,0x03c5,0x0fb6,0x3fa7,0x01ce,0x01cf,0x03c6,0x1fad,0xff48,0x07bf,0x03c7,
+    0x0fb7,0x7f7b,0xff49,0x1fae,0x1faf,0xff4a,0xff4b,0x7f7b,0x01d0,0x07c0,0x1fb0,0xff4c,
+    0xff4d,0x01d1,0x03c8,0x0fb8,0x7f7c,0xff4e,0x03c9,0x07c1,0x1fb1,0xff4f,0xff50,0x1fb2,
+    0x0fb9,0xff51,0xff52,0xff53,0xff54,0xff55,0xff56,0xff57,0xff52,0x07c2,0x1fb3,0xff58,
+    0xff59,0xff5a,0x07c3,0x1fb4,0xff5b,0xff5c,0xff5d,0x0fba,0x1fb5,0x7f7d,0xff5e,0xff4f,
+    0xff5f,0xff60,0xff61,0xff62,0xff52,0xff63,0xff64,0xff51,0xff52,0xff52,0x002c,0x00d7,
+    0x07c4,0x1fb6,0xff65,0x00d8,0x00d9,0x03ca,0x0fbb,0xff66,0x07c5,0x03cb,0x07c6,0x1fb7,
+    0xff67,0x0fbc,0x1fb8,0x1fb9,0x7f7e,0xff68,0xff69,0xff6a,0x3fa8,0xff6b,0x7f7e,0x002d,
+    0x00da,0x03cc,0x1fba,0xff6c,0x00db,0x00dc,0x03cd,0x0fbd,0xff6d,0x03ce,0x03cf,0x07c7,
+    0x1fbb,0xff6e,0x1fbc,0x0fbe,0x1fbd,0xff6f,0xff70,0x3fa9,0x3faa,0x3fab,0xff71,0xff6f,
+    0x00dd,0x01d2,0x07c8,0x1fbe,0xff72,0x00de,0x01d3,0x07c9,0xff73,0x3fac,0x03d0,0x03d1,
+    0x0fbf,0x7f7f,0xff74,0x0fc0,0x0fc1,0x1fbf,0xff75,0xff76,0x7f80,0xff77,0xff78,0xff79,
+    0xff75,0x03d2,0x0fc2,0x7f81,0xff7a,0xff7b,0x03d3,0x0fc3,0x0fc4,0x3fad,0xff7c,0x0fc5,
+    0x0fc6,0x1fc0,0xff7d,0xff7e,0x3fae,0x7f82,0xff7f,0xff80,0xff80,0xff81,0xff82,0xff83,
+    0xff80,0xff80,0x0fc7,0x7f83,0x7f84,0xff84,0xff7a,0x1fc1,0x1fc2,0xff85,0xff86,0x3fad,
+    0x3faf,0xff87,0xff88,0xff89,0xff7d,0xff8a,0xff8b,0xff8c,0xff80,0xff80,0x3fae,0x7f82,
+    0xff7f,0xff80,0xff80,0x00df,0x03d4,0x1fc3,0x7f85,0xff8d,0x03d5,0x07ca,0x1fc4,0xff8e,
+    0xff8f,0x1fc5,0x1fc6,0x3fb0,0xff90,0xff91,0xff92,0xff93,0xff94,0xff95,0xff96,0xff97,
+    0xff98,0xff99,0xff9a,0xff95,0x00e0,0x03d6,0x07cb,0x7f86,0xff9b,0x01d4,0x03d7,0x0fc8,
+    0xff9c,0xff9d,0x0fc9,0x0fca,0x7f87,0xff9e,0xff9f,0xffa0,0x3fb1,0xffa1,0xffa2,0xffa3,
+    0xffa4,0xffa5,0xffa6,0xffa7,0xffa2,0x01d5,0x07cc,0x3fb2,0xffa8,0xffa9,0x03d8,0x07cd,
+    0x1fc7,0xffaa,0xffab,0x3fb3,0x1fc8,0x3fb4,0xffac,0xffad,0xffae,0x7f88,0x7f89,0xffaf,
+    0xffaf,0xffb0,0xffb1,0xffb2,0xffaf,0xffaf,0x07ce,0x1fc9,0xffb3,0xffb4,0xffb5,0x07cf,
+    0x1fca,0x7f8a,0xffb6,0xffb7,0x1fcb,0xffb8,0xffb9,0xffba,0xffba,0xffbb,0xffbc,0xffbd,
+    0xffbe,0xffbe,0xffbf,0xffc0,0xffbd,0xffbe,0xffbe,0x7f8b,0xffc1,0xffc2,0xffc3,0xffb4,
+    0x3fb5,0xffc4,0xffc5,0xffc6,0xffb6,0xffc7,0xffc8,0xffc9,0xffba,0xffba,0xffca,0xffcb,
+    0xffbd,0xffbe,0xffbe,0xffbb,0xffbc,0xffbd,0xffbe,0xffbe,0x01d6,0x1fcc,0xffcc,0xffcd,
+    0xffce,0x07d0,0x1fcd,0xffcf,0xffd0,0xffd1,0x3fb6,0x7f8c,0xffd2,0xffd3,0xff90,0x7f8d,
+    0xffd4,0xffd5,0xffd6,0xff95,0xffd7,0xffd8,0xff94,0xff95,0xff95,0x01d7,0x1fce,0x7f8e,
+    0x7f8f,0xffd9,0x0fcb,0x1fcf,0x3fb7,0xffda,0xffdb,0xffdc,0x7f90,0xffdd,0xffde,0xff9e,
+    0xffdf,0xffe0,0xffe1,0xffe2,0xffa2,0xffe3,0xffe4,0xffa1,0xffa2,0xffa2,0x07d1,0x1fd0,
+    0x7f91,0xffe5,0xffa8,0x0fcc,0x3fb8,0xffe6,0xffe7,0xffaa,0xffe8,0xffe9,0xffea,0xffeb,
+    0xffac,0xffec,0xffed,0xffee,0xffaf,0xffaf,0xffae,0x7f88,0x7f89,0xffaf,0xffaf,0xffef,
+    0xfff0,0xfff1,0xfff2,0xffb4,0xfff3,0xfff4,0xfff5,0xfff6,0xffb6,0xfff7,0xfff8,0xfff9,
+    0xffba,0xffba,0xfffa,0xfffb,0xffbd,0xffbe,0xffbe,0xffbb,0xffbc,0xffbd,0xffbe,0xffbe,
+    0xfffc,0xfffd,0xffb3,0xffb4,0xffb4,0xfffe,0xffff,
+};
+
+static const uint8_t cvh_huffbits4[246] = {
+    2, 4, 7, 10, 4, 5, 7, 10, 7, 8, 10, 14,
+    11, 11, 15, 15, 4, 5, 9, 12, 5, 5, 8, 12,
+    8, 7, 10, 15, 11, 11, 15, 15, 7, 9, 12, 15,
+    8, 8, 12, 15, 10, 10, 13, 15, 14, 14, 15, 0,
+    11, 13, 15, 15, 11, 13, 15, 15, 14, 15, 15, 0,
+    15, 15, 0, 0, 4, 5, 9, 13, 5, 6, 9, 13,
+    9, 9, 11, 15, 14, 13, 15, 15, 4, 6, 9, 12,
+    5, 6, 9, 13, 9, 8, 11, 15, 13, 12, 15, 15,
+    7, 9, 12, 15, 7, 8, 11, 15, 10, 10, 14, 15,
+    14, 15, 15, 0, 10, 12, 15, 15, 11, 13, 15, 15,
+    15, 15, 15, 0, 15, 15, 0, 0, 6, 9, 13, 14,
+    8, 9, 12, 15, 12, 12, 15, 15, 15, 15, 15, 0,
+    7, 9, 13, 15, 8, 9, 12, 15, 11, 12, 15, 15,
+    15, 15, 15, 0, 9, 11, 15, 15, 9, 11, 15, 15,
+    14, 14, 15, 0, 15, 15, 0, 0, 14, 15, 15, 0,
+    14, 15, 15, 0, 15, 15, 0, 0, 0, 0, 0, 0,
+    9, 12, 15, 15, 12, 13, 15, 15, 15, 15, 15, 0,
+    15, 15, 0, 0, 10, 12, 15, 15, 12, 14, 15, 15,
+    15, 15, 15, 0, 15, 15, 0, 0, 14, 15, 15, 0,
+    15, 15, 15, 0, 15, 15, 0, 0, 0, 0, 0, 0,
+    15, 15, 0, 0, 15, 15,
+};
+
+
+static const uint16_t cvh_huffcodes4[246] = {
+    0x0000,0x0004,0x006c,0x03e6,0x0005,0x0012,0x006d,0x03e7,0x006e,0x00e8,0x03e8,0x3fc4,
+    0x07e0,0x07e1,0x7fa4,0x7fa5,0x0006,0x0013,0x01e2,0x0fda,0x0014,0x0015,0x00e9,0x0fdb,
+    0x00ea,0x006f,0x03e9,0x7fa6,0x07e2,0x07e3,0x7fa7,0x7fa8,0x0070,0x01e3,0x0fdc,0x7fa9,
+    0x00eb,0x00ec,0x0fdd,0x7faa,0x03ea,0x03eb,0x1fd6,0x7fab,0x3fc5,0x3fc6,0x7fac,0x1fd6,
+    0x07e4,0x1fd7,0x7fad,0x7fae,0x07e5,0x1fd8,0x7faf,0x7fb0,0x3fc7,0x7fb1,0x7fb2,0x1fd6,
+    0x7fb3,0x7fb4,0x1fd6,0x1fd6,0x0007,0x0016,0x01e4,0x1fd9,0x0017,0x0032,0x01e5,0x1fda,
+    0x01e6,0x01e7,0x07e6,0x7fb5,0x3fc8,0x1fdb,0x7fb6,0x7fb7,0x0008,0x0033,0x01e8,0x0fde,
+    0x0018,0x0034,0x01e9,0x1fdc,0x01ea,0x00ed,0x07e7,0x7fb8,0x1fdd,0x0fdf,0x7fb9,0x7fba,
+    0x0071,0x01eb,0x0fe0,0x7fbb,0x0072,0x00ee,0x07e8,0x7fbc,0x03ec,0x03ed,0x3fc9,0x7fbd,
+    0x3fca,0x7fbe,0x7fbf,0x3fc9,0x03ee,0x0fe1,0x7fc0,0x7fc1,0x07e9,0x1fde,0x7fc2,0x7fc3,
+    0x7fc4,0x7fc5,0x7fc6,0x3fc9,0x7fc7,0x7fc8,0x3fc9,0x3fc9,0x0035,0x01ec,0x1fdf,0x3fcb,
+    0x00ef,0x01ed,0x0fe2,0x7fc9,0x0fe3,0x0fe4,0x7fca,0x7fcb,0x7fcc,0x7fcd,0x7fce,0x7fca,
+    0x0073,0x01ee,0x1fe0,0x7fcf,0x00f0,0x01ef,0x0fe5,0x7fd0,0x07ea,0x0fe6,0x7fd1,0x7fd2,
+    0x7fd3,0x7fd4,0x7fd5,0x7fd1,0x01f0,0x07eb,0x7fd6,0x7fd7,0x01f1,0x07ec,0x7fd8,0x7fd9,
+    0x3fcc,0x3fcd,0x7fda,0x7fda,0x7fdb,0x7fdc,0x7fda,0x7fda,0x3fce,0x7fdd,0x7fde,0x7fd6,
+    0x3fcf,0x7fdf,0x7fe0,0x7fd8,0x7fe1,0x7fe2,0x7fda,0x7fda,0x3fcc,0x3fcd,0x7fda,0x7fda,
+    0x01f2,0x0fe7,0x7fe3,0x7fe4,0x0fe8,0x1fe1,0x7fe5,0x7fe6,0x7fe7,0x7fe8,0x7fe9,0x7fca,
+    0x7fea,0x7feb,0x7fca,0x7fca,0x03ef,0x0fe9,0x7fec,0x7fed,0x0fea,0x3fd0,0x7fee,0x7fef,
+    0x7ff0,0x7ff1,0x7ff2,0x7fd1,0x7ff3,0x7ff4,0x7fd1,0x7fd1,0x3fd1,0x7ff5,0x7ff6,0x7fd6,
+    0x7ff7,0x7ff8,0x7ff9,0x7fd8,0x7ffa,0x7ffb,0x7fda,0x7fda,0x3fcc,0x3fcd,0x7fda,0x7fda,
+    0x7ffc,0x7ffd,0x7fd6,0x7fd6,0x7ffe,0x7fff,
+};
+
+
+static const uint8_t cvh_huffbits5[230] = {
+    2, 4, 8, 4, 5, 9, 9, 10, 14, 4, 6, 11,
+    5, 6, 12, 10, 11, 15, 9, 11, 15, 10, 13, 15,
+    14, 15, 0, 4, 6, 12, 6, 7, 12, 12, 12, 15,
+    5, 7, 13, 6, 7, 13, 12, 13, 15, 10, 12, 15,
+    11, 13, 15, 15, 15, 0, 8, 13, 15, 11, 12, 15,
+    15, 15, 0, 10, 13, 15, 12, 15, 15, 15, 15, 0,
+    15, 15, 0, 15, 15, 0, 0, 0, 0, 4, 5, 11,
+    5, 7, 12, 11, 12, 15, 6, 7, 13, 7, 8, 14,
+    12, 14, 15, 11, 13, 15, 12, 13, 15, 15, 15, 0,
+    5, 6, 13, 7, 8, 15, 12, 14, 15, 6, 8, 14,
+    7, 8, 15, 14, 15, 15, 12, 12, 15, 12, 13, 15,
+    15, 15, 0, 9, 13, 15, 12, 13, 15, 15, 15, 0,
+    11, 13, 15, 13, 13, 15, 15, 15, 0, 14, 15, 0,
+    15, 15, 0, 0, 0, 0, 8, 10, 15, 11, 12, 15,
+    15, 15, 0, 10, 12, 15, 12, 13, 15, 15, 15, 0,
+    14, 15, 0, 15, 15, 0, 0, 0, 0, 8, 12, 15,
+    12, 13, 15, 15, 15, 0, 11, 13, 15, 13, 15, 15,
+    15, 15, 0, 15, 15, 0, 15, 15, 0, 0, 0, 0,
+    14, 15, 0, 15, 15, 0, 0, 0, 0, 15, 15, 0,
+    15, 15,
+};
+
+
+
+static const uint16_t cvh_huffcodes5[230] = {
+    0x0000,0x0004,0x00f0,0x0005,0x0012,0x01f0,0x01f1,0x03e8,0x3fce,0x0006,0x0030,0x07de,
+    0x0013,0x0031,0x0fd2,0x03e9,0x07df,0x7fb0,0x01f2,0x07e0,0x7fb1,0x03ea,0x1fd2,0x7fb2,
+    0x3fcf,0x7fb3,0x0031,0x0007,0x0032,0x0fd3,0x0033,0x0070,0x0fd4,0x0fd5,0x0fd6,0x7fb4,
+    0x0014,0x0071,0x1fd3,0x0034,0x0072,0x1fd4,0x0fd7,0x1fd5,0x7fb5,0x03eb,0x0fd8,0x7fb6,
+    0x07e1,0x1fd6,0x7fb7,0x7fb8,0x7fb9,0x0072,0x00f1,0x1fd7,0x7fba,0x07e2,0x0fd9,0x7fbb,
+    0x7fbc,0x7fbd,0x0070,0x03ec,0x1fd8,0x7fbe,0x0fda,0x7fbf,0x7fc0,0x7fc1,0x7fc2,0x0072,
+    0x7fc3,0x7fc4,0x0071,0x7fc5,0x7fc6,0x0072,0x0034,0x0072,0x0072,0x0008,0x0015,0x07e3,
+    0x0016,0x0073,0x0fdb,0x07e4,0x0fdc,0x7fc7,0x0035,0x0074,0x1fd9,0x0075,0x00f2,0x3fd0,
+    0x0fdd,0x3fd1,0x7fc8,0x07e5,0x1fda,0x7fc9,0x0fde,0x1fdb,0x7fca,0x7fcb,0x7fcc,0x00f2,
+    0x0017,0x0036,0x1fdc,0x0076,0x00f3,0x7fcd,0x0fdf,0x3fd2,0x7fce,0x0037,0x00f4,0x3fd3,
+    0x0077,0x00f5,0x7fcf,0x3fd4,0x7fd0,0x7fd1,0x0fe0,0x0fe1,0x7fd2,0x0fe2,0x1fdd,0x7fd3,
+    0x7fd4,0x7fd5,0x00f5,0x01f3,0x1fde,0x7fd6,0x0fe3,0x1fdf,0x7fd7,0x7fd8,0x7fd9,0x00f3,
+    0x07e6,0x1fe0,0x7fda,0x1fe1,0x1fe2,0x7fdb,0x7fdc,0x7fdd,0x00f5,0x3fd5,0x7fde,0x00f4,
+    0x7fdf,0x7fe0,0x00f5,0x0077,0x00f5,0x00f5,0x00f6,0x03ed,0x7fe1,0x07e7,0x0fe4,0x7fe2,
+    0x7fe3,0x7fe4,0x0073,0x03ee,0x0fe5,0x7fe5,0x0fe6,0x1fe3,0x7fe6,0x7fe7,0x7fe8,0x00f2,
+    0x3fd6,0x7fe9,0x0074,0x7fea,0x7feb,0x00f2,0x0075,0x00f2,0x00f2,0x00f7,0x0fe7,0x7fec,
+    0x0fe8,0x1fe4,0x7fed,0x7fee,0x7fef,0x00f3,0x07e8,0x1fe5,0x7ff0,0x1fe6,0x7ff1,0x7ff2,
+    0x7ff3,0x7ff4,0x00f5,0x7ff5,0x7ff6,0x00f4,0x7ff7,0x7ff8,0x00f5,0x0077,0x00f5,0x00f5,
+    0x3fd7,0x7ff9,0x0036,0x7ffa,0x7ffb,0x00f3,0x0076,0x00f3,0x00f3,0x7ffc,0x7ffd,0x0000,
+    0x7ffe,0x7fff,
+};
+
+
+static const uint8_t cvh_huffbits6[32] = {
+     1,  4,  4,  6,  4,  6,  6,  8,  4,  6,  6,  8,
+     6,  9,  8, 10,  4,  6,  7,  8,  6,  9,  8, 11,
+     6,  9,  8, 10,  8, 10,  9,  11,
+};
+
+static const uint16_t cvh_huffcodes6[32] = {
+    0x0000,0x0008,0x0009,0x0034,0x000a,0x0035,0x0036,0x00f6,0x000b,0x0037,0x0038,0x00f7,
+    0x0039,0x01fa,0x00f8,0x03fc,0x000c,0x003a,0x007a,0x00f9,0x003b,0x01fb,0x00fa,0x07fe,
+    0x003c,0x01fc,0x00fb,0x03fd,0x00fc,0x03fe,0x01fd,0x07ff,
+};
+
+static const uint16_t* cvh_huffcodes[7] = {
+    cvh_huffcodes0, cvh_huffcodes1, cvh_huffcodes2, cvh_huffcodes3,
+    cvh_huffcodes4, cvh_huffcodes5, cvh_huffcodes6,
+};
+
+static const uint8_t* cvh_huffbits[7] = {
+    cvh_huffbits0, cvh_huffbits1, cvh_huffbits2, cvh_huffbits3,
+    cvh_huffbits4, cvh_huffbits5, cvh_huffbits6,
+};
+
+
+static const uint16_t ccpl_huffcodes2[3] = {
+    0x02,0x00,0x03,
+};
+
+static const uint16_t ccpl_huffcodes3[7] = {
+    0x3e,0x1e,0x02,0x00,0x06,0x0e,0x3f,
+};
+
+static const uint16_t ccpl_huffcodes4[15] = {
+    0xfc,0xfd,0x7c,0x3c,0x1c,0x0c,0x04,0x00,0x05,0x0d,0x1d,0x3d,
+    0x7d,0xfe,0xff,
+};
+
+static const uint16_t ccpl_huffcodes5[31] = {
+    0x03f8,0x03f9,0x03fa,0x03fb,0x01f8,0x01f9,0x00f8,0x00f9,0x0078,0x0079,0x0038,0x0039,
+    0x0018,0x0019,0x0004,0x0000,0x0005,0x001a,0x001b,0x003a,0x003b,0x007a,0x007b,0x00fa,
+    0x00fb,0x01fa,0x01fb,0x03fc,0x03fd,0x03fe,0x03ff,
+};
+
+static const uint16_t ccpl_huffcodes6[63] = {
+    0x0004,0x0005,0x0005,0x0006,0x0006,0x0007,0x0007,0x0007,0x0007,0x0008,0x0008,0x0008,
+    0x0008,0x0009,0x0009,0x0009,0x0009,0x000a,0x000a,0x000a,0x000a,0x000a,0x000b,0x000b,
+    0x000b,0x000b,0x000c,0x000d,0x000e,0x000e,0x0010,0x0000,0x000a,0x0018,0x0019,0x0036,
+    0x0037,0x0074,0x0075,0x0076,0x0077,0x00f4,0x00f5,0x00f6,0x00f7,0x01f5,0x01f6,0x01f7,
+    0x01f8,0x03f6,0x03f7,0x03f8,0x03f9,0x03fa,0x07fa,0x07fb,0x07fc,0x07fd,0x0ffd,0x1ffd,
+    0x3ffd,0x3ffe,0xffff,
+};
+
+static const uint8_t ccpl_huffbits2[3] = {
+    2,1,2,
+};
+
+static const uint8_t ccpl_huffbits3[7] = {
+    6,5,2,1,3,4,6,
+};
+
+static const uint8_t ccpl_huffbits4[15] = {
+    8,8,7,6,5,4,3,1,3,4,5,6,7,8,8,
+};
+
+static const uint8_t ccpl_huffbits5[31] = {
+    10,10,10,10,9,9,8,8,7,7,6,6,
+    5,5,3,1,3,5,5,6,6,7,7,8,
+    8,9,9,10,10,10,10,
+};
+
+static const uint8_t ccpl_huffbits6[63] = {
+    16,15,14,13,12,11,11,11,11,10,10,10,
+    10,9,9,9,9,9,8,8,8,8,7,7,
+    7,7,6,6,5,5,3,1,4,5,5,6,
+    6,7,7,7,7,8,8,8,8,9,9,9,
+    9,10,10,10,10,10,11,11,11,11,12,13,
+    14,14,16,
+};
+
+static const uint16_t* ccpl_huffcodes[5] = {
+    ccpl_huffcodes2,ccpl_huffcodes3,
+    ccpl_huffcodes4,ccpl_huffcodes5,ccpl_huffcodes6
+};
+
+static const uint8_t* ccpl_huffbits[5] = {
+    ccpl_huffbits2,ccpl_huffbits3,
+    ccpl_huffbits4,ccpl_huffbits5,ccpl_huffbits6
+};
+
+
+//Coupling tables
+
+static const int cplband[51] = {
+    0,1,2,3,4,5,6,7,8,9,
+    10,11,11,12,12,13,13,14,14,14,
+    15,15,15,15,16,16,16,16,16,17,
+    17,17,17,17,17,18,18,18,18,18,
+    18,18,19,19,19,19,19,19,19,19,
+    19,
+};
+
+static const float cplscale2[3] = {
+0.953020632266998,0.70710676908493,0.302905440330505,
+};
+
+static const float cplscale3[7] = {
+0.981279790401459,0.936997592449188,0.875934481620789,0.70710676908493,
+0.482430040836334,0.349335819482803,0.192587479948997,
+};
+
+static const float cplscale4[15] = {
+0.991486728191376,0.973249018192291,0.953020632266998,0.930133521556854,
+0.903453230857849,0.870746195316315,0.826180458068848,0.70710676908493,
+0.563405573368073,0.491732746362686,0.428686618804932,0.367221474647522,
+0.302905440330505,0.229752898216248,0.130207896232605,
+};
+
+static const float cplscale5[31] = {
+0.995926380157471,0.987517595291138,0.978726446628571,0.969505727291107,
+0.95979779958725,0.949531257152557,0.938616216182709,0.926936149597168,
+0.914336204528809,0.900602877140045,0.885426938533783,0.868331849575043,
+0.84851086139679,0.824381768703461,0.791833400726318,0.70710676908493,
+0.610737144947052,0.566034197807312,0.529177963733673,0.495983630418777,
+0.464778542518616,0.434642940759659,0.404955863952637,0.375219136476517,
+0.344963222742081,0.313672333955765,0.280692428350449,0.245068684220314,
+0.205169528722763,0.157508864998817,0.0901700109243393,
+};
+
+static const float cplscale6[63] = {
+0.998005926609039,0.993956744670868,0.989822506904602,0.985598564147949,
+0.981279790401459,0.976860702037811,0.972335040569305,0.967696130275726,
+0.962936460971832,0.958047747612000,0.953020632266998,0.947844684123993,
+0.942508161067963,0.936997592449188,0.931297719478607,0.925390899181366,
+0.919256627559662,0.912870943546295,0.906205296516418,0.899225592613220,
+0.891890347003937,0.884148240089417,0.875934481620789,0.867165684700012,
+0.857730865478516,0.847477376461029,0.836184680461884,0.823513329029083,
+0.808890223503113,0.791194140911102,0.767520070075989,0.707106769084930,
+0.641024887561798,0.611565053462982,0.587959706783295,0.567296981811523,
+0.548448026180267,0.530831515789032,0.514098942279816,0.498019754886627,
+0.482430040836334,0.467206478118896,0.452251672744751,0.437485188245773,
+0.422837972640991,0.408248275518417,0.393658757209778,0.379014074802399,
+0.364258885383606,0.349335819482803,0.334183186292648,0.318732559680939,
+0.302905440330505,0.286608695983887,0.269728302955627,0.252119421958923,
+0.233590632677078,0.213876649737358,0.192587479948997,0.169101938605309,
+0.142307326197624,0.109772264957428,0.0631198287010193,
+};
+
+static const float* cplscales[5] = {
+    cplscale2, cplscale3, cplscale4, cplscale5, cplscale6,
+};
diff --git a/contrib/ffmpeg/libavcodec/cscd.c b/contrib/ffmpeg/libavcodec/cscd.c
new file mode 100644
index 000000000..e4257f4c0
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/cscd.c
@@ -0,0 +1,264 @@
+/*
+ * CamStudio decoder
+ * Copyright (c) 2006 Reimar Doeffinger
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "common.h"
+#include "avcodec.h"
+
+#ifdef CONFIG_ZLIB
+#include <zlib.h>
+#endif
+#include "lzo.h"
+
+typedef struct {
+    AVFrame pic;
+    int linelen, height, bpp;
+    unsigned int decomp_size;
+    unsigned char* decomp_buf;
+} CamStudioContext;
+
+static void copy_frame_default(AVFrame *f, uint8_t *src,
+                               int linelen, int height) {
+    int i;
+    uint8_t *dst = f->data[0];
+    dst += (height - 1) * f->linesize[0];
+    for (i = height; i; i--) {
+        memcpy(dst, src, linelen);
+        src += linelen;
+        dst -= f->linesize[0];
+    }
+}
+
+static void add_frame_default(AVFrame *f, uint8_t *src,
+                              int linelen, int height) {
+    int i, j;
+    uint8_t *dst = f->data[0];
+    dst += (height - 1) * f->linesize[0];
+    for (i = height; i; i--) {
+        for (j = linelen; j; j--)
+            *dst++ += *src++;
+        dst -= f->linesize[0] + linelen;
+    }
+}
+
+#ifndef WORDS_BIGENDIAN
+#define copy_frame_16 copy_frame_default
+#define copy_frame_32 copy_frame_default
+#define add_frame_16 add_frame_default
+#define add_frame_32 add_frame_default
+#else
+static void copy_frame_16(AVFrame *f, uint8_t *src,
+                          int linelen, int height) {
+    int i, j;
+    uint8_t *dst = f->data[0];
+    dst += (height - 1) * f->linesize[0];
+    for (i = height; i; i--) {
+        for (j = linelen / 2; j; j--) {
+          dst[0] = src[1];
+          dst[1] = src[0];
+          src += 2;
+          dst += 2;
+        }
+        dst -= f->linesize[0] + linelen;
+    }
+}
+
+static void copy_frame_32(AVFrame *f, uint8_t *src,
+                          int linelen, int height) {
+    int i, j;
+    uint8_t *dst = f->data[0];
+    dst += (height - 1) * f->linesize[0];
+    for (i = height; i; i--) {
+        for (j = linelen / 4; j; j--) {
+          dst[0] = src[3];
+          dst[1] = src[2];
+          dst[2] = src[1];
+          dst[3] = src[0];
+          src += 4;
+          dst += 4;
+        }
+        dst -= f->linesize[0] + linelen;
+    }
+}
+
+static void add_frame_16(AVFrame *f, uint8_t *src,
+                         int linelen, int height) {
+    int i, j;
+    uint8_t *dst = f->data[0];
+    dst += (height - 1) * f->linesize[0];
+    for (i = height; i; i--) {
+        for (j = linelen / 2; j; j--) {
+          dst[0] += src[1];
+          dst[1] += src[0];
+          src += 2;
+          dst += 2;
+        }
+        dst -= f->linesize[0] + linelen;
+    }
+}
+
+static void add_frame_32(AVFrame *f, uint8_t *src,
+                         int linelen, int height) {
+    int i, j;
+    uint8_t *dst = f->data[0];
+    dst += (height - 1) * f->linesize[0];
+    for (i = height; i; i--) {
+        for (j = linelen / 4; j; j--) {
+          dst[0] += src[3];
+          dst[1] += src[2];
+          dst[2] += src[1];
+          dst[3] += src[0];
+          src += 4;
+          dst += 4;
+        }
+        dst -= f->linesize[0] + linelen;
+    }
+}
+#endif
+
+static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
+                        uint8_t *buf, int buf_size) {
+    CamStudioContext *c = (CamStudioContext *)avctx->priv_data;
+    AVFrame *picture = data;
+
+    if (buf_size < 2) {
+        av_log(avctx, AV_LOG_ERROR, "coded frame too small\n");
+        return -1;
+    }
+
+    if (c->pic.data[0])
+        avctx->release_buffer(avctx, &c->pic);
+    c->pic.reference = 1;
+    c->pic.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_READABLE |
+                          FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if (avctx->get_buffer(avctx, &c->pic) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    // decompress data
+    switch ((buf[0] >> 1) & 7) {
+        case 0: { // lzo compression
+            int outlen = c->decomp_size, inlen = buf_size - 2;
+            if (lzo1x_decode(c->decomp_buf, &outlen, &buf[2], &inlen))
+                av_log(avctx, AV_LOG_ERROR, "error during lzo decompression\n");
+            break;
+        }
+        case 1: { // zlib compression
+#ifdef CONFIG_ZLIB
+            unsigned long dlen = c->decomp_size;
+            if (uncompress(c->decomp_buf, &dlen, &buf[2], buf_size - 2) != Z_OK)
+                av_log(avctx, AV_LOG_ERROR, "error during zlib decompression\n");
+            break;
+#else
+            av_log(avctx, AV_LOG_ERROR, "compiled without zlib support\n");
+            return -1;
+#endif
+        }
+        default:
+            av_log(avctx, AV_LOG_ERROR, "unknown compression\n");
+            return -1;
+    }
+
+    // flip upside down, add difference frame
+    if (buf[0] & 1) { // keyframe
+        c->pic.pict_type = FF_I_TYPE;
+        c->pic.key_frame = 1;
+        switch (c->bpp) {
+          case 16:
+              copy_frame_16(&c->pic, c->decomp_buf, c->linelen, c->height);
+              break;
+          case 32:
+              copy_frame_32(&c->pic, c->decomp_buf, c->linelen, c->height);
+              break;
+          default:
+              copy_frame_default(&c->pic, c->decomp_buf, c->linelen, c->height);
+        }
+    } else {
+        c->pic.pict_type = FF_P_TYPE;
+        c->pic.key_frame = 0;
+        switch (c->bpp) {
+          case 16:
+              add_frame_16(&c->pic, c->decomp_buf, c->linelen, c->height);
+              break;
+          case 32:
+              add_frame_32(&c->pic, c->decomp_buf, c->linelen, c->height);
+              break;
+          default:
+              add_frame_default(&c->pic, c->decomp_buf, c->linelen, c->height);
+        }
+    }
+
+    *picture = c->pic;
+    *data_size = sizeof(AVFrame);
+    return buf_size;
+}
+
+static int decode_init(AVCodecContext *avctx) {
+    CamStudioContext *c = (CamStudioContext *)avctx->priv_data;
+    if (avcodec_check_dimensions(avctx, avctx->height, avctx->width) < 0) {
+        return 1;
+    }
+    avctx->has_b_frames = 0;
+    switch (avctx->bits_per_sample) {
+        case 16: avctx->pix_fmt = PIX_FMT_RGB565; break;
+        case 24: avctx->pix_fmt = PIX_FMT_BGR24; break;
+        case 32: avctx->pix_fmt = PIX_FMT_RGBA32; break;
+        default:
+            av_log(avctx, AV_LOG_ERROR,
+                   "CamStudio codec error: unvalid depth %i bpp\n",
+                   avctx->bits_per_sample);
+             return 1;
+    }
+    c->bpp = avctx->bits_per_sample;
+    c->pic.data[0] = NULL;
+    c->linelen = avctx->width * avctx->bits_per_sample / 8;
+    c->height = avctx->height;
+    c->decomp_size = c->height * c->linelen;
+    c->decomp_buf = av_malloc(c->decomp_size + LZO_OUTPUT_PADDING);
+    if (!c->decomp_buf) {
+        av_log(avctx, AV_LOG_ERROR, "Can't allocate decompression buffer.\n");
+        return 1;
+    }
+    return 0;
+}
+
+static int decode_end(AVCodecContext *avctx) {
+    CamStudioContext *c = (CamStudioContext *)avctx->priv_data;
+    av_freep(&c->decomp_buf);
+    if (c->pic.data[0])
+        avctx->release_buffer(avctx, &c->pic);
+    return 0;
+}
+
+AVCodec cscd_decoder = {
+    "camstudio",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_CSCD,
+    sizeof(CamStudioContext),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame,
+    CODEC_CAP_DR1,
+};
+
diff --git a/contrib/ffmpeg/libavcodec/cyuv.c b/contrib/ffmpeg/libavcodec/cyuv.c
new file mode 100644
index 000000000..101f2bd85
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/cyuv.c
@@ -0,0 +1,190 @@
+/*
+ *
+ * Copyright (C) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Creative YUV (CYUV) Video Decoder
+ *   by Mike Melanson (melanson@pcisys.net)
+ * based on "Creative YUV (CYUV) stream format for AVI":
+ *   http://www.csse.monash.edu.au/~timf/videocodec/cyuv.txt
+ *
+ */
+
+/**
+ * @file cyuv.c
+ * Creative YUV (CYUV) Video Decoder.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+
+typedef struct CyuvDecodeContext {
+    AVCodecContext *avctx;
+    int width, height;
+    AVFrame frame;
+} CyuvDecodeContext;
+
+static int cyuv_decode_init(AVCodecContext *avctx)
+{
+    CyuvDecodeContext *s = avctx->priv_data;
+
+    s->avctx = avctx;
+    s->width = avctx->width;
+    /* width needs to be divisible by 4 for this codec to work */
+    if (s->width & 0x3)
+        return -1;
+    s->height = avctx->height;
+    avctx->pix_fmt = PIX_FMT_YUV411P;
+    avctx->has_b_frames = 0;
+
+    return 0;
+}
+
+static int cyuv_decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             uint8_t *buf, int buf_size)
+{
+    CyuvDecodeContext *s=avctx->priv_data;
+
+    unsigned char *y_plane;
+    unsigned char *u_plane;
+    unsigned char *v_plane;
+    int y_ptr;
+    int u_ptr;
+    int v_ptr;
+
+    /* prediction error tables (make it clear that they are signed values) */
+    signed char *y_table = (signed char*)buf +  0;
+    signed char *u_table = (signed char*)buf + 16;
+    signed char *v_table = (signed char*)buf + 32;
+
+    unsigned char y_pred, u_pred, v_pred;
+    int stream_ptr;
+    unsigned char cur_byte;
+    int pixel_groups;
+
+    /* sanity check the buffer size: A buffer has 3x16-bytes tables
+     * followed by (height) lines each with 3 bytes to represent groups
+     * of 4 pixels. Thus, the total size of the buffer ought to be:
+     *    (3 * 16) + height * (width * 3 / 4) */
+    if (buf_size != 48 + s->height * (s->width * 3 / 4)) {
+      av_log(avctx, AV_LOG_ERROR, "ffmpeg: cyuv: got a buffer with %d bytes when %d were expected\n",
+        buf_size,
+        48 + s->height * (s->width * 3 / 4));
+      return -1;
+    }
+
+    /* pixel data starts 48 bytes in, after 3x16-byte tables */
+    stream_ptr = 48;
+
+    if(s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID;
+    s->frame.reference = 0;
+    if(avctx->get_buffer(avctx, &s->frame) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    y_plane = s->frame.data[0];
+    u_plane = s->frame.data[1];
+    v_plane = s->frame.data[2];
+
+    /* iterate through each line in the height */
+    for (y_ptr = 0, u_ptr = 0, v_ptr = 0;
+         y_ptr < (s->height * s->frame.linesize[0]);
+         y_ptr += s->frame.linesize[0] - s->width,
+         u_ptr += s->frame.linesize[1] - s->width / 4,
+         v_ptr += s->frame.linesize[2] - s->width / 4) {
+
+        /* reset predictors */
+        cur_byte = buf[stream_ptr++];
+        u_plane[u_ptr++] = u_pred = cur_byte & 0xF0;
+        y_plane[y_ptr++] = y_pred = (cur_byte & 0x0F) << 4;
+
+        cur_byte = buf[stream_ptr++];
+        v_plane[v_ptr++] = v_pred = cur_byte & 0xF0;
+        y_pred += y_table[cur_byte & 0x0F];
+        y_plane[y_ptr++] = y_pred;
+
+        cur_byte = buf[stream_ptr++];
+        y_pred += y_table[cur_byte & 0x0F];
+        y_plane[y_ptr++] = y_pred;
+        y_pred += y_table[(cur_byte & 0xF0) >> 4];
+        y_plane[y_ptr++] = y_pred;
+
+        /* iterate through the remaining pixel groups (4 pixels/group) */
+        pixel_groups = s->width / 4 - 1;
+        while (pixel_groups--) {
+
+            cur_byte = buf[stream_ptr++];
+            u_pred += u_table[(cur_byte & 0xF0) >> 4];
+            u_plane[u_ptr++] = u_pred;
+            y_pred += y_table[cur_byte & 0x0F];
+            y_plane[y_ptr++] = y_pred;
+
+            cur_byte = buf[stream_ptr++];
+            v_pred += v_table[(cur_byte & 0xF0) >> 4];
+            v_plane[v_ptr++] = v_pred;
+            y_pred += y_table[cur_byte & 0x0F];
+            y_plane[y_ptr++] = y_pred;
+
+            cur_byte = buf[stream_ptr++];
+            y_pred += y_table[cur_byte & 0x0F];
+            y_plane[y_ptr++] = y_pred;
+            y_pred += y_table[(cur_byte & 0xF0) >> 4];
+            y_plane[y_ptr++] = y_pred;
+
+        }
+    }
+
+    *data_size=sizeof(AVFrame);
+    *(AVFrame*)data= s->frame;
+
+    return buf_size;
+}
+
+static int cyuv_decode_end(AVCodecContext *avctx)
+{
+/*    CyuvDecodeContext *s = avctx->priv_data;*/
+
+    return 0;
+}
+
+AVCodec cyuv_decoder = {
+    "cyuv",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_CYUV,
+    sizeof(CyuvDecodeContext),
+    cyuv_decode_init,
+    NULL,
+    cyuv_decode_end,
+    cyuv_decode_frame,
+    CODEC_CAP_DR1,
+    NULL
+};
+
diff --git a/contrib/ffmpeg/libavcodec/dct-test.c b/contrib/ffmpeg/libavcodec/dct-test.c
new file mode 100644
index 000000000..2c16f47e4
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/dct-test.c
@@ -0,0 +1,535 @@
+/*
+ * (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file dct-test.c
+ * DCT test. (c) 2001 Fabrice Bellard.
+ * Started from sample code by Juan J. Sierralta P.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "dsputil.h"
+
+#include "simple_idct.h"
+#include "faandct.h"
+
+#ifndef MAX
+#define MAX(a, b)  (((a) > (b)) ? (a) : (b))
+#endif
+
+#undef printf
+
+void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
+
+/* reference fdct/idct */
+extern void fdct(DCTELEM *block);
+extern void idct(DCTELEM *block);
+extern void ff_idct_xvid_mmx(DCTELEM *block);
+extern void ff_idct_xvid_mmx2(DCTELEM *block);
+extern void init_fdct();
+
+extern void j_rev_dct(DCTELEM *data);
+extern void ff_mmx_idct(DCTELEM *data);
+extern void ff_mmxext_idct(DCTELEM *data);
+
+extern void odivx_idct_c (short *block);
+
+#define AANSCALE_BITS 12
+static const unsigned short aanscales[64] = {
+    /* precomputed values scaled up by 14 bits */
+    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+    8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+    4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+};
+
+uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
+
+int64_t gettime(void)
+{
+    struct timeval tv;
+    gettimeofday(&tv,NULL);
+    return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
+}
+
+#define NB_ITS 20000
+#define NB_ITS_SPEED 50000
+
+static short idct_mmx_perm[64];
+
+static short idct_simple_mmx_perm[64]={
+        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
+        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
+        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
+        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
+        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
+        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
+        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
+        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
+};
+
+void idct_mmx_init(void)
+{
+    int i;
+
+    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
+    for (i = 0; i < 64; i++) {
+        idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
+//        idct_simple_mmx_perm[i] = simple_block_permute_op(i);
+    }
+}
+
+static DCTELEM block[64] __attribute__ ((aligned (8)));
+static DCTELEM block1[64] __attribute__ ((aligned (8)));
+static DCTELEM block_org[64] __attribute__ ((aligned (8)));
+
+void dct_error(const char *name, int is_idct,
+               void (*fdct_func)(DCTELEM *block),
+               void (*fdct_ref)(DCTELEM *block), int test)
+{
+    int it, i, scale;
+    int err_inf, v;
+    int64_t err2, ti, ti1, it1;
+    int64_t sysErr[64], sysErrMax=0;
+    int maxout=0;
+    int blockSumErrMax=0, blockSumErr;
+
+    srandom(0);
+
+    err_inf = 0;
+    err2 = 0;
+    for(i=0; i<64; i++) sysErr[i]=0;
+    for(it=0;it<NB_ITS;it++) {
+        for(i=0;i<64;i++)
+            block1[i] = 0;
+        switch(test){
+        case 0:
+            for(i=0;i<64;i++)
+                block1[i] = (random() % 512) -256;
+            if (is_idct){
+                fdct(block1);
+
+                for(i=0;i<64;i++)
+                    block1[i]>>=3;
+            }
+        break;
+        case 1:{
+            int num= (random()%10)+1;
+            for(i=0;i<num;i++)
+                block1[random()%64] = (random() % 512) -256;
+        }break;
+        case 2:
+            block1[0]= (random()%4096)-2048;
+            block1[63]= (block1[0]&1)^1;
+        break;
+        }
+
+#if 0 // simulate mismatch control
+{ int sum=0;
+        for(i=0;i<64;i++)
+           sum+=block1[i];
+
+        if((sum&1)==0) block1[63]^=1;
+}
+#endif
+
+        for(i=0; i<64; i++)
+            block_org[i]= block1[i];
+
+        if (fdct_func == ff_mmx_idct ||
+            fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
+            for(i=0;i<64;i++)
+                block[idct_mmx_perm[i]] = block1[i];
+        } else if(fdct_func == ff_simple_idct_mmx ) {
+            for(i=0;i<64;i++)
+                block[idct_simple_mmx_perm[i]] = block1[i];
+
+        } else {
+            for(i=0; i<64; i++)
+                block[i]= block1[i];
+        }
+#if 0 // simulate mismatch control for tested IDCT but not the ref
+{ int sum=0;
+        for(i=0;i<64;i++)
+           sum+=block[i];
+
+        if((sum&1)==0) block[63]^=1;
+}
+#endif
+
+        fdct_func(block);
+        emms(); /* for ff_mmx_idct */
+
+        if (fdct_func == fdct_ifast
+#ifndef FAAN_POSTSCALE
+            || fdct_func == ff_faandct
+#endif
+            ) {
+            for(i=0; i<64; i++) {
+                scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
+                block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
+            }
+        }
+
+        fdct_ref(block1);
+
+        blockSumErr=0;
+        for(i=0;i<64;i++) {
+            v = abs(block[i] - block1[i]);
+            if (v > err_inf)
+                err_inf = v;
+            err2 += v * v;
+            sysErr[i] += block[i] - block1[i];
+            blockSumErr += v;
+            if( abs(block[i])>maxout) maxout=abs(block[i]);
+        }
+        if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
+#if 0 // print different matrix pairs
+        if(blockSumErr){
+            printf("\n");
+            for(i=0; i<64; i++){
+                if((i&7)==0) printf("\n");
+                printf("%4d ", block_org[i]);
+            }
+            for(i=0; i<64; i++){
+                if((i&7)==0) printf("\n");
+                printf("%4d ", block[i] - block1[i]);
+            }
+        }
+#endif
+    }
+    for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
+
+#if 1 // dump systematic errors
+    for(i=0; i<64; i++){
+        if(i%8==0) printf("\n");
+        printf("%5d ", (int)sysErr[i]);
+    }
+    printf("\n");
+#endif
+
+    printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
+           is_idct ? "IDCT" : "DCT",
+           name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
+#if 1 //Speed test
+    /* speed test */
+    for(i=0;i<64;i++)
+        block1[i] = 0;
+    switch(test){
+    case 0:
+        for(i=0;i<64;i++)
+            block1[i] = (random() % 512) -256;
+        if (is_idct){
+            fdct(block1);
+
+            for(i=0;i<64;i++)
+                block1[i]>>=3;
+        }
+    break;
+    case 1:{
+    case 2:
+        block1[0] = (random() % 512) -256;
+        block1[1] = (random() % 512) -256;
+        block1[2] = (random() % 512) -256;
+        block1[3] = (random() % 512) -256;
+    }break;
+    }
+
+    if (fdct_func == ff_mmx_idct ||
+        fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
+        for(i=0;i<64;i++)
+            block[idct_mmx_perm[i]] = block1[i];
+    } else if(fdct_func == ff_simple_idct_mmx ) {
+        for(i=0;i<64;i++)
+            block[idct_simple_mmx_perm[i]] = block1[i];
+    } else {
+        for(i=0; i<64; i++)
+            block[i]= block1[i];
+    }
+
+    ti = gettime();
+    it1 = 0;
+    do {
+        for(it=0;it<NB_ITS_SPEED;it++) {
+            for(i=0; i<64; i++)
+                block[i]= block1[i];
+//            memcpy(block, block1, sizeof(DCTELEM) * 64);
+// dont memcpy especially not fastmemcpy because it does movntq !!!
+            fdct_func(block);
+        }
+        it1 += NB_ITS_SPEED;
+        ti1 = gettime() - ti;
+    } while (ti1 < 1000000);
+    emms();
+
+    printf("%s %s: %0.1f kdct/s\n",
+           is_idct ? "IDCT" : "DCT",
+           name, (double)it1 * 1000.0 / (double)ti1);
+#endif
+}
+
+static uint8_t img_dest[64] __attribute__ ((aligned (8)));
+static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
+
+void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
+{
+    static int init;
+    static double c8[8][8];
+    static double c4[4][4];
+    double block1[64], block2[64], block3[64];
+    double s, sum, v;
+    int i, j, k;
+
+    if (!init) {
+        init = 1;
+
+        for(i=0;i<8;i++) {
+            sum = 0;
+            for(j=0;j<8;j++) {
+                s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
+                c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
+                sum += c8[i][j] * c8[i][j];
+            }
+        }
+
+        for(i=0;i<4;i++) {
+            sum = 0;
+            for(j=0;j<4;j++) {
+                s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
+                c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
+                sum += c4[i][j] * c4[i][j];
+            }
+        }
+    }
+
+    /* butterfly */
+    s = 0.5 * sqrt(2.0);
+    for(i=0;i<4;i++) {
+        for(j=0;j<8;j++) {
+            block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
+            block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
+        }
+    }
+
+    /* idct8 on lines */
+    for(i=0;i<8;i++) {
+        for(j=0;j<8;j++) {
+            sum = 0;
+            for(k=0;k<8;k++)
+                sum += c8[k][j] * block1[8*i+k];
+            block2[8*i+j] = sum;
+        }
+    }
+
+    /* idct4 */
+    for(i=0;i<8;i++) {
+        for(j=0;j<4;j++) {
+            /* top */
+            sum = 0;
+            for(k=0;k<4;k++)
+                sum += c4[k][j] * block2[8*(2*k)+i];
+            block3[8*(2*j)+i] = sum;
+
+            /* bottom */
+            sum = 0;
+            for(k=0;k<4;k++)
+                sum += c4[k][j] * block2[8*(2*k+1)+i];
+            block3[8*(2*j+1)+i] = sum;
+        }
+    }
+
+    /* clamp and store the result */
+    for(i=0;i<8;i++) {
+        for(j=0;j<8;j++) {
+            v = block3[8*i+j];
+            if (v < 0)
+                v = 0;
+            else if (v > 255)
+                v = 255;
+            dest[i * linesize + j] = (int)rint(v);
+        }
+    }
+}
+
+void idct248_error(const char *name,
+                    void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
+{
+    int it, i, it1, ti, ti1, err_max, v;
+
+    srandom(0);
+
+    /* just one test to see if code is correct (precision is less
+       important here) */
+    err_max = 0;
+    for(it=0;it<NB_ITS;it++) {
+
+        /* XXX: use forward transform to generate values */
+        for(i=0;i<64;i++)
+            block1[i] = (random() % 256) - 128;
+        block1[0] += 1024;
+
+        for(i=0; i<64; i++)
+            block[i]= block1[i];
+        idct248_ref(img_dest1, 8, block);
+
+        for(i=0; i<64; i++)
+            block[i]= block1[i];
+        idct248_put(img_dest, 8, block);
+
+        for(i=0;i<64;i++) {
+            v = abs((int)img_dest[i] - (int)img_dest1[i]);
+            if (v == 255)
+                printf("%d %d\n", img_dest[i], img_dest1[i]);
+            if (v > err_max)
+                err_max = v;
+        }
+#if 0
+        printf("ref=\n");
+        for(i=0;i<8;i++) {
+            int j;
+            for(j=0;j<8;j++) {
+                printf(" %3d", img_dest1[i*8+j]);
+            }
+            printf("\n");
+        }
+
+        printf("out=\n");
+        for(i=0;i<8;i++) {
+            int j;
+            for(j=0;j<8;j++) {
+                printf(" %3d", img_dest[i*8+j]);
+            }
+            printf("\n");
+        }
+#endif
+    }
+    printf("%s %s: err_inf=%d\n",
+           1 ? "IDCT248" : "DCT248",
+           name, err_max);
+
+    ti = gettime();
+    it1 = 0;
+    do {
+        for(it=0;it<NB_ITS_SPEED;it++) {
+            for(i=0; i<64; i++)
+                block[i]= block1[i];
+//            memcpy(block, block1, sizeof(DCTELEM) * 64);
+// dont memcpy especially not fastmemcpy because it does movntq !!!
+            idct248_put(img_dest, 8, block);
+        }
+        it1 += NB_ITS_SPEED;
+        ti1 = gettime() - ti;
+    } while (ti1 < 1000000);
+    emms();
+
+    printf("%s %s: %0.1f kdct/s\n",
+           1 ? "IDCT248" : "DCT248",
+           name, (double)it1 * 1000.0 / (double)ti1);
+}
+
+void help(void)
+{
+    printf("dct-test [-i] [<test-number>]\n"
+           "test-number 0 -> test with random matrixes\n"
+           "            1 -> test with random sparse matrixes\n"
+           "            2 -> do 3. test from mpeg4 std\n"
+           "-i          test IDCT implementations\n"
+           "-4          test IDCT248 implementations\n");
+    exit(1);
+}
+
+int main(int argc, char **argv)
+{
+    int test_idct = 0, test_248_dct = 0;
+    int c,i;
+    int test=1;
+
+    init_fdct();
+    idct_mmx_init();
+
+    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
+    for(i=0;i<MAX_NEG_CROP;i++) {
+        cropTbl[i] = 0;
+        cropTbl[i + MAX_NEG_CROP + 256] = 255;
+    }
+
+    for(;;) {
+        c = getopt(argc, argv, "ih4");
+        if (c == -1)
+            break;
+        switch(c) {
+        case 'i':
+            test_idct = 1;
+            break;
+        case '4':
+            test_248_dct = 1;
+            break;
+        default :
+        case 'h':
+            help();
+            break;
+        }
+    }
+
+    if(optind <argc) test= atoi(argv[optind]);
+
+    printf("ffmpeg DCT/IDCT test\n");
+
+    if (test_248_dct) {
+        idct248_error("SIMPLE-C", simple_idct248_put);
+    } else {
+        if (!test_idct) {
+            dct_error("REF-DBL", 0, fdct, fdct, test); /* only to verify code ! */
+            dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test);
+            dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test);
+            dct_error("MMX", 0, ff_fdct_mmx, fdct, test);
+            dct_error("MMX2", 0, ff_fdct_mmx2, fdct, test);
+            dct_error("FAAN", 0, ff_faandct, fdct, test);
+        } else {
+            dct_error("REF-DBL", 1, idct, idct, test);
+            dct_error("INT", 1, j_rev_dct, idct, test);
+            dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test);
+            dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test);
+            dct_error("SIMPLE-C", 1, simple_idct, idct, test);
+            dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, test);
+            dct_error("XVID-MMX", 1, ff_idct_xvid_mmx, idct, test);
+            dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test);
+            //        dct_error("ODIVX-C", 1, odivx_idct_c, idct);
+            //printf(" test against odivx idct\n");
+            //        dct_error("REF", 1, idct, odivx_idct_c);
+            //        dct_error("INT", 1, j_rev_dct, odivx_idct_c);
+            //        dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c);
+            //        dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c);
+            //        dct_error("SIMPLE-C", 1, simple_idct, odivx_idct_c);
+            //        dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, odivx_idct_c);
+            //        dct_error("ODIVX-C", 1, odivx_idct_c, odivx_idct_c);
+        }
+    }
+    return 0;
+}
diff --git a/contrib/ffmpeg/libavcodec/dpcm.c b/contrib/ffmpeg/libavcodec/dpcm.c
new file mode 100644
index 000000000..99c0cac64
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/dpcm.c
@@ -0,0 +1,335 @@
+/*
+ * Assorted DPCM codecs
+ * Copyright (c) 2003 The ffmpeg Project.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file: dpcm.c
+ * Assorted DPCM (differential pulse code modulation) audio codecs
+ * by Mike Melanson (melanson@pcisys.net)
+ * Xan DPCM decoder by Mario Brito (mbrito@student.dei.uc.pt)
+ * for more information on the specific data formats, visit:
+ *   http://www.pcisys.net/~melanson/codecs/simpleaudio.html
+ * SOL DPCMs implemented by Konstantin Shishkov
+ *
+ * Note about using the Xan DPCM decoder: Xan DPCM is used in AVI files
+ * found in the Wing Commander IV computer game. These AVI files contain
+ * WAVEFORMAT headers which report the audio format as 0x01: raw PCM.
+ * Clearly incorrect. To detect Xan DPCM, you will probably have to
+ * special-case your AVI demuxer to use Xan DPCM if the file uses 'Xxan'
+ * (Xan video) for its video codec. Alternately, such AVI files also contain
+ * the fourcc 'Axan' in the 'auds' chunk of the AVI header.
+ */
+
+#include "avcodec.h"
+
+typedef struct DPCMContext {
+    int channels;
+    short roq_square_array[256];
+    long sample[2];//for SOL_DPCM
+    const int *sol_table;//for SOL_DPCM
+} DPCMContext;
+
+#define SATURATE_S16(x)  if (x < -32768) x = -32768; \
+  else if (x > 32767) x = 32767;
+#define SE_16BIT(x)  if (x & 0x8000) x -= 0x10000;
+
+static int interplay_delta_table[] = {
+         0,      1,      2,      3,      4,      5,      6,      7,
+         8,      9,     10,     11,     12,     13,     14,     15,
+        16,     17,     18,     19,     20,     21,     22,     23,
+        24,     25,     26,     27,     28,     29,     30,     31,
+        32,     33,     34,     35,     36,     37,     38,     39,
+        40,     41,     42,     43,     47,     51,     56,     61,
+        66,     72,     79,     86,     94,    102,    112,    122,
+       133,    145,    158,    173,    189,    206,    225,    245,
+       267,    292,    318,    348,    379,    414,    452,    493,
+       538,    587,    640,    699,    763,    832,    908,    991,
+      1081,   1180,   1288,   1405,   1534,   1673,   1826,   1993,
+      2175,   2373,   2590,   2826,   3084,   3365,   3672,   4008,
+      4373,   4772,   5208,   5683,   6202,   6767,   7385,   8059,
+      8794,   9597,  10472,  11428,  12471,  13609,  14851,  16206,
+     17685,  19298,  21060,  22981,  25078,  27367,  29864,  32589,
+    -29973, -26728, -23186, -19322, -15105, -10503,  -5481,     -1,
+         1,      1,   5481,  10503,  15105,  19322,  23186,  26728,
+     29973, -32589, -29864, -27367, -25078, -22981, -21060, -19298,
+    -17685, -16206, -14851, -13609, -12471, -11428, -10472,  -9597,
+     -8794,  -8059,  -7385,  -6767,  -6202,  -5683,  -5208,  -4772,
+     -4373,  -4008,  -3672,  -3365,  -3084,  -2826,  -2590,  -2373,
+     -2175,  -1993,  -1826,  -1673,  -1534,  -1405,  -1288,  -1180,
+     -1081,   -991,   -908,   -832,   -763,   -699,   -640,   -587,
+      -538,   -493,   -452,   -414,   -379,   -348,   -318,   -292,
+      -267,   -245,   -225,   -206,   -189,   -173,   -158,   -145,
+      -133,   -122,   -112,   -102,    -94,    -86,    -79,    -72,
+       -66,    -61,    -56,    -51,    -47,    -43,    -42,    -41,
+       -40,    -39,    -38,    -37,    -36,    -35,    -34,    -33,
+       -32,    -31,    -30,    -29,    -28,    -27,    -26,    -25,
+       -24,    -23,    -22,    -21,    -20,    -19,    -18,    -17,
+       -16,    -15,    -14,    -13,    -12,    -11,    -10,     -9,
+        -8,     -7,     -6,     -5,     -4,     -3,     -2,     -1
+
+};
+
+static const int sol_table_old[16] =
+    { 0x0,  0x1,  0x2 , 0x3,  0x6,  0xA,  0xF, 0x15,
+    -0x15, -0xF, -0xA, -0x6, -0x3, -0x2, -0x1, 0x0};
+
+static const int sol_table_new[16] =
+    { 0x0,  0x1,  0x2,  0x3,  0x6,  0xA,  0xF,  0x15,
+      0x0, -0x1, -0x2, -0x3, -0x6, -0xA, -0xF, -0x15};
+
+static const int sol_table_16[128] = {
+    0x000, 0x008, 0x010, 0x020, 0x030, 0x040, 0x050, 0x060, 0x070, 0x080,
+    0x090, 0x0A0, 0x0B0, 0x0C0, 0x0D0, 0x0E0, 0x0F0, 0x100, 0x110, 0x120,
+    0x130, 0x140, 0x150, 0x160, 0x170, 0x180, 0x190, 0x1A0, 0x1B0, 0x1C0,
+    0x1D0, 0x1E0, 0x1F0, 0x200, 0x208, 0x210, 0x218, 0x220, 0x228, 0x230,
+    0x238, 0x240, 0x248, 0x250, 0x258, 0x260, 0x268, 0x270, 0x278, 0x280,
+    0x288, 0x290, 0x298, 0x2A0, 0x2A8, 0x2B0, 0x2B8, 0x2C0, 0x2C8, 0x2D0,
+    0x2D8, 0x2E0, 0x2E8, 0x2F0, 0x2F8, 0x300, 0x308, 0x310, 0x318, 0x320,
+    0x328, 0x330, 0x338, 0x340, 0x348, 0x350, 0x358, 0x360, 0x368, 0x370,
+    0x378, 0x380, 0x388, 0x390, 0x398, 0x3A0, 0x3A8, 0x3B0, 0x3B8, 0x3C0,
+    0x3C8, 0x3D0, 0x3D8, 0x3E0, 0x3E8, 0x3F0, 0x3F8, 0x400, 0x440, 0x480,
+    0x4C0, 0x500, 0x540, 0x580, 0x5C0, 0x600, 0x640, 0x680, 0x6C0, 0x700,
+    0x740, 0x780, 0x7C0, 0x800, 0x900, 0xA00, 0xB00, 0xC00, 0xD00, 0xE00,
+    0xF00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x3000, 0x4000
+};
+
+
+
+static int dpcm_decode_init(AVCodecContext *avctx)
+{
+    DPCMContext *s = avctx->priv_data;
+    int i;
+    short square;
+
+    s->channels = avctx->channels;
+    s->sample[0] = s->sample[1] = 0;
+
+    switch(avctx->codec->id) {
+
+    case CODEC_ID_ROQ_DPCM:
+        /* initialize square table */
+        for (i = 0; i < 128; i++) {
+            square = i * i;
+            s->roq_square_array[i] = square;
+            s->roq_square_array[i + 128] = -square;
+        }
+        break;
+
+
+    case CODEC_ID_SOL_DPCM:
+        switch(avctx->codec_tag){
+        case 1:
+            s->sol_table=sol_table_old;
+            s->sample[0] = s->sample[1] = 0x80;
+            break;
+        case 2:
+            s->sol_table=sol_table_new;
+            s->sample[0] = s->sample[1] = 0x80;
+            break;
+        case 3:
+            s->sol_table=sol_table_16;
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Unknown SOL subcodec\n");
+            return -1;
+        }
+        break;
+
+    default:
+        break;
+    }
+
+    return 0;
+}
+
+static int dpcm_decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             uint8_t *buf, int buf_size)
+{
+    DPCMContext *s = avctx->priv_data;
+    int in, out = 0;
+    int predictor[2];
+    int channel_number = 0;
+    short *output_samples = data;
+    int shift[2];
+    unsigned char byte;
+    short diff;
+
+    if (!buf_size)
+        return 0;
+
+    switch(avctx->codec->id) {
+
+    case CODEC_ID_ROQ_DPCM:
+        if (s->channels == 1)
+            predictor[0] = LE_16(&buf[6]);
+        else {
+            predictor[0] = buf[7] << 8;
+            predictor[1] = buf[6] << 8;
+        }
+        SE_16BIT(predictor[0]);
+        SE_16BIT(predictor[1]);
+
+        /* decode the samples */
+        for (in = 8, out = 0; in < buf_size; in++, out++) {
+            predictor[channel_number] += s->roq_square_array[buf[in]];
+            SATURATE_S16(predictor[channel_number]);
+            output_samples[out] = predictor[channel_number];
+
+            /* toggle channel */
+            channel_number ^= s->channels - 1;
+        }
+        break;
+
+    case CODEC_ID_INTERPLAY_DPCM:
+        in = 6;  /* skip over the stream mask and stream length */
+        predictor[0] = LE_16(&buf[in]);
+        in += 2;
+        SE_16BIT(predictor[0])
+        output_samples[out++] = predictor[0];
+        if (s->channels == 2) {
+            predictor[1] = LE_16(&buf[in]);
+            in += 2;
+            SE_16BIT(predictor[1])
+            output_samples[out++] = predictor[1];
+        }
+
+        while (in < buf_size) {
+            predictor[channel_number] += interplay_delta_table[buf[in++]];
+            SATURATE_S16(predictor[channel_number]);
+            output_samples[out++] = predictor[channel_number];
+
+            /* toggle channel */
+            channel_number ^= s->channels - 1;
+        }
+
+        break;
+
+    case CODEC_ID_XAN_DPCM:
+        in = 0;
+        shift[0] = shift[1] = 4;
+        predictor[0] = LE_16(&buf[in]);
+        in += 2;
+        SE_16BIT(predictor[0]);
+        if (s->channels == 2) {
+            predictor[1] = LE_16(&buf[in]);
+            in += 2;
+            SE_16BIT(predictor[1]);
+        }
+
+        while (in < buf_size) {
+            byte = buf[in++];
+            diff = (byte & 0xFC) << 8;
+            if ((byte & 0x03) == 3)
+                shift[channel_number]++;
+            else
+                shift[channel_number] -= (2 * (byte & 3));
+            /* saturate the shifter to a lower limit of 0 */
+            if (shift[channel_number] < 0)
+                shift[channel_number] = 0;
+
+            diff >>= shift[channel_number];
+            predictor[channel_number] += diff;
+
+            SATURATE_S16(predictor[channel_number]);
+            output_samples[out++] = predictor[channel_number];
+
+            /* toggle channel */
+            channel_number ^= s->channels - 1;
+        }
+        break;
+    case CODEC_ID_SOL_DPCM:
+        in = 0;
+        if (avctx->codec_tag != 3) {
+            while (in < buf_size) {
+                int n1, n2;
+                n1 = (buf[in] >> 4) & 0xF;
+                n2 = buf[in++] & 0xF;
+                s->sample[0] += s->sol_table[n1];
+                 if (s->sample[0] < 0) s->sample[0] = 0;
+                if (s->sample[0] > 255) s->sample[0] = 255;
+                output_samples[out++] = (s->sample[0] - 128) << 8;
+                s->sample[s->channels - 1] += s->sol_table[n2];
+                if (s->sample[s->channels - 1] < 0) s->sample[s->channels - 1] = 0;
+                if (s->sample[s->channels - 1] > 255) s->sample[s->channels - 1] = 255;
+                output_samples[out++] = (s->sample[s->channels - 1] - 128) << 8;
+            }
+        } else {
+            while (in < buf_size) {
+                int n;
+                n = buf[in++];
+                if (n & 0x80) s->sample[channel_number] -= s->sol_table[n & 0x7F];
+                else s->sample[channel_number] += s->sol_table[n & 0x7F];
+                SATURATE_S16(s->sample[channel_number]);
+                output_samples[out++] = s->sample[channel_number];
+                /* toggle channel */
+                channel_number ^= s->channels - 1;
+            }
+        }
+        break;
+    }
+
+    *data_size = out * sizeof(short);
+    return buf_size;
+}
+
+AVCodec roq_dpcm_decoder = {
+    "roq_dpcm",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_ROQ_DPCM,
+    sizeof(DPCMContext),
+    dpcm_decode_init,
+    NULL,
+    NULL,
+    dpcm_decode_frame,
+};
+
+AVCodec interplay_dpcm_decoder = {
+    "interplay_dpcm",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_INTERPLAY_DPCM,
+    sizeof(DPCMContext),
+    dpcm_decode_init,
+    NULL,
+    NULL,
+    dpcm_decode_frame,
+};
+
+AVCodec xan_dpcm_decoder = {
+    "xan_dpcm",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_XAN_DPCM,
+    sizeof(DPCMContext),
+    dpcm_decode_init,
+    NULL,
+    NULL,
+    dpcm_decode_frame,
+};
+
+AVCodec sol_dpcm_decoder = {
+    "sol_dpcm",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_SOL_DPCM,
+    sizeof(DPCMContext),
+    dpcm_decode_init,
+    NULL,
+    NULL,
+    dpcm_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/dsicinav.c b/contrib/ffmpeg/libavcodec/dsicinav.c
new file mode 100644
index 000000000..ded53c45a
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/dsicinav.c
@@ -0,0 +1,362 @@
+/*
+ * Delphine Software International CIN Audio/Video Decoders
+ * Copyright (c) 2006 Gregory Montoir (cyx@users.sourceforge.net)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file dsicinav.c
+ * Delphine Software International CIN audio/video decoders
+ */
+
+#include "avcodec.h"
+#include "common.h"
+
+
+typedef enum CinVideoBitmapIndex {
+    CIN_CUR_BMP = 0, /* current */
+    CIN_PRE_BMP = 1, /* previous */
+    CIN_INT_BMP = 2  /* intermediate */
+} CinVideoBitmapIndex;
+
+typedef struct CinVideoContext {
+    AVCodecContext *avctx;
+    AVFrame frame;
+    unsigned int bitmap_size;
+    uint32_t palette[256];
+    uint8_t *bitmap_table[3];
+} CinVideoContext;
+
+typedef struct CinAudioContext {
+    AVCodecContext *avctx;
+    int initial_decode_frame;
+    int delta;
+} CinAudioContext;
+
+
+/* table defining a geometric sequence with multiplier = 32767 ^ (1 / 128) */
+static const int16_t cinaudio_delta16_table[256] = {
+         0,      0,      0,      0,      0,      0,      0,      0,
+         0,      0,      0,      0,      0,      0,      0,      0,
+         0,      0,      0, -30210, -27853, -25680, -23677, -21829,
+    -20126, -18556, -17108, -15774, -14543, -13408, -12362, -11398,
+    -10508,  -9689,  -8933,  -8236,  -7593,  -7001,  -6455,  -5951,
+     -5487,  -5059,  -4664,  -4300,  -3964,  -3655,  -3370,  -3107,
+     -2865,  -2641,  -2435,  -2245,  -2070,  -1908,  -1759,  -1622,
+     -1495,  -1379,  -1271,  -1172,  -1080,   -996,   -918,   -847,
+      -781,   -720,   -663,   -612,   -564,   -520,   -479,   -442,
+      -407,   -376,   -346,   -319,   -294,   -271,   -250,   -230,
+      -212,   -196,   -181,   -166,   -153,   -141,   -130,   -120,
+      -111,   -102,    -94,    -87,    -80,    -74,    -68,    -62,
+       -58,    -53,    -49,    -45,    -41,    -38,    -35,    -32,
+       -30,    -27,    -25,    -23,    -21,    -20,    -18,    -17,
+       -15,    -14,    -13,    -12,    -11,    -10,     -9,     -8,
+        -7,     -6,     -5,     -4,     -3,     -2,     -1,      0,
+         0,      1,      2,      3,      4,      5,      6,      7,
+         8,      9,     10,     11,     12,     13,     14,     15,
+        17,     18,     20,     21,     23,     25,     27,     30,
+        32,     35,     38,     41,     45,     49,     53,     58,
+        62,     68,     74,     80,     87,     94,    102,    111,
+       120,    130,    141,    153,    166,    181,    196,    212,
+       230,    250,    271,    294,    319,    346,    376,    407,
+       442,    479,    520,    564,    612,    663,    720,    781,
+       847,    918,    996,   1080,   1172,   1271,   1379,   1495,
+      1622,   1759,   1908,   2070,   2245,   2435,   2641,   2865,
+      3107,   3370,   3655,   3964,   4300,   4664,   5059,   5487,
+      5951,   6455,   7001,   7593,   8236,   8933,   9689,  10508,
+     11398,  12362,  13408,  14543,  15774,  17108,  18556,  20126,
+     21829,  23677,  25680,  27853,  30210,      0,      0,      0,
+         0,      0,      0,      0,      0,      0,      0,      0,
+         0,      0,      0,      0,      0,      0,      0,      0
+};
+
+
+static int cinvideo_decode_init(AVCodecContext *avctx)
+{
+    CinVideoContext *cin = (CinVideoContext *)avctx->priv_data;
+    unsigned int i;
+
+    cin->avctx = avctx;
+    avctx->pix_fmt = PIX_FMT_PAL8;
+    avctx->has_b_frames = 0;
+
+    cin->frame.data[0] = NULL;
+
+    cin->bitmap_size = avctx->width * avctx->height;
+    for (i = 0; i < 3; ++i) {
+        cin->bitmap_table[i] = av_mallocz(cin->bitmap_size);
+        if (!cin->bitmap_table[i])
+            av_log(avctx, AV_LOG_ERROR, "Can't allocate bitmap buffers.\n");
+    }
+
+    return 0;
+}
+
+static void cin_apply_delta_data(const unsigned char *src, unsigned char *dst, int size)
+{
+    while (size--)
+        *dst++ += *src++;
+}
+
+static int cin_decode_huffman(const unsigned char *src, int src_size, unsigned char *dst, int dst_size)
+{
+    int b, huff_code = 0;
+    unsigned char huff_code_table[15];
+    unsigned char *dst_cur = dst;
+    unsigned char *dst_end = dst + dst_size;
+    const unsigned char *src_end = src + src_size;
+
+    memcpy(huff_code_table, src, 15); src += 15; src_size -= 15;
+
+    while (src < src_end) {
+        huff_code = *src++;
+        if ((huff_code >> 4) == 15) {
+            b = huff_code << 4;
+            huff_code = *src++;
+            *dst_cur++ = b | (huff_code >> 4);
+        } else
+            *dst_cur++ = huff_code_table[huff_code >> 4];
+        if (dst_cur >= dst_end)
+            break;
+
+        huff_code &= 15;
+        if (huff_code == 15) {
+            *dst_cur++ = *src++;
+        } else
+            *dst_cur++ = huff_code_table[huff_code];
+        if (dst_cur >= dst_end)
+            break;
+    }
+
+    return dst_cur - dst;
+}
+
+static void cin_decode_lzss(const unsigned char *src, int src_size, unsigned char *dst, int dst_size)
+{
+    uint16_t cmd;
+    int i, sz, offset, code;
+    unsigned char *dst_end = dst + dst_size;
+    const unsigned char *src_end = src + src_size;
+
+    while (src < src_end && dst < dst_end) {
+        code = *src++;
+        for (i = 0; i < 8 && src < src_end && dst < dst_end; ++i) {
+            if (code & (1 << i)) {
+                *dst++ = *src++;
+            } else {
+                cmd = LE_16(src); src += 2;
+                offset = cmd >> 4;
+                sz = (cmd & 0xF) + 2;
+                /* don't use memcpy/memmove here as the decoding routine (ab)uses */
+                /* buffer overlappings to repeat bytes in the destination */
+                sz = FFMIN(sz, dst_end - dst);
+                while (sz--) {
+                    *dst = *(dst - offset - 1);
+                    ++dst;
+                }
+            }
+        }
+    }
+}
+
+static void cin_decode_rle(const unsigned char *src, int src_size, unsigned char *dst, int dst_size)
+{
+    int len, code;
+    unsigned char *dst_end = dst + dst_size;
+    const unsigned char *src_end = src + src_size;
+
+    while (src < src_end && dst < dst_end) {
+        code = *src++;
+        if (code & 0x80) {
+            len = code - 0x7F;
+            memset(dst, *src++, FFMIN(len, dst_end - dst));
+        } else {
+            len = code + 1;
+            memcpy(dst, src, FFMIN(len, dst_end - dst));
+            src += len;
+        }
+        dst += len;
+    }
+}
+
+static int cinvideo_decode_frame(AVCodecContext *avctx,
+                                 void *data, int *data_size,
+                                 uint8_t *buf, int buf_size)
+{
+    CinVideoContext *cin = (CinVideoContext *)avctx->priv_data;
+    int i, y, palette_type, palette_colors_count, bitmap_frame_type, bitmap_frame_size;
+
+    cin->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if (avctx->reget_buffer(avctx, &cin->frame)) {
+        av_log(cin->avctx, AV_LOG_ERROR, "delphinecinvideo: reget_buffer() failed to allocate a frame\n");
+        return -1;
+    }
+
+    palette_type = buf[0];
+    palette_colors_count = buf[1] | (buf[2] << 8);
+    bitmap_frame_type = buf[3];
+    buf += 4;
+
+    bitmap_frame_size = buf_size - 4;
+
+    /* handle palette */
+    if (palette_type == 0) {
+        for (i = 0; i < palette_colors_count; ++i) {
+            cin->palette[i] = (buf[2] << 16) | (buf[1] << 8) | buf[0];
+            buf += 3;
+            bitmap_frame_size -= 3;
+        }
+    } else {
+        for (i = 0; i < palette_colors_count; ++i) {
+            cin->palette[buf[0]] = (buf[3] << 16) | (buf[2] << 8) | buf[1];
+            buf += 4;
+            bitmap_frame_size -= 4;
+        }
+    }
+    memcpy(cin->frame.data[1], cin->palette, sizeof(cin->palette));
+    cin->frame.palette_has_changed = 1;
+
+    /* note: the decoding routines below assumes that surface.width = surface.pitch */
+    switch (bitmap_frame_type) {
+    case 9:
+        cin_decode_rle(buf, bitmap_frame_size,
+          cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
+        break;
+    case 34:
+        cin_decode_rle(buf, bitmap_frame_size,
+          cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
+        cin_apply_delta_data(cin->bitmap_table[CIN_PRE_BMP],
+          cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
+        break;
+    case 35:
+        cin_decode_huffman(buf, bitmap_frame_size,
+          cin->bitmap_table[CIN_INT_BMP], cin->bitmap_size);
+        cin_decode_rle(cin->bitmap_table[CIN_INT_BMP], bitmap_frame_size,
+          cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
+        break;
+    case 36:
+        bitmap_frame_size = cin_decode_huffman(buf, bitmap_frame_size,
+          cin->bitmap_table[CIN_INT_BMP], cin->bitmap_size);
+        cin_decode_rle(cin->bitmap_table[CIN_INT_BMP], bitmap_frame_size,
+          cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
+        cin_apply_delta_data(cin->bitmap_table[CIN_PRE_BMP],
+          cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
+        break;
+    case 37:
+        cin_decode_huffman(buf, bitmap_frame_size,
+          cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
+        break;
+    case 38:
+        cin_decode_lzss(buf, bitmap_frame_size,
+          cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
+        break;
+    case 39:
+        cin_decode_lzss(buf, bitmap_frame_size,
+          cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
+        cin_apply_delta_data(cin->bitmap_table[CIN_PRE_BMP],
+          cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
+        break;
+    }
+
+    for (y = 0; y < cin->avctx->height; ++y)
+        memcpy(cin->frame.data[0] + (cin->avctx->height - 1 - y) * cin->frame.linesize[0],
+          cin->bitmap_table[CIN_CUR_BMP] + y * cin->avctx->width,
+          cin->avctx->width);
+
+    FFSWAP(uint8_t *, cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_table[CIN_PRE_BMP]);
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame *)data = cin->frame;
+
+    return buf_size;
+}
+
+static int cinvideo_decode_end(AVCodecContext *avctx)
+{
+    CinVideoContext *cin = (CinVideoContext *)avctx->priv_data;
+    int i;
+
+    if (cin->frame.data[0])
+        avctx->release_buffer(avctx, &cin->frame);
+
+    for (i = 0; i < 3; ++i)
+        av_free(cin->bitmap_table[i]);
+
+    return 0;
+}
+
+static int cinaudio_decode_init(AVCodecContext *avctx)
+{
+    CinAudioContext *cin = (CinAudioContext *)avctx->priv_data;
+
+    cin->avctx = avctx;
+    cin->initial_decode_frame = 1;
+    cin->delta = 0;
+
+    return 0;
+}
+
+static int cinaudio_decode_frame(AVCodecContext *avctx,
+                                 void *data, int *data_size,
+                                 uint8_t *buf, int buf_size)
+{
+    CinAudioContext *cin = (CinAudioContext *)avctx->priv_data;
+    uint8_t *src = buf;
+    int16_t *samples = (int16_t *)data;
+
+    if (cin->initial_decode_frame) {
+        cin->initial_decode_frame = 0;
+        cin->delta = (int16_t)LE_16(src); src += 2;
+        *samples++ = cin->delta;
+        buf_size -= 2;
+    }
+    while (buf_size > 0) {
+        cin->delta += cinaudio_delta16_table[*src++];
+        cin->delta = clip(cin->delta, -32768, 32767);
+        *samples++ = cin->delta;
+        --buf_size;
+    }
+
+    *data_size = (uint8_t *)samples - (uint8_t *)data;
+
+    return src - buf;
+}
+
+
+AVCodec dsicinvideo_decoder = {
+    "dsicinvideo",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_DSICINVIDEO,
+    sizeof(CinVideoContext),
+    cinvideo_decode_init,
+    NULL,
+    cinvideo_decode_end,
+    cinvideo_decode_frame,
+    CODEC_CAP_DR1,
+};
+
+AVCodec dsicinaudio_decoder = {
+    "dsicinaudio",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_DSICINAUDIO,
+    sizeof(CinAudioContext),
+    cinaudio_decode_init,
+    NULL,
+    NULL,
+    cinaudio_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/dsputil.c b/contrib/ffmpeg/libavcodec/dsputil.c
new file mode 100644
index 000000000..51eddbc60
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/dsputil.c
@@ -0,0 +1,4156 @@
+/*
+ * DSP utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
+ */
+
+/**
+ * @file dsputil.c
+ * DSP utils
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+#include "simple_idct.h"
+#include "faandct.h"
+#include "snow.h"
+
+/* snow.c */
+void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
+
+/* vorbis.c */
+void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
+
+uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
+uint32_t ff_squareTbl[512] = {0, };
+
+const uint8_t ff_zigzag_direct[64] = {
+    0,   1,  8, 16,  9,  2,  3, 10,
+    17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36,
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+
+/* Specific zigzag scan for 248 idct. NOTE that unlike the
+   specification, we interleave the fields */
+const uint8_t ff_zigzag248_direct[64] = {
+     0,  8,  1,  9, 16, 24,  2, 10,
+    17, 25, 32, 40, 48, 56, 33, 41,
+    18, 26,  3, 11,  4, 12, 19, 27,
+    34, 42, 49, 57, 50, 58, 35, 43,
+    20, 28,  5, 13,  6, 14, 21, 29,
+    36, 44, 51, 59, 52, 60, 37, 45,
+    22, 30,  7, 15, 23, 31, 38, 46,
+    53, 61, 54, 62, 39, 47, 55, 63,
+};
+
+/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
+DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, };
+
+const uint8_t ff_alternate_horizontal_scan[64] = {
+    0,  1,   2,  3,  8,  9, 16, 17,
+    10, 11,  4,  5,  6,  7, 15, 14,
+    13, 12, 19, 18, 24, 25, 32, 33,
+    26, 27, 20, 21, 22, 23, 28, 29,
+    30, 31, 34, 35, 40, 41, 48, 49,
+    42, 43, 36, 37, 38, 39, 44, 45,
+    46, 47, 50, 51, 56, 57, 58, 59,
+    52, 53, 54, 55, 60, 61, 62, 63,
+};
+
+const uint8_t ff_alternate_vertical_scan[64] = {
+    0,  8,  16, 24,  1,  9,  2, 10,
+    17, 25, 32, 40, 48, 56, 57, 49,
+    41, 33, 26, 18,  3, 11,  4, 12,
+    19, 27, 34, 42, 50, 58, 35, 43,
+    51, 59, 20, 28,  5, 13,  6, 14,
+    21, 29, 36, 44, 52, 60, 37, 45,
+    53, 61, 22, 30,  7, 15, 23, 31,
+    38, 46, 54, 62, 39, 47, 55, 63,
+};
+
+/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
+const uint32_t ff_inverse[256]={
+         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
+ 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
+ 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
+ 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
+ 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
+ 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
+  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
+  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
+  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
+  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
+  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
+  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
+  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
+  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
+  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
+  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
+  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
+  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
+  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
+  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
+  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
+  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
+  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
+  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
+  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
+  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
+  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
+  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
+  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
+  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
+  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
+  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
+};
+
+/* Input permutation for the simple_idct_mmx */
+static const uint8_t simple_mmx_permutation[64]={
+        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
+        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
+        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
+        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
+        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
+        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
+        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
+        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
+};
+
+static int pix_sum_c(uint8_t * pix, int line_size)
+{
+    int s, i, j;
+
+    s = 0;
+    for (i = 0; i < 16; i++) {
+        for (j = 0; j < 16; j += 8) {
+            s += pix[0];
+            s += pix[1];
+            s += pix[2];
+            s += pix[3];
+            s += pix[4];
+            s += pix[5];
+            s += pix[6];
+            s += pix[7];
+            pix += 8;
+        }
+        pix += line_size - 16;
+    }
+    return s;
+}
+
+static int pix_norm1_c(uint8_t * pix, int line_size)
+{
+    int s, i, j;
+    uint32_t *sq = ff_squareTbl + 256;
+
+    s = 0;
+    for (i = 0; i < 16; i++) {
+        for (j = 0; j < 16; j += 8) {
+#if 0
+            s += sq[pix[0]];
+            s += sq[pix[1]];
+            s += sq[pix[2]];
+            s += sq[pix[3]];
+            s += sq[pix[4]];
+            s += sq[pix[5]];
+            s += sq[pix[6]];
+            s += sq[pix[7]];
+#else
+#if LONG_MAX > 2147483647
+            register uint64_t x=*(uint64_t*)pix;
+            s += sq[x&0xff];
+            s += sq[(x>>8)&0xff];
+            s += sq[(x>>16)&0xff];
+            s += sq[(x>>24)&0xff];
+            s += sq[(x>>32)&0xff];
+            s += sq[(x>>40)&0xff];
+            s += sq[(x>>48)&0xff];
+            s += sq[(x>>56)&0xff];
+#else
+            register uint32_t x=*(uint32_t*)pix;
+            s += sq[x&0xff];
+            s += sq[(x>>8)&0xff];
+            s += sq[(x>>16)&0xff];
+            s += sq[(x>>24)&0xff];
+            x=*(uint32_t*)(pix+4);
+            s += sq[x&0xff];
+            s += sq[(x>>8)&0xff];
+            s += sq[(x>>16)&0xff];
+            s += sq[(x>>24)&0xff];
+#endif
+#endif
+            pix += 8;
+        }
+        pix += line_size - 16;
+    }
+    return s;
+}
+
+static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
+    int i;
+
+    for(i=0; i+8<=w; i+=8){
+        dst[i+0]= bswap_32(src[i+0]);
+        dst[i+1]= bswap_32(src[i+1]);
+        dst[i+2]= bswap_32(src[i+2]);
+        dst[i+3]= bswap_32(src[i+3]);
+        dst[i+4]= bswap_32(src[i+4]);
+        dst[i+5]= bswap_32(src[i+5]);
+        dst[i+6]= bswap_32(src[i+6]);
+        dst[i+7]= bswap_32(src[i+7]);
+    }
+    for(;i<w; i++){
+        dst[i+0]= bswap_32(src[i+0]);
+    }
+}
+
+static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
+{
+    int s, i;
+    uint32_t *sq = ff_squareTbl + 256;
+
+    s = 0;
+    for (i = 0; i < h; i++) {
+        s += sq[pix1[0] - pix2[0]];
+        s += sq[pix1[1] - pix2[1]];
+        s += sq[pix1[2] - pix2[2]];
+        s += sq[pix1[3] - pix2[3]];
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+
+static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
+{
+    int s, i;
+    uint32_t *sq = ff_squareTbl + 256;
+
+    s = 0;
+    for (i = 0; i < h; i++) {
+        s += sq[pix1[0] - pix2[0]];
+        s += sq[pix1[1] - pix2[1]];
+        s += sq[pix1[2] - pix2[2]];
+        s += sq[pix1[3] - pix2[3]];
+        s += sq[pix1[4] - pix2[4]];
+        s += sq[pix1[5] - pix2[5]];
+        s += sq[pix1[6] - pix2[6]];
+        s += sq[pix1[7] - pix2[7]];
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+
+static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    uint32_t *sq = ff_squareTbl + 256;
+
+    s = 0;
+    for (i = 0; i < h; i++) {
+        s += sq[pix1[ 0] - pix2[ 0]];
+        s += sq[pix1[ 1] - pix2[ 1]];
+        s += sq[pix1[ 2] - pix2[ 2]];
+        s += sq[pix1[ 3] - pix2[ 3]];
+        s += sq[pix1[ 4] - pix2[ 4]];
+        s += sq[pix1[ 5] - pix2[ 5]];
+        s += sq[pix1[ 6] - pix2[ 6]];
+        s += sq[pix1[ 7] - pix2[ 7]];
+        s += sq[pix1[ 8] - pix2[ 8]];
+        s += sq[pix1[ 9] - pix2[ 9]];
+        s += sq[pix1[10] - pix2[10]];
+        s += sq[pix1[11] - pix2[11]];
+        s += sq[pix1[12] - pix2[12]];
+        s += sq[pix1[13] - pix2[13]];
+        s += sq[pix1[14] - pix2[14]];
+        s += sq[pix1[15] - pix2[15]];
+
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+
+
+#ifdef CONFIG_SNOW_ENCODER //dwt is in snow.c
+static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
+    int s, i, j;
+    const int dec_count= w==8 ? 3 : 4;
+    int tmp[32*32];
+    int level, ori;
+    static const int scale[2][2][4][4]={
+      {
+        {
+            // 9/7 8x8 dec=3
+            {268, 239, 239, 213},
+            {  0, 224, 224, 152},
+            {  0, 135, 135, 110},
+        },{
+            // 9/7 16x16 or 32x32 dec=4
+            {344, 310, 310, 280},
+            {  0, 320, 320, 228},
+            {  0, 175, 175, 136},
+            {  0, 129, 129, 102},
+        }
+      },{
+        {
+            // 5/3 8x8 dec=3
+            {275, 245, 245, 218},
+            {  0, 230, 230, 156},
+            {  0, 138, 138, 113},
+        },{
+            // 5/3 16x16 or 32x32 dec=4
+            {352, 317, 317, 286},
+            {  0, 328, 328, 233},
+            {  0, 180, 180, 140},
+            {  0, 132, 132, 105},
+        }
+      }
+    };
+
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < w; j+=4) {
+            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
+            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
+            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
+            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
+        }
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+
+    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);
+
+    s=0;
+    assert(w==h);
+    for(level=0; level<dec_count; level++){
+        for(ori= level ? 1 : 0; ori<4; ori++){
+            int size= w>>(dec_count-level);
+            int sx= (ori&1) ? size : 0;
+            int stride= 32<<(dec_count-level);
+            int sy= (ori&2) ? stride>>1 : 0;
+
+            for(i=0; i<size; i++){
+                for(j=0; j<size; j++){
+                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
+                    s += FFABS(v);
+                }
+            }
+        }
+    }
+    assert(s>=0);
+    return s>>9;
+}
+
+static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size,  8, h, 1);
+}
+
+static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size,  8, h, 0);
+}
+
+static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size, 16, h, 1);
+}
+
+static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size, 16, h, 0);
+}
+
+int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size, 32, h, 1);
+}
+
+int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
+    return w_c(v, pix1, pix2, line_size, 32, h, 0);
+}
+#endif
+
+static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
+{
+    int i;
+
+    /* read the pixels */
+    for(i=0;i<8;i++) {
+        block[0] = pixels[0];
+        block[1] = pixels[1];
+        block[2] = pixels[2];
+        block[3] = pixels[3];
+        block[4] = pixels[4];
+        block[5] = pixels[5];
+        block[6] = pixels[6];
+        block[7] = pixels[7];
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
+                          const uint8_t *s2, int stride){
+    int i;
+
+    /* read the pixels */
+    for(i=0;i<8;i++) {
+        block[0] = s1[0] - s2[0];
+        block[1] = s1[1] - s2[1];
+        block[2] = s1[2] - s2[2];
+        block[3] = s1[3] - s2[3];
+        block[4] = s1[4] - s2[4];
+        block[5] = s1[5] - s2[5];
+        block[6] = s1[6] - s2[6];
+        block[7] = s1[7] - s2[7];
+        s1 += stride;
+        s2 += stride;
+        block += 8;
+    }
+}
+
+
+static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<8;i++) {
+        pixels[0] = cm[block[0]];
+        pixels[1] = cm[block[1]];
+        pixels[2] = cm[block[2]];
+        pixels[3] = cm[block[3]];
+        pixels[4] = cm[block[4]];
+        pixels[5] = cm[block[5]];
+        pixels[6] = cm[block[6]];
+        pixels[7] = cm[block[7]];
+
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<4;i++) {
+        pixels[0] = cm[block[0]];
+        pixels[1] = cm[block[1]];
+        pixels[2] = cm[block[2]];
+        pixels[3] = cm[block[3]];
+
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<2;i++) {
+        pixels[0] = cm[block[0]];
+        pixels[1] = cm[block[1]];
+
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void put_signed_pixels_clamped_c(const DCTELEM *block,
+                                        uint8_t *restrict pixels,
+                                        int line_size)
+{
+    int i, j;
+
+    for (i = 0; i < 8; i++) {
+        for (j = 0; j < 8; j++) {
+            if (*block < -128)
+                *pixels = 0;
+            else if (*block > 127)
+                *pixels = 255;
+            else
+                *pixels = (uint8_t)(*block + 128);
+            block++;
+            pixels++;
+        }
+        pixels += (line_size - 8);
+    }
+}
+
+static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<8;i++) {
+        pixels[0] = cm[pixels[0] + block[0]];
+        pixels[1] = cm[pixels[1] + block[1]];
+        pixels[2] = cm[pixels[2] + block[2]];
+        pixels[3] = cm[pixels[3] + block[3]];
+        pixels[4] = cm[pixels[4] + block[4]];
+        pixels[5] = cm[pixels[5] + block[5]];
+        pixels[6] = cm[pixels[6] + block[6]];
+        pixels[7] = cm[pixels[7] + block[7]];
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<4;i++) {
+        pixels[0] = cm[pixels[0] + block[0]];
+        pixels[1] = cm[pixels[1] + block[1]];
+        pixels[2] = cm[pixels[2] + block[2]];
+        pixels[3] = cm[pixels[3] + block[3]];
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<2;i++) {
+        pixels[0] = cm[pixels[0] + block[0]];
+        pixels[1] = cm[pixels[1] + block[1]];
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
+{
+    int i;
+    for(i=0;i<8;i++) {
+        pixels[0] += block[0];
+        pixels[1] += block[1];
+        pixels[2] += block[2];
+        pixels[3] += block[3];
+        pixels[4] += block[4];
+        pixels[5] += block[5];
+        pixels[6] += block[6];
+        pixels[7] += block[7];
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
+{
+    int i;
+    for(i=0;i<4;i++) {
+        pixels[0] += block[0];
+        pixels[1] += block[1];
+        pixels[2] += block[2];
+        pixels[3] += block[3];
+        pixels += line_size;
+        block += 4;
+    }
+}
+
+#if 0
+
+#define PIXOP2(OPNAME, OP) \
+static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint64_t*)block), LD64(pixels));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= LD64(pixels  );\
+        const uint64_t b= LD64(pixels+1);\
+        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= LD64(pixels  );\
+        const uint64_t b= LD64(pixels+1);\
+        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= LD64(pixels          );\
+        const uint64_t b= LD64(pixels+line_size);\
+        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= LD64(pixels          );\
+        const uint64_t b= LD64(pixels+line_size);\
+        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i;\
+        const uint64_t a= LD64(pixels  );\
+        const uint64_t b= LD64(pixels+1);\
+        uint64_t l0=  (a&0x0303030303030303ULL)\
+                    + (b&0x0303030303030303ULL)\
+                    + 0x0202020202020202ULL;\
+        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+        uint64_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint64_t a= LD64(pixels  );\
+            uint64_t b= LD64(pixels+1);\
+            l1=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL);\
+            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= LD64(pixels  );\
+            b= LD64(pixels+1);\
+            l0=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL)\
+               + 0x0202020202020202ULL;\
+            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i;\
+        const uint64_t a= LD64(pixels  );\
+        const uint64_t b= LD64(pixels+1);\
+        uint64_t l0=  (a&0x0303030303030303ULL)\
+                    + (b&0x0303030303030303ULL)\
+                    + 0x0101010101010101ULL;\
+        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+        uint64_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint64_t a= LD64(pixels  );\
+            uint64_t b= LD64(pixels+1);\
+            l1=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL);\
+            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= LD64(pixels  );\
+            b= LD64(pixels+1);\
+            l0=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL)\
+               + 0x0101010101010101ULL;\
+            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
+
+#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
+#else // 64 bit variant
+
+#define PIXOP2(OPNAME, OP) \
+static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint16_t*)(block  )), LD16(pixels  ));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint32_t*)(block  )), LD32(pixels  ));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint32_t*)(block  )), LD32(pixels  ));\
+        OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= LD32(&src1[i*src_stride1  ]);\
+        b= LD32(&src2[i*src_stride2  ]);\
+        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
+        a= LD32(&src1[i*src_stride1+4]);\
+        b= LD32(&src2[i*src_stride2+4]);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= LD32(&src1[i*src_stride1  ]);\
+        b= LD32(&src2[i*src_stride2  ]);\
+        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
+        a= LD32(&src1[i*src_stride1+4]);\
+        b= LD32(&src2[i*src_stride2+4]);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= LD32(&src1[i*src_stride1  ]);\
+        b= LD32(&src2[i*src_stride2  ]);\
+        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= LD16(&src1[i*src_stride1  ]);\
+        b= LD16(&src2[i*src_stride2  ]);\
+        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
+    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
+    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a, b, c, d, l0, l1, h0, h1;\
+        a= LD32(&src1[i*src_stride1]);\
+        b= LD32(&src2[i*src_stride2]);\
+        c= LD32(&src3[i*src_stride3]);\
+        d= LD32(&src4[i*src_stride4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x02020202UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+        a= LD32(&src1[i*src_stride1+4]);\
+        b= LD32(&src2[i*src_stride2+4]);\
+        c= LD32(&src3[i*src_stride3+4]);\
+        d= LD32(&src4[i*src_stride4+4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x02020202UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a, b, c, d, l0, l1, h0, h1;\
+        a= LD32(&src1[i*src_stride1]);\
+        b= LD32(&src2[i*src_stride2]);\
+        c= LD32(&src3[i*src_stride3]);\
+        d= LD32(&src4[i*src_stride4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x01010101UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+        a= LD32(&src1[i*src_stride1+4]);\
+        b= LD32(&src2[i*src_stride2+4]);\
+        c= LD32(&src3[i*src_stride3+4]);\
+        d= LD32(&src4[i*src_stride4+4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x01010101UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+    }\
+}\
+static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+}\
+static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+}\
+\
+static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i, a0, b0, a1, b1;\
+        a0= pixels[0];\
+        b0= pixels[1] + 2;\
+        a0 += b0;\
+        b0 += pixels[2];\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            a1= pixels[0];\
+            b1= pixels[1];\
+            a1 += b1;\
+            b1 += pixels[2];\
+\
+            block[0]= (a1+a0)>>2; /* FIXME non put */\
+            block[1]= (b1+b0)>>2;\
+\
+            pixels+=line_size;\
+            block +=line_size;\
+\
+            a0= pixels[0];\
+            b0= pixels[1] + 2;\
+            a0 += b0;\
+            b0 += pixels[2];\
+\
+            block[0]= (a1+a0)>>2;\
+            block[1]= (b1+b0)>>2;\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i;\
+        const uint32_t a= LD32(pixels  );\
+        const uint32_t b= LD32(pixels+1);\
+        uint32_t l0=  (a&0x03030303UL)\
+                    + (b&0x03030303UL)\
+                    + 0x02020202UL;\
+        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+                   + ((b&0xFCFCFCFCUL)>>2);\
+        uint32_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint32_t a= LD32(pixels  );\
+            uint32_t b= LD32(pixels+1);\
+            l1=  (a&0x03030303UL)\
+               + (b&0x03030303UL);\
+            h1= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= LD32(pixels  );\
+            b= LD32(pixels+1);\
+            l0=  (a&0x03030303UL)\
+               + (b&0x03030303UL)\
+               + 0x02020202UL;\
+            h0= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int j;\
+    for(j=0; j<2; j++){\
+        int i;\
+        const uint32_t a= LD32(pixels  );\
+        const uint32_t b= LD32(pixels+1);\
+        uint32_t l0=  (a&0x03030303UL)\
+                    + (b&0x03030303UL)\
+                    + 0x02020202UL;\
+        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+                   + ((b&0xFCFCFCFCUL)>>2);\
+        uint32_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint32_t a= LD32(pixels  );\
+            uint32_t b= LD32(pixels+1);\
+            l1=  (a&0x03030303UL)\
+               + (b&0x03030303UL);\
+            h1= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= LD32(pixels  );\
+            b= LD32(pixels+1);\
+            l0=  (a&0x03030303UL)\
+               + (b&0x03030303UL)\
+               + 0x02020202UL;\
+            h0= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+        pixels+=4-line_size*(h+1);\
+        block +=4-line_size*h;\
+    }\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int j;\
+    for(j=0; j<2; j++){\
+        int i;\
+        const uint32_t a= LD32(pixels  );\
+        const uint32_t b= LD32(pixels+1);\
+        uint32_t l0=  (a&0x03030303UL)\
+                    + (b&0x03030303UL)\
+                    + 0x01010101UL;\
+        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+                   + ((b&0xFCFCFCFCUL)>>2);\
+        uint32_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint32_t a= LD32(pixels  );\
+            uint32_t b= LD32(pixels+1);\
+            l1=  (a&0x03030303UL)\
+               + (b&0x03030303UL);\
+            h1= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= LD32(pixels  );\
+            b= LD32(pixels+1);\
+            l0=  (a&0x03030303UL)\
+               + (b&0x03030303UL)\
+               + 0x01010101UL;\
+            h0= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+        pixels+=4-line_size*(h+1);\
+        block +=4-line_size*h;\
+    }\
+}\
+\
+CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
+
+#define op_avg(a, b) a = rnd_avg32(a, b)
+#endif
+#define op_put(a, b) a = b
+
+PIXOP2(avg, op_avg)
+PIXOP2(put, op_put)
+#undef op_avg
+#undef op_put
+
+#define avg2(a,b) ((a+b+1)>>1)
+#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
+
+static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
+    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
+}
+
+static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
+    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
+}
+
+static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
+{
+    const int A=(16-x16)*(16-y16);
+    const int B=(   x16)*(16-y16);
+    const int C=(16-x16)*(   y16);
+    const int D=(   x16)*(   y16);
+    int i;
+
+    for(i=0; i<h; i++)
+    {
+        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
+        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
+        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
+        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
+        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
+        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
+        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
+        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
+        dst+= stride;
+        src+= stride;
+    }
+}
+
+void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
+                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
+{
+    int y, vx, vy;
+    const int s= 1<<shift;
+
+    width--;
+    height--;
+
+    for(y=0; y<h; y++){
+        int x;
+
+        vx= ox;
+        vy= oy;
+        for(x=0; x<8; x++){ //XXX FIXME optimize
+            int src_x, src_y, frac_x, frac_y, index;
+
+            src_x= vx>>16;
+            src_y= vy>>16;
+            frac_x= src_x&(s-1);
+            frac_y= src_y&(s-1);
+            src_x>>=shift;
+            src_y>>=shift;
+
+            if((unsigned)src_x < width){
+                if((unsigned)src_y < height){
+                    index= src_x + src_y*stride;
+                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
+                                           + src[index       +1]*   frac_x )*(s-frac_y)
+                                        + (  src[index+stride  ]*(s-frac_x)
+                                           + src[index+stride+1]*   frac_x )*   frac_y
+                                        + r)>>(shift*2);
+                }else{
+                    index= src_x + clip(src_y, 0, height)*stride;
+                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
+                                          + src[index       +1]*   frac_x )*s
+                                        + r)>>(shift*2);
+                }
+            }else{
+                if((unsigned)src_y < height){
+                    index= clip(src_x, 0, width) + src_y*stride;
+                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)
+                                           + src[index+stride  ]*   frac_y )*s
+                                        + r)>>(shift*2);
+                }else{
+                    index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
+                    dst[y*stride + x]=    src[index         ];
+                }
+            }
+
+            vx+= dxx;
+            vy+= dyx;
+        }
+        ox += dxy;
+        oy += dyy;
+    }
+}
+
+static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    switch(width){
+    case 2: put_pixels2_c (dst, src, stride, height); break;
+    case 4: put_pixels4_c (dst, src, stride, height); break;
+    case 8: put_pixels8_c (dst, src, stride, height); break;
+    case 16:put_pixels16_c(dst, src, stride, height); break;
+    }
+}
+
+static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    switch(width){
+    case 2: avg_pixels2_c (dst, src, stride, height); break;
+    case 4: avg_pixels4_c (dst, src, stride, height); break;
+    case 8: avg_pixels8_c (dst, src, stride, height); break;
+    case 16:avg_pixels16_c(dst, src, stride, height); break;
+    }
+}
+
+static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+#if 0
+#define TPEL_WIDTH(width)\
+static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
+#endif
+
+#define H264_CHROMA_MC(OPNAME, OP)\
+static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    int i;\
+    \
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+        dst+= stride;\
+        src+= stride;\
+    }\
+}\
+\
+static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    int i;\
+    \
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
+        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
+        dst+= stride;\
+        src+= stride;\
+    }\
+}\
+\
+static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    int i;\
+    \
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
+        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
+        OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
+        OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
+        OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
+        OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
+        dst+= stride;\
+        src+= stride;\
+    }\
+}
+
+#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
+#define op_put(a, b) a = (((b) + 32)>>6)
+
+H264_CHROMA_MC(put_       , op_put)
+H264_CHROMA_MC(avg_       , op_avg)
+#undef op_avg
+#undef op_put
+
+static void put_no_rnd_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
+    const int A=(8-x)*(8-y);
+    const int B=(  x)*(8-y);
+    const int C=(8-x)*(  y);
+    const int D=(  x)*(  y);
+    int i;
+
+    assert(x<8 && y<8 && x>=0 && y>=0);
+
+    for(i=0; i<h; i++)
+    {
+        dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6;
+        dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6;
+        dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6;
+        dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6;
+        dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6;
+        dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6;
+        dst[6] = (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6;
+        dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6;
+        dst+= stride;
+        src+= stride;
+    }
+}
+
+#define QPEL_MC(r, OPNAME, RND, OP) \
+static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
+        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
+        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
+        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
+        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
+        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
+        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int src0= src[0*srcStride];\
+        const int src1= src[1*srcStride];\
+        const int src2= src[2*srcStride];\
+        const int src3= src[3*srcStride];\
+        const int src4= src[4*srcStride];\
+        const int src5= src[5*srcStride];\
+        const int src6= src[6*srcStride];\
+        const int src7= src[7*srcStride];\
+        const int src8= src[8*srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
+        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
+        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
+        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
+        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
+        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
+        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    \
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
+        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
+        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
+        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
+        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
+        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
+        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
+        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
+        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
+        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
+        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
+        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
+        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
+        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
+        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
+        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    const int w=16;\
+    for(i=0; i<w; i++)\
+    {\
+        const int src0= src[0*srcStride];\
+        const int src1= src[1*srcStride];\
+        const int src2= src[2*srcStride];\
+        const int src3= src[3*srcStride];\
+        const int src4= src[4*srcStride];\
+        const int src5= src[5*srcStride];\
+        const int src6= src[6*srcStride];\
+        const int src7= src[7*srcStride];\
+        const int src8= src[8*srcStride];\
+        const int src9= src[9*srcStride];\
+        const int src10= src[10*srcStride];\
+        const int src11= src[11*srcStride];\
+        const int src12= src[12*srcStride];\
+        const int src13= src[13*srcStride];\
+        const int src14= src[14*srcStride];\
+        const int src15= src[15*srcStride];\
+        const int src16= src[16*srcStride];\
+        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
+        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
+        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
+        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
+        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
+        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
+        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
+        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
+        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
+        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
+        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
+        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
+        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
+        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
+        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
+        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels8_c(dst, src, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
+    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
+    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t half[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
+    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    copy_block9(full, src, 16, stride, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
+}\
+\
+static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t half[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
+    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[72];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels16_c(dst, src, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
+    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
+    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t half[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
+    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    copy_block17(full, src, 24, stride, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
+}\
+\
+static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t half[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
+    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}\
+static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[272];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}
+
+#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
+#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
+#define op_put(a, b) a = cm[((b) + 16)>>5]
+#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
+
+QPEL_MC(0, put_       , _       , op_put)
+QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
+QPEL_MC(0, avg_       , _       , op_avg)
+//QPEL_MC(1, avg_no_rnd , _       , op_avg)
+#undef op_avg
+#undef op_avg_no_rnd
+#undef op_put
+#undef op_put_no_rnd
+
+#if 1
+#define H264_LOWPASS(OPNAME, OP, OP2) \
+static void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=2;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=2;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    const int h=2;\
+    const int w=2;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2);\
+    for(i=0; i<w; i++)\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        dst++;\
+        tmp++;\
+    }\
+}\
+static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=4;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
+        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
+        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=4;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        const int src5= src[5 *srcStride];\
+        const int src6= src[6 *srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
+        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    const int h=4;\
+    const int w=4;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
+        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
+        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2);\
+    for(i=0; i<w; i++)\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        const int tmp5= tmp[5 *tmpStride];\
+        const int tmp6= tmp[6 *tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
+        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
+        dst++;\
+        tmp++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
+        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
+        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
+        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
+        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
+        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
+        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        const int src5= src[5 *srcStride];\
+        const int src6= src[6 *srcStride];\
+        const int src7= src[7 *srcStride];\
+        const int src8= src[8 *srcStride];\
+        const int src9= src[9 *srcStride];\
+        const int src10=src[10*srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
+        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
+        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
+        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
+        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
+        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    const int h=8;\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
+        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
+        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
+        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
+        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
+        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
+        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2);\
+    for(i=0; i<w; i++)\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        const int tmp5= tmp[5 *tmpStride];\
+        const int tmp6= tmp[6 *tmpStride];\
+        const int tmp7= tmp[7 *tmpStride];\
+        const int tmp8= tmp[8 *tmpStride];\
+        const int tmp9= tmp[9 *tmpStride];\
+        const int tmp10=tmp[10*tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
+        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
+        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
+        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
+        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
+        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
+        dst++;\
+        tmp++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
+    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
+    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
+}\
+
+#define H264_MC(OPNAME, SIZE) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t half[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t half[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfV[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfV[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+
+#define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
+//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
+#define op_put(a, b)  a = cm[((b) + 16)>>5]
+#define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
+#define op2_put(a, b)  a = cm[((b) + 512)>>10]
+
+H264_LOWPASS(put_       , op_put, op2_put)
+H264_LOWPASS(avg_       , op_avg, op2_avg)
+H264_MC(put_, 2)
+H264_MC(put_, 4)
+H264_MC(put_, 8)
+H264_MC(put_, 16)
+H264_MC(avg_, 4)
+H264_MC(avg_, 8)
+H264_MC(avg_, 16)
+
+#undef op_avg
+#undef op_put
+#undef op2_avg
+#undef op2_put
+#endif
+
+#define op_scale1(x)  block[x] = clip_uint8( (block[x]*weight + offset) >> log2_denom )
+#define op_scale2(x)  dst[x] = clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
+#define H264_WEIGHT(W,H) \
+static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
+    int y; \
+    offset <<= log2_denom; \
+    if(log2_denom) offset += 1<<(log2_denom-1); \
+    for(y=0; y<H; y++, block += stride){ \
+        op_scale1(0); \
+        op_scale1(1); \
+        if(W==2) continue; \
+        op_scale1(2); \
+        op_scale1(3); \
+        if(W==4) continue; \
+        op_scale1(4); \
+        op_scale1(5); \
+        op_scale1(6); \
+        op_scale1(7); \
+        if(W==8) continue; \
+        op_scale1(8); \
+        op_scale1(9); \
+        op_scale1(10); \
+        op_scale1(11); \
+        op_scale1(12); \
+        op_scale1(13); \
+        op_scale1(14); \
+        op_scale1(15); \
+    } \
+} \
+static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
+    int y; \
+    offset = ((offset + 1) | 1) << log2_denom; \
+    for(y=0; y<H; y++, dst += stride, src += stride){ \
+        op_scale2(0); \
+        op_scale2(1); \
+        if(W==2) continue; \
+        op_scale2(2); \
+        op_scale2(3); \
+        if(W==4) continue; \
+        op_scale2(4); \
+        op_scale2(5); \
+        op_scale2(6); \
+        op_scale2(7); \
+        if(W==8) continue; \
+        op_scale2(8); \
+        op_scale2(9); \
+        op_scale2(10); \
+        op_scale2(11); \
+        op_scale2(12); \
+        op_scale2(13); \
+        op_scale2(14); \
+        op_scale2(15); \
+    } \
+}
+
+H264_WEIGHT(16,16)
+H264_WEIGHT(16,8)
+H264_WEIGHT(8,16)
+H264_WEIGHT(8,8)
+H264_WEIGHT(8,4)
+H264_WEIGHT(4,8)
+H264_WEIGHT(4,4)
+H264_WEIGHT(4,2)
+H264_WEIGHT(2,4)
+H264_WEIGHT(2,2)
+
+#undef op_scale1
+#undef op_scale2
+#undef H264_WEIGHT
+
+static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    int i;
+
+    for(i=0; i<h; i++){
+        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
+        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
+        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
+        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
+        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
+        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
+        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
+        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+#ifdef CONFIG_CAVS_DECODER
+/* AVS specific */
+void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);
+
+void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    put_pixels8_c(dst, src, stride, 8);
+}
+void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    avg_pixels8_c(dst, src, stride, 8);
+}
+void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    put_pixels16_c(dst, src, stride, 16);
+}
+void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    avg_pixels16_c(dst, src, stride, 16);
+}
+#endif /* CONFIG_CAVS_DECODER */
+
+#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
+/* VC-1 specific */
+void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);
+
+void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
+    put_pixels8_c(dst, src, stride, 8);
+}
+#endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */
+
+static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    int i;
+
+    for(i=0; i<w; i++){
+        const int src_1= src[ -srcStride];
+        const int src0 = src[0          ];
+        const int src1 = src[  srcStride];
+        const int src2 = src[2*srcStride];
+        const int src3 = src[3*srcStride];
+        const int src4 = src[4*srcStride];
+        const int src5 = src[5*srcStride];
+        const int src6 = src[6*srcStride];
+        const int src7 = src[7*srcStride];
+        const int src8 = src[8*srcStride];
+        const int src9 = src[9*srcStride];
+        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
+        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
+        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
+        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
+        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
+        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
+        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
+        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
+        src++;
+        dst++;
+    }
+}
+
+static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
+    put_pixels8_c(dst, src, stride, 8);
+}
+
+static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t half[64];
+    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
+    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
+}
+
+static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
+    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
+}
+
+static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t half[64];
+    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
+    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
+}
+
+static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
+    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
+}
+
+static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t halfH[88];
+    uint8_t halfV[64];
+    uint8_t halfHV[64];
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
+    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
+    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+}
+static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t halfH[88];
+    uint8_t halfV[64];
+    uint8_t halfHV[64];
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
+    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
+    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+}
+static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t halfH[88];
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
+}
+
+static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
+    int x;
+    const int strength= ff_h263_loop_filter_strength[qscale];
+
+    for(x=0; x<8; x++){
+        int d1, d2, ad1;
+        int p0= src[x-2*stride];
+        int p1= src[x-1*stride];
+        int p2= src[x+0*stride];
+        int p3= src[x+1*stride];
+        int d = (p0 - p3 + 4*(p2 - p1)) / 8;
+
+        if     (d<-2*strength) d1= 0;
+        else if(d<-  strength) d1=-2*strength - d;
+        else if(d<   strength) d1= d;
+        else if(d< 2*strength) d1= 2*strength - d;
+        else                   d1= 0;
+
+        p1 += d1;
+        p2 -= d1;
+        if(p1&256) p1= ~(p1>>31);
+        if(p2&256) p2= ~(p2>>31);
+
+        src[x-1*stride] = p1;
+        src[x+0*stride] = p2;
+
+        ad1= FFABS(d1)>>1;
+
+        d2= clip((p0-p3)/4, -ad1, ad1);
+
+        src[x-2*stride] = p0 - d2;
+        src[x+  stride] = p3 + d2;
+    }
+}
+
+static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
+    int y;
+    const int strength= ff_h263_loop_filter_strength[qscale];
+
+    for(y=0; y<8; y++){
+        int d1, d2, ad1;
+        int p0= src[y*stride-2];
+        int p1= src[y*stride-1];
+        int p2= src[y*stride+0];
+        int p3= src[y*stride+1];
+        int d = (p0 - p3 + 4*(p2 - p1)) / 8;
+
+        if     (d<-2*strength) d1= 0;
+        else if(d<-  strength) d1=-2*strength - d;
+        else if(d<   strength) d1= d;
+        else if(d< 2*strength) d1= 2*strength - d;
+        else                   d1= 0;
+
+        p1 += d1;
+        p2 -= d1;
+        if(p1&256) p1= ~(p1>>31);
+        if(p2&256) p2= ~(p2>>31);
+
+        src[y*stride-1] = p1;
+        src[y*stride+0] = p2;
+
+        ad1= FFABS(d1)>>1;
+
+        d2= clip((p0-p3)/4, -ad1, ad1);
+
+        src[y*stride-2] = p0 - d2;
+        src[y*stride+1] = p3 + d2;
+    }
+}
+
+static void h261_loop_filter_c(uint8_t *src, int stride){
+    int x,y,xy,yz;
+    int temp[64];
+
+    for(x=0; x<8; x++){
+        temp[x      ] = 4*src[x           ];
+        temp[x + 7*8] = 4*src[x + 7*stride];
+    }
+    for(y=1; y<7; y++){
+        for(x=0; x<8; x++){
+            xy = y * stride + x;
+            yz = y * 8 + x;
+            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
+        }
+    }
+
+    for(y=0; y<8; y++){
+        src[  y*stride] = (temp[  y*8] + 2)>>2;
+        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
+        for(x=1; x<7; x++){
+            xy = y * stride + x;
+            yz = y * 8 + x;
+            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
+        }
+    }
+}
+
+static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
+{
+    int i, d;
+    for( i = 0; i < 4; i++ ) {
+        if( tc0[i] < 0 ) {
+            pix += 4*ystride;
+            continue;
+        }
+        for( d = 0; d < 4; d++ ) {
+            const int p0 = pix[-1*xstride];
+            const int p1 = pix[-2*xstride];
+            const int p2 = pix[-3*xstride];
+            const int q0 = pix[0];
+            const int q1 = pix[1*xstride];
+            const int q2 = pix[2*xstride];
+
+            if( FFABS( p0 - q0 ) < alpha &&
+                FFABS( p1 - p0 ) < beta &&
+                FFABS( q1 - q0 ) < beta ) {
+
+                int tc = tc0[i];
+                int i_delta;
+
+                if( FFABS( p2 - p0 ) < beta ) {
+                    pix[-2*xstride] = p1 + clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
+                    tc++;
+                }
+                if( FFABS( q2 - q0 ) < beta ) {
+                    pix[   xstride] = q1 + clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
+                    tc++;
+                }
+
+                i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+                pix[-xstride] = clip_uint8( p0 + i_delta );    /* p0' */
+                pix[0]        = clip_uint8( q0 - i_delta );    /* q0' */
+            }
+            pix += ystride;
+        }
+    }
+}
+static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
+}
+static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
+}
+
+static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
+{
+    int i, d;
+    for( i = 0; i < 4; i++ ) {
+        const int tc = tc0[i];
+        if( tc <= 0 ) {
+            pix += 2*ystride;
+            continue;
+        }
+        for( d = 0; d < 2; d++ ) {
+            const int p0 = pix[-1*xstride];
+            const int p1 = pix[-2*xstride];
+            const int q0 = pix[0];
+            const int q1 = pix[1*xstride];
+
+            if( FFABS( p0 - q0 ) < alpha &&
+                FFABS( p1 - p0 ) < beta &&
+                FFABS( q1 - q0 ) < beta ) {
+
+                int delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+
+                pix[-xstride] = clip_uint8( p0 + delta );    /* p0' */
+                pix[0]        = clip_uint8( q0 - delta );    /* q0' */
+            }
+            pix += ystride;
+        }
+    }
+}
+static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
+}
+static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
+}
+
+static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
+{
+    int d;
+    for( d = 0; d < 8; d++ ) {
+        const int p0 = pix[-1*xstride];
+        const int p1 = pix[-2*xstride];
+        const int q0 = pix[0];
+        const int q1 = pix[1*xstride];
+
+        if( FFABS( p0 - q0 ) < alpha &&
+            FFABS( p1 - p0 ) < beta &&
+            FFABS( q1 - q0 ) < beta ) {
+
+            pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
+            pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
+        }
+        pix += ystride;
+    }
+}
+static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
+}
+static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
+}
+
+static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - pix2[0]);
+        s += abs(pix1[1] - pix2[1]);
+        s += abs(pix1[2] - pix2[2]);
+        s += abs(pix1[3] - pix2[3]);
+        s += abs(pix1[4] - pix2[4]);
+        s += abs(pix1[5] - pix2[5]);
+        s += abs(pix1[6] - pix2[6]);
+        s += abs(pix1[7] - pix2[7]);
+        s += abs(pix1[8] - pix2[8]);
+        s += abs(pix1[9] - pix2[9]);
+        s += abs(pix1[10] - pix2[10]);
+        s += abs(pix1[11] - pix2[11]);
+        s += abs(pix1[12] - pix2[12]);
+        s += abs(pix1[13] - pix2[13]);
+        s += abs(pix1[14] - pix2[14]);
+        s += abs(pix1[15] - pix2[15]);
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+
+static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
+        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
+        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
+        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
+        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
+        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
+        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
+        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
+        s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
+        s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
+        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
+        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
+        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
+        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
+        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
+        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+
+static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    uint8_t *pix3 = pix2 + line_size;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
+        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
+        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
+        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
+        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
+        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
+        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
+        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
+        s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
+        s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
+        s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
+        s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
+        s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
+        s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
+        s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
+        s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
+        pix1 += line_size;
+        pix2 += line_size;
+        pix3 += line_size;
+    }
+    return s;
+}
+
+static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    uint8_t *pix3 = pix2 + line_size;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
+        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
+        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
+        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
+        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
+        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
+        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
+        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
+        s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
+        s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
+        s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
+        s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
+        s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
+        s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
+        s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
+        s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
+        pix1 += line_size;
+        pix2 += line_size;
+        pix3 += line_size;
+    }
+    return s;
+}
+
+static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - pix2[0]);
+        s += abs(pix1[1] - pix2[1]);
+        s += abs(pix1[2] - pix2[2]);
+        s += abs(pix1[3] - pix2[3]);
+        s += abs(pix1[4] - pix2[4]);
+        s += abs(pix1[5] - pix2[5]);
+        s += abs(pix1[6] - pix2[6]);
+        s += abs(pix1[7] - pix2[7]);
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+
+static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
+        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
+        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
+        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
+        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
+        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
+        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
+        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+
+static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    uint8_t *pix3 = pix2 + line_size;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
+        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
+        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
+        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
+        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
+        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
+        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
+        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
+        pix1 += line_size;
+        pix2 += line_size;
+        pix3 += line_size;
+    }
+    return s;
+}
+
+static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    uint8_t *pix3 = pix2 + line_size;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
+        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
+        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
+        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
+        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
+        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
+        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
+        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
+        pix1 += line_size;
+        pix2 += line_size;
+        pix3 += line_size;
+    }
+    return s;
+}
+
+static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
+    MpegEncContext *c = v;
+    int score1=0;
+    int score2=0;
+    int x,y;
+
+    for(y=0; y<h; y++){
+        for(x=0; x<16; x++){
+            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
+        }
+        if(y+1<h){
+            for(x=0; x<15; x++){
+                score2+= FFABS(  s1[x  ] - s1[x  +stride]
+                             - s1[x+1] + s1[x+1+stride])
+                        -FFABS(  s2[x  ] - s2[x  +stride]
+                             - s2[x+1] + s2[x+1+stride]);
+            }
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+
+    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
+    else  return score1 + FFABS(score2)*8;
+}
+
+static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
+    MpegEncContext *c = v;
+    int score1=0;
+    int score2=0;
+    int x,y;
+
+    for(y=0; y<h; y++){
+        for(x=0; x<8; x++){
+            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
+        }
+        if(y+1<h){
+            for(x=0; x<7; x++){
+                score2+= FFABS(  s1[x  ] - s1[x  +stride]
+                             - s1[x+1] + s1[x+1+stride])
+                        -FFABS(  s2[x  ] - s2[x  +stride]
+                             - s2[x+1] + s2[x+1+stride]);
+            }
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+
+    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
+    else  return score1 + FFABS(score2)*8;
+}
+
+static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
+    int i;
+    unsigned int sum=0;
+
+    for(i=0; i<8*8; i++){
+        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
+        int w= weight[i];
+        b>>= RECON_SHIFT;
+        assert(-512<b && b<512);
+
+        sum += (w*b)*(w*b)>>4;
+    }
+    return sum>>2;
+}
+
+static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
+    int i;
+
+    for(i=0; i<8*8; i++){
+        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
+    }
+}
+
+/**
+ * permutes an 8x8 block.
+ * @param block the block which will be permuted according to the given permutation vector
+ * @param permutation the permutation vector
+ * @param last the last non zero coefficient in scantable order, used to speed the permutation up
+ * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
+ *                  (inverse) permutated to scantable order!
+ */
+void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
+{
+    int i;
+    DCTELEM temp[64];
+
+    if(last<=0) return;
+    //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
+
+    for(i=0; i<=last; i++){
+        const int j= scantable[i];
+        temp[j]= block[j];
+        block[j]=0;
+    }
+
+    for(i=0; i<=last; i++){
+        const int j= scantable[i];
+        const int perm_j= permutation[j];
+        block[perm_j]= temp[j];
+    }
+}
+
+static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
+    return 0;
+}
+
+void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
+    int i;
+
+    memset(cmp, 0, sizeof(void*)*5);
+
+    for(i=0; i<5; i++){
+        switch(type&0xFF){
+        case FF_CMP_SAD:
+            cmp[i]= c->sad[i];
+            break;
+        case FF_CMP_SATD:
+            cmp[i]= c->hadamard8_diff[i];
+            break;
+        case FF_CMP_SSE:
+            cmp[i]= c->sse[i];
+            break;
+        case FF_CMP_DCT:
+            cmp[i]= c->dct_sad[i];
+            break;
+        case FF_CMP_DCT264:
+            cmp[i]= c->dct264_sad[i];
+            break;
+        case FF_CMP_DCTMAX:
+            cmp[i]= c->dct_max[i];
+            break;
+        case FF_CMP_PSNR:
+            cmp[i]= c->quant_psnr[i];
+            break;
+        case FF_CMP_BIT:
+            cmp[i]= c->bit[i];
+            break;
+        case FF_CMP_RD:
+            cmp[i]= c->rd[i];
+            break;
+        case FF_CMP_VSAD:
+            cmp[i]= c->vsad[i];
+            break;
+        case FF_CMP_VSSE:
+            cmp[i]= c->vsse[i];
+            break;
+        case FF_CMP_ZERO:
+            cmp[i]= zero_cmp;
+            break;
+        case FF_CMP_NSSE:
+            cmp[i]= c->nsse[i];
+            break;
+#ifdef CONFIG_SNOW_ENCODER
+        case FF_CMP_W53:
+            cmp[i]= c->w53[i];
+            break;
+        case FF_CMP_W97:
+            cmp[i]= c->w97[i];
+            break;
+#endif
+        default:
+            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
+        }
+    }
+}
+
+/**
+ * memset(blocks, 0, sizeof(DCTELEM)*6*64)
+ */
+static void clear_blocks_c(DCTELEM *blocks)
+{
+    memset(blocks, 0, sizeof(DCTELEM)*6*64);
+}
+
+static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
+    int i;
+    for(i=0; i+7<w; i+=8){
+        dst[i+0] += src[i+0];
+        dst[i+1] += src[i+1];
+        dst[i+2] += src[i+2];
+        dst[i+3] += src[i+3];
+        dst[i+4] += src[i+4];
+        dst[i+5] += src[i+5];
+        dst[i+6] += src[i+6];
+        dst[i+7] += src[i+7];
+    }
+    for(; i<w; i++)
+        dst[i+0] += src[i+0];
+}
+
+static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
+    int i;
+    for(i=0; i+7<w; i+=8){
+        dst[i+0] = src1[i+0]-src2[i+0];
+        dst[i+1] = src1[i+1]-src2[i+1];
+        dst[i+2] = src1[i+2]-src2[i+2];
+        dst[i+3] = src1[i+3]-src2[i+3];
+        dst[i+4] = src1[i+4]-src2[i+4];
+        dst[i+5] = src1[i+5]-src2[i+5];
+        dst[i+6] = src1[i+6]-src2[i+6];
+        dst[i+7] = src1[i+7]-src2[i+7];
+    }
+    for(; i<w; i++)
+        dst[i+0] = src1[i+0]-src2[i+0];
+}
+
+static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
+    int i;
+    uint8_t l, lt;
+
+    l= *left;
+    lt= *left_top;
+
+    for(i=0; i<w; i++){
+        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
+        lt= src1[i];
+        l= src2[i];
+        dst[i]= l - pred;
+    }
+
+    *left= l;
+    *left_top= lt;
+}
+
+#define BUTTERFLY2(o1,o2,i1,i2) \
+o1= (i1)+(i2);\
+o2= (i1)-(i2);
+
+#define BUTTERFLY1(x,y) \
+{\
+    int a,b;\
+    a= x;\
+    b= y;\
+    x= a+b;\
+    y= a-b;\
+}
+
+#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
+
+static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
+    int i;
+    int temp[64];
+    int sum=0;
+
+    assert(h==8);
+
+    for(i=0; i<8; i++){
+        //FIXME try pointer walks
+        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
+        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
+        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
+        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
+
+        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
+        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
+
+        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
+        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
+    }
+
+    for(i=0; i<8; i++){
+        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
+        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
+        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
+
+        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
+        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
+        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
+
+        sum +=
+             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
+            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
+            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
+            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
+    }
+#if 0
+static int maxi=0;
+if(sum>maxi){
+    maxi=sum;
+    printf("MAX:%d\n", maxi);
+}
+#endif
+    return sum;
+}
+
+static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
+    int i;
+    int temp[64];
+    int sum=0;
+
+    assert(h==8);
+
+    for(i=0; i<8; i++){
+        //FIXME try pointer walks
+        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
+        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
+        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
+        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
+
+        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
+        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
+
+        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
+        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
+    }
+
+    for(i=0; i<8; i++){
+        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
+        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
+        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
+
+        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
+        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
+        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
+
+        sum +=
+             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
+            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
+            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
+            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
+    }
+
+    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
+
+    return sum;
+}
+
+static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
+    DCTELEM * const temp= (DCTELEM*)aligned_temp;
+    int sum=0, i;
+
+    assert(h==8);
+
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+    s->dsp.fdct(temp);
+
+    for(i=0; i<64; i++)
+        sum+= FFABS(temp[i]);
+
+    return sum;
+}
+
+#ifdef CONFIG_GPL
+#define DCT8_1D {\
+    const int s07 = SRC(0) + SRC(7);\
+    const int s16 = SRC(1) + SRC(6);\
+    const int s25 = SRC(2) + SRC(5);\
+    const int s34 = SRC(3) + SRC(4);\
+    const int a0 = s07 + s34;\
+    const int a1 = s16 + s25;\
+    const int a2 = s07 - s34;\
+    const int a3 = s16 - s25;\
+    const int d07 = SRC(0) - SRC(7);\
+    const int d16 = SRC(1) - SRC(6);\
+    const int d25 = SRC(2) - SRC(5);\
+    const int d34 = SRC(3) - SRC(4);\
+    const int a4 = d16 + d25 + (d07 + (d07>>1));\
+    const int a5 = d07 - d34 - (d25 + (d25>>1));\
+    const int a6 = d07 + d34 - (d16 + (d16>>1));\
+    const int a7 = d16 - d25 + (d34 + (d34>>1));\
+    DST(0,  a0 + a1     ) ;\
+    DST(1,  a4 + (a7>>2)) ;\
+    DST(2,  a2 + (a3>>1)) ;\
+    DST(3,  a5 + (a6>>2)) ;\
+    DST(4,  a0 - a1     ) ;\
+    DST(5,  a6 - (a5>>2)) ;\
+    DST(6, (a2>>1) - a3 ) ;\
+    DST(7, (a4>>2) - a7 ) ;\
+}
+
+static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    int16_t dct[8][8];
+    int i;
+    int sum=0;
+
+    s->dsp.diff_pixels(dct, src1, src2, stride);
+
+#define SRC(x) dct[i][x]
+#define DST(x,v) dct[i][x]= v
+    for( i = 0; i < 8; i++ )
+        DCT8_1D
+#undef SRC
+#undef DST
+
+#define SRC(x) dct[x][i]
+#define DST(x,v) sum += FFABS(v)
+    for( i = 0; i < 8; i++ )
+        DCT8_1D
+#undef SRC
+#undef DST
+    return sum;
+}
+#endif
+
+static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
+    DCTELEM * const temp= (DCTELEM*)aligned_temp;
+    int sum=0, i;
+
+    assert(h==8);
+
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+    s->dsp.fdct(temp);
+
+    for(i=0; i<64; i++)
+        sum= FFMAX(sum, FFABS(temp[i]));
+
+    return sum;
+}
+
+static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]);
+    DCTELEM * const temp= (DCTELEM*)aligned_temp;
+    DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
+    int sum=0, i;
+
+    assert(h==8);
+    s->mb_intra=0;
+
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+
+    memcpy(bak, temp, 64*sizeof(DCTELEM));
+
+    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+    s->dct_unquantize_inter(s, temp, 0, s->qscale);
+    simple_idct(temp); //FIXME
+
+    for(i=0; i<64; i++)
+        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
+
+    return sum;
+}
+
+static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    const uint8_t *scantable= s->intra_scantable.permutated;
+    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
+    DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]);
+    DCTELEM * const temp= (DCTELEM*)aligned_temp;
+    uint8_t * const bak= (uint8_t*)aligned_bak;
+    int i, last, run, bits, level, distoration, start_i;
+    const int esc_length= s->ac_esc_length;
+    uint8_t * length;
+    uint8_t * last_length;
+
+    assert(h==8);
+
+    for(i=0; i<8; i++){
+        ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
+        ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
+    }
+
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+
+    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+
+    bits=0;
+
+    if (s->mb_intra) {
+        start_i = 1;
+        length     = s->intra_ac_vlc_length;
+        last_length= s->intra_ac_vlc_last_length;
+        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
+    } else {
+        start_i = 0;
+        length     = s->inter_ac_vlc_length;
+        last_length= s->inter_ac_vlc_last_length;
+    }
+
+    if(last>=start_i){
+        run=0;
+        for(i=start_i; i<last; i++){
+            int j= scantable[i];
+            level= temp[j];
+
+            if(level){
+                level+=64;
+                if((level&(~127)) == 0){
+                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
+                }else
+                    bits+= esc_length;
+                run=0;
+            }else
+                run++;
+        }
+        i= scantable[last];
+
+        level= temp[i] + 64;
+
+        assert(level - 64);
+
+        if((level&(~127)) == 0){
+            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
+        }else
+            bits+= esc_length;
+
+    }
+
+    if(last>=0){
+        if(s->mb_intra)
+            s->dct_unquantize_intra(s, temp, 0, s->qscale);
+        else
+            s->dct_unquantize_inter(s, temp, 0, s->qscale);
+    }
+
+    s->dsp.idct_add(bak, stride, temp);
+
+    distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);
+
+    return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
+}
+
+static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    const uint8_t *scantable= s->intra_scantable.permutated;
+    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
+    DCTELEM * const temp= (DCTELEM*)aligned_temp;
+    int i, last, run, bits, level, start_i;
+    const int esc_length= s->ac_esc_length;
+    uint8_t * length;
+    uint8_t * last_length;
+
+    assert(h==8);
+
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+
+    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+
+    bits=0;
+
+    if (s->mb_intra) {
+        start_i = 1;
+        length     = s->intra_ac_vlc_length;
+        last_length= s->intra_ac_vlc_last_length;
+        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
+    } else {
+        start_i = 0;
+        length     = s->inter_ac_vlc_length;
+        last_length= s->inter_ac_vlc_last_length;
+    }
+
+    if(last>=start_i){
+        run=0;
+        for(i=start_i; i<last; i++){
+            int j= scantable[i];
+            level= temp[j];
+
+            if(level){
+                level+=64;
+                if((level&(~127)) == 0){
+                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
+                }else
+                    bits+= esc_length;
+                run=0;
+            }else
+                run++;
+        }
+        i= scantable[last];
+
+        level= temp[i] + 64;
+
+        assert(level - 64);
+
+        if((level&(~127)) == 0){
+            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
+        }else
+            bits+= esc_length;
+    }
+
+    return bits;
+}
+
+static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
+    int score=0;
+    int x,y;
+
+    for(y=1; y<h; y++){
+        for(x=0; x<16; x+=4){
+            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])
+                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);
+        }
+        s+= stride;
+    }
+
+    return score;
+}
+
+static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
+    int score=0;
+    int x,y;
+
+    for(y=1; y<h; y++){
+        for(x=0; x<16; x++){
+            score+= FFABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+
+    return score;
+}
+
+#define SQ(a) ((a)*(a))
+static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
+    int score=0;
+    int x,y;
+
+    for(y=1; y<h; y++){
+        for(x=0; x<16; x+=4){
+            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])
+                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
+        }
+        s+= stride;
+    }
+
+    return score;
+}
+
+static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
+    int score=0;
+    int x,y;
+
+    for(y=1; y<h; y++){
+        for(x=0; x<16; x++){
+            score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+
+    return score;
+}
+
+WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
+WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
+WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
+#ifdef CONFIG_GPL
+WARPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
+#endif
+WARPER8_16_SQ(dct_max8x8_c, dct_max16_c)
+WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
+WARPER8_16_SQ(rd8x8_c, rd16_c)
+WARPER8_16_SQ(bit8x8_c, bit16_c)
+
+static void vector_fmul_c(float *dst, const float *src, int len){
+    int i;
+    for(i=0; i<len; i++)
+        dst[i] *= src[i];
+}
+
+static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
+    int i;
+    src1 += len-1;
+    for(i=0; i<len; i++)
+        dst[i] = src0[i] * src1[-i];
+}
+
+void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){
+    int i;
+    for(i=0; i<len; i++)
+        dst[i*step] = src0[i] * src1[i] + src2[i] + src3;
+}
+
+void ff_float_to_int16_c(int16_t *dst, const float *src, int len){
+    int i;
+    for(i=0; i<len; i++) {
+        int_fast32_t tmp = ((int32_t*)src)[i];
+        if(tmp & 0xf0000){
+            tmp = (0x43c0ffff - tmp)>>31;
+            // is this faster on some gcc/cpu combinations?
+//          if(tmp > 0x43c0ffff) tmp = 0xFFFF;
+//          else                 tmp = 0;
+        }
+        dst[i] = tmp - 0x8000;
+    }
+}
+
+/* XXX: those functions should be suppressed ASAP when all IDCTs are
+ converted */
+static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct (block);
+    put_pixels_clamped_c(block, dest, line_size);
+}
+static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct (block);
+    add_pixels_clamped_c(block, dest, line_size);
+}
+
+static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct4 (block);
+    put_pixels_clamped4_c(block, dest, line_size);
+}
+static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct4 (block);
+    add_pixels_clamped4_c(block, dest, line_size);
+}
+
+static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct2 (block);
+    put_pixels_clamped2_c(block, dest, line_size);
+}
+static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct2 (block);
+    add_pixels_clamped2_c(block, dest, line_size);
+}
+
+static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    dest[0] = cm[(block[0] + 4)>>3];
+}
+static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
+}
+
+static void just_return() { return; }
+
+/* init static data */
+void dsputil_static_init(void)
+{
+    int i;
+
+    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
+    for(i=0;i<MAX_NEG_CROP;i++) {
+        ff_cropTbl[i] = 0;
+        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
+    }
+
+    for(i=0;i<512;i++) {
+        ff_squareTbl[i] = (i - 256) * (i - 256);
+    }
+
+    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
+}
+
+
+void dsputil_init(DSPContext* c, AVCodecContext *avctx)
+{
+    int i;
+
+#ifdef CONFIG_ENCODERS
+    if(avctx->dct_algo==FF_DCT_FASTINT) {
+        c->fdct = fdct_ifast;
+        c->fdct248 = fdct_ifast248;
+    }
+    else if(avctx->dct_algo==FF_DCT_FAAN) {
+        c->fdct = ff_faandct;
+        c->fdct248 = ff_faandct248;
+    }
+    else {
+        c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
+        c->fdct248 = ff_fdct248_islow;
+    }
+#endif //CONFIG_ENCODERS
+
+    if(avctx->lowres==1){
+        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO){
+            c->idct_put= ff_jref_idct4_put;
+            c->idct_add= ff_jref_idct4_add;
+        }else{
+            c->idct_put= ff_h264_lowres_idct_put_c;
+            c->idct_add= ff_h264_lowres_idct_add_c;
+        }
+        c->idct    = j_rev_dct4;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }else if(avctx->lowres==2){
+        c->idct_put= ff_jref_idct2_put;
+        c->idct_add= ff_jref_idct2_add;
+        c->idct    = j_rev_dct2;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }else if(avctx->lowres==3){
+        c->idct_put= ff_jref_idct1_put;
+        c->idct_add= ff_jref_idct1_add;
+        c->idct    = j_rev_dct1;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }else{
+        if(avctx->idct_algo==FF_IDCT_INT){
+            c->idct_put= ff_jref_idct_put;
+            c->idct_add= ff_jref_idct_add;
+            c->idct    = j_rev_dct;
+            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
+        }else if(avctx->idct_algo==FF_IDCT_VP3){
+            c->idct_put= ff_vp3_idct_put_c;
+            c->idct_add= ff_vp3_idct_add_c;
+            c->idct    = ff_vp3_idct_c;
+            c->idct_permutation_type= FF_NO_IDCT_PERM;
+        }else{ //accurate/default
+            c->idct_put= simple_idct_put;
+            c->idct_add= simple_idct_add;
+            c->idct    = simple_idct;
+            c->idct_permutation_type= FF_NO_IDCT_PERM;
+        }
+    }
+
+    c->h264_idct_add= ff_h264_idct_add_c;
+    c->h264_idct8_add= ff_h264_idct8_add_c;
+    c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
+    c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
+
+    c->get_pixels = get_pixels_c;
+    c->diff_pixels = diff_pixels_c;
+    c->put_pixels_clamped = put_pixels_clamped_c;
+    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
+    c->add_pixels_clamped = add_pixels_clamped_c;
+    c->add_pixels8 = add_pixels8_c;
+    c->add_pixels4 = add_pixels4_c;
+    c->gmc1 = gmc1_c;
+    c->gmc = ff_gmc_c;
+    c->clear_blocks = clear_blocks_c;
+    c->pix_sum = pix_sum_c;
+    c->pix_norm1 = pix_norm1_c;
+
+    /* TODO [0] 16  [1] 8 */
+    c->pix_abs[0][0] = pix_abs16_c;
+    c->pix_abs[0][1] = pix_abs16_x2_c;
+    c->pix_abs[0][2] = pix_abs16_y2_c;
+    c->pix_abs[0][3] = pix_abs16_xy2_c;
+    c->pix_abs[1][0] = pix_abs8_c;
+    c->pix_abs[1][1] = pix_abs8_x2_c;
+    c->pix_abs[1][2] = pix_abs8_y2_c;
+    c->pix_abs[1][3] = pix_abs8_xy2_c;
+
+#define dspfunc(PFX, IDX, NUM) \
+    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c;     \
+    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c;  \
+    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c;  \
+    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
+
+    dspfunc(put, 0, 16);
+    dspfunc(put_no_rnd, 0, 16);
+    dspfunc(put, 1, 8);
+    dspfunc(put_no_rnd, 1, 8);
+    dspfunc(put, 2, 4);
+    dspfunc(put, 3, 2);
+
+    dspfunc(avg, 0, 16);
+    dspfunc(avg_no_rnd, 0, 16);
+    dspfunc(avg, 1, 8);
+    dspfunc(avg_no_rnd, 1, 8);
+    dspfunc(avg, 2, 4);
+    dspfunc(avg, 3, 2);
+#undef dspfunc
+
+    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
+    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
+
+    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
+    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
+    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
+    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
+    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
+    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
+    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
+    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
+    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
+
+    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
+    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
+    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
+    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
+    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
+    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
+    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
+    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
+    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
+
+#define dspfunc(PFX, IDX, NUM) \
+    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
+    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
+    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
+    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
+    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
+    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
+    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
+    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
+    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
+    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
+    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
+    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
+    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
+    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
+    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
+    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
+
+    dspfunc(put_qpel, 0, 16);
+    dspfunc(put_no_rnd_qpel, 0, 16);
+
+    dspfunc(avg_qpel, 0, 16);
+    /* dspfunc(avg_no_rnd_qpel, 0, 16); */
+
+    dspfunc(put_qpel, 1, 8);
+    dspfunc(put_no_rnd_qpel, 1, 8);
+
+    dspfunc(avg_qpel, 1, 8);
+    /* dspfunc(avg_no_rnd_qpel, 1, 8); */
+
+    dspfunc(put_h264_qpel, 0, 16);
+    dspfunc(put_h264_qpel, 1, 8);
+    dspfunc(put_h264_qpel, 2, 4);
+    dspfunc(put_h264_qpel, 3, 2);
+    dspfunc(avg_h264_qpel, 0, 16);
+    dspfunc(avg_h264_qpel, 1, 8);
+    dspfunc(avg_h264_qpel, 2, 4);
+
+#undef dspfunc
+    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
+    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
+    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
+    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
+    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
+    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
+    c->put_no_rnd_h264_chroma_pixels_tab[0]= put_no_rnd_h264_chroma_mc8_c;
+
+    c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
+    c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
+    c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
+    c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
+    c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
+    c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
+    c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
+    c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
+    c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
+    c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
+    c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
+    c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
+    c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
+    c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
+    c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
+    c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
+    c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
+    c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
+    c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
+    c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
+
+#ifdef CONFIG_CAVS_DECODER
+    ff_cavsdsp_init(c,avctx);
+#endif
+#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
+    ff_vc1dsp_init(c,avctx);
+#endif
+
+    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
+    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
+    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
+    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
+    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
+    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
+    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
+    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
+
+#define SET_CMP_FUNC(name) \
+    c->name[0]= name ## 16_c;\
+    c->name[1]= name ## 8x8_c;
+
+    SET_CMP_FUNC(hadamard8_diff)
+    c->hadamard8_diff[4]= hadamard8_intra16_c;
+    SET_CMP_FUNC(dct_sad)
+    SET_CMP_FUNC(dct_max)
+#ifdef CONFIG_GPL
+    SET_CMP_FUNC(dct264_sad)
+#endif
+    c->sad[0]= pix_abs16_c;
+    c->sad[1]= pix_abs8_c;
+    c->sse[0]= sse16_c;
+    c->sse[1]= sse8_c;
+    c->sse[2]= sse4_c;
+    SET_CMP_FUNC(quant_psnr)
+    SET_CMP_FUNC(rd)
+    SET_CMP_FUNC(bit)
+    c->vsad[0]= vsad16_c;
+    c->vsad[4]= vsad_intra16_c;
+    c->vsse[0]= vsse16_c;
+    c->vsse[4]= vsse_intra16_c;
+    c->nsse[0]= nsse16_c;
+    c->nsse[1]= nsse8_c;
+#ifdef CONFIG_SNOW_ENCODER
+    c->w53[0]= w53_16_c;
+    c->w53[1]= w53_8_c;
+    c->w97[0]= w97_16_c;
+    c->w97[1]= w97_8_c;
+#endif
+
+    c->add_bytes= add_bytes_c;
+    c->diff_bytes= diff_bytes_c;
+    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
+    c->bswap_buf= bswap_buf;
+
+    c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
+    c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
+    c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
+    c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
+    c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
+    c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
+    c->h264_loop_filter_strength= NULL;
+
+    c->h263_h_loop_filter= h263_h_loop_filter_c;
+    c->h263_v_loop_filter= h263_v_loop_filter_c;
+
+    c->h261_loop_filter= h261_loop_filter_c;
+
+    c->try_8x8basis= try_8x8basis_c;
+    c->add_8x8basis= add_8x8basis_c;
+
+#ifdef CONFIG_SNOW_ENCODER
+    c->vertical_compose97i = ff_snow_vertical_compose97i;
+    c->horizontal_compose97i = ff_snow_horizontal_compose97i;
+    c->inner_add_yblock = ff_snow_inner_add_yblock;
+#endif
+
+#ifdef CONFIG_VORBIS_DECODER
+    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
+#endif
+    c->vector_fmul = vector_fmul_c;
+    c->vector_fmul_reverse = vector_fmul_reverse_c;
+    c->vector_fmul_add_add = ff_vector_fmul_add_add_c;
+    c->float_to_int16 = ff_float_to_int16_c;
+
+    c->shrink[0]= ff_img_copy_plane;
+    c->shrink[1]= ff_shrink22;
+    c->shrink[2]= ff_shrink44;
+    c->shrink[3]= ff_shrink88;
+
+    c->prefetch= just_return;
+
+    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
+    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
+
+#ifdef HAVE_MMX
+    dsputil_init_mmx(c, avctx);
+#endif
+#ifdef ARCH_ARMV4L
+    dsputil_init_armv4l(c, avctx);
+#endif
+#ifdef HAVE_MLIB
+    dsputil_init_mlib(c, avctx);
+#endif
+#ifdef ARCH_SPARC
+   dsputil_init_vis(c,avctx);
+#endif
+#ifdef ARCH_ALPHA
+    dsputil_init_alpha(c, avctx);
+#endif
+#ifdef ARCH_POWERPC
+    dsputil_init_ppc(c, avctx);
+#endif
+#ifdef HAVE_MMI
+    dsputil_init_mmi(c, avctx);
+#endif
+#ifdef ARCH_SH4
+    dsputil_init_sh4(c,avctx);
+#endif
+#ifdef ARCH_BFIN
+    dsputil_init_bfin(c,avctx);
+#endif
+
+    for(i=0; i<64; i++){
+        if(!c->put_2tap_qpel_pixels_tab[0][i])
+            c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
+        if(!c->avg_2tap_qpel_pixels_tab[0][i])
+            c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
+    }
+
+    switch(c->idct_permutation_type){
+    case FF_NO_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= i;
+        break;
+    case FF_LIBMPEG2_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
+        break;
+    case FF_SIMPLE_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= simple_mmx_permutation[i];
+        break;
+    case FF_TRANSPOSE_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
+        break;
+    case FF_PARTTRANS_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
+    }
+}
+
diff --git a/contrib/ffmpeg/libavcodec/dsputil.h b/contrib/ffmpeg/libavcodec/dsputil.h
new file mode 100644
index 000000000..35deb6aab
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/dsputil.h
@@ -0,0 +1,778 @@
+/*
+ * DSP utils
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file dsputil.h
+ * DSP utils.
+ * note, many functions in here may use MMX which trashes the FPU state, it is
+ * absolutely necessary to call emms_c() between dsp & float/double code
+ */
+
+#ifndef DSPUTIL_H
+#define DSPUTIL_H
+
+#include "common.h"
+#include "avcodec.h"
+
+
+//#define DEBUG
+/* dct code */
+typedef short DCTELEM;
+typedef int DWTELEM;
+
+void fdct_ifast (DCTELEM *data);
+void fdct_ifast248 (DCTELEM *data);
+void ff_jpeg_fdct_islow (DCTELEM *data);
+void ff_fdct248_islow (DCTELEM *data);
+
+void j_rev_dct (DCTELEM *data);
+void j_rev_dct4 (DCTELEM *data);
+void j_rev_dct2 (DCTELEM *data);
+void j_rev_dct1 (DCTELEM *data);
+
+void ff_fdct_mmx(DCTELEM *block);
+void ff_fdct_mmx2(DCTELEM *block);
+void ff_fdct_sse2(DCTELEM *block);
+
+void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
+void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
+
+void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1,
+                              const float *src2, int src3, int blocksize, int step);
+void ff_float_to_int16_c(int16_t *dst, const float *src, int len);
+
+/* encoding scans */
+extern const uint8_t ff_alternate_horizontal_scan[64];
+extern const uint8_t ff_alternate_vertical_scan[64];
+extern const uint8_t ff_zigzag_direct[64];
+extern const uint8_t ff_zigzag248_direct[64];
+
+/* pixel operations */
+#define MAX_NEG_CROP 1024
+
+/* temporary */
+extern uint32_t ff_squareTbl[512];
+extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
+
+/* VP3 DSP functions */
+void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
+void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+
+/* 1/2^n downscaling functions from imgconvert.c */
+void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+
+void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
+              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
+
+/* minimum alignment rules ;)
+if u notice errors in the align stuff, need more alignment for some asm code for some cpu
+or need to use a function with less aligned data then send a mail to the ffmpeg-dev list, ...
+
+!warning these alignments might not match reallity, (missing attribute((align)) stuff somewhere possible)
+i (michael) didnt check them, these are just the alignents which i think could be reached easily ...
+
+!future video codecs might need functions with less strict alignment
+*/
+
+/*
+void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size);
+void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
+void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
+void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
+void clear_blocks_c(DCTELEM *blocks);
+*/
+
+/* add and put pixel (decoding) */
+// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
+//h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller then 4
+typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
+typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
+typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
+typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
+typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset);
+typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset);
+
+#define DEF_OLD_QPEL(name)\
+void ff_put_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
+void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
+void ff_avg_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
+
+DEF_OLD_QPEL(qpel16_mc11_old_c)
+DEF_OLD_QPEL(qpel16_mc31_old_c)
+DEF_OLD_QPEL(qpel16_mc12_old_c)
+DEF_OLD_QPEL(qpel16_mc32_old_c)
+DEF_OLD_QPEL(qpel16_mc13_old_c)
+DEF_OLD_QPEL(qpel16_mc33_old_c)
+DEF_OLD_QPEL(qpel8_mc11_old_c)
+DEF_OLD_QPEL(qpel8_mc31_old_c)
+DEF_OLD_QPEL(qpel8_mc12_old_c)
+DEF_OLD_QPEL(qpel8_mc32_old_c)
+DEF_OLD_QPEL(qpel8_mc13_old_c)
+DEF_OLD_QPEL(qpel8_mc33_old_c)
+
+#define CALL_2X_PIXELS(a, b, n)\
+static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    b(block  , pixels  , line_size, h);\
+    b(block+n, pixels+n, line_size, h);\
+}
+
+/* motion estimation */
+// h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller then 2
+// allthough currently h<4 is not used as functions with width <8 are not used and neither implemented
+typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
+
+
+// for snow slices
+typedef struct slice_buffer_s slice_buffer;
+
+/**
+ * DSPContext.
+ */
+typedef struct DSPContext {
+    /* pixel ops : interface with DCT */
+    void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
+    void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
+    void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size);
+    void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size);
+    /**
+     * translational global motion compensation.
+     */
+    void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
+    /**
+     * global motion compensation.
+     */
+    void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
+                    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
+    void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
+    int (*pix_sum)(uint8_t * pix, int line_size);
+    int (*pix_norm1)(uint8_t * pix, int line_size);
+// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
+
+    me_cmp_func sad[5]; /* identical to pix_absAxA except additional void * */
+    me_cmp_func sse[5];
+    me_cmp_func hadamard8_diff[5];
+    me_cmp_func dct_sad[5];
+    me_cmp_func quant_psnr[5];
+    me_cmp_func bit[5];
+    me_cmp_func rd[5];
+    me_cmp_func vsad[5];
+    me_cmp_func vsse[5];
+    me_cmp_func nsse[5];
+    me_cmp_func w53[5];
+    me_cmp_func w97[5];
+    me_cmp_func dct_max[5];
+    me_cmp_func dct264_sad[5];
+
+    me_cmp_func me_pre_cmp[5];
+    me_cmp_func me_cmp[5];
+    me_cmp_func me_sub_cmp[5];
+    me_cmp_func mb_cmp[5];
+    me_cmp_func ildct_cmp[5]; //only width 16 used
+    me_cmp_func frame_skip_cmp[5]; //only width 8 used
+
+    /**
+     * Halfpel motion compensation with rounding (a+b+1)>>1.
+     * this is an array[4][4] of motion compensation funcions for 4
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func put_pixels_tab[4][4];
+
+    /**
+     * Halfpel motion compensation with rounding (a+b+1)>>1.
+     * This is an array[4][4] of motion compensation functions for 4
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination into which the result is averaged (a+b+1)>>1
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func avg_pixels_tab[4][4];
+
+    /**
+     * Halfpel motion compensation with no rounding (a+b)>>1.
+     * this is an array[2][4] of motion compensation funcions for 2
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func put_no_rnd_pixels_tab[4][4];
+
+    /**
+     * Halfpel motion compensation with no rounding (a+b)>>1.
+     * this is an array[2][4] of motion compensation funcions for 2
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination into which the result is averaged (a+b)>>1
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func avg_no_rnd_pixels_tab[4][4];
+
+    void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h);
+
+    /**
+     * Thirdpel motion compensation with rounding (a+b+1)>>1.
+     * this is an array[12] of motion compensation funcions for the 9 thirdpel positions<br>
+     * *pixels_tab[ xthirdpel + 4*ythirdpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
+    tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
+
+    qpel_mc_func put_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_qpel_pixels_tab[2][16];
+    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
+    qpel_mc_func put_mspel_pixels_tab[8];
+
+    /**
+     * h264 Chram MC
+     */
+    h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
+    /* This is really one func used in VC-1 decoding */
+    h264_chroma_mc_func put_no_rnd_h264_chroma_pixels_tab[3];
+    h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
+
+    qpel_mc_func put_h264_qpel_pixels_tab[4][16];
+    qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
+
+    qpel_mc_func put_2tap_qpel_pixels_tab[4][16];
+    qpel_mc_func avg_2tap_qpel_pixels_tab[4][16];
+
+    h264_weight_func weight_h264_pixels_tab[10];
+    h264_biweight_func biweight_h264_pixels_tab[10];
+
+    /* AVS specific */
+    qpel_mc_func put_cavs_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_cavs_qpel_pixels_tab[2][16];
+    void (*cavs_filter_lv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_filter_lh)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
+
+    me_cmp_func pix_abs[2][4];
+
+    /* huffyuv specific */
+    void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
+    void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
+    /**
+     * subtract huffyuv's variant of median prediction
+     * note, this might read from src1[-1], src2[-1]
+     */
+    void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top);
+    void (*bswap_buf)(uint32_t *dst, uint32_t *src, int w);
+
+    void (*h264_v_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
+    void (*h264_h_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
+    void (*h264_v_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
+    void (*h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
+    void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta);
+    void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta);
+    // h264_loop_filter_strength: simd only. the C version is inlined in h264.c
+    void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
+                                      int bidir, int edges, int step, int mask_mv0, int mask_mv1);
+
+    void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
+    void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
+
+    void (*h261_loop_filter)(uint8_t *src, int stride);
+
+    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
+    void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
+    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
+    void (*vector_fmul)(float *dst, const float *src, int len);
+    void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
+    /* assume len is a multiple of 8, and src arrays are 16-byte aligned */
+    void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step);
+
+    /* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767]
+     * simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
+    void (*float_to_int16)(int16_t *dst, const float *src, int len);
+
+    /* (I)DCT */
+    void (*fdct)(DCTELEM *block/* align 16*/);
+    void (*fdct248)(DCTELEM *block/* align 16*/);
+
+    /* IDCT really*/
+    void (*idct)(DCTELEM *block/* align 16*/);
+
+    /**
+     * block -> idct -> clip to unsigned 8 bit -> dest.
+     * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
+     * @param line_size size in bytes of a horizotal line of dest
+     */
+    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+
+    /**
+     * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
+     * @param line_size size in bytes of a horizotal line of dest
+     */
+    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+
+    /**
+     * idct input permutation.
+     * several optimized IDCTs need a permutated input (relative to the normal order of the reference
+     * IDCT)
+     * this permutation must be performed before the idct_put/add, note, normally this can be merged
+     * with the zigzag/alternate scan<br>
+     * an example to avoid confusion:
+     * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
+     * - (x -> referece dct -> reference idct -> x)
+     * - (x -> referece dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x)
+     * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...)
+     */
+    uint8_t idct_permutation[64];
+    int idct_permutation_type;
+#define FF_NO_IDCT_PERM 1
+#define FF_LIBMPEG2_IDCT_PERM 2
+#define FF_SIMPLE_IDCT_PERM 3
+#define FF_TRANSPOSE_IDCT_PERM 4
+#define FF_PARTTRANS_IDCT_PERM 5
+
+    int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale);
+    void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
+#define BASIS_SHIFT 16
+#define RECON_SHIFT 6
+
+    void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride);
+    void (*h264_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
+    void (*h264_idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
+    void (*h264_idct8_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
+
+    /* snow wavelet */
+    void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
+    void (*horizontal_compose97i)(DWTELEM *b, int width);
+    void (*inner_add_yblock)(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
+
+    void (*prefetch)(void *mem, int stride, int h);
+
+    void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+
+    /* vc1 functions */
+    void (*vc1_inv_trans_8x8)(DCTELEM *b);
+    void (*vc1_inv_trans_8x4)(DCTELEM *b, int n);
+    void (*vc1_inv_trans_4x8)(DCTELEM *b, int n);
+    void (*vc1_inv_trans_4x4)(DCTELEM *b, int n);
+    void (*vc1_v_overlap)(uint8_t* src, int stride);
+    void (*vc1_h_overlap)(uint8_t* src, int stride);
+    /* put 8x8 block with bicubic interpolation and quarterpel precision
+     * last argument is actually round value instead of height
+     */
+    op_pixels_func put_vc1_mspel_pixels_tab[16];
+} DSPContext;
+
+void dsputil_static_init(void);
+void dsputil_init(DSPContext* p, AVCodecContext *avctx);
+
+/**
+ * permute block according to permuatation.
+ * @param last last non zero element in scantable order
+ */
+void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
+
+void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
+
+#define         BYTE_VEC32(c)   ((c)*0x01010101UL)
+
+static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
+{
+    return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
+}
+
+static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
+{
+    return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
+}
+
+static inline int get_penalty_factor(int lambda, int lambda2, int type){
+    switch(type&0xFF){
+    default:
+    case FF_CMP_SAD:
+        return lambda>>FF_LAMBDA_SHIFT;
+    case FF_CMP_DCT:
+        return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
+    case FF_CMP_W53:
+        return (4*lambda)>>(FF_LAMBDA_SHIFT);
+    case FF_CMP_W97:
+        return (2*lambda)>>(FF_LAMBDA_SHIFT);
+    case FF_CMP_SATD:
+    case FF_CMP_DCT264:
+        return (2*lambda)>>FF_LAMBDA_SHIFT;
+    case FF_CMP_RD:
+    case FF_CMP_PSNR:
+    case FF_CMP_SSE:
+    case FF_CMP_NSSE:
+        return lambda2>>FF_LAMBDA_SHIFT;
+    case FF_CMP_BIT:
+        return 1;
+    }
+}
+
+/**
+ * Empty mmx state.
+ * this must be called between any dsp function and float/double code.
+ * for example sin(); dsp->idct_put(); emms_c(); cos()
+ */
+#define emms_c()
+
+/* should be defined by architectures supporting
+   one or more MultiMedia extension */
+int mm_support(void);
+
+#ifdef __GNUC__
+  #define DECLARE_ALIGNED_16(t,v)       t v __attribute__ ((aligned (16)))
+#else
+  #define DECLARE_ALIGNED_16(t,v)      __declspec(align(16)) t v
+#endif
+
+#if defined(HAVE_MMX)
+
+#undef emms_c
+
+#define MM_MMX    0x0001 /* standard MMX */
+#define MM_3DNOW  0x0004 /* AMD 3DNOW */
+#define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
+#define MM_SSE    0x0008 /* SSE functions */
+#define MM_SSE2   0x0010 /* PIV SSE2 functions */
+#define MM_3DNOWEXT  0x0020 /* AMD 3DNowExt */
+#define MM_SSE3   0x0040 /* Prescott SSE3 functions */
+
+extern int mm_flags;
+
+void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+
+static inline void emms(void)
+{
+    __asm __volatile ("emms;":::"memory");
+}
+
+
+#define emms_c() \
+{\
+    if (mm_flags & MM_MMX)\
+        emms();\
+}
+
+#ifdef __GNUC__
+  #define DECLARE_ALIGNED_8(t,v)       t v __attribute__ ((aligned (8)))
+#else
+  #define DECLARE_ALIGNED_8(t,v)      __declspec(align(8)) t v
+#endif
+
+#define STRIDE_ALIGN 8
+
+void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
+
+#elif defined(ARCH_ARMV4L)
+
+/* This is to use 4 bytes read to the IDCT pointers for some 'zero'
+   line optimizations */
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (4)))
+#define STRIDE_ALIGN 4
+
+#define MM_IWMMXT    0x0100 /* XScale IWMMXT */
+
+extern int mm_flags;
+
+void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx);
+
+#elif defined(HAVE_MLIB)
+
+/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (8)))
+#define STRIDE_ALIGN 8
+
+void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
+
+#elif defined(ARCH_SPARC)
+
+/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (8)))
+#define STRIDE_ALIGN 8
+void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
+
+#elif defined(ARCH_ALPHA)
+
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (8)))
+#define STRIDE_ALIGN 8
+
+void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
+
+#elif defined(ARCH_POWERPC)
+
+#define MM_ALTIVEC    0x0001 /* standard AltiVec */
+
+extern int mm_flags;
+
+#if defined(HAVE_ALTIVEC) && !defined(CONFIG_DARWIN)
+#define pixel altivec_pixel
+#include <altivec.h>
+#undef pixel
+#endif
+
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (16)))
+#define STRIDE_ALIGN 16
+
+void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
+
+#elif defined(HAVE_MMI)
+
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (16)))
+#define STRIDE_ALIGN 16
+
+void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
+
+#elif defined(ARCH_SH4)
+
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (8)))
+#define STRIDE_ALIGN 8
+
+void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
+
+#elif defined(ARCH_BFIN)
+
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (8)))
+#define STRIDE_ALIGN 8
+
+void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
+
+#else
+
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (8)))
+#define STRIDE_ALIGN 8
+
+#endif
+
+#ifdef __GNUC__
+
+struct unaligned_64 { uint64_t l; } __attribute__((packed));
+struct unaligned_32 { uint32_t l; } __attribute__((packed));
+struct unaligned_16 { uint16_t l; } __attribute__((packed));
+
+#define LD16(a) (((const struct unaligned_16 *) (a))->l)
+#define LD32(a) (((const struct unaligned_32 *) (a))->l)
+#define LD64(a) (((const struct unaligned_64 *) (a))->l)
+
+#define ST16(a, b) (((struct unaligned_16 *) (a))->l) = (b)
+#define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b)
+
+#else /* __GNUC__ */
+
+#define LD16(a) (*((uint16_t*)(a)))
+#define LD32(a) (*((uint32_t*)(a)))
+#define LD64(a) (*((uint64_t*)(a)))
+
+#define ST16(a, b) *((uint16_t*)(a)) = (b)
+#define ST32(a, b) *((uint32_t*)(a)) = (b)
+
+#endif /* !__GNUC__ */
+
+/* PSNR */
+void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
+              int orig_linesize[3], int coded_linesize,
+              AVCodecContext *avctx);
+
+/* FFT computation */
+
+/* NOTE: soon integer code will be added, so you must use the
+   FFTSample type */
+typedef float FFTSample;
+
+struct MDCTContext;
+
+typedef struct FFTComplex {
+    FFTSample re, im;
+} FFTComplex;
+
+typedef struct FFTContext {
+    int nbits;
+    int inverse;
+    uint16_t *revtab;
+    FFTComplex *exptab;
+    FFTComplex *exptab1; /* only used by SSE code */
+    void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
+    void (*imdct_calc)(struct MDCTContext *s, FFTSample *output,
+                       const FFTSample *input, FFTSample *tmp);
+} FFTContext;
+
+int ff_fft_init(FFTContext *s, int nbits, int inverse);
+void ff_fft_permute(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z);
+
+static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
+{
+    s->fft_calc(s, z);
+}
+void ff_fft_end(FFTContext *s);
+
+/* MDCT computation */
+
+typedef struct MDCTContext {
+    int n;  /* size of MDCT (i.e. number of input data * 2) */
+    int nbits; /* n = 2^nbits */
+    /* pre/post rotation tables */
+    FFTSample *tcos;
+    FFTSample *tsin;
+    FFTContext fft;
+} MDCTContext;
+
+int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
+void ff_imdct_calc(MDCTContext *s, FFTSample *output,
+                const FFTSample *input, FFTSample *tmp);
+void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output,
+                        const FFTSample *input, FFTSample *tmp);
+void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output,
+                       const FFTSample *input, FFTSample *tmp);
+void ff_mdct_calc(MDCTContext *s, FFTSample *out,
+               const FFTSample *input, FFTSample *tmp);
+void ff_mdct_end(MDCTContext *s);
+
+#define WARPER8_16(name8, name16)\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
+    return name8(s, dst           , src           , stride, h)\
+          +name8(s, dst+8         , src+8         , stride, h);\
+}
+
+#define WARPER8_16_SQ(name8, name16)\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
+    int score=0;\
+    score +=name8(s, dst           , src           , stride, 8);\
+    score +=name8(s, dst+8         , src+8         , stride, 8);\
+    if(h==16){\
+        dst += 8*stride;\
+        src += 8*stride;\
+        score +=name8(s, dst           , src           , stride, 8);\
+        score +=name8(s, dst+8         , src+8         , stride, 8);\
+    }\
+    return score;\
+}
+
+
+static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        ST16(dst   , LD16(src   ));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        ST32(dst   , LD32(src   ));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        ST32(dst   , LD32(src   ));
+        ST32(dst+4 , LD32(src+4 ));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        ST32(dst   , LD32(src   ));
+        ST32(dst+4 , LD32(src+4 ));
+        dst[8]= src[8];
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        ST32(dst   , LD32(src   ));
+        ST32(dst+4 , LD32(src+4 ));
+        ST32(dst+8 , LD32(src+8 ));
+        ST32(dst+12, LD32(src+12));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        ST32(dst   , LD32(src   ));
+        ST32(dst+4 , LD32(src+4 ));
+        ST32(dst+8 , LD32(src+8 ));
+        ST32(dst+12, LD32(src+12));
+        dst[16]= src[16];
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+#endif
diff --git a/contrib/ffmpeg/libavcodec/dtsdec.c b/contrib/ffmpeg/libavcodec/dtsdec.c
new file mode 100644
index 000000000..456f3fdef
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/dtsdec.c
@@ -0,0 +1,320 @@
+/*
+ * dtsdec.c : free DTS Coherent Acoustics stream decoder.
+ * Copyright (C) 2004 Benjamin Zores <ben@geexbox.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifdef HAVE_AV_CONFIG_H
+#undef HAVE_AV_CONFIG_H
+#endif
+
+#include "avcodec.h"
+#include <dts.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+
+#define BUFFER_SIZE 18726
+#define HEADER_SIZE 14
+
+#ifdef LIBDTS_FIXED
+#define CONVERT_LEVEL (1 << 26)
+#define CONVERT_BIAS 0
+#else
+#define CONVERT_LEVEL 1
+#define CONVERT_BIAS 384
+#endif
+
+static inline
+int16_t convert (int32_t i)
+{
+#ifdef LIBDTS_FIXED
+    i >>= 15;
+#else
+    i -= 0x43c00000;
+#endif
+    return (i > 32767) ? 32767 : ((i < -32768) ? -32768 : i);
+}
+
+static void
+convert2s16_2 (sample_t * _f, int16_t * s16)
+{
+  int i;
+  int32_t * f = (int32_t *) _f;
+
+  for (i = 0; i < 256; i++)
+    {
+      s16[2*i] = convert (f[i]);
+      s16[2*i+1] = convert (f[i+256]);
+    }
+}
+
+static void
+convert2s16_4 (sample_t * _f, int16_t * s16)
+{
+  int i;
+  int32_t * f = (int32_t *) _f;
+
+  for (i = 0; i < 256; i++)
+    {
+      s16[4*i] = convert (f[i]);
+      s16[4*i+1] = convert (f[i+256]);
+      s16[4*i+2] = convert (f[i+512]);
+      s16[4*i+3] = convert (f[i+768]);
+    }
+}
+
+static void
+convert2s16_5 (sample_t * _f, int16_t * s16)
+{
+  int i;
+  int32_t * f = (int32_t *) _f;
+
+  for (i = 0; i < 256; i++)
+    {
+      s16[5*i] = convert (f[i]);
+      s16[5*i+1] = convert (f[i+256]);
+      s16[5*i+2] = convert (f[i+512]);
+      s16[5*i+3] = convert (f[i+768]);
+      s16[5*i+4] = convert (f[i+1024]);
+    }
+}
+
+static void
+convert2s16_multi (sample_t * _f, int16_t * s16, int flags)
+{
+  int i;
+  int32_t * f = (int32_t *) _f;
+
+  switch (flags)
+    {
+    case DTS_MONO:
+      for (i = 0; i < 256; i++)
+        {
+          s16[5*i] = s16[5*i+1] = s16[5*i+2] = s16[5*i+3] = 0;
+          s16[5*i+4] = convert (f[i]);
+        }
+      break;
+    case DTS_CHANNEL:
+    case DTS_STEREO:
+    case DTS_DOLBY:
+      convert2s16_2 (_f, s16);
+      break;
+    case DTS_3F:
+      for (i = 0; i < 256; i++)
+        {
+          s16[5*i] = convert (f[i]);
+          s16[5*i+1] = convert (f[i+512]);
+          s16[5*i+2] = s16[5*i+3] = 0;
+          s16[5*i+4] = convert (f[i+256]);
+        }
+      break;
+    case DTS_2F2R:
+      convert2s16_4 (_f, s16);
+      break;
+    case DTS_3F2R:
+      convert2s16_5 (_f, s16);
+      break;
+    case DTS_MONO | DTS_LFE:
+      for (i = 0; i < 256; i++)
+        {
+          s16[6*i] = s16[6*i+1] = s16[6*i+2] = s16[6*i+3] = 0;
+          s16[6*i+4] = convert (f[i+256]);
+          s16[6*i+5] = convert (f[i]);
+        }
+      break;
+    case DTS_CHANNEL | DTS_LFE:
+    case DTS_STEREO | DTS_LFE:
+    case DTS_DOLBY | DTS_LFE:
+      for (i = 0; i < 256; i++)
+        {
+          s16[6*i] = convert (f[i+256]);
+          s16[6*i+1] = convert (f[i+512]);
+          s16[6*i+2] = s16[6*i+3] = s16[6*i+4] = 0;
+          s16[6*i+5] = convert (f[i]);
+        }
+      break;
+    case DTS_3F | DTS_LFE:
+      for (i = 0; i < 256; i++)
+        {
+          s16[6*i] = convert (f[i+256]);
+          s16[6*i+1] = convert (f[i+768]);
+          s16[6*i+2] = s16[6*i+3] = 0;
+          s16[6*i+4] = convert (f[i+512]);
+          s16[6*i+5] = convert (f[i]);
+        }
+      break;
+    case DTS_2F2R | DTS_LFE:
+      for (i = 0; i < 256; i++)
+        {
+          s16[6*i] = convert (f[i+256]);
+          s16[6*i+1] = convert (f[i+512]);
+          s16[6*i+2] = convert (f[i+768]);
+          s16[6*i+3] = convert (f[i+1024]);
+          s16[6*i+4] = 0;
+          s16[6*i+5] = convert (f[i]);
+        }
+      break;
+    case DTS_3F2R | DTS_LFE:
+      for (i = 0; i < 256; i++)
+        {
+          s16[6*i] = convert (f[i+256]);
+          s16[6*i+1] = convert (f[i+768]);
+          s16[6*i+2] = convert (f[i+1024]);
+          s16[6*i+3] = convert (f[i+1280]);
+          s16[6*i+4] = convert (f[i+512]);
+          s16[6*i+5] = convert (f[i]);
+        }
+      break;
+    }
+}
+
+static int
+channels_multi (int flags)
+{
+  if (flags & DTS_LFE)
+    return 6;
+  else if (flags & 1)   /* center channel */
+    return 5;
+  else if ((flags & DTS_CHANNEL_MASK) == DTS_2F2R)
+    return 4;
+  else
+    return 2;
+}
+
+static int
+dts_decode_frame (AVCodecContext *avctx, void *data, int *data_size,
+                  uint8_t *buff, int buff_size)
+{
+  uint8_t * start = buff;
+  uint8_t * end = buff + buff_size;
+  static uint8_t buf[BUFFER_SIZE];
+  static uint8_t * bufptr = buf;
+  static uint8_t * bufpos = buf + HEADER_SIZE;
+
+  static int sample_rate;
+  static int frame_length;
+  static int flags;
+  int bit_rate;
+  int len;
+  dts_state_t *state = avctx->priv_data;
+
+  *data_size = 0;
+
+  while (1)
+    {
+      len = end - start;
+      if (!len)
+        break;
+      if (len > bufpos - bufptr)
+        len = bufpos - bufptr;
+      memcpy (bufptr, start, len);
+      bufptr += len;
+      start += len;
+      if (bufptr != bufpos)
+          return start - buff;
+      if (bufpos != buf + HEADER_SIZE)
+          break;
+
+            {
+              int length;
+
+              length = dts_syncinfo (state, buf, &flags, &sample_rate,
+                                     &bit_rate, &frame_length);
+              if (!length)
+                {
+                  av_log (NULL, AV_LOG_INFO, "skip\n");
+                  for (bufptr = buf; bufptr < buf + HEADER_SIZE-1; bufptr++)
+                    bufptr[0] = bufptr[1];
+                  continue;
+                }
+              bufpos = buf + length;
+            }
+    }
+
+            {
+              level_t level;
+              sample_t bias;
+              int i;
+
+              flags = 2; /* ???????????? */
+              level = CONVERT_LEVEL;
+              bias = CONVERT_BIAS;
+
+              flags |= DTS_ADJUST_LEVEL;
+              if (dts_frame (state, buf, &flags, &level, bias))
+                goto error;
+              avctx->sample_rate = sample_rate;
+              avctx->channels = channels_multi (flags);
+              avctx->bit_rate = bit_rate;
+              for (i = 0; i < dts_blocks_num (state); i++)
+                {
+                  if (dts_block (state))
+                    goto error;
+                  {
+                    int chans;
+                    chans = channels_multi (flags);
+                    convert2s16_multi (dts_samples (state), data,
+                                       flags & (DTS_CHANNEL_MASK | DTS_LFE));
+
+                    data += 256 * sizeof (int16_t) * chans;
+                    *data_size += 256 * sizeof (int16_t) * chans;
+                  }
+                }
+              bufptr = buf;
+              bufpos = buf + HEADER_SIZE;
+              return start-buff;
+            error:
+              av_log (NULL, AV_LOG_ERROR, "error\n");
+              bufptr = buf;
+              bufpos = buf + HEADER_SIZE;
+            }
+
+  return start-buff;
+}
+
+static int
+dts_decode_init (AVCodecContext *avctx)
+{
+  avctx->priv_data = dts_init (0);
+  if (avctx->priv_data == NULL)
+    return -1;
+
+  return 0;
+}
+
+static int
+dts_decode_end (AVCodecContext *s)
+{
+  return 0;
+}
+
+AVCodec dts_decoder = {
+  "dts",
+  CODEC_TYPE_AUDIO,
+  CODEC_ID_DTS,
+  sizeof (dts_state_t *),
+  dts_decode_init,
+  NULL,
+  dts_decode_end,
+  dts_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/dv.c b/contrib/ffmpeg/libavcodec/dv.c
new file mode 100644
index 000000000..76095a481
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/dv.c
@@ -0,0 +1,1265 @@
+/*
+ * DV decoder
+ * Copyright (c) 2002 Fabrice Bellard.
+ * Copyright (c) 2004 Roman Shaposhnik.
+ *
+ * DV encoder
+ * Copyright (c) 2003 Roman Shaposhnik.
+ *
+ * 50 Mbps (DVCPRO50) support
+ * Copyright (c) 2006 Daniel Maas <dmaas@maasdigital.com>
+ *
+ * Many thanks to Dan Dennedy <dan@dennedy.org> for providing wealth
+ * of DV technical info.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file dv.c
+ * DV codec.
+ */
+#define ALT_BITSTREAM_READER
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+#include "simple_idct.h"
+#include "dvdata.h"
+
+//#undef NDEBUG
+//#include <assert.h>
+
+typedef struct DVVideoContext {
+    const DVprofile* sys;
+    AVFrame picture;
+    AVCodecContext *avctx;
+    uint8_t *buf;
+
+    uint8_t dv_zigzag[2][64];
+    uint8_t dv_idct_shift[2][2][22][64];
+
+    void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size);
+    void (*fdct[2])(DCTELEM *block);
+    void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block);
+} DVVideoContext;
+
+/* MultiThreading - dv_anchor applies to entire DV codec, not just the avcontext */
+/* one element is needed for each video segment in a DV frame */
+/* at most there are 2 DIF channels * 12 DIF sequences * 27 video segments (PAL 50Mbps) */
+#define DV_ANCHOR_SIZE (2*12*27)
+
+static void* dv_anchor[DV_ANCHOR_SIZE];
+
+#define TEX_VLC_BITS 9
+
+#ifdef DV_CODEC_TINY_TARGET
+#define DV_VLC_MAP_RUN_SIZE 15
+#define DV_VLC_MAP_LEV_SIZE 23
+#else
+#define DV_VLC_MAP_RUN_SIZE  64
+#define DV_VLC_MAP_LEV_SIZE 512 //FIXME sign was removed so this should be /2 but needs check
+#endif
+
+/* XXX: also include quantization */
+static RL_VLC_ELEM *dv_rl_vlc;
+/* VLC encoding lookup table */
+static struct dv_vlc_pair {
+   uint32_t vlc;
+   uint8_t  size;
+} (*dv_vlc_map)[DV_VLC_MAP_LEV_SIZE] = NULL;
+
+static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm)
+{
+    int i, q, j;
+
+    /* NOTE: max left shift is 6 */
+    for(q = 0; q < 22; q++) {
+        /* 88DCT */
+        for(i = 1; i < 64; i++) {
+            /* 88 table */
+            j = perm[i];
+            s->dv_idct_shift[0][0][q][j] =
+                dv_quant_shifts[q][dv_88_areas[i]] + 1;
+            s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1;
+        }
+
+        /* 248DCT */
+        for(i = 1; i < 64; i++) {
+            /* 248 table */
+            s->dv_idct_shift[0][1][q][i] =
+                dv_quant_shifts[q][dv_248_areas[i]] + 1;
+            s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1;
+        }
+    }
+}
+
+static int dvvideo_init(AVCodecContext *avctx)
+{
+    DVVideoContext *s = avctx->priv_data;
+    DSPContext dsp;
+    static int done=0;
+    int i, j;
+
+    if (!done) {
+        VLC dv_vlc;
+        uint16_t new_dv_vlc_bits[NB_DV_VLC*2];
+        uint8_t new_dv_vlc_len[NB_DV_VLC*2];
+        uint8_t new_dv_vlc_run[NB_DV_VLC*2];
+        int16_t new_dv_vlc_level[NB_DV_VLC*2];
+
+        done = 1;
+
+        dv_vlc_map = av_mallocz_static(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair));
+        if (!dv_vlc_map)
+            return -ENOMEM;
+
+        /* dv_anchor lets each thread know its Id */
+        for (i=0; i<DV_ANCHOR_SIZE; i++)
+            dv_anchor[i] = (void*)(size_t)i;
+
+        /* it's faster to include sign bit in a generic VLC parsing scheme */
+        for (i=0, j=0; i<NB_DV_VLC; i++, j++) {
+            new_dv_vlc_bits[j] = dv_vlc_bits[i];
+            new_dv_vlc_len[j] = dv_vlc_len[i];
+            new_dv_vlc_run[j] = dv_vlc_run[i];
+            new_dv_vlc_level[j] = dv_vlc_level[i];
+
+            if (dv_vlc_level[i]) {
+                new_dv_vlc_bits[j] <<= 1;
+                new_dv_vlc_len[j]++;
+
+                j++;
+                new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1;
+                new_dv_vlc_len[j] = dv_vlc_len[i] + 1;
+                new_dv_vlc_run[j] = dv_vlc_run[i];
+                new_dv_vlc_level[j] = -dv_vlc_level[i];
+            }
+        }
+
+        /* NOTE: as a trick, we use the fact the no codes are unused
+           to accelerate the parsing of partial codes */
+        init_vlc(&dv_vlc, TEX_VLC_BITS, j,
+                 new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2, 0);
+
+        dv_rl_vlc = av_mallocz_static(dv_vlc.table_size * sizeof(RL_VLC_ELEM));
+        if (!dv_rl_vlc)
+            return -ENOMEM;
+
+        for(i = 0; i < dv_vlc.table_size; i++){
+            int code= dv_vlc.table[i][0];
+            int len = dv_vlc.table[i][1];
+            int level, run;
+
+            if(len<0){ //more bits needed
+                run= 0;
+                level= code;
+            } else {
+                run=   new_dv_vlc_run[code] + 1;
+                level= new_dv_vlc_level[code];
+            }
+            dv_rl_vlc[i].len = len;
+            dv_rl_vlc[i].level = level;
+            dv_rl_vlc[i].run = run;
+        }
+        free_vlc(&dv_vlc);
+
+        for (i = 0; i < NB_DV_VLC - 1; i++) {
+           if (dv_vlc_run[i] >= DV_VLC_MAP_RUN_SIZE)
+               continue;
+#ifdef DV_CODEC_TINY_TARGET
+           if (dv_vlc_level[i] >= DV_VLC_MAP_LEV_SIZE)
+               continue;
+#endif
+
+           if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0)
+               continue;
+
+           dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].vlc = dv_vlc_bits[i] <<
+                                                            (!!dv_vlc_level[i]);
+           dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size = dv_vlc_len[i] +
+                                                             (!!dv_vlc_level[i]);
+        }
+        for (i = 0; i < DV_VLC_MAP_RUN_SIZE; i++) {
+#ifdef DV_CODEC_TINY_TARGET
+           for (j = 1; j < DV_VLC_MAP_LEV_SIZE; j++) {
+              if (dv_vlc_map[i][j].size == 0) {
+                  dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
+                            (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
+                  dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
+                                          dv_vlc_map[0][j].size;
+              }
+           }
+#else
+           for (j = 1; j < DV_VLC_MAP_LEV_SIZE/2; j++) {
+              if (dv_vlc_map[i][j].size == 0) {
+                  dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
+                            (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
+                  dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
+                                          dv_vlc_map[0][j].size;
+              }
+              dv_vlc_map[i][((uint16_t)(-j))&0x1ff].vlc =
+                                            dv_vlc_map[i][j].vlc | 1;
+              dv_vlc_map[i][((uint16_t)(-j))&0x1ff].size =
+                                            dv_vlc_map[i][j].size;
+           }
+#endif
+        }
+    }
+
+    /* Generic DSP setup */
+    dsputil_init(&dsp, avctx);
+    s->get_pixels = dsp.get_pixels;
+
+    /* 88DCT setup */
+    s->fdct[0] = dsp.fdct;
+    s->idct_put[0] = dsp.idct_put;
+    for (i=0; i<64; i++)
+       s->dv_zigzag[0][i] = dsp.idct_permutation[ff_zigzag_direct[i]];
+
+    /* 248DCT setup */
+    s->fdct[1] = dsp.fdct248;
+    s->idct_put[1] = simple_idct248_put;  // FIXME: need to add it to DSP
+    if(avctx->lowres){
+        for (i=0; i<64; i++){
+            int j= ff_zigzag248_direct[i];
+            s->dv_zigzag[1][i] = dsp.idct_permutation[(j&7) + (j&8)*4 + (j&48)/2];
+        }
+    }else
+        memcpy(s->dv_zigzag[1], ff_zigzag248_direct, 64);
+
+    /* XXX: do it only for constant case */
+    dv_build_unquantize_tables(s, dsp.idct_permutation);
+
+    avctx->coded_frame = &s->picture;
+    s->avctx= avctx;
+
+    return 0;
+}
+
+// #define VLC_DEBUG
+// #define printf(...) av_log(NULL, AV_LOG_ERROR, __VA_ARGS__)
+
+typedef struct BlockInfo {
+    const uint8_t *shift_table;
+    const uint8_t *scan_table;
+    const int *iweight_table;
+    uint8_t pos; /* position in block */
+    uint8_t dct_mode;
+    uint8_t partial_bit_count;
+    uint16_t partial_bit_buffer;
+    int shift_offset;
+} BlockInfo;
+
+/* block size in bits */
+static const uint16_t block_sizes[6] = {
+    112, 112, 112, 112, 80, 80
+};
+/* bit budget for AC only in 5 MBs */
+static const int vs_total_ac_bits = (100 * 4 + 68*2) * 5;
+/* see dv_88_areas and dv_248_areas for details */
+static const int mb_area_start[5] = { 1, 6, 21, 43, 64 };
+
+static inline int get_bits_left(GetBitContext *s)
+{
+    return s->size_in_bits - get_bits_count(s);
+}
+
+static inline int get_bits_size(GetBitContext *s)
+{
+    return s->size_in_bits;
+}
+
+static inline int put_bits_left(PutBitContext* s)
+{
+    return (s->buf_end - s->buf) * 8 - put_bits_count(s);
+}
+
+/* decode ac coefs */
+static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
+{
+    int last_index = get_bits_size(gb);
+    const uint8_t *scan_table = mb->scan_table;
+    const uint8_t *shift_table = mb->shift_table;
+    const int *iweight_table = mb->iweight_table;
+    int pos = mb->pos;
+    int partial_bit_count = mb->partial_bit_count;
+    int level, pos1, run, vlc_len, index;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+
+    /* if we must parse a partial vlc, we do it here */
+    if (partial_bit_count > 0) {
+        re_cache = ((unsigned)re_cache >> partial_bit_count) |
+                   (mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count));
+        re_index -= partial_bit_count;
+        mb->partial_bit_count = 0;
+    }
+
+    /* get the AC coefficients until last_index is reached */
+    for(;;) {
+#ifdef VLC_DEBUG
+        printf("%2d: bits=%04x index=%d\n", pos, SHOW_UBITS(re, gb, 16), re_index);
+#endif
+        /* our own optimized GET_RL_VLC */
+        index = NEG_USR32(re_cache, TEX_VLC_BITS);
+        vlc_len = dv_rl_vlc[index].len;
+        if (vlc_len < 0) {
+            index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level;
+            vlc_len = TEX_VLC_BITS - vlc_len;
+        }
+        level = dv_rl_vlc[index].level;
+        run = dv_rl_vlc[index].run;
+
+        /* gotta check if we're still within gb boundaries */
+        if (re_index + vlc_len > last_index) {
+            /* should be < 16 bits otherwise a codeword could have been parsed */
+            mb->partial_bit_count = last_index - re_index;
+            mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count);
+            re_index = last_index;
+            break;
+        }
+        re_index += vlc_len;
+
+#ifdef VLC_DEBUG
+        printf("run=%d level=%d\n", run, level);
+#endif
+        pos += run;
+        if (pos >= 64)
+            break;
+
+        pos1 = scan_table[pos];
+        level <<= shift_table[pos1];
+
+        /* unweigh, round, and shift down */
+        level = (level*iweight_table[pos] + (1 << (dv_iweight_bits-1))) >> dv_iweight_bits;
+
+        block[pos1] = level;
+
+        UPDATE_CACHE(re, gb);
+    }
+    CLOSE_READER(re, gb);
+    mb->pos = pos;
+}
+
+static inline void bit_copy(PutBitContext *pb, GetBitContext *gb)
+{
+    int bits_left = get_bits_left(gb);
+    while (bits_left >= MIN_CACHE_BITS) {
+        put_bits(pb, MIN_CACHE_BITS, get_bits(gb, MIN_CACHE_BITS));
+        bits_left -= MIN_CACHE_BITS;
+    }
+    if (bits_left > 0) {
+        put_bits(pb, bits_left, get_bits(gb, bits_left));
+    }
+}
+
+/* mb_x and mb_y are in units of 8 pixels */
+static inline void dv_decode_video_segment(DVVideoContext *s,
+                                           uint8_t *buf_ptr1,
+                                           const uint16_t *mb_pos_ptr)
+{
+    int quant, dc, dct_mode, class1, j;
+    int mb_index, mb_x, mb_y, v, last_index;
+    DCTELEM *block, *block1;
+    int c_offset;
+    uint8_t *y_ptr;
+    void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block);
+    uint8_t *buf_ptr;
+    PutBitContext pb, vs_pb;
+    GetBitContext gb;
+    BlockInfo mb_data[5 * 6], *mb, *mb1;
+    DECLARE_ALIGNED_8(DCTELEM, sblock[5*6][64]);
+    DECLARE_ALIGNED_8(uint8_t, mb_bit_buffer[80 + 4]); /* allow some slack */
+    DECLARE_ALIGNED_8(uint8_t, vs_bit_buffer[5 * 80 + 4]); /* allow some slack */
+    const int log2_blocksize= 3-s->avctx->lowres;
+
+    assert((((int)mb_bit_buffer)&7)==0);
+    assert((((int)vs_bit_buffer)&7)==0);
+
+    memset(sblock, 0, sizeof(sblock));
+
+    /* pass 1 : read DC and AC coefficients in blocks */
+    buf_ptr = buf_ptr1;
+    block1 = &sblock[0][0];
+    mb1 = mb_data;
+    init_put_bits(&vs_pb, vs_bit_buffer, 5 * 80);
+    for(mb_index = 0; mb_index < 5; mb_index++, mb1 += 6, block1 += 6 * 64) {
+        /* skip header */
+        quant = buf_ptr[3] & 0x0f;
+        buf_ptr += 4;
+        init_put_bits(&pb, mb_bit_buffer, 80);
+        mb = mb1;
+        block = block1;
+        for(j = 0;j < 6; j++) {
+            last_index = block_sizes[j];
+            init_get_bits(&gb, buf_ptr, last_index);
+
+            /* get the dc */
+            dc = get_sbits(&gb, 9);
+            dct_mode = get_bits1(&gb);
+            mb->dct_mode = dct_mode;
+            mb->scan_table = s->dv_zigzag[dct_mode];
+            mb->iweight_table = dct_mode ? dv_iweight_248 : dv_iweight_88;
+            class1 = get_bits(&gb, 2);
+            mb->shift_table = s->dv_idct_shift[class1 == 3][dct_mode]
+                [quant + dv_quant_offset[class1]];
+            dc = dc << 2;
+            /* convert to unsigned because 128 is not added in the
+               standard IDCT */
+            dc += 1024;
+            block[0] = dc;
+            buf_ptr += last_index >> 3;
+            mb->pos = 0;
+            mb->partial_bit_count = 0;
+
+#ifdef VLC_DEBUG
+            printf("MB block: %d, %d ", mb_index, j);
+#endif
+            dv_decode_ac(&gb, mb, block);
+
+            /* write the remaining bits  in a new buffer only if the
+               block is finished */
+            if (mb->pos >= 64)
+                bit_copy(&pb, &gb);
+
+            block += 64;
+            mb++;
+        }
+
+        /* pass 2 : we can do it just after */
+#ifdef VLC_DEBUG
+        printf("***pass 2 size=%d MB#=%d\n", put_bits_count(&pb), mb_index);
+#endif
+        block = block1;
+        mb = mb1;
+        init_get_bits(&gb, mb_bit_buffer, put_bits_count(&pb));
+        flush_put_bits(&pb);
+        for(j = 0;j < 6; j++, block += 64, mb++) {
+            if (mb->pos < 64 && get_bits_left(&gb) > 0) {
+                dv_decode_ac(&gb, mb, block);
+                /* if still not finished, no need to parse other blocks */
+                if (mb->pos < 64)
+                    break;
+            }
+        }
+        /* all blocks are finished, so the extra bytes can be used at
+           the video segment level */
+        if (j >= 6)
+            bit_copy(&vs_pb, &gb);
+    }
+
+    /* we need a pass other the whole video segment */
+#ifdef VLC_DEBUG
+    printf("***pass 3 size=%d\n", put_bits_count(&vs_pb));
+#endif
+    block = &sblock[0][0];
+    mb = mb_data;
+    init_get_bits(&gb, vs_bit_buffer, put_bits_count(&vs_pb));
+    flush_put_bits(&vs_pb);
+    for(mb_index = 0; mb_index < 5; mb_index++) {
+        for(j = 0;j < 6; j++) {
+            if (mb->pos < 64) {
+#ifdef VLC_DEBUG
+                printf("start %d:%d\n", mb_index, j);
+#endif
+                dv_decode_ac(&gb, mb, block);
+            }
+            if (mb->pos >= 64 && mb->pos < 127)
+                av_log(NULL, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos);
+            block += 64;
+            mb++;
+        }
+    }
+
+    /* compute idct and place blocks */
+    block = &sblock[0][0];
+    mb = mb_data;
+    for(mb_index = 0; mb_index < 5; mb_index++) {
+        v = *mb_pos_ptr++;
+        mb_x = v & 0xff;
+        mb_y = v >> 8;
+        if (s->sys->pix_fmt == PIX_FMT_YUV422P) {
+            y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + (mb_x>>1))<<log2_blocksize);
+            c_offset = ((mb_y * s->picture.linesize[1] + (mb_x >> 2))<<log2_blocksize);
+        } else { /* 4:1:1 or 4:2:0 */
+            y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x)<<log2_blocksize);
+            if (s->sys->pix_fmt == PIX_FMT_YUV411P)
+                c_offset = ((mb_y * s->picture.linesize[1] + (mb_x >> 2))<<log2_blocksize);
+            else /* 4:2:0 */
+                c_offset = (((mb_y >> 1) * s->picture.linesize[1] + (mb_x >> 1))<<log2_blocksize);
+        }
+        for(j = 0;j < 6; j++) {
+            idct_put = s->idct_put[mb->dct_mode && log2_blocksize==3];
+            if (s->sys->pix_fmt == PIX_FMT_YUV422P) { /* 4:2:2 */
+                if (j == 0 || j == 2) {
+                    /* Y0 Y1 */
+                    idct_put(y_ptr + ((j >> 1)<<log2_blocksize),
+                             s->picture.linesize[0], block);
+                } else if(j > 3) {
+                    /* Cr Cb */
+                    idct_put(s->picture.data[6 - j] + c_offset,
+                             s->picture.linesize[6 - j], block);
+                }
+                /* note: j=1 and j=3 are "dummy" blocks in 4:2:2 */
+            } else { /* 4:1:1 or 4:2:0 */
+                if (j < 4) {
+                    if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
+                        /* NOTE: at end of line, the macroblock is handled as 420 */
+                        idct_put(y_ptr + (j<<log2_blocksize), s->picture.linesize[0], block);
+                    } else {
+                        idct_put(y_ptr + (((j & 1) + (j >> 1) * s->picture.linesize[0])<<log2_blocksize),
+                                 s->picture.linesize[0], block);
+                    }
+                } else {
+                    if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) {
+                        uint64_t aligned_pixels[64/8];
+                        uint8_t *pixels= (uint8_t*)aligned_pixels;
+                        uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1;
+                        int x, y, linesize;
+                        /* NOTE: at end of line, the macroblock is handled as 420 */
+                        idct_put(pixels, 8, block);
+                        linesize = s->picture.linesize[6 - j];
+                        c_ptr = s->picture.data[6 - j] + c_offset;
+                        ptr = pixels;
+                        for(y = 0;y < (1<<log2_blocksize); y++) {
+                            ptr1= ptr + (1<<(log2_blocksize-1));
+                            c_ptr1 = c_ptr + (linesize<<log2_blocksize);
+                            for(x=0; x < (1<<(log2_blocksize-1)); x++){
+                                c_ptr[x]= ptr[x]; c_ptr1[x]= ptr1[x];
+                            }
+                            c_ptr += linesize;
+                            ptr += 8;
+                        }
+                    } else {
+                        /* don't ask me why they inverted Cb and Cr ! */
+                        idct_put(s->picture.data[6 - j] + c_offset,
+                                 s->picture.linesize[6 - j], block);
+                    }
+                }
+            }
+            block += 64;
+            mb++;
+        }
+    }
+}
+
+#ifdef DV_CODEC_TINY_TARGET
+/* Converts run and level (where level != 0) pair into vlc, returning bit size */
+static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc)
+{
+    int size;
+    if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
+        *vlc = dv_vlc_map[run][level].vlc | sign;
+        size = dv_vlc_map[run][level].size;
+    }
+    else {
+        if (level < DV_VLC_MAP_LEV_SIZE) {
+            *vlc = dv_vlc_map[0][level].vlc | sign;
+            size = dv_vlc_map[0][level].size;
+        } else {
+            *vlc = 0xfe00 | (level << 1) | sign;
+            size = 16;
+        }
+        if (run) {
+            *vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc :
+                                  (0x1f80 | (run - 1))) << size;
+            size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
+        }
+    }
+
+    return size;
+}
+
+static always_inline int dv_rl2vlc_size(int run, int level)
+{
+    int size;
+
+    if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
+        size = dv_vlc_map[run][level].size;
+    }
+    else {
+        size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16;
+        if (run) {
+            size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
+        }
+    }
+    return size;
+}
+#else
+static always_inline int dv_rl2vlc(int run, int l, int sign, uint32_t* vlc)
+{
+    *vlc = dv_vlc_map[run][l].vlc | sign;
+    return dv_vlc_map[run][l].size;
+}
+
+static always_inline int dv_rl2vlc_size(int run, int l)
+{
+    return dv_vlc_map[run][l].size;
+}
+#endif
+
+typedef struct EncBlockInfo {
+    int area_q[4];
+    int bit_size[4];
+    int prev[5];
+    int cur_ac;
+    int cno;
+    int dct_mode;
+    DCTELEM mb[64];
+    uint8_t next[64];
+    uint8_t sign[64];
+    uint8_t partial_bit_count;
+    uint32_t partial_bit_buffer; /* we can't use uint16_t here */
+} EncBlockInfo;
+
+static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool,
+                                       PutBitContext* pb_end)
+{
+    int prev;
+    int bits_left;
+    PutBitContext* pb = pb_pool;
+    int size = bi->partial_bit_count;
+    uint32_t vlc = bi->partial_bit_buffer;
+
+    bi->partial_bit_count = bi->partial_bit_buffer = 0;
+    for(;;){
+       /* Find suitable storage space */
+       for (; size > (bits_left = put_bits_left(pb)); pb++) {
+          if (bits_left) {
+              size -= bits_left;
+              put_bits(pb, bits_left, vlc >> size);
+              vlc = vlc & ((1<<size)-1);
+          }
+          if (pb + 1 >= pb_end) {
+              bi->partial_bit_count = size;
+              bi->partial_bit_buffer = vlc;
+              return pb;
+          }
+       }
+
+       /* Store VLC */
+       put_bits(pb, size, vlc);
+
+       if(bi->cur_ac>=64)
+           break;
+
+       /* Construct the next VLC */
+       prev= bi->cur_ac;
+       bi->cur_ac = bi->next[prev];
+       if(bi->cur_ac < 64){
+           size = dv_rl2vlc(bi->cur_ac - prev - 1, bi->mb[bi->cur_ac], bi->sign[bi->cur_ac], &vlc);
+       } else {
+           size = 4; vlc = 6; /* End Of Block stamp */
+       }
+    }
+    return pb;
+}
+
+static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
+                                              const uint8_t* zigzag_scan, const int *weight, int bias)
+{
+    int i, area;
+    /* We offer two different methods for class number assignment: the
+       method suggested in SMPTE 314M Table 22, and an improved
+       method. The SMPTE method is very conservative; it assigns class
+       3 (i.e. severe quantization) to any block where the largest AC
+       component is greater than 36. ffmpeg's DV encoder tracks AC bit
+       consumption precisely, so there is no need to bias most blocks
+       towards strongly lossy compression. Instead, we assign class 2
+       to most blocks, and use class 3 only when strictly necessary
+       (for blocks whose largest AC component exceeds 255). */
+
+#if 0 /* SMPTE spec method */
+    static const int classes[] = {12, 24, 36, 0xffff};
+#else /* improved ffmpeg method */
+    static const int classes[] = {-1, -1, 255, 0xffff};
+#endif
+    int max=classes[0];
+    int prev=0;
+
+    bi->mb[0] = blk[0];
+
+    for (area = 0; area < 4; area++) {
+       bi->prev[area] = prev;
+       bi->bit_size[area] = 1; // 4 areas 4 bits for EOB :)
+       for (i=mb_area_start[area]; i<mb_area_start[area+1]; i++) {
+          int level = blk[zigzag_scan[i]];
+
+          if (level+15 > 30U) {
+              bi->sign[i] = (level>>31)&1;
+              /* weigh it and and shift down into range, adding for rounding */
+              /* the extra division by a factor of 2^4 reverses the 8x expansion of the DCT
+                 AND the 2x doubling of the weights */
+              level = (FFABS(level) * weight[i] + (1<<(dv_weight_bits+3))) >> (dv_weight_bits+4);
+              bi->mb[i] = level;
+              if(level>max) max= level;
+              bi->bit_size[area] += dv_rl2vlc_size(i - prev  - 1, level);
+              bi->next[prev]= i;
+              prev= i;
+          }
+       }
+    }
+    bi->next[prev]= i;
+    for(bi->cno = 0; max > classes[bi->cno]; bi->cno++);
+
+    bi->cno += bias;
+
+    if (bi->cno >= 3) {
+        bi->cno = 3;
+        prev=0;
+        i= bi->next[prev];
+        for (area = 0; area < 4; area++) {
+            bi->prev[area] = prev;
+            bi->bit_size[area] = 1; // 4 areas 4 bits for EOB :)
+            for (; i<mb_area_start[area+1]; i= bi->next[i]) {
+                bi->mb[i] >>=1;
+
+                if (bi->mb[i]) {
+                    bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, bi->mb[i]);
+                    bi->next[prev]= i;
+                    prev= i;
+                }
+            }
+        }
+        bi->next[prev]= i;
+    }
+}
+
+//FIXME replace this by dsputil
+#define SC(x, y) ((s[x] - s[y]) ^ ((s[x] - s[y]) >> 7))
+static always_inline int dv_guess_dct_mode(DCTELEM *blk) {
+    DCTELEM *s;
+    int score88 = 0;
+    int score248 = 0;
+    int i;
+
+    /* Compute 8-8 score (small values give a better chance for 8-8 DCT) */
+    s = blk;
+    for(i=0; i<7; i++) {
+        score88 += SC(0,  8) + SC(1, 9) + SC(2, 10) + SC(3, 11) +
+                   SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15);
+        s += 8;
+    }
+    /* Compute 2-4-8 score (small values give a better chance for 2-4-8 DCT) */
+    s = blk;
+    for(i=0; i<6; i++) {
+        score248 += SC(0, 16) + SC(1,17) + SC(2, 18) + SC(3, 19) +
+                    SC(4, 20) + SC(5,21) + SC(6, 22) + SC(7, 23);
+        s += 8;
+    }
+
+    return (score88 - score248 > -10);
+}
+
+static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos)
+{
+    int size[5];
+    int i, j, k, a, prev, a2;
+    EncBlockInfo* b;
+
+    size[0] = size[1] = size[2] = size[3] = size[4] = 1<<24;
+    do {
+       b = blks;
+       for (i=0; i<5; i++) {
+          if (!qnos[i])
+              continue;
+
+          qnos[i]--;
+          size[i] = 0;
+          for (j=0; j<6; j++, b++) {
+             for (a=0; a<4; a++) {
+                if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) {
+                    b->bit_size[a] = 1; // 4 areas 4 bits for EOB :)
+                    b->area_q[a]++;
+                    prev= b->prev[a];
+                    assert(b->next[prev] >= mb_area_start[a+1] || b->mb[prev]);
+                    for (k= b->next[prev] ; k<mb_area_start[a+1]; k= b->next[k]) {
+                       b->mb[k] >>= 1;
+                       if (b->mb[k]) {
+                           b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
+                           prev= k;
+                       } else {
+                           if(b->next[k] >= mb_area_start[a+1] && b->next[k]<64){
+                                for(a2=a+1; b->next[k] >= mb_area_start[a2+1]; a2++)
+                                    b->prev[a2] = prev;
+                                assert(a2<4);
+                                assert(b->mb[b->next[k]]);
+                                b->bit_size[a2] += dv_rl2vlc_size(b->next[k] - prev - 1, b->mb[b->next[k]])
+                                                  -dv_rl2vlc_size(b->next[k] -    k - 1, b->mb[b->next[k]]);
+                                assert(b->prev[a2]==k && (a2+1 >= 4 || b->prev[a2+1]!=k));
+                                b->prev[a2] = prev;
+                           }
+                           b->next[prev] = b->next[k];
+                       }
+                    }
+                    b->prev[a+1]= prev;
+                }
+                size[i] += b->bit_size[a];
+             }
+          }
+          if(vs_total_ac_bits >= size[0] + size[1] + size[2] + size[3] + size[4])
+                return;
+       }
+    } while (qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4]);
+
+
+    for(a=2; a==2 || vs_total_ac_bits < size[0]; a+=a){
+        b = blks;
+        size[0] = 5*6*4; //EOB
+        for (j=0; j<6*5; j++, b++) {
+            prev= b->prev[0];
+            for (k= b->next[prev]; k<64; k= b->next[k]) {
+                if(b->mb[k] < a && b->mb[k] > -a){
+                    b->next[prev] = b->next[k];
+                }else{
+                    size[0] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
+                    prev= k;
+                }
+            }
+        }
+    }
+}
+
+static inline void dv_encode_video_segment(DVVideoContext *s,
+                                           uint8_t *dif,
+                                           const uint16_t *mb_pos_ptr)
+{
+    int mb_index, i, j, v;
+    int mb_x, mb_y, c_offset, linesize;
+    uint8_t*  y_ptr;
+    uint8_t*  data;
+    uint8_t*  ptr;
+    int       do_edge_wrap;
+    DECLARE_ALIGNED_8(DCTELEM, block[64]);
+    EncBlockInfo  enc_blks[5*6];
+    PutBitContext pbs[5*6];
+    PutBitContext* pb;
+    EncBlockInfo* enc_blk;
+    int       vs_bit_size = 0;
+    int       qnos[5];
+
+    assert((((int)block) & 7) == 0);
+
+    enc_blk = &enc_blks[0];
+    pb = &pbs[0];
+    for(mb_index = 0; mb_index < 5; mb_index++) {
+        v = *mb_pos_ptr++;
+        mb_x = v & 0xff;
+        mb_y = v >> 8;
+        if (s->sys->pix_fmt == PIX_FMT_YUV422P) {
+            y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 4);
+        } else { /* 4:1:1 */
+            y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8);
+        }
+        if (s->sys->pix_fmt == PIX_FMT_YUV420P) {
+            c_offset = (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8));
+        } else { /* 4:2:2 or 4:1:1 */
+            c_offset = ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8));
+        }
+        do_edge_wrap = 0;
+        qnos[mb_index] = 15; /* No quantization */
+        ptr = dif + mb_index*80 + 4;
+        for(j = 0;j < 6; j++) {
+            int dummy = 0;
+            if (s->sys->pix_fmt == PIX_FMT_YUV422P) { /* 4:2:2 */
+                if (j == 0 || j == 2) {
+                    /* Y0 Y1 */
+                    data = y_ptr + ((j>>1) * 8);
+                    linesize = s->picture.linesize[0];
+                } else if (j > 3) {
+                    /* Cr Cb */
+                    data = s->picture.data[6 - j] + c_offset;
+                    linesize = s->picture.linesize[6 - j];
+                } else {
+                    /* j=1 and j=3 are "dummy" blocks, used for AC data only */
+                    data = 0;
+                    linesize = 0;
+                    dummy = 1;
+                }
+            } else { /* 4:1:1 or 4:2:0 */
+                if (j < 4) {  /* Four Y blocks */
+                    /* NOTE: at end of line, the macroblock is handled as 420 */
+                    if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
+                        data = y_ptr + (j * 8);
+                    } else {
+                        data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]);
+                    }
+                    linesize = s->picture.linesize[0];
+                } else {      /* Cr and Cb blocks */
+                    /* don't ask Fabrice why they inverted Cb and Cr ! */
+                    data = s->picture.data[6 - j] + c_offset;
+                    linesize = s->picture.linesize[6 - j];
+                    if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8))
+                        do_edge_wrap = 1;
+                }
+            }
+
+            /* Everything is set up -- now just copy data -> DCT block */
+            if (do_edge_wrap) {  /* Edge wrap copy: 4x16 -> 8x8 */
+                uint8_t* d;
+                DCTELEM *b = block;
+                for (i=0;i<8;i++) {
+                   d = data + 8 * linesize;
+                   b[0] = data[0]; b[1] = data[1]; b[2] = data[2]; b[3] = data[3];
+                   b[4] =    d[0]; b[5] =    d[1]; b[6] =    d[2]; b[7] =    d[3];
+                   data += linesize;
+                   b += 8;
+                }
+            } else {             /* Simple copy: 8x8 -> 8x8 */
+                if (!dummy)
+                    s->get_pixels(block, data, linesize);
+            }
+
+            if(s->avctx->flags & CODEC_FLAG_INTERLACED_DCT)
+                enc_blk->dct_mode = dv_guess_dct_mode(block);
+            else
+                enc_blk->dct_mode = 0;
+            enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0;
+            enc_blk->partial_bit_count = 0;
+            enc_blk->partial_bit_buffer = 0;
+            enc_blk->cur_ac = 0;
+
+            if (dummy) {
+                /* We rely on the fact that encoding all zeros leads to an immediate EOB,
+                   which is precisely what the spec calls for in the "dummy" blocks. */
+                memset(block, 0, sizeof(block));
+            } else {
+                s->fdct[enc_blk->dct_mode](block);
+            }
+
+            dv_set_class_number(block, enc_blk,
+                                enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct,
+                                enc_blk->dct_mode ? dv_weight_248 : dv_weight_88,
+                                j/4);
+
+            init_put_bits(pb, ptr, block_sizes[j]/8);
+            put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2));
+            put_bits(pb, 1, enc_blk->dct_mode);
+            put_bits(pb, 2, enc_blk->cno);
+
+            vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] +
+                           enc_blk->bit_size[2] + enc_blk->bit_size[3];
+            ++enc_blk;
+            ++pb;
+            ptr += block_sizes[j]/8;
+        }
+    }
+
+    if (vs_total_ac_bits < vs_bit_size)
+        dv_guess_qnos(&enc_blks[0], &qnos[0]);
+
+    for (i=0; i<5; i++) {
+       dif[i*80 + 3] = qnos[i];
+    }
+
+    /* First pass over individual cells only */
+    for (j=0; j<5*6; j++)
+       dv_encode_ac(&enc_blks[j], &pbs[j], &pbs[j+1]);
+
+    /* Second pass over each MB space */
+    for (j=0; j<5*6; j+=6) {
+        pb= &pbs[j];
+        for (i=0; i<6; i++) {
+            if (enc_blks[i+j].partial_bit_count)
+                pb=dv_encode_ac(&enc_blks[i+j], pb, &pbs[j+6]);
+        }
+    }
+
+    /* Third and final pass over the whole vides segment space */
+    pb= &pbs[0];
+    for (j=0; j<5*6; j++) {
+       if (enc_blks[j].partial_bit_count)
+           pb=dv_encode_ac(&enc_blks[j], pb, &pbs[6*5]);
+       if (enc_blks[j].partial_bit_count)
+            av_log(NULL, AV_LOG_ERROR, "ac bitstream overflow\n");
+    }
+
+    for (j=0; j<5*6; j++)
+       flush_put_bits(&pbs[j]);
+}
+
+static int dv_decode_mt(AVCodecContext *avctx, void* sl)
+{
+    DVVideoContext *s = avctx->priv_data;
+    int slice = (size_t)sl;
+
+    /* which DIF channel is this? */
+    int chan = slice / (s->sys->difseg_size * 27);
+
+    /* slice within the DIF channel */
+    int chan_slice = slice % (s->sys->difseg_size * 27);
+
+    /* byte offset of this channel's data */
+    int chan_offset = chan * s->sys->difseg_size * 150 * 80;
+
+    dv_decode_video_segment(s, &s->buf[((chan_slice/27)*6+(chan_slice/3)+chan_slice*5+7)*80 + chan_offset],
+                            &s->sys->video_place[slice*5]);
+    return 0;
+}
+
+#ifdef CONFIG_ENCODERS
+static int dv_encode_mt(AVCodecContext *avctx, void* sl)
+{
+    DVVideoContext *s = avctx->priv_data;
+    int slice = (size_t)sl;
+
+    /* which DIF channel is this? */
+    int chan = slice / (s->sys->difseg_size * 27);
+
+    /* slice within the DIF channel */
+    int chan_slice = slice % (s->sys->difseg_size * 27);
+
+    /* byte offset of this channel's data */
+    int chan_offset = chan * s->sys->difseg_size * 150 * 80;
+
+    dv_encode_video_segment(s, &s->buf[((chan_slice/27)*6+(chan_slice/3)+chan_slice*5+7)*80 + chan_offset],
+                            &s->sys->video_place[slice*5]);
+    return 0;
+}
+#endif
+
+#ifdef CONFIG_DECODERS
+/* NOTE: exactly one frame must be given (120000 bytes for NTSC,
+   144000 bytes for PAL - or twice those for 50Mbps) */
+static int dvvideo_decode_frame(AVCodecContext *avctx,
+                                 void *data, int *data_size,
+                                 uint8_t *buf, int buf_size)
+{
+    DVVideoContext *s = avctx->priv_data;
+
+    s->sys = dv_frame_profile(buf);
+    if (!s->sys || buf_size < s->sys->frame_size)
+        return -1; /* NOTE: we only accept several full frames */
+
+    if(s->picture.data[0])
+        avctx->release_buffer(avctx, &s->picture);
+
+    s->picture.reference = 0;
+    s->picture.key_frame = 1;
+    s->picture.pict_type = FF_I_TYPE;
+    avctx->pix_fmt = s->sys->pix_fmt;
+    avcodec_set_dimensions(avctx, s->sys->width, s->sys->height);
+    if(avctx->get_buffer(avctx, &s->picture) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    s->picture.interlaced_frame = 1;
+    s->picture.top_field_first = 0;
+
+    s->buf = buf;
+    avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL,
+                   s->sys->n_difchan * s->sys->difseg_size * 27);
+
+    emms_c();
+
+    /* return image */
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data= s->picture;
+
+    return s->sys->frame_size;
+}
+#endif
+
+
+static inline int dv_write_pack(enum dv_pack_type pack_id, DVVideoContext *c, uint8_t* buf)
+{
+    /*
+     * Here's what SMPTE314M says about these two:
+     *    (page 6) APTn, AP1n, AP2n, AP3n: These data shall be identical
+     *             as track application IDs (APTn = 001, AP1n =
+     *             001, AP2n = 001, AP3n = 001), if the source signal
+     *             comes from a digital VCR. If the signal source is
+     *             unknown, all bits for these data shall be set to 1.
+     *    (page 12) STYPE: STYPE defines a signal type of video signal
+     *                     00000b = 4:1:1 compression
+     *                     00100b = 4:2:2 compression
+     *                     XXXXXX = Reserved
+     * Now, I've got two problems with these statements:
+     *   1. it looks like APT == 111b should be a safe bet, but it isn't.
+     *      It seems that for PAL as defined in IEC 61834 we have to set
+     *      APT to 000 and for SMPTE314M to 001.
+     *   2. It is not at all clear what STYPE is used for 4:2:0 PAL
+     *      compression scheme (if any).
+     */
+    int apt = (c->sys->pix_fmt == PIX_FMT_YUV420P ? 0 : 1);
+    int stype = (c->sys->pix_fmt == PIX_FMT_YUV422P ? 4 : 0);
+
+    uint8_t aspect = 0;
+    if((int)(av_q2d(c->avctx->sample_aspect_ratio) * c->avctx->width / c->avctx->height * 10) == 17) /* 16:9 */
+        aspect = 0x02;
+
+    buf[0] = (uint8_t)pack_id;
+    switch (pack_id) {
+    case dv_header525: /* I can't imagine why these two weren't defined as real */
+    case dv_header625: /* packs in SMPTE314M -- they definitely look like ones */
+          buf[1] = 0xf8 |               /* reserved -- always 1 */
+                   (apt & 0x07);        /* APT: Track application ID */
+          buf[2] = (0 << 7)    | /* TF1: audio data is 0 - valid; 1 - invalid */
+                   (0x0f << 3) | /* reserved -- always 1 */
+                   (apt & 0x07); /* AP1: Audio application ID */
+          buf[3] = (0 << 7)    | /* TF2: video data is 0 - valid; 1 - invalid */
+                   (0x0f << 3) | /* reserved -- always 1 */
+                   (apt & 0x07); /* AP2: Video application ID */
+          buf[4] = (0 << 7)    | /* TF3: subcode(SSYB) is 0 - valid; 1 - invalid */
+                   (0x0f << 3) | /* reserved -- always 1 */
+                   (apt & 0x07); /* AP3: Subcode application ID */
+          break;
+    case dv_video_source:
+          buf[1] = 0xff; /* reserved -- always 1 */
+          buf[2] = (1 << 7) | /* B/W: 0 - b/w, 1 - color */
+                   (1 << 6) | /* following CLF is valid - 0, invalid - 1 */
+                   (3 << 4) | /* CLF: color frames id (see ITU-R BT.470-4) */
+                   0xf; /* reserved -- always 1 */
+          buf[3] = (3 << 6) | /* reserved -- always 1 */
+                   (c->sys->dsf << 5) | /*  system: 60fields/50fields */
+                   stype; /* signal type video compression */
+          buf[4] = 0xff; /* VISC: 0xff -- no information */
+          break;
+    case dv_video_control:
+          buf[1] = (0 << 6) | /* Copy generation management (CGMS) 0 -- free */
+                   0x3f; /* reserved -- always 1 */
+          buf[2] = 0xc8 | /* reserved -- always b11001xxx */
+                   aspect;
+          buf[3] = (1 << 7) | /* Frame/field flag 1 -- frame, 0 -- field */
+                   (1 << 6) | /* First/second field flag 0 -- field 2, 1 -- field 1 */
+                   (1 << 5) | /* Frame change flag 0 -- same picture as before, 1 -- different */
+                   (1 << 4) | /* 1 - interlaced, 0 - noninterlaced */
+                   0xc; /* reserved -- always b1100 */
+          buf[4] = 0xff; /* reserved -- always 1 */
+          break;
+    default:
+          buf[1] = buf[2] = buf[3] = buf[4] = 0xff;
+    }
+    return 5;
+}
+
+static void dv_format_frame(DVVideoContext* c, uint8_t* buf)
+{
+    int chan, i, j, k;
+
+    for (chan = 0; chan < c->sys->n_difchan; chan++) {
+        for (i = 0; i < c->sys->difseg_size; i++) {
+            memset(buf, 0xff, 80 * 6); /* First 6 DIF blocks are for control data */
+
+            /* DV header: 1DIF */
+            buf += dv_write_dif_id(dv_sect_header, chan, i, 0, buf);
+            buf += dv_write_pack((c->sys->dsf ? dv_header625 : dv_header525), c, buf);
+            buf += 72; /* unused bytes */
+
+            /* DV subcode: 2DIFs */
+            for (j = 0; j < 2; j++) {
+                buf += dv_write_dif_id(dv_sect_subcode, chan, i, j, buf);
+                for (k = 0; k < 6; k++)
+                     buf += dv_write_ssyb_id(k, (i < c->sys->difseg_size/2), buf) + 5;
+                buf += 29; /* unused bytes */
+            }
+
+            /* DV VAUX: 3DIFS */
+            for (j = 0; j < 3; j++) {
+                buf += dv_write_dif_id(dv_sect_vaux, chan, i, j, buf);
+                buf += dv_write_pack(dv_video_source,  c, buf);
+                buf += dv_write_pack(dv_video_control, c, buf);
+                buf += 7*5;
+                buf += dv_write_pack(dv_video_source,  c, buf);
+                buf += dv_write_pack(dv_video_control, c, buf);
+                buf += 4*5 + 2; /* unused bytes */
+            }
+
+            /* DV Audio/Video: 135 Video DIFs + 9 Audio DIFs */
+            for (j = 0; j < 135; j++) {
+                if (j%15 == 0) {
+                    memset(buf, 0xff, 80);
+                    buf += dv_write_dif_id(dv_sect_audio, chan, i, j/15, buf);
+                    buf += 77; /* audio control & shuffled PCM audio */
+                }
+                buf += dv_write_dif_id(dv_sect_video, chan, i, j, buf);
+                buf += 77; /* 1 video macro block: 1 bytes control
+                              4 * 14 bytes Y 8x8 data
+                              10 bytes Cr 8x8 data
+                              10 bytes Cb 8x8 data */
+            }
+        }
+    }
+}
+
+
+#ifdef CONFIG_ENCODERS
+static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size,
+                                void *data)
+{
+    DVVideoContext *s = c->priv_data;
+
+    s->sys = dv_codec_profile(c);
+    if (!s->sys)
+        return -1;
+    if(buf_size < s->sys->frame_size)
+        return -1;
+
+    c->pix_fmt = s->sys->pix_fmt;
+    s->picture = *((AVFrame *)data);
+    s->picture.key_frame = 1;
+    s->picture.pict_type = FF_I_TYPE;
+
+    s->buf = buf;
+    c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL,
+               s->sys->n_difchan * s->sys->difseg_size * 27);
+
+    emms_c();
+
+    dv_format_frame(s, buf);
+
+    return s->sys->frame_size;
+}
+#endif
+
+static int dvvideo_close(AVCodecContext *c)
+{
+
+    return 0;
+}
+
+
+#ifdef CONFIG_DVVIDEO_ENCODER
+AVCodec dvvideo_encoder = {
+    "dvvideo",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_DVVIDEO,
+    sizeof(DVVideoContext),
+    dvvideo_init,
+    dvvideo_encode_frame,
+    dvvideo_close,
+    NULL,
+    CODEC_CAP_DR1,
+    NULL
+};
+#endif // CONFIG_DVVIDEO_ENCODER
+
+#ifdef CONFIG_DVVIDEO_DECODER
+AVCodec dvvideo_decoder = {
+    "dvvideo",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_DVVIDEO,
+    sizeof(DVVideoContext),
+    dvvideo_init,
+    NULL,
+    dvvideo_close,
+    dvvideo_decode_frame,
+    CODEC_CAP_DR1,
+    NULL
+};
+#endif
diff --git a/contrib/ffmpeg/libavcodec/dvbsub.c b/contrib/ffmpeg/libavcodec/dvbsub.c
new file mode 100644
index 000000000..44ba19d86
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/dvbsub.c
@@ -0,0 +1,445 @@
+/*
+ * DVB subtitle encoding for ffmpeg
+ * Copyright (c) 2005 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+
+typedef struct DVBSubtitleContext {
+    int hide_state;
+    int object_version;
+} DVBSubtitleContext;
+
+#define PUTBITS2(val)\
+{\
+    bitbuf |= (val) << bitcnt;\
+    bitcnt -= 2;\
+    if (bitcnt < 0) {\
+        bitcnt = 6;\
+        *q++ = bitbuf;\
+        bitbuf = 0;\
+    }\
+}
+
+static void dvb_encode_rle2(uint8_t **pq,
+                            const uint8_t *bitmap, int linesize,
+                            int w, int h)
+{
+    uint8_t *q;
+    unsigned int bitbuf;
+    int bitcnt;
+    int x, y, len, x1, v, color;
+
+    q = *pq;
+
+    for(y = 0; y < h; y++) {
+        *q++ = 0x10;
+        bitbuf = 0;
+        bitcnt = 6;
+
+        x = 0;
+        while (x < w) {
+            x1 = x;
+            color = bitmap[x1++];
+            while (x1 < w && bitmap[x1] == color)
+                x1++;
+            len = x1 - x;
+            if (color == 0 && len == 2) {
+                PUTBITS2(0);
+                PUTBITS2(0);
+                PUTBITS2(1);
+            } else if (len >= 3 && len <= 10) {
+                v = len - 3;
+                PUTBITS2(0);
+                PUTBITS2((v >> 2) | 2);
+                PUTBITS2(v & 3);
+                PUTBITS2(color);
+            } else if (len >= 12 && len <= 27) {
+                v = len - 12;
+                PUTBITS2(0);
+                PUTBITS2(0);
+                PUTBITS2(2);
+                PUTBITS2(v >> 2);
+                PUTBITS2(v & 3);
+                PUTBITS2(color);
+            } else if (len >= 29) {
+                /* length = 29 ... 284 */
+                if (len > 284)
+                    len = 284;
+                v = len - 29;
+                PUTBITS2(0);
+                PUTBITS2(0);
+                PUTBITS2(3);
+                PUTBITS2((v >> 6));
+                PUTBITS2((v >> 4) & 3);
+                PUTBITS2((v >> 2) & 3);
+                PUTBITS2(v & 3);
+                PUTBITS2(color);
+            } else {
+                PUTBITS2(color);
+                if (color == 0) {
+                    PUTBITS2(1);
+                }
+                len = 1;
+            }
+            x += len;
+        }
+        /* end of line */
+        PUTBITS2(0);
+        PUTBITS2(0);
+        PUTBITS2(0);
+        if (bitcnt != 6) {
+            *q++ = bitbuf;
+        }
+        *q++ = 0xf0;
+        bitmap += linesize;
+    }
+    *pq = q;
+}
+
+#define PUTBITS4(val)\
+{\
+    bitbuf |= (val) << bitcnt;\
+    bitcnt -= 4;\
+    if (bitcnt < 0) {\
+        bitcnt = 4;\
+        *q++ = bitbuf;\
+        bitbuf = 0;\
+    }\
+}
+
+/* some DVB decoders only implement 4 bits/pixel */
+static void dvb_encode_rle4(uint8_t **pq,
+                            const uint8_t *bitmap, int linesize,
+                            int w, int h)
+{
+    uint8_t *q;
+    unsigned int bitbuf;
+    int bitcnt;
+    int x, y, len, x1, v, color;
+
+    q = *pq;
+
+    for(y = 0; y < h; y++) {
+        *q++ = 0x11;
+        bitbuf = 0;
+        bitcnt = 4;
+
+        x = 0;
+        while (x < w) {
+            x1 = x;
+            color = bitmap[x1++];
+            while (x1 < w && bitmap[x1] == color)
+                x1++;
+            len = x1 - x;
+            if (color == 0 && len == 2) {
+                PUTBITS4(0);
+                PUTBITS4(0xd);
+            } else if (color == 0 && (len >= 3 && len <= 9)) {
+                PUTBITS4(0);
+                PUTBITS4(len - 2);
+            } else if (len >= 4 && len <= 7) {
+                PUTBITS4(0);
+                PUTBITS4(8 + len - 4);
+                PUTBITS4(color);
+            } else if (len >= 9 && len <= 24) {
+                PUTBITS4(0);
+                PUTBITS4(0xe);
+                PUTBITS4(len - 9);
+                PUTBITS4(color);
+            } else if (len >= 25) {
+                if (len > 280)
+                    len = 280;
+                v = len - 25;
+                PUTBITS4(0);
+                PUTBITS4(0xf);
+                PUTBITS4(v >> 4);
+                PUTBITS4(v & 0xf);
+                PUTBITS4(color);
+            } else {
+                PUTBITS4(color);
+                if (color == 0) {
+                    PUTBITS4(0xc);
+                }
+                len = 1;
+            }
+            x += len;
+        }
+        /* end of line */
+        PUTBITS4(0);
+        PUTBITS4(0);
+        if (bitcnt != 4) {
+            *q++ = bitbuf;
+        }
+        *q++ = 0xf0;
+        bitmap += linesize;
+    }
+    *pq = q;
+}
+
+#define SCALEBITS 10
+#define ONE_HALF  (1 << (SCALEBITS - 1))
+#define FIX(x)    ((int) ((x) * (1<<SCALEBITS) + 0.5))
+
+#define RGB_TO_Y_CCIR(r, g, b) \
+((FIX(0.29900*219.0/255.0) * (r) + FIX(0.58700*219.0/255.0) * (g) + \
+  FIX(0.11400*219.0/255.0) * (b) + (ONE_HALF + (16 << SCALEBITS))) >> SCALEBITS)
+
+#define RGB_TO_U_CCIR(r1, g1, b1, shift)\
+(((- FIX(0.16874*224.0/255.0) * r1 - FIX(0.33126*224.0/255.0) * g1 +         \
+     FIX(0.50000*224.0/255.0) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128)
+
+#define RGB_TO_V_CCIR(r1, g1, b1, shift)\
+(((FIX(0.50000*224.0/255.0) * r1 - FIX(0.41869*224.0/255.0) * g1 -           \
+   FIX(0.08131*224.0/255.0) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128)
+
+static inline void putbe16(uint8_t **pq, uint16_t v)
+{
+    uint8_t *q;
+    q = *pq;
+    *q++ = v >> 8;
+    *q++ = v;
+    *pq = q;
+}
+
+static int encode_dvb_subtitles(DVBSubtitleContext *s,
+                                uint8_t *outbuf, AVSubtitle *h)
+{
+    uint8_t *q, *pseg_len;
+    int page_id, region_id, clut_id, object_id, i, bpp_index, page_state;
+
+
+    q = outbuf;
+
+    page_id = 1;
+
+    if (h->num_rects == 0 || h->rects == NULL)
+        return -1;
+
+    *q++ = 0x00; /* subtitle_stream_id */
+
+    /* page composition segment */
+
+    *q++ = 0x0f; /* sync_byte */
+    *q++ = 0x10; /* segment_type */
+    putbe16(&q, page_id);
+    pseg_len = q;
+    q += 2; /* segment length */
+    *q++ = 30; /* page_timeout (seconds) */
+    if (s->hide_state)
+        page_state = 0; /* normal case */
+    else
+        page_state = 2; /* mode change */
+    /* page_version = 0 + page_state */
+    *q++ = s->object_version | (page_state << 2) | 3;
+
+    for (region_id = 0; region_id < h->num_rects; region_id++) {
+        *q++ = region_id;
+        *q++ = 0xff; /* reserved */
+        putbe16(&q, h->rects[region_id].x); /* left pos */
+        putbe16(&q, h->rects[region_id].y); /* top pos */
+    }
+
+    putbe16(&pseg_len, q - pseg_len - 2);
+
+    if (!s->hide_state) {
+        for (clut_id = 0; clut_id < h->num_rects; clut_id++) {
+
+            /* CLUT segment */
+
+            if (h->rects[clut_id].nb_colors <= 4) {
+                /* 2 bpp, some decoders do not support it correctly */
+                bpp_index = 0;
+            } else if (h->rects[clut_id].nb_colors <= 16) {
+                /* 4 bpp, standard encoding */
+                bpp_index = 1;
+            } else {
+                return -1;
+            }
+
+            *q++ = 0x0f; /* sync byte */
+            *q++ = 0x12; /* CLUT definition segment */
+            putbe16(&q, page_id);
+            pseg_len = q;
+            q += 2; /* segment length */
+            *q++ = clut_id;
+            *q++ = (0 << 4) | 0xf; /* version = 0 */
+
+            for(i = 0; i < h->rects[clut_id].nb_colors; i++) {
+                *q++ = i; /* clut_entry_id */
+                *q++ = (1 << (7 - bpp_index)) | (0xf << 1) | 1; /* 2 bits/pixel full range */
+                {
+                    int a, r, g, b;
+                    a = (h->rects[clut_id].rgba_palette[i] >> 24) & 0xff;
+                    r = (h->rects[clut_id].rgba_palette[i] >> 16) & 0xff;
+                    g = (h->rects[clut_id].rgba_palette[i] >> 8) & 0xff;
+                    b = (h->rects[clut_id].rgba_palette[i] >> 0) & 0xff;
+
+                    *q++ = RGB_TO_Y_CCIR(r, g, b);
+                    *q++ = RGB_TO_V_CCIR(r, g, b, 0);
+                    *q++ = RGB_TO_U_CCIR(r, g, b, 0);
+                    *q++ = 255 - a;
+                }
+            }
+
+            putbe16(&pseg_len, q - pseg_len - 2);
+        }
+    }
+
+    for (region_id = 0; region_id < h->num_rects; region_id++) {
+
+        /* region composition segment */
+
+        if (h->rects[region_id].nb_colors <= 4) {
+            /* 2 bpp, some decoders do not support it correctly */
+            bpp_index = 0;
+        } else if (h->rects[region_id].nb_colors <= 16) {
+            /* 4 bpp, standard encoding */
+            bpp_index = 1;
+        } else {
+            return -1;
+        }
+
+        *q++ = 0x0f; /* sync_byte */
+        *q++ = 0x11; /* segment_type */
+        putbe16(&q, page_id);
+        pseg_len = q;
+        q += 2; /* segment length */
+        *q++ = region_id;
+        *q++ = (s->object_version << 4) | (0 << 3) | 0x07; /* version , no fill */
+        putbe16(&q, h->rects[region_id].w); /* region width */
+        putbe16(&q, h->rects[region_id].h); /* region height */
+        *q++ = ((1 + bpp_index) << 5) | ((1 + bpp_index) << 2) | 0x03;
+        *q++ = region_id; /* clut_id == region_id */
+        *q++ = 0; /* 8 bit fill colors */
+        *q++ = 0x03; /* 4 bit and 2 bit fill colors */
+
+        if (!s->hide_state) {
+            putbe16(&q, region_id); /* object_id == region_id */
+            *q++ = (0 << 6) | (0 << 4);
+            *q++ = 0;
+            *q++ = 0xf0;
+            *q++ = 0;
+        }
+
+        putbe16(&pseg_len, q - pseg_len - 2);
+    }
+
+    if (!s->hide_state) {
+
+        for (object_id = 0; object_id < h->num_rects; object_id++) {
+            /* Object Data segment */
+
+            if (h->rects[object_id].nb_colors <= 4) {
+                /* 2 bpp, some decoders do not support it correctly */
+                bpp_index = 0;
+            } else if (h->rects[object_id].nb_colors <= 16) {
+                /* 4 bpp, standard encoding */
+                bpp_index = 1;
+            } else {
+                return -1;
+            }
+
+            *q++ = 0x0f; /* sync byte */
+            *q++ = 0x13;
+            putbe16(&q, page_id);
+            pseg_len = q;
+            q += 2; /* segment length */
+
+            putbe16(&q, object_id);
+            *q++ = (s->object_version << 4) | (0 << 2) | (0 << 1) | 1; /* version = 0,
+                                                                       onject_coding_method,
+                                                                       non_modifying_color_flag */
+            {
+                uint8_t *ptop_field_len, *pbottom_field_len, *top_ptr, *bottom_ptr;
+                void (*dvb_encode_rle)(uint8_t **pq,
+                                        const uint8_t *bitmap, int linesize,
+                                        int w, int h);
+                ptop_field_len = q;
+                q += 2;
+                pbottom_field_len = q;
+                q += 2;
+
+                if (bpp_index == 0)
+                    dvb_encode_rle = dvb_encode_rle2;
+                else
+                    dvb_encode_rle = dvb_encode_rle4;
+
+                top_ptr = q;
+                dvb_encode_rle(&q, h->rects[object_id].bitmap, h->rects[object_id].w * 2,
+                                    h->rects[object_id].w, h->rects[object_id].h >> 1);
+                bottom_ptr = q;
+                dvb_encode_rle(&q, h->rects[object_id].bitmap + h->rects[object_id].w,
+                                    h->rects[object_id].w * 2, h->rects[object_id].w,
+                                    h->rects[object_id].h >> 1);
+
+                putbe16(&ptop_field_len, bottom_ptr - top_ptr);
+                putbe16(&pbottom_field_len, q - bottom_ptr);
+            }
+
+            putbe16(&pseg_len, q - pseg_len - 2);
+        }
+    }
+
+    /* end of display set segment */
+
+    *q++ = 0x0f; /* sync_byte */
+    *q++ = 0x80; /* segment_type */
+    putbe16(&q, page_id);
+    pseg_len = q;
+    q += 2; /* segment length */
+
+    putbe16(&pseg_len, q - pseg_len - 2);
+
+    *q++ = 0xff; /* end of PES data */
+
+    s->object_version = (s->object_version + 1) & 0xf;
+    s->hide_state = !s->hide_state;
+    return q - outbuf;
+}
+
+static int dvbsub_init_decoder(AVCodecContext *avctx)
+{
+    return 0;
+}
+
+static int dvbsub_close_decoder(AVCodecContext *avctx)
+{
+    return 0;
+}
+
+static int dvbsub_encode(AVCodecContext *avctx,
+                       unsigned char *buf, int buf_size, void *data)
+{
+    DVBSubtitleContext *s = avctx->priv_data;
+    AVSubtitle *sub = data;
+    int ret;
+
+    ret = encode_dvb_subtitles(s, buf, sub);
+    return ret;
+}
+
+AVCodec dvbsub_encoder = {
+    "dvbsub",
+    CODEC_TYPE_SUBTITLE,
+    CODEC_ID_DVB_SUBTITLE,
+    sizeof(DVBSubtitleContext),
+    dvbsub_init_decoder,
+    dvbsub_encode,
+    dvbsub_close_decoder,
+};
diff --git a/contrib/ffmpeg/libavcodec/dvbsubdec.c b/contrib/ffmpeg/libavcodec/dvbsubdec.c
new file mode 100644
index 000000000..ff43ca6b9
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/dvbsubdec.c
@@ -0,0 +1,1633 @@
+/*
+ * DVB subtitle decoding for ffmpeg
+ * Copyright (c) 2005 Ian Caulfield.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+#include "dsputil.h"
+#include "bitstream.h"
+
+//#define DEBUG
+//#define DEBUG_PACKET_CONTENTS
+//#define DEBUG_SAVE_IMAGES
+
+#define DVBSUB_PAGE_SEGMENT     0x10
+#define DVBSUB_REGION_SEGMENT   0x11
+#define DVBSUB_CLUT_SEGMENT     0x12
+#define DVBSUB_OBJECT_SEGMENT   0x13
+#define DVBSUB_DISPLAY_SEGMENT  0x80
+
+static unsigned char *cm;
+
+#ifdef DEBUG_SAVE_IMAGES
+#undef fprintf
+#if 0
+static void png_save(const char *filename, uint8_t *bitmap, int w, int h,
+                     uint32_t *rgba_palette)
+{
+    int x, y, v;
+    FILE *f;
+    char fname[40], fname2[40];
+    char command[1024];
+
+    snprintf(fname, 40, "%s.ppm", filename);
+
+    f = fopen(fname, "w");
+    if (!f) {
+        perror(fname);
+        exit(1);
+    }
+    fprintf(f, "P6\n"
+            "%d %d\n"
+            "%d\n",
+            w, h, 255);
+    for(y = 0; y < h; y++) {
+        for(x = 0; x < w; x++) {
+            v = rgba_palette[bitmap[y * w + x]];
+            putc((v >> 16) & 0xff, f);
+            putc((v >> 8) & 0xff, f);
+            putc((v >> 0) & 0xff, f);
+        }
+    }
+    fclose(f);
+
+
+    snprintf(fname2, 40, "%s-a.pgm", filename);
+
+    f = fopen(fname2, "w");
+    if (!f) {
+        perror(fname2);
+        exit(1);
+    }
+    fprintf(f, "P5\n"
+            "%d %d\n"
+            "%d\n",
+            w, h, 255);
+    for(y = 0; y < h; y++) {
+        for(x = 0; x < w; x++) {
+            v = rgba_palette[bitmap[y * w + x]];
+            putc((v >> 24) & 0xff, f);
+        }
+    }
+    fclose(f);
+
+    snprintf(command, 1024, "pnmtopng -alpha %s %s > %s.png 2> /dev/null", fname2, fname, filename);
+    system(command);
+
+    snprintf(command, 1024, "rm %s %s", fname, fname2);
+    system(command);
+}
+#endif
+
+static void png_save2(const char *filename, uint32_t *bitmap, int w, int h)
+{
+    int x, y, v;
+    FILE *f;
+    char fname[40], fname2[40];
+    char command[1024];
+
+    snprintf(fname, 40, "%s.ppm", filename);
+
+    f = fopen(fname, "w");
+    if (!f) {
+        perror(fname);
+        exit(1);
+    }
+    fprintf(f, "P6\n"
+            "%d %d\n"
+            "%d\n",
+            w, h, 255);
+    for(y = 0; y < h; y++) {
+        for(x = 0; x < w; x++) {
+            v = bitmap[y * w + x];
+            putc((v >> 16) & 0xff, f);
+            putc((v >> 8) & 0xff, f);
+            putc((v >> 0) & 0xff, f);
+        }
+    }
+    fclose(f);
+
+
+    snprintf(fname2, 40, "%s-a.pgm", filename);
+
+    f = fopen(fname2, "w");
+    if (!f) {
+        perror(fname2);
+        exit(1);
+    }
+    fprintf(f, "P5\n"
+            "%d %d\n"
+            "%d\n",
+            w, h, 255);
+    for(y = 0; y < h; y++) {
+        for(x = 0; x < w; x++) {
+            v = bitmap[y * w + x];
+            putc((v >> 24) & 0xff, f);
+        }
+    }
+    fclose(f);
+
+    snprintf(command, 1024, "pnmtopng -alpha %s %s > %s.png 2> /dev/null", fname2, fname, filename);
+    system(command);
+
+    snprintf(command, 1024, "rm %s %s", fname, fname2);
+    system(command);
+}
+#endif
+
+#define RGBA(r,g,b,a) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
+
+typedef struct DVBSubCLUT {
+    int id;
+
+    uint32_t clut4[4];
+    uint32_t clut16[16];
+    uint32_t clut256[256];
+
+    struct DVBSubCLUT *next;
+} DVBSubCLUT;
+
+static DVBSubCLUT default_clut;
+
+typedef struct DVBSubObjectDisplay {
+    int object_id;
+    int region_id;
+
+    int x_pos;
+    int y_pos;
+
+    int fgcolour;
+    int bgcolour;
+
+    struct DVBSubObjectDisplay *region_list_next;
+    struct DVBSubObjectDisplay *object_list_next;
+} DVBSubObjectDisplay;
+
+typedef struct DVBSubObject {
+    int id;
+
+    int type;
+
+    DVBSubObjectDisplay *display_list;
+
+    struct DVBSubObject *next;
+} DVBSubObject;
+
+typedef struct DVBSubRegionDisplay {
+    int region_id;
+
+    int x_pos;
+    int y_pos;
+
+    struct DVBSubRegionDisplay *next;
+} DVBSubRegionDisplay;
+
+typedef struct DVBSubRegion {
+    int id;
+
+    int width;
+    int height;
+    int depth;
+
+    int clut;
+    int bgcolour;
+
+    uint8_t *pbuf;
+    int buf_size;
+
+    DVBSubObjectDisplay *display_list;
+
+    struct DVBSubRegion *next;
+} DVBSubRegion;
+
+typedef struct DVBSubContext {
+    int composition_id;
+    int ancillary_id;
+
+    int time_out;
+    DVBSubRegion *region_list;
+    DVBSubCLUT   *clut_list;
+    DVBSubObject *object_list;
+
+    int display_list_size;
+    DVBSubRegionDisplay *display_list;
+} DVBSubContext;
+
+
+static DVBSubObject* get_object(DVBSubContext *ctx, int object_id)
+{
+    DVBSubObject *ptr = ctx->object_list;
+
+    while (ptr != NULL && ptr->id != object_id) {
+        ptr = ptr->next;
+    }
+
+    return ptr;
+}
+
+static DVBSubCLUT* get_clut(DVBSubContext *ctx, int clut_id)
+{
+    DVBSubCLUT *ptr = ctx->clut_list;
+
+    while (ptr != NULL && ptr->id != clut_id) {
+        ptr = ptr->next;
+    }
+
+    return ptr;
+}
+
+static DVBSubRegion* get_region(DVBSubContext *ctx, int region_id)
+{
+    DVBSubRegion *ptr = ctx->region_list;
+
+    while (ptr != NULL && ptr->id != region_id) {
+        ptr = ptr->next;
+    }
+
+    return ptr;
+}
+
+static void delete_region_display_list(DVBSubContext *ctx, DVBSubRegion *region)
+{
+    DVBSubObject *object, *obj2, **obj2_ptr;
+    DVBSubObjectDisplay *display, *obj_disp, **obj_disp_ptr;
+
+    while (region->display_list != NULL) {
+        display = region->display_list;
+
+        object = get_object(ctx, display->object_id);
+
+        if (object != NULL) {
+            obj_disp = object->display_list;
+            obj_disp_ptr = &object->display_list;
+
+            while (obj_disp != NULL && obj_disp != display) {
+                obj_disp_ptr = &obj_disp->object_list_next;
+                obj_disp = obj_disp->object_list_next;
+            }
+
+            if (obj_disp) {
+                *obj_disp_ptr = obj_disp->object_list_next;
+
+                if (object->display_list == NULL) {
+                    obj2 = ctx->object_list;
+                    obj2_ptr = &ctx->object_list;
+
+                    while (obj2 != NULL && obj2 != object) {
+                        obj2_ptr = &obj2->next;
+                        obj2 = obj2->next;
+                    }
+
+                    *obj2_ptr = obj2->next;
+
+                    av_free(obj2);
+                }
+            }
+        }
+
+        region->display_list = display->region_list_next;
+
+        av_free(display);
+    }
+
+}
+
+static void delete_state(DVBSubContext *ctx)
+{
+    DVBSubRegion *region;
+    DVBSubCLUT *clut;
+
+    while (ctx->region_list != NULL)
+    {
+        region = ctx->region_list;
+
+        ctx->region_list = region->next;
+
+        delete_region_display_list(ctx, region);
+        if (region->pbuf != NULL)
+            av_free(region->pbuf);
+
+        av_free(region);
+    }
+
+    while (ctx->clut_list != NULL)
+    {
+        clut = ctx->clut_list;
+
+        ctx->clut_list = clut->next;
+
+        av_free(clut);
+    }
+
+    /* Should already be null */
+    if (ctx->object_list != NULL)
+        av_log(0, AV_LOG_ERROR, "Memory deallocation error!\n");
+}
+
+static int dvbsub_init_decoder(AVCodecContext *avctx)
+{
+    int i, r, g, b, a = 0;
+    DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data;
+
+    cm = ff_cropTbl + MAX_NEG_CROP;
+
+    memset(avctx->priv_data, 0, sizeof(DVBSubContext));
+
+    ctx->composition_id = avctx->sub_id & 0xffff;
+    ctx->ancillary_id = avctx->sub_id >> 16;
+
+    default_clut.id = -1;
+    default_clut.next = NULL;
+
+    default_clut.clut4[0] = RGBA(  0,   0,   0,   0);
+    default_clut.clut4[1] = RGBA(255, 255, 255, 255);
+    default_clut.clut4[2] = RGBA(  0,   0,   0, 255);
+    default_clut.clut4[3] = RGBA(127, 127, 127, 255);
+
+    default_clut.clut16[0] = RGBA(  0,   0,   0,   0);
+    for (i = 1; i < 16; i++) {
+        if (i < 8) {
+            r = (i & 1) ? 255 : 0;
+            g = (i & 2) ? 255 : 0;
+            b = (i & 4) ? 255 : 0;
+        } else {
+            r = (i & 1) ? 127 : 0;
+            g = (i & 2) ? 127 : 0;
+            b = (i & 4) ? 127 : 0;
+        }
+        default_clut.clut16[i] = RGBA(r, g, b, 255);
+    }
+
+    default_clut.clut256[0] = RGBA(  0,   0,   0,   0);
+    for (i = 1; i < 256; i++) {
+        if (i < 8) {
+            r = (i & 1) ? 255 : 0;
+            g = (i & 2) ? 255 : 0;
+            b = (i & 4) ? 255 : 0;
+            a = 63;
+        } else {
+            switch (i & 0x88) {
+            case 0x00:
+                r = ((i & 1) ? 85 : 0) + ((i & 0x10) ? 170 : 0);
+                g = ((i & 2) ? 85 : 0) + ((i & 0x20) ? 170 : 0);
+                b = ((i & 4) ? 85 : 0) + ((i & 0x40) ? 170 : 0);
+                a = 255;
+                break;
+            case 0x08:
+                r = ((i & 1) ? 85 : 0) + ((i & 0x10) ? 170 : 0);
+                g = ((i & 2) ? 85 : 0) + ((i & 0x20) ? 170 : 0);
+                b = ((i & 4) ? 85 : 0) + ((i & 0x40) ? 170 : 0);
+                a = 127;
+                break;
+            case 0x80:
+                r = 127 + ((i & 1) ? 43 : 0) + ((i & 0x10) ? 85 : 0);
+                g = 127 + ((i & 2) ? 43 : 0) + ((i & 0x20) ? 85 : 0);
+                b = 127 + ((i & 4) ? 43 : 0) + ((i & 0x40) ? 85 : 0);
+                a = 255;
+                break;
+            case 0x88:
+                r = ((i & 1) ? 43 : 0) + ((i & 0x10) ? 85 : 0);
+                g = ((i & 2) ? 43 : 0) + ((i & 0x20) ? 85 : 0);
+                b = ((i & 4) ? 43 : 0) + ((i & 0x40) ? 85 : 0);
+                a = 255;
+                break;
+            }
+        }
+        default_clut.clut256[i] = RGBA(r, g, b, a);
+    }
+
+    return 0;
+}
+
+static int dvbsub_close_decoder(AVCodecContext *avctx)
+{
+    DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data;
+    DVBSubRegionDisplay *display;
+
+    delete_state(ctx);
+
+    while (ctx->display_list != NULL)
+    {
+        display = ctx->display_list;
+        ctx->display_list = display->next;
+
+        av_free(display);
+    }
+
+    return 0;
+}
+
+static int dvbsub_read_2bit_string(uint8_t *destbuf, int dbuf_len,
+                                   uint8_t **srcbuf, int buf_size,
+                                   int non_mod, uint8_t *map_table)
+{
+    GetBitContext gb;
+
+    int bits;
+    int run_length;
+    int pixels_read = 0;
+
+    init_get_bits(&gb, *srcbuf, buf_size << 8);
+
+    while (get_bits_count(&gb) < (buf_size << 8) && pixels_read < dbuf_len) {
+        bits = get_bits(&gb, 2);
+
+        if (bits != 0) {
+            if (non_mod != 1 || bits != 1) {
+                if (map_table != NULL)
+                    *destbuf++ = map_table[bits];
+                else
+                    *destbuf++ = bits;
+            }
+            pixels_read++;
+        } else {
+            bits = get_bits(&gb, 1);
+            if (bits == 1) {
+                run_length = get_bits(&gb, 3) + 3;
+                bits = get_bits(&gb, 2);
+
+                if (non_mod == 1 && bits == 1)
+                    pixels_read += run_length;
+                else {
+                    if (map_table != NULL)
+                        bits = map_table[bits];
+                    while (run_length-- > 0 && pixels_read < dbuf_len) {
+                        *destbuf++ = bits;
+                        pixels_read++;
+                    }
+                }
+            } else {
+                bits = get_bits(&gb, 1);
+                if (bits == 0) {
+                    bits = get_bits(&gb, 2);
+                    if (bits == 2) {
+                        run_length = get_bits(&gb, 4) + 12;
+                        bits = get_bits(&gb, 2);
+
+                        if (non_mod == 1 && bits == 1)
+                            pixels_read += run_length;
+                        else {
+                            if (map_table != NULL)
+                                bits = map_table[bits];
+                            while (run_length-- > 0 && pixels_read < dbuf_len) {
+                                *destbuf++ = bits;
+                                pixels_read++;
+                            }
+                        }
+                    } else if (bits == 3) {
+                        run_length = get_bits(&gb, 8) + 29;
+                        bits = get_bits(&gb, 2);
+
+                        if (non_mod == 1 && bits == 1)
+                            pixels_read += run_length;
+                        else {
+                            if (map_table != NULL)
+                                bits = map_table[bits];
+                            while (run_length-- > 0 && pixels_read < dbuf_len) {
+                                *destbuf++ = bits;
+                                pixels_read++;
+                            }
+                        }
+                    } else if (bits == 1) {
+                        pixels_read += 2;
+                        if (map_table != NULL)
+                            bits = map_table[0];
+                        else
+                            bits = 0;
+                        if (pixels_read <= dbuf_len) {
+                            *destbuf++ = bits;
+                            *destbuf++ = bits;
+                        }
+                    } else {
+                        (*srcbuf) += (get_bits_count(&gb) + 7) >> 3;
+                        return pixels_read;
+                    }
+                } else {
+                    if (map_table != NULL)
+                        bits = map_table[0];
+                    else
+                        bits = 0;
+                    *destbuf++ = bits;
+                    pixels_read++;
+                }
+            }
+        }
+    }
+
+    if (get_bits(&gb, 6) != 0)
+        av_log(0, AV_LOG_ERROR, "DVBSub error: line overflow\n");
+
+    (*srcbuf) += (get_bits_count(&gb) + 7) >> 3;
+
+    return pixels_read;
+}
+
+static int dvbsub_read_4bit_string(uint8_t *destbuf, int dbuf_len,
+                                   uint8_t **srcbuf, int buf_size,
+                                   int non_mod, uint8_t *map_table)
+{
+    GetBitContext gb;
+
+    int bits;
+    int run_length;
+    int pixels_read = 0;
+
+    init_get_bits(&gb, *srcbuf, buf_size << 8);
+
+    while (get_bits_count(&gb) < (buf_size << 8) && pixels_read < dbuf_len) {
+        bits = get_bits(&gb, 4);
+
+        if (bits != 0) {
+            if (non_mod != 1 || bits != 1) {
+                if (map_table != NULL)
+                    *destbuf++ = map_table[bits];
+                else
+                    *destbuf++ = bits;
+            }
+            pixels_read++;
+        } else {
+            bits = get_bits(&gb, 1);
+            if (bits == 0) {
+                run_length = get_bits(&gb, 3);
+
+                if (run_length == 0) {
+                    (*srcbuf) += (get_bits_count(&gb) + 7) >> 3;
+                    return pixels_read;
+                }
+
+                run_length += 2;
+
+                if (map_table != NULL)
+                    bits = map_table[0];
+                else
+                    bits = 0;
+
+                while (run_length-- > 0 && pixels_read < dbuf_len) {
+                    *destbuf++ = bits;
+                    pixels_read++;
+                }
+            } else {
+                bits = get_bits(&gb, 1);
+                if (bits == 0) {
+                    run_length = get_bits(&gb, 2) + 4;
+                    bits = get_bits(&gb, 4);
+
+                    if (non_mod == 1 && bits == 1)
+                        pixels_read += run_length;
+                    else {
+                        if (map_table != NULL)
+                            bits = map_table[bits];
+                        while (run_length-- > 0 && pixels_read < dbuf_len) {
+                            *destbuf++ = bits;
+                            pixels_read++;
+                        }
+                    }
+                } else {
+                    bits = get_bits(&gb, 2);
+                    if (bits == 2) {
+                        run_length = get_bits(&gb, 4) + 9;
+                        bits = get_bits(&gb, 4);
+
+                        if (non_mod == 1 && bits == 1)
+                            pixels_read += run_length;
+                        else {
+                            if (map_table != NULL)
+                                bits = map_table[bits];
+                            while (run_length-- > 0 && pixels_read < dbuf_len) {
+                                *destbuf++ = bits;
+                                pixels_read++;
+                            }
+                        }
+                    } else if (bits == 3) {
+                        run_length = get_bits(&gb, 8) + 25;
+                        bits = get_bits(&gb, 4);
+
+                        if (non_mod == 1 && bits == 1)
+                            pixels_read += run_length;
+                        else {
+                            if (map_table != NULL)
+                                bits = map_table[bits];
+                            while (run_length-- > 0 && pixels_read < dbuf_len) {
+                                *destbuf++ = bits;
+                                pixels_read++;
+                            }
+                        }
+                    } else if (bits == 1) {
+                        pixels_read += 2;
+                        if (map_table != NULL)
+                            bits = map_table[0];
+                        else
+                            bits = 0;
+                        if (pixels_read <= dbuf_len) {
+                            *destbuf++ = bits;
+                            *destbuf++ = bits;
+                        }
+                    } else {
+                        if (map_table != NULL)
+                            bits = map_table[0];
+                        else
+                            bits = 0;
+                        *destbuf++ = bits;
+                        pixels_read ++;
+                    }
+                }
+            }
+        }
+    }
+
+    if (get_bits(&gb, 8) != 0)
+        av_log(0, AV_LOG_ERROR, "DVBSub error: line overflow\n");
+
+    (*srcbuf) += (get_bits_count(&gb) + 7) >> 3;
+
+    return pixels_read;
+}
+
+static int dvbsub_read_8bit_string(uint8_t *destbuf, int dbuf_len,
+                                    uint8_t **srcbuf, int buf_size,
+                                    int non_mod, uint8_t *map_table)
+{
+    uint8_t *sbuf_end = (*srcbuf) + buf_size;
+    int bits;
+    int run_length;
+    int pixels_read = 0;
+
+    while (*srcbuf < sbuf_end && pixels_read < dbuf_len) {
+        bits = *(*srcbuf)++;
+
+        if (bits != 0) {
+            if (non_mod != 1 || bits != 1) {
+                if (map_table != NULL)
+                    *destbuf++ = map_table[bits];
+                else
+                    *destbuf++ = bits;
+            }
+            pixels_read++;
+        } else {
+            bits = *(*srcbuf)++;
+            run_length = bits & 0x7f;
+            if ((bits & 0x80) == 0) {
+                if (run_length == 0) {
+                    return pixels_read;
+                }
+
+                if (map_table != NULL)
+                    bits = map_table[0];
+                else
+                    bits = 0;
+                while (run_length-- > 0 && pixels_read < dbuf_len) {
+                    *destbuf++ = bits;
+                    pixels_read++;
+                }
+            } else {
+                bits = *(*srcbuf)++;
+
+                if (non_mod == 1 && bits == 1)
+                    pixels_read += run_length;
+                if (map_table != NULL)
+                    bits = map_table[bits];
+                else while (run_length-- > 0 && pixels_read < dbuf_len) {
+                    *destbuf++ = bits;
+                    pixels_read++;
+                }
+            }
+        }
+    }
+
+    if (*(*srcbuf)++ != 0)
+        av_log(0, AV_LOG_ERROR, "DVBSub error: line overflow\n");
+
+    return pixels_read;
+}
+
+
+
+static void dvbsub_parse_pixel_data_block(AVCodecContext *avctx, DVBSubObjectDisplay *display,
+                                          uint8_t *buf, int buf_size, int top_bottom, int non_mod)
+{
+    DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data;
+
+    DVBSubRegion *region = get_region(ctx, display->region_id);
+    uint8_t *buf_end = buf + buf_size;
+    uint8_t *pbuf;
+    int x_pos, y_pos;
+    int i;
+
+    uint8_t map2to4[] = { 0x0,  0x7,  0x8,  0xf};
+    uint8_t map2to8[] = {0x00, 0x77, 0x88, 0xff};
+    uint8_t map4to8[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+                         0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff};
+    uint8_t *map_table;
+
+#ifdef DEBUG
+    av_log(avctx, AV_LOG_INFO, "DVB pixel block size %d, %s field:\n", buf_size,
+                top_bottom ? "bottom" : "top");
+#endif
+
+#ifdef DEBUG_PACKET_CONTENTS
+    for (i = 0; i < buf_size; i++)
+    {
+        if (i % 16 == 0)
+            av_log(avctx, AV_LOG_INFO, "0x%08p: ", buf+i);
+
+        av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]);
+        if (i % 16 == 15)
+            av_log(avctx, AV_LOG_INFO, "\n");
+    }
+
+    if (i % 16 != 0)
+        av_log(avctx, AV_LOG_INFO, "\n");
+
+#endif
+
+    if (region == 0)
+        return;
+
+    pbuf = region->pbuf;
+
+    x_pos = display->x_pos;
+    y_pos = display->y_pos;
+
+    if ((y_pos & 1) != top_bottom)
+        y_pos++;
+
+    while (buf < buf_end) {
+        if (x_pos > region->width || y_pos > region->height) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid object location!\n");
+            return;
+        }
+
+        switch (*buf++) {
+        case 0x10:
+            if (region->depth == 8)
+                map_table = map2to8;
+            else if (region->depth == 4)
+                map_table = map2to4;
+            else
+                map_table = NULL;
+
+            x_pos += dvbsub_read_2bit_string(pbuf + (y_pos * region->width) + x_pos,
+                                                region->width - x_pos, &buf, buf_size,
+                                                non_mod, map_table);
+            break;
+        case 0x11:
+            if (region->depth < 4) {
+                av_log(avctx, AV_LOG_ERROR, "4-bit pixel string in %d-bit region!\n", region->depth);
+                return;
+            }
+
+            if (region->depth == 8)
+                map_table = map4to8;
+            else
+                map_table = NULL;
+
+            x_pos += dvbsub_read_4bit_string(pbuf + (y_pos * region->width) + x_pos,
+                                                region->width - x_pos, &buf, buf_size,
+                                                non_mod, map_table);
+            break;
+        case 0x12:
+            if (region->depth < 8) {
+                av_log(avctx, AV_LOG_ERROR, "8-bit pixel string in %d-bit region!\n", region->depth);
+                return;
+            }
+
+            x_pos += dvbsub_read_8bit_string(pbuf + (y_pos * region->width) + x_pos,
+                                                region->width - x_pos, &buf, buf_size,
+                                                non_mod, NULL);
+            break;
+
+        case 0x20:
+            map2to4[0] = (*buf) >> 4;
+            map2to4[1] = (*buf++) & 0xf;
+            map2to4[2] = (*buf) >> 4;
+            map2to4[3] = (*buf++) & 0xf;
+            break;
+        case 0x21:
+            for (i = 0; i < 4; i++)
+                map2to8[i] = *buf++;
+            break;
+        case 0x22:
+            for (i = 0; i < 16; i++)
+                map4to8[i] = *buf++;
+            break;
+
+        case 0xf0:
+            x_pos = display->x_pos;
+            y_pos += 2;
+            break;
+        default:
+            av_log(avctx, AV_LOG_INFO, "Unknown/unsupported pixel block 0x%x\n", *(buf-1));
+        }
+    }
+
+}
+
+static void dvbsub_parse_object_segment(AVCodecContext *avctx,
+                                        uint8_t *buf, int buf_size)
+{
+    DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data;
+
+    uint8_t *buf_end = buf + buf_size;
+    uint8_t *block;
+    int object_id;
+    DVBSubObject *object;
+    DVBSubObjectDisplay *display;
+    int top_field_len, bottom_field_len;
+
+    int coding_method, non_modifying_colour;
+
+    object_id = BE_16(buf);
+    buf += 2;
+
+    object = get_object(ctx, object_id);
+
+    if (!object)
+        return;
+
+    coding_method = ((*buf) >> 2) & 3;
+    non_modifying_colour = ((*buf++) >> 1) & 1;
+
+    if (coding_method == 0) {
+        top_field_len = BE_16(buf);
+        buf += 2;
+        bottom_field_len = BE_16(buf);
+        buf += 2;
+
+        if (buf + top_field_len + bottom_field_len > buf_end) {
+            av_log(avctx, AV_LOG_ERROR, "Field data size too large\n");
+            return;
+        }
+
+        for (display = object->display_list; display != 0; display = display->object_list_next) {
+            block = buf;
+
+            dvbsub_parse_pixel_data_block(avctx, display, block, top_field_len, 0,
+                                            non_modifying_colour);
+
+            if (bottom_field_len > 0)
+                block = buf + top_field_len;
+            else
+                bottom_field_len = top_field_len;
+
+            dvbsub_parse_pixel_data_block(avctx, display, block, bottom_field_len, 1,
+                                            non_modifying_colour);
+        }
+
+/*  } else if (coding_method == 1) {*/
+
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "Unknown object coding %d\n", coding_method);
+    }
+
+}
+
+#define SCALEBITS 10
+#define ONE_HALF  (1 << (SCALEBITS - 1))
+#define FIX(x)    ((int) ((x) * (1<<SCALEBITS) + 0.5))
+
+#define YUV_TO_RGB1_CCIR(cb1, cr1)\
+{\
+    cb = (cb1) - 128;\
+    cr = (cr1) - 128;\
+    r_add = FIX(1.40200*255.0/224.0) * cr + ONE_HALF;\
+    g_add = - FIX(0.34414*255.0/224.0) * cb - FIX(0.71414*255.0/224.0) * cr + \
+            ONE_HALF;\
+    b_add = FIX(1.77200*255.0/224.0) * cb + ONE_HALF;\
+}
+
+#define YUV_TO_RGB2_CCIR(r, g, b, y1)\
+{\
+    y = ((y1) - 16) * FIX(255.0/219.0);\
+    r = cm[(y + r_add) >> SCALEBITS];\
+    g = cm[(y + g_add) >> SCALEBITS];\
+    b = cm[(y + b_add) >> SCALEBITS];\
+}
+
+
+static void dvbsub_parse_clut_segment(AVCodecContext *avctx,
+                                        uint8_t *buf, int buf_size)
+{
+    DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data;
+
+    uint8_t *buf_end = buf + buf_size;
+    int clut_id;
+    DVBSubCLUT *clut;
+    int entry_id, depth , full_range;
+    int y, cr, cb, alpha;
+    int r, g, b, r_add, g_add, b_add;
+
+#ifdef DEBUG_PACKET_CONTENTS
+    int i;
+
+    av_log(avctx, AV_LOG_INFO, "DVB clut packet:\n");
+
+    for (i=0; i < buf_size; i++)
+    {
+        av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]);
+        if (i % 16 == 15)
+            av_log(avctx, AV_LOG_INFO, "\n");
+    }
+
+    if (i % 16 != 0)
+        av_log(avctx, AV_LOG_INFO, "\n");
+
+#endif
+
+    clut_id = *buf++;
+    buf += 1;
+
+    clut = get_clut(ctx, clut_id);
+
+    if (clut == NULL) {
+        clut = av_malloc(sizeof(DVBSubCLUT));
+
+        memcpy(clut, &default_clut, sizeof(DVBSubCLUT));
+
+        clut->id = clut_id;
+
+        clut->next = ctx->clut_list;
+        ctx->clut_list = clut;
+    }
+
+    while (buf + 4 < buf_end)
+    {
+        entry_id = *buf++;
+
+        depth = (*buf) & 0xe0;
+
+        if (depth == 0) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid clut depth 0x%x!\n", *buf);
+            return;
+        }
+
+        full_range = (*buf++) & 1;
+
+        if (full_range) {
+            y = *buf++;
+            cr = *buf++;
+            cb = *buf++;
+            alpha = *buf++;
+        } else {
+            y = buf[0] & 0xfc;
+            cr = (((buf[0] & 3) << 2) | ((buf[1] >> 6) & 3)) << 4;
+            cb = (buf[1] << 2) & 0xf0;
+            alpha = (buf[1] << 6) & 0xc0;
+
+            buf += 2;
+        }
+
+        if (y == 0)
+            alpha = 0xff;
+
+        YUV_TO_RGB1_CCIR(cb, cr);
+        YUV_TO_RGB2_CCIR(r, g, b, y);
+
+#ifdef DEBUG
+        av_log(avctx, AV_LOG_INFO, "clut %d := (%d,%d,%d,%d)\n", entry_id, r, g, b, alpha);
+#endif
+
+        if (depth & 0x80)
+            clut->clut4[entry_id] = RGBA(r,g,b,255 - alpha);
+        if (depth & 0x40)
+            clut->clut16[entry_id] = RGBA(r,g,b,255 - alpha);
+        if (depth & 0x20)
+            clut->clut256[entry_id] = RGBA(r,g,b,255 - alpha);
+    }
+}
+
+
+static void dvbsub_parse_region_segment(AVCodecContext *avctx,
+                                        uint8_t *buf, int buf_size)
+{
+    DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data;
+
+    uint8_t *buf_end = buf + buf_size;
+    int region_id, object_id;
+    DVBSubRegion *region;
+    DVBSubObject *object;
+    DVBSubObjectDisplay *display;
+    int fill;
+
+    if (buf_size < 10)
+        return;
+
+    region_id = *buf++;
+
+    region = get_region(ctx, region_id);
+
+    if (region == NULL)
+    {
+        region = av_mallocz(sizeof(DVBSubRegion));
+
+        region->id = region_id;
+
+        region->next = ctx->region_list;
+        ctx->region_list = region;
+    }
+
+    fill = ((*buf++) >> 3) & 1;
+
+    region->width = BE_16(buf);
+    buf += 2;
+    region->height = BE_16(buf);
+    buf += 2;
+
+    if (region->width * region->height != region->buf_size) {
+        if (region->pbuf != 0)
+            av_free(region->pbuf);
+
+        region->buf_size = region->width * region->height;
+
+        region->pbuf = av_malloc(region->buf_size);
+
+        fill = 1;
+    }
+
+    region->depth = 1 << (((*buf++) >> 2) & 7);
+    region->clut = *buf++;
+
+    if (region->depth == 8)
+        region->bgcolour = *buf++;
+    else {
+        buf += 1;
+
+        if (region->depth == 4)
+            region->bgcolour = (((*buf++) >> 4) & 15);
+        else
+            region->bgcolour = (((*buf++) >> 2) & 3);
+    }
+
+#ifdef DEBUG
+    av_log(avctx, AV_LOG_INFO, "Region %d, (%dx%d)\n", region_id, region->width, region->height);
+#endif
+
+    if (fill) {
+        memset(region->pbuf, region->bgcolour, region->buf_size);
+#ifdef DEBUG
+        av_log(avctx, AV_LOG_INFO, "Fill region (%d)\n", region->bgcolour);
+#endif
+    }
+
+    delete_region_display_list(ctx, region);
+
+    while (buf + 5 < buf_end) {
+        object_id = BE_16(buf);
+        buf += 2;
+
+        object = get_object(ctx, object_id);
+
+        if (object == NULL) {
+            object = av_mallocz(sizeof(DVBSubObject));
+
+            object->id = object_id;
+            object->next = ctx->object_list;
+            ctx->object_list = object;
+        }
+
+        object->type = (*buf) >> 6;
+
+        display = av_mallocz(sizeof(DVBSubObjectDisplay));
+
+        display->object_id = object_id;
+        display->region_id = region_id;
+
+        display->x_pos = BE_16(buf) & 0xfff;
+        buf += 2;
+        display->y_pos = BE_16(buf) & 0xfff;
+        buf += 2;
+
+        if ((object->type == 1 || object->type == 2) && buf+1 < buf_end) {
+            display->fgcolour = *buf++;
+            display->bgcolour = *buf++;
+        }
+
+        display->region_list_next = region->display_list;
+        region->display_list = display;
+
+        display->object_list_next = object->display_list;
+        object->display_list = display;
+    }
+}
+
+static void dvbsub_parse_page_segment(AVCodecContext *avctx,
+                                        uint8_t *buf, int buf_size)
+{
+    DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data;
+    DVBSubRegionDisplay *display;
+    DVBSubRegionDisplay *tmp_display_list, **tmp_ptr;
+
+    uint8_t *buf_end = buf + buf_size;
+    int region_id;
+    int page_state;
+
+    if (buf_size < 1)
+        return;
+
+    ctx->time_out = *buf++;
+    page_state = ((*buf++) >> 2) & 3;
+
+#ifdef DEBUG
+    av_log(avctx, AV_LOG_INFO, "Page time out %ds, state %d\n", ctx->time_out, page_state);
+#endif
+
+    if (page_state == 2)
+    {
+        delete_state(ctx);
+    }
+
+    tmp_display_list = ctx->display_list;
+    ctx->display_list = NULL;
+    ctx->display_list_size = 0;
+
+    while (buf + 5 < buf_end) {
+        region_id = *buf++;
+        buf += 1;
+
+        display = tmp_display_list;
+        tmp_ptr = &tmp_display_list;
+
+        while (display != NULL && display->region_id != region_id) {
+            tmp_ptr = &display->next;
+            display = display->next;
+        }
+
+        if (display == NULL)
+            display = av_mallocz(sizeof(DVBSubRegionDisplay));
+
+        display->region_id = region_id;
+
+        display->x_pos = BE_16(buf);
+        buf += 2;
+        display->y_pos = BE_16(buf);
+        buf += 2;
+
+        *tmp_ptr = display->next;
+
+        display->next = ctx->display_list;
+        ctx->display_list = display;
+        ctx->display_list_size++;
+
+#ifdef DEBUG
+        av_log(avctx, AV_LOG_INFO, "Region %d, (%d,%d)\n", region_id, display->x_pos, display->y_pos);
+#endif
+    }
+
+    while (tmp_display_list != 0) {
+        display = tmp_display_list;
+
+        tmp_display_list = display->next;
+
+        av_free(display);
+    }
+
+}
+
+
+#ifdef DEBUG_SAVE_IMAGES
+static void save_display_set(DVBSubContext *ctx)
+{
+    DVBSubRegion *region;
+    DVBSubRegionDisplay *display;
+    DVBSubCLUT *clut;
+    uint32_t *clut_table;
+    int x_pos, y_pos, width, height;
+    int x, y, y_off, x_off;
+    uint32_t *pbuf;
+    char filename[32];
+    static int fileno_index = 0;
+
+    x_pos = -1;
+    y_pos = -1;
+    width = 0;
+    height = 0;
+
+    for (display = ctx->display_list; display != NULL; display = display->next) {
+        region = get_region(ctx, display->region_id);
+
+        if (x_pos == -1) {
+            x_pos = display->x_pos;
+            y_pos = display->y_pos;
+            width = region->width;
+            height = region->height;
+        } else {
+            if (display->x_pos < x_pos) {
+                width += (x_pos - display->x_pos);
+                x_pos = display->x_pos;
+            }
+
+            if (display->y_pos < y_pos) {
+                height += (y_pos - display->y_pos);
+                y_pos = display->y_pos;
+            }
+
+            if (display->x_pos + region->width > x_pos + width) {
+                width = display->x_pos + region->width - x_pos;
+            }
+
+            if (display->y_pos + region->height > y_pos + height) {
+                height = display->y_pos + region->height - y_pos;
+            }
+        }
+    }
+
+    if (x_pos >= 0) {
+
+        pbuf = av_malloc(width * height * 4);
+
+        for (display = ctx->display_list; display != NULL; display = display->next) {
+            region = get_region(ctx, display->region_id);
+
+            x_off = display->x_pos - x_pos;
+            y_off = display->y_pos - y_pos;
+
+            clut = get_clut(ctx, region->clut);
+
+            if (clut == 0)
+                clut = &default_clut;
+
+            switch (region->depth) {
+            case 2:
+                clut_table = clut->clut4;
+                break;
+            case 8:
+                clut_table = clut->clut256;
+                break;
+            case 4:
+            default:
+                clut_table = clut->clut16;
+                break;
+            }
+
+            for (y = 0; y < region->height; y++) {
+                for (x = 0; x < region->width; x++) {
+                    pbuf[((y + y_off) * width) + x_off + x] =
+                        clut_table[region->pbuf[y * region->width + x]];
+                }
+            }
+
+        }
+
+        snprintf(filename, 32, "dvbs.%d", fileno_index);
+
+        png_save2(filename, pbuf, width, height);
+
+        av_free(pbuf);
+    }
+
+    fileno_index++;
+}
+#endif
+
+static int dvbsub_display_end_segment(AVCodecContext *avctx, uint8_t *buf,
+                                        int buf_size, AVSubtitle *sub)
+{
+    DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data;
+
+    DVBSubRegion *region;
+    DVBSubRegionDisplay *display;
+    AVSubtitleRect *rect;
+    DVBSubCLUT *clut;
+    uint32_t *clut_table;
+    int i;
+
+    sub->rects = NULL;
+    sub->start_display_time = 0;
+    sub->end_display_time = ctx->time_out * 1000;
+    sub->format = 0;
+
+    sub->num_rects = ctx->display_list_size;
+
+    if (sub->num_rects > 0)
+        sub->rects = av_mallocz(sizeof(AVSubtitleRect) * sub->num_rects);
+
+    i = 0;
+
+    for (display = ctx->display_list; display != NULL; display = display->next) {
+        region = get_region(ctx, display->region_id);
+        rect = &sub->rects[i];
+
+        if (region == NULL)
+            continue;
+
+        rect->x = display->x_pos;
+        rect->y = display->y_pos;
+        rect->w = region->width;
+        rect->h = region->height;
+        rect->nb_colors = 16;
+        rect->linesize = region->width;
+
+        clut = get_clut(ctx, region->clut);
+
+        if (clut == NULL)
+            clut = &default_clut;
+
+        switch (region->depth) {
+        case 2:
+            clut_table = clut->clut4;
+            break;
+        case 8:
+            clut_table = clut->clut256;
+            break;
+        case 4:
+        default:
+            clut_table = clut->clut16;
+            break;
+        }
+
+        rect->rgba_palette = av_malloc((1 << region->depth) * sizeof(uint32_t));
+        memcpy(rect->rgba_palette, clut_table, (1 << region->depth) * sizeof(uint32_t));
+
+        rect->bitmap = av_malloc(region->buf_size);
+        memcpy(rect->bitmap, region->pbuf, region->buf_size);
+
+        i++;
+    }
+
+    sub->num_rects = i;
+
+#ifdef DEBUG_SAVE_IMAGES
+    save_display_set(ctx);
+#endif
+
+    return 1;
+}
+
+static int dvbsub_decode(AVCodecContext *avctx,
+                         void *data, int *data_size,
+                         uint8_t *buf, int buf_size)
+{
+    DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data;
+    AVSubtitle *sub = (AVSubtitle*) data;
+    uint8_t *p, *p_end;
+    int segment_type;
+    int page_id;
+    int segment_length;
+
+#ifdef DEBUG_PACKET_CONTENTS
+    int i;
+
+    av_log(avctx, AV_LOG_INFO, "DVB sub packet:\n");
+
+    for (i=0; i < buf_size; i++)
+    {
+        av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]);
+        if (i % 16 == 15)
+            av_log(avctx, AV_LOG_INFO, "\n");
+    }
+
+    if (i % 16 != 0)
+        av_log(avctx, AV_LOG_INFO, "\n");
+
+#endif
+
+    if (buf_size <= 2)
+        return -1;
+
+    p = buf;
+    p_end = buf + buf_size;
+
+    while (p < p_end && *p == 0x0f)
+    {
+        p += 1;
+        segment_type = *p++;
+        page_id = BE_16(p);
+        p += 2;
+        segment_length = BE_16(p);
+        p += 2;
+
+        if (page_id == ctx->composition_id || page_id == ctx->ancillary_id) {
+            switch (segment_type) {
+            case DVBSUB_PAGE_SEGMENT:
+                dvbsub_parse_page_segment(avctx, p, segment_length);
+                break;
+            case DVBSUB_REGION_SEGMENT:
+                dvbsub_parse_region_segment(avctx, p, segment_length);
+                break;
+            case DVBSUB_CLUT_SEGMENT:
+                dvbsub_parse_clut_segment(avctx, p, segment_length);
+                break;
+            case DVBSUB_OBJECT_SEGMENT:
+                dvbsub_parse_object_segment(avctx, p, segment_length);
+                break;
+            case DVBSUB_DISPLAY_SEGMENT:
+                *data_size = dvbsub_display_end_segment(avctx, p, segment_length, sub);
+                break;
+            default:
+#ifdef DEBUG
+                av_log(avctx, AV_LOG_INFO, "Subtitling segment type 0x%x, page id %d, length %d\n",
+                        segment_type, page_id, segment_length);
+#endif
+                break;
+            }
+        }
+
+        p += segment_length;
+    }
+
+    if (p != p_end)
+    {
+#ifdef DEBUG
+        av_log(avctx, AV_LOG_INFO, "Junk at end of packet\n");
+#endif
+        return -1;
+    }
+
+    return buf_size;
+}
+
+
+AVCodec dvbsub_decoder = {
+    "dvbsub",
+    CODEC_TYPE_SUBTITLE,
+    CODEC_ID_DVB_SUBTITLE,
+    sizeof(DVBSubContext),
+    dvbsub_init_decoder,
+    NULL,
+    dvbsub_close_decoder,
+    dvbsub_decode,
+};
+
+/* Parser (mostly) copied from dvdsub.c */
+
+#define PARSE_BUF_SIZE  (65536)
+
+
+/* parser definition */
+typedef struct DVBSubParseContext {
+    uint8_t *packet_buf;
+    int packet_start;
+    int packet_index;
+    int in_packet;
+} DVBSubParseContext;
+
+static int dvbsub_parse_init(AVCodecParserContext *s)
+{
+    DVBSubParseContext *pc = s->priv_data;
+    pc->packet_buf = av_malloc(PARSE_BUF_SIZE);
+
+    return 0;
+}
+
+static int dvbsub_parse(AVCodecParserContext *s,
+                        AVCodecContext *avctx,
+                        uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size)
+{
+    DVBSubParseContext *pc = s->priv_data;
+    uint8_t *p, *p_end;
+    int len, buf_pos = 0;
+
+#ifdef DEBUG
+    av_log(avctx, AV_LOG_INFO, "DVB parse packet pts=%"PRIx64", lpts=%"PRIx64", cpts=%"PRIx64":\n",
+            s->pts, s->last_pts, s->cur_frame_pts[s->cur_frame_start_index]);
+#endif
+
+#ifdef DEBUG_PACKET_CONTENTS
+    int i;
+
+    for (i=0; i < buf_size; i++)
+    {
+        av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]);
+        if (i % 16 == 15)
+            av_log(avctx, AV_LOG_INFO, "\n");
+    }
+
+    if (i % 16 != 0)
+        av_log(avctx, AV_LOG_INFO, "\n");
+
+#endif
+
+    *poutbuf = NULL;
+    *poutbuf_size = 0;
+
+    s->fetch_timestamp = 1;
+
+    if (s->last_pts != s->pts && s->last_pts != AV_NOPTS_VALUE) /* Start of a new packet */
+    {
+        if (pc->packet_index != pc->packet_start)
+        {
+#ifdef DEBUG
+            av_log(avctx, AV_LOG_INFO, "Discarding %d bytes\n",
+                pc->packet_index - pc->packet_start);
+#endif
+        }
+
+        pc->packet_start = 0;
+        pc->packet_index = 0;
+
+        if (buf_size < 2 || buf[0] != 0x20 || buf[1] != 0x00) {
+#ifdef DEBUG
+            av_log(avctx, AV_LOG_INFO, "Bad packet header\n");
+#endif
+            return -1;
+        }
+
+        buf_pos = 2;
+
+        pc->in_packet = 1;
+    } else {
+        if (pc->packet_start != 0)
+        {
+            if (pc->packet_index != pc->packet_start)
+            {
+                memmove(pc->packet_buf, pc->packet_buf + pc->packet_start,
+                            pc->packet_index - pc->packet_start);
+
+                pc->packet_index -= pc->packet_start;
+                pc->packet_start = 0;
+            } else {
+                pc->packet_start = 0;
+                pc->packet_index = 0;
+            }
+        }
+    }
+
+    if (buf_size - buf_pos + pc->packet_index > PARSE_BUF_SIZE)
+        return -1;
+
+/* if not currently in a packet, discard data */
+    if (pc->in_packet == 0)
+        return buf_size;
+
+    memcpy(pc->packet_buf + pc->packet_index, buf + buf_pos, buf_size - buf_pos);
+    pc->packet_index += buf_size - buf_pos;
+
+    p = pc->packet_buf;
+    p_end = pc->packet_buf + pc->packet_index;
+
+    while (p < p_end)
+    {
+        if (*p == 0x0f)
+        {
+            if (p + 6 <= p_end)
+            {
+                len = BE_16(p + 4);
+
+                if (p + len + 6 <= p_end)
+                {
+                    *poutbuf_size += len + 6;
+
+                    p += len + 6;
+                } else
+                    break;
+            } else
+                break;
+        } else if (*p == 0xff) {
+            if (p + 1 < p_end)
+            {
+#ifdef DEBUG
+                av_log(avctx, AV_LOG_INFO, "Junk at end of packet\n");
+#endif
+            }
+            pc->packet_index = p - pc->packet_buf;
+            pc->in_packet = 0;
+            break;
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "Junk in packet\n");
+
+            pc->packet_index = p - pc->packet_buf;
+            pc->in_packet = 0;
+            break;
+        }
+    }
+
+    if (*poutbuf_size > 0)
+    {
+        *poutbuf = pc->packet_buf;
+        pc->packet_start = *poutbuf_size;
+    }
+
+    if (s->last_pts == AV_NOPTS_VALUE)
+        s->last_pts = s->pts;
+
+    return buf_size;
+}
+
+static void dvbsub_parse_close(AVCodecParserContext *s)
+{
+    DVBSubParseContext *pc = s->priv_data;
+    av_freep(&pc->packet_buf);
+}
+
+AVCodecParser dvbsub_parser = {
+    { CODEC_ID_DVB_SUBTITLE },
+    sizeof(DVBSubParseContext),
+    dvbsub_parse_init,
+    dvbsub_parse,
+    dvbsub_parse_close,
+};
diff --git a/contrib/ffmpeg/libavcodec/dvdata.h b/contrib/ffmpeg/libavcodec/dvdata.h
new file mode 100644
index 000000000..dce4aba98
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/dvdata.h
@@ -0,0 +1,2724 @@
+/*
+ * Constants for DV codec
+ * Copyright (c) 2002 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file dvdata.h
+ * Constants for DV codec.
+ */
+
+/*
+ * DVprofile is used to express the differences between various
+ * DV flavors. For now it's primarily used for differentiating
+ * 525/60 and 625/50, but the plans are to use it for various
+ * DV specs as well (e.g. SMPTE314M vs. IEC 61834).
+ */
+typedef struct DVprofile {
+    int              dsf;                 /* value of the dsf in the DV header */
+    int              frame_size;          /* total size of one frame in bytes */
+    int              difseg_size;         /* number of DIF segments per DIF channel */
+    int              n_difchan;           /* number of DIF channels per frame */
+    int              frame_rate;
+    int              frame_rate_base;
+    int              ltc_divisor;         /* FPS from the LTS standpoint */
+    int              height;              /* picture height in pixels */
+    int              width;               /* picture width in pixels */
+    AVRational       sar[2];              /* sample aspect ratios for 4:3 and 16:9 */
+    const uint16_t  *video_place;         /* positions of all DV macro blocks */
+    enum PixelFormat pix_fmt;             /* picture pixel format */
+
+    int              audio_stride;        /* size of audio_shuffle table */
+    int              audio_min_samples[3];/* min ammount of audio samples */
+                                          /* for 48Khz, 44.1Khz and 32Khz */
+    int              audio_samples_dist[5];/* how many samples are supposed to be */
+                                         /* in each frame in a 5 frames window */
+    const uint16_t (*audio_shuffle)[9];  /* PCM shuffling table */
+} DVprofile;
+
+#define NB_DV_VLC 409
+
+/*
+ * There's a catch about the following three tables: the mapping they establish
+ * between (run, level) and vlc is not 1-1. So you have to watch out for that
+ * when building misc. tables. E.g. (1, 0) can be either 0x7cf or 0x1f82.
+ */
+static const uint16_t dv_vlc_bits[409] = {
+ 0x0000, 0x0002, 0x0007, 0x0008, 0x0009, 0x0014, 0x0015, 0x0016,
+ 0x0017, 0x0030, 0x0031, 0x0032, 0x0033, 0x0068, 0x0069, 0x006a,
+ 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x00e0, 0x00e1, 0x00e2,
+ 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea,
+ 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x01e0, 0x01e1, 0x01e2,
+ 0x01e3, 0x01e4, 0x01e5, 0x01e6, 0x01e7, 0x01e8, 0x01e9, 0x01ea,
+ 0x01eb, 0x01ec, 0x01ed, 0x01ee, 0x01ef, 0x03e0, 0x03e1, 0x03e2,
+ 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x07ce, 0x07cf, 0x07d0, 0x07d1,
+ 0x07d2, 0x07d3, 0x07d4, 0x07d5, 0x0fac, 0x0fad, 0x0fae, 0x0faf,
+ 0x0fb0, 0x0fb1, 0x0fb2, 0x0fb3, 0x0fb4, 0x0fb5, 0x0fb6, 0x0fb7,
+ 0x0fb8, 0x0fb9, 0x0fba, 0x0fbb, 0x0fbc, 0x0fbd, 0x0fbe, 0x0fbf,
+ 0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87,
+ 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f,
+ 0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97,
+ 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f,
+ 0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7,
+ 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf,
+ 0x1fb0, 0x1fb1, 0x1fb2, 0x1fb3, 0x1fb4, 0x1fb5, 0x1fb6, 0x1fb7,
+ 0x1fb8, 0x1fb9, 0x1fba, 0x1fbb, 0x1fbc, 0x1fbd, 0x1fbe, 0x1fbf,
+ 0x7f00, 0x7f01, 0x7f02, 0x7f03, 0x7f04, 0x7f05, 0x7f06, 0x7f07,
+ 0x7f08, 0x7f09, 0x7f0a, 0x7f0b, 0x7f0c, 0x7f0d, 0x7f0e, 0x7f0f,
+ 0x7f10, 0x7f11, 0x7f12, 0x7f13, 0x7f14, 0x7f15, 0x7f16, 0x7f17,
+ 0x7f18, 0x7f19, 0x7f1a, 0x7f1b, 0x7f1c, 0x7f1d, 0x7f1e, 0x7f1f,
+ 0x7f20, 0x7f21, 0x7f22, 0x7f23, 0x7f24, 0x7f25, 0x7f26, 0x7f27,
+ 0x7f28, 0x7f29, 0x7f2a, 0x7f2b, 0x7f2c, 0x7f2d, 0x7f2e, 0x7f2f,
+ 0x7f30, 0x7f31, 0x7f32, 0x7f33, 0x7f34, 0x7f35, 0x7f36, 0x7f37,
+ 0x7f38, 0x7f39, 0x7f3a, 0x7f3b, 0x7f3c, 0x7f3d, 0x7f3e, 0x7f3f,
+ 0x7f40, 0x7f41, 0x7f42, 0x7f43, 0x7f44, 0x7f45, 0x7f46, 0x7f47,
+ 0x7f48, 0x7f49, 0x7f4a, 0x7f4b, 0x7f4c, 0x7f4d, 0x7f4e, 0x7f4f,
+ 0x7f50, 0x7f51, 0x7f52, 0x7f53, 0x7f54, 0x7f55, 0x7f56, 0x7f57,
+ 0x7f58, 0x7f59, 0x7f5a, 0x7f5b, 0x7f5c, 0x7f5d, 0x7f5e, 0x7f5f,
+ 0x7f60, 0x7f61, 0x7f62, 0x7f63, 0x7f64, 0x7f65, 0x7f66, 0x7f67,
+ 0x7f68, 0x7f69, 0x7f6a, 0x7f6b, 0x7f6c, 0x7f6d, 0x7f6e, 0x7f6f,
+ 0x7f70, 0x7f71, 0x7f72, 0x7f73, 0x7f74, 0x7f75, 0x7f76, 0x7f77,
+ 0x7f78, 0x7f79, 0x7f7a, 0x7f7b, 0x7f7c, 0x7f7d, 0x7f7e, 0x7f7f,
+ 0x7f80, 0x7f81, 0x7f82, 0x7f83, 0x7f84, 0x7f85, 0x7f86, 0x7f87,
+ 0x7f88, 0x7f89, 0x7f8a, 0x7f8b, 0x7f8c, 0x7f8d, 0x7f8e, 0x7f8f,
+ 0x7f90, 0x7f91, 0x7f92, 0x7f93, 0x7f94, 0x7f95, 0x7f96, 0x7f97,
+ 0x7f98, 0x7f99, 0x7f9a, 0x7f9b, 0x7f9c, 0x7f9d, 0x7f9e, 0x7f9f,
+ 0x7fa0, 0x7fa1, 0x7fa2, 0x7fa3, 0x7fa4, 0x7fa5, 0x7fa6, 0x7fa7,
+ 0x7fa8, 0x7fa9, 0x7faa, 0x7fab, 0x7fac, 0x7fad, 0x7fae, 0x7faf,
+ 0x7fb0, 0x7fb1, 0x7fb2, 0x7fb3, 0x7fb4, 0x7fb5, 0x7fb6, 0x7fb7,
+ 0x7fb8, 0x7fb9, 0x7fba, 0x7fbb, 0x7fbc, 0x7fbd, 0x7fbe, 0x7fbf,
+ 0x7fc0, 0x7fc1, 0x7fc2, 0x7fc3, 0x7fc4, 0x7fc5, 0x7fc6, 0x7fc7,
+ 0x7fc8, 0x7fc9, 0x7fca, 0x7fcb, 0x7fcc, 0x7fcd, 0x7fce, 0x7fcf,
+ 0x7fd0, 0x7fd1, 0x7fd2, 0x7fd3, 0x7fd4, 0x7fd5, 0x7fd6, 0x7fd7,
+ 0x7fd8, 0x7fd9, 0x7fda, 0x7fdb, 0x7fdc, 0x7fdd, 0x7fde, 0x7fdf,
+ 0x7fe0, 0x7fe1, 0x7fe2, 0x7fe3, 0x7fe4, 0x7fe5, 0x7fe6, 0x7fe7,
+ 0x7fe8, 0x7fe9, 0x7fea, 0x7feb, 0x7fec, 0x7fed, 0x7fee, 0x7fef,
+ 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3, 0x7ff4, 0x7ff5, 0x7ff6, 0x7ff7,
+ 0x7ff8, 0x7ff9, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffd, 0x7ffe, 0x7fff,
+ 0x0006,
+};
+
+static const uint8_t dv_vlc_len[409] = {
+  2,  3,  4,  4,  4,  5,  5,  5,
+  5,  6,  6,  6,  6,  7,  7,  7,
+  7,  7,  7,  7,  7,  8,  8,  8,
+  8,  8,  8,  8,  8,  8,  8,  8,
+  8,  8,  8,  8,  8,  9,  9,  9,
+  9,  9,  9,  9,  9,  9,  9,  9,
+  9,  9,  9,  9,  9, 10, 10, 10,
+ 10, 10, 10, 10, 11, 11, 11, 11,
+ 11, 11, 11, 11, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+  4,
+};
+
+static const uint8_t dv_vlc_run[409] = {
+  0,  0,  1,  0,  0,  2,  1,  0,
+  0,  3,  4,  0,  0,  5,  6,  2,
+  1,  1,  0,  0,  0,  7,  8,  9,
+ 10,  3,  4,  2,  1,  1,  1,  0,
+  0,  0,  0,  0,  0, 11, 12, 13,
+ 14,  5,  6,  3,  4,  2,  2,  1,
+  0,  0,  0,  0,  0,  5,  3,  3,
+  2,  1,  1,  1,  0,  1,  6,  4,
+  3,  1,  1,  1,  2,  3,  4,  5,
+  7,  8,  9, 10,  7,  8,  4,  3,
+  2,  2,  2,  2,  2,  1,  1,  1,
+  0,  1,  2,  3,  4,  5,  6,  7,
+  8,  9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+127,
+};
+
+static const uint8_t dv_vlc_level[409] = {
+  1,  2,  1,  3,  4,  1,  2,  5,
+  6,  1,  1,  7,  8,  1,  1,  2,
+  3,  4,  9, 10, 11,  1,  1,  1,
+  1,  2,  2,  3,  5,  6,  7, 12,
+ 13, 14, 15, 16, 17,  1,  1,  1,
+  1,  2,  2,  3,  3,  4,  5,  8,
+ 18, 19, 20, 21, 22,  3,  4,  5,
+  6,  9, 10, 11,  0,  0,  3,  4,
+  6, 12, 13, 14,  0,  0,  0,  0,
+  2,  2,  2,  2,  3,  3,  5,  7,
+  7,  8,  9, 10, 11, 15, 16, 17,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  1,  2,  3,  4,  5,  6,  7,
+  8,  9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71,
+ 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87,
+ 88, 89, 90, 91, 92, 93, 94, 95,
+ 96, 97, 98, 99, 100, 101, 102, 103,
+ 104, 105, 106, 107, 108, 109, 110, 111,
+ 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127,
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 161, 162, 163, 164, 165, 166, 167,
+ 168, 169, 170, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 186, 187, 188, 189, 190, 191,
+ 192, 193, 194, 195, 196, 197, 198, 199,
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 254, 255,
+  0,
+};
+
+/* unquant tables (not used directly) */
+static const uint8_t dv_88_areas[64] = {
+    0,0,0,1,1,1,2,2,
+    0,0,1,1,1,2,2,2,
+    0,1,1,1,2,2,2,3,
+    1,1,1,2,2,2,3,3,
+    1,1,2,2,2,3,3,3,
+    1,2,2,2,3,3,3,3,
+    2,2,2,3,3,3,3,3,
+    2,2,3,3,3,3,3,3,
+};
+
+static const uint8_t dv_248_areas[64] = {
+    0,0,1,1,1,2,2,3,
+    0,0,1,1,2,2,2,3,
+    0,1,1,2,2,2,3,3,
+    0,1,1,2,2,2,3,3,
+    1,1,2,2,2,3,3,3,
+    1,1,2,2,2,3,3,3,
+    1,2,2,2,3,3,3,3,
+    1,2,2,3,3,3,3,3,
+};
+
+static const uint8_t dv_quant_shifts[22][4] = {
+  { 3,3,4,4 },
+  { 3,3,4,4 },
+  { 2,3,3,4 },
+  { 2,3,3,4 },
+  { 2,2,3,3 },
+  { 2,2,3,3 },
+  { 1,2,2,3 },
+  { 1,2,2,3 },
+  { 1,1,2,2 },
+  { 1,1,2,2 },
+  { 0,1,1,2 },
+  { 0,1,1,2 },
+  { 0,0,1,1 },
+  { 0,0,1,1 },
+  { 0,0,0,1 },
+  { 0,0,0,0 },
+  { 0,0,0,0 },
+  { 0,0,0,0 },
+  { 0,0,0,0 },
+  { 0,0,0,0 },
+  { 0,0,0,0 },
+  { 0,0,0,0 },
+};
+
+static const uint8_t dv_quant_offset[4] = { 6, 3, 0, 1 };
+
+/* NOTE: I prefer hardcoding the positionning of dv blocks, it is
+   simpler :-) */
+
+static const uint16_t dv_place_420[1620] = {
+ 0x0c24, 0x2412, 0x3036, 0x0000, 0x1848,
+ 0x0e24, 0x2612, 0x3236, 0x0200, 0x1a48,
+ 0x1024, 0x2812, 0x3436, 0x0400, 0x1c48,
+ 0x1026, 0x2814, 0x3438, 0x0402, 0x1c4a,
+ 0x0e26, 0x2614, 0x3238, 0x0202, 0x1a4a,
+ 0x0c26, 0x2414, 0x3038, 0x0002, 0x184a,
+ 0x0c28, 0x2416, 0x303a, 0x0004, 0x184c,
+ 0x0e28, 0x2616, 0x323a, 0x0204, 0x1a4c,
+ 0x1028, 0x2816, 0x343a, 0x0404, 0x1c4c,
+ 0x102a, 0x2818, 0x343c, 0x0406, 0x1c4e,
+ 0x0e2a, 0x2618, 0x323c, 0x0206, 0x1a4e,
+ 0x0c2a, 0x2418, 0x303c, 0x0006, 0x184e,
+ 0x0c2c, 0x241a, 0x303e, 0x0008, 0x1850,
+ 0x0e2c, 0x261a, 0x323e, 0x0208, 0x1a50,
+ 0x102c, 0x281a, 0x343e, 0x0408, 0x1c50,
+ 0x102e, 0x281c, 0x3440, 0x040a, 0x1c52,
+ 0x0e2e, 0x261c, 0x3240, 0x020a, 0x1a52,
+ 0x0c2e, 0x241c, 0x3040, 0x000a, 0x1852,
+ 0x0c30, 0x241e, 0x3042, 0x000c, 0x1854,
+ 0x0e30, 0x261e, 0x3242, 0x020c, 0x1a54,
+ 0x1030, 0x281e, 0x3442, 0x040c, 0x1c54,
+ 0x1032, 0x2820, 0x3444, 0x040e, 0x1c56,
+ 0x0e32, 0x2620, 0x3244, 0x020e, 0x1a56,
+ 0x0c32, 0x2420, 0x3044, 0x000e, 0x1856,
+ 0x0c34, 0x2422, 0x3046, 0x0010, 0x1858,
+ 0x0e34, 0x2622, 0x3246, 0x0210, 0x1a58,
+ 0x1034, 0x2822, 0x3446, 0x0410, 0x1c58,
+ 0x1224, 0x2a12, 0x3636, 0x0600, 0x1e48,
+ 0x1424, 0x2c12, 0x3836, 0x0800, 0x2048,
+ 0x1624, 0x2e12, 0x3a36, 0x0a00, 0x2248,
+ 0x1626, 0x2e14, 0x3a38, 0x0a02, 0x224a,
+ 0x1426, 0x2c14, 0x3838, 0x0802, 0x204a,
+ 0x1226, 0x2a14, 0x3638, 0x0602, 0x1e4a,
+ 0x1228, 0x2a16, 0x363a, 0x0604, 0x1e4c,
+ 0x1428, 0x2c16, 0x383a, 0x0804, 0x204c,
+ 0x1628, 0x2e16, 0x3a3a, 0x0a04, 0x224c,
+ 0x162a, 0x2e18, 0x3a3c, 0x0a06, 0x224e,
+ 0x142a, 0x2c18, 0x383c, 0x0806, 0x204e,
+ 0x122a, 0x2a18, 0x363c, 0x0606, 0x1e4e,
+ 0x122c, 0x2a1a, 0x363e, 0x0608, 0x1e50,
+ 0x142c, 0x2c1a, 0x383e, 0x0808, 0x2050,
+ 0x162c, 0x2e1a, 0x3a3e, 0x0a08, 0x2250,
+ 0x162e, 0x2e1c, 0x3a40, 0x0a0a, 0x2252,
+ 0x142e, 0x2c1c, 0x3840, 0x080a, 0x2052,
+ 0x122e, 0x2a1c, 0x3640, 0x060a, 0x1e52,
+ 0x1230, 0x2a1e, 0x3642, 0x060c, 0x1e54,
+ 0x1430, 0x2c1e, 0x3842, 0x080c, 0x2054,
+ 0x1630, 0x2e1e, 0x3a42, 0x0a0c, 0x2254,
+ 0x1632, 0x2e20, 0x3a44, 0x0a0e, 0x2256,
+ 0x1432, 0x2c20, 0x3844, 0x080e, 0x2056,
+ 0x1232, 0x2a20, 0x3644, 0x060e, 0x1e56,
+ 0x1234, 0x2a22, 0x3646, 0x0610, 0x1e58,
+ 0x1434, 0x2c22, 0x3846, 0x0810, 0x2058,
+ 0x1634, 0x2e22, 0x3a46, 0x0a10, 0x2258,
+ 0x1824, 0x3012, 0x3c36, 0x0c00, 0x2448,
+ 0x1a24, 0x3212, 0x3e36, 0x0e00, 0x2648,
+ 0x1c24, 0x3412, 0x4036, 0x1000, 0x2848,
+ 0x1c26, 0x3414, 0x4038, 0x1002, 0x284a,
+ 0x1a26, 0x3214, 0x3e38, 0x0e02, 0x264a,
+ 0x1826, 0x3014, 0x3c38, 0x0c02, 0x244a,
+ 0x1828, 0x3016, 0x3c3a, 0x0c04, 0x244c,
+ 0x1a28, 0x3216, 0x3e3a, 0x0e04, 0x264c,
+ 0x1c28, 0x3416, 0x403a, 0x1004, 0x284c,
+ 0x1c2a, 0x3418, 0x403c, 0x1006, 0x284e,
+ 0x1a2a, 0x3218, 0x3e3c, 0x0e06, 0x264e,
+ 0x182a, 0x3018, 0x3c3c, 0x0c06, 0x244e,
+ 0x182c, 0x301a, 0x3c3e, 0x0c08, 0x2450,
+ 0x1a2c, 0x321a, 0x3e3e, 0x0e08, 0x2650,
+ 0x1c2c, 0x341a, 0x403e, 0x1008, 0x2850,
+ 0x1c2e, 0x341c, 0x4040, 0x100a, 0x2852,
+ 0x1a2e, 0x321c, 0x3e40, 0x0e0a, 0x2652,
+ 0x182e, 0x301c, 0x3c40, 0x0c0a, 0x2452,
+ 0x1830, 0x301e, 0x3c42, 0x0c0c, 0x2454,
+ 0x1a30, 0x321e, 0x3e42, 0x0e0c, 0x2654,
+ 0x1c30, 0x341e, 0x4042, 0x100c, 0x2854,
+ 0x1c32, 0x3420, 0x4044, 0x100e, 0x2856,
+ 0x1a32, 0x3220, 0x3e44, 0x0e0e, 0x2656,
+ 0x1832, 0x3020, 0x3c44, 0x0c0e, 0x2456,
+ 0x1834, 0x3022, 0x3c46, 0x0c10, 0x2458,
+ 0x1a34, 0x3222, 0x3e46, 0x0e10, 0x2658,
+ 0x1c34, 0x3422, 0x4046, 0x1010, 0x2858,
+ 0x1e24, 0x3612, 0x4236, 0x1200, 0x2a48,
+ 0x2024, 0x3812, 0x4436, 0x1400, 0x2c48,
+ 0x2224, 0x3a12, 0x4636, 0x1600, 0x2e48,
+ 0x2226, 0x3a14, 0x4638, 0x1602, 0x2e4a,
+ 0x2026, 0x3814, 0x4438, 0x1402, 0x2c4a,
+ 0x1e26, 0x3614, 0x4238, 0x1202, 0x2a4a,
+ 0x1e28, 0x3616, 0x423a, 0x1204, 0x2a4c,
+ 0x2028, 0x3816, 0x443a, 0x1404, 0x2c4c,
+ 0x2228, 0x3a16, 0x463a, 0x1604, 0x2e4c,
+ 0x222a, 0x3a18, 0x463c, 0x1606, 0x2e4e,
+ 0x202a, 0x3818, 0x443c, 0x1406, 0x2c4e,
+ 0x1e2a, 0x3618, 0x423c, 0x1206, 0x2a4e,
+ 0x1e2c, 0x361a, 0x423e, 0x1208, 0x2a50,
+ 0x202c, 0x381a, 0x443e, 0x1408, 0x2c50,
+ 0x222c, 0x3a1a, 0x463e, 0x1608, 0x2e50,
+ 0x222e, 0x3a1c, 0x4640, 0x160a, 0x2e52,
+ 0x202e, 0x381c, 0x4440, 0x140a, 0x2c52,
+ 0x1e2e, 0x361c, 0x4240, 0x120a, 0x2a52,
+ 0x1e30, 0x361e, 0x4242, 0x120c, 0x2a54,
+ 0x2030, 0x381e, 0x4442, 0x140c, 0x2c54,
+ 0x2230, 0x3a1e, 0x4642, 0x160c, 0x2e54,
+ 0x2232, 0x3a20, 0x4644, 0x160e, 0x2e56,
+ 0x2032, 0x3820, 0x4444, 0x140e, 0x2c56,
+ 0x1e32, 0x3620, 0x4244, 0x120e, 0x2a56,
+ 0x1e34, 0x3622, 0x4246, 0x1210, 0x2a58,
+ 0x2034, 0x3822, 0x4446, 0x1410, 0x2c58,
+ 0x2234, 0x3a22, 0x4646, 0x1610, 0x2e58,
+ 0x2424, 0x3c12, 0x0036, 0x1800, 0x3048,
+ 0x2624, 0x3e12, 0x0236, 0x1a00, 0x3248,
+ 0x2824, 0x4012, 0x0436, 0x1c00, 0x3448,
+ 0x2826, 0x4014, 0x0438, 0x1c02, 0x344a,
+ 0x2626, 0x3e14, 0x0238, 0x1a02, 0x324a,
+ 0x2426, 0x3c14, 0x0038, 0x1802, 0x304a,
+ 0x2428, 0x3c16, 0x003a, 0x1804, 0x304c,
+ 0x2628, 0x3e16, 0x023a, 0x1a04, 0x324c,
+ 0x2828, 0x4016, 0x043a, 0x1c04, 0x344c,
+ 0x282a, 0x4018, 0x043c, 0x1c06, 0x344e,
+ 0x262a, 0x3e18, 0x023c, 0x1a06, 0x324e,
+ 0x242a, 0x3c18, 0x003c, 0x1806, 0x304e,
+ 0x242c, 0x3c1a, 0x003e, 0x1808, 0x3050,
+ 0x262c, 0x3e1a, 0x023e, 0x1a08, 0x3250,
+ 0x282c, 0x401a, 0x043e, 0x1c08, 0x3450,
+ 0x282e, 0x401c, 0x0440, 0x1c0a, 0x3452,
+ 0x262e, 0x3e1c, 0x0240, 0x1a0a, 0x3252,
+ 0x242e, 0x3c1c, 0x0040, 0x180a, 0x3052,
+ 0x2430, 0x3c1e, 0x0042, 0x180c, 0x3054,
+ 0x2630, 0x3e1e, 0x0242, 0x1a0c, 0x3254,
+ 0x2830, 0x401e, 0x0442, 0x1c0c, 0x3454,
+ 0x2832, 0x4020, 0x0444, 0x1c0e, 0x3456,
+ 0x2632, 0x3e20, 0x0244, 0x1a0e, 0x3256,
+ 0x2432, 0x3c20, 0x0044, 0x180e, 0x3056,
+ 0x2434, 0x3c22, 0x0046, 0x1810, 0x3058,
+ 0x2634, 0x3e22, 0x0246, 0x1a10, 0x3258,
+ 0x2834, 0x4022, 0x0446, 0x1c10, 0x3458,
+ 0x2a24, 0x4212, 0x0636, 0x1e00, 0x3648,
+ 0x2c24, 0x4412, 0x0836, 0x2000, 0x3848,
+ 0x2e24, 0x4612, 0x0a36, 0x2200, 0x3a48,
+ 0x2e26, 0x4614, 0x0a38, 0x2202, 0x3a4a,
+ 0x2c26, 0x4414, 0x0838, 0x2002, 0x384a,
+ 0x2a26, 0x4214, 0x0638, 0x1e02, 0x364a,
+ 0x2a28, 0x4216, 0x063a, 0x1e04, 0x364c,
+ 0x2c28, 0x4416, 0x083a, 0x2004, 0x384c,
+ 0x2e28, 0x4616, 0x0a3a, 0x2204, 0x3a4c,
+ 0x2e2a, 0x4618, 0x0a3c, 0x2206, 0x3a4e,
+ 0x2c2a, 0x4418, 0x083c, 0x2006, 0x384e,
+ 0x2a2a, 0x4218, 0x063c, 0x1e06, 0x364e,
+ 0x2a2c, 0x421a, 0x063e, 0x1e08, 0x3650,
+ 0x2c2c, 0x441a, 0x083e, 0x2008, 0x3850,
+ 0x2e2c, 0x461a, 0x0a3e, 0x2208, 0x3a50,
+ 0x2e2e, 0x461c, 0x0a40, 0x220a, 0x3a52,
+ 0x2c2e, 0x441c, 0x0840, 0x200a, 0x3852,
+ 0x2a2e, 0x421c, 0x0640, 0x1e0a, 0x3652,
+ 0x2a30, 0x421e, 0x0642, 0x1e0c, 0x3654,
+ 0x2c30, 0x441e, 0x0842, 0x200c, 0x3854,
+ 0x2e30, 0x461e, 0x0a42, 0x220c, 0x3a54,
+ 0x2e32, 0x4620, 0x0a44, 0x220e, 0x3a56,
+ 0x2c32, 0x4420, 0x0844, 0x200e, 0x3856,
+ 0x2a32, 0x4220, 0x0644, 0x1e0e, 0x3656,
+ 0x2a34, 0x4222, 0x0646, 0x1e10, 0x3658,
+ 0x2c34, 0x4422, 0x0846, 0x2010, 0x3858,
+ 0x2e34, 0x4622, 0x0a46, 0x2210, 0x3a58,
+ 0x3024, 0x0012, 0x0c36, 0x2400, 0x3c48,
+ 0x3224, 0x0212, 0x0e36, 0x2600, 0x3e48,
+ 0x3424, 0x0412, 0x1036, 0x2800, 0x4048,
+ 0x3426, 0x0414, 0x1038, 0x2802, 0x404a,
+ 0x3226, 0x0214, 0x0e38, 0x2602, 0x3e4a,
+ 0x3026, 0x0014, 0x0c38, 0x2402, 0x3c4a,
+ 0x3028, 0x0016, 0x0c3a, 0x2404, 0x3c4c,
+ 0x3228, 0x0216, 0x0e3a, 0x2604, 0x3e4c,
+ 0x3428, 0x0416, 0x103a, 0x2804, 0x404c,
+ 0x342a, 0x0418, 0x103c, 0x2806, 0x404e,
+ 0x322a, 0x0218, 0x0e3c, 0x2606, 0x3e4e,
+ 0x302a, 0x0018, 0x0c3c, 0x2406, 0x3c4e,
+ 0x302c, 0x001a, 0x0c3e, 0x2408, 0x3c50,
+ 0x322c, 0x021a, 0x0e3e, 0x2608, 0x3e50,
+ 0x342c, 0x041a, 0x103e, 0x2808, 0x4050,
+ 0x342e, 0x041c, 0x1040, 0x280a, 0x4052,
+ 0x322e, 0x021c, 0x0e40, 0x260a, 0x3e52,
+ 0x302e, 0x001c, 0x0c40, 0x240a, 0x3c52,
+ 0x3030, 0x001e, 0x0c42, 0x240c, 0x3c54,
+ 0x3230, 0x021e, 0x0e42, 0x260c, 0x3e54,
+ 0x3430, 0x041e, 0x1042, 0x280c, 0x4054,
+ 0x3432, 0x0420, 0x1044, 0x280e, 0x4056,
+ 0x3232, 0x0220, 0x0e44, 0x260e, 0x3e56,
+ 0x3032, 0x0020, 0x0c44, 0x240e, 0x3c56,
+ 0x3034, 0x0022, 0x0c46, 0x2410, 0x3c58,
+ 0x3234, 0x0222, 0x0e46, 0x2610, 0x3e58,
+ 0x3434, 0x0422, 0x1046, 0x2810, 0x4058,
+ 0x3624, 0x0612, 0x1236, 0x2a00, 0x4248,
+ 0x3824, 0x0812, 0x1436, 0x2c00, 0x4448,
+ 0x3a24, 0x0a12, 0x1636, 0x2e00, 0x4648,
+ 0x3a26, 0x0a14, 0x1638, 0x2e02, 0x464a,
+ 0x3826, 0x0814, 0x1438, 0x2c02, 0x444a,
+ 0x3626, 0x0614, 0x1238, 0x2a02, 0x424a,
+ 0x3628, 0x0616, 0x123a, 0x2a04, 0x424c,
+ 0x3828, 0x0816, 0x143a, 0x2c04, 0x444c,
+ 0x3a28, 0x0a16, 0x163a, 0x2e04, 0x464c,
+ 0x3a2a, 0x0a18, 0x163c, 0x2e06, 0x464e,
+ 0x382a, 0x0818, 0x143c, 0x2c06, 0x444e,
+ 0x362a, 0x0618, 0x123c, 0x2a06, 0x424e,
+ 0x362c, 0x061a, 0x123e, 0x2a08, 0x4250,
+ 0x382c, 0x081a, 0x143e, 0x2c08, 0x4450,
+ 0x3a2c, 0x0a1a, 0x163e, 0x2e08, 0x4650,
+ 0x3a2e, 0x0a1c, 0x1640, 0x2e0a, 0x4652,
+ 0x382e, 0x081c, 0x1440, 0x2c0a, 0x4452,
+ 0x362e, 0x061c, 0x1240, 0x2a0a, 0x4252,
+ 0x3630, 0x061e, 0x1242, 0x2a0c, 0x4254,
+ 0x3830, 0x081e, 0x1442, 0x2c0c, 0x4454,
+ 0x3a30, 0x0a1e, 0x1642, 0x2e0c, 0x4654,
+ 0x3a32, 0x0a20, 0x1644, 0x2e0e, 0x4656,
+ 0x3832, 0x0820, 0x1444, 0x2c0e, 0x4456,
+ 0x3632, 0x0620, 0x1244, 0x2a0e, 0x4256,
+ 0x3634, 0x0622, 0x1246, 0x2a10, 0x4258,
+ 0x3834, 0x0822, 0x1446, 0x2c10, 0x4458,
+ 0x3a34, 0x0a22, 0x1646, 0x2e10, 0x4658,
+ 0x3c24, 0x0c12, 0x1836, 0x3000, 0x0048,
+ 0x3e24, 0x0e12, 0x1a36, 0x3200, 0x0248,
+ 0x4024, 0x1012, 0x1c36, 0x3400, 0x0448,
+ 0x4026, 0x1014, 0x1c38, 0x3402, 0x044a,
+ 0x3e26, 0x0e14, 0x1a38, 0x3202, 0x024a,
+ 0x3c26, 0x0c14, 0x1838, 0x3002, 0x004a,
+ 0x3c28, 0x0c16, 0x183a, 0x3004, 0x004c,
+ 0x3e28, 0x0e16, 0x1a3a, 0x3204, 0x024c,
+ 0x4028, 0x1016, 0x1c3a, 0x3404, 0x044c,
+ 0x402a, 0x1018, 0x1c3c, 0x3406, 0x044e,
+ 0x3e2a, 0x0e18, 0x1a3c, 0x3206, 0x024e,
+ 0x3c2a, 0x0c18, 0x183c, 0x3006, 0x004e,
+ 0x3c2c, 0x0c1a, 0x183e, 0x3008, 0x0050,
+ 0x3e2c, 0x0e1a, 0x1a3e, 0x3208, 0x0250,
+ 0x402c, 0x101a, 0x1c3e, 0x3408, 0x0450,
+ 0x402e, 0x101c, 0x1c40, 0x340a, 0x0452,
+ 0x3e2e, 0x0e1c, 0x1a40, 0x320a, 0x0252,
+ 0x3c2e, 0x0c1c, 0x1840, 0x300a, 0x0052,
+ 0x3c30, 0x0c1e, 0x1842, 0x300c, 0x0054,
+ 0x3e30, 0x0e1e, 0x1a42, 0x320c, 0x0254,
+ 0x4030, 0x101e, 0x1c42, 0x340c, 0x0454,
+ 0x4032, 0x1020, 0x1c44, 0x340e, 0x0456,
+ 0x3e32, 0x0e20, 0x1a44, 0x320e, 0x0256,
+ 0x3c32, 0x0c20, 0x1844, 0x300e, 0x0056,
+ 0x3c34, 0x0c22, 0x1846, 0x3010, 0x0058,
+ 0x3e34, 0x0e22, 0x1a46, 0x3210, 0x0258,
+ 0x4034, 0x1022, 0x1c46, 0x3410, 0x0458,
+ 0x4224, 0x1212, 0x1e36, 0x3600, 0x0648,
+ 0x4424, 0x1412, 0x2036, 0x3800, 0x0848,
+ 0x4624, 0x1612, 0x2236, 0x3a00, 0x0a48,
+ 0x4626, 0x1614, 0x2238, 0x3a02, 0x0a4a,
+ 0x4426, 0x1414, 0x2038, 0x3802, 0x084a,
+ 0x4226, 0x1214, 0x1e38, 0x3602, 0x064a,
+ 0x4228, 0x1216, 0x1e3a, 0x3604, 0x064c,
+ 0x4428, 0x1416, 0x203a, 0x3804, 0x084c,
+ 0x4628, 0x1616, 0x223a, 0x3a04, 0x0a4c,
+ 0x462a, 0x1618, 0x223c, 0x3a06, 0x0a4e,
+ 0x442a, 0x1418, 0x203c, 0x3806, 0x084e,
+ 0x422a, 0x1218, 0x1e3c, 0x3606, 0x064e,
+ 0x422c, 0x121a, 0x1e3e, 0x3608, 0x0650,
+ 0x442c, 0x141a, 0x203e, 0x3808, 0x0850,
+ 0x462c, 0x161a, 0x223e, 0x3a08, 0x0a50,
+ 0x462e, 0x161c, 0x2240, 0x3a0a, 0x0a52,
+ 0x442e, 0x141c, 0x2040, 0x380a, 0x0852,
+ 0x422e, 0x121c, 0x1e40, 0x360a, 0x0652,
+ 0x4230, 0x121e, 0x1e42, 0x360c, 0x0654,
+ 0x4430, 0x141e, 0x2042, 0x380c, 0x0854,
+ 0x4630, 0x161e, 0x2242, 0x3a0c, 0x0a54,
+ 0x4632, 0x1620, 0x2244, 0x3a0e, 0x0a56,
+ 0x4432, 0x1420, 0x2044, 0x380e, 0x0856,
+ 0x4232, 0x1220, 0x1e44, 0x360e, 0x0656,
+ 0x4234, 0x1222, 0x1e46, 0x3610, 0x0658,
+ 0x4434, 0x1422, 0x2046, 0x3810, 0x0858,
+ 0x4634, 0x1622, 0x2246, 0x3a10, 0x0a58,
+ 0x0024, 0x1812, 0x2436, 0x3c00, 0x0c48,
+ 0x0224, 0x1a12, 0x2636, 0x3e00, 0x0e48,
+ 0x0424, 0x1c12, 0x2836, 0x4000, 0x1048,
+ 0x0426, 0x1c14, 0x2838, 0x4002, 0x104a,
+ 0x0226, 0x1a14, 0x2638, 0x3e02, 0x0e4a,
+ 0x0026, 0x1814, 0x2438, 0x3c02, 0x0c4a,
+ 0x0028, 0x1816, 0x243a, 0x3c04, 0x0c4c,
+ 0x0228, 0x1a16, 0x263a, 0x3e04, 0x0e4c,
+ 0x0428, 0x1c16, 0x283a, 0x4004, 0x104c,
+ 0x042a, 0x1c18, 0x283c, 0x4006, 0x104e,
+ 0x022a, 0x1a18, 0x263c, 0x3e06, 0x0e4e,
+ 0x002a, 0x1818, 0x243c, 0x3c06, 0x0c4e,
+ 0x002c, 0x181a, 0x243e, 0x3c08, 0x0c50,
+ 0x022c, 0x1a1a, 0x263e, 0x3e08, 0x0e50,
+ 0x042c, 0x1c1a, 0x283e, 0x4008, 0x1050,
+ 0x042e, 0x1c1c, 0x2840, 0x400a, 0x1052,
+ 0x022e, 0x1a1c, 0x2640, 0x3e0a, 0x0e52,
+ 0x002e, 0x181c, 0x2440, 0x3c0a, 0x0c52,
+ 0x0030, 0x181e, 0x2442, 0x3c0c, 0x0c54,
+ 0x0230, 0x1a1e, 0x2642, 0x3e0c, 0x0e54,
+ 0x0430, 0x1c1e, 0x2842, 0x400c, 0x1054,
+ 0x0432, 0x1c20, 0x2844, 0x400e, 0x1056,
+ 0x0232, 0x1a20, 0x2644, 0x3e0e, 0x0e56,
+ 0x0032, 0x1820, 0x2444, 0x3c0e, 0x0c56,
+ 0x0034, 0x1822, 0x2446, 0x3c10, 0x0c58,
+ 0x0234, 0x1a22, 0x2646, 0x3e10, 0x0e58,
+ 0x0434, 0x1c22, 0x2846, 0x4010, 0x1058,
+ 0x0624, 0x1e12, 0x2a36, 0x4200, 0x1248,
+ 0x0824, 0x2012, 0x2c36, 0x4400, 0x1448,
+ 0x0a24, 0x2212, 0x2e36, 0x4600, 0x1648,
+ 0x0a26, 0x2214, 0x2e38, 0x4602, 0x164a,
+ 0x0826, 0x2014, 0x2c38, 0x4402, 0x144a,
+ 0x0626, 0x1e14, 0x2a38, 0x4202, 0x124a,
+ 0x0628, 0x1e16, 0x2a3a, 0x4204, 0x124c,
+ 0x0828, 0x2016, 0x2c3a, 0x4404, 0x144c,
+ 0x0a28, 0x2216, 0x2e3a, 0x4604, 0x164c,
+ 0x0a2a, 0x2218, 0x2e3c, 0x4606, 0x164e,
+ 0x082a, 0x2018, 0x2c3c, 0x4406, 0x144e,
+ 0x062a, 0x1e18, 0x2a3c, 0x4206, 0x124e,
+ 0x062c, 0x1e1a, 0x2a3e, 0x4208, 0x1250,
+ 0x082c, 0x201a, 0x2c3e, 0x4408, 0x1450,
+ 0x0a2c, 0x221a, 0x2e3e, 0x4608, 0x1650,
+ 0x0a2e, 0x221c, 0x2e40, 0x460a, 0x1652,
+ 0x082e, 0x201c, 0x2c40, 0x440a, 0x1452,
+ 0x062e, 0x1e1c, 0x2a40, 0x420a, 0x1252,
+ 0x0630, 0x1e1e, 0x2a42, 0x420c, 0x1254,
+ 0x0830, 0x201e, 0x2c42, 0x440c, 0x1454,
+ 0x0a30, 0x221e, 0x2e42, 0x460c, 0x1654,
+ 0x0a32, 0x2220, 0x2e44, 0x460e, 0x1656,
+ 0x0832, 0x2020, 0x2c44, 0x440e, 0x1456,
+ 0x0632, 0x1e20, 0x2a44, 0x420e, 0x1256,
+ 0x0634, 0x1e22, 0x2a46, 0x4210, 0x1258,
+ 0x0834, 0x2022, 0x2c46, 0x4410, 0x1458,
+ 0x0a34, 0x2222, 0x2e46, 0x4610, 0x1658,
+};
+
+static const uint16_t dv_place_411P[1620] = {
+ 0x0c24, 0x2710, 0x3334, 0x0000, 0x1848,
+ 0x0d24, 0x2810, 0x3434, 0x0100, 0x1948,
+ 0x0e24, 0x2910, 0x3534, 0x0200, 0x1a48,
+ 0x0f24, 0x2914, 0x3538, 0x0300, 0x1b48,
+ 0x1024, 0x2814, 0x3438, 0x0400, 0x1c48,
+ 0x1124, 0x2714, 0x3338, 0x0500, 0x1d48,
+ 0x1128, 0x2614, 0x3238, 0x0504, 0x1d4c,
+ 0x1028, 0x2514, 0x3138, 0x0404, 0x1c4c,
+ 0x0f28, 0x2414, 0x3038, 0x0304, 0x1b4c,
+ 0x0e28, 0x2418, 0x303c, 0x0204, 0x1a4c,
+ 0x0d28, 0x2518, 0x313c, 0x0104, 0x194c,
+ 0x0c28, 0x2618, 0x323c, 0x0004, 0x184c,
+ 0x0c2c, 0x2718, 0x333c, 0x0008, 0x1850,
+ 0x0d2c, 0x2818, 0x343c, 0x0108, 0x1950,
+ 0x0e2c, 0x2918, 0x353c, 0x0208, 0x1a50,
+ 0x0f2c, 0x291c, 0x3540, 0x0308, 0x1b50,
+ 0x102c, 0x281c, 0x3440, 0x0408, 0x1c50,
+ 0x112c, 0x271c, 0x3340, 0x0508, 0x1d50,
+ 0x1130, 0x261c, 0x3240, 0x050c, 0x1d54,
+ 0x1030, 0x251c, 0x3140, 0x040c, 0x1c54,
+ 0x0f30, 0x241c, 0x3040, 0x030c, 0x1b54,
+ 0x0e30, 0x2420, 0x3044, 0x020c, 0x1a54,
+ 0x0d30, 0x2520, 0x3144, 0x010c, 0x1954,
+ 0x0c30, 0x2620, 0x3244, 0x000c, 0x1854,
+ 0x0c34, 0x2720, 0x3344, 0x0010, 0x1858,
+ 0x0d34, 0x2820, 0x3444, 0x0110, 0x1a58,
+ 0x0e34, 0x2920, 0x3544, 0x0210, 0x1c58,
+ 0x1224, 0x2d10, 0x3934, 0x0600, 0x1e48,
+ 0x1324, 0x2e10, 0x3a34, 0x0700, 0x1f48,
+ 0x1424, 0x2f10, 0x3b34, 0x0800, 0x2048,
+ 0x1524, 0x2f14, 0x3b38, 0x0900, 0x2148,
+ 0x1624, 0x2e14, 0x3a38, 0x0a00, 0x2248,
+ 0x1724, 0x2d14, 0x3938, 0x0b00, 0x2348,
+ 0x1728, 0x2c14, 0x3838, 0x0b04, 0x234c,
+ 0x1628, 0x2b14, 0x3738, 0x0a04, 0x224c,
+ 0x1528, 0x2a14, 0x3638, 0x0904, 0x214c,
+ 0x1428, 0x2a18, 0x363c, 0x0804, 0x204c,
+ 0x1328, 0x2b18, 0x373c, 0x0704, 0x1f4c,
+ 0x1228, 0x2c18, 0x383c, 0x0604, 0x1e4c,
+ 0x122c, 0x2d18, 0x393c, 0x0608, 0x1e50,
+ 0x132c, 0x2e18, 0x3a3c, 0x0708, 0x1f50,
+ 0x142c, 0x2f18, 0x3b3c, 0x0808, 0x2050,
+ 0x152c, 0x2f1c, 0x3b40, 0x0908, 0x2150,
+ 0x162c, 0x2e1c, 0x3a40, 0x0a08, 0x2250,
+ 0x172c, 0x2d1c, 0x3940, 0x0b08, 0x2350,
+ 0x1730, 0x2c1c, 0x3840, 0x0b0c, 0x2354,
+ 0x1630, 0x2b1c, 0x3740, 0x0a0c, 0x2254,
+ 0x1530, 0x2a1c, 0x3640, 0x090c, 0x2154,
+ 0x1430, 0x2a20, 0x3644, 0x080c, 0x2054,
+ 0x1330, 0x2b20, 0x3744, 0x070c, 0x1f54,
+ 0x1230, 0x2c20, 0x3844, 0x060c, 0x1e54,
+ 0x1234, 0x2d20, 0x3944, 0x0610, 0x1e58,
+ 0x1334, 0x2e20, 0x3a44, 0x0710, 0x2058,
+ 0x1434, 0x2f20, 0x3b44, 0x0810, 0x2258,
+ 0x1824, 0x3310, 0x3f34, 0x0c00, 0x2448,
+ 0x1924, 0x3410, 0x4034, 0x0d00, 0x2548,
+ 0x1a24, 0x3510, 0x4134, 0x0e00, 0x2648,
+ 0x1b24, 0x3514, 0x4138, 0x0f00, 0x2748,
+ 0x1c24, 0x3414, 0x4038, 0x1000, 0x2848,
+ 0x1d24, 0x3314, 0x3f38, 0x1100, 0x2948,
+ 0x1d28, 0x3214, 0x3e38, 0x1104, 0x294c,
+ 0x1c28, 0x3114, 0x3d38, 0x1004, 0x284c,
+ 0x1b28, 0x3014, 0x3c38, 0x0f04, 0x274c,
+ 0x1a28, 0x3018, 0x3c3c, 0x0e04, 0x264c,
+ 0x1928, 0x3118, 0x3d3c, 0x0d04, 0x254c,
+ 0x1828, 0x3218, 0x3e3c, 0x0c04, 0x244c,
+ 0x182c, 0x3318, 0x3f3c, 0x0c08, 0x2450,
+ 0x192c, 0x3418, 0x403c, 0x0d08, 0x2550,
+ 0x1a2c, 0x3518, 0x413c, 0x0e08, 0x2650,
+ 0x1b2c, 0x351c, 0x4140, 0x0f08, 0x2750,
+ 0x1c2c, 0x341c, 0x4040, 0x1008, 0x2850,
+ 0x1d2c, 0x331c, 0x3f40, 0x1108, 0x2950,
+ 0x1d30, 0x321c, 0x3e40, 0x110c, 0x2954,
+ 0x1c30, 0x311c, 0x3d40, 0x100c, 0x2854,
+ 0x1b30, 0x301c, 0x3c40, 0x0f0c, 0x2754,
+ 0x1a30, 0x3020, 0x3c44, 0x0e0c, 0x2654,
+ 0x1930, 0x3120, 0x3d44, 0x0d0c, 0x2554,
+ 0x1830, 0x3220, 0x3e44, 0x0c0c, 0x2454,
+ 0x1834, 0x3320, 0x3f44, 0x0c10, 0x2458,
+ 0x1934, 0x3420, 0x4044, 0x0d10, 0x2658,
+ 0x1a34, 0x3520, 0x4144, 0x0e10, 0x2858,
+ 0x1e24, 0x3910, 0x4534, 0x1200, 0x2a48,
+ 0x1f24, 0x3a10, 0x4634, 0x1300, 0x2b48,
+ 0x2024, 0x3b10, 0x4734, 0x1400, 0x2c48,
+ 0x2124, 0x3b14, 0x4738, 0x1500, 0x2d48,
+ 0x2224, 0x3a14, 0x4638, 0x1600, 0x2e48,
+ 0x2324, 0x3914, 0x4538, 0x1700, 0x2f48,
+ 0x2328, 0x3814, 0x4438, 0x1704, 0x2f4c,
+ 0x2228, 0x3714, 0x4338, 0x1604, 0x2e4c,
+ 0x2128, 0x3614, 0x4238, 0x1504, 0x2d4c,
+ 0x2028, 0x3618, 0x423c, 0x1404, 0x2c4c,
+ 0x1f28, 0x3718, 0x433c, 0x1304, 0x2b4c,
+ 0x1e28, 0x3818, 0x443c, 0x1204, 0x2a4c,
+ 0x1e2c, 0x3918, 0x453c, 0x1208, 0x2a50,
+ 0x1f2c, 0x3a18, 0x463c, 0x1308, 0x2b50,
+ 0x202c, 0x3b18, 0x473c, 0x1408, 0x2c50,
+ 0x212c, 0x3b1c, 0x4740, 0x1508, 0x2d50,
+ 0x222c, 0x3a1c, 0x4640, 0x1608, 0x2e50,
+ 0x232c, 0x391c, 0x4540, 0x1708, 0x2f50,
+ 0x2330, 0x381c, 0x4440, 0x170c, 0x2f54,
+ 0x2230, 0x371c, 0x4340, 0x160c, 0x2e54,
+ 0x2130, 0x361c, 0x4240, 0x150c, 0x2d54,
+ 0x2030, 0x3620, 0x4244, 0x140c, 0x2c54,
+ 0x1f30, 0x3720, 0x4344, 0x130c, 0x2b54,
+ 0x1e30, 0x3820, 0x4444, 0x120c, 0x2a54,
+ 0x1e34, 0x3920, 0x4544, 0x1210, 0x2a58,
+ 0x1f34, 0x3a20, 0x4644, 0x1310, 0x2c58,
+ 0x2034, 0x3b20, 0x4744, 0x1410, 0x2e58,
+ 0x2424, 0x3f10, 0x0334, 0x1800, 0x3048,
+ 0x2524, 0x4010, 0x0434, 0x1900, 0x3148,
+ 0x2624, 0x4110, 0x0534, 0x1a00, 0x3248,
+ 0x2724, 0x4114, 0x0538, 0x1b00, 0x3348,
+ 0x2824, 0x4014, 0x0438, 0x1c00, 0x3448,
+ 0x2924, 0x3f14, 0x0338, 0x1d00, 0x3548,
+ 0x2928, 0x3e14, 0x0238, 0x1d04, 0x354c,
+ 0x2828, 0x3d14, 0x0138, 0x1c04, 0x344c,
+ 0x2728, 0x3c14, 0x0038, 0x1b04, 0x334c,
+ 0x2628, 0x3c18, 0x003c, 0x1a04, 0x324c,
+ 0x2528, 0x3d18, 0x013c, 0x1904, 0x314c,
+ 0x2428, 0x3e18, 0x023c, 0x1804, 0x304c,
+ 0x242c, 0x3f18, 0x033c, 0x1808, 0x3050,
+ 0x252c, 0x4018, 0x043c, 0x1908, 0x3150,
+ 0x262c, 0x4118, 0x053c, 0x1a08, 0x3250,
+ 0x272c, 0x411c, 0x0540, 0x1b08, 0x3350,
+ 0x282c, 0x401c, 0x0440, 0x1c08, 0x3450,
+ 0x292c, 0x3f1c, 0x0340, 0x1d08, 0x3550,
+ 0x2930, 0x3e1c, 0x0240, 0x1d0c, 0x3554,
+ 0x2830, 0x3d1c, 0x0140, 0x1c0c, 0x3454,
+ 0x2730, 0x3c1c, 0x0040, 0x1b0c, 0x3354,
+ 0x2630, 0x3c20, 0x0044, 0x1a0c, 0x3254,
+ 0x2530, 0x3d20, 0x0144, 0x190c, 0x3154,
+ 0x2430, 0x3e20, 0x0244, 0x180c, 0x3054,
+ 0x2434, 0x3f20, 0x0344, 0x1810, 0x3058,
+ 0x2534, 0x4020, 0x0444, 0x1910, 0x3258,
+ 0x2634, 0x4120, 0x0544, 0x1a10, 0x3458,
+ 0x2a24, 0x4510, 0x0934, 0x1e00, 0x3648,
+ 0x2b24, 0x4610, 0x0a34, 0x1f00, 0x3748,
+ 0x2c24, 0x4710, 0x0b34, 0x2000, 0x3848,
+ 0x2d24, 0x4714, 0x0b38, 0x2100, 0x3948,
+ 0x2e24, 0x4614, 0x0a38, 0x2200, 0x3a48,
+ 0x2f24, 0x4514, 0x0938, 0x2300, 0x3b48,
+ 0x2f28, 0x4414, 0x0838, 0x2304, 0x3b4c,
+ 0x2e28, 0x4314, 0x0738, 0x2204, 0x3a4c,
+ 0x2d28, 0x4214, 0x0638, 0x2104, 0x394c,
+ 0x2c28, 0x4218, 0x063c, 0x2004, 0x384c,
+ 0x2b28, 0x4318, 0x073c, 0x1f04, 0x374c,
+ 0x2a28, 0x4418, 0x083c, 0x1e04, 0x364c,
+ 0x2a2c, 0x4518, 0x093c, 0x1e08, 0x3650,
+ 0x2b2c, 0x4618, 0x0a3c, 0x1f08, 0x3750,
+ 0x2c2c, 0x4718, 0x0b3c, 0x2008, 0x3850,
+ 0x2d2c, 0x471c, 0x0b40, 0x2108, 0x3950,
+ 0x2e2c, 0x461c, 0x0a40, 0x2208, 0x3a50,
+ 0x2f2c, 0x451c, 0x0940, 0x2308, 0x3b50,
+ 0x2f30, 0x441c, 0x0840, 0x230c, 0x3b54,
+ 0x2e30, 0x431c, 0x0740, 0x220c, 0x3a54,
+ 0x2d30, 0x421c, 0x0640, 0x210c, 0x3954,
+ 0x2c30, 0x4220, 0x0644, 0x200c, 0x3854,
+ 0x2b30, 0x4320, 0x0744, 0x1f0c, 0x3754,
+ 0x2a30, 0x4420, 0x0844, 0x1e0c, 0x3654,
+ 0x2a34, 0x4520, 0x0944, 0x1e10, 0x3658,
+ 0x2b34, 0x4620, 0x0a44, 0x1f10, 0x3858,
+ 0x2c34, 0x4720, 0x0b44, 0x2010, 0x3a58,
+ 0x3024, 0x0310, 0x0f34, 0x2400, 0x3c48,
+ 0x3124, 0x0410, 0x1034, 0x2500, 0x3d48,
+ 0x3224, 0x0510, 0x1134, 0x2600, 0x3e48,
+ 0x3324, 0x0514, 0x1138, 0x2700, 0x3f48,
+ 0x3424, 0x0414, 0x1038, 0x2800, 0x4048,
+ 0x3524, 0x0314, 0x0f38, 0x2900, 0x4148,
+ 0x3528, 0x0214, 0x0e38, 0x2904, 0x414c,
+ 0x3428, 0x0114, 0x0d38, 0x2804, 0x404c,
+ 0x3328, 0x0014, 0x0c38, 0x2704, 0x3f4c,
+ 0x3228, 0x0018, 0x0c3c, 0x2604, 0x3e4c,
+ 0x3128, 0x0118, 0x0d3c, 0x2504, 0x3d4c,
+ 0x3028, 0x0218, 0x0e3c, 0x2404, 0x3c4c,
+ 0x302c, 0x0318, 0x0f3c, 0x2408, 0x3c50,
+ 0x312c, 0x0418, 0x103c, 0x2508, 0x3d50,
+ 0x322c, 0x0518, 0x113c, 0x2608, 0x3e50,
+ 0x332c, 0x051c, 0x1140, 0x2708, 0x3f50,
+ 0x342c, 0x041c, 0x1040, 0x2808, 0x4050,
+ 0x352c, 0x031c, 0x0f40, 0x2908, 0x4150,
+ 0x3530, 0x021c, 0x0e40, 0x290c, 0x4154,
+ 0x3430, 0x011c, 0x0d40, 0x280c, 0x4054,
+ 0x3330, 0x001c, 0x0c40, 0x270c, 0x3f54,
+ 0x3230, 0x0020, 0x0c44, 0x260c, 0x3e54,
+ 0x3130, 0x0120, 0x0d44, 0x250c, 0x3d54,
+ 0x3030, 0x0220, 0x0e44, 0x240c, 0x3c54,
+ 0x3034, 0x0320, 0x0f44, 0x2410, 0x3c58,
+ 0x3134, 0x0420, 0x1044, 0x2510, 0x3e58,
+ 0x3234, 0x0520, 0x1144, 0x2610, 0x4058,
+ 0x3624, 0x0910, 0x1534, 0x2a00, 0x4248,
+ 0x3724, 0x0a10, 0x1634, 0x2b00, 0x4348,
+ 0x3824, 0x0b10, 0x1734, 0x2c00, 0x4448,
+ 0x3924, 0x0b14, 0x1738, 0x2d00, 0x4548,
+ 0x3a24, 0x0a14, 0x1638, 0x2e00, 0x4648,
+ 0x3b24, 0x0914, 0x1538, 0x2f00, 0x4748,
+ 0x3b28, 0x0814, 0x1438, 0x2f04, 0x474c,
+ 0x3a28, 0x0714, 0x1338, 0x2e04, 0x464c,
+ 0x3928, 0x0614, 0x1238, 0x2d04, 0x454c,
+ 0x3828, 0x0618, 0x123c, 0x2c04, 0x444c,
+ 0x3728, 0x0718, 0x133c, 0x2b04, 0x434c,
+ 0x3628, 0x0818, 0x143c, 0x2a04, 0x424c,
+ 0x362c, 0x0918, 0x153c, 0x2a08, 0x4250,
+ 0x372c, 0x0a18, 0x163c, 0x2b08, 0x4350,
+ 0x382c, 0x0b18, 0x173c, 0x2c08, 0x4450,
+ 0x392c, 0x0b1c, 0x1740, 0x2d08, 0x4550,
+ 0x3a2c, 0x0a1c, 0x1640, 0x2e08, 0x4650,
+ 0x3b2c, 0x091c, 0x1540, 0x2f08, 0x4750,
+ 0x3b30, 0x081c, 0x1440, 0x2f0c, 0x4754,
+ 0x3a30, 0x071c, 0x1340, 0x2e0c, 0x4654,
+ 0x3930, 0x061c, 0x1240, 0x2d0c, 0x4554,
+ 0x3830, 0x0620, 0x1244, 0x2c0c, 0x4454,
+ 0x3730, 0x0720, 0x1344, 0x2b0c, 0x4354,
+ 0x3630, 0x0820, 0x1444, 0x2a0c, 0x4254,
+ 0x3634, 0x0920, 0x1544, 0x2a10, 0x4258,
+ 0x3734, 0x0a20, 0x1644, 0x2b10, 0x4458,
+ 0x3834, 0x0b20, 0x1744, 0x2c10, 0x4658,
+ 0x3c24, 0x0f10, 0x1b34, 0x3000, 0x0048,
+ 0x3d24, 0x1010, 0x1c34, 0x3100, 0x0148,
+ 0x3e24, 0x1110, 0x1d34, 0x3200, 0x0248,
+ 0x3f24, 0x1114, 0x1d38, 0x3300, 0x0348,
+ 0x4024, 0x1014, 0x1c38, 0x3400, 0x0448,
+ 0x4124, 0x0f14, 0x1b38, 0x3500, 0x0548,
+ 0x4128, 0x0e14, 0x1a38, 0x3504, 0x054c,
+ 0x4028, 0x0d14, 0x1938, 0x3404, 0x044c,
+ 0x3f28, 0x0c14, 0x1838, 0x3304, 0x034c,
+ 0x3e28, 0x0c18, 0x183c, 0x3204, 0x024c,
+ 0x3d28, 0x0d18, 0x193c, 0x3104, 0x014c,
+ 0x3c28, 0x0e18, 0x1a3c, 0x3004, 0x004c,
+ 0x3c2c, 0x0f18, 0x1b3c, 0x3008, 0x0050,
+ 0x3d2c, 0x1018, 0x1c3c, 0x3108, 0x0150,
+ 0x3e2c, 0x1118, 0x1d3c, 0x3208, 0x0250,
+ 0x3f2c, 0x111c, 0x1d40, 0x3308, 0x0350,
+ 0x402c, 0x101c, 0x1c40, 0x3408, 0x0450,
+ 0x412c, 0x0f1c, 0x1b40, 0x3508, 0x0550,
+ 0x4130, 0x0e1c, 0x1a40, 0x350c, 0x0554,
+ 0x4030, 0x0d1c, 0x1940, 0x340c, 0x0454,
+ 0x3f30, 0x0c1c, 0x1840, 0x330c, 0x0354,
+ 0x3e30, 0x0c20, 0x1844, 0x320c, 0x0254,
+ 0x3d30, 0x0d20, 0x1944, 0x310c, 0x0154,
+ 0x3c30, 0x0e20, 0x1a44, 0x300c, 0x0054,
+ 0x3c34, 0x0f20, 0x1b44, 0x3010, 0x0058,
+ 0x3d34, 0x1020, 0x1c44, 0x3110, 0x0258,
+ 0x3e34, 0x1120, 0x1d44, 0x3210, 0x0458,
+ 0x4224, 0x1510, 0x2134, 0x3600, 0x0648,
+ 0x4324, 0x1610, 0x2234, 0x3700, 0x0748,
+ 0x4424, 0x1710, 0x2334, 0x3800, 0x0848,
+ 0x4524, 0x1714, 0x2338, 0x3900, 0x0948,
+ 0x4624, 0x1614, 0x2238, 0x3a00, 0x0a48,
+ 0x4724, 0x1514, 0x2138, 0x3b00, 0x0b48,
+ 0x4728, 0x1414, 0x2038, 0x3b04, 0x0b4c,
+ 0x4628, 0x1314, 0x1f38, 0x3a04, 0x0a4c,
+ 0x4528, 0x1214, 0x1e38, 0x3904, 0x094c,
+ 0x4428, 0x1218, 0x1e3c, 0x3804, 0x084c,
+ 0x4328, 0x1318, 0x1f3c, 0x3704, 0x074c,
+ 0x4228, 0x1418, 0x203c, 0x3604, 0x064c,
+ 0x422c, 0x1518, 0x213c, 0x3608, 0x0650,
+ 0x432c, 0x1618, 0x223c, 0x3708, 0x0750,
+ 0x442c, 0x1718, 0x233c, 0x3808, 0x0850,
+ 0x452c, 0x171c, 0x2340, 0x3908, 0x0950,
+ 0x462c, 0x161c, 0x2240, 0x3a08, 0x0a50,
+ 0x472c, 0x151c, 0x2140, 0x3b08, 0x0b50,
+ 0x4730, 0x141c, 0x2040, 0x3b0c, 0x0b54,
+ 0x4630, 0x131c, 0x1f40, 0x3a0c, 0x0a54,
+ 0x4530, 0x121c, 0x1e40, 0x390c, 0x0954,
+ 0x4430, 0x1220, 0x1e44, 0x380c, 0x0854,
+ 0x4330, 0x1320, 0x1f44, 0x370c, 0x0754,
+ 0x4230, 0x1420, 0x2044, 0x360c, 0x0654,
+ 0x4234, 0x1520, 0x2144, 0x3610, 0x0658,
+ 0x4334, 0x1620, 0x2244, 0x3710, 0x0858,
+ 0x4434, 0x1720, 0x2344, 0x3810, 0x0a58,
+ 0x0024, 0x1b10, 0x2734, 0x3c00, 0x0c48,
+ 0x0124, 0x1c10, 0x2834, 0x3d00, 0x0d48,
+ 0x0224, 0x1d10, 0x2934, 0x3e00, 0x0e48,
+ 0x0324, 0x1d14, 0x2938, 0x3f00, 0x0f48,
+ 0x0424, 0x1c14, 0x2838, 0x4000, 0x1048,
+ 0x0524, 0x1b14, 0x2738, 0x4100, 0x1148,
+ 0x0528, 0x1a14, 0x2638, 0x4104, 0x114c,
+ 0x0428, 0x1914, 0x2538, 0x4004, 0x104c,
+ 0x0328, 0x1814, 0x2438, 0x3f04, 0x0f4c,
+ 0x0228, 0x1818, 0x243c, 0x3e04, 0x0e4c,
+ 0x0128, 0x1918, 0x253c, 0x3d04, 0x0d4c,
+ 0x0028, 0x1a18, 0x263c, 0x3c04, 0x0c4c,
+ 0x002c, 0x1b18, 0x273c, 0x3c08, 0x0c50,
+ 0x012c, 0x1c18, 0x283c, 0x3d08, 0x0d50,
+ 0x022c, 0x1d18, 0x293c, 0x3e08, 0x0e50,
+ 0x032c, 0x1d1c, 0x2940, 0x3f08, 0x0f50,
+ 0x042c, 0x1c1c, 0x2840, 0x4008, 0x1050,
+ 0x052c, 0x1b1c, 0x2740, 0x4108, 0x1150,
+ 0x0530, 0x1a1c, 0x2640, 0x410c, 0x1154,
+ 0x0430, 0x191c, 0x2540, 0x400c, 0x1054,
+ 0x0330, 0x181c, 0x2440, 0x3f0c, 0x0f54,
+ 0x0230, 0x1820, 0x2444, 0x3e0c, 0x0e54,
+ 0x0130, 0x1920, 0x2544, 0x3d0c, 0x0d54,
+ 0x0030, 0x1a20, 0x2644, 0x3c0c, 0x0c54,
+ 0x0034, 0x1b20, 0x2744, 0x3c10, 0x0c58,
+ 0x0134, 0x1c20, 0x2844, 0x3d10, 0x0e58,
+ 0x0234, 0x1d20, 0x2944, 0x3e10, 0x1058,
+ 0x0624, 0x2110, 0x2d34, 0x4200, 0x1248,
+ 0x0724, 0x2210, 0x2e34, 0x4300, 0x1348,
+ 0x0824, 0x2310, 0x2f34, 0x4400, 0x1448,
+ 0x0924, 0x2314, 0x2f38, 0x4500, 0x1548,
+ 0x0a24, 0x2214, 0x2e38, 0x4600, 0x1648,
+ 0x0b24, 0x2114, 0x2d38, 0x4700, 0x1748,
+ 0x0b28, 0x2014, 0x2c38, 0x4704, 0x174c,
+ 0x0a28, 0x1f14, 0x2b38, 0x4604, 0x164c,
+ 0x0928, 0x1e14, 0x2a38, 0x4504, 0x154c,
+ 0x0828, 0x1e18, 0x2a3c, 0x4404, 0x144c,
+ 0x0728, 0x1f18, 0x2b3c, 0x4304, 0x134c,
+ 0x0628, 0x2018, 0x2c3c, 0x4204, 0x124c,
+ 0x062c, 0x2118, 0x2d3c, 0x4208, 0x1250,
+ 0x072c, 0x2218, 0x2e3c, 0x4308, 0x1350,
+ 0x082c, 0x2318, 0x2f3c, 0x4408, 0x1450,
+ 0x092c, 0x231c, 0x2f40, 0x4508, 0x1550,
+ 0x0a2c, 0x221c, 0x2e40, 0x4608, 0x1650,
+ 0x0b2c, 0x211c, 0x2d40, 0x4708, 0x1750,
+ 0x0b30, 0x201c, 0x2c40, 0x470c, 0x1754,
+ 0x0a30, 0x1f1c, 0x2b40, 0x460c, 0x1654,
+ 0x0930, 0x1e1c, 0x2a40, 0x450c, 0x1554,
+ 0x0830, 0x1e20, 0x2a44, 0x440c, 0x1454,
+ 0x0730, 0x1f20, 0x2b44, 0x430c, 0x1354,
+ 0x0630, 0x2020, 0x2c44, 0x420c, 0x1254,
+ 0x0634, 0x2120, 0x2d44, 0x4210, 0x1258,
+ 0x0734, 0x2220, 0x2e44, 0x4310, 0x1458,
+ 0x0834, 0x2320, 0x2f44, 0x4410, 0x1658,
+};
+
+static const uint16_t dv_place_411[1350] = {
+ 0x0c24, 0x2710, 0x3334, 0x0000, 0x1848,
+ 0x0d24, 0x2810, 0x3434, 0x0100, 0x1948,
+ 0x0e24, 0x2910, 0x3534, 0x0200, 0x1a48,
+ 0x0f24, 0x2914, 0x3538, 0x0300, 0x1b48,
+ 0x1024, 0x2814, 0x3438, 0x0400, 0x1c48,
+ 0x1124, 0x2714, 0x3338, 0x0500, 0x1d48,
+ 0x1128, 0x2614, 0x3238, 0x0504, 0x1d4c,
+ 0x1028, 0x2514, 0x3138, 0x0404, 0x1c4c,
+ 0x0f28, 0x2414, 0x3038, 0x0304, 0x1b4c,
+ 0x0e28, 0x2418, 0x303c, 0x0204, 0x1a4c,
+ 0x0d28, 0x2518, 0x313c, 0x0104, 0x194c,
+ 0x0c28, 0x2618, 0x323c, 0x0004, 0x184c,
+ 0x0c2c, 0x2718, 0x333c, 0x0008, 0x1850,
+ 0x0d2c, 0x2818, 0x343c, 0x0108, 0x1950,
+ 0x0e2c, 0x2918, 0x353c, 0x0208, 0x1a50,
+ 0x0f2c, 0x291c, 0x3540, 0x0308, 0x1b50,
+ 0x102c, 0x281c, 0x3440, 0x0408, 0x1c50,
+ 0x112c, 0x271c, 0x3340, 0x0508, 0x1d50,
+ 0x1130, 0x261c, 0x3240, 0x050c, 0x1d54,
+ 0x1030, 0x251c, 0x3140, 0x040c, 0x1c54,
+ 0x0f30, 0x241c, 0x3040, 0x030c, 0x1b54,
+ 0x0e30, 0x2420, 0x3044, 0x020c, 0x1a54,
+ 0x0d30, 0x2520, 0x3144, 0x010c, 0x1954,
+ 0x0c30, 0x2620, 0x3244, 0x000c, 0x1854,
+ 0x0c34, 0x2720, 0x3344, 0x0010, 0x1858,
+ 0x0d34, 0x2820, 0x3444, 0x0110, 0x1a58,
+ 0x0e34, 0x2920, 0x3544, 0x0210, 0x1c58,
+ 0x1224, 0x2d10, 0x3934, 0x0600, 0x1e48,
+ 0x1324, 0x2e10, 0x3a34, 0x0700, 0x1f48,
+ 0x1424, 0x2f10, 0x3b34, 0x0800, 0x2048,
+ 0x1524, 0x2f14, 0x3b38, 0x0900, 0x2148,
+ 0x1624, 0x2e14, 0x3a38, 0x0a00, 0x2248,
+ 0x1724, 0x2d14, 0x3938, 0x0b00, 0x2348,
+ 0x1728, 0x2c14, 0x3838, 0x0b04, 0x234c,
+ 0x1628, 0x2b14, 0x3738, 0x0a04, 0x224c,
+ 0x1528, 0x2a14, 0x3638, 0x0904, 0x214c,
+ 0x1428, 0x2a18, 0x363c, 0x0804, 0x204c,
+ 0x1328, 0x2b18, 0x373c, 0x0704, 0x1f4c,
+ 0x1228, 0x2c18, 0x383c, 0x0604, 0x1e4c,
+ 0x122c, 0x2d18, 0x393c, 0x0608, 0x1e50,
+ 0x132c, 0x2e18, 0x3a3c, 0x0708, 0x1f50,
+ 0x142c, 0x2f18, 0x3b3c, 0x0808, 0x2050,
+ 0x152c, 0x2f1c, 0x3b40, 0x0908, 0x2150,
+ 0x162c, 0x2e1c, 0x3a40, 0x0a08, 0x2250,
+ 0x172c, 0x2d1c, 0x3940, 0x0b08, 0x2350,
+ 0x1730, 0x2c1c, 0x3840, 0x0b0c, 0x2354,
+ 0x1630, 0x2b1c, 0x3740, 0x0a0c, 0x2254,
+ 0x1530, 0x2a1c, 0x3640, 0x090c, 0x2154,
+ 0x1430, 0x2a20, 0x3644, 0x080c, 0x2054,
+ 0x1330, 0x2b20, 0x3744, 0x070c, 0x1f54,
+ 0x1230, 0x2c20, 0x3844, 0x060c, 0x1e54,
+ 0x1234, 0x2d20, 0x3944, 0x0610, 0x1e58,
+ 0x1334, 0x2e20, 0x3a44, 0x0710, 0x2058,
+ 0x1434, 0x2f20, 0x3b44, 0x0810, 0x2258,
+ 0x1824, 0x3310, 0x0334, 0x0c00, 0x2448,
+ 0x1924, 0x3410, 0x0434, 0x0d00, 0x2548,
+ 0x1a24, 0x3510, 0x0534, 0x0e00, 0x2648,
+ 0x1b24, 0x3514, 0x0538, 0x0f00, 0x2748,
+ 0x1c24, 0x3414, 0x0438, 0x1000, 0x2848,
+ 0x1d24, 0x3314, 0x0338, 0x1100, 0x2948,
+ 0x1d28, 0x3214, 0x0238, 0x1104, 0x294c,
+ 0x1c28, 0x3114, 0x0138, 0x1004, 0x284c,
+ 0x1b28, 0x3014, 0x0038, 0x0f04, 0x274c,
+ 0x1a28, 0x3018, 0x003c, 0x0e04, 0x264c,
+ 0x1928, 0x3118, 0x013c, 0x0d04, 0x254c,
+ 0x1828, 0x3218, 0x023c, 0x0c04, 0x244c,
+ 0x182c, 0x3318, 0x033c, 0x0c08, 0x2450,
+ 0x192c, 0x3418, 0x043c, 0x0d08, 0x2550,
+ 0x1a2c, 0x3518, 0x053c, 0x0e08, 0x2650,
+ 0x1b2c, 0x351c, 0x0540, 0x0f08, 0x2750,
+ 0x1c2c, 0x341c, 0x0440, 0x1008, 0x2850,
+ 0x1d2c, 0x331c, 0x0340, 0x1108, 0x2950,
+ 0x1d30, 0x321c, 0x0240, 0x110c, 0x2954,
+ 0x1c30, 0x311c, 0x0140, 0x100c, 0x2854,
+ 0x1b30, 0x301c, 0x0040, 0x0f0c, 0x2754,
+ 0x1a30, 0x3020, 0x0044, 0x0e0c, 0x2654,
+ 0x1930, 0x3120, 0x0144, 0x0d0c, 0x2554,
+ 0x1830, 0x3220, 0x0244, 0x0c0c, 0x2454,
+ 0x1834, 0x3320, 0x0344, 0x0c10, 0x2458,
+ 0x1934, 0x3420, 0x0444, 0x0d10, 0x2658,
+ 0x1a34, 0x3520, 0x0544, 0x0e10, 0x2858,
+ 0x1e24, 0x3910, 0x0934, 0x1200, 0x2a48,
+ 0x1f24, 0x3a10, 0x0a34, 0x1300, 0x2b48,
+ 0x2024, 0x3b10, 0x0b34, 0x1400, 0x2c48,
+ 0x2124, 0x3b14, 0x0b38, 0x1500, 0x2d48,
+ 0x2224, 0x3a14, 0x0a38, 0x1600, 0x2e48,
+ 0x2324, 0x3914, 0x0938, 0x1700, 0x2f48,
+ 0x2328, 0x3814, 0x0838, 0x1704, 0x2f4c,
+ 0x2228, 0x3714, 0x0738, 0x1604, 0x2e4c,
+ 0x2128, 0x3614, 0x0638, 0x1504, 0x2d4c,
+ 0x2028, 0x3618, 0x063c, 0x1404, 0x2c4c,
+ 0x1f28, 0x3718, 0x073c, 0x1304, 0x2b4c,
+ 0x1e28, 0x3818, 0x083c, 0x1204, 0x2a4c,
+ 0x1e2c, 0x3918, 0x093c, 0x1208, 0x2a50,
+ 0x1f2c, 0x3a18, 0x0a3c, 0x1308, 0x2b50,
+ 0x202c, 0x3b18, 0x0b3c, 0x1408, 0x2c50,
+ 0x212c, 0x3b1c, 0x0b40, 0x1508, 0x2d50,
+ 0x222c, 0x3a1c, 0x0a40, 0x1608, 0x2e50,
+ 0x232c, 0x391c, 0x0940, 0x1708, 0x2f50,
+ 0x2330, 0x381c, 0x0840, 0x170c, 0x2f54,
+ 0x2230, 0x371c, 0x0740, 0x160c, 0x2e54,
+ 0x2130, 0x361c, 0x0640, 0x150c, 0x2d54,
+ 0x2030, 0x3620, 0x0644, 0x140c, 0x2c54,
+ 0x1f30, 0x3720, 0x0744, 0x130c, 0x2b54,
+ 0x1e30, 0x3820, 0x0844, 0x120c, 0x2a54,
+ 0x1e34, 0x3920, 0x0944, 0x1210, 0x2a58,
+ 0x1f34, 0x3a20, 0x0a44, 0x1310, 0x2c58,
+ 0x2034, 0x3b20, 0x0b44, 0x1410, 0x2e58,
+ 0x2424, 0x0310, 0x0f34, 0x1800, 0x3048,
+ 0x2524, 0x0410, 0x1034, 0x1900, 0x3148,
+ 0x2624, 0x0510, 0x1134, 0x1a00, 0x3248,
+ 0x2724, 0x0514, 0x1138, 0x1b00, 0x3348,
+ 0x2824, 0x0414, 0x1038, 0x1c00, 0x3448,
+ 0x2924, 0x0314, 0x0f38, 0x1d00, 0x3548,
+ 0x2928, 0x0214, 0x0e38, 0x1d04, 0x354c,
+ 0x2828, 0x0114, 0x0d38, 0x1c04, 0x344c,
+ 0x2728, 0x0014, 0x0c38, 0x1b04, 0x334c,
+ 0x2628, 0x0018, 0x0c3c, 0x1a04, 0x324c,
+ 0x2528, 0x0118, 0x0d3c, 0x1904, 0x314c,
+ 0x2428, 0x0218, 0x0e3c, 0x1804, 0x304c,
+ 0x242c, 0x0318, 0x0f3c, 0x1808, 0x3050,
+ 0x252c, 0x0418, 0x103c, 0x1908, 0x3150,
+ 0x262c, 0x0518, 0x113c, 0x1a08, 0x3250,
+ 0x272c, 0x051c, 0x1140, 0x1b08, 0x3350,
+ 0x282c, 0x041c, 0x1040, 0x1c08, 0x3450,
+ 0x292c, 0x031c, 0x0f40, 0x1d08, 0x3550,
+ 0x2930, 0x021c, 0x0e40, 0x1d0c, 0x3554,
+ 0x2830, 0x011c, 0x0d40, 0x1c0c, 0x3454,
+ 0x2730, 0x001c, 0x0c40, 0x1b0c, 0x3354,
+ 0x2630, 0x0020, 0x0c44, 0x1a0c, 0x3254,
+ 0x2530, 0x0120, 0x0d44, 0x190c, 0x3154,
+ 0x2430, 0x0220, 0x0e44, 0x180c, 0x3054,
+ 0x2434, 0x0320, 0x0f44, 0x1810, 0x3058,
+ 0x2534, 0x0420, 0x1044, 0x1910, 0x3258,
+ 0x2634, 0x0520, 0x1144, 0x1a10, 0x3458,
+ 0x2a24, 0x0910, 0x1534, 0x1e00, 0x3648,
+ 0x2b24, 0x0a10, 0x1634, 0x1f00, 0x3748,
+ 0x2c24, 0x0b10, 0x1734, 0x2000, 0x3848,
+ 0x2d24, 0x0b14, 0x1738, 0x2100, 0x3948,
+ 0x2e24, 0x0a14, 0x1638, 0x2200, 0x3a48,
+ 0x2f24, 0x0914, 0x1538, 0x2300, 0x3b48,
+ 0x2f28, 0x0814, 0x1438, 0x2304, 0x3b4c,
+ 0x2e28, 0x0714, 0x1338, 0x2204, 0x3a4c,
+ 0x2d28, 0x0614, 0x1238, 0x2104, 0x394c,
+ 0x2c28, 0x0618, 0x123c, 0x2004, 0x384c,
+ 0x2b28, 0x0718, 0x133c, 0x1f04, 0x374c,
+ 0x2a28, 0x0818, 0x143c, 0x1e04, 0x364c,
+ 0x2a2c, 0x0918, 0x153c, 0x1e08, 0x3650,
+ 0x2b2c, 0x0a18, 0x163c, 0x1f08, 0x3750,
+ 0x2c2c, 0x0b18, 0x173c, 0x2008, 0x3850,
+ 0x2d2c, 0x0b1c, 0x1740, 0x2108, 0x3950,
+ 0x2e2c, 0x0a1c, 0x1640, 0x2208, 0x3a50,
+ 0x2f2c, 0x091c, 0x1540, 0x2308, 0x3b50,
+ 0x2f30, 0x081c, 0x1440, 0x230c, 0x3b54,
+ 0x2e30, 0x071c, 0x1340, 0x220c, 0x3a54,
+ 0x2d30, 0x061c, 0x1240, 0x210c, 0x3954,
+ 0x2c30, 0x0620, 0x1244, 0x200c, 0x3854,
+ 0x2b30, 0x0720, 0x1344, 0x1f0c, 0x3754,
+ 0x2a30, 0x0820, 0x1444, 0x1e0c, 0x3654,
+ 0x2a34, 0x0920, 0x1544, 0x1e10, 0x3658,
+ 0x2b34, 0x0a20, 0x1644, 0x1f10, 0x3858,
+ 0x2c34, 0x0b20, 0x1744, 0x2010, 0x3a58,
+ 0x3024, 0x0f10, 0x1b34, 0x2400, 0x0048,
+ 0x3124, 0x1010, 0x1c34, 0x2500, 0x0148,
+ 0x3224, 0x1110, 0x1d34, 0x2600, 0x0248,
+ 0x3324, 0x1114, 0x1d38, 0x2700, 0x0348,
+ 0x3424, 0x1014, 0x1c38, 0x2800, 0x0448,
+ 0x3524, 0x0f14, 0x1b38, 0x2900, 0x0548,
+ 0x3528, 0x0e14, 0x1a38, 0x2904, 0x054c,
+ 0x3428, 0x0d14, 0x1938, 0x2804, 0x044c,
+ 0x3328, 0x0c14, 0x1838, 0x2704, 0x034c,
+ 0x3228, 0x0c18, 0x183c, 0x2604, 0x024c,
+ 0x3128, 0x0d18, 0x193c, 0x2504, 0x014c,
+ 0x3028, 0x0e18, 0x1a3c, 0x2404, 0x004c,
+ 0x302c, 0x0f18, 0x1b3c, 0x2408, 0x0050,
+ 0x312c, 0x1018, 0x1c3c, 0x2508, 0x0150,
+ 0x322c, 0x1118, 0x1d3c, 0x2608, 0x0250,
+ 0x332c, 0x111c, 0x1d40, 0x2708, 0x0350,
+ 0x342c, 0x101c, 0x1c40, 0x2808, 0x0450,
+ 0x352c, 0x0f1c, 0x1b40, 0x2908, 0x0550,
+ 0x3530, 0x0e1c, 0x1a40, 0x290c, 0x0554,
+ 0x3430, 0x0d1c, 0x1940, 0x280c, 0x0454,
+ 0x3330, 0x0c1c, 0x1840, 0x270c, 0x0354,
+ 0x3230, 0x0c20, 0x1844, 0x260c, 0x0254,
+ 0x3130, 0x0d20, 0x1944, 0x250c, 0x0154,
+ 0x3030, 0x0e20, 0x1a44, 0x240c, 0x0054,
+ 0x3034, 0x0f20, 0x1b44, 0x2410, 0x0058,
+ 0x3134, 0x1020, 0x1c44, 0x2510, 0x0258,
+ 0x3234, 0x1120, 0x1d44, 0x2610, 0x0458,
+ 0x3624, 0x1510, 0x2134, 0x2a00, 0x0648,
+ 0x3724, 0x1610, 0x2234, 0x2b00, 0x0748,
+ 0x3824, 0x1710, 0x2334, 0x2c00, 0x0848,
+ 0x3924, 0x1714, 0x2338, 0x2d00, 0x0948,
+ 0x3a24, 0x1614, 0x2238, 0x2e00, 0x0a48,
+ 0x3b24, 0x1514, 0x2138, 0x2f00, 0x0b48,
+ 0x3b28, 0x1414, 0x2038, 0x2f04, 0x0b4c,
+ 0x3a28, 0x1314, 0x1f38, 0x2e04, 0x0a4c,
+ 0x3928, 0x1214, 0x1e38, 0x2d04, 0x094c,
+ 0x3828, 0x1218, 0x1e3c, 0x2c04, 0x084c,
+ 0x3728, 0x1318, 0x1f3c, 0x2b04, 0x074c,
+ 0x3628, 0x1418, 0x203c, 0x2a04, 0x064c,
+ 0x362c, 0x1518, 0x213c, 0x2a08, 0x0650,
+ 0x372c, 0x1618, 0x223c, 0x2b08, 0x0750,
+ 0x382c, 0x1718, 0x233c, 0x2c08, 0x0850,
+ 0x392c, 0x171c, 0x2340, 0x2d08, 0x0950,
+ 0x3a2c, 0x161c, 0x2240, 0x2e08, 0x0a50,
+ 0x3b2c, 0x151c, 0x2140, 0x2f08, 0x0b50,
+ 0x3b30, 0x141c, 0x2040, 0x2f0c, 0x0b54,
+ 0x3a30, 0x131c, 0x1f40, 0x2e0c, 0x0a54,
+ 0x3930, 0x121c, 0x1e40, 0x2d0c, 0x0954,
+ 0x3830, 0x1220, 0x1e44, 0x2c0c, 0x0854,
+ 0x3730, 0x1320, 0x1f44, 0x2b0c, 0x0754,
+ 0x3630, 0x1420, 0x2044, 0x2a0c, 0x0654,
+ 0x3634, 0x1520, 0x2144, 0x2a10, 0x0658,
+ 0x3734, 0x1620, 0x2244, 0x2b10, 0x0858,
+ 0x3834, 0x1720, 0x2344, 0x2c10, 0x0a58,
+ 0x0024, 0x1b10, 0x2734, 0x3000, 0x0c48,
+ 0x0124, 0x1c10, 0x2834, 0x3100, 0x0d48,
+ 0x0224, 0x1d10, 0x2934, 0x3200, 0x0e48,
+ 0x0324, 0x1d14, 0x2938, 0x3300, 0x0f48,
+ 0x0424, 0x1c14, 0x2838, 0x3400, 0x1048,
+ 0x0524, 0x1b14, 0x2738, 0x3500, 0x1148,
+ 0x0528, 0x1a14, 0x2638, 0x3504, 0x114c,
+ 0x0428, 0x1914, 0x2538, 0x3404, 0x104c,
+ 0x0328, 0x1814, 0x2438, 0x3304, 0x0f4c,
+ 0x0228, 0x1818, 0x243c, 0x3204, 0x0e4c,
+ 0x0128, 0x1918, 0x253c, 0x3104, 0x0d4c,
+ 0x0028, 0x1a18, 0x263c, 0x3004, 0x0c4c,
+ 0x002c, 0x1b18, 0x273c, 0x3008, 0x0c50,
+ 0x012c, 0x1c18, 0x283c, 0x3108, 0x0d50,
+ 0x022c, 0x1d18, 0x293c, 0x3208, 0x0e50,
+ 0x032c, 0x1d1c, 0x2940, 0x3308, 0x0f50,
+ 0x042c, 0x1c1c, 0x2840, 0x3408, 0x1050,
+ 0x052c, 0x1b1c, 0x2740, 0x3508, 0x1150,
+ 0x0530, 0x1a1c, 0x2640, 0x350c, 0x1154,
+ 0x0430, 0x191c, 0x2540, 0x340c, 0x1054,
+ 0x0330, 0x181c, 0x2440, 0x330c, 0x0f54,
+ 0x0230, 0x1820, 0x2444, 0x320c, 0x0e54,
+ 0x0130, 0x1920, 0x2544, 0x310c, 0x0d54,
+ 0x0030, 0x1a20, 0x2644, 0x300c, 0x0c54,
+ 0x0034, 0x1b20, 0x2744, 0x3010, 0x0c58,
+ 0x0134, 0x1c20, 0x2844, 0x3110, 0x0e58,
+ 0x0234, 0x1d20, 0x2944, 0x3210, 0x1058,
+ 0x0624, 0x2110, 0x2d34, 0x3600, 0x1248,
+ 0x0724, 0x2210, 0x2e34, 0x3700, 0x1348,
+ 0x0824, 0x2310, 0x2f34, 0x3800, 0x1448,
+ 0x0924, 0x2314, 0x2f38, 0x3900, 0x1548,
+ 0x0a24, 0x2214, 0x2e38, 0x3a00, 0x1648,
+ 0x0b24, 0x2114, 0x2d38, 0x3b00, 0x1748,
+ 0x0b28, 0x2014, 0x2c38, 0x3b04, 0x174c,
+ 0x0a28, 0x1f14, 0x2b38, 0x3a04, 0x164c,
+ 0x0928, 0x1e14, 0x2a38, 0x3904, 0x154c,
+ 0x0828, 0x1e18, 0x2a3c, 0x3804, 0x144c,
+ 0x0728, 0x1f18, 0x2b3c, 0x3704, 0x134c,
+ 0x0628, 0x2018, 0x2c3c, 0x3604, 0x124c,
+ 0x062c, 0x2118, 0x2d3c, 0x3608, 0x1250,
+ 0x072c, 0x2218, 0x2e3c, 0x3708, 0x1350,
+ 0x082c, 0x2318, 0x2f3c, 0x3808, 0x1450,
+ 0x092c, 0x231c, 0x2f40, 0x3908, 0x1550,
+ 0x0a2c, 0x221c, 0x2e40, 0x3a08, 0x1650,
+ 0x0b2c, 0x211c, 0x2d40, 0x3b08, 0x1750,
+ 0x0b30, 0x201c, 0x2c40, 0x3b0c, 0x1754,
+ 0x0a30, 0x1f1c, 0x2b40, 0x3a0c, 0x1654,
+ 0x0930, 0x1e1c, 0x2a40, 0x390c, 0x1554,
+ 0x0830, 0x1e20, 0x2a44, 0x380c, 0x1454,
+ 0x0730, 0x1f20, 0x2b44, 0x370c, 0x1354,
+ 0x0630, 0x2020, 0x2c44, 0x360c, 0x1254,
+ 0x0634, 0x2120, 0x2d44, 0x3610, 0x1258,
+ 0x0734, 0x2220, 0x2e44, 0x3710, 0x1458,
+ 0x0834, 0x2320, 0x2f44, 0x3810, 0x1658,
+};
+
+/* 4:2:2 macroblock placement tables created by dvtables.py */
+
+/* 2 channels per frame, 10 DIF sequences per channel,
+   27 video segments per DIF sequence, 5 macroblocks per video segment */
+static const uint16_t dv_place_422_525[2*10*27*5] = {
+ 0x0c48, 0x2424, 0x306c, 0x0000, 0x1890,
+ 0x0d48, 0x2524, 0x316c, 0x0100, 0x1990,
+ 0x0e48, 0x2624, 0x326c, 0x0200, 0x1a90,
+ 0x0e4c, 0x2628, 0x3270, 0x0204, 0x1a94,
+ 0x0d4c, 0x2528, 0x3170, 0x0104, 0x1994,
+ 0x0c4c, 0x2428, 0x3070, 0x0004, 0x1894,
+ 0x0c50, 0x242c, 0x3074, 0x0008, 0x1898,
+ 0x0d50, 0x252c, 0x3174, 0x0108, 0x1998,
+ 0x0e50, 0x262c, 0x3274, 0x0208, 0x1a98,
+ 0x0e54, 0x2630, 0x3278, 0x020c, 0x1a9c,
+ 0x0d54, 0x2530, 0x3178, 0x010c, 0x199c,
+ 0x0c54, 0x2430, 0x3078, 0x000c, 0x189c,
+ 0x0c58, 0x2434, 0x307c, 0x0010, 0x18a0,
+ 0x0d58, 0x2534, 0x317c, 0x0110, 0x19a0,
+ 0x0e58, 0x2634, 0x327c, 0x0210, 0x1aa0,
+ 0x0e5c, 0x2638, 0x3280, 0x0214, 0x1aa4,
+ 0x0d5c, 0x2538, 0x3180, 0x0114, 0x19a4,
+ 0x0c5c, 0x2438, 0x3080, 0x0014, 0x18a4,
+ 0x0c60, 0x243c, 0x3084, 0x0018, 0x18a8,
+ 0x0d60, 0x253c, 0x3184, 0x0118, 0x19a8,
+ 0x0e60, 0x263c, 0x3284, 0x0218, 0x1aa8,
+ 0x0e64, 0x2640, 0x3288, 0x021c, 0x1aac,
+ 0x0d64, 0x2540, 0x3188, 0x011c, 0x19ac,
+ 0x0c64, 0x2440, 0x3088, 0x001c, 0x18ac,
+ 0x0c68, 0x2444, 0x308c, 0x0020, 0x18b0,
+ 0x0d68, 0x2544, 0x318c, 0x0120, 0x19b0,
+ 0x0e68, 0x2644, 0x328c, 0x0220, 0x1ab0,
+ 0x1248, 0x2a24, 0x366c, 0x0600, 0x1e90,
+ 0x1348, 0x2b24, 0x376c, 0x0700, 0x1f90,
+ 0x1448, 0x2c24, 0x386c, 0x0800, 0x2090,
+ 0x144c, 0x2c28, 0x3870, 0x0804, 0x2094,
+ 0x134c, 0x2b28, 0x3770, 0x0704, 0x1f94,
+ 0x124c, 0x2a28, 0x3670, 0x0604, 0x1e94,
+ 0x1250, 0x2a2c, 0x3674, 0x0608, 0x1e98,
+ 0x1350, 0x2b2c, 0x3774, 0x0708, 0x1f98,
+ 0x1450, 0x2c2c, 0x3874, 0x0808, 0x2098,
+ 0x1454, 0x2c30, 0x3878, 0x080c, 0x209c,
+ 0x1354, 0x2b30, 0x3778, 0x070c, 0x1f9c,
+ 0x1254, 0x2a30, 0x3678, 0x060c, 0x1e9c,
+ 0x1258, 0x2a34, 0x367c, 0x0610, 0x1ea0,
+ 0x1358, 0x2b34, 0x377c, 0x0710, 0x1fa0,
+ 0x1458, 0x2c34, 0x387c, 0x0810, 0x20a0,
+ 0x145c, 0x2c38, 0x3880, 0x0814, 0x20a4,
+ 0x135c, 0x2b38, 0x3780, 0x0714, 0x1fa4,
+ 0x125c, 0x2a38, 0x3680, 0x0614, 0x1ea4,
+ 0x1260, 0x2a3c, 0x3684, 0x0618, 0x1ea8,
+ 0x1360, 0x2b3c, 0x3784, 0x0718, 0x1fa8,
+ 0x1460, 0x2c3c, 0x3884, 0x0818, 0x20a8,
+ 0x1464, 0x2c40, 0x3888, 0x081c, 0x20ac,
+ 0x1364, 0x2b40, 0x3788, 0x071c, 0x1fac,
+ 0x1264, 0x2a40, 0x3688, 0x061c, 0x1eac,
+ 0x1268, 0x2a44, 0x368c, 0x0620, 0x1eb0,
+ 0x1368, 0x2b44, 0x378c, 0x0720, 0x1fb0,
+ 0x1468, 0x2c44, 0x388c, 0x0820, 0x20b0,
+ 0x1848, 0x3024, 0x006c, 0x0c00, 0x2490,
+ 0x1948, 0x3124, 0x016c, 0x0d00, 0x2590,
+ 0x1a48, 0x3224, 0x026c, 0x0e00, 0x2690,
+ 0x1a4c, 0x3228, 0x0270, 0x0e04, 0x2694,
+ 0x194c, 0x3128, 0x0170, 0x0d04, 0x2594,
+ 0x184c, 0x3028, 0x0070, 0x0c04, 0x2494,
+ 0x1850, 0x302c, 0x0074, 0x0c08, 0x2498,
+ 0x1950, 0x312c, 0x0174, 0x0d08, 0x2598,
+ 0x1a50, 0x322c, 0x0274, 0x0e08, 0x2698,
+ 0x1a54, 0x3230, 0x0278, 0x0e0c, 0x269c,
+ 0x1954, 0x3130, 0x0178, 0x0d0c, 0x259c,
+ 0x1854, 0x3030, 0x0078, 0x0c0c, 0x249c,
+ 0x1858, 0x3034, 0x007c, 0x0c10, 0x24a0,
+ 0x1958, 0x3134, 0x017c, 0x0d10, 0x25a0,
+ 0x1a58, 0x3234, 0x027c, 0x0e10, 0x26a0,
+ 0x1a5c, 0x3238, 0x0280, 0x0e14, 0x26a4,
+ 0x195c, 0x3138, 0x0180, 0x0d14, 0x25a4,
+ 0x185c, 0x3038, 0x0080, 0x0c14, 0x24a4,
+ 0x1860, 0x303c, 0x0084, 0x0c18, 0x24a8,
+ 0x1960, 0x313c, 0x0184, 0x0d18, 0x25a8,
+ 0x1a60, 0x323c, 0x0284, 0x0e18, 0x26a8,
+ 0x1a64, 0x3240, 0x0288, 0x0e1c, 0x26ac,
+ 0x1964, 0x3140, 0x0188, 0x0d1c, 0x25ac,
+ 0x1864, 0x3040, 0x0088, 0x0c1c, 0x24ac,
+ 0x1868, 0x3044, 0x008c, 0x0c20, 0x24b0,
+ 0x1968, 0x3144, 0x018c, 0x0d20, 0x25b0,
+ 0x1a68, 0x3244, 0x028c, 0x0e20, 0x26b0,
+ 0x1e48, 0x3624, 0x066c, 0x1200, 0x2a90,
+ 0x1f48, 0x3724, 0x076c, 0x1300, 0x2b90,
+ 0x2048, 0x3824, 0x086c, 0x1400, 0x2c90,
+ 0x204c, 0x3828, 0x0870, 0x1404, 0x2c94,
+ 0x1f4c, 0x3728, 0x0770, 0x1304, 0x2b94,
+ 0x1e4c, 0x3628, 0x0670, 0x1204, 0x2a94,
+ 0x1e50, 0x362c, 0x0674, 0x1208, 0x2a98,
+ 0x1f50, 0x372c, 0x0774, 0x1308, 0x2b98,
+ 0x2050, 0x382c, 0x0874, 0x1408, 0x2c98,
+ 0x2054, 0x3830, 0x0878, 0x140c, 0x2c9c,
+ 0x1f54, 0x3730, 0x0778, 0x130c, 0x2b9c,
+ 0x1e54, 0x3630, 0x0678, 0x120c, 0x2a9c,
+ 0x1e58, 0x3634, 0x067c, 0x1210, 0x2aa0,
+ 0x1f58, 0x3734, 0x077c, 0x1310, 0x2ba0,
+ 0x2058, 0x3834, 0x087c, 0x1410, 0x2ca0,
+ 0x205c, 0x3838, 0x0880, 0x1414, 0x2ca4,
+ 0x1f5c, 0x3738, 0x0780, 0x1314, 0x2ba4,
+ 0x1e5c, 0x3638, 0x0680, 0x1214, 0x2aa4,
+ 0x1e60, 0x363c, 0x0684, 0x1218, 0x2aa8,
+ 0x1f60, 0x373c, 0x0784, 0x1318, 0x2ba8,
+ 0x2060, 0x383c, 0x0884, 0x1418, 0x2ca8,
+ 0x2064, 0x3840, 0x0888, 0x141c, 0x2cac,
+ 0x1f64, 0x3740, 0x0788, 0x131c, 0x2bac,
+ 0x1e64, 0x3640, 0x0688, 0x121c, 0x2aac,
+ 0x1e68, 0x3644, 0x068c, 0x1220, 0x2ab0,
+ 0x1f68, 0x3744, 0x078c, 0x1320, 0x2bb0,
+ 0x2068, 0x3844, 0x088c, 0x1420, 0x2cb0,
+ 0x2448, 0x0024, 0x0c6c, 0x1800, 0x3090,
+ 0x2548, 0x0124, 0x0d6c, 0x1900, 0x3190,
+ 0x2648, 0x0224, 0x0e6c, 0x1a00, 0x3290,
+ 0x264c, 0x0228, 0x0e70, 0x1a04, 0x3294,
+ 0x254c, 0x0128, 0x0d70, 0x1904, 0x3194,
+ 0x244c, 0x0028, 0x0c70, 0x1804, 0x3094,
+ 0x2450, 0x002c, 0x0c74, 0x1808, 0x3098,
+ 0x2550, 0x012c, 0x0d74, 0x1908, 0x3198,
+ 0x2650, 0x022c, 0x0e74, 0x1a08, 0x3298,
+ 0x2654, 0x0230, 0x0e78, 0x1a0c, 0x329c,
+ 0x2554, 0x0130, 0x0d78, 0x190c, 0x319c,
+ 0x2454, 0x0030, 0x0c78, 0x180c, 0x309c,
+ 0x2458, 0x0034, 0x0c7c, 0x1810, 0x30a0,
+ 0x2558, 0x0134, 0x0d7c, 0x1910, 0x31a0,
+ 0x2658, 0x0234, 0x0e7c, 0x1a10, 0x32a0,
+ 0x265c, 0x0238, 0x0e80, 0x1a14, 0x32a4,
+ 0x255c, 0x0138, 0x0d80, 0x1914, 0x31a4,
+ 0x245c, 0x0038, 0x0c80, 0x1814, 0x30a4,
+ 0x2460, 0x003c, 0x0c84, 0x1818, 0x30a8,
+ 0x2560, 0x013c, 0x0d84, 0x1918, 0x31a8,
+ 0x2660, 0x023c, 0x0e84, 0x1a18, 0x32a8,
+ 0x2664, 0x0240, 0x0e88, 0x1a1c, 0x32ac,
+ 0x2564, 0x0140, 0x0d88, 0x191c, 0x31ac,
+ 0x2464, 0x0040, 0x0c88, 0x181c, 0x30ac,
+ 0x2468, 0x0044, 0x0c8c, 0x1820, 0x30b0,
+ 0x2568, 0x0144, 0x0d8c, 0x1920, 0x31b0,
+ 0x2668, 0x0244, 0x0e8c, 0x1a20, 0x32b0,
+ 0x2a48, 0x0624, 0x126c, 0x1e00, 0x3690,
+ 0x2b48, 0x0724, 0x136c, 0x1f00, 0x3790,
+ 0x2c48, 0x0824, 0x146c, 0x2000, 0x3890,
+ 0x2c4c, 0x0828, 0x1470, 0x2004, 0x3894,
+ 0x2b4c, 0x0728, 0x1370, 0x1f04, 0x3794,
+ 0x2a4c, 0x0628, 0x1270, 0x1e04, 0x3694,
+ 0x2a50, 0x062c, 0x1274, 0x1e08, 0x3698,
+ 0x2b50, 0x072c, 0x1374, 0x1f08, 0x3798,
+ 0x2c50, 0x082c, 0x1474, 0x2008, 0x3898,
+ 0x2c54, 0x0830, 0x1478, 0x200c, 0x389c,
+ 0x2b54, 0x0730, 0x1378, 0x1f0c, 0x379c,
+ 0x2a54, 0x0630, 0x1278, 0x1e0c, 0x369c,
+ 0x2a58, 0x0634, 0x127c, 0x1e10, 0x36a0,
+ 0x2b58, 0x0734, 0x137c, 0x1f10, 0x37a0,
+ 0x2c58, 0x0834, 0x147c, 0x2010, 0x38a0,
+ 0x2c5c, 0x0838, 0x1480, 0x2014, 0x38a4,
+ 0x2b5c, 0x0738, 0x1380, 0x1f14, 0x37a4,
+ 0x2a5c, 0x0638, 0x1280, 0x1e14, 0x36a4,
+ 0x2a60, 0x063c, 0x1284, 0x1e18, 0x36a8,
+ 0x2b60, 0x073c, 0x1384, 0x1f18, 0x37a8,
+ 0x2c60, 0x083c, 0x1484, 0x2018, 0x38a8,
+ 0x2c64, 0x0840, 0x1488, 0x201c, 0x38ac,
+ 0x2b64, 0x0740, 0x1388, 0x1f1c, 0x37ac,
+ 0x2a64, 0x0640, 0x1288, 0x1e1c, 0x36ac,
+ 0x2a68, 0x0644, 0x128c, 0x1e20, 0x36b0,
+ 0x2b68, 0x0744, 0x138c, 0x1f20, 0x37b0,
+ 0x2c68, 0x0844, 0x148c, 0x2020, 0x38b0,
+ 0x3048, 0x0c24, 0x186c, 0x2400, 0x0090,
+ 0x3148, 0x0d24, 0x196c, 0x2500, 0x0190,
+ 0x3248, 0x0e24, 0x1a6c, 0x2600, 0x0290,
+ 0x324c, 0x0e28, 0x1a70, 0x2604, 0x0294,
+ 0x314c, 0x0d28, 0x1970, 0x2504, 0x0194,
+ 0x304c, 0x0c28, 0x1870, 0x2404, 0x0094,
+ 0x3050, 0x0c2c, 0x1874, 0x2408, 0x0098,
+ 0x3150, 0x0d2c, 0x1974, 0x2508, 0x0198,
+ 0x3250, 0x0e2c, 0x1a74, 0x2608, 0x0298,
+ 0x3254, 0x0e30, 0x1a78, 0x260c, 0x029c,
+ 0x3154, 0x0d30, 0x1978, 0x250c, 0x019c,
+ 0x3054, 0x0c30, 0x1878, 0x240c, 0x009c,
+ 0x3058, 0x0c34, 0x187c, 0x2410, 0x00a0,
+ 0x3158, 0x0d34, 0x197c, 0x2510, 0x01a0,
+ 0x3258, 0x0e34, 0x1a7c, 0x2610, 0x02a0,
+ 0x325c, 0x0e38, 0x1a80, 0x2614, 0x02a4,
+ 0x315c, 0x0d38, 0x1980, 0x2514, 0x01a4,
+ 0x305c, 0x0c38, 0x1880, 0x2414, 0x00a4,
+ 0x3060, 0x0c3c, 0x1884, 0x2418, 0x00a8,
+ 0x3160, 0x0d3c, 0x1984, 0x2518, 0x01a8,
+ 0x3260, 0x0e3c, 0x1a84, 0x2618, 0x02a8,
+ 0x3264, 0x0e40, 0x1a88, 0x261c, 0x02ac,
+ 0x3164, 0x0d40, 0x1988, 0x251c, 0x01ac,
+ 0x3064, 0x0c40, 0x1888, 0x241c, 0x00ac,
+ 0x3068, 0x0c44, 0x188c, 0x2420, 0x00b0,
+ 0x3168, 0x0d44, 0x198c, 0x2520, 0x01b0,
+ 0x3268, 0x0e44, 0x1a8c, 0x2620, 0x02b0,
+ 0x3648, 0x1224, 0x1e6c, 0x2a00, 0x0690,
+ 0x3748, 0x1324, 0x1f6c, 0x2b00, 0x0790,
+ 0x3848, 0x1424, 0x206c, 0x2c00, 0x0890,
+ 0x384c, 0x1428, 0x2070, 0x2c04, 0x0894,
+ 0x374c, 0x1328, 0x1f70, 0x2b04, 0x0794,
+ 0x364c, 0x1228, 0x1e70, 0x2a04, 0x0694,
+ 0x3650, 0x122c, 0x1e74, 0x2a08, 0x0698,
+ 0x3750, 0x132c, 0x1f74, 0x2b08, 0x0798,
+ 0x3850, 0x142c, 0x2074, 0x2c08, 0x0898,
+ 0x3854, 0x1430, 0x2078, 0x2c0c, 0x089c,
+ 0x3754, 0x1330, 0x1f78, 0x2b0c, 0x079c,
+ 0x3654, 0x1230, 0x1e78, 0x2a0c, 0x069c,
+ 0x3658, 0x1234, 0x1e7c, 0x2a10, 0x06a0,
+ 0x3758, 0x1334, 0x1f7c, 0x2b10, 0x07a0,
+ 0x3858, 0x1434, 0x207c, 0x2c10, 0x08a0,
+ 0x385c, 0x1438, 0x2080, 0x2c14, 0x08a4,
+ 0x375c, 0x1338, 0x1f80, 0x2b14, 0x07a4,
+ 0x365c, 0x1238, 0x1e80, 0x2a14, 0x06a4,
+ 0x3660, 0x123c, 0x1e84, 0x2a18, 0x06a8,
+ 0x3760, 0x133c, 0x1f84, 0x2b18, 0x07a8,
+ 0x3860, 0x143c, 0x2084, 0x2c18, 0x08a8,
+ 0x3864, 0x1440, 0x2088, 0x2c1c, 0x08ac,
+ 0x3764, 0x1340, 0x1f88, 0x2b1c, 0x07ac,
+ 0x3664, 0x1240, 0x1e88, 0x2a1c, 0x06ac,
+ 0x3668, 0x1244, 0x1e8c, 0x2a20, 0x06b0,
+ 0x3768, 0x1344, 0x1f8c, 0x2b20, 0x07b0,
+ 0x3868, 0x1444, 0x208c, 0x2c20, 0x08b0,
+ 0x0048, 0x1824, 0x246c, 0x3000, 0x0c90,
+ 0x0148, 0x1924, 0x256c, 0x3100, 0x0d90,
+ 0x0248, 0x1a24, 0x266c, 0x3200, 0x0e90,
+ 0x024c, 0x1a28, 0x2670, 0x3204, 0x0e94,
+ 0x014c, 0x1928, 0x2570, 0x3104, 0x0d94,
+ 0x004c, 0x1828, 0x2470, 0x3004, 0x0c94,
+ 0x0050, 0x182c, 0x2474, 0x3008, 0x0c98,
+ 0x0150, 0x192c, 0x2574, 0x3108, 0x0d98,
+ 0x0250, 0x1a2c, 0x2674, 0x3208, 0x0e98,
+ 0x0254, 0x1a30, 0x2678, 0x320c, 0x0e9c,
+ 0x0154, 0x1930, 0x2578, 0x310c, 0x0d9c,
+ 0x0054, 0x1830, 0x2478, 0x300c, 0x0c9c,
+ 0x0058, 0x1834, 0x247c, 0x3010, 0x0ca0,
+ 0x0158, 0x1934, 0x257c, 0x3110, 0x0da0,
+ 0x0258, 0x1a34, 0x267c, 0x3210, 0x0ea0,
+ 0x025c, 0x1a38, 0x2680, 0x3214, 0x0ea4,
+ 0x015c, 0x1938, 0x2580, 0x3114, 0x0da4,
+ 0x005c, 0x1838, 0x2480, 0x3014, 0x0ca4,
+ 0x0060, 0x183c, 0x2484, 0x3018, 0x0ca8,
+ 0x0160, 0x193c, 0x2584, 0x3118, 0x0da8,
+ 0x0260, 0x1a3c, 0x2684, 0x3218, 0x0ea8,
+ 0x0264, 0x1a40, 0x2688, 0x321c, 0x0eac,
+ 0x0164, 0x1940, 0x2588, 0x311c, 0x0dac,
+ 0x0064, 0x1840, 0x2488, 0x301c, 0x0cac,
+ 0x0068, 0x1844, 0x248c, 0x3020, 0x0cb0,
+ 0x0168, 0x1944, 0x258c, 0x3120, 0x0db0,
+ 0x0268, 0x1a44, 0x268c, 0x3220, 0x0eb0,
+ 0x0648, 0x1e24, 0x2a6c, 0x3600, 0x1290,
+ 0x0748, 0x1f24, 0x2b6c, 0x3700, 0x1390,
+ 0x0848, 0x2024, 0x2c6c, 0x3800, 0x1490,
+ 0x084c, 0x2028, 0x2c70, 0x3804, 0x1494,
+ 0x074c, 0x1f28, 0x2b70, 0x3704, 0x1394,
+ 0x064c, 0x1e28, 0x2a70, 0x3604, 0x1294,
+ 0x0650, 0x1e2c, 0x2a74, 0x3608, 0x1298,
+ 0x0750, 0x1f2c, 0x2b74, 0x3708, 0x1398,
+ 0x0850, 0x202c, 0x2c74, 0x3808, 0x1498,
+ 0x0854, 0x2030, 0x2c78, 0x380c, 0x149c,
+ 0x0754, 0x1f30, 0x2b78, 0x370c, 0x139c,
+ 0x0654, 0x1e30, 0x2a78, 0x360c, 0x129c,
+ 0x0658, 0x1e34, 0x2a7c, 0x3610, 0x12a0,
+ 0x0758, 0x1f34, 0x2b7c, 0x3710, 0x13a0,
+ 0x0858, 0x2034, 0x2c7c, 0x3810, 0x14a0,
+ 0x085c, 0x2038, 0x2c80, 0x3814, 0x14a4,
+ 0x075c, 0x1f38, 0x2b80, 0x3714, 0x13a4,
+ 0x065c, 0x1e38, 0x2a80, 0x3614, 0x12a4,
+ 0x0660, 0x1e3c, 0x2a84, 0x3618, 0x12a8,
+ 0x0760, 0x1f3c, 0x2b84, 0x3718, 0x13a8,
+ 0x0860, 0x203c, 0x2c84, 0x3818, 0x14a8,
+ 0x0864, 0x2040, 0x2c88, 0x381c, 0x14ac,
+ 0x0764, 0x1f40, 0x2b88, 0x371c, 0x13ac,
+ 0x0664, 0x1e40, 0x2a88, 0x361c, 0x12ac,
+ 0x0668, 0x1e44, 0x2a8c, 0x3620, 0x12b0,
+ 0x0768, 0x1f44, 0x2b8c, 0x3720, 0x13b0,
+ 0x0868, 0x2044, 0x2c8c, 0x3820, 0x14b0,
+ 0x0f48, 0x2724, 0x336c, 0x0300, 0x1b90,
+ 0x1048, 0x2824, 0x346c, 0x0400, 0x1c90,
+ 0x1148, 0x2924, 0x356c, 0x0500, 0x1d90,
+ 0x114c, 0x2928, 0x3570, 0x0504, 0x1d94,
+ 0x104c, 0x2828, 0x3470, 0x0404, 0x1c94,
+ 0x0f4c, 0x2728, 0x3370, 0x0304, 0x1b94,
+ 0x0f50, 0x272c, 0x3374, 0x0308, 0x1b98,
+ 0x1050, 0x282c, 0x3474, 0x0408, 0x1c98,
+ 0x1150, 0x292c, 0x3574, 0x0508, 0x1d98,
+ 0x1154, 0x2930, 0x3578, 0x050c, 0x1d9c,
+ 0x1054, 0x2830, 0x3478, 0x040c, 0x1c9c,
+ 0x0f54, 0x2730, 0x3378, 0x030c, 0x1b9c,
+ 0x0f58, 0x2734, 0x337c, 0x0310, 0x1ba0,
+ 0x1058, 0x2834, 0x347c, 0x0410, 0x1ca0,
+ 0x1158, 0x2934, 0x357c, 0x0510, 0x1da0,
+ 0x115c, 0x2938, 0x3580, 0x0514, 0x1da4,
+ 0x105c, 0x2838, 0x3480, 0x0414, 0x1ca4,
+ 0x0f5c, 0x2738, 0x3380, 0x0314, 0x1ba4,
+ 0x0f60, 0x273c, 0x3384, 0x0318, 0x1ba8,
+ 0x1060, 0x283c, 0x3484, 0x0418, 0x1ca8,
+ 0x1160, 0x293c, 0x3584, 0x0518, 0x1da8,
+ 0x1164, 0x2940, 0x3588, 0x051c, 0x1dac,
+ 0x1064, 0x2840, 0x3488, 0x041c, 0x1cac,
+ 0x0f64, 0x2740, 0x3388, 0x031c, 0x1bac,
+ 0x0f68, 0x2744, 0x338c, 0x0320, 0x1bb0,
+ 0x1068, 0x2844, 0x348c, 0x0420, 0x1cb0,
+ 0x1168, 0x2944, 0x358c, 0x0520, 0x1db0,
+ 0x1548, 0x2d24, 0x396c, 0x0900, 0x2190,
+ 0x1648, 0x2e24, 0x3a6c, 0x0a00, 0x2290,
+ 0x1748, 0x2f24, 0x3b6c, 0x0b00, 0x2390,
+ 0x174c, 0x2f28, 0x3b70, 0x0b04, 0x2394,
+ 0x164c, 0x2e28, 0x3a70, 0x0a04, 0x2294,
+ 0x154c, 0x2d28, 0x3970, 0x0904, 0x2194,
+ 0x1550, 0x2d2c, 0x3974, 0x0908, 0x2198,
+ 0x1650, 0x2e2c, 0x3a74, 0x0a08, 0x2298,
+ 0x1750, 0x2f2c, 0x3b74, 0x0b08, 0x2398,
+ 0x1754, 0x2f30, 0x3b78, 0x0b0c, 0x239c,
+ 0x1654, 0x2e30, 0x3a78, 0x0a0c, 0x229c,
+ 0x1554, 0x2d30, 0x3978, 0x090c, 0x219c,
+ 0x1558, 0x2d34, 0x397c, 0x0910, 0x21a0,
+ 0x1658, 0x2e34, 0x3a7c, 0x0a10, 0x22a0,
+ 0x1758, 0x2f34, 0x3b7c, 0x0b10, 0x23a0,
+ 0x175c, 0x2f38, 0x3b80, 0x0b14, 0x23a4,
+ 0x165c, 0x2e38, 0x3a80, 0x0a14, 0x22a4,
+ 0x155c, 0x2d38, 0x3980, 0x0914, 0x21a4,
+ 0x1560, 0x2d3c, 0x3984, 0x0918, 0x21a8,
+ 0x1660, 0x2e3c, 0x3a84, 0x0a18, 0x22a8,
+ 0x1760, 0x2f3c, 0x3b84, 0x0b18, 0x23a8,
+ 0x1764, 0x2f40, 0x3b88, 0x0b1c, 0x23ac,
+ 0x1664, 0x2e40, 0x3a88, 0x0a1c, 0x22ac,
+ 0x1564, 0x2d40, 0x3988, 0x091c, 0x21ac,
+ 0x1568, 0x2d44, 0x398c, 0x0920, 0x21b0,
+ 0x1668, 0x2e44, 0x3a8c, 0x0a20, 0x22b0,
+ 0x1768, 0x2f44, 0x3b8c, 0x0b20, 0x23b0,
+ 0x1b48, 0x3324, 0x036c, 0x0f00, 0x2790,
+ 0x1c48, 0x3424, 0x046c, 0x1000, 0x2890,
+ 0x1d48, 0x3524, 0x056c, 0x1100, 0x2990,
+ 0x1d4c, 0x3528, 0x0570, 0x1104, 0x2994,
+ 0x1c4c, 0x3428, 0x0470, 0x1004, 0x2894,
+ 0x1b4c, 0x3328, 0x0370, 0x0f04, 0x2794,
+ 0x1b50, 0x332c, 0x0374, 0x0f08, 0x2798,
+ 0x1c50, 0x342c, 0x0474, 0x1008, 0x2898,
+ 0x1d50, 0x352c, 0x0574, 0x1108, 0x2998,
+ 0x1d54, 0x3530, 0x0578, 0x110c, 0x299c,
+ 0x1c54, 0x3430, 0x0478, 0x100c, 0x289c,
+ 0x1b54, 0x3330, 0x0378, 0x0f0c, 0x279c,
+ 0x1b58, 0x3334, 0x037c, 0x0f10, 0x27a0,
+ 0x1c58, 0x3434, 0x047c, 0x1010, 0x28a0,
+ 0x1d58, 0x3534, 0x057c, 0x1110, 0x29a0,
+ 0x1d5c, 0x3538, 0x0580, 0x1114, 0x29a4,
+ 0x1c5c, 0x3438, 0x0480, 0x1014, 0x28a4,
+ 0x1b5c, 0x3338, 0x0380, 0x0f14, 0x27a4,
+ 0x1b60, 0x333c, 0x0384, 0x0f18, 0x27a8,
+ 0x1c60, 0x343c, 0x0484, 0x1018, 0x28a8,
+ 0x1d60, 0x353c, 0x0584, 0x1118, 0x29a8,
+ 0x1d64, 0x3540, 0x0588, 0x111c, 0x29ac,
+ 0x1c64, 0x3440, 0x0488, 0x101c, 0x28ac,
+ 0x1b64, 0x3340, 0x0388, 0x0f1c, 0x27ac,
+ 0x1b68, 0x3344, 0x038c, 0x0f20, 0x27b0,
+ 0x1c68, 0x3444, 0x048c, 0x1020, 0x28b0,
+ 0x1d68, 0x3544, 0x058c, 0x1120, 0x29b0,
+ 0x2148, 0x3924, 0x096c, 0x1500, 0x2d90,
+ 0x2248, 0x3a24, 0x0a6c, 0x1600, 0x2e90,
+ 0x2348, 0x3b24, 0x0b6c, 0x1700, 0x2f90,
+ 0x234c, 0x3b28, 0x0b70, 0x1704, 0x2f94,
+ 0x224c, 0x3a28, 0x0a70, 0x1604, 0x2e94,
+ 0x214c, 0x3928, 0x0970, 0x1504, 0x2d94,
+ 0x2150, 0x392c, 0x0974, 0x1508, 0x2d98,
+ 0x2250, 0x3a2c, 0x0a74, 0x1608, 0x2e98,
+ 0x2350, 0x3b2c, 0x0b74, 0x1708, 0x2f98,
+ 0x2354, 0x3b30, 0x0b78, 0x170c, 0x2f9c,
+ 0x2254, 0x3a30, 0x0a78, 0x160c, 0x2e9c,
+ 0x2154, 0x3930, 0x0978, 0x150c, 0x2d9c,
+ 0x2158, 0x3934, 0x097c, 0x1510, 0x2da0,
+ 0x2258, 0x3a34, 0x0a7c, 0x1610, 0x2ea0,
+ 0x2358, 0x3b34, 0x0b7c, 0x1710, 0x2fa0,
+ 0x235c, 0x3b38, 0x0b80, 0x1714, 0x2fa4,
+ 0x225c, 0x3a38, 0x0a80, 0x1614, 0x2ea4,
+ 0x215c, 0x3938, 0x0980, 0x1514, 0x2da4,
+ 0x2160, 0x393c, 0x0984, 0x1518, 0x2da8,
+ 0x2260, 0x3a3c, 0x0a84, 0x1618, 0x2ea8,
+ 0x2360, 0x3b3c, 0x0b84, 0x1718, 0x2fa8,
+ 0x2364, 0x3b40, 0x0b88, 0x171c, 0x2fac,
+ 0x2264, 0x3a40, 0x0a88, 0x161c, 0x2eac,
+ 0x2164, 0x3940, 0x0988, 0x151c, 0x2dac,
+ 0x2168, 0x3944, 0x098c, 0x1520, 0x2db0,
+ 0x2268, 0x3a44, 0x0a8c, 0x1620, 0x2eb0,
+ 0x2368, 0x3b44, 0x0b8c, 0x1720, 0x2fb0,
+ 0x2748, 0x0324, 0x0f6c, 0x1b00, 0x3390,
+ 0x2848, 0x0424, 0x106c, 0x1c00, 0x3490,
+ 0x2948, 0x0524, 0x116c, 0x1d00, 0x3590,
+ 0x294c, 0x0528, 0x1170, 0x1d04, 0x3594,
+ 0x284c, 0x0428, 0x1070, 0x1c04, 0x3494,
+ 0x274c, 0x0328, 0x0f70, 0x1b04, 0x3394,
+ 0x2750, 0x032c, 0x0f74, 0x1b08, 0x3398,
+ 0x2850, 0x042c, 0x1074, 0x1c08, 0x3498,
+ 0x2950, 0x052c, 0x1174, 0x1d08, 0x3598,
+ 0x2954, 0x0530, 0x1178, 0x1d0c, 0x359c,
+ 0x2854, 0x0430, 0x1078, 0x1c0c, 0x349c,
+ 0x2754, 0x0330, 0x0f78, 0x1b0c, 0x339c,
+ 0x2758, 0x0334, 0x0f7c, 0x1b10, 0x33a0,
+ 0x2858, 0x0434, 0x107c, 0x1c10, 0x34a0,
+ 0x2958, 0x0534, 0x117c, 0x1d10, 0x35a0,
+ 0x295c, 0x0538, 0x1180, 0x1d14, 0x35a4,
+ 0x285c, 0x0438, 0x1080, 0x1c14, 0x34a4,
+ 0x275c, 0x0338, 0x0f80, 0x1b14, 0x33a4,
+ 0x2760, 0x033c, 0x0f84, 0x1b18, 0x33a8,
+ 0x2860, 0x043c, 0x1084, 0x1c18, 0x34a8,
+ 0x2960, 0x053c, 0x1184, 0x1d18, 0x35a8,
+ 0x2964, 0x0540, 0x1188, 0x1d1c, 0x35ac,
+ 0x2864, 0x0440, 0x1088, 0x1c1c, 0x34ac,
+ 0x2764, 0x0340, 0x0f88, 0x1b1c, 0x33ac,
+ 0x2768, 0x0344, 0x0f8c, 0x1b20, 0x33b0,
+ 0x2868, 0x0444, 0x108c, 0x1c20, 0x34b0,
+ 0x2968, 0x0544, 0x118c, 0x1d20, 0x35b0,
+ 0x2d48, 0x0924, 0x156c, 0x2100, 0x3990,
+ 0x2e48, 0x0a24, 0x166c, 0x2200, 0x3a90,
+ 0x2f48, 0x0b24, 0x176c, 0x2300, 0x3b90,
+ 0x2f4c, 0x0b28, 0x1770, 0x2304, 0x3b94,
+ 0x2e4c, 0x0a28, 0x1670, 0x2204, 0x3a94,
+ 0x2d4c, 0x0928, 0x1570, 0x2104, 0x3994,
+ 0x2d50, 0x092c, 0x1574, 0x2108, 0x3998,
+ 0x2e50, 0x0a2c, 0x1674, 0x2208, 0x3a98,
+ 0x2f50, 0x0b2c, 0x1774, 0x2308, 0x3b98,
+ 0x2f54, 0x0b30, 0x1778, 0x230c, 0x3b9c,
+ 0x2e54, 0x0a30, 0x1678, 0x220c, 0x3a9c,
+ 0x2d54, 0x0930, 0x1578, 0x210c, 0x399c,
+ 0x2d58, 0x0934, 0x157c, 0x2110, 0x39a0,
+ 0x2e58, 0x0a34, 0x167c, 0x2210, 0x3aa0,
+ 0x2f58, 0x0b34, 0x177c, 0x2310, 0x3ba0,
+ 0x2f5c, 0x0b38, 0x1780, 0x2314, 0x3ba4,
+ 0x2e5c, 0x0a38, 0x1680, 0x2214, 0x3aa4,
+ 0x2d5c, 0x0938, 0x1580, 0x2114, 0x39a4,
+ 0x2d60, 0x093c, 0x1584, 0x2118, 0x39a8,
+ 0x2e60, 0x0a3c, 0x1684, 0x2218, 0x3aa8,
+ 0x2f60, 0x0b3c, 0x1784, 0x2318, 0x3ba8,
+ 0x2f64, 0x0b40, 0x1788, 0x231c, 0x3bac,
+ 0x2e64, 0x0a40, 0x1688, 0x221c, 0x3aac,
+ 0x2d64, 0x0940, 0x1588, 0x211c, 0x39ac,
+ 0x2d68, 0x0944, 0x158c, 0x2120, 0x39b0,
+ 0x2e68, 0x0a44, 0x168c, 0x2220, 0x3ab0,
+ 0x2f68, 0x0b44, 0x178c, 0x2320, 0x3bb0,
+ 0x3348, 0x0f24, 0x1b6c, 0x2700, 0x0390,
+ 0x3448, 0x1024, 0x1c6c, 0x2800, 0x0490,
+ 0x3548, 0x1124, 0x1d6c, 0x2900, 0x0590,
+ 0x354c, 0x1128, 0x1d70, 0x2904, 0x0594,
+ 0x344c, 0x1028, 0x1c70, 0x2804, 0x0494,
+ 0x334c, 0x0f28, 0x1b70, 0x2704, 0x0394,
+ 0x3350, 0x0f2c, 0x1b74, 0x2708, 0x0398,
+ 0x3450, 0x102c, 0x1c74, 0x2808, 0x0498,
+ 0x3550, 0x112c, 0x1d74, 0x2908, 0x0598,
+ 0x3554, 0x1130, 0x1d78, 0x290c, 0x059c,
+ 0x3454, 0x1030, 0x1c78, 0x280c, 0x049c,
+ 0x3354, 0x0f30, 0x1b78, 0x270c, 0x039c,
+ 0x3358, 0x0f34, 0x1b7c, 0x2710, 0x03a0,
+ 0x3458, 0x1034, 0x1c7c, 0x2810, 0x04a0,
+ 0x3558, 0x1134, 0x1d7c, 0x2910, 0x05a0,
+ 0x355c, 0x1138, 0x1d80, 0x2914, 0x05a4,
+ 0x345c, 0x1038, 0x1c80, 0x2814, 0x04a4,
+ 0x335c, 0x0f38, 0x1b80, 0x2714, 0x03a4,
+ 0x3360, 0x0f3c, 0x1b84, 0x2718, 0x03a8,
+ 0x3460, 0x103c, 0x1c84, 0x2818, 0x04a8,
+ 0x3560, 0x113c, 0x1d84, 0x2918, 0x05a8,
+ 0x3564, 0x1140, 0x1d88, 0x291c, 0x05ac,
+ 0x3464, 0x1040, 0x1c88, 0x281c, 0x04ac,
+ 0x3364, 0x0f40, 0x1b88, 0x271c, 0x03ac,
+ 0x3368, 0x0f44, 0x1b8c, 0x2720, 0x03b0,
+ 0x3468, 0x1044, 0x1c8c, 0x2820, 0x04b0,
+ 0x3568, 0x1144, 0x1d8c, 0x2920, 0x05b0,
+ 0x3948, 0x1524, 0x216c, 0x2d00, 0x0990,
+ 0x3a48, 0x1624, 0x226c, 0x2e00, 0x0a90,
+ 0x3b48, 0x1724, 0x236c, 0x2f00, 0x0b90,
+ 0x3b4c, 0x1728, 0x2370, 0x2f04, 0x0b94,
+ 0x3a4c, 0x1628, 0x2270, 0x2e04, 0x0a94,
+ 0x394c, 0x1528, 0x2170, 0x2d04, 0x0994,
+ 0x3950, 0x152c, 0x2174, 0x2d08, 0x0998,
+ 0x3a50, 0x162c, 0x2274, 0x2e08, 0x0a98,
+ 0x3b50, 0x172c, 0x2374, 0x2f08, 0x0b98,
+ 0x3b54, 0x1730, 0x2378, 0x2f0c, 0x0b9c,
+ 0x3a54, 0x1630, 0x2278, 0x2e0c, 0x0a9c,
+ 0x3954, 0x1530, 0x2178, 0x2d0c, 0x099c,
+ 0x3958, 0x1534, 0x217c, 0x2d10, 0x09a0,
+ 0x3a58, 0x1634, 0x227c, 0x2e10, 0x0aa0,
+ 0x3b58, 0x1734, 0x237c, 0x2f10, 0x0ba0,
+ 0x3b5c, 0x1738, 0x2380, 0x2f14, 0x0ba4,
+ 0x3a5c, 0x1638, 0x2280, 0x2e14, 0x0aa4,
+ 0x395c, 0x1538, 0x2180, 0x2d14, 0x09a4,
+ 0x3960, 0x153c, 0x2184, 0x2d18, 0x09a8,
+ 0x3a60, 0x163c, 0x2284, 0x2e18, 0x0aa8,
+ 0x3b60, 0x173c, 0x2384, 0x2f18, 0x0ba8,
+ 0x3b64, 0x1740, 0x2388, 0x2f1c, 0x0bac,
+ 0x3a64, 0x1640, 0x2288, 0x2e1c, 0x0aac,
+ 0x3964, 0x1540, 0x2188, 0x2d1c, 0x09ac,
+ 0x3968, 0x1544, 0x218c, 0x2d20, 0x09b0,
+ 0x3a68, 0x1644, 0x228c, 0x2e20, 0x0ab0,
+ 0x3b68, 0x1744, 0x238c, 0x2f20, 0x0bb0,
+ 0x0348, 0x1b24, 0x276c, 0x3300, 0x0f90,
+ 0x0448, 0x1c24, 0x286c, 0x3400, 0x1090,
+ 0x0548, 0x1d24, 0x296c, 0x3500, 0x1190,
+ 0x054c, 0x1d28, 0x2970, 0x3504, 0x1194,
+ 0x044c, 0x1c28, 0x2870, 0x3404, 0x1094,
+ 0x034c, 0x1b28, 0x2770, 0x3304, 0x0f94,
+ 0x0350, 0x1b2c, 0x2774, 0x3308, 0x0f98,
+ 0x0450, 0x1c2c, 0x2874, 0x3408, 0x1098,
+ 0x0550, 0x1d2c, 0x2974, 0x3508, 0x1198,
+ 0x0554, 0x1d30, 0x2978, 0x350c, 0x119c,
+ 0x0454, 0x1c30, 0x2878, 0x340c, 0x109c,
+ 0x0354, 0x1b30, 0x2778, 0x330c, 0x0f9c,
+ 0x0358, 0x1b34, 0x277c, 0x3310, 0x0fa0,
+ 0x0458, 0x1c34, 0x287c, 0x3410, 0x10a0,
+ 0x0558, 0x1d34, 0x297c, 0x3510, 0x11a0,
+ 0x055c, 0x1d38, 0x2980, 0x3514, 0x11a4,
+ 0x045c, 0x1c38, 0x2880, 0x3414, 0x10a4,
+ 0x035c, 0x1b38, 0x2780, 0x3314, 0x0fa4,
+ 0x0360, 0x1b3c, 0x2784, 0x3318, 0x0fa8,
+ 0x0460, 0x1c3c, 0x2884, 0x3418, 0x10a8,
+ 0x0560, 0x1d3c, 0x2984, 0x3518, 0x11a8,
+ 0x0564, 0x1d40, 0x2988, 0x351c, 0x11ac,
+ 0x0464, 0x1c40, 0x2888, 0x341c, 0x10ac,
+ 0x0364, 0x1b40, 0x2788, 0x331c, 0x0fac,
+ 0x0368, 0x1b44, 0x278c, 0x3320, 0x0fb0,
+ 0x0468, 0x1c44, 0x288c, 0x3420, 0x10b0,
+ 0x0568, 0x1d44, 0x298c, 0x3520, 0x11b0,
+ 0x0948, 0x2124, 0x2d6c, 0x3900, 0x1590,
+ 0x0a48, 0x2224, 0x2e6c, 0x3a00, 0x1690,
+ 0x0b48, 0x2324, 0x2f6c, 0x3b00, 0x1790,
+ 0x0b4c, 0x2328, 0x2f70, 0x3b04, 0x1794,
+ 0x0a4c, 0x2228, 0x2e70, 0x3a04, 0x1694,
+ 0x094c, 0x2128, 0x2d70, 0x3904, 0x1594,
+ 0x0950, 0x212c, 0x2d74, 0x3908, 0x1598,
+ 0x0a50, 0x222c, 0x2e74, 0x3a08, 0x1698,
+ 0x0b50, 0x232c, 0x2f74, 0x3b08, 0x1798,
+ 0x0b54, 0x2330, 0x2f78, 0x3b0c, 0x179c,
+ 0x0a54, 0x2230, 0x2e78, 0x3a0c, 0x169c,
+ 0x0954, 0x2130, 0x2d78, 0x390c, 0x159c,
+ 0x0958, 0x2134, 0x2d7c, 0x3910, 0x15a0,
+ 0x0a58, 0x2234, 0x2e7c, 0x3a10, 0x16a0,
+ 0x0b58, 0x2334, 0x2f7c, 0x3b10, 0x17a0,
+ 0x0b5c, 0x2338, 0x2f80, 0x3b14, 0x17a4,
+ 0x0a5c, 0x2238, 0x2e80, 0x3a14, 0x16a4,
+ 0x095c, 0x2138, 0x2d80, 0x3914, 0x15a4,
+ 0x0960, 0x213c, 0x2d84, 0x3918, 0x15a8,
+ 0x0a60, 0x223c, 0x2e84, 0x3a18, 0x16a8,
+ 0x0b60, 0x233c, 0x2f84, 0x3b18, 0x17a8,
+ 0x0b64, 0x2340, 0x2f88, 0x3b1c, 0x17ac,
+ 0x0a64, 0x2240, 0x2e88, 0x3a1c, 0x16ac,
+ 0x0964, 0x2140, 0x2d88, 0x391c, 0x15ac,
+ 0x0968, 0x2144, 0x2d8c, 0x3920, 0x15b0,
+ 0x0a68, 0x2244, 0x2e8c, 0x3a20, 0x16b0,
+ 0x0b68, 0x2344, 0x2f8c, 0x3b20, 0x17b0,
+};
+
+/* 2 channels per frame, 12 DIF sequences per channel,
+   27 video segments per DIF sequence, 5 macroblocks per video segment */
+static const uint16_t dv_place_422_625[2*12*27*5] = {
+ 0x0c48, 0x2424, 0x306c, 0x0000, 0x1890,
+ 0x0d48, 0x2524, 0x316c, 0x0100, 0x1990,
+ 0x0e48, 0x2624, 0x326c, 0x0200, 0x1a90,
+ 0x0e4c, 0x2628, 0x3270, 0x0204, 0x1a94,
+ 0x0d4c, 0x2528, 0x3170, 0x0104, 0x1994,
+ 0x0c4c, 0x2428, 0x3070, 0x0004, 0x1894,
+ 0x0c50, 0x242c, 0x3074, 0x0008, 0x1898,
+ 0x0d50, 0x252c, 0x3174, 0x0108, 0x1998,
+ 0x0e50, 0x262c, 0x3274, 0x0208, 0x1a98,
+ 0x0e54, 0x2630, 0x3278, 0x020c, 0x1a9c,
+ 0x0d54, 0x2530, 0x3178, 0x010c, 0x199c,
+ 0x0c54, 0x2430, 0x3078, 0x000c, 0x189c,
+ 0x0c58, 0x2434, 0x307c, 0x0010, 0x18a0,
+ 0x0d58, 0x2534, 0x317c, 0x0110, 0x19a0,
+ 0x0e58, 0x2634, 0x327c, 0x0210, 0x1aa0,
+ 0x0e5c, 0x2638, 0x3280, 0x0214, 0x1aa4,
+ 0x0d5c, 0x2538, 0x3180, 0x0114, 0x19a4,
+ 0x0c5c, 0x2438, 0x3080, 0x0014, 0x18a4,
+ 0x0c60, 0x243c, 0x3084, 0x0018, 0x18a8,
+ 0x0d60, 0x253c, 0x3184, 0x0118, 0x19a8,
+ 0x0e60, 0x263c, 0x3284, 0x0218, 0x1aa8,
+ 0x0e64, 0x2640, 0x3288, 0x021c, 0x1aac,
+ 0x0d64, 0x2540, 0x3188, 0x011c, 0x19ac,
+ 0x0c64, 0x2440, 0x3088, 0x001c, 0x18ac,
+ 0x0c68, 0x2444, 0x308c, 0x0020, 0x18b0,
+ 0x0d68, 0x2544, 0x318c, 0x0120, 0x19b0,
+ 0x0e68, 0x2644, 0x328c, 0x0220, 0x1ab0,
+ 0x1248, 0x2a24, 0x366c, 0x0600, 0x1e90,
+ 0x1348, 0x2b24, 0x376c, 0x0700, 0x1f90,
+ 0x1448, 0x2c24, 0x386c, 0x0800, 0x2090,
+ 0x144c, 0x2c28, 0x3870, 0x0804, 0x2094,
+ 0x134c, 0x2b28, 0x3770, 0x0704, 0x1f94,
+ 0x124c, 0x2a28, 0x3670, 0x0604, 0x1e94,
+ 0x1250, 0x2a2c, 0x3674, 0x0608, 0x1e98,
+ 0x1350, 0x2b2c, 0x3774, 0x0708, 0x1f98,
+ 0x1450, 0x2c2c, 0x3874, 0x0808, 0x2098,
+ 0x1454, 0x2c30, 0x3878, 0x080c, 0x209c,
+ 0x1354, 0x2b30, 0x3778, 0x070c, 0x1f9c,
+ 0x1254, 0x2a30, 0x3678, 0x060c, 0x1e9c,
+ 0x1258, 0x2a34, 0x367c, 0x0610, 0x1ea0,
+ 0x1358, 0x2b34, 0x377c, 0x0710, 0x1fa0,
+ 0x1458, 0x2c34, 0x387c, 0x0810, 0x20a0,
+ 0x145c, 0x2c38, 0x3880, 0x0814, 0x20a4,
+ 0x135c, 0x2b38, 0x3780, 0x0714, 0x1fa4,
+ 0x125c, 0x2a38, 0x3680, 0x0614, 0x1ea4,
+ 0x1260, 0x2a3c, 0x3684, 0x0618, 0x1ea8,
+ 0x1360, 0x2b3c, 0x3784, 0x0718, 0x1fa8,
+ 0x1460, 0x2c3c, 0x3884, 0x0818, 0x20a8,
+ 0x1464, 0x2c40, 0x3888, 0x081c, 0x20ac,
+ 0x1364, 0x2b40, 0x3788, 0x071c, 0x1fac,
+ 0x1264, 0x2a40, 0x3688, 0x061c, 0x1eac,
+ 0x1268, 0x2a44, 0x368c, 0x0620, 0x1eb0,
+ 0x1368, 0x2b44, 0x378c, 0x0720, 0x1fb0,
+ 0x1468, 0x2c44, 0x388c, 0x0820, 0x20b0,
+ 0x1848, 0x3024, 0x3c6c, 0x0c00, 0x2490,
+ 0x1948, 0x3124, 0x3d6c, 0x0d00, 0x2590,
+ 0x1a48, 0x3224, 0x3e6c, 0x0e00, 0x2690,
+ 0x1a4c, 0x3228, 0x3e70, 0x0e04, 0x2694,
+ 0x194c, 0x3128, 0x3d70, 0x0d04, 0x2594,
+ 0x184c, 0x3028, 0x3c70, 0x0c04, 0x2494,
+ 0x1850, 0x302c, 0x3c74, 0x0c08, 0x2498,
+ 0x1950, 0x312c, 0x3d74, 0x0d08, 0x2598,
+ 0x1a50, 0x322c, 0x3e74, 0x0e08, 0x2698,
+ 0x1a54, 0x3230, 0x3e78, 0x0e0c, 0x269c,
+ 0x1954, 0x3130, 0x3d78, 0x0d0c, 0x259c,
+ 0x1854, 0x3030, 0x3c78, 0x0c0c, 0x249c,
+ 0x1858, 0x3034, 0x3c7c, 0x0c10, 0x24a0,
+ 0x1958, 0x3134, 0x3d7c, 0x0d10, 0x25a0,
+ 0x1a58, 0x3234, 0x3e7c, 0x0e10, 0x26a0,
+ 0x1a5c, 0x3238, 0x3e80, 0x0e14, 0x26a4,
+ 0x195c, 0x3138, 0x3d80, 0x0d14, 0x25a4,
+ 0x185c, 0x3038, 0x3c80, 0x0c14, 0x24a4,
+ 0x1860, 0x303c, 0x3c84, 0x0c18, 0x24a8,
+ 0x1960, 0x313c, 0x3d84, 0x0d18, 0x25a8,
+ 0x1a60, 0x323c, 0x3e84, 0x0e18, 0x26a8,
+ 0x1a64, 0x3240, 0x3e88, 0x0e1c, 0x26ac,
+ 0x1964, 0x3140, 0x3d88, 0x0d1c, 0x25ac,
+ 0x1864, 0x3040, 0x3c88, 0x0c1c, 0x24ac,
+ 0x1868, 0x3044, 0x3c8c, 0x0c20, 0x24b0,
+ 0x1968, 0x3144, 0x3d8c, 0x0d20, 0x25b0,
+ 0x1a68, 0x3244, 0x3e8c, 0x0e20, 0x26b0,
+ 0x1e48, 0x3624, 0x426c, 0x1200, 0x2a90,
+ 0x1f48, 0x3724, 0x436c, 0x1300, 0x2b90,
+ 0x2048, 0x3824, 0x446c, 0x1400, 0x2c90,
+ 0x204c, 0x3828, 0x4470, 0x1404, 0x2c94,
+ 0x1f4c, 0x3728, 0x4370, 0x1304, 0x2b94,
+ 0x1e4c, 0x3628, 0x4270, 0x1204, 0x2a94,
+ 0x1e50, 0x362c, 0x4274, 0x1208, 0x2a98,
+ 0x1f50, 0x372c, 0x4374, 0x1308, 0x2b98,
+ 0x2050, 0x382c, 0x4474, 0x1408, 0x2c98,
+ 0x2054, 0x3830, 0x4478, 0x140c, 0x2c9c,
+ 0x1f54, 0x3730, 0x4378, 0x130c, 0x2b9c,
+ 0x1e54, 0x3630, 0x4278, 0x120c, 0x2a9c,
+ 0x1e58, 0x3634, 0x427c, 0x1210, 0x2aa0,
+ 0x1f58, 0x3734, 0x437c, 0x1310, 0x2ba0,
+ 0x2058, 0x3834, 0x447c, 0x1410, 0x2ca0,
+ 0x205c, 0x3838, 0x4480, 0x1414, 0x2ca4,
+ 0x1f5c, 0x3738, 0x4380, 0x1314, 0x2ba4,
+ 0x1e5c, 0x3638, 0x4280, 0x1214, 0x2aa4,
+ 0x1e60, 0x363c, 0x4284, 0x1218, 0x2aa8,
+ 0x1f60, 0x373c, 0x4384, 0x1318, 0x2ba8,
+ 0x2060, 0x383c, 0x4484, 0x1418, 0x2ca8,
+ 0x2064, 0x3840, 0x4488, 0x141c, 0x2cac,
+ 0x1f64, 0x3740, 0x4388, 0x131c, 0x2bac,
+ 0x1e64, 0x3640, 0x4288, 0x121c, 0x2aac,
+ 0x1e68, 0x3644, 0x428c, 0x1220, 0x2ab0,
+ 0x1f68, 0x3744, 0x438c, 0x1320, 0x2bb0,
+ 0x2068, 0x3844, 0x448c, 0x1420, 0x2cb0,
+ 0x2448, 0x3c24, 0x006c, 0x1800, 0x3090,
+ 0x2548, 0x3d24, 0x016c, 0x1900, 0x3190,
+ 0x2648, 0x3e24, 0x026c, 0x1a00, 0x3290,
+ 0x264c, 0x3e28, 0x0270, 0x1a04, 0x3294,
+ 0x254c, 0x3d28, 0x0170, 0x1904, 0x3194,
+ 0x244c, 0x3c28, 0x0070, 0x1804, 0x3094,
+ 0x2450, 0x3c2c, 0x0074, 0x1808, 0x3098,
+ 0x2550, 0x3d2c, 0x0174, 0x1908, 0x3198,
+ 0x2650, 0x3e2c, 0x0274, 0x1a08, 0x3298,
+ 0x2654, 0x3e30, 0x0278, 0x1a0c, 0x329c,
+ 0x2554, 0x3d30, 0x0178, 0x190c, 0x319c,
+ 0x2454, 0x3c30, 0x0078, 0x180c, 0x309c,
+ 0x2458, 0x3c34, 0x007c, 0x1810, 0x30a0,
+ 0x2558, 0x3d34, 0x017c, 0x1910, 0x31a0,
+ 0x2658, 0x3e34, 0x027c, 0x1a10, 0x32a0,
+ 0x265c, 0x3e38, 0x0280, 0x1a14, 0x32a4,
+ 0x255c, 0x3d38, 0x0180, 0x1914, 0x31a4,
+ 0x245c, 0x3c38, 0x0080, 0x1814, 0x30a4,
+ 0x2460, 0x3c3c, 0x0084, 0x1818, 0x30a8,
+ 0x2560, 0x3d3c, 0x0184, 0x1918, 0x31a8,
+ 0x2660, 0x3e3c, 0x0284, 0x1a18, 0x32a8,
+ 0x2664, 0x3e40, 0x0288, 0x1a1c, 0x32ac,
+ 0x2564, 0x3d40, 0x0188, 0x191c, 0x31ac,
+ 0x2464, 0x3c40, 0x0088, 0x181c, 0x30ac,
+ 0x2468, 0x3c44, 0x008c, 0x1820, 0x30b0,
+ 0x2568, 0x3d44, 0x018c, 0x1920, 0x31b0,
+ 0x2668, 0x3e44, 0x028c, 0x1a20, 0x32b0,
+ 0x2a48, 0x4224, 0x066c, 0x1e00, 0x3690,
+ 0x2b48, 0x4324, 0x076c, 0x1f00, 0x3790,
+ 0x2c48, 0x4424, 0x086c, 0x2000, 0x3890,
+ 0x2c4c, 0x4428, 0x0870, 0x2004, 0x3894,
+ 0x2b4c, 0x4328, 0x0770, 0x1f04, 0x3794,
+ 0x2a4c, 0x4228, 0x0670, 0x1e04, 0x3694,
+ 0x2a50, 0x422c, 0x0674, 0x1e08, 0x3698,
+ 0x2b50, 0x432c, 0x0774, 0x1f08, 0x3798,
+ 0x2c50, 0x442c, 0x0874, 0x2008, 0x3898,
+ 0x2c54, 0x4430, 0x0878, 0x200c, 0x389c,
+ 0x2b54, 0x4330, 0x0778, 0x1f0c, 0x379c,
+ 0x2a54, 0x4230, 0x0678, 0x1e0c, 0x369c,
+ 0x2a58, 0x4234, 0x067c, 0x1e10, 0x36a0,
+ 0x2b58, 0x4334, 0x077c, 0x1f10, 0x37a0,
+ 0x2c58, 0x4434, 0x087c, 0x2010, 0x38a0,
+ 0x2c5c, 0x4438, 0x0880, 0x2014, 0x38a4,
+ 0x2b5c, 0x4338, 0x0780, 0x1f14, 0x37a4,
+ 0x2a5c, 0x4238, 0x0680, 0x1e14, 0x36a4,
+ 0x2a60, 0x423c, 0x0684, 0x1e18, 0x36a8,
+ 0x2b60, 0x433c, 0x0784, 0x1f18, 0x37a8,
+ 0x2c60, 0x443c, 0x0884, 0x2018, 0x38a8,
+ 0x2c64, 0x4440, 0x0888, 0x201c, 0x38ac,
+ 0x2b64, 0x4340, 0x0788, 0x1f1c, 0x37ac,
+ 0x2a64, 0x4240, 0x0688, 0x1e1c, 0x36ac,
+ 0x2a68, 0x4244, 0x068c, 0x1e20, 0x36b0,
+ 0x2b68, 0x4344, 0x078c, 0x1f20, 0x37b0,
+ 0x2c68, 0x4444, 0x088c, 0x2020, 0x38b0,
+ 0x3048, 0x0024, 0x0c6c, 0x2400, 0x3c90,
+ 0x3148, 0x0124, 0x0d6c, 0x2500, 0x3d90,
+ 0x3248, 0x0224, 0x0e6c, 0x2600, 0x3e90,
+ 0x324c, 0x0228, 0x0e70, 0x2604, 0x3e94,
+ 0x314c, 0x0128, 0x0d70, 0x2504, 0x3d94,
+ 0x304c, 0x0028, 0x0c70, 0x2404, 0x3c94,
+ 0x3050, 0x002c, 0x0c74, 0x2408, 0x3c98,
+ 0x3150, 0x012c, 0x0d74, 0x2508, 0x3d98,
+ 0x3250, 0x022c, 0x0e74, 0x2608, 0x3e98,
+ 0x3254, 0x0230, 0x0e78, 0x260c, 0x3e9c,
+ 0x3154, 0x0130, 0x0d78, 0x250c, 0x3d9c,
+ 0x3054, 0x0030, 0x0c78, 0x240c, 0x3c9c,
+ 0x3058, 0x0034, 0x0c7c, 0x2410, 0x3ca0,
+ 0x3158, 0x0134, 0x0d7c, 0x2510, 0x3da0,
+ 0x3258, 0x0234, 0x0e7c, 0x2610, 0x3ea0,
+ 0x325c, 0x0238, 0x0e80, 0x2614, 0x3ea4,
+ 0x315c, 0x0138, 0x0d80, 0x2514, 0x3da4,
+ 0x305c, 0x0038, 0x0c80, 0x2414, 0x3ca4,
+ 0x3060, 0x003c, 0x0c84, 0x2418, 0x3ca8,
+ 0x3160, 0x013c, 0x0d84, 0x2518, 0x3da8,
+ 0x3260, 0x023c, 0x0e84, 0x2618, 0x3ea8,
+ 0x3264, 0x0240, 0x0e88, 0x261c, 0x3eac,
+ 0x3164, 0x0140, 0x0d88, 0x251c, 0x3dac,
+ 0x3064, 0x0040, 0x0c88, 0x241c, 0x3cac,
+ 0x3068, 0x0044, 0x0c8c, 0x2420, 0x3cb0,
+ 0x3168, 0x0144, 0x0d8c, 0x2520, 0x3db0,
+ 0x3268, 0x0244, 0x0e8c, 0x2620, 0x3eb0,
+ 0x3648, 0x0624, 0x126c, 0x2a00, 0x4290,
+ 0x3748, 0x0724, 0x136c, 0x2b00, 0x4390,
+ 0x3848, 0x0824, 0x146c, 0x2c00, 0x4490,
+ 0x384c, 0x0828, 0x1470, 0x2c04, 0x4494,
+ 0x374c, 0x0728, 0x1370, 0x2b04, 0x4394,
+ 0x364c, 0x0628, 0x1270, 0x2a04, 0x4294,
+ 0x3650, 0x062c, 0x1274, 0x2a08, 0x4298,
+ 0x3750, 0x072c, 0x1374, 0x2b08, 0x4398,
+ 0x3850, 0x082c, 0x1474, 0x2c08, 0x4498,
+ 0x3854, 0x0830, 0x1478, 0x2c0c, 0x449c,
+ 0x3754, 0x0730, 0x1378, 0x2b0c, 0x439c,
+ 0x3654, 0x0630, 0x1278, 0x2a0c, 0x429c,
+ 0x3658, 0x0634, 0x127c, 0x2a10, 0x42a0,
+ 0x3758, 0x0734, 0x137c, 0x2b10, 0x43a0,
+ 0x3858, 0x0834, 0x147c, 0x2c10, 0x44a0,
+ 0x385c, 0x0838, 0x1480, 0x2c14, 0x44a4,
+ 0x375c, 0x0738, 0x1380, 0x2b14, 0x43a4,
+ 0x365c, 0x0638, 0x1280, 0x2a14, 0x42a4,
+ 0x3660, 0x063c, 0x1284, 0x2a18, 0x42a8,
+ 0x3760, 0x073c, 0x1384, 0x2b18, 0x43a8,
+ 0x3860, 0x083c, 0x1484, 0x2c18, 0x44a8,
+ 0x3864, 0x0840, 0x1488, 0x2c1c, 0x44ac,
+ 0x3764, 0x0740, 0x1388, 0x2b1c, 0x43ac,
+ 0x3664, 0x0640, 0x1288, 0x2a1c, 0x42ac,
+ 0x3668, 0x0644, 0x128c, 0x2a20, 0x42b0,
+ 0x3768, 0x0744, 0x138c, 0x2b20, 0x43b0,
+ 0x3868, 0x0844, 0x148c, 0x2c20, 0x44b0,
+ 0x3c48, 0x0c24, 0x186c, 0x3000, 0x0090,
+ 0x3d48, 0x0d24, 0x196c, 0x3100, 0x0190,
+ 0x3e48, 0x0e24, 0x1a6c, 0x3200, 0x0290,
+ 0x3e4c, 0x0e28, 0x1a70, 0x3204, 0x0294,
+ 0x3d4c, 0x0d28, 0x1970, 0x3104, 0x0194,
+ 0x3c4c, 0x0c28, 0x1870, 0x3004, 0x0094,
+ 0x3c50, 0x0c2c, 0x1874, 0x3008, 0x0098,
+ 0x3d50, 0x0d2c, 0x1974, 0x3108, 0x0198,
+ 0x3e50, 0x0e2c, 0x1a74, 0x3208, 0x0298,
+ 0x3e54, 0x0e30, 0x1a78, 0x320c, 0x029c,
+ 0x3d54, 0x0d30, 0x1978, 0x310c, 0x019c,
+ 0x3c54, 0x0c30, 0x1878, 0x300c, 0x009c,
+ 0x3c58, 0x0c34, 0x187c, 0x3010, 0x00a0,
+ 0x3d58, 0x0d34, 0x197c, 0x3110, 0x01a0,
+ 0x3e58, 0x0e34, 0x1a7c, 0x3210, 0x02a0,
+ 0x3e5c, 0x0e38, 0x1a80, 0x3214, 0x02a4,
+ 0x3d5c, 0x0d38, 0x1980, 0x3114, 0x01a4,
+ 0x3c5c, 0x0c38, 0x1880, 0x3014, 0x00a4,
+ 0x3c60, 0x0c3c, 0x1884, 0x3018, 0x00a8,
+ 0x3d60, 0x0d3c, 0x1984, 0x3118, 0x01a8,
+ 0x3e60, 0x0e3c, 0x1a84, 0x3218, 0x02a8,
+ 0x3e64, 0x0e40, 0x1a88, 0x321c, 0x02ac,
+ 0x3d64, 0x0d40, 0x1988, 0x311c, 0x01ac,
+ 0x3c64, 0x0c40, 0x1888, 0x301c, 0x00ac,
+ 0x3c68, 0x0c44, 0x188c, 0x3020, 0x00b0,
+ 0x3d68, 0x0d44, 0x198c, 0x3120, 0x01b0,
+ 0x3e68, 0x0e44, 0x1a8c, 0x3220, 0x02b0,
+ 0x4248, 0x1224, 0x1e6c, 0x3600, 0x0690,
+ 0x4348, 0x1324, 0x1f6c, 0x3700, 0x0790,
+ 0x4448, 0x1424, 0x206c, 0x3800, 0x0890,
+ 0x444c, 0x1428, 0x2070, 0x3804, 0x0894,
+ 0x434c, 0x1328, 0x1f70, 0x3704, 0x0794,
+ 0x424c, 0x1228, 0x1e70, 0x3604, 0x0694,
+ 0x4250, 0x122c, 0x1e74, 0x3608, 0x0698,
+ 0x4350, 0x132c, 0x1f74, 0x3708, 0x0798,
+ 0x4450, 0x142c, 0x2074, 0x3808, 0x0898,
+ 0x4454, 0x1430, 0x2078, 0x380c, 0x089c,
+ 0x4354, 0x1330, 0x1f78, 0x370c, 0x079c,
+ 0x4254, 0x1230, 0x1e78, 0x360c, 0x069c,
+ 0x4258, 0x1234, 0x1e7c, 0x3610, 0x06a0,
+ 0x4358, 0x1334, 0x1f7c, 0x3710, 0x07a0,
+ 0x4458, 0x1434, 0x207c, 0x3810, 0x08a0,
+ 0x445c, 0x1438, 0x2080, 0x3814, 0x08a4,
+ 0x435c, 0x1338, 0x1f80, 0x3714, 0x07a4,
+ 0x425c, 0x1238, 0x1e80, 0x3614, 0x06a4,
+ 0x4260, 0x123c, 0x1e84, 0x3618, 0x06a8,
+ 0x4360, 0x133c, 0x1f84, 0x3718, 0x07a8,
+ 0x4460, 0x143c, 0x2084, 0x3818, 0x08a8,
+ 0x4464, 0x1440, 0x2088, 0x381c, 0x08ac,
+ 0x4364, 0x1340, 0x1f88, 0x371c, 0x07ac,
+ 0x4264, 0x1240, 0x1e88, 0x361c, 0x06ac,
+ 0x4268, 0x1244, 0x1e8c, 0x3620, 0x06b0,
+ 0x4368, 0x1344, 0x1f8c, 0x3720, 0x07b0,
+ 0x4468, 0x1444, 0x208c, 0x3820, 0x08b0,
+ 0x0048, 0x1824, 0x246c, 0x3c00, 0x0c90,
+ 0x0148, 0x1924, 0x256c, 0x3d00, 0x0d90,
+ 0x0248, 0x1a24, 0x266c, 0x3e00, 0x0e90,
+ 0x024c, 0x1a28, 0x2670, 0x3e04, 0x0e94,
+ 0x014c, 0x1928, 0x2570, 0x3d04, 0x0d94,
+ 0x004c, 0x1828, 0x2470, 0x3c04, 0x0c94,
+ 0x0050, 0x182c, 0x2474, 0x3c08, 0x0c98,
+ 0x0150, 0x192c, 0x2574, 0x3d08, 0x0d98,
+ 0x0250, 0x1a2c, 0x2674, 0x3e08, 0x0e98,
+ 0x0254, 0x1a30, 0x2678, 0x3e0c, 0x0e9c,
+ 0x0154, 0x1930, 0x2578, 0x3d0c, 0x0d9c,
+ 0x0054, 0x1830, 0x2478, 0x3c0c, 0x0c9c,
+ 0x0058, 0x1834, 0x247c, 0x3c10, 0x0ca0,
+ 0x0158, 0x1934, 0x257c, 0x3d10, 0x0da0,
+ 0x0258, 0x1a34, 0x267c, 0x3e10, 0x0ea0,
+ 0x025c, 0x1a38, 0x2680, 0x3e14, 0x0ea4,
+ 0x015c, 0x1938, 0x2580, 0x3d14, 0x0da4,
+ 0x005c, 0x1838, 0x2480, 0x3c14, 0x0ca4,
+ 0x0060, 0x183c, 0x2484, 0x3c18, 0x0ca8,
+ 0x0160, 0x193c, 0x2584, 0x3d18, 0x0da8,
+ 0x0260, 0x1a3c, 0x2684, 0x3e18, 0x0ea8,
+ 0x0264, 0x1a40, 0x2688, 0x3e1c, 0x0eac,
+ 0x0164, 0x1940, 0x2588, 0x3d1c, 0x0dac,
+ 0x0064, 0x1840, 0x2488, 0x3c1c, 0x0cac,
+ 0x0068, 0x1844, 0x248c, 0x3c20, 0x0cb0,
+ 0x0168, 0x1944, 0x258c, 0x3d20, 0x0db0,
+ 0x0268, 0x1a44, 0x268c, 0x3e20, 0x0eb0,
+ 0x0648, 0x1e24, 0x2a6c, 0x4200, 0x1290,
+ 0x0748, 0x1f24, 0x2b6c, 0x4300, 0x1390,
+ 0x0848, 0x2024, 0x2c6c, 0x4400, 0x1490,
+ 0x084c, 0x2028, 0x2c70, 0x4404, 0x1494,
+ 0x074c, 0x1f28, 0x2b70, 0x4304, 0x1394,
+ 0x064c, 0x1e28, 0x2a70, 0x4204, 0x1294,
+ 0x0650, 0x1e2c, 0x2a74, 0x4208, 0x1298,
+ 0x0750, 0x1f2c, 0x2b74, 0x4308, 0x1398,
+ 0x0850, 0x202c, 0x2c74, 0x4408, 0x1498,
+ 0x0854, 0x2030, 0x2c78, 0x440c, 0x149c,
+ 0x0754, 0x1f30, 0x2b78, 0x430c, 0x139c,
+ 0x0654, 0x1e30, 0x2a78, 0x420c, 0x129c,
+ 0x0658, 0x1e34, 0x2a7c, 0x4210, 0x12a0,
+ 0x0758, 0x1f34, 0x2b7c, 0x4310, 0x13a0,
+ 0x0858, 0x2034, 0x2c7c, 0x4410, 0x14a0,
+ 0x085c, 0x2038, 0x2c80, 0x4414, 0x14a4,
+ 0x075c, 0x1f38, 0x2b80, 0x4314, 0x13a4,
+ 0x065c, 0x1e38, 0x2a80, 0x4214, 0x12a4,
+ 0x0660, 0x1e3c, 0x2a84, 0x4218, 0x12a8,
+ 0x0760, 0x1f3c, 0x2b84, 0x4318, 0x13a8,
+ 0x0860, 0x203c, 0x2c84, 0x4418, 0x14a8,
+ 0x0864, 0x2040, 0x2c88, 0x441c, 0x14ac,
+ 0x0764, 0x1f40, 0x2b88, 0x431c, 0x13ac,
+ 0x0664, 0x1e40, 0x2a88, 0x421c, 0x12ac,
+ 0x0668, 0x1e44, 0x2a8c, 0x4220, 0x12b0,
+ 0x0768, 0x1f44, 0x2b8c, 0x4320, 0x13b0,
+ 0x0868, 0x2044, 0x2c8c, 0x4420, 0x14b0,
+ 0x0f48, 0x2724, 0x336c, 0x0300, 0x1b90,
+ 0x1048, 0x2824, 0x346c, 0x0400, 0x1c90,
+ 0x1148, 0x2924, 0x356c, 0x0500, 0x1d90,
+ 0x114c, 0x2928, 0x3570, 0x0504, 0x1d94,
+ 0x104c, 0x2828, 0x3470, 0x0404, 0x1c94,
+ 0x0f4c, 0x2728, 0x3370, 0x0304, 0x1b94,
+ 0x0f50, 0x272c, 0x3374, 0x0308, 0x1b98,
+ 0x1050, 0x282c, 0x3474, 0x0408, 0x1c98,
+ 0x1150, 0x292c, 0x3574, 0x0508, 0x1d98,
+ 0x1154, 0x2930, 0x3578, 0x050c, 0x1d9c,
+ 0x1054, 0x2830, 0x3478, 0x040c, 0x1c9c,
+ 0x0f54, 0x2730, 0x3378, 0x030c, 0x1b9c,
+ 0x0f58, 0x2734, 0x337c, 0x0310, 0x1ba0,
+ 0x1058, 0x2834, 0x347c, 0x0410, 0x1ca0,
+ 0x1158, 0x2934, 0x357c, 0x0510, 0x1da0,
+ 0x115c, 0x2938, 0x3580, 0x0514, 0x1da4,
+ 0x105c, 0x2838, 0x3480, 0x0414, 0x1ca4,
+ 0x0f5c, 0x2738, 0x3380, 0x0314, 0x1ba4,
+ 0x0f60, 0x273c, 0x3384, 0x0318, 0x1ba8,
+ 0x1060, 0x283c, 0x3484, 0x0418, 0x1ca8,
+ 0x1160, 0x293c, 0x3584, 0x0518, 0x1da8,
+ 0x1164, 0x2940, 0x3588, 0x051c, 0x1dac,
+ 0x1064, 0x2840, 0x3488, 0x041c, 0x1cac,
+ 0x0f64, 0x2740, 0x3388, 0x031c, 0x1bac,
+ 0x0f68, 0x2744, 0x338c, 0x0320, 0x1bb0,
+ 0x1068, 0x2844, 0x348c, 0x0420, 0x1cb0,
+ 0x1168, 0x2944, 0x358c, 0x0520, 0x1db0,
+ 0x1548, 0x2d24, 0x396c, 0x0900, 0x2190,
+ 0x1648, 0x2e24, 0x3a6c, 0x0a00, 0x2290,
+ 0x1748, 0x2f24, 0x3b6c, 0x0b00, 0x2390,
+ 0x174c, 0x2f28, 0x3b70, 0x0b04, 0x2394,
+ 0x164c, 0x2e28, 0x3a70, 0x0a04, 0x2294,
+ 0x154c, 0x2d28, 0x3970, 0x0904, 0x2194,
+ 0x1550, 0x2d2c, 0x3974, 0x0908, 0x2198,
+ 0x1650, 0x2e2c, 0x3a74, 0x0a08, 0x2298,
+ 0x1750, 0x2f2c, 0x3b74, 0x0b08, 0x2398,
+ 0x1754, 0x2f30, 0x3b78, 0x0b0c, 0x239c,
+ 0x1654, 0x2e30, 0x3a78, 0x0a0c, 0x229c,
+ 0x1554, 0x2d30, 0x3978, 0x090c, 0x219c,
+ 0x1558, 0x2d34, 0x397c, 0x0910, 0x21a0,
+ 0x1658, 0x2e34, 0x3a7c, 0x0a10, 0x22a0,
+ 0x1758, 0x2f34, 0x3b7c, 0x0b10, 0x23a0,
+ 0x175c, 0x2f38, 0x3b80, 0x0b14, 0x23a4,
+ 0x165c, 0x2e38, 0x3a80, 0x0a14, 0x22a4,
+ 0x155c, 0x2d38, 0x3980, 0x0914, 0x21a4,
+ 0x1560, 0x2d3c, 0x3984, 0x0918, 0x21a8,
+ 0x1660, 0x2e3c, 0x3a84, 0x0a18, 0x22a8,
+ 0x1760, 0x2f3c, 0x3b84, 0x0b18, 0x23a8,
+ 0x1764, 0x2f40, 0x3b88, 0x0b1c, 0x23ac,
+ 0x1664, 0x2e40, 0x3a88, 0x0a1c, 0x22ac,
+ 0x1564, 0x2d40, 0x3988, 0x091c, 0x21ac,
+ 0x1568, 0x2d44, 0x398c, 0x0920, 0x21b0,
+ 0x1668, 0x2e44, 0x3a8c, 0x0a20, 0x22b0,
+ 0x1768, 0x2f44, 0x3b8c, 0x0b20, 0x23b0,
+ 0x1b48, 0x3324, 0x3f6c, 0x0f00, 0x2790,
+ 0x1c48, 0x3424, 0x406c, 0x1000, 0x2890,
+ 0x1d48, 0x3524, 0x416c, 0x1100, 0x2990,
+ 0x1d4c, 0x3528, 0x4170, 0x1104, 0x2994,
+ 0x1c4c, 0x3428, 0x4070, 0x1004, 0x2894,
+ 0x1b4c, 0x3328, 0x3f70, 0x0f04, 0x2794,
+ 0x1b50, 0x332c, 0x3f74, 0x0f08, 0x2798,
+ 0x1c50, 0x342c, 0x4074, 0x1008, 0x2898,
+ 0x1d50, 0x352c, 0x4174, 0x1108, 0x2998,
+ 0x1d54, 0x3530, 0x4178, 0x110c, 0x299c,
+ 0x1c54, 0x3430, 0x4078, 0x100c, 0x289c,
+ 0x1b54, 0x3330, 0x3f78, 0x0f0c, 0x279c,
+ 0x1b58, 0x3334, 0x3f7c, 0x0f10, 0x27a0,
+ 0x1c58, 0x3434, 0x407c, 0x1010, 0x28a0,
+ 0x1d58, 0x3534, 0x417c, 0x1110, 0x29a0,
+ 0x1d5c, 0x3538, 0x4180, 0x1114, 0x29a4,
+ 0x1c5c, 0x3438, 0x4080, 0x1014, 0x28a4,
+ 0x1b5c, 0x3338, 0x3f80, 0x0f14, 0x27a4,
+ 0x1b60, 0x333c, 0x3f84, 0x0f18, 0x27a8,
+ 0x1c60, 0x343c, 0x4084, 0x1018, 0x28a8,
+ 0x1d60, 0x353c, 0x4184, 0x1118, 0x29a8,
+ 0x1d64, 0x3540, 0x4188, 0x111c, 0x29ac,
+ 0x1c64, 0x3440, 0x4088, 0x101c, 0x28ac,
+ 0x1b64, 0x3340, 0x3f88, 0x0f1c, 0x27ac,
+ 0x1b68, 0x3344, 0x3f8c, 0x0f20, 0x27b0,
+ 0x1c68, 0x3444, 0x408c, 0x1020, 0x28b0,
+ 0x1d68, 0x3544, 0x418c, 0x1120, 0x29b0,
+ 0x2148, 0x3924, 0x456c, 0x1500, 0x2d90,
+ 0x2248, 0x3a24, 0x466c, 0x1600, 0x2e90,
+ 0x2348, 0x3b24, 0x476c, 0x1700, 0x2f90,
+ 0x234c, 0x3b28, 0x4770, 0x1704, 0x2f94,
+ 0x224c, 0x3a28, 0x4670, 0x1604, 0x2e94,
+ 0x214c, 0x3928, 0x4570, 0x1504, 0x2d94,
+ 0x2150, 0x392c, 0x4574, 0x1508, 0x2d98,
+ 0x2250, 0x3a2c, 0x4674, 0x1608, 0x2e98,
+ 0x2350, 0x3b2c, 0x4774, 0x1708, 0x2f98,
+ 0x2354, 0x3b30, 0x4778, 0x170c, 0x2f9c,
+ 0x2254, 0x3a30, 0x4678, 0x160c, 0x2e9c,
+ 0x2154, 0x3930, 0x4578, 0x150c, 0x2d9c,
+ 0x2158, 0x3934, 0x457c, 0x1510, 0x2da0,
+ 0x2258, 0x3a34, 0x467c, 0x1610, 0x2ea0,
+ 0x2358, 0x3b34, 0x477c, 0x1710, 0x2fa0,
+ 0x235c, 0x3b38, 0x4780, 0x1714, 0x2fa4,
+ 0x225c, 0x3a38, 0x4680, 0x1614, 0x2ea4,
+ 0x215c, 0x3938, 0x4580, 0x1514, 0x2da4,
+ 0x2160, 0x393c, 0x4584, 0x1518, 0x2da8,
+ 0x2260, 0x3a3c, 0x4684, 0x1618, 0x2ea8,
+ 0x2360, 0x3b3c, 0x4784, 0x1718, 0x2fa8,
+ 0x2364, 0x3b40, 0x4788, 0x171c, 0x2fac,
+ 0x2264, 0x3a40, 0x4688, 0x161c, 0x2eac,
+ 0x2164, 0x3940, 0x4588, 0x151c, 0x2dac,
+ 0x2168, 0x3944, 0x458c, 0x1520, 0x2db0,
+ 0x2268, 0x3a44, 0x468c, 0x1620, 0x2eb0,
+ 0x2368, 0x3b44, 0x478c, 0x1720, 0x2fb0,
+ 0x2748, 0x3f24, 0x036c, 0x1b00, 0x3390,
+ 0x2848, 0x4024, 0x046c, 0x1c00, 0x3490,
+ 0x2948, 0x4124, 0x056c, 0x1d00, 0x3590,
+ 0x294c, 0x4128, 0x0570, 0x1d04, 0x3594,
+ 0x284c, 0x4028, 0x0470, 0x1c04, 0x3494,
+ 0x274c, 0x3f28, 0x0370, 0x1b04, 0x3394,
+ 0x2750, 0x3f2c, 0x0374, 0x1b08, 0x3398,
+ 0x2850, 0x402c, 0x0474, 0x1c08, 0x3498,
+ 0x2950, 0x412c, 0x0574, 0x1d08, 0x3598,
+ 0x2954, 0x4130, 0x0578, 0x1d0c, 0x359c,
+ 0x2854, 0x4030, 0x0478, 0x1c0c, 0x349c,
+ 0x2754, 0x3f30, 0x0378, 0x1b0c, 0x339c,
+ 0x2758, 0x3f34, 0x037c, 0x1b10, 0x33a0,
+ 0x2858, 0x4034, 0x047c, 0x1c10, 0x34a0,
+ 0x2958, 0x4134, 0x057c, 0x1d10, 0x35a0,
+ 0x295c, 0x4138, 0x0580, 0x1d14, 0x35a4,
+ 0x285c, 0x4038, 0x0480, 0x1c14, 0x34a4,
+ 0x275c, 0x3f38, 0x0380, 0x1b14, 0x33a4,
+ 0x2760, 0x3f3c, 0x0384, 0x1b18, 0x33a8,
+ 0x2860, 0x403c, 0x0484, 0x1c18, 0x34a8,
+ 0x2960, 0x413c, 0x0584, 0x1d18, 0x35a8,
+ 0x2964, 0x4140, 0x0588, 0x1d1c, 0x35ac,
+ 0x2864, 0x4040, 0x0488, 0x1c1c, 0x34ac,
+ 0x2764, 0x3f40, 0x0388, 0x1b1c, 0x33ac,
+ 0x2768, 0x3f44, 0x038c, 0x1b20, 0x33b0,
+ 0x2868, 0x4044, 0x048c, 0x1c20, 0x34b0,
+ 0x2968, 0x4144, 0x058c, 0x1d20, 0x35b0,
+ 0x2d48, 0x4524, 0x096c, 0x2100, 0x3990,
+ 0x2e48, 0x4624, 0x0a6c, 0x2200, 0x3a90,
+ 0x2f48, 0x4724, 0x0b6c, 0x2300, 0x3b90,
+ 0x2f4c, 0x4728, 0x0b70, 0x2304, 0x3b94,
+ 0x2e4c, 0x4628, 0x0a70, 0x2204, 0x3a94,
+ 0x2d4c, 0x4528, 0x0970, 0x2104, 0x3994,
+ 0x2d50, 0x452c, 0x0974, 0x2108, 0x3998,
+ 0x2e50, 0x462c, 0x0a74, 0x2208, 0x3a98,
+ 0x2f50, 0x472c, 0x0b74, 0x2308, 0x3b98,
+ 0x2f54, 0x4730, 0x0b78, 0x230c, 0x3b9c,
+ 0x2e54, 0x4630, 0x0a78, 0x220c, 0x3a9c,
+ 0x2d54, 0x4530, 0x0978, 0x210c, 0x399c,
+ 0x2d58, 0x4534, 0x097c, 0x2110, 0x39a0,
+ 0x2e58, 0x4634, 0x0a7c, 0x2210, 0x3aa0,
+ 0x2f58, 0x4734, 0x0b7c, 0x2310, 0x3ba0,
+ 0x2f5c, 0x4738, 0x0b80, 0x2314, 0x3ba4,
+ 0x2e5c, 0x4638, 0x0a80, 0x2214, 0x3aa4,
+ 0x2d5c, 0x4538, 0x0980, 0x2114, 0x39a4,
+ 0x2d60, 0x453c, 0x0984, 0x2118, 0x39a8,
+ 0x2e60, 0x463c, 0x0a84, 0x2218, 0x3aa8,
+ 0x2f60, 0x473c, 0x0b84, 0x2318, 0x3ba8,
+ 0x2f64, 0x4740, 0x0b88, 0x231c, 0x3bac,
+ 0x2e64, 0x4640, 0x0a88, 0x221c, 0x3aac,
+ 0x2d64, 0x4540, 0x0988, 0x211c, 0x39ac,
+ 0x2d68, 0x4544, 0x098c, 0x2120, 0x39b0,
+ 0x2e68, 0x4644, 0x0a8c, 0x2220, 0x3ab0,
+ 0x2f68, 0x4744, 0x0b8c, 0x2320, 0x3bb0,
+ 0x3348, 0x0324, 0x0f6c, 0x2700, 0x3f90,
+ 0x3448, 0x0424, 0x106c, 0x2800, 0x4090,
+ 0x3548, 0x0524, 0x116c, 0x2900, 0x4190,
+ 0x354c, 0x0528, 0x1170, 0x2904, 0x4194,
+ 0x344c, 0x0428, 0x1070, 0x2804, 0x4094,
+ 0x334c, 0x0328, 0x0f70, 0x2704, 0x3f94,
+ 0x3350, 0x032c, 0x0f74, 0x2708, 0x3f98,
+ 0x3450, 0x042c, 0x1074, 0x2808, 0x4098,
+ 0x3550, 0x052c, 0x1174, 0x2908, 0x4198,
+ 0x3554, 0x0530, 0x1178, 0x290c, 0x419c,
+ 0x3454, 0x0430, 0x1078, 0x280c, 0x409c,
+ 0x3354, 0x0330, 0x0f78, 0x270c, 0x3f9c,
+ 0x3358, 0x0334, 0x0f7c, 0x2710, 0x3fa0,
+ 0x3458, 0x0434, 0x107c, 0x2810, 0x40a0,
+ 0x3558, 0x0534, 0x117c, 0x2910, 0x41a0,
+ 0x355c, 0x0538, 0x1180, 0x2914, 0x41a4,
+ 0x345c, 0x0438, 0x1080, 0x2814, 0x40a4,
+ 0x335c, 0x0338, 0x0f80, 0x2714, 0x3fa4,
+ 0x3360, 0x033c, 0x0f84, 0x2718, 0x3fa8,
+ 0x3460, 0x043c, 0x1084, 0x2818, 0x40a8,
+ 0x3560, 0x053c, 0x1184, 0x2918, 0x41a8,
+ 0x3564, 0x0540, 0x1188, 0x291c, 0x41ac,
+ 0x3464, 0x0440, 0x1088, 0x281c, 0x40ac,
+ 0x3364, 0x0340, 0x0f88, 0x271c, 0x3fac,
+ 0x3368, 0x0344, 0x0f8c, 0x2720, 0x3fb0,
+ 0x3468, 0x0444, 0x108c, 0x2820, 0x40b0,
+ 0x3568, 0x0544, 0x118c, 0x2920, 0x41b0,
+ 0x3948, 0x0924, 0x156c, 0x2d00, 0x4590,
+ 0x3a48, 0x0a24, 0x166c, 0x2e00, 0x4690,
+ 0x3b48, 0x0b24, 0x176c, 0x2f00, 0x4790,
+ 0x3b4c, 0x0b28, 0x1770, 0x2f04, 0x4794,
+ 0x3a4c, 0x0a28, 0x1670, 0x2e04, 0x4694,
+ 0x394c, 0x0928, 0x1570, 0x2d04, 0x4594,
+ 0x3950, 0x092c, 0x1574, 0x2d08, 0x4598,
+ 0x3a50, 0x0a2c, 0x1674, 0x2e08, 0x4698,
+ 0x3b50, 0x0b2c, 0x1774, 0x2f08, 0x4798,
+ 0x3b54, 0x0b30, 0x1778, 0x2f0c, 0x479c,
+ 0x3a54, 0x0a30, 0x1678, 0x2e0c, 0x469c,
+ 0x3954, 0x0930, 0x1578, 0x2d0c, 0x459c,
+ 0x3958, 0x0934, 0x157c, 0x2d10, 0x45a0,
+ 0x3a58, 0x0a34, 0x167c, 0x2e10, 0x46a0,
+ 0x3b58, 0x0b34, 0x177c, 0x2f10, 0x47a0,
+ 0x3b5c, 0x0b38, 0x1780, 0x2f14, 0x47a4,
+ 0x3a5c, 0x0a38, 0x1680, 0x2e14, 0x46a4,
+ 0x395c, 0x0938, 0x1580, 0x2d14, 0x45a4,
+ 0x3960, 0x093c, 0x1584, 0x2d18, 0x45a8,
+ 0x3a60, 0x0a3c, 0x1684, 0x2e18, 0x46a8,
+ 0x3b60, 0x0b3c, 0x1784, 0x2f18, 0x47a8,
+ 0x3b64, 0x0b40, 0x1788, 0x2f1c, 0x47ac,
+ 0x3a64, 0x0a40, 0x1688, 0x2e1c, 0x46ac,
+ 0x3964, 0x0940, 0x1588, 0x2d1c, 0x45ac,
+ 0x3968, 0x0944, 0x158c, 0x2d20, 0x45b0,
+ 0x3a68, 0x0a44, 0x168c, 0x2e20, 0x46b0,
+ 0x3b68, 0x0b44, 0x178c, 0x2f20, 0x47b0,
+ 0x3f48, 0x0f24, 0x1b6c, 0x3300, 0x0390,
+ 0x4048, 0x1024, 0x1c6c, 0x3400, 0x0490,
+ 0x4148, 0x1124, 0x1d6c, 0x3500, 0x0590,
+ 0x414c, 0x1128, 0x1d70, 0x3504, 0x0594,
+ 0x404c, 0x1028, 0x1c70, 0x3404, 0x0494,
+ 0x3f4c, 0x0f28, 0x1b70, 0x3304, 0x0394,
+ 0x3f50, 0x0f2c, 0x1b74, 0x3308, 0x0398,
+ 0x4050, 0x102c, 0x1c74, 0x3408, 0x0498,
+ 0x4150, 0x112c, 0x1d74, 0x3508, 0x0598,
+ 0x4154, 0x1130, 0x1d78, 0x350c, 0x059c,
+ 0x4054, 0x1030, 0x1c78, 0x340c, 0x049c,
+ 0x3f54, 0x0f30, 0x1b78, 0x330c, 0x039c,
+ 0x3f58, 0x0f34, 0x1b7c, 0x3310, 0x03a0,
+ 0x4058, 0x1034, 0x1c7c, 0x3410, 0x04a0,
+ 0x4158, 0x1134, 0x1d7c, 0x3510, 0x05a0,
+ 0x415c, 0x1138, 0x1d80, 0x3514, 0x05a4,
+ 0x405c, 0x1038, 0x1c80, 0x3414, 0x04a4,
+ 0x3f5c, 0x0f38, 0x1b80, 0x3314, 0x03a4,
+ 0x3f60, 0x0f3c, 0x1b84, 0x3318, 0x03a8,
+ 0x4060, 0x103c, 0x1c84, 0x3418, 0x04a8,
+ 0x4160, 0x113c, 0x1d84, 0x3518, 0x05a8,
+ 0x4164, 0x1140, 0x1d88, 0x351c, 0x05ac,
+ 0x4064, 0x1040, 0x1c88, 0x341c, 0x04ac,
+ 0x3f64, 0x0f40, 0x1b88, 0x331c, 0x03ac,
+ 0x3f68, 0x0f44, 0x1b8c, 0x3320, 0x03b0,
+ 0x4068, 0x1044, 0x1c8c, 0x3420, 0x04b0,
+ 0x4168, 0x1144, 0x1d8c, 0x3520, 0x05b0,
+ 0x4548, 0x1524, 0x216c, 0x3900, 0x0990,
+ 0x4648, 0x1624, 0x226c, 0x3a00, 0x0a90,
+ 0x4748, 0x1724, 0x236c, 0x3b00, 0x0b90,
+ 0x474c, 0x1728, 0x2370, 0x3b04, 0x0b94,
+ 0x464c, 0x1628, 0x2270, 0x3a04, 0x0a94,
+ 0x454c, 0x1528, 0x2170, 0x3904, 0x0994,
+ 0x4550, 0x152c, 0x2174, 0x3908, 0x0998,
+ 0x4650, 0x162c, 0x2274, 0x3a08, 0x0a98,
+ 0x4750, 0x172c, 0x2374, 0x3b08, 0x0b98,
+ 0x4754, 0x1730, 0x2378, 0x3b0c, 0x0b9c,
+ 0x4654, 0x1630, 0x2278, 0x3a0c, 0x0a9c,
+ 0x4554, 0x1530, 0x2178, 0x390c, 0x099c,
+ 0x4558, 0x1534, 0x217c, 0x3910, 0x09a0,
+ 0x4658, 0x1634, 0x227c, 0x3a10, 0x0aa0,
+ 0x4758, 0x1734, 0x237c, 0x3b10, 0x0ba0,
+ 0x475c, 0x1738, 0x2380, 0x3b14, 0x0ba4,
+ 0x465c, 0x1638, 0x2280, 0x3a14, 0x0aa4,
+ 0x455c, 0x1538, 0x2180, 0x3914, 0x09a4,
+ 0x4560, 0x153c, 0x2184, 0x3918, 0x09a8,
+ 0x4660, 0x163c, 0x2284, 0x3a18, 0x0aa8,
+ 0x4760, 0x173c, 0x2384, 0x3b18, 0x0ba8,
+ 0x4764, 0x1740, 0x2388, 0x3b1c, 0x0bac,
+ 0x4664, 0x1640, 0x2288, 0x3a1c, 0x0aac,
+ 0x4564, 0x1540, 0x2188, 0x391c, 0x09ac,
+ 0x4568, 0x1544, 0x218c, 0x3920, 0x09b0,
+ 0x4668, 0x1644, 0x228c, 0x3a20, 0x0ab0,
+ 0x4768, 0x1744, 0x238c, 0x3b20, 0x0bb0,
+ 0x0348, 0x1b24, 0x276c, 0x3f00, 0x0f90,
+ 0x0448, 0x1c24, 0x286c, 0x4000, 0x1090,
+ 0x0548, 0x1d24, 0x296c, 0x4100, 0x1190,
+ 0x054c, 0x1d28, 0x2970, 0x4104, 0x1194,
+ 0x044c, 0x1c28, 0x2870, 0x4004, 0x1094,
+ 0x034c, 0x1b28, 0x2770, 0x3f04, 0x0f94,
+ 0x0350, 0x1b2c, 0x2774, 0x3f08, 0x0f98,
+ 0x0450, 0x1c2c, 0x2874, 0x4008, 0x1098,
+ 0x0550, 0x1d2c, 0x2974, 0x4108, 0x1198,
+ 0x0554, 0x1d30, 0x2978, 0x410c, 0x119c,
+ 0x0454, 0x1c30, 0x2878, 0x400c, 0x109c,
+ 0x0354, 0x1b30, 0x2778, 0x3f0c, 0x0f9c,
+ 0x0358, 0x1b34, 0x277c, 0x3f10, 0x0fa0,
+ 0x0458, 0x1c34, 0x287c, 0x4010, 0x10a0,
+ 0x0558, 0x1d34, 0x297c, 0x4110, 0x11a0,
+ 0x055c, 0x1d38, 0x2980, 0x4114, 0x11a4,
+ 0x045c, 0x1c38, 0x2880, 0x4014, 0x10a4,
+ 0x035c, 0x1b38, 0x2780, 0x3f14, 0x0fa4,
+ 0x0360, 0x1b3c, 0x2784, 0x3f18, 0x0fa8,
+ 0x0460, 0x1c3c, 0x2884, 0x4018, 0x10a8,
+ 0x0560, 0x1d3c, 0x2984, 0x4118, 0x11a8,
+ 0x0564, 0x1d40, 0x2988, 0x411c, 0x11ac,
+ 0x0464, 0x1c40, 0x2888, 0x401c, 0x10ac,
+ 0x0364, 0x1b40, 0x2788, 0x3f1c, 0x0fac,
+ 0x0368, 0x1b44, 0x278c, 0x3f20, 0x0fb0,
+ 0x0468, 0x1c44, 0x288c, 0x4020, 0x10b0,
+ 0x0568, 0x1d44, 0x298c, 0x4120, 0x11b0,
+ 0x0948, 0x2124, 0x2d6c, 0x4500, 0x1590,
+ 0x0a48, 0x2224, 0x2e6c, 0x4600, 0x1690,
+ 0x0b48, 0x2324, 0x2f6c, 0x4700, 0x1790,
+ 0x0b4c, 0x2328, 0x2f70, 0x4704, 0x1794,
+ 0x0a4c, 0x2228, 0x2e70, 0x4604, 0x1694,
+ 0x094c, 0x2128, 0x2d70, 0x4504, 0x1594,
+ 0x0950, 0x212c, 0x2d74, 0x4508, 0x1598,
+ 0x0a50, 0x222c, 0x2e74, 0x4608, 0x1698,
+ 0x0b50, 0x232c, 0x2f74, 0x4708, 0x1798,
+ 0x0b54, 0x2330, 0x2f78, 0x470c, 0x179c,
+ 0x0a54, 0x2230, 0x2e78, 0x460c, 0x169c,
+ 0x0954, 0x2130, 0x2d78, 0x450c, 0x159c,
+ 0x0958, 0x2134, 0x2d7c, 0x4510, 0x15a0,
+ 0x0a58, 0x2234, 0x2e7c, 0x4610, 0x16a0,
+ 0x0b58, 0x2334, 0x2f7c, 0x4710, 0x17a0,
+ 0x0b5c, 0x2338, 0x2f80, 0x4714, 0x17a4,
+ 0x0a5c, 0x2238, 0x2e80, 0x4614, 0x16a4,
+ 0x095c, 0x2138, 0x2d80, 0x4514, 0x15a4,
+ 0x0960, 0x213c, 0x2d84, 0x4518, 0x15a8,
+ 0x0a60, 0x223c, 0x2e84, 0x4618, 0x16a8,
+ 0x0b60, 0x233c, 0x2f84, 0x4718, 0x17a8,
+ 0x0b64, 0x2340, 0x2f88, 0x471c, 0x17ac,
+ 0x0a64, 0x2240, 0x2e88, 0x461c, 0x16ac,
+ 0x0964, 0x2140, 0x2d88, 0x451c, 0x15ac,
+ 0x0968, 0x2144, 0x2d8c, 0x4520, 0x15b0,
+ 0x0a68, 0x2244, 0x2e8c, 0x4620, 0x16b0,
+ 0x0b68, 0x2344, 0x2f8c, 0x4720, 0x17b0,
+};
+
+/* DV25/50 DCT coefficient weights and inverse weights */
+/* created by dvtables.py */
+static const int dv_weight_bits = 18;
+static const int dv_weight_88[64] = {
+ 131072, 257107, 257107, 242189, 252167, 242189, 235923, 237536,
+ 237536, 235923, 229376, 231390, 223754, 231390, 229376, 222935,
+ 224969, 217965, 217965, 224969, 222935, 200636, 218652, 211916,
+ 212325, 211916, 218652, 200636, 188995, 196781, 205965, 206433,
+ 206433, 205965, 196781, 188995, 185364, 185364, 200636, 200704,
+ 200636, 185364, 185364, 174609, 180568, 195068, 195068, 180568,
+ 174609, 170091, 175557, 189591, 175557, 170091, 165371, 170627,
+ 170627, 165371, 160727, 153560, 160727, 144651, 144651, 136258,
+};
+static const int dv_weight_248[64] = {
+ 131072, 242189, 257107, 237536, 229376, 200636, 242189, 223754,
+ 224969, 196781, 262144, 242189, 229376, 200636, 257107, 237536,
+ 211916, 185364, 235923, 217965, 229376, 211916, 206433, 180568,
+ 242189, 223754, 224969, 196781, 211916, 185364, 235923, 217965,
+ 200704, 175557, 222935, 205965, 200636, 185364, 195068, 170627,
+ 229376, 211916, 206433, 180568, 200704, 175557, 222935, 205965,
+ 175557, 153560, 188995, 174609, 165371, 144651, 200636, 185364,
+ 195068, 170627, 175557, 153560, 188995, 174609, 165371, 144651,
+};
+static const int dv_iweight_bits = 14;
+static const int dv_iweight_88[64] = {
+ 32768, 16710, 16710, 17735, 17015, 17735, 18197, 18079,
+ 18079, 18197, 18725, 18559, 19196, 18559, 18725, 19284,
+ 19108, 19692, 19692, 19108, 19284, 21400, 19645, 20262,
+ 20214, 20262, 19645, 21400, 22733, 21845, 20867, 20815,
+ 20815, 20867, 21845, 22733, 23173, 23173, 21400, 21400,
+ 21400, 23173, 23173, 24600, 23764, 22017, 22017, 23764,
+ 24600, 25267, 24457, 22672, 24457, 25267, 25971, 25191,
+ 25191, 25971, 26715, 27962, 26715, 29642, 29642, 31536,
+};
+static const int dv_iweight_248[64] = {
+ 32768, 17735, 16710, 18079, 18725, 21400, 17735, 19196,
+ 19108, 21845, 16384, 17735, 18725, 21400, 16710, 18079,
+ 20262, 23173, 18197, 19692, 18725, 20262, 20815, 23764,
+ 17735, 19196, 19108, 21845, 20262, 23173, 18197, 19692,
+ 21400, 24457, 19284, 20867, 21400, 23173, 22017, 25191,
+ 18725, 20262, 20815, 23764, 21400, 24457, 19284, 20867,
+ 24457, 27962, 22733, 24600, 25971, 29642, 21400, 23173,
+ 22017, 25191, 24457, 27962, 22733, 24600, 25971, 29642,
+};
+
+static const uint16_t dv_audio_shuffle525[10][9] = {
+  {  0, 30, 60, 20, 50, 80, 10, 40, 70 }, /* 1st channel */
+  {  6, 36, 66, 26, 56, 86, 16, 46, 76 },
+  { 12, 42, 72,  2, 32, 62, 22, 52, 82 },
+  { 18, 48, 78,  8, 38, 68, 28, 58, 88 },
+  { 24, 54, 84, 14, 44, 74,  4, 34, 64 },
+
+  {  1, 31, 61, 21, 51, 81, 11, 41, 71 }, /* 2nd channel */
+  {  7, 37, 67, 27, 57, 87, 17, 47, 77 },
+  { 13, 43, 73,  3, 33, 63, 23, 53, 83 },
+  { 19, 49, 79,  9, 39, 69, 29, 59, 89 },
+  { 25, 55, 85, 15, 45, 75,  5, 35, 65 },
+};
+
+static const uint16_t dv_audio_shuffle625[12][9] = {
+  {   0,  36,  72,  26,  62,  98,  16,  52,  88}, /* 1st channel */
+  {   6,  42,  78,  32,  68, 104,  22,  58,  94},
+  {  12,  48,  84,   2,  38,  74,  28,  64, 100},
+  {  18,  54,  90,   8,  44,  80,  34,  70, 106},
+  {  24,  60,  96,  14,  50,  86,   4,  40,  76},
+  {  30,  66, 102,  20,  56,  92,  10,  46,  82},
+
+  {   1,  37,  73,  27,  63,  99,  17,  53,  89}, /* 2nd channel */
+  {   7,  43,  79,  33,  69, 105,  23,  59,  95},
+  {  13,  49,  85,   3,  39,  75,  29,  65, 101},
+  {  19,  55,  91,   9,  45,  81,  35,  71, 107},
+  {  25,  61,  97,  15,  51,  87,   5,  41,  77},
+  {  31,  67, 103,  21,  57,  93,  11,  47,  83},
+};
+
+static const __attribute__((unused)) int dv_audio_frequency[3] = {
+    48000, 44100, 32000,
+};
+
+static const DVprofile dv_profiles[] = {
+    { .dsf = 0,
+      .frame_size = 120000,        /* IEC 61834, SMPTE-314M - 525/60 (NTSC) */
+      .difseg_size = 10,
+      .n_difchan = 1,
+      .frame_rate = 30000,
+      .ltc_divisor = 30,
+      .frame_rate_base = 1001,
+      .height = 480,
+      .width = 720,
+      .sar = {{10, 11}, {40, 33}},
+      .video_place = dv_place_411,
+      .pix_fmt = PIX_FMT_YUV411P,
+      .audio_stride = 90,
+      .audio_min_samples = { 1580, 1452, 1053 }, /* for 48, 44.1 and 32Khz */
+      .audio_samples_dist = { 1600, 1602, 1602, 1602, 1602 }, /* per SMPTE-314M */
+      .audio_shuffle = dv_audio_shuffle525,
+    },
+    { .dsf = 1,
+      .frame_size = 144000,        /* IEC 61834 - 625/50 (PAL) */
+      .difseg_size = 12,
+      .n_difchan = 1,
+      .frame_rate = 25,
+      .frame_rate_base = 1,
+      .ltc_divisor = 25,
+      .height = 576,
+      .width = 720,
+      .sar = {{59, 54}, {118, 81}},
+      .video_place = dv_place_420,
+      .pix_fmt = PIX_FMT_YUV420P,
+      .audio_stride = 108,
+      .audio_min_samples = { 1896, 1742, 1264 }, /* for 48, 44.1 and 32Khz */
+      .audio_samples_dist = { 1920, 1920, 1920, 1920, 1920 },
+      .audio_shuffle = dv_audio_shuffle625,
+    },
+    { .dsf = 1,
+      .frame_size = 144000,        /* SMPTE-314M - 625/50 (PAL) */
+      .difseg_size = 12,
+      .n_difchan = 1,
+      .frame_rate = 25,
+      .frame_rate_base = 1,
+      .ltc_divisor = 25,
+      .height = 576,
+      .width = 720,
+      .sar = {{59, 54}, {118, 81}},
+      .video_place = dv_place_411P,
+      .pix_fmt = PIX_FMT_YUV411P,
+      .audio_stride = 108,
+      .audio_min_samples = { 1896, 1742, 1264 }, /* for 48, 44.1 and 32Khz */
+      .audio_samples_dist = { 1920, 1920, 1920, 1920, 1920 },
+      .audio_shuffle = dv_audio_shuffle625,
+    },
+    { .dsf = 0,
+      .frame_size = 240000,        /* SMPTE-314M - 525/60 (NTSC) 50 Mbps */
+      .difseg_size = 10,           /* also known as "DVCPRO50" */
+      .n_difchan = 2,
+      .frame_rate = 30000,
+      .ltc_divisor = 30,
+      .frame_rate_base = 1001,
+      .height = 480,
+      .width = 720,
+      .sar = {{10, 11}, {40, 33}},
+      .video_place = dv_place_422_525,
+      .pix_fmt = PIX_FMT_YUV422P,
+      .audio_stride = 90,
+      .audio_min_samples = { 1580, 1452, 1053 }, /* for 48, 44.1 and 32Khz */
+      .audio_samples_dist = { 1600, 1602, 1602, 1602, 1602 }, /* per SMPTE-314M */
+      .audio_shuffle = dv_audio_shuffle525,
+    },
+    { .dsf = 1,
+      .frame_size = 288000,        /* SMPTE-314M - 625/50 (PAL) 50 Mbps */
+      .difseg_size = 12,           /* also known as "DVCPRO50" */
+      .n_difchan = 2,
+      .frame_rate = 25,
+      .frame_rate_base = 1,
+      .ltc_divisor = 25,
+      .height = 576,
+      .width = 720,
+      .sar = {{59, 54}, {118, 81}},
+      .video_place = dv_place_422_625,
+      .pix_fmt = PIX_FMT_YUV422P,
+      .audio_stride = 108,
+      .audio_min_samples = { 1896, 1742, 1264 }, /* for 48, 44.1 and 32Khz */
+      .audio_samples_dist = { 1920, 1920, 1920, 1920, 1920 },
+      .audio_shuffle = dv_audio_shuffle625,
+    }
+};
+
+enum dv_section_type {
+     dv_sect_header  = 0x1f,
+     dv_sect_subcode = 0x3f,
+     dv_sect_vaux    = 0x56,
+     dv_sect_audio   = 0x76,
+     dv_sect_video   = 0x96,
+};
+
+enum dv_pack_type {
+     dv_header525     = 0x3f, /* see dv_write_pack for important details on */
+     dv_header625     = 0xbf, /* these two packs */
+     dv_timecode      = 0x13,
+     dv_audio_source  = 0x50,
+     dv_audio_control = 0x51,
+     dv_audio_recdate = 0x52,
+     dv_audio_rectime = 0x53,
+     dv_video_source  = 0x60,
+     dv_video_control = 0x61,
+     dv_video_recdate = 0x62,
+     dv_video_rectime = 0x63,
+     dv_unknown_pack  = 0xff,
+};
+
+/* minimum number of bytes to read from a DV stream in order to determine the profile */
+#define DV_PROFILE_BYTES (6*80) /* 6 DIF blocks */
+
+/* largest possible DV frame, in bytes (PAL 50Mbps) */
+#define DV_MAX_FRAME_SIZE 288000
+
+static inline const DVprofile* dv_frame_profile(uint8_t* frame)
+{
+    if ((frame[3] & 0x80) == 0) {      /* DSF flag */
+        /* it's an NTSC format */
+        if ((frame[80*5 + 48 + 3] & 0x4)) { /* 4:2:2 sampling */
+            return &dv_profiles[3]; /* NTSC 50Mbps */
+        } else { /* 4:1:1 sampling */
+            return &dv_profiles[0]; /* NTSC 25Mbps */
+        }
+    } else {
+        /* it's a PAL format */
+        if ((frame[80*5 + 48 + 3] & 0x4)) { /* 4:2:2 sampling */
+            return &dv_profiles[4]; /* PAL 50Mbps */
+        } else if ((frame[5] & 0x07) == 0) { /* APT flag */
+            return &dv_profiles[1]; /* PAL 25Mbps 4:2:0 */
+        } else
+            return &dv_profiles[2]; /* PAL 25Mbps 4:1:1 */
+    }
+}
+
+static inline const DVprofile* dv_codec_profile(AVCodecContext* codec)
+{
+    int i;
+
+    if (codec->width != 720)
+        return NULL;
+
+    for (i=0; i<sizeof(dv_profiles)/sizeof(DVprofile); i++)
+       if (codec->height == dv_profiles[i].height && codec->pix_fmt == dv_profiles[i].pix_fmt)
+           return &dv_profiles[i];
+
+    return NULL;
+}
+
+static inline int dv_write_dif_id(enum dv_section_type t, uint8_t chan_num, uint8_t seq_num,
+                                  uint8_t dif_num, uint8_t* buf)
+{
+    buf[0] = (uint8_t)t;    /* Section type */
+    buf[1] = (seq_num<<4) | /* DIF seq number 0-9 for 525/60; 0-11 for 625/50 */
+             (chan_num << 3) | /* FSC: for 50Mb/s 0 - first channel; 1 - second */
+             7;             /* reserved -- always 1 */
+    buf[2] = dif_num;       /* DIF block number Video: 0-134, Audio: 0-8 */
+    return 3;
+}
+
+
+static inline int dv_write_ssyb_id(uint8_t syb_num, uint8_t fr, uint8_t* buf)
+{
+    if (syb_num == 0 || syb_num == 6) {
+        buf[0] = (fr<<7) | /* FR ID 1 - first half of each channel; 0 - second */
+                 (0<<4)  | /* AP3 (Subcode application ID) */
+                 0x0f;     /* reserved -- always 1 */
+    }
+    else if (syb_num == 11) {
+        buf[0] = (fr<<7) | /* FR ID 1 - first half of each channel; 0 - second */
+                 0x7f;     /* reserved -- always 1 */
+    }
+    else {
+        buf[0] = (fr<<7) | /* FR ID 1 - first half of each channel; 0 - second */
+                 (0<<4)  | /* APT (Track application ID) */
+                 0x0f;     /* reserved -- always 1 */
+    }
+    buf[1] = 0xf0 |            /* reserved -- always 1 */
+             (syb_num & 0x0f); /* SSYB number 0 - 11   */
+    buf[2] = 0xff;             /* reserved -- always 1 */
+    return 3;
+}
diff --git a/contrib/ffmpeg/libavcodec/dvdsubdec.c b/contrib/ffmpeg/libavcodec/dvdsubdec.c
new file mode 100644
index 000000000..9a0dd7756
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/dvdsubdec.c
@@ -0,0 +1,477 @@
+/*
+ * DVD subtitle decoding for ffmpeg
+ * Copyright (c) 2005 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+
+//#define DEBUG
+
+static int dvdsub_init_decoder(AVCodecContext *avctx)
+{
+    return 0;
+}
+
+static uint16_t getbe16(const uint8_t *p)
+{
+    return (p[0] << 8) | p[1];
+}
+
+static int get_nibble(const uint8_t *buf, int nibble_offset)
+{
+    return (buf[nibble_offset >> 1] >> ((1 - (nibble_offset & 1)) << 2)) & 0xf;
+}
+
+static int decode_rle(uint8_t *bitmap, int linesize, int w, int h,
+                      const uint8_t *buf, int nibble_offset, int buf_size)
+{
+    unsigned int v;
+    int x, y, len, color, nibble_end;
+    uint8_t *d;
+
+    nibble_end = buf_size * 2;
+    x = 0;
+    y = 0;
+    d = bitmap;
+    for(;;) {
+        if (nibble_offset >= nibble_end)
+            return -1;
+        v = get_nibble(buf, nibble_offset++);
+        if (v < 0x4) {
+            v = (v << 4) | get_nibble(buf, nibble_offset++);
+            if (v < 0x10) {
+                v = (v << 4) | get_nibble(buf, nibble_offset++);
+                if (v < 0x040) {
+                    v = (v << 4) | get_nibble(buf, nibble_offset++);
+                    if (v < 4) {
+                        v |= (w - x) << 2;
+                    }
+                }
+            }
+        }
+        len = v >> 2;
+        if (len > (w - x))
+            len = (w - x);
+        color = v & 0x03;
+        memset(d + x, color, len);
+        x += len;
+        if (x >= w) {
+            y++;
+            if (y >= h)
+                break;
+            d += linesize;
+            x = 0;
+            /* byte align */
+            nibble_offset += (nibble_offset & 1);
+        }
+    }
+    return 0;
+}
+
+static void guess_palette(uint32_t *rgba_palette,
+                          uint8_t *palette,
+                          uint8_t *alpha,
+                          uint32_t subtitle_color)
+{
+    uint8_t color_used[16];
+    int nb_opaque_colors, i, level, j, r, g, b;
+
+    for(i = 0; i < 4; i++)
+        rgba_palette[i] = 0;
+
+    memset(color_used, 0, 16);
+    nb_opaque_colors = 0;
+    for(i = 0; i < 4; i++) {
+        if (alpha[i] != 0 && !color_used[palette[i]]) {
+            color_used[palette[i]] = 1;
+            nb_opaque_colors++;
+        }
+    }
+
+    if (nb_opaque_colors == 0)
+        return;
+
+    j = nb_opaque_colors;
+    memset(color_used, 0, 16);
+    for(i = 0; i < 4; i++) {
+        if (alpha[i] != 0) {
+            if (!color_used[palette[i]])  {
+                level = (0xff * j) / nb_opaque_colors;
+                r = (((subtitle_color >> 16) & 0xff) * level) >> 8;
+                g = (((subtitle_color >> 8) & 0xff) * level) >> 8;
+                b = (((subtitle_color >> 0) & 0xff) * level) >> 8;
+                rgba_palette[i] = b | (g << 8) | (r << 16) | ((alpha[i] * 17) << 24);
+                color_used[palette[i]] = (i + 1);
+                j--;
+            } else {
+                rgba_palette[i] = (rgba_palette[color_used[palette[i]] - 1] & 0x00ffffff) |
+                                    ((alpha[i] * 17) << 24);
+            }
+        }
+    }
+}
+
+static int decode_dvd_subtitles(AVSubtitle *sub_header,
+                                const uint8_t *buf, int buf_size)
+{
+    int cmd_pos, pos, cmd, x1, y1, x2, y2, offset1, offset2, next_cmd_pos;
+    uint8_t palette[4], alpha[4];
+    int date;
+    int i;
+    int is_menu = 0;
+
+    if (buf_size < 4)
+        return -1;
+    sub_header->rects = NULL;
+    sub_header->num_rects = 0;
+    sub_header->start_display_time = 0;
+    sub_header->end_display_time = 0;
+
+    cmd_pos = getbe16(buf + 2);
+    while ((cmd_pos + 4) < buf_size) {
+        date = getbe16(buf + cmd_pos);
+        next_cmd_pos = getbe16(buf + cmd_pos + 2);
+#ifdef DEBUG
+        av_log(NULL, AV_LOG_INFO, "cmd_pos=0x%04x next=0x%04x date=%d\n",
+               cmd_pos, next_cmd_pos, date);
+#endif
+        pos = cmd_pos + 4;
+        offset1 = -1;
+        offset2 = -1;
+        x1 = y1 = x2 = y2 = 0;
+        while (pos < buf_size) {
+            cmd = buf[pos++];
+#ifdef DEBUG
+            av_log(NULL, AV_LOG_INFO, "cmd=%02x\n", cmd);
+#endif
+            switch(cmd) {
+            case 0x00:
+                /* menu subpicture */
+                is_menu = 1;
+                break;
+            case 0x01:
+                /* set start date */
+                sub_header->start_display_time = (date << 10) / 90;
+                break;
+            case 0x02:
+                /* set end date */
+                sub_header->end_display_time = (date << 10) / 90;
+                break;
+            case 0x03:
+                /* set palette */
+                if ((buf_size - pos) < 2)
+                    goto fail;
+                palette[3] = buf[pos] >> 4;
+                palette[2] = buf[pos] & 0x0f;
+                palette[1] = buf[pos + 1] >> 4;
+                palette[0] = buf[pos + 1] & 0x0f;
+                pos += 2;
+                break;
+            case 0x04:
+                /* set alpha */
+                if ((buf_size - pos) < 2)
+                    goto fail;
+                alpha[3] = buf[pos] >> 4;
+                alpha[2] = buf[pos] & 0x0f;
+                alpha[1] = buf[pos + 1] >> 4;
+                alpha[0] = buf[pos + 1] & 0x0f;
+                pos += 2;
+#ifdef DEBUG
+            av_log(NULL, AV_LOG_INFO, "alpha=%x%x%x%x\n", alpha[0],alpha[1],alpha[2],alpha[3]);
+#endif
+                break;
+            case 0x05:
+                if ((buf_size - pos) < 6)
+                    goto fail;
+                x1 = (buf[pos] << 4) | (buf[pos + 1] >> 4);
+                x2 = ((buf[pos + 1] & 0x0f) << 8) | buf[pos + 2];
+                y1 = (buf[pos + 3] << 4) | (buf[pos + 4] >> 4);
+                y2 = ((buf[pos + 4] & 0x0f) << 8) | buf[pos + 5];
+#ifdef DEBUG
+                av_log(NULL, AV_LOG_INFO, "x1=%d x2=%d y1=%d y2=%d\n",
+                       x1, x2, y1, y2);
+#endif
+                pos += 6;
+                break;
+            case 0x06:
+                if ((buf_size - pos) < 4)
+                    goto fail;
+                offset1 = getbe16(buf + pos);
+                offset2 = getbe16(buf + pos + 2);
+#ifdef DEBUG
+                av_log(NULL, AV_LOG_INFO, "offset1=0x%04x offset2=0x%04x\n", offset1, offset2);
+#endif
+                pos += 4;
+                break;
+            case 0xff:
+            default:
+                goto the_end;
+            }
+        }
+    the_end:
+        if (offset1 >= 0) {
+            int w, h;
+            uint8_t *bitmap;
+
+            /* decode the bitmap */
+            w = x2 - x1 + 1;
+            if (w < 0)
+                w = 0;
+            h = y2 - y1;
+            if (h < 0)
+                h = 0;
+            if (w > 0 && h > 0) {
+                if (sub_header->rects != NULL) {
+                    for (i = 0; i < sub_header->num_rects; i++) {
+                        av_free(sub_header->rects[i].bitmap);
+                        av_free(sub_header->rects[i].rgba_palette);
+                    }
+                    av_freep(&sub_header->rects);
+                    sub_header->num_rects = 0;
+                }
+
+                bitmap = av_malloc(w * h);
+                sub_header->rects = av_mallocz(sizeof(AVSubtitleRect));
+                sub_header->num_rects = 1;
+                sub_header->rects[0].rgba_palette = av_malloc(4 * 4);
+                decode_rle(bitmap, w * 2, w, h / 2,
+                           buf, offset1 * 2, buf_size);
+                decode_rle(bitmap + w, w * 2, w, h / 2,
+                           buf, offset2 * 2, buf_size);
+                guess_palette(sub_header->rects[0].rgba_palette,
+                              palette, alpha, 0xffff00);
+                sub_header->rects[0].x = x1;
+                sub_header->rects[0].y = y1;
+                sub_header->rects[0].w = w;
+                sub_header->rects[0].h = h;
+                sub_header->rects[0].nb_colors = 4;
+                sub_header->rects[0].linesize = w;
+                sub_header->rects[0].bitmap = bitmap;
+            }
+        }
+        if (next_cmd_pos == cmd_pos)
+            break;
+        cmd_pos = next_cmd_pos;
+    }
+    if (sub_header->num_rects > 0)
+        return is_menu;
+ fail:
+    return -1;
+}
+
+static int is_transp(const uint8_t *buf, int pitch, int n,
+                     const uint8_t *transp_color)
+{
+    int i;
+    for(i = 0; i < n; i++) {
+        if (!transp_color[*buf])
+            return 0;
+        buf += pitch;
+    }
+    return 1;
+}
+
+/* return 0 if empty rectangle, 1 if non empty */
+static int find_smallest_bounding_rectangle(AVSubtitle *s)
+{
+    uint8_t transp_color[256];
+    int y1, y2, x1, x2, y, w, h, i;
+    uint8_t *bitmap;
+
+    if (s->num_rects == 0 || s->rects == NULL || s->rects[0].w <= 0 || s->rects[0].h <= 0)
+        return 0;
+
+    memset(transp_color, 0, 256);
+    for(i = 0; i < s->rects[0].nb_colors; i++) {
+        if ((s->rects[0].rgba_palette[i] >> 24) == 0)
+            transp_color[i] = 1;
+    }
+    y1 = 0;
+    while (y1 < s->rects[0].h && is_transp(s->rects[0].bitmap + y1 * s->rects[0].linesize,
+                                  1, s->rects[0].w, transp_color))
+        y1++;
+    if (y1 == s->rects[0].h) {
+        av_freep(&s->rects[0].bitmap);
+        s->rects[0].w = s->rects[0].h = 0;
+        return 0;
+    }
+
+    y2 = s->rects[0].h - 1;
+    while (y2 > 0 && is_transp(s->rects[0].bitmap + y2 * s->rects[0].linesize, 1,
+                               s->rects[0].w, transp_color))
+        y2--;
+    x1 = 0;
+    while (x1 < (s->rects[0].w - 1) && is_transp(s->rects[0].bitmap + x1, s->rects[0].linesize,
+                                        s->rects[0].h, transp_color))
+        x1++;
+    x2 = s->rects[0].w - 1;
+    while (x2 > 0 && is_transp(s->rects[0].bitmap + x2, s->rects[0].linesize, s->rects[0].h,
+                                  transp_color))
+        x2--;
+    w = x2 - x1 + 1;
+    h = y2 - y1 + 1;
+    bitmap = av_malloc(w * h);
+    if (!bitmap)
+        return 1;
+    for(y = 0; y < h; y++) {
+        memcpy(bitmap + w * y, s->rects[0].bitmap + x1 + (y1 + y) * s->rects[0].linesize, w);
+    }
+    av_freep(&s->rects[0].bitmap);
+    s->rects[0].bitmap = bitmap;
+    s->rects[0].linesize = w;
+    s->rects[0].w = w;
+    s->rects[0].h = h;
+    s->rects[0].x += x1;
+    s->rects[0].y += y1;
+    return 1;
+}
+
+static int dvdsub_close_decoder(AVCodecContext *avctx)
+{
+    return 0;
+}
+
+#ifdef DEBUG
+#undef fprintf
+static void ppm_save(const char *filename, uint8_t *bitmap, int w, int h,
+                     uint32_t *rgba_palette)
+{
+    int x, y, v;
+    FILE *f;
+
+    f = fopen(filename, "w");
+    if (!f) {
+        perror(filename);
+        exit(1);
+    }
+    fprintf(f, "P6\n"
+            "%d %d\n"
+            "%d\n",
+            w, h, 255);
+    for(y = 0; y < h; y++) {
+        for(x = 0; x < w; x++) {
+            v = rgba_palette[bitmap[y * w + x]];
+            putc((v >> 16) & 0xff, f);
+            putc((v >> 8) & 0xff, f);
+            putc((v >> 0) & 0xff, f);
+        }
+    }
+    fclose(f);
+}
+#endif
+
+static int dvdsub_decode(AVCodecContext *avctx,
+                         void *data, int *data_size,
+                         uint8_t *buf, int buf_size)
+{
+    AVSubtitle *sub = (void *)data;
+    int is_menu;
+
+    is_menu = decode_dvd_subtitles(sub, buf, buf_size);
+
+    if (is_menu < 0) {
+    no_subtitle:
+        *data_size = 0;
+
+        return buf_size;
+    }
+    if (!is_menu && find_smallest_bounding_rectangle(sub) == 0)
+        goto no_subtitle;
+
+#if defined(DEBUG)
+    av_log(NULL, AV_LOG_INFO, "start=%d ms end =%d ms\n",
+           sub->start_display_time,
+           sub->end_display_time);
+    ppm_save("/tmp/a.ppm", sub->rects[0].bitmap,
+             sub->rects[0].w, sub->rects[0].h, sub->rects[0].rgba_palette);
+#endif
+
+    *data_size = 1;
+    return buf_size;
+}
+
+AVCodec dvdsub_decoder = {
+    "dvdsub",
+    CODEC_TYPE_SUBTITLE,
+    CODEC_ID_DVD_SUBTITLE,
+    0,
+    dvdsub_init_decoder,
+    NULL,
+    dvdsub_close_decoder,
+    dvdsub_decode,
+};
+
+/* parser definition */
+typedef struct DVDSubParseContext {
+    uint8_t *packet;
+    int packet_len;
+    int packet_index;
+} DVDSubParseContext;
+
+static int dvdsub_parse_init(AVCodecParserContext *s)
+{
+    return 0;
+}
+
+static int dvdsub_parse(AVCodecParserContext *s,
+                        AVCodecContext *avctx,
+                        uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size)
+{
+    DVDSubParseContext *pc = s->priv_data;
+
+    if (pc->packet_index == 0) {
+        if (buf_size < 2)
+            return 0;
+        pc->packet_len = (buf[0] << 8) | buf[1];
+        av_freep(&pc->packet);
+        pc->packet = av_malloc(pc->packet_len);
+    }
+    if (pc->packet) {
+        if (pc->packet_index + buf_size <= pc->packet_len) {
+            memcpy(pc->packet + pc->packet_index, buf, buf_size);
+            pc->packet_index += buf_size;
+            if (pc->packet_index >= pc->packet_len) {
+                *poutbuf = pc->packet;
+                *poutbuf_size = pc->packet_len;
+                pc->packet_index = 0;
+                return buf_size;
+            }
+        } else {
+            /* erroneous size */
+            pc->packet_index = 0;
+        }
+    }
+    *poutbuf = NULL;
+    *poutbuf_size = 0;
+    return buf_size;
+}
+
+static void dvdsub_parse_close(AVCodecParserContext *s)
+{
+    DVDSubParseContext *pc = s->priv_data;
+    av_freep(&pc->packet);
+}
+
+AVCodecParser dvdsub_parser = {
+    { CODEC_ID_DVD_SUBTITLE },
+    sizeof(DVDSubParseContext),
+    dvdsub_parse_init,
+    dvdsub_parse,
+    dvdsub_parse_close,
+};
diff --git a/contrib/ffmpeg/libavcodec/dvdsubenc.c b/contrib/ffmpeg/libavcodec/dvdsubenc.c
new file mode 100644
index 000000000..fac29acc2
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/dvdsubenc.c
@@ -0,0 +1,247 @@
+/*
+ * DVD subtitle encoding for ffmpeg
+ * Copyright (c) 2005 Wolfram Gloger.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+// ncnt is the nibble counter
+#define PUTNIBBLE(val)\
+do {\
+    if (ncnt++ & 1)\
+        *q++ = bitbuf | ((val) & 0x0f);\
+    else\
+        bitbuf = (val) << 4;\
+} while(0)
+
+static void dvd_encode_rle(uint8_t **pq,
+                           const uint8_t *bitmap, int linesize,
+                           int w, int h,
+                           const int cmap[256])
+{
+    uint8_t *q;
+    unsigned int bitbuf = 0;
+    int ncnt;
+    int x, y, len, color;
+
+    q = *pq;
+
+    for (y = 0; y < h; ++y) {
+        ncnt = 0;
+        for(x = 0; x < w; x += len) {
+            color = bitmap[x];
+            for (len=1; x+len < w; ++len)
+                if (bitmap[x+len] != color)
+                    break;
+            color = cmap[color];
+            assert(color < 4);
+            if (len < 0x04) {
+                PUTNIBBLE((len << 2)|color);
+            } else if (len < 0x10) {
+                PUTNIBBLE(len >> 2);
+                PUTNIBBLE((len << 2)|color);
+            } else if (len < 0x40) {
+                PUTNIBBLE(0);
+                PUTNIBBLE(len >> 2);
+                PUTNIBBLE((len << 2)|color);
+            } else if (x+len == w) {
+                PUTNIBBLE(0);
+                PUTNIBBLE(0);
+                PUTNIBBLE(0);
+                PUTNIBBLE(color);
+            } else {
+                if (len > 0xff)
+                    len = 0xff;
+                PUTNIBBLE(0);
+                PUTNIBBLE(len >> 6);
+                PUTNIBBLE(len >> 2);
+                PUTNIBBLE((len << 2)|color);
+            }
+        }
+        /* end of line */
+        if (ncnt & 1)
+            PUTNIBBLE(0);
+        bitmap += linesize;
+    }
+
+    *pq = q;
+}
+
+static inline void putbe16(uint8_t **pq, uint16_t v)
+{
+    uint8_t *q = *pq;
+    *q++ = v >> 8;
+    *q++ = v;
+    *pq = q;
+}
+
+static int encode_dvd_subtitles(uint8_t *outbuf, int outbuf_size,
+                                const AVSubtitle *h)
+{
+    uint8_t *q, *qq;
+    int object_id;
+    int offset1[20], offset2[20];
+    int i, imax, color, alpha, rects = h->num_rects;
+    unsigned long hmax;
+    unsigned long hist[256];
+    int           cmap[256];
+
+    if (rects == 0 || h->rects == NULL)
+        return -1;
+    if (rects > 20)
+        rects = 20;
+
+    // analyze bitmaps, compress to 4 colors
+    for (i=0; i<256; ++i) {
+        hist[i] = 0;
+        cmap[i] = 0;
+    }
+    for (object_id = 0; object_id < rects; object_id++)
+        for (i=0; i<h->rects[object_id].w*h->rects[object_id].h; ++i) {
+            color = h->rects[object_id].bitmap[i];
+            // only count non-transparent pixels
+            alpha = h->rects[object_id].rgba_palette[color] >> 24;
+            hist[color] += alpha;
+        }
+    for (color=3;; --color) {
+        hmax = 0;
+        imax = 0;
+        for (i=0; i<256; ++i)
+            if (hist[i] > hmax) {
+                imax = i;
+                hmax = hist[i];
+            }
+        if (hmax == 0)
+            break;
+        if (color == 0)
+            color = 3;
+        av_log(NULL, AV_LOG_DEBUG, "dvd_subtitle hist[%d]=%ld -> col %d\n",
+               imax, hist[imax], color);
+        cmap[imax] = color;
+        hist[imax] = 0;
+    }
+
+
+    // encode data block
+    q = outbuf + 4;
+    for (object_id = 0; object_id < rects; object_id++) {
+        offset1[object_id] = q - outbuf;
+        // worst case memory requirement: 1 nibble per pixel..
+        if ((q - outbuf) + h->rects[object_id].w*h->rects[object_id].h/2
+            + 17*rects + 21 > outbuf_size) {
+            av_log(NULL, AV_LOG_ERROR, "dvd_subtitle too big\n");
+            return -1;
+        }
+        dvd_encode_rle(&q, h->rects[object_id].bitmap,
+                       h->rects[object_id].w*2,
+                       h->rects[object_id].w, h->rects[object_id].h >> 1,
+                       cmap);
+        offset2[object_id] = q - outbuf;
+        dvd_encode_rle(&q, h->rects[object_id].bitmap + h->rects[object_id].w,
+                       h->rects[object_id].w*2,
+                       h->rects[object_id].w, h->rects[object_id].h >> 1,
+                       cmap);
+    }
+
+    // set data packet size
+    qq = outbuf + 2;
+    putbe16(&qq, q - outbuf);
+
+    // send start display command
+    putbe16(&q, (h->start_display_time*90) >> 10);
+    putbe16(&q, (q - outbuf) /*- 2 */ + 8 + 12*rects + 2);
+    *q++ = 0x03; // palette - 4 nibbles
+    *q++ = 0x03; *q++ = 0x7f;
+    *q++ = 0x04; // alpha - 4 nibbles
+    *q++ = 0xf0; *q++ = 0x00;
+    //*q++ = 0x0f; *q++ = 0xff;
+
+    // XXX not sure if more than one rect can really be encoded..
+    // 12 bytes per rect
+    for (object_id = 0; object_id < rects; object_id++) {
+        int x2 = h->rects[object_id].x + h->rects[object_id].w - 1;
+        int y2 = h->rects[object_id].y + h->rects[object_id].h - 1;
+
+        *q++ = 0x05;
+        // x1 x2 -> 6 nibbles
+        *q++ = h->rects[object_id].x >> 4;
+        *q++ = (h->rects[object_id].x << 4) | ((x2 >> 8) & 0xf);
+        *q++ = x2;
+        // y1 y2 -> 6 nibbles
+        *q++ = h->rects[object_id].y >> 4;
+        *q++ = (h->rects[object_id].y << 4) | ((y2 >> 8) & 0xf);
+        *q++ = y2;
+
+        *q++ = 0x06;
+        // offset1, offset2
+        putbe16(&q, offset1[object_id]);
+        putbe16(&q, offset2[object_id]);
+    }
+    *q++ = 0x01; // start command
+    *q++ = 0xff; // terminating command
+
+    // send stop display command last
+    putbe16(&q, (h->end_display_time*90) >> 10);
+    putbe16(&q, (q - outbuf) - 2 /*+ 4*/);
+    *q++ = 0x02; // set end
+    *q++ = 0xff; // terminating command
+
+    qq = outbuf;
+    putbe16(&qq, q - outbuf);
+
+    av_log(NULL, AV_LOG_DEBUG, "subtitle_packet size=%td\n", q - outbuf);
+    return q - outbuf;
+}
+
+static int dvdsub_init_encoder(AVCodecContext *avctx)
+{
+    return 0;
+}
+
+static int dvdsub_close_encoder(AVCodecContext *avctx)
+{
+    return 0;
+}
+
+static int dvdsub_encode(AVCodecContext *avctx,
+                         unsigned char *buf, int buf_size, void *data)
+{
+    //DVDSubtitleContext *s = avctx->priv_data;
+    AVSubtitle *sub = data;
+    int ret;
+
+    ret = encode_dvd_subtitles(buf, buf_size, sub);
+    return ret;
+}
+
+AVCodec dvdsub_encoder = {
+    "dvdsub",
+    CODEC_TYPE_SUBTITLE,
+    CODEC_ID_DVD_SUBTITLE,
+    0,
+    dvdsub_init_encoder,
+    dvdsub_encode,
+    dvdsub_close_encoder,
+};
+
+/* Local Variables: */
+/* c-basic-offset:4 */
+/* End: */
diff --git a/contrib/ffmpeg/libavcodec/error_resilience.c b/contrib/ffmpeg/libavcodec/error_resilience.c
new file mode 100644
index 000000000..0923721ee
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/error_resilience.c
@@ -0,0 +1,1030 @@
+/*
+ * Error resilience / concealment
+ *
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file error_resilience.c
+ * Error resilience / concealment.
+ */
+
+#include <limits.h>
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+#include "common.h"
+
+static void decode_mb(MpegEncContext *s){
+    s->dest[0] = s->current_picture.data[0] + (s->mb_y * 16* s->linesize  ) + s->mb_x * 16;
+    s->dest[1] = s->current_picture.data[1] + (s->mb_y * 8 * s->uvlinesize) + s->mb_x * 8;
+    s->dest[2] = s->current_picture.data[2] + (s->mb_y * 8 * s->uvlinesize) + s->mb_x * 8;
+
+    MPV_decode_mb(s, s->block);
+}
+
+/**
+ * replaces the current MB with a flat dc only version.
+ */
+static void put_dc(MpegEncContext *s, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int mb_x, int mb_y)
+{
+    int dc, dcu, dcv, y, i;
+    for(i=0; i<4; i++){
+        dc= s->dc_val[0][mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*s->b8_stride];
+        if(dc<0) dc=0;
+        else if(dc>2040) dc=2040;
+        for(y=0; y<8; y++){
+            int x;
+            for(x=0; x<8; x++){
+                dest_y[x + (i&1)*8 + (y + (i>>1)*8)*s->linesize]= dc/8;
+            }
+        }
+    }
+    dcu = s->dc_val[1][mb_x + mb_y*s->mb_stride];
+    dcv = s->dc_val[2][mb_x + mb_y*s->mb_stride];
+    if     (dcu<0   ) dcu=0;
+    else if(dcu>2040) dcu=2040;
+    if     (dcv<0   ) dcv=0;
+    else if(dcv>2040) dcv=2040;
+    for(y=0; y<8; y++){
+        int x;
+        for(x=0; x<8; x++){
+            dest_cb[x + y*(s->uvlinesize)]= dcu/8;
+            dest_cr[x + y*(s->uvlinesize)]= dcv/8;
+        }
+    }
+}
+
+static void filter181(int16_t *data, int width, int height, int stride){
+    int x,y;
+
+    /* horizontal filter */
+    for(y=1; y<height-1; y++){
+        int prev_dc= data[0 + y*stride];
+
+        for(x=1; x<width-1; x++){
+            int dc;
+
+            dc= - prev_dc
+                + data[x     + y*stride]*8
+                - data[x + 1 + y*stride];
+            dc= (dc*10923 + 32768)>>16;
+            prev_dc= data[x + y*stride];
+            data[x + y*stride]= dc;
+        }
+    }
+
+    /* vertical filter */
+    for(x=1; x<width-1; x++){
+        int prev_dc= data[x];
+
+        for(y=1; y<height-1; y++){
+            int dc;
+
+            dc= - prev_dc
+                + data[x +  y   *stride]*8
+                - data[x + (y+1)*stride];
+            dc= (dc*10923 + 32768)>>16;
+            prev_dc= data[x + y*stride];
+            data[x + y*stride]= dc;
+        }
+    }
+}
+
+/**
+ * guess the dc of blocks which dont have a undamaged dc
+ * @param w     width in 8 pixel blocks
+ * @param h     height in 8 pixel blocks
+ */
+static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, int is_luma){
+    int b_x, b_y;
+
+    for(b_y=0; b_y<h; b_y++){
+        for(b_x=0; b_x<w; b_x++){
+            int color[4]={1024,1024,1024,1024};
+            int distance[4]={9999,9999,9999,9999};
+            int mb_index, error, j;
+            int64_t guess, weight_sum;
+
+            mb_index= (b_x>>is_luma) + (b_y>>is_luma)*s->mb_stride;
+
+            error= s->error_status_table[mb_index];
+
+            if(IS_INTER(s->current_picture.mb_type[mb_index])) continue; //inter
+            if(!(error&DC_ERROR)) continue;           //dc-ok
+
+            /* right block */
+            for(j=b_x+1; j<w; j++){
+                int mb_index_j= (j>>is_luma) + (b_y>>is_luma)*s->mb_stride;
+                int error_j= s->error_status_table[mb_index_j];
+                int intra_j= IS_INTRA(s->current_picture.mb_type[mb_index_j]);
+                if(intra_j==0 || !(error_j&DC_ERROR)){
+                    color[0]= dc[j + b_y*stride];
+                    distance[0]= j-b_x;
+                    break;
+                }
+            }
+
+            /* left block */
+            for(j=b_x-1; j>=0; j--){
+                int mb_index_j= (j>>is_luma) + (b_y>>is_luma)*s->mb_stride;
+                int error_j= s->error_status_table[mb_index_j];
+                int intra_j= IS_INTRA(s->current_picture.mb_type[mb_index_j]);
+                if(intra_j==0 || !(error_j&DC_ERROR)){
+                    color[1]= dc[j + b_y*stride];
+                    distance[1]= b_x-j;
+                    break;
+                }
+            }
+
+            /* bottom block */
+            for(j=b_y+1; j<h; j++){
+                int mb_index_j= (b_x>>is_luma) + (j>>is_luma)*s->mb_stride;
+                int error_j= s->error_status_table[mb_index_j];
+                int intra_j= IS_INTRA(s->current_picture.mb_type[mb_index_j]);
+                if(intra_j==0 || !(error_j&DC_ERROR)){
+                    color[2]= dc[b_x + j*stride];
+                    distance[2]= j-b_y;
+                    break;
+                }
+            }
+
+            /* top block */
+            for(j=b_y-1; j>=0; j--){
+                int mb_index_j= (b_x>>is_luma) + (j>>is_luma)*s->mb_stride;
+                int error_j= s->error_status_table[mb_index_j];
+                int intra_j= IS_INTRA(s->current_picture.mb_type[mb_index_j]);
+                if(intra_j==0 || !(error_j&DC_ERROR)){
+                    color[3]= dc[b_x + j*stride];
+                    distance[3]= b_y-j;
+                    break;
+                }
+            }
+
+            weight_sum=0;
+            guess=0;
+            for(j=0; j<4; j++){
+                int64_t weight= 256*256*256*16/distance[j];
+                guess+= weight*(int64_t)color[j];
+                weight_sum+= weight;
+            }
+            guess= (guess + weight_sum/2) / weight_sum;
+
+            dc[b_x + b_y*stride]= guess;
+        }
+    }
+}
+
+/**
+ * simple horizontal deblocking filter used for error resilience
+ * @param w     width in 8 pixel blocks
+ * @param h     height in 8 pixel blocks
+ */
+static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){
+    int b_x, b_y;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    for(b_y=0; b_y<h; b_y++){
+        for(b_x=0; b_x<w-1; b_x++){
+            int y;
+            int left_status = s->error_status_table[( b_x   >>is_luma) + (b_y>>is_luma)*s->mb_stride];
+            int right_status= s->error_status_table[((b_x+1)>>is_luma) + (b_y>>is_luma)*s->mb_stride];
+            int left_intra=   IS_INTRA(s->current_picture.mb_type      [( b_x   >>is_luma) + (b_y>>is_luma)*s->mb_stride]);
+            int right_intra=  IS_INTRA(s->current_picture.mb_type      [((b_x+1)>>is_luma) + (b_y>>is_luma)*s->mb_stride]);
+            int left_damage =  left_status&(DC_ERROR|AC_ERROR|MV_ERROR);
+            int right_damage= right_status&(DC_ERROR|AC_ERROR|MV_ERROR);
+            int offset= b_x*8 + b_y*stride*8;
+            int16_t *left_mv=  s->current_picture.motion_val[0][s->b8_stride*(b_y<<(1-is_luma)) + ( b_x   <<(1-is_luma))];
+            int16_t *right_mv= s->current_picture.motion_val[0][s->b8_stride*(b_y<<(1-is_luma)) + ((b_x+1)<<(1-is_luma))];
+
+            if(!(left_damage||right_damage)) continue; // both undamaged
+
+            if(   (!left_intra) && (!right_intra)
+               && FFABS(left_mv[0]-right_mv[0]) + FFABS(left_mv[1]+right_mv[1]) < 2) continue;
+
+            for(y=0; y<8; y++){
+                int a,b,c,d;
+
+                a= dst[offset + 7 + y*stride] - dst[offset + 6 + y*stride];
+                b= dst[offset + 8 + y*stride] - dst[offset + 7 + y*stride];
+                c= dst[offset + 9 + y*stride] - dst[offset + 8 + y*stride];
+
+                d= FFABS(b) - ((FFABS(a) + FFABS(c) + 1)>>1);
+                d= FFMAX(d, 0);
+                if(b<0) d= -d;
+
+                if(d==0) continue;
+
+                if(!(left_damage && right_damage))
+                    d= d*16/9;
+
+                if(left_damage){
+                    dst[offset + 7 + y*stride] = cm[dst[offset + 7 + y*stride] + ((d*7)>>4)];
+                    dst[offset + 6 + y*stride] = cm[dst[offset + 6 + y*stride] + ((d*5)>>4)];
+                    dst[offset + 5 + y*stride] = cm[dst[offset + 5 + y*stride] + ((d*3)>>4)];
+                    dst[offset + 4 + y*stride] = cm[dst[offset + 4 + y*stride] + ((d*1)>>4)];
+                }
+                if(right_damage){
+                    dst[offset + 8 + y*stride] = cm[dst[offset + 8 + y*stride] - ((d*7)>>4)];
+                    dst[offset + 9 + y*stride] = cm[dst[offset + 9 + y*stride] - ((d*5)>>4)];
+                    dst[offset + 10+ y*stride] = cm[dst[offset +10 + y*stride] - ((d*3)>>4)];
+                    dst[offset + 11+ y*stride] = cm[dst[offset +11 + y*stride] - ((d*1)>>4)];
+                }
+            }
+        }
+    }
+}
+
+/**
+ * simple vertical deblocking filter used for error resilience
+ * @param w     width in 8 pixel blocks
+ * @param h     height in 8 pixel blocks
+ */
+static void v_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){
+    int b_x, b_y;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    for(b_y=0; b_y<h-1; b_y++){
+        for(b_x=0; b_x<w; b_x++){
+            int x;
+            int top_status   = s->error_status_table[(b_x>>is_luma) + ( b_y   >>is_luma)*s->mb_stride];
+            int bottom_status= s->error_status_table[(b_x>>is_luma) + ((b_y+1)>>is_luma)*s->mb_stride];
+            int top_intra=     IS_INTRA(s->current_picture.mb_type      [(b_x>>is_luma) + ( b_y   >>is_luma)*s->mb_stride]);
+            int bottom_intra=  IS_INTRA(s->current_picture.mb_type      [(b_x>>is_luma) + ((b_y+1)>>is_luma)*s->mb_stride]);
+            int top_damage =      top_status&(DC_ERROR|AC_ERROR|MV_ERROR);
+            int bottom_damage= bottom_status&(DC_ERROR|AC_ERROR|MV_ERROR);
+            int offset= b_x*8 + b_y*stride*8;
+            int16_t *top_mv=    s->current_picture.motion_val[0][s->b8_stride*( b_y   <<(1-is_luma)) + (b_x<<(1-is_luma))];
+            int16_t *bottom_mv= s->current_picture.motion_val[0][s->b8_stride*((b_y+1)<<(1-is_luma)) + (b_x<<(1-is_luma))];
+
+            if(!(top_damage||bottom_damage)) continue; // both undamaged
+
+            if(   (!top_intra) && (!bottom_intra)
+               && FFABS(top_mv[0]-bottom_mv[0]) + FFABS(top_mv[1]+bottom_mv[1]) < 2) continue;
+
+            for(x=0; x<8; x++){
+                int a,b,c,d;
+
+                a= dst[offset + x + 7*stride] - dst[offset + x + 6*stride];
+                b= dst[offset + x + 8*stride] - dst[offset + x + 7*stride];
+                c= dst[offset + x + 9*stride] - dst[offset + x + 8*stride];
+
+                d= FFABS(b) - ((FFABS(a) + FFABS(c)+1)>>1);
+                d= FFMAX(d, 0);
+                if(b<0) d= -d;
+
+                if(d==0) continue;
+
+                if(!(top_damage && bottom_damage))
+                    d= d*16/9;
+
+                if(top_damage){
+                    dst[offset + x +  7*stride] = cm[dst[offset + x +  7*stride] + ((d*7)>>4)];
+                    dst[offset + x +  6*stride] = cm[dst[offset + x +  6*stride] + ((d*5)>>4)];
+                    dst[offset + x +  5*stride] = cm[dst[offset + x +  5*stride] + ((d*3)>>4)];
+                    dst[offset + x +  4*stride] = cm[dst[offset + x +  4*stride] + ((d*1)>>4)];
+                }
+                if(bottom_damage){
+                    dst[offset + x +  8*stride] = cm[dst[offset + x +  8*stride] - ((d*7)>>4)];
+                    dst[offset + x +  9*stride] = cm[dst[offset + x +  9*stride] - ((d*5)>>4)];
+                    dst[offset + x + 10*stride] = cm[dst[offset + x + 10*stride] - ((d*3)>>4)];
+                    dst[offset + x + 11*stride] = cm[dst[offset + x + 11*stride] - ((d*1)>>4)];
+                }
+            }
+        }
+    }
+}
+
+static void guess_mv(MpegEncContext *s){
+    uint8_t fixed[s->mb_stride * s->mb_height];
+#define MV_FROZEN    3
+#define MV_CHANGED   2
+#define MV_UNCHANGED 1
+    const int mb_stride = s->mb_stride;
+    const int mb_width = s->mb_width;
+    const int mb_height= s->mb_height;
+    int i, depth, num_avail;
+    int mb_x, mb_y;
+
+    num_avail=0;
+    for(i=0; i<s->mb_num; i++){
+        const int mb_xy= s->mb_index2xy[ i ];
+        int f=0;
+        int error= s->error_status_table[mb_xy];
+
+        if(IS_INTRA(s->current_picture.mb_type[mb_xy])) f=MV_FROZEN; //intra //FIXME check
+        if(!(error&MV_ERROR)) f=MV_FROZEN;           //inter with undamaged MV
+
+        fixed[mb_xy]= f;
+        if(f==MV_FROZEN)
+            num_avail++;
+    }
+
+    if((!(s->avctx->error_concealment&FF_EC_GUESS_MVS)) || num_avail <= mb_width/2){
+        for(mb_y=0; mb_y<s->mb_height; mb_y++){
+            for(mb_x=0; mb_x<s->mb_width; mb_x++){
+                const int mb_xy= mb_x + mb_y*s->mb_stride;
+
+                if(IS_INTRA(s->current_picture.mb_type[mb_xy]))  continue;
+                if(!(s->error_status_table[mb_xy]&MV_ERROR)) continue;
+
+                s->mv_dir = MV_DIR_FORWARD;
+                s->mb_intra=0;
+                s->mv_type = MV_TYPE_16X16;
+                s->mb_skipped=0;
+
+                s->dsp.clear_blocks(s->block[0]);
+
+                s->mb_x= mb_x;
+                s->mb_y= mb_y;
+                s->mv[0][0][0]= 0;
+                s->mv[0][0][1]= 0;
+                decode_mb(s);
+            }
+        }
+        return;
+    }
+
+    for(depth=0;; depth++){
+        int changed, pass, none_left;
+
+        none_left=1;
+        changed=1;
+        for(pass=0; (changed || pass<2) && pass<10; pass++){
+            int mb_x, mb_y;
+int score_sum=0;
+
+            changed=0;
+            for(mb_y=0; mb_y<s->mb_height; mb_y++){
+                for(mb_x=0; mb_x<s->mb_width; mb_x++){
+                    const int mb_xy= mb_x + mb_y*s->mb_stride;
+                    int mv_predictor[8][2]={{0}};
+                    int pred_count=0;
+                    int j;
+                    int best_score=256*256*256*64;
+                    int best_pred=0;
+                    const int mot_stride= s->b8_stride;
+                    const int mot_index= mb_x*2 + mb_y*2*mot_stride;
+                    int prev_x= s->current_picture.motion_val[0][mot_index][0];
+                    int prev_y= s->current_picture.motion_val[0][mot_index][1];
+
+                    if((mb_x^mb_y^pass)&1) continue;
+
+                    if(fixed[mb_xy]==MV_FROZEN) continue;
+                    assert(!IS_INTRA(s->current_picture.mb_type[mb_xy]));
+                    assert(s->last_picture_ptr && s->last_picture_ptr->data[0]);
+
+                    j=0;
+                    if(mb_x>0           && fixed[mb_xy-1        ]==MV_FROZEN) j=1;
+                    if(mb_x+1<mb_width  && fixed[mb_xy+1        ]==MV_FROZEN) j=1;
+                    if(mb_y>0           && fixed[mb_xy-mb_stride]==MV_FROZEN) j=1;
+                    if(mb_y+1<mb_height && fixed[mb_xy+mb_stride]==MV_FROZEN) j=1;
+                    if(j==0) continue;
+
+                    j=0;
+                    if(mb_x>0           && fixed[mb_xy-1        ]==MV_CHANGED) j=1;
+                    if(mb_x+1<mb_width  && fixed[mb_xy+1        ]==MV_CHANGED) j=1;
+                    if(mb_y>0           && fixed[mb_xy-mb_stride]==MV_CHANGED) j=1;
+                    if(mb_y+1<mb_height && fixed[mb_xy+mb_stride]==MV_CHANGED) j=1;
+                    if(j==0 && pass>1) continue;
+
+                    none_left=0;
+
+                    if(mb_x>0 && fixed[mb_xy-1]){
+                        mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index - 2][0];
+                        mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index - 2][1];
+                        pred_count++;
+                    }
+                    if(mb_x+1<mb_width && fixed[mb_xy+1]){
+                        mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index + 2][0];
+                        mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index + 2][1];
+                        pred_count++;
+                    }
+                    if(mb_y>0 && fixed[mb_xy-mb_stride]){
+                        mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index - mot_stride*2][0];
+                        mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index - mot_stride*2][1];
+                        pred_count++;
+                    }
+                    if(mb_y+1<mb_height && fixed[mb_xy+mb_stride]){
+                        mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index + mot_stride*2][0];
+                        mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index + mot_stride*2][1];
+                        pred_count++;
+                    }
+                    if(pred_count==0) continue;
+
+                    if(pred_count>1){
+                        int sum_x=0, sum_y=0;
+                        int max_x, max_y, min_x, min_y;
+
+                        for(j=0; j<pred_count; j++){
+                            sum_x+= mv_predictor[j][0];
+                            sum_y+= mv_predictor[j][1];
+                        }
+
+                        /* mean */
+                        mv_predictor[pred_count][0] = sum_x/j;
+                        mv_predictor[pred_count][1] = sum_y/j;
+
+                        /* median */
+                        if(pred_count>=3){
+                            min_y= min_x= 99999;
+                            max_y= max_x=-99999;
+                        }else{
+                            min_x=min_y=max_x=max_y=0;
+                        }
+                        for(j=0; j<pred_count; j++){
+                            max_x= FFMAX(max_x, mv_predictor[j][0]);
+                            max_y= FFMAX(max_y, mv_predictor[j][1]);
+                            min_x= FFMIN(min_x, mv_predictor[j][0]);
+                            min_y= FFMIN(min_y, mv_predictor[j][1]);
+                        }
+                        mv_predictor[pred_count+1][0] = sum_x - max_x - min_x;
+                        mv_predictor[pred_count+1][1] = sum_y - max_y - min_y;
+
+                        if(pred_count==4){
+                            mv_predictor[pred_count+1][0] /= 2;
+                            mv_predictor[pred_count+1][1] /= 2;
+                        }
+                        pred_count+=2;
+                    }
+
+                    /* zero MV */
+                    pred_count++;
+
+                    /* last MV */
+                    mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index][0];
+                    mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index][1];
+                    pred_count++;
+
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mb_intra=0;
+                    s->mv_type = MV_TYPE_16X16;
+                    s->mb_skipped=0;
+
+                    s->dsp.clear_blocks(s->block[0]);
+
+                    s->mb_x= mb_x;
+                    s->mb_y= mb_y;
+
+                    for(j=0; j<pred_count; j++){
+                        int score=0;
+                        uint8_t *src= s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize;
+
+                        s->current_picture.motion_val[0][mot_index][0]= s->mv[0][0][0]= mv_predictor[j][0];
+                        s->current_picture.motion_val[0][mot_index][1]= s->mv[0][0][1]= mv_predictor[j][1];
+
+                        decode_mb(s);
+
+                        if(mb_x>0 && fixed[mb_xy-1]){
+                            int k;
+                            for(k=0; k<16; k++)
+                                score += FFABS(src[k*s->linesize-1 ]-src[k*s->linesize   ]);
+                        }
+                        if(mb_x+1<mb_width && fixed[mb_xy+1]){
+                            int k;
+                            for(k=0; k<16; k++)
+                                score += FFABS(src[k*s->linesize+15]-src[k*s->linesize+16]);
+                        }
+                        if(mb_y>0 && fixed[mb_xy-mb_stride]){
+                            int k;
+                            for(k=0; k<16; k++)
+                                score += FFABS(src[k-s->linesize   ]-src[k               ]);
+                        }
+                        if(mb_y+1<mb_height && fixed[mb_xy+mb_stride]){
+                            int k;
+                            for(k=0; k<16; k++)
+                                score += FFABS(src[k+s->linesize*15]-src[k+s->linesize*16]);
+                        }
+
+                        if(score <= best_score){ // <= will favor the last MV
+                            best_score= score;
+                            best_pred= j;
+                        }
+                    }
+score_sum+= best_score;
+//FIXME no need to set s->current_picture.motion_val[0][mot_index][0] explicit
+                    s->current_picture.motion_val[0][mot_index][0]= s->mv[0][0][0]= mv_predictor[best_pred][0];
+                    s->current_picture.motion_val[0][mot_index][1]= s->mv[0][0][1]= mv_predictor[best_pred][1];
+
+                    decode_mb(s);
+
+
+                    if(s->mv[0][0][0] != prev_x || s->mv[0][0][1] != prev_y){
+                        fixed[mb_xy]=MV_CHANGED;
+                        changed++;
+                    }else
+                        fixed[mb_xy]=MV_UNCHANGED;
+                }
+            }
+
+//            printf(".%d/%d", changed, score_sum); fflush(stdout);
+        }
+
+        if(none_left)
+            return;
+
+        for(i=0; i<s->mb_num; i++){
+            int mb_xy= s->mb_index2xy[i];
+            if(fixed[mb_xy])
+                fixed[mb_xy]=MV_FROZEN;
+        }
+//        printf(":"); fflush(stdout);
+    }
+}
+
+static int is_intra_more_likely(MpegEncContext *s){
+    int is_intra_likely, i, j, undamaged_count, skip_amount, mb_x, mb_y;
+
+    if(s->last_picture_ptr==NULL) return 1; //no previous frame available -> use spatial prediction
+
+    undamaged_count=0;
+    for(i=0; i<s->mb_num; i++){
+        const int mb_xy= s->mb_index2xy[i];
+        const int error= s->error_status_table[mb_xy];
+        if(!((error&DC_ERROR) && (error&MV_ERROR)))
+            undamaged_count++;
+    }
+
+    if(undamaged_count < 5) return 0; //allmost all MBs damaged -> use temporal prediction
+
+    skip_amount= FFMAX(undamaged_count/50, 1); //check only upto 50 MBs
+    is_intra_likely=0;
+
+    j=0;
+    for(mb_y= 0; mb_y<s->mb_height-1; mb_y++){
+        for(mb_x= 0; mb_x<s->mb_width; mb_x++){
+            int error;
+            const int mb_xy= mb_x + mb_y*s->mb_stride;
+
+            error= s->error_status_table[mb_xy];
+            if((error&DC_ERROR) && (error&MV_ERROR))
+                continue; //skip damaged
+
+            j++;
+            if((j%skip_amount) != 0) continue; //skip a few to speed things up
+
+            if(s->pict_type==I_TYPE){
+                uint8_t *mb_ptr     = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize;
+                uint8_t *last_mb_ptr= s->last_picture.data   [0] + mb_x*16 + mb_y*16*s->linesize;
+
+                is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr                    , s->linesize, 16);
+                is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16);
+            }else{
+                if(IS_INTRA(s->current_picture.mb_type[mb_xy]))
+                   is_intra_likely++;
+                else
+                   is_intra_likely--;
+            }
+        }
+    }
+//printf("is_intra_likely: %d type:%d\n", is_intra_likely, s->pict_type);
+    return is_intra_likely > 0;
+}
+
+void ff_er_frame_start(MpegEncContext *s){
+    if(!s->error_resilience) return;
+
+    memset(s->error_status_table, MV_ERROR|AC_ERROR|DC_ERROR|VP_START|AC_END|DC_END|MV_END, s->mb_stride*s->mb_height*sizeof(uint8_t));
+    s->error_count= 3*s->mb_num;
+}
+
+/**
+ * adds a slice.
+ * @param endx x component of the last macroblock, can be -1 for the last of the previous line
+ * @param status the status at the end (MV_END, AC_ERROR, ...), it is assumed that no earlier end or
+ *               error of the same type occured
+ */
+void ff_er_add_slice(MpegEncContext *s, int startx, int starty, int endx, int endy, int status){
+    const int start_i= clip(startx + starty * s->mb_width    , 0, s->mb_num-1);
+    const int end_i  = clip(endx   + endy   * s->mb_width    , 0, s->mb_num);
+    const int start_xy= s->mb_index2xy[start_i];
+    const int end_xy  = s->mb_index2xy[end_i];
+    int mask= -1;
+
+    if(!s->error_resilience) return;
+
+    mask &= ~VP_START;
+    if(status & (AC_ERROR|AC_END)){
+        mask &= ~(AC_ERROR|AC_END);
+        s->error_count -= end_i - start_i + 1;
+    }
+    if(status & (DC_ERROR|DC_END)){
+        mask &= ~(DC_ERROR|DC_END);
+        s->error_count -= end_i - start_i + 1;
+    }
+    if(status & (MV_ERROR|MV_END)){
+        mask &= ~(MV_ERROR|MV_END);
+        s->error_count -= end_i - start_i + 1;
+    }
+
+    if(status & (AC_ERROR|DC_ERROR|MV_ERROR)) s->error_count= INT_MAX;
+
+    if(mask == ~0x7F){
+        memset(&s->error_status_table[start_xy], 0, (end_xy - start_xy) * sizeof(uint8_t));
+    }else{
+        int i;
+        for(i=start_xy; i<end_xy; i++){
+            s->error_status_table[ i ] &= mask;
+        }
+    }
+
+    if(end_i == s->mb_num)
+        s->error_count= INT_MAX;
+    else{
+        s->error_status_table[end_xy] &= mask;
+        s->error_status_table[end_xy] |= status;
+    }
+
+    s->error_status_table[start_xy] |= VP_START;
+
+    if(start_xy > 0 && s->avctx->thread_count <= 1 && s->avctx->skip_top*s->mb_width < start_i){
+        int prev_status= s->error_status_table[ s->mb_index2xy[start_i - 1] ];
+
+        prev_status &= ~ VP_START;
+        if(prev_status != (MV_END|DC_END|AC_END)) s->error_count= INT_MAX;
+    }
+}
+
+void ff_er_frame_end(MpegEncContext *s){
+    int i, mb_x, mb_y, error, error_type, dc_error, mv_error, ac_error;
+    int distance;
+    int threshold_part[4]= {100,100,100};
+    int threshold= 50;
+    int is_intra_likely;
+    int size = s->b8_stride * 2 * s->mb_height;
+    Picture *pic= s->current_picture_ptr;
+
+    if(!s->error_resilience || s->error_count==0 ||
+       s->error_count==3*s->mb_width*(s->avctx->skip_top + s->avctx->skip_bottom)) return;
+
+    if(s->current_picture.motion_val[0] == NULL){
+        av_log(s->avctx, AV_LOG_ERROR, "Warning MVs not available\n");
+
+        for(i=0; i<2; i++){
+            pic->ref_index[i]= av_mallocz(size * sizeof(uint8_t));
+            pic->motion_val_base[i]= av_mallocz((size+4) * 2 * sizeof(uint16_t));
+            pic->motion_val[i]= pic->motion_val_base[i]+4;
+        }
+        pic->motion_subsample_log2= 3;
+        s->current_picture= *s->current_picture_ptr;
+    }
+
+    for(i=0; i<2; i++){
+        if(pic->ref_index[i])
+            memset(pic->ref_index[i], 0, size * sizeof(uint8_t));
+    }
+
+    if(s->avctx->debug&FF_DEBUG_ER){
+        for(mb_y=0; mb_y<s->mb_height; mb_y++){
+            for(mb_x=0; mb_x<s->mb_width; mb_x++){
+                int status= s->error_status_table[mb_x + mb_y*s->mb_stride];
+
+                av_log(s->avctx, AV_LOG_DEBUG, "%2X ", status);
+            }
+            av_log(s->avctx, AV_LOG_DEBUG, "\n");
+        }
+    }
+
+#if 1
+    /* handle overlapping slices */
+    for(error_type=1; error_type<=3; error_type++){
+        int end_ok=0;
+
+        for(i=s->mb_num-1; i>=0; i--){
+            const int mb_xy= s->mb_index2xy[i];
+            int error= s->error_status_table[mb_xy];
+
+            if(error&(1<<error_type))
+                end_ok=1;
+            if(error&(8<<error_type))
+                end_ok=1;
+
+            if(!end_ok)
+                s->error_status_table[mb_xy]|= 1<<error_type;
+
+            if(error&VP_START)
+                end_ok=0;
+        }
+    }
+#endif
+#if 1
+    /* handle slices with partitions of different length */
+    if(s->partitioned_frame){
+        int end_ok=0;
+
+        for(i=s->mb_num-1; i>=0; i--){
+            const int mb_xy= s->mb_index2xy[i];
+            int error= s->error_status_table[mb_xy];
+
+            if(error&AC_END)
+                end_ok=0;
+            if((error&MV_END) || (error&DC_END) || (error&AC_ERROR))
+                end_ok=1;
+
+            if(!end_ok)
+                s->error_status_table[mb_xy]|= AC_ERROR;
+
+            if(error&VP_START)
+                end_ok=0;
+        }
+    }
+#endif
+    /* handle missing slices */
+    if(s->error_resilience>=4){
+        int end_ok=1;
+
+        for(i=s->mb_num-2; i>=s->mb_width+100; i--){ //FIXME +100 hack
+            const int mb_xy= s->mb_index2xy[i];
+            int error1= s->error_status_table[mb_xy  ];
+            int error2= s->error_status_table[s->mb_index2xy[i+1]];
+
+            if(error1&VP_START)
+                end_ok=1;
+
+            if(   error2==(VP_START|DC_ERROR|AC_ERROR|MV_ERROR|AC_END|DC_END|MV_END)
+               && error1!=(VP_START|DC_ERROR|AC_ERROR|MV_ERROR|AC_END|DC_END|MV_END)
+               && ((error1&AC_END) || (error1&DC_END) || (error1&MV_END))){ //end & uninited
+                end_ok=0;
+            }
+
+            if(!end_ok)
+                s->error_status_table[mb_xy]|= DC_ERROR|AC_ERROR|MV_ERROR;
+        }
+    }
+
+#if 1
+    /* backward mark errors */
+    distance=9999999;
+    for(error_type=1; error_type<=3; error_type++){
+        for(i=s->mb_num-1; i>=0; i--){
+            const int mb_xy= s->mb_index2xy[i];
+            int error= s->error_status_table[mb_xy];
+
+            if(!s->mbskip_table[mb_xy]) //FIXME partition specific
+                distance++;
+            if(error&(1<<error_type))
+                distance= 0;
+
+            if(s->partitioned_frame){
+                if(distance < threshold_part[error_type-1])
+                    s->error_status_table[mb_xy]|= 1<<error_type;
+            }else{
+                if(distance < threshold)
+                    s->error_status_table[mb_xy]|= 1<<error_type;
+            }
+
+            if(error&VP_START)
+                distance= 9999999;
+        }
+    }
+#endif
+
+    /* forward mark errors */
+    error=0;
+    for(i=0; i<s->mb_num; i++){
+        const int mb_xy= s->mb_index2xy[i];
+        int old_error= s->error_status_table[mb_xy];
+
+        if(old_error&VP_START)
+            error= old_error& (DC_ERROR|AC_ERROR|MV_ERROR);
+        else{
+            error|= old_error& (DC_ERROR|AC_ERROR|MV_ERROR);
+            s->error_status_table[mb_xy]|= error;
+        }
+    }
+#if 1
+    /* handle not partitioned case */
+    if(!s->partitioned_frame){
+        for(i=0; i<s->mb_num; i++){
+            const int mb_xy= s->mb_index2xy[i];
+            error= s->error_status_table[mb_xy];
+            if(error&(AC_ERROR|DC_ERROR|MV_ERROR))
+                error|= AC_ERROR|DC_ERROR|MV_ERROR;
+            s->error_status_table[mb_xy]= error;
+        }
+    }
+#endif
+
+    dc_error= ac_error= mv_error=0;
+    for(i=0; i<s->mb_num; i++){
+        const int mb_xy= s->mb_index2xy[i];
+        error= s->error_status_table[mb_xy];
+        if(error&DC_ERROR) dc_error ++;
+        if(error&AC_ERROR) ac_error ++;
+        if(error&MV_ERROR) mv_error ++;
+    }
+    av_log(s->avctx, AV_LOG_INFO, "concealing %d DC, %d AC, %d MV errors\n", dc_error, ac_error, mv_error);
+
+    is_intra_likely= is_intra_more_likely(s);
+
+    /* set unknown mb-type to most likely */
+    for(i=0; i<s->mb_num; i++){
+        const int mb_xy= s->mb_index2xy[i];
+        error= s->error_status_table[mb_xy];
+        if(!((error&DC_ERROR) && (error&MV_ERROR)))
+            continue;
+
+        if(is_intra_likely)
+            s->current_picture.mb_type[mb_xy]= MB_TYPE_INTRA4x4;
+        else
+            s->current_picture.mb_type[mb_xy]= MB_TYPE_16x16 | MB_TYPE_L0;
+    }
+
+    /* handle inter blocks with damaged AC */
+    for(mb_y=0; mb_y<s->mb_height; mb_y++){
+        for(mb_x=0; mb_x<s->mb_width; mb_x++){
+            const int mb_xy= mb_x + mb_y * s->mb_stride;
+            const int mb_type= s->current_picture.mb_type[mb_xy];
+            error= s->error_status_table[mb_xy];
+
+            if(IS_INTRA(mb_type)) continue; //intra
+            if(error&MV_ERROR) continue;              //inter with damaged MV
+            if(!(error&AC_ERROR)) continue;           //undamaged inter
+
+            s->mv_dir = MV_DIR_FORWARD;
+            s->mb_intra=0;
+            s->mb_skipped=0;
+            if(IS_8X8(mb_type)){
+                int mb_index= mb_x*2 + mb_y*2*s->b8_stride;
+                int j;
+                s->mv_type = MV_TYPE_8X8;
+                for(j=0; j<4; j++){
+                    s->mv[0][j][0] = s->current_picture.motion_val[0][ mb_index + (j&1) + (j>>1)*s->b8_stride ][0];
+                    s->mv[0][j][1] = s->current_picture.motion_val[0][ mb_index + (j&1) + (j>>1)*s->b8_stride ][1];
+                }
+            }else{
+                s->mv_type = MV_TYPE_16X16;
+                s->mv[0][0][0] = s->current_picture.motion_val[0][ mb_x*2 + mb_y*2*s->b8_stride ][0];
+                s->mv[0][0][1] = s->current_picture.motion_val[0][ mb_x*2 + mb_y*2*s->b8_stride ][1];
+            }
+
+            s->dsp.clear_blocks(s->block[0]);
+
+            s->mb_x= mb_x;
+            s->mb_y= mb_y;
+            decode_mb(s);
+        }
+    }
+
+    /* guess MVs */
+    if(s->pict_type==B_TYPE){
+        for(mb_y=0; mb_y<s->mb_height; mb_y++){
+            for(mb_x=0; mb_x<s->mb_width; mb_x++){
+                int xy= mb_x*2 + mb_y*2*s->b8_stride;
+                const int mb_xy= mb_x + mb_y * s->mb_stride;
+                const int mb_type= s->current_picture.mb_type[mb_xy];
+                error= s->error_status_table[mb_xy];
+
+                if(IS_INTRA(mb_type)) continue;
+                if(!(error&MV_ERROR)) continue;           //inter with undamaged MV
+                if(!(error&AC_ERROR)) continue;           //undamaged inter
+
+                s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD;
+                s->mb_intra=0;
+                s->mv_type = MV_TYPE_16X16;
+                s->mb_skipped=0;
+
+                if(s->pp_time){
+                    int time_pp= s->pp_time;
+                    int time_pb= s->pb_time;
+
+                    s->mv[0][0][0] = s->next_picture.motion_val[0][xy][0]*time_pb/time_pp;
+                    s->mv[0][0][1] = s->next_picture.motion_val[0][xy][1]*time_pb/time_pp;
+                    s->mv[1][0][0] = s->next_picture.motion_val[0][xy][0]*(time_pb - time_pp)/time_pp;
+                    s->mv[1][0][1] = s->next_picture.motion_val[0][xy][1]*(time_pb - time_pp)/time_pp;
+                }else{
+                    s->mv[0][0][0]= 0;
+                    s->mv[0][0][1]= 0;
+                    s->mv[1][0][0]= 0;
+                    s->mv[1][0][1]= 0;
+                }
+
+                s->dsp.clear_blocks(s->block[0]);
+                s->mb_x= mb_x;
+                s->mb_y= mb_y;
+                decode_mb(s);
+            }
+        }
+    }else
+        guess_mv(s);
+
+#ifdef HAVE_XVMC
+    /* the filters below are not XvMC compatible, skip them */
+    if(s->avctx->xvmc_acceleration) goto ec_clean;
+#endif
+    /* fill DC for inter blocks */
+    for(mb_y=0; mb_y<s->mb_height; mb_y++){
+        for(mb_x=0; mb_x<s->mb_width; mb_x++){
+            int dc, dcu, dcv, y, n;
+            int16_t *dc_ptr;
+            uint8_t *dest_y, *dest_cb, *dest_cr;
+            const int mb_xy= mb_x + mb_y * s->mb_stride;
+            const int mb_type= s->current_picture.mb_type[mb_xy];
+
+            error= s->error_status_table[mb_xy];
+
+            if(IS_INTRA(mb_type) && s->partitioned_frame) continue;
+//            if(error&MV_ERROR) continue; //inter data damaged FIXME is this good?
+
+            dest_y = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize;
+            dest_cb= s->current_picture.data[1] + mb_x*8  + mb_y*8 *s->uvlinesize;
+            dest_cr= s->current_picture.data[2] + mb_x*8  + mb_y*8 *s->uvlinesize;
+
+            dc_ptr= &s->dc_val[0][mb_x*2 + mb_y*2*s->b8_stride];
+            for(n=0; n<4; n++){
+                dc=0;
+                for(y=0; y<8; y++){
+                    int x;
+                    for(x=0; x<8; x++){
+                       dc+= dest_y[x + (n&1)*8 + (y + (n>>1)*8)*s->linesize];
+                    }
+                }
+                dc_ptr[(n&1) + (n>>1)*s->b8_stride]= (dc+4)>>3;
+            }
+
+            dcu=dcv=0;
+            for(y=0; y<8; y++){
+                int x;
+                for(x=0; x<8; x++){
+                    dcu+=dest_cb[x + y*(s->uvlinesize)];
+                    dcv+=dest_cr[x + y*(s->uvlinesize)];
+                }
+            }
+            s->dc_val[1][mb_x + mb_y*s->mb_stride]= (dcu+4)>>3;
+            s->dc_val[2][mb_x + mb_y*s->mb_stride]= (dcv+4)>>3;
+        }
+    }
+#if 1
+    /* guess DC for damaged blocks */
+    guess_dc(s, s->dc_val[0], s->mb_width*2, s->mb_height*2, s->b8_stride, 1);
+    guess_dc(s, s->dc_val[1], s->mb_width  , s->mb_height  , s->mb_stride, 0);
+    guess_dc(s, s->dc_val[2], s->mb_width  , s->mb_height  , s->mb_stride, 0);
+#endif
+    /* filter luma DC */
+    filter181(s->dc_val[0], s->mb_width*2, s->mb_height*2, s->b8_stride);
+
+#if 1
+    /* render DC only intra */
+    for(mb_y=0; mb_y<s->mb_height; mb_y++){
+        for(mb_x=0; mb_x<s->mb_width; mb_x++){
+            uint8_t *dest_y, *dest_cb, *dest_cr;
+            const int mb_xy= mb_x + mb_y * s->mb_stride;
+            const int mb_type= s->current_picture.mb_type[mb_xy];
+
+            error= s->error_status_table[mb_xy];
+
+            if(IS_INTER(mb_type)) continue;
+            if(!(error&AC_ERROR)) continue;              //undamaged
+
+            dest_y = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize;
+            dest_cb= s->current_picture.data[1] + mb_x*8  + mb_y*8 *s->uvlinesize;
+            dest_cr= s->current_picture.data[2] + mb_x*8  + mb_y*8 *s->uvlinesize;
+
+            put_dc(s, dest_y, dest_cb, dest_cr, mb_x, mb_y);
+        }
+    }
+#endif
+
+    if(s->avctx->error_concealment&FF_EC_DEBLOCK){
+        /* filter horizontal block boundaries */
+        h_block_filter(s, s->current_picture.data[0], s->mb_width*2, s->mb_height*2, s->linesize  , 1);
+        h_block_filter(s, s->current_picture.data[1], s->mb_width  , s->mb_height  , s->uvlinesize, 0);
+        h_block_filter(s, s->current_picture.data[2], s->mb_width  , s->mb_height  , s->uvlinesize, 0);
+
+        /* filter vertical block boundaries */
+        v_block_filter(s, s->current_picture.data[0], s->mb_width*2, s->mb_height*2, s->linesize  , 1);
+        v_block_filter(s, s->current_picture.data[1], s->mb_width  , s->mb_height  , s->uvlinesize, 0);
+        v_block_filter(s, s->current_picture.data[2], s->mb_width  , s->mb_height  , s->uvlinesize, 0);
+    }
+
+#ifdef HAVE_XVMC
+ec_clean:
+#endif
+    /* clean a few tables */
+    for(i=0; i<s->mb_num; i++){
+        const int mb_xy= s->mb_index2xy[i];
+        int error= s->error_status_table[mb_xy];
+
+        if(s->pict_type!=B_TYPE && (error&(DC_ERROR|MV_ERROR|AC_ERROR))){
+            s->mbskip_table[mb_xy]=0;
+        }
+        s->mbintra_table[mb_xy]=1;
+    }
+}
diff --git a/contrib/ffmpeg/libavcodec/eval.c b/contrib/ffmpeg/libavcodec/eval.c
new file mode 100644
index 000000000..961c8b5ac
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/eval.c
@@ -0,0 +1,466 @@
+/*
+ * simple arithmetic expression evaluator
+ *
+ * Copyright (c) 2002-2006 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2006 Oded Shimon <ods15@ods15.dyndns.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file eval.c
+ * simple arithmetic expression evaluator.
+ *
+ * see http://joe.hotchkiss.com/programming/eval/eval.html
+ */
+
+#include "avcodec.h"
+#include "mpegvideo.h"
+#include "eval.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#ifndef NAN
+  #define NAN 0.0/0.0
+#endif
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+typedef struct Parser{
+    int stack_index;
+    char *s;
+    double *const_value;
+    const char **const_name;          // NULL terminated
+    double (**func1)(void *, double a); // NULL terminated
+    const char **func1_name;          // NULL terminated
+    double (**func2)(void *, double a, double b); // NULL terminated
+    char **func2_name;          // NULL terminated
+    void *opaque;
+    char **error;
+#define VARS 10
+    double var[VARS];
+} Parser;
+
+static int8_t si_prefixes['z' - 'E' + 1]={
+    ['y'-'E']= -24,
+    ['z'-'E']= -21,
+    ['a'-'E']= -18,
+    ['f'-'E']= -15,
+    ['p'-'E']= -12,
+    ['n'-'E']= - 9,
+    ['u'-'E']= - 6,
+    ['m'-'E']= - 3,
+    ['c'-'E']= - 2,
+    ['d'-'E']= - 1,
+    ['h'-'E']=   2,
+    ['k'-'E']=   3,
+    ['K'-'E']=   3,
+    ['M'-'E']=   6,
+    ['G'-'E']=   9,
+    ['T'-'E']=  12,
+    ['P'-'E']=  15,
+    ['E'-'E']=  18,
+    ['Z'-'E']=  21,
+    ['Y'-'E']=  24,
+};
+
+/** strtod() function extended with 'k', 'M', 'G', 'ki', 'Mi', 'Gi' and 'B'
+ * postfixes.  This allows using f.e. kB, MiB, G and B as a postfix. This
+ * function assumes that the unit of numbers is bits not bytes.
+ */
+static double av_strtod(const char *name, char **tail) {
+    double d;
+    char *next;
+    d = strtod(name, &next);
+    /* if parsing succeeded, check for and interpret postfixes */
+    if (next!=name) {
+
+        if(*next >= 'E' && *next <= 'z'){
+            int e= si_prefixes[*next - 'E'];
+            if(e){
+                if(next[1] == 'i'){
+                    d*= pow( 2, e/0.3);
+                    next+=2;
+                }else{
+                    d*= pow(10, e);
+                    next++;
+                }
+            }
+        }
+
+        if(*next=='B') {
+            d*=8;
+            *next++;
+        }
+    }
+    /* if requested, fill in tail with the position after the last parsed
+       character */
+    if (tail)
+        *tail = next;
+    return d;
+}
+
+static int strmatch(const char *s, const char *prefix){
+    int i;
+    for(i=0; prefix[i]; i++){
+        if(prefix[i] != s[i]) return 0;
+    }
+    return 1;
+}
+
+struct ff_expr_s {
+    enum {
+        e_value, e_const, e_func0, e_func1, e_func2,
+        e_squish, e_gauss, e_ld,
+        e_mod, e_max, e_min, e_eq, e_gt, e_gte,
+        e_pow, e_mul, e_div, e_add,
+        e_last, e_st, e_while,
+    } type;
+    double value; // is sign in other types
+    union {
+        int const_index;
+        double (*func0)(double);
+        double (*func1)(void *, double);
+        double (*func2)(void *, double, double);
+    } a;
+    AVEvalExpr * param[2];
+};
+
+static double eval_expr(Parser * p, AVEvalExpr * e) {
+    switch (e->type) {
+        case e_value:  return e->value;
+        case e_const:  return e->value * p->const_value[e->a.const_index];
+        case e_func0:  return e->value * e->a.func0(eval_expr(p, e->param[0]));
+        case e_func1:  return e->value * e->a.func1(p->opaque, eval_expr(p, e->param[0]));
+        case e_func2:  return e->value * e->a.func2(p->opaque, eval_expr(p, e->param[0]), eval_expr(p, e->param[1]));
+        case e_squish: return 1/(1+exp(4*eval_expr(p, e->param[0])));
+        case e_gauss: { double d = eval_expr(p, e->param[0]); return exp(-d*d/2)/sqrt(2*M_PI); }
+        case e_ld:     return e->value * p->var[clip(eval_expr(p, e->param[0]), 0, VARS-1)];
+        case e_while: {
+            double d = NAN;
+            while(eval_expr(p, e->param[0]))
+                d=eval_expr(p, e->param[1]);
+            return d;
+        }
+        default: {
+            double d = eval_expr(p, e->param[0]);
+            double d2 = eval_expr(p, e->param[1]);
+            switch (e->type) {
+                case e_mod: return e->value * (d - floor(d/d2)*d2);
+                case e_max: return e->value * (d >  d2 ?   d : d2);
+                case e_min: return e->value * (d <  d2 ?   d : d2);
+                case e_eq:  return e->value * (d == d2 ? 1.0 : 0.0);
+                case e_gt:  return e->value * (d >  d2 ? 1.0 : 0.0);
+                case e_gte: return e->value * (d >= d2 ? 1.0 : 0.0);
+                case e_pow: return e->value * pow(d, d2);
+                case e_mul: return e->value * (d * d2);
+                case e_div: return e->value * (d / d2);
+                case e_add: return e->value * (d + d2);
+                case e_last:return e->value * d2;
+                case e_st : return e->value * (p->var[clip(d, 0, VARS-1)]= d2);
+            }
+        }
+    }
+    return NAN;
+}
+
+static AVEvalExpr * parse_expr(Parser *p);
+
+void ff_eval_free(AVEvalExpr * e) {
+    if (!e) return;
+    ff_eval_free(e->param[0]);
+    ff_eval_free(e->param[1]);
+    av_freep(&e);
+}
+
+static AVEvalExpr * parse_primary(Parser *p) {
+    AVEvalExpr * d = av_mallocz(sizeof(AVEvalExpr));
+    char *next= p->s;
+    int i;
+
+    /* number */
+    d->value = av_strtod(p->s, &next);
+    if(next != p->s){
+        d->type = e_value;
+        p->s= next;
+        return d;
+    }
+    d->value = 1;
+
+    /* named constants */
+    for(i=0; p->const_name && p->const_name[i]; i++){
+        if(strmatch(p->s, p->const_name[i])){
+            p->s+= strlen(p->const_name[i]);
+            d->type = e_const;
+            d->a.const_index = i;
+            return d;
+        }
+    }
+
+    p->s= strchr(p->s, '(');
+    if(p->s==NULL){
+        *p->error = "missing (";
+        p->s= next;
+        ff_eval_free(d);
+        return NULL;
+    }
+    p->s++; // "("
+    if (*next == '(') { // special case do-nothing
+        av_freep(&d);
+        d = parse_expr(p);
+        if(p->s[0] != ')'){
+            *p->error = "missing )";
+            ff_eval_free(d);
+            return NULL;
+        }
+        p->s++; // ")"
+        return d;
+    }
+    d->param[0] = parse_expr(p);
+    if(p->s[0]== ','){
+        p->s++; // ","
+        d->param[1] = parse_expr(p);
+    }
+    if(p->s[0] != ')'){
+        *p->error = "missing )";
+        ff_eval_free(d);
+        return NULL;
+    }
+    p->s++; // ")"
+
+    d->type = e_func0;
+         if( strmatch(next, "sinh"  ) ) d->a.func0 = sinh;
+    else if( strmatch(next, "cosh"  ) ) d->a.func0 = cosh;
+    else if( strmatch(next, "tanh"  ) ) d->a.func0 = tanh;
+    else if( strmatch(next, "sin"   ) ) d->a.func0 = sin;
+    else if( strmatch(next, "cos"   ) ) d->a.func0 = cos;
+    else if( strmatch(next, "tan"   ) ) d->a.func0 = tan;
+    else if( strmatch(next, "atan"  ) ) d->a.func0 = atan;
+    else if( strmatch(next, "asin"  ) ) d->a.func0 = asin;
+    else if( strmatch(next, "acos"  ) ) d->a.func0 = acos;
+    else if( strmatch(next, "exp"   ) ) d->a.func0 = exp;
+    else if( strmatch(next, "log"   ) ) d->a.func0 = log;
+    else if( strmatch(next, "abs"   ) ) d->a.func0 = fabs;
+    else if( strmatch(next, "squish") ) d->type = e_squish;
+    else if( strmatch(next, "gauss" ) ) d->type = e_gauss;
+    else if( strmatch(next, "mod"   ) ) d->type = e_mod;
+    else if( strmatch(next, "max"   ) ) d->type = e_max;
+    else if( strmatch(next, "min"   ) ) d->type = e_min;
+    else if( strmatch(next, "eq"    ) ) d->type = e_eq;
+    else if( strmatch(next, "gte"   ) ) d->type = e_gte;
+    else if( strmatch(next, "gt"    ) ) d->type = e_gt;
+    else if( strmatch(next, "lte"   ) ) { AVEvalExpr * tmp = d->param[1]; d->param[1] = d->param[0]; d->param[0] = tmp; d->type = e_gt; }
+    else if( strmatch(next, "lt"    ) ) { AVEvalExpr * tmp = d->param[1]; d->param[1] = d->param[0]; d->param[0] = tmp; d->type = e_gte; }
+    else if( strmatch(next, "ld"    ) ) d->type = e_ld;
+    else if( strmatch(next, "st"    ) ) d->type = e_st;
+    else if( strmatch(next, "while" ) ) d->type = e_while;
+    else {
+        for(i=0; p->func1_name && p->func1_name[i]; i++){
+            if(strmatch(next, p->func1_name[i])){
+                d->a.func1 = p->func1[i];
+                d->type = e_func1;
+                return d;
+            }
+        }
+
+        for(i=0; p->func2_name && p->func2_name[i]; i++){
+            if(strmatch(next, p->func2_name[i])){
+                d->a.func2 = p->func2[i];
+                d->type = e_func2;
+                return d;
+            }
+        }
+
+        *p->error = "unknown function";
+        ff_eval_free(d);
+        return NULL;
+    }
+
+    return d;
+}
+
+static AVEvalExpr * new_eval_expr(int type, int value, AVEvalExpr *p0, AVEvalExpr *p1){
+    AVEvalExpr * e = av_mallocz(sizeof(AVEvalExpr));
+    e->type     =type   ;
+    e->value    =value  ;
+    e->param[0] =p0     ;
+    e->param[1] =p1     ;
+    return e;
+}
+
+static AVEvalExpr * parse_pow(Parser *p, int *sign){
+    *sign= (*p->s == '+') - (*p->s == '-');
+    p->s += *sign&1;
+    return parse_primary(p);
+}
+
+static AVEvalExpr * parse_factor(Parser *p){
+    int sign, sign2;
+    AVEvalExpr * e = parse_pow(p, &sign);
+    while(p->s[0]=='^'){
+        p->s++;
+        e= new_eval_expr(e_pow, 1, e, parse_pow(p, &sign2));
+        if (e->param[1]) e->param[1]->value *= (sign2|1);
+    }
+    if (e) e->value *= (sign|1);
+    return e;
+}
+
+static AVEvalExpr * parse_term(Parser *p){
+    AVEvalExpr * e = parse_factor(p);
+    while(p->s[0]=='*' || p->s[0]=='/'){
+        int c= *p->s++;
+        e= new_eval_expr(c == '*' ? e_mul : e_div, 1, e, parse_factor(p));
+    }
+    return e;
+}
+
+static AVEvalExpr * parse_subexpr(Parser *p) {
+    AVEvalExpr * e = parse_term(p);
+    while(*p->s == '+' || *p->s == '-') {
+        e= new_eval_expr(e_add, 1, e, parse_term(p));
+    };
+
+    return e;
+}
+
+static AVEvalExpr * parse_expr(Parser *p) {
+    AVEvalExpr * e;
+
+    if(p->stack_index <= 0) //protect against stack overflows
+        return NULL;
+    p->stack_index--;
+
+    e = parse_subexpr(p);
+
+    while(*p->s == ';') {
+        p->s++;
+        e= new_eval_expr(e_last, 1, e, parse_subexpr(p));
+    };
+
+    p->stack_index++;
+
+    return e;
+}
+
+static int verify_expr(AVEvalExpr * e) {
+    if (!e) return 0;
+    switch (e->type) {
+        case e_value:
+        case e_const: return 1;
+        case e_func0:
+        case e_func1:
+        case e_squish:
+        case e_ld:
+        case e_gauss: return verify_expr(e->param[0]);
+        default: return verify_expr(e->param[0]) && verify_expr(e->param[1]);
+    }
+}
+
+AVEvalExpr * ff_parse(char *s, const char **const_name,
+               double (**func1)(void *, double), const char **func1_name,
+               double (**func2)(void *, double, double), char **func2_name,
+               char **error){
+    Parser p;
+    AVEvalExpr * e;
+    char w[strlen(s) + 1], * wp = w;
+
+    while (*s)
+        if (!isspace(*s++)) *wp++ = s[-1];
+    *wp++ = 0;
+
+    p.stack_index=100;
+    p.s= w;
+    p.const_name = const_name;
+    p.func1      = func1;
+    p.func1_name = func1_name;
+    p.func2      = func2;
+    p.func2_name = func2_name;
+    p.error= error;
+
+    e = parse_expr(&p);
+    if (!verify_expr(e)) {
+        ff_eval_free(e);
+        return NULL;
+    }
+    return e;
+}
+
+double ff_parse_eval(AVEvalExpr * e, double *const_value, void *opaque) {
+    Parser p;
+
+    p.const_value= const_value;
+    p.opaque     = opaque;
+    return eval_expr(&p, e);
+}
+
+double ff_eval2(char *s, double *const_value, const char **const_name,
+               double (**func1)(void *, double), const char **func1_name,
+               double (**func2)(void *, double, double), char **func2_name,
+               void *opaque, char **error){
+    AVEvalExpr * e = ff_parse(s, const_name, func1, func1_name, func2, func2_name, error);
+    double d;
+    if (!e) return NAN;
+    d = ff_parse_eval(e, const_value, opaque);
+    ff_eval_free(e);
+    return d;
+}
+
+#if LIBAVCODEC_VERSION_INT < ((52<<16)+(0<<8)+0)
+attribute_deprecated double ff_eval(char *s, double *const_value, const char **const_name,
+               double (**func1)(void *, double), const char **func1_name,
+               double (**func2)(void *, double, double), char **func2_name,
+               void *opaque){
+    char *error=NULL;
+    double ret;
+    ret = ff_eval2(s, const_value, const_name, func1, func1_name, func2, func2_name, opaque, &error);
+    if (error)
+        av_log(NULL, AV_LOG_ERROR, "Error evaluating \"%s\": %s\n", s, error);
+    return ret;
+}
+#endif
+
+#ifdef TEST
+#undef printf
+static double const_values[]={
+    M_PI,
+    M_E,
+    0
+};
+static const char *const_names[]={
+    "PI",
+    "E",
+    0
+};
+main(){
+    int i;
+    printf("%f == 12.7\n", ff_eval("1+(5-2)^(3-1)+1/2+sin(PI)-max(-2.2,-3.1)", const_values, const_names, NULL, NULL, NULL, NULL, NULL));
+    printf("%f == 0.931322575\n", ff_eval("80G/80Gi", const_values, const_names, NULL, NULL, NULL, NULL, NULL));
+
+    for(i=0; i<1050; i++){
+        START_TIMER
+            ff_eval("1+(5-2)^(3-1)+1/2+sin(PI)-max(-2.2,-3.1)", const_values, const_names, NULL, NULL, NULL, NULL, NULL);
+        STOP_TIMER("ff_eval")
+    }
+}
+#endif
diff --git a/contrib/ffmpeg/libavcodec/eval.h b/contrib/ffmpeg/libavcodec/eval.h
new file mode 100644
index 000000000..b52199cf4
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/eval.h
@@ -0,0 +1,84 @@
+/*
+ * simple arithmetic expression evaluator
+ *
+ * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file eval.h
+ * eval header.
+ */
+
+#ifndef AVCODEC_EVAL_H
+#define AVCODEC_EVAL_H
+
+#if LIBAVCODEC_VERSION_INT < ((52<<16)+(0<<8)+0)
+double ff_eval(char *s, double *const_value, const char **const_name,
+               double (**func1)(void *, double), const char **func1_name,
+               double (**func2)(void *, double, double), char **func2_name,
+               void *opaque);
+#endif
+
+/**
+ * Parses and evaluates an expression.
+ * Note, this is significantly slower than ff_parse_eval()
+ * @param s expression as a zero terminated string for example "1+2^3+5*5+sin(2/3)"
+ * @param func1 NULL terminated array of function pointers for functions which take 1 argument
+ * @param func2 NULL terminated array of function pointers for functions which take 2 arguments
+ * @param const_name NULL terminated array of zero terminated strings of constant identifers for example {"PI", "E", 0}
+ * @param func1_name NULL terminated array of zero terminated strings of func1 identifers
+ * @param func2_name NULL terminated array of zero terminated strings of func2 identifers
+ * @param error pointer to a char* which is set to an error message if something goes wrong
+ * @param const_value a zero terminated array of values for the identifers from const_name
+ * @param opaque a pointer which will be passed to all functions from func1 and func2
+ * @return the value of the expression
+ */
+double ff_eval2(char *s, double *const_value, const char **const_name,
+               double (**func1)(void *, double), const char **func1_name,
+               double (**func2)(void *, double, double), char **func2_name,
+               void *opaque, char **error);
+
+typedef struct ff_expr_s AVEvalExpr;
+
+/**
+ * Parses a expression.
+ * @param s expression as a zero terminated string for example "1+2^3+5*5+sin(2/3)"
+ * @param func1 NULL terminated array of function pointers for functions which take 1 argument
+ * @param func2 NULL terminated array of function pointers for functions which take 2 arguments
+ * @param const_name NULL terminated array of zero terminated strings of constant identifers for example {"PI", "E", 0}
+ * @param func1_name NULL terminated array of zero terminated strings of func1 identifers
+ * @param func2_name NULL terminated array of zero terminated strings of func2 identifers
+ * @param error pointer to a char* which is set to an error message if something goes wrong
+ * @return AVEvalExpr which must be freed with ff_eval_free by the user when its not needed anymore
+ *         NULL if anything went wrong
+ */
+AVEvalExpr * ff_parse(char *s, const char **const_name,
+               double (**func1)(void *, double), const char **func1_name,
+               double (**func2)(void *, double, double), char **func2_name,
+               char **error);
+/**
+ * Evaluates a previously parsed expression.
+ * @param const_value a zero terminated array of values for the identifers from ff_parse const_name
+ * @param opaque a pointer which will be passed to all functions from func1 and func2
+ * @return the value of the expression
+ */
+double ff_parse_eval(AVEvalExpr * e, double *const_value, void *opaque);
+void ff_eval_free(AVEvalExpr * e);
+
+#endif /* AVCODEC_EVAL_H */
diff --git a/contrib/ffmpeg/libavcodec/faac.c b/contrib/ffmpeg/libavcodec/faac.c
new file mode 100644
index 000000000..06e0b4920
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/faac.c
@@ -0,0 +1,133 @@
+/*
+ * Interface to libfaac for aac encoding
+ * Copyright (c) 2002 Gildas Bazin <gbazin@netcourrier.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file faacaudio.c
+ * Interface to libfaac for aac encoding.
+ */
+
+#include "avcodec.h"
+#include <faac.h>
+
+typedef struct FaacAudioContext {
+    faacEncHandle faac_handle;
+} FaacAudioContext;
+
+static int Faac_encode_init(AVCodecContext *avctx)
+{
+    FaacAudioContext *s = avctx->priv_data;
+    faacEncConfigurationPtr faac_cfg;
+    unsigned long samples_input, max_bytes_output;
+
+    /* number of channels */
+    if (avctx->channels < 1 || avctx->channels > 6)
+        return -1;
+
+    s->faac_handle = faacEncOpen(avctx->sample_rate,
+                                 avctx->channels,
+                                 &samples_input, &max_bytes_output);
+
+    /* check faac version */
+    faac_cfg = faacEncGetCurrentConfiguration(s->faac_handle);
+    if (faac_cfg->version != FAAC_CFG_VERSION) {
+        av_log(avctx, AV_LOG_ERROR, "wrong libfaac version (compiled for: %d, using %d)\n", FAAC_CFG_VERSION, faac_cfg->version);
+        faacEncClose(s->faac_handle);
+        return -1;
+    }
+
+    /* put the options in the configuration struct */
+    faac_cfg->aacObjectType = LOW;
+    faac_cfg->mpegVersion = MPEG4;
+    faac_cfg->useTns = 0;
+    faac_cfg->allowMidside = 1;
+    faac_cfg->bitRate = avctx->bit_rate / avctx->channels;
+    faac_cfg->bandWidth = avctx->cutoff;
+    if(avctx->flags & CODEC_FLAG_QSCALE) {
+        faac_cfg->bitRate = 0;
+        faac_cfg->quantqual = avctx->global_quality / FF_QP2LAMBDA;
+    }
+    faac_cfg->outputFormat = 1;
+    faac_cfg->inputFormat = FAAC_INPUT_16BIT;
+
+    avctx->frame_size = samples_input / avctx->channels;
+
+    avctx->coded_frame= avcodec_alloc_frame();
+    avctx->coded_frame->key_frame= 1;
+
+    /* Set decoder specific info */
+    avctx->extradata_size = 0;
+    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+
+        unsigned char *buffer;
+        unsigned long decoder_specific_info_size;
+
+        if (!faacEncGetDecoderSpecificInfo(s->faac_handle, &buffer,
+                                           &decoder_specific_info_size)) {
+            avctx->extradata = buffer;
+            avctx->extradata_size = decoder_specific_info_size;
+            faac_cfg->outputFormat = 0;
+        }
+    }
+
+    if (!faacEncSetConfiguration(s->faac_handle, faac_cfg)) {
+        av_log(avctx, AV_LOG_ERROR, "libfaac doesn't support this output format!\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int Faac_encode_frame(AVCodecContext *avctx,
+                             unsigned char *frame, int buf_size, void *data)
+{
+    FaacAudioContext *s = avctx->priv_data;
+    int bytes_written;
+
+    bytes_written = faacEncEncode(s->faac_handle,
+                                  data,
+                                  avctx->frame_size * avctx->channels,
+                                  frame,
+                                  buf_size);
+
+    return bytes_written;
+}
+
+static int Faac_encode_close(AVCodecContext *avctx)
+{
+    FaacAudioContext *s = avctx->priv_data;
+
+    av_freep(&avctx->coded_frame);
+
+    //if (avctx->extradata_size) free(avctx->extradata);
+
+    faacEncClose(s->faac_handle);
+    return 0;
+}
+
+AVCodec faac_encoder = {
+    "aac",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_AAC,
+    sizeof(FaacAudioContext),
+    Faac_encode_init,
+    Faac_encode_frame,
+    Faac_encode_close
+};
diff --git a/contrib/ffmpeg/libavcodec/faad.c b/contrib/ffmpeg/libavcodec/faad.c
new file mode 100644
index 000000000..df33ea0b2
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/faad.c
@@ -0,0 +1,334 @@
+/*
+ * Faad decoder
+ * Copyright (c) 2003 Zdenek Kabelac.
+ * Copyright (c) 2004 Thomas Raivio.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file faad.c
+ * AAC decoder.
+ *
+ * still a bit unfinished - but it plays something
+ */
+
+#include "avcodec.h"
+#include "faad.h"
+
+#ifndef FAADAPI
+#define FAADAPI
+#endif
+
+/*
+ * when CONFIG_FAADBIN is defined the libfaad will be opened at runtime
+ */
+//#undef CONFIG_FAADBIN
+//#define CONFIG_FAADBIN
+
+#ifdef CONFIG_FAADBIN
+#include <dlfcn.h>
+static const char* libfaadname = "libfaad.so.0";
+#else
+#define dlopen(a)
+#define dlclose(a)
+#endif
+
+typedef struct {
+    void* handle;               /* dlopen handle */
+    void* faac_handle;          /* FAAD library handle */
+    int sample_size;
+    int init;
+
+    /* faad calls */
+    faacDecHandle FAADAPI (*faacDecOpen)(void);
+    faacDecConfigurationPtr FAADAPI (*faacDecGetCurrentConfiguration)(faacDecHandle hDecoder);
+#ifndef FAAD2_VERSION
+        int FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
+                                           faacDecConfigurationPtr config);
+        int FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
+                                unsigned char *buffer,
+                                unsigned long *samplerate,
+                                unsigned long *channels);
+        int FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
+                                unsigned long SizeOfDecoderSpecificInfo,
+                                unsigned long *samplerate, unsigned long *channels);
+        int FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
+                                unsigned char *buffer,
+                                unsigned long *bytesconsumed,
+                                short *sample_buffer,
+                                unsigned long *samples);
+#else
+        unsigned char FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
+                                                     faacDecConfigurationPtr config);
+        long FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
+                                   unsigned char *buffer,
+                                 unsigned long buffer_size,
+                                 unsigned long *samplerate,
+                                 unsigned char *channels);
+        char FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
+                                 unsigned long SizeOfDecoderSpecificInfo,
+                                 unsigned long *samplerate, unsigned char *channels);
+        void *FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
+                                         faacDecFrameInfo *hInfo,
+                                         unsigned char *buffer,
+                                                                 unsigned long buffer_size);
+        char* FAADAPI (*faacDecGetErrorMessage)(unsigned char errcode);
+#endif
+
+    void FAADAPI (*faacDecClose)(faacDecHandle hDecoder);
+
+
+} FAACContext;
+
+static const unsigned long faac_srates[] =
+{
+    96000, 88200, 64000, 48000, 44100, 32000,
+    24000, 22050, 16000, 12000, 11025, 8000
+};
+
+static int faac_init_mp4(AVCodecContext *avctx)
+{
+    FAACContext *s = (FAACContext *) avctx->priv_data;
+    unsigned long samplerate;
+#ifndef FAAD2_VERSION
+    unsigned long channels;
+#else
+    unsigned char channels;
+#endif
+    int r = 0;
+
+    if (avctx->extradata){
+        r = s->faacDecInit2(s->faac_handle, (uint8_t*) avctx->extradata,
+                            avctx->extradata_size,
+                            &samplerate, &channels);
+        if (r < 0){
+            av_log(avctx, AV_LOG_ERROR,
+                   "faacDecInit2 failed r:%d   sr:%ld  ch:%ld  s:%d\n",
+                   r, samplerate, (long)channels, avctx->extradata_size);
+        } else {
+            avctx->sample_rate = samplerate;
+            avctx->channels = channels;
+            s->init = 1;
+        }
+    }
+
+    return r;
+}
+
+static int faac_decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             uint8_t *buf, int buf_size)
+{
+    FAACContext *s = (FAACContext *) avctx->priv_data;
+#ifndef FAAD2_VERSION
+    unsigned long bytesconsumed;
+    short *sample_buffer = NULL;
+    unsigned long samples;
+    int out;
+#else
+    faacDecFrameInfo frame_info;
+    void *out;
+#endif
+    if(buf_size == 0)
+        return 0;
+#ifndef FAAD2_VERSION
+    out = s->faacDecDecode(s->faac_handle,
+                           (unsigned char*)buf,
+                           &bytesconsumed,
+                           data,
+                           &samples);
+    samples *= s->sample_size;
+    if (data_size)
+        *data_size = samples;
+    return (buf_size < (int)bytesconsumed)
+        ? buf_size : (int)bytesconsumed;
+#else
+
+    if(!s->init){
+        unsigned long srate;
+        unsigned char channels;
+        int r = s->faacDecInit(s->faac_handle, buf, buf_size, &srate, &channels);
+        if(r < 0){
+            av_log(avctx, AV_LOG_ERROR, "faac: codec init failed: %s\n",
+                   s->faacDecGetErrorMessage(frame_info.error));
+            return -1;
+        }
+        avctx->sample_rate = srate;
+        avctx->channels = channels;
+        s->init = 1;
+    }
+
+    out = s->faacDecDecode(s->faac_handle, &frame_info, (unsigned char*)buf, (unsigned long)buf_size);
+
+    if (frame_info.error > 0) {
+        av_log(avctx, AV_LOG_ERROR, "faac: frame decoding failed: %s\n",
+                s->faacDecGetErrorMessage(frame_info.error));
+        return -1;
+    }
+
+    frame_info.samples *= s->sample_size;
+    memcpy(data, out, frame_info.samples); // CHECKME - can we cheat this one
+
+    if (data_size)
+        *data_size = frame_info.samples;
+
+    return (buf_size < (int)frame_info.bytesconsumed)
+        ? buf_size : (int)frame_info.bytesconsumed;
+#endif
+}
+
+static int faac_decode_end(AVCodecContext *avctx)
+{
+    FAACContext *s = (FAACContext *) avctx->priv_data;
+
+    if (s->faacDecClose)
+        s->faacDecClose(s->faac_handle);
+
+    dlclose(s->handle);
+    return 0;
+}
+
+static int faac_decode_init(AVCodecContext *avctx)
+{
+    FAACContext *s = (FAACContext *) avctx->priv_data;
+    faacDecConfigurationPtr faac_cfg;
+
+#ifdef CONFIG_FAADBIN
+    const char* err = 0;
+
+    s->handle = dlopen(libfaadname, RTLD_LAZY);
+    if (!s->handle)
+    {
+        av_log(avctx, AV_LOG_ERROR, "FAAD library: %s could not be opened! \n%s\n",
+                libfaadname, dlerror());
+        return -1;
+    }
+#define dfaac(a, b) \
+    do { static const char* n = "faacDec" #a; \
+    if ((s->faacDec ## a = b dlsym( s->handle, n )) == NULL) { err = n; break; } } while(0)
+    for(;;) {
+#else  /* !CONFIG_FAADBIN */
+#define dfaac(a, b)     s->faacDec ## a = faacDec ## a
+#endif /* CONFIG_FAADBIN */
+
+        // resolve all needed function calls
+        dfaac(Open, (faacDecHandle FAADAPI (*)(void)));
+        dfaac(GetCurrentConfiguration, (faacDecConfigurationPtr
+                                        FAADAPI (*)(faacDecHandle)));
+#ifndef FAAD2_VERSION
+        dfaac(SetConfiguration, (int FAADAPI (*)(faacDecHandle,
+                                                           faacDecConfigurationPtr)));
+
+        dfaac(Init, (int FAADAPI (*)(faacDecHandle, unsigned char*,
+                                     unsigned long*, unsigned long*)));
+    dfaac(Init2, (int FAADAPI (*)(faacDecHandle, unsigned char*,
+                                       unsigned long, unsigned long*,
+                                       unsigned long*)));
+    dfaac(Close, (void FAADAPI (*)(faacDecHandle hDecoder)));
+        dfaac(Decode, (int FAADAPI (*)(faacDecHandle, unsigned char*,
+                             unsigned long*, short*, unsigned long*)));
+#else
+        dfaac(SetConfiguration, (unsigned char FAADAPI (*)(faacDecHandle,
+                                                           faacDecConfigurationPtr)));
+        dfaac(Init, (long FAADAPI (*)(faacDecHandle, unsigned char*,
+                                     unsigned long, unsigned long*, unsigned char*)));
+        dfaac(Init2, (char FAADAPI (*)(faacDecHandle, unsigned char*,
+                                       unsigned long, unsigned long*,
+                                       unsigned char*)));
+        dfaac(Decode, (void *FAADAPI (*)(faacDecHandle, faacDecFrameInfo*,
+                             unsigned char*, unsigned long)));
+        dfaac(GetErrorMessage, (char* FAADAPI (*)(unsigned char)));
+#endif
+#undef dfacc
+
+#ifdef CONFIG_FAADBIN
+        break;
+    }
+    if (err) {
+        dlclose(s->handle);
+        av_log(avctx, AV_LOG_ERROR, "FAAD library: cannot resolve %s in %s!\n",
+                err, libfaadname);
+        return -1;
+    }
+#endif
+
+    s->faac_handle = s->faacDecOpen();
+    if (!s->faac_handle) {
+        av_log(avctx, AV_LOG_ERROR, "FAAD library: cannot create handler!\n");
+        faac_decode_end(avctx);
+        return -1;
+    }
+
+
+    faac_cfg = s->faacDecGetCurrentConfiguration(s->faac_handle);
+
+    if (faac_cfg) {
+        switch (avctx->bits_per_sample) {
+        case 8: av_log(avctx, AV_LOG_ERROR, "FAADlib unsupported bps %d\n", avctx->bits_per_sample); break;
+        default:
+        case 16:
+#ifdef FAAD2_VERSION
+            faac_cfg->outputFormat = FAAD_FMT_16BIT;
+#endif
+            s->sample_size = 2;
+            break;
+        case 24:
+#ifdef FAAD2_VERSION
+            faac_cfg->outputFormat = FAAD_FMT_24BIT;
+#endif
+            s->sample_size = 3;
+            break;
+        case 32:
+#ifdef FAAD2_VERSION
+            faac_cfg->outputFormat = FAAD_FMT_32BIT;
+#endif
+            s->sample_size = 4;
+            break;
+        }
+
+        faac_cfg->defSampleRate = (!avctx->sample_rate) ? 44100 : avctx->sample_rate;
+        faac_cfg->defObjectType = LC;
+    }
+
+    s->faacDecSetConfiguration(s->faac_handle, faac_cfg);
+
+    faac_init_mp4(avctx);
+
+    return 0;
+}
+
+#define AAC_CODEC(id, name)     \
+AVCodec name ## _decoder = {    \
+    #name,                      \
+    CODEC_TYPE_AUDIO,           \
+    id,                         \
+    sizeof(FAACContext),        \
+    faac_decode_init,           \
+    NULL,                       \
+    faac_decode_end,            \
+    faac_decode_frame,          \
+}
+
+// FIXME - raw AAC files - maybe just one entry will be enough
+AAC_CODEC(CODEC_ID_AAC, aac);
+#if LIBAVCODEC_VERSION_INT < ((52<<16)+(0<<8)+0)
+// If it's mp4 file - usually embeded into Qt Mov
+AAC_CODEC(CODEC_ID_MPEG4AAC, mpeg4aac);
+#endif
+
+#undef AAC_CODEC
diff --git a/contrib/ffmpeg/libavcodec/faandct.c b/contrib/ffmpeg/libavcodec/faandct.c
new file mode 100644
index 000000000..e3c0d84a2
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/faandct.c
@@ -0,0 +1,220 @@
+/*
+ * Floating point AAN DCT
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * this implementation is based upon the IJG integer AAN DCT (see jfdctfst.c)
+ */
+
+/**
+ * @file faandct.c
+ * @brief
+ *     Floating point AAN DCT
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "dsputil.h"
+#include "faandct.h"
+
+#define FLOAT float
+#ifdef FAAN_POSTSCALE
+#    define SCALE(x) postscale[x]
+#else
+#    define SCALE(x) 1
+#endif
+
+//numbers generated by simple c code (not as accurate as they could be)
+/*
+for(i=0; i<8; i++){
+    printf("#define B%d %1.20llf\n", i, (long double)1.0/(cosl(i*acosl(-1.0)/(long double)16.0)*sqrtl(2)));
+}
+*/
+#define B0 1.00000000000000000000
+#define B1 0.72095982200694791383 // (cos(pi*1/16)sqrt(2))^-1
+#define B2 0.76536686473017954350 // (cos(pi*2/16)sqrt(2))^-1
+#define B3 0.85043009476725644878 // (cos(pi*3/16)sqrt(2))^-1
+#define B4 1.00000000000000000000 // (cos(pi*4/16)sqrt(2))^-1
+#define B5 1.27275858057283393842 // (cos(pi*5/16)sqrt(2))^-1
+#define B6 1.84775906502257351242 // (cos(pi*6/16)sqrt(2))^-1
+#define B7 3.62450978541155137218 // (cos(pi*7/16)sqrt(2))^-1
+
+
+#define A1 0.70710678118654752438 // cos(pi*4/16)
+#define A2 0.54119610014619698435 // cos(pi*6/16)sqrt(2)
+#define A5 0.38268343236508977170 // cos(pi*6/16)
+#define A4 1.30656296487637652774 // cos(pi*2/16)sqrt(2)
+
+static FLOAT postscale[64]={
+B0*B0, B0*B1, B0*B2, B0*B3, B0*B4, B0*B5, B0*B6, B0*B7,
+B1*B0, B1*B1, B1*B2, B1*B3, B1*B4, B1*B5, B1*B6, B1*B7,
+B2*B0, B2*B1, B2*B2, B2*B3, B2*B4, B2*B5, B2*B6, B2*B7,
+B3*B0, B3*B1, B3*B2, B3*B3, B3*B4, B3*B5, B3*B6, B3*B7,
+B4*B0, B4*B1, B4*B2, B4*B3, B4*B4, B4*B5, B4*B6, B4*B7,
+B5*B0, B5*B1, B5*B2, B5*B3, B5*B4, B5*B5, B5*B6, B5*B7,
+B6*B0, B6*B1, B6*B2, B6*B3, B6*B4, B6*B5, B6*B6, B6*B7,
+B7*B0, B7*B1, B7*B2, B7*B3, B7*B4, B7*B5, B7*B6, B7*B7,
+};
+
+static always_inline void row_fdct(FLOAT temp[64], DCTELEM * data)
+{
+    FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    FLOAT tmp10, tmp11, tmp12, tmp13;
+    FLOAT z1, z2, z3, z4, z5, z11, z13;
+    int i;
+
+    for (i=0; i<8*8; i+=8) {
+        tmp0= data[0 + i] + data[7 + i];
+        tmp7= data[0 + i] - data[7 + i];
+        tmp1= data[1 + i] + data[6 + i];
+        tmp6= data[1 + i] - data[6 + i];
+        tmp2= data[2 + i] + data[5 + i];
+        tmp5= data[2 + i] - data[5 + i];
+        tmp3= data[3 + i] + data[4 + i];
+        tmp4= data[3 + i] - data[4 + i];
+
+        tmp10= tmp0 + tmp3;
+        tmp13= tmp0 - tmp3;
+        tmp11= tmp1 + tmp2;
+        tmp12= tmp1 - tmp2;
+
+        temp[0 + i]= tmp10 + tmp11;
+        temp[4 + i]= tmp10 - tmp11;
+
+        z1= (tmp12 + tmp13)*A1;
+        temp[2 + i]= tmp13 + z1;
+        temp[6 + i]= tmp13 - z1;
+
+        tmp10= tmp4 + tmp5;
+        tmp11= tmp5 + tmp6;
+        tmp12= tmp6 + tmp7;
+
+        z5= (tmp10 - tmp12) * A5;
+        z2= tmp10*A2 + z5;
+        z4= tmp12*A4 + z5;
+        z3= tmp11*A1;
+
+        z11= tmp7 + z3;
+        z13= tmp7 - z3;
+
+        temp[5 + i]= z13 + z2;
+        temp[3 + i]= z13 - z2;
+        temp[1 + i]= z11 + z4;
+        temp[7 + i]= z11 - z4;
+    }
+}
+
+void ff_faandct(DCTELEM * data)
+{
+    FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    FLOAT tmp10, tmp11, tmp12, tmp13;
+    FLOAT z1, z2, z3, z4, z5, z11, z13;
+    FLOAT temp[64];
+    int i;
+
+    emms_c();
+
+    row_fdct(temp, data);
+
+    for (i=0; i<8; i++) {
+        tmp0= temp[8*0 + i] + temp[8*7 + i];
+        tmp7= temp[8*0 + i] - temp[8*7 + i];
+        tmp1= temp[8*1 + i] + temp[8*6 + i];
+        tmp6= temp[8*1 + i] - temp[8*6 + i];
+        tmp2= temp[8*2 + i] + temp[8*5 + i];
+        tmp5= temp[8*2 + i] - temp[8*5 + i];
+        tmp3= temp[8*3 + i] + temp[8*4 + i];
+        tmp4= temp[8*3 + i] - temp[8*4 + i];
+
+        tmp10= tmp0 + tmp3;
+        tmp13= tmp0 - tmp3;
+        tmp11= tmp1 + tmp2;
+        tmp12= tmp1 - tmp2;
+
+        data[8*0 + i]= lrintf(SCALE(8*0 + i) * (tmp10 + tmp11));
+        data[8*4 + i]= lrintf(SCALE(8*4 + i) * (tmp10 - tmp11));
+
+        z1= (tmp12 + tmp13)* A1;
+        data[8*2 + i]= lrintf(SCALE(8*2 + i) * (tmp13 + z1));
+        data[8*6 + i]= lrintf(SCALE(8*6 + i) * (tmp13 - z1));
+
+        tmp10= tmp4 + tmp5;
+        tmp11= tmp5 + tmp6;
+        tmp12= tmp6 + tmp7;
+
+        z5= (tmp10 - tmp12) * A5;
+        z2= tmp10*A2 + z5;
+        z4= tmp12*A4 + z5;
+        z3= tmp11*A1;
+
+        z11= tmp7 + z3;
+        z13= tmp7 - z3;
+
+        data[8*5 + i]= lrintf(SCALE(8*5 + i) * (z13 + z2));
+        data[8*3 + i]= lrintf(SCALE(8*3 + i) * (z13 - z2));
+        data[8*1 + i]= lrintf(SCALE(8*1 + i) * (z11 + z4));
+        data[8*7 + i]= lrintf(SCALE(8*7 + i) * (z11 - z4));
+    }
+}
+
+void ff_faandct248(DCTELEM * data)
+{
+    FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    FLOAT tmp10, tmp11, tmp12, tmp13;
+    FLOAT z1;
+    FLOAT temp[64];
+    int i;
+
+    emms_c();
+
+    row_fdct(temp, data);
+
+    for (i=0; i<8; i++) {
+        tmp0 = temp[8*0 + i] + temp[8*1 + i];
+        tmp1 = temp[8*2 + i] + temp[8*3 + i];
+        tmp2 = temp[8*4 + i] + temp[8*5 + i];
+        tmp3 = temp[8*6 + i] + temp[8*7 + i];
+        tmp4 = temp[8*0 + i] - temp[8*1 + i];
+        tmp5 = temp[8*2 + i] - temp[8*3 + i];
+        tmp6 = temp[8*4 + i] - temp[8*5 + i];
+        tmp7 = temp[8*6 + i] - temp[8*7 + i];
+
+        tmp10 = tmp0 + tmp3;
+        tmp11 = tmp1 + tmp2;
+        tmp12 = tmp1 - tmp2;
+        tmp13 = tmp0 - tmp3;
+
+        data[8*0 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11));
+        data[8*4 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11));
+
+        z1 = (tmp12 + tmp13)* A1;
+        data[8*2 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1));
+        data[8*6 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
+
+        tmp10 = tmp4 + tmp7;
+        tmp11 = tmp5 + tmp6;
+        tmp12 = tmp5 - tmp6;
+        tmp13 = tmp4 - tmp7;
+
+        data[8*1 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11));
+        data[8*5 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11));
+
+        z1 = (tmp12 + tmp13)* A1;
+        data[8*3 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1));
+        data[8*7 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
+    }
+}
diff --git a/contrib/ffmpeg/libavcodec/faandct.h b/contrib/ffmpeg/libavcodec/faandct.h
new file mode 100644
index 000000000..77dd41dae
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/faandct.h
@@ -0,0 +1,33 @@
+/*
+ * Floating point AAN DCT
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file faandct.h
+ * @brief
+ *     Floating point AAN DCT
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#define FAAN_POSTSCALE
+
+void ff_faandct(DCTELEM * data);
+void ff_faandct248(DCTELEM * data);
diff --git a/contrib/ffmpeg/libavcodec/fdctref.c b/contrib/ffmpeg/libavcodec/fdctref.c
new file mode 100644
index 000000000..5eff36849
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/fdctref.c
@@ -0,0 +1,158 @@
+/**
+ * @file fdctref.c
+ * forward discrete cosine transform, double precision.
+ */
+
+/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
+
+/*
+ * Disclaimer of Warranty
+ *
+ * These software programs are available to the user without any license fee or
+ * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
+ * any and all warranties, whether express, implied, or statuary, including any
+ * implied warranties or merchantability or of fitness for a particular
+ * purpose.  In no event shall the copyright-holder be liable for any
+ * incidental, punitive, or consequential damages of any kind whatsoever
+ * arising from the use of these programs.
+ *
+ * This disclaimer of warranty extends to the user of these programs and user's
+ * customers, employees, agents, transferees, successors, and assigns.
+ *
+ * The MPEG Software Simulation Group does not represent or warrant that the
+ * programs furnished hereunder are free of infringement of any third-party
+ * patents.
+ *
+ * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
+ * are subject to royalty fees to patent holders.  Many of these patents are
+ * general enough such that they are unavoidable regardless of implementation
+ * design.
+ *
+ */
+
+#include <math.h>
+
+#ifndef PI
+# ifdef M_PI
+#  define PI M_PI
+# else
+#  define PI 3.14159265358979323846
+# endif
+#endif
+
+/* global declarations */
+void init_fdct (void);
+void fdct (short *block);
+
+/* private data */
+static double c[8][8]; /* transform coefficients */
+
+void init_fdct()
+{
+  int i, j;
+  double s;
+
+  for (i=0; i<8; i++)
+  {
+    s = (i==0) ? sqrt(0.125) : 0.5;
+
+    for (j=0; j<8; j++)
+      c[i][j] = s * cos((PI/8.0)*i*(j+0.5));
+  }
+}
+
+void fdct(block)
+short *block;
+{
+        register int i, j;
+        double s;
+        double tmp[64];
+
+        for(i = 0; i < 8; i++)
+            for(j = 0; j < 8; j++)
+            {
+                    s = 0.0;
+
+/*
+ *                     for(k = 0; k < 8; k++)
+ *                         s += c[j][k] * block[8 * i + k];
+ */
+                s += c[j][0] * block[8 * i + 0];
+                s += c[j][1] * block[8 * i + 1];
+                s += c[j][2] * block[8 * i + 2];
+                s += c[j][3] * block[8 * i + 3];
+                s += c[j][4] * block[8 * i + 4];
+                s += c[j][5] * block[8 * i + 5];
+                s += c[j][6] * block[8 * i + 6];
+                s += c[j][7] * block[8 * i + 7];
+
+                    tmp[8 * i + j] = s;
+            }
+
+        for(j = 0; j < 8; j++)
+            for(i = 0; i < 8; i++)
+            {
+                    s = 0.0;
+
+/*
+ *                       for(k = 0; k < 8; k++)
+ *                    s += c[i][k] * tmp[8 * k + j];
+ */
+                s += c[i][0] * tmp[8 * 0 + j];
+                s += c[i][1] * tmp[8 * 1 + j];
+                s += c[i][2] * tmp[8 * 2 + j];
+                s += c[i][3] * tmp[8 * 3 + j];
+                s += c[i][4] * tmp[8 * 4 + j];
+                s += c[i][5] * tmp[8 * 5 + j];
+                s += c[i][6] * tmp[8 * 6 + j];
+                s += c[i][7] * tmp[8 * 7 + j];
+                s*=8.0;
+
+                    block[8 * i + j] = (short)floor(s + 0.499999);
+/*
+ * reason for adding 0.499999 instead of 0.5:
+ * s is quite often x.5 (at least for i and/or j = 0 or 4)
+ * and setting the rounding threshold exactly to 0.5 leads to an
+ * extremely high arithmetic implementation dependency of the result;
+ * s being between x.5 and x.500001 (which is now incorrectly rounded
+ * downwards instead of upwards) is assumed to occur less often
+ * (if at all)
+ */
+      }
+}
+
+/* perform IDCT matrix multiply for 8x8 coefficient block */
+
+void idct(block)
+short *block;
+{
+  int i, j, k, v;
+  double partial_product;
+  double tmp[64];
+
+  for (i=0; i<8; i++)
+    for (j=0; j<8; j++)
+    {
+      partial_product = 0.0;
+
+      for (k=0; k<8; k++)
+        partial_product+= c[k][j]*block[8*i+k];
+
+      tmp[8*i+j] = partial_product;
+    }
+
+  /* Transpose operation is integrated into address mapping by switching
+     loop order of i and j */
+
+  for (j=0; j<8; j++)
+    for (i=0; i<8; i++)
+    {
+      partial_product = 0.0;
+
+      for (k=0; k<8; k++)
+        partial_product+= c[k][i]*tmp[8*k+j];
+
+      v = (int) floor(partial_product+0.5);
+      block[8*i+j] = v;
+    }
+}
diff --git a/contrib/ffmpeg/libavcodec/fft-test.c b/contrib/ffmpeg/libavcodec/fft-test.c
new file mode 100644
index 000000000..e108a6f7b
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/fft-test.c
@@ -0,0 +1,297 @@
+/*
+ * (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file fft-test.c
+ * FFT and MDCT tests.
+ */
+
+#include "dsputil.h"
+#include <math.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+int mm_flags;
+
+/* reference fft */
+
+#define MUL16(a,b) ((a) * (b))
+
+#define CMAC(pre, pim, are, aim, bre, bim) \
+{\
+   pre += (MUL16(are, bre) - MUL16(aim, bim));\
+   pim += (MUL16(are, bim) + MUL16(bre, aim));\
+}
+
+FFTComplex *exptab;
+
+void fft_ref_init(int nbits, int inverse)
+{
+    int n, i;
+    float c1, s1, alpha;
+
+    n = 1 << nbits;
+    exptab = av_malloc((n / 2) * sizeof(FFTComplex));
+
+    for(i=0;i<(n/2);i++) {
+        alpha = 2 * M_PI * (float)i / (float)n;
+        c1 = cos(alpha);
+        s1 = sin(alpha);
+        if (!inverse)
+            s1 = -s1;
+        exptab[i].re = c1;
+        exptab[i].im = s1;
+    }
+}
+
+void fft_ref(FFTComplex *tabr, FFTComplex *tab, int nbits)
+{
+    int n, i, j, k, n2;
+    float tmp_re, tmp_im, s, c;
+    FFTComplex *q;
+
+    n = 1 << nbits;
+    n2 = n >> 1;
+    for(i=0;i<n;i++) {
+        tmp_re = 0;
+        tmp_im = 0;
+        q = tab;
+        for(j=0;j<n;j++) {
+            k = (i * j) & (n - 1);
+            if (k >= n2) {
+                c = -exptab[k - n2].re;
+                s = -exptab[k - n2].im;
+            } else {
+                c = exptab[k].re;
+                s = exptab[k].im;
+            }
+            CMAC(tmp_re, tmp_im, c, s, q->re, q->im);
+            q++;
+        }
+        tabr[i].re = tmp_re;
+        tabr[i].im = tmp_im;
+    }
+}
+
+void imdct_ref(float *out, float *in, int n)
+{
+    int k, i, a;
+    float sum, f;
+
+    for(i=0;i<n;i++) {
+        sum = 0;
+        for(k=0;k<n/2;k++) {
+            a = (2 * i + 1 + (n / 2)) * (2 * k + 1);
+            f = cos(M_PI * a / (double)(2 * n));
+            sum += f * in[k];
+        }
+        out[i] = -sum;
+    }
+}
+
+/* NOTE: no normalisation by 1 / N is done */
+void mdct_ref(float *output, float *input, int n)
+{
+    int k, i;
+    float a, s;
+
+    /* do it by hand */
+    for(k=0;k<n/2;k++) {
+        s = 0;
+        for(i=0;i<n;i++) {
+            a = (2*M_PI*(2*i+1+n/2)*(2*k+1) / (4 * n));
+            s += input[i] * cos(a);
+        }
+        output[k] = s;
+    }
+}
+
+
+float frandom(void)
+{
+    return (float)((random() & 0xffff) - 32768) / 32768.0;
+}
+
+int64_t gettime(void)
+{
+    struct timeval tv;
+    gettimeofday(&tv,NULL);
+    return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
+}
+
+void check_diff(float *tab1, float *tab2, int n)
+{
+    int i;
+
+    for(i=0;i<n;i++) {
+        if (fabsf(tab1[i] - tab2[i]) >= 1e-3) {
+            av_log(NULL, AV_LOG_ERROR, "ERROR %d: %f %f\n",
+                   i, tab1[i], tab2[i]);
+        }
+    }
+}
+
+
+void help(void)
+{
+    av_log(NULL, AV_LOG_INFO,"usage: fft-test [-h] [-s] [-i] [-n b]\n"
+           "-h     print this help\n"
+           "-s     speed test\n"
+           "-m     (I)MDCT test\n"
+           "-i     inverse transform test\n"
+           "-n b   set the transform size to 2^b\n"
+           );
+    exit(1);
+}
+
+
+
+int main(int argc, char **argv)
+{
+    FFTComplex *tab, *tab1, *tab_ref;
+    FFTSample *tabtmp, *tab2;
+    int it, i, c;
+    int do_speed = 0;
+    int do_mdct = 0;
+    int do_inverse = 0;
+    FFTContext s1, *s = &s1;
+    MDCTContext m1, *m = &m1;
+    int fft_nbits, fft_size;
+
+    mm_flags = 0;
+    fft_nbits = 9;
+    for(;;) {
+        c = getopt(argc, argv, "hsimn:");
+        if (c == -1)
+            break;
+        switch(c) {
+        case 'h':
+            help();
+            break;
+        case 's':
+            do_speed = 1;
+            break;
+        case 'i':
+            do_inverse = 1;
+            break;
+        case 'm':
+            do_mdct = 1;
+            break;
+        case 'n':
+            fft_nbits = atoi(optarg);
+            break;
+        }
+    }
+
+    fft_size = 1 << fft_nbits;
+    tab = av_malloc(fft_size * sizeof(FFTComplex));
+    tab1 = av_malloc(fft_size * sizeof(FFTComplex));
+    tab_ref = av_malloc(fft_size * sizeof(FFTComplex));
+    tabtmp = av_malloc(fft_size / 2 * sizeof(FFTSample));
+    tab2 = av_malloc(fft_size * sizeof(FFTSample));
+
+    if (do_mdct) {
+        if (do_inverse)
+            av_log(NULL, AV_LOG_INFO,"IMDCT");
+        else
+            av_log(NULL, AV_LOG_INFO,"MDCT");
+        ff_mdct_init(m, fft_nbits, do_inverse);
+    } else {
+        if (do_inverse)
+            av_log(NULL, AV_LOG_INFO,"IFFT");
+        else
+            av_log(NULL, AV_LOG_INFO,"FFT");
+        ff_fft_init(s, fft_nbits, do_inverse);
+        fft_ref_init(fft_nbits, do_inverse);
+    }
+    av_log(NULL, AV_LOG_INFO," %d test\n", fft_size);
+
+    /* generate random data */
+
+    for(i=0;i<fft_size;i++) {
+        tab1[i].re = frandom();
+        tab1[i].im = frandom();
+    }
+
+    /* checking result */
+    av_log(NULL, AV_LOG_INFO,"Checking...\n");
+
+    if (do_mdct) {
+        if (do_inverse) {
+            imdct_ref((float *)tab_ref, (float *)tab1, fft_size);
+            ff_imdct_calc(m, tab2, (float *)tab1, tabtmp);
+            check_diff((float *)tab_ref, tab2, fft_size);
+        } else {
+            mdct_ref((float *)tab_ref, (float *)tab1, fft_size);
+
+            ff_mdct_calc(m, tab2, (float *)tab1, tabtmp);
+
+            check_diff((float *)tab_ref, tab2, fft_size / 2);
+        }
+    } else {
+        memcpy(tab, tab1, fft_size * sizeof(FFTComplex));
+        ff_fft_permute(s, tab);
+        ff_fft_calc(s, tab);
+
+        fft_ref(tab_ref, tab1, fft_nbits);
+        check_diff((float *)tab_ref, (float *)tab, fft_size * 2);
+    }
+
+    /* do a speed test */
+
+    if (do_speed) {
+        int64_t time_start, duration;
+        int nb_its;
+
+        av_log(NULL, AV_LOG_INFO,"Speed test...\n");
+        /* we measure during about 1 seconds */
+        nb_its = 1;
+        for(;;) {
+            time_start = gettime();
+            for(it=0;it<nb_its;it++) {
+                if (do_mdct) {
+                    if (do_inverse) {
+                        ff_imdct_calc(m, (float *)tab, (float *)tab1, tabtmp);
+                    } else {
+                        ff_mdct_calc(m, (float *)tab, (float *)tab1, tabtmp);
+                    }
+                } else {
+                    memcpy(tab, tab1, fft_size * sizeof(FFTComplex));
+                    ff_fft_calc(s, tab);
+                }
+            }
+            duration = gettime() - time_start;
+            if (duration >= 1000000)
+                break;
+            nb_its *= 2;
+        }
+        av_log(NULL, AV_LOG_INFO,"time: %0.1f us/transform [total time=%0.2f s its=%d]\n",
+               (double)duration / nb_its,
+               (double)duration / 1000000.0,
+               nb_its);
+    }
+
+    if (do_mdct) {
+        ff_mdct_end(m);
+    } else {
+        ff_fft_end(s);
+    }
+    return 0;
+}
diff --git a/contrib/ffmpeg/libavcodec/fft.c b/contrib/ffmpeg/libavcodec/fft.c
new file mode 100644
index 000000000..62a6a5576
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/fft.c
@@ -0,0 +1,262 @@
+/*
+ * FFT/IFFT transforms
+ * Copyright (c) 2002 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file fft.c
+ * FFT/IFFT transforms.
+ */
+
+#include "dsputil.h"
+
+/**
+ * The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is
+ * done
+ */
+int ff_fft_init(FFTContext *s, int nbits, int inverse)
+{
+    int i, j, m, n;
+    float alpha, c1, s1, s2;
+
+    s->nbits = nbits;
+    n = 1 << nbits;
+
+    s->exptab = av_malloc((n / 2) * sizeof(FFTComplex));
+    if (!s->exptab)
+        goto fail;
+    s->revtab = av_malloc(n * sizeof(uint16_t));
+    if (!s->revtab)
+        goto fail;
+    s->inverse = inverse;
+
+    s2 = inverse ? 1.0 : -1.0;
+
+    for(i=0;i<(n/2);i++) {
+        alpha = 2 * M_PI * (float)i / (float)n;
+        c1 = cos(alpha);
+        s1 = sin(alpha) * s2;
+        s->exptab[i].re = c1;
+        s->exptab[i].im = s1;
+    }
+    s->fft_calc = ff_fft_calc_c;
+    s->imdct_calc = ff_imdct_calc;
+    s->exptab1 = NULL;
+
+    /* compute constant table for HAVE_SSE version */
+#if defined(HAVE_MMX) \
+    || (defined(HAVE_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE))
+    {
+        int has_vectors = mm_support();
+
+        if (has_vectors) {
+#if defined(HAVE_MMX)
+            if (has_vectors & MM_3DNOWEXT) {
+                /* 3DNowEx for K7/K8 */
+                s->imdct_calc = ff_imdct_calc_3dn2;
+                s->fft_calc = ff_fft_calc_3dn2;
+            } else if (has_vectors & MM_3DNOW) {
+                /* 3DNow! for K6-2/3 */
+                s->fft_calc = ff_fft_calc_3dn;
+            } else if (has_vectors & MM_SSE) {
+                /* SSE for P3/P4 */
+                s->imdct_calc = ff_imdct_calc_sse;
+                s->fft_calc = ff_fft_calc_sse;
+            }
+#else /* HAVE_MMX */
+            if (has_vectors & MM_ALTIVEC)
+                s->fft_calc = ff_fft_calc_altivec;
+#endif
+        }
+        if (s->fft_calc != ff_fft_calc_c) {
+            int np, nblocks, np2, l;
+            FFTComplex *q;
+
+            np = 1 << nbits;
+            nblocks = np >> 3;
+            np2 = np >> 1;
+            s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex));
+            if (!s->exptab1)
+                goto fail;
+            q = s->exptab1;
+            do {
+                for(l = 0; l < np2; l += 2 * nblocks) {
+                    *q++ = s->exptab[l];
+                    *q++ = s->exptab[l + nblocks];
+
+                    q->re = -s->exptab[l].im;
+                    q->im = s->exptab[l].re;
+                    q++;
+                    q->re = -s->exptab[l + nblocks].im;
+                    q->im = s->exptab[l + nblocks].re;
+                    q++;
+                }
+                nblocks = nblocks >> 1;
+            } while (nblocks != 0);
+            av_freep(&s->exptab);
+        }
+    }
+#endif
+
+    /* compute bit reverse table */
+
+    for(i=0;i<n;i++) {
+        m=0;
+        for(j=0;j<nbits;j++) {
+            m |= ((i >> j) & 1) << (nbits-j-1);
+        }
+        s->revtab[i]=m;
+    }
+    return 0;
+ fail:
+    av_freep(&s->revtab);
+    av_freep(&s->exptab);
+    av_freep(&s->exptab1);
+    return -1;
+}
+
+/* butter fly op */
+#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
+{\
+  FFTSample ax, ay, bx, by;\
+  bx=pre1;\
+  by=pim1;\
+  ax=qre1;\
+  ay=qim1;\
+  pre = (bx + ax);\
+  pim = (by + ay);\
+  qre = (bx - ax);\
+  qim = (by - ay);\
+}
+
+#define MUL16(a,b) ((a) * (b))
+
+#define CMUL(pre, pim, are, aim, bre, bim) \
+{\
+   pre = (MUL16(are, bre) - MUL16(aim, bim));\
+   pim = (MUL16(are, bim) + MUL16(bre, aim));\
+}
+
+/**
+ * Do a complex FFT with the parameters defined in ff_fft_init(). The
+ * input data must be permuted before with s->revtab table. No
+ * 1.0/sqrt(n) normalization is done.
+ */
+void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
+{
+    int ln = s->nbits;
+    int j, np, np2;
+    int nblocks, nloops;
+    register FFTComplex *p, *q;
+    FFTComplex *exptab = s->exptab;
+    int l;
+    FFTSample tmp_re, tmp_im;
+
+    np = 1 << ln;
+
+    /* pass 0 */
+
+    p=&z[0];
+    j=(np >> 1);
+    do {
+        BF(p[0].re, p[0].im, p[1].re, p[1].im,
+           p[0].re, p[0].im, p[1].re, p[1].im);
+        p+=2;
+    } while (--j != 0);
+
+    /* pass 1 */
+
+
+    p=&z[0];
+    j=np >> 2;
+    if (s->inverse) {
+        do {
+            BF(p[0].re, p[0].im, p[2].re, p[2].im,
+               p[0].re, p[0].im, p[2].re, p[2].im);
+            BF(p[1].re, p[1].im, p[3].re, p[3].im,
+               p[1].re, p[1].im, -p[3].im, p[3].re);
+            p+=4;
+        } while (--j != 0);
+    } else {
+        do {
+            BF(p[0].re, p[0].im, p[2].re, p[2].im,
+               p[0].re, p[0].im, p[2].re, p[2].im);
+            BF(p[1].re, p[1].im, p[3].re, p[3].im,
+               p[1].re, p[1].im, p[3].im, -p[3].re);
+            p+=4;
+        } while (--j != 0);
+    }
+    /* pass 2 .. ln-1 */
+
+    nblocks = np >> 3;
+    nloops = 1 << 2;
+    np2 = np >> 1;
+    do {
+        p = z;
+        q = z + nloops;
+        for (j = 0; j < nblocks; ++j) {
+            BF(p->re, p->im, q->re, q->im,
+               p->re, p->im, q->re, q->im);
+
+            p++;
+            q++;
+            for(l = nblocks; l < np2; l += nblocks) {
+                CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
+                BF(p->re, p->im, q->re, q->im,
+                   p->re, p->im, tmp_re, tmp_im);
+                p++;
+                q++;
+            }
+
+            p += nloops;
+            q += nloops;
+        }
+        nblocks = nblocks >> 1;
+        nloops = nloops << 1;
+    } while (nblocks != 0);
+}
+
+/**
+ * Do the permutation needed BEFORE calling ff_fft_calc()
+ */
+void ff_fft_permute(FFTContext *s, FFTComplex *z)
+{
+    int j, k, np;
+    FFTComplex tmp;
+    const uint16_t *revtab = s->revtab;
+
+    /* reverse */
+    np = 1 << s->nbits;
+    for(j=0;j<np;j++) {
+        k = revtab[j];
+        if (k < j) {
+            tmp = z[k];
+            z[k] = z[j];
+            z[j] = tmp;
+        }
+    }
+}
+
+void ff_fft_end(FFTContext *s)
+{
+    av_freep(&s->revtab);
+    av_freep(&s->exptab);
+    av_freep(&s->exptab1);
+}
+
diff --git a/contrib/ffmpeg/libavcodec/ffv1.c b/contrib/ffmpeg/libavcodec/ffv1.c
new file mode 100644
index 000000000..62623e591
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ffv1.c
@@ -0,0 +1,1040 @@
+/*
+ * FFV1 codec for libavcodec
+ *
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file ffv1.c
+ * FF Video Codec 1 (an experimental lossless codec)
+ */
+
+#include "common.h"
+#include "bitstream.h"
+#include "avcodec.h"
+#include "dsputil.h"
+#include "rangecoder.h"
+#include "golomb.h"
+
+#define MAX_PLANES 4
+#define CONTEXT_SIZE 32
+
+static const int8_t quant3[256]={
+ 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
+};
+static const int8_t quant5[256]={
+ 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
+};
+static const int8_t quant7[256]={
+ 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
+-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
+-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
+-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
+-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
+-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
+};
+static const int8_t quant9[256]={
+ 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
+-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
+};
+static const int8_t quant11[256]={
+ 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
+-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
+};
+static const int8_t quant13[256]={
+ 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
+-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
+-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
+-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
+-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
+};
+
+static const uint8_t log2_run[32]={
+ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 5, 5, 6, 6, 7, 7,
+ 8, 9,10,11,12,13,14,15,
+};
+
+typedef struct VlcState{
+    int16_t drift;
+    uint16_t error_sum;
+    int8_t bias;
+    uint8_t count;
+} VlcState;
+
+typedef struct PlaneContext{
+    int context_count;
+    uint8_t (*state)[CONTEXT_SIZE];
+    VlcState *vlc_state;
+    uint8_t interlace_bit_state[2];
+} PlaneContext;
+
+typedef struct FFV1Context{
+    AVCodecContext *avctx;
+    RangeCoder c;
+    GetBitContext gb;
+    PutBitContext pb;
+    int version;
+    int width, height;
+    int chroma_h_shift, chroma_v_shift;
+    int flags;
+    int picture_number;
+    AVFrame picture;
+    int plane_count;
+    int ac;                              ///< 1-> CABAC 0-> golomb rice
+    PlaneContext plane[MAX_PLANES];
+    int16_t quant_table[5][256];
+    int run_index;
+    int colorspace;
+
+    DSPContext dsp;
+}FFV1Context;
+
+static always_inline int fold(int diff, int bits){
+    if(bits==8)
+        diff= (int8_t)diff;
+    else{
+        diff+= 1<<(bits-1);
+        diff&=(1<<bits)-1;
+        diff-= 1<<(bits-1);
+    }
+
+    return diff;
+}
+
+static inline int predict(int_fast16_t *src, int_fast16_t *last){
+    const int LT= last[-1];
+    const int  T= last[ 0];
+    const int L =  src[-1];
+
+    return mid_pred(L, L + T - LT, T);
+}
+
+static inline int get_context(FFV1Context *f, int_fast16_t *src, int_fast16_t *last, int_fast16_t *last2){
+    const int LT= last[-1];
+    const int  T= last[ 0];
+    const int RT= last[ 1];
+    const int L =  src[-1];
+
+    if(f->quant_table[3][127]){
+        const int TT= last2[0];
+        const int LL=  src[-2];
+        return f->quant_table[0][(L-LT) & 0xFF] + f->quant_table[1][(LT-T) & 0xFF] + f->quant_table[2][(T-RT) & 0xFF]
+              +f->quant_table[3][(LL-L) & 0xFF] + f->quant_table[4][(TT-T) & 0xFF];
+    }else
+        return f->quant_table[0][(L-LT) & 0xFF] + f->quant_table[1][(LT-T) & 0xFF] + f->quant_table[2][(T-RT) & 0xFF];
+}
+
+static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
+    int i;
+
+    if(v){
+        const int a= FFABS(v);
+        const int e= av_log2(a);
+        put_rac(c, state+0, 0);
+
+        assert(e<=9);
+
+        for(i=0; i<e; i++){
+            put_rac(c, state+1+i, 1);  //1..10
+        }
+        put_rac(c, state+1+i, 0);
+
+        for(i=e-1; i>=0; i--){
+            put_rac(c, state+22+i, (a>>i)&1); //22..31
+        }
+
+        if(is_signed)
+            put_rac(c, state+11 + e, v < 0); //11..21
+    }else{
+        put_rac(c, state+0, 1);
+    }
+}
+
+static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
+    if(get_rac(c, state+0))
+        return 0;
+    else{
+        int i, e, a;
+        e= 0;
+        while(get_rac(c, state+1 + e)){ //1..10
+            e++;
+        }
+        assert(e<=9);
+
+        a= 1;
+        for(i=e-1; i>=0; i--){
+            a += a + get_rac(c, state+22 + i); //22..31
+        }
+
+        if(is_signed && get_rac(c, state+11 + e)) //11..21
+            return -a;
+        else
+            return a;
+    }
+}
+
+static inline void update_vlc_state(VlcState * const state, const int v){
+    int drift= state->drift;
+    int count= state->count;
+    state->error_sum += FFABS(v);
+    drift += v;
+
+    if(count == 128){ //FIXME variable
+        count >>= 1;
+        drift >>= 1;
+        state->error_sum >>= 1;
+    }
+    count++;
+
+    if(drift <= -count){
+        if(state->bias > -128) state->bias--;
+
+        drift += count;
+        if(drift <= -count)
+            drift= -count + 1;
+    }else if(drift > 0){
+        if(state->bias <  127) state->bias++;
+
+        drift -= count;
+        if(drift > 0)
+            drift= 0;
+    }
+
+    state->drift= drift;
+    state->count= count;
+}
+
+static inline void put_vlc_symbol(PutBitContext *pb, VlcState * const state, int v, int bits){
+    int i, k, code;
+//printf("final: %d ", v);
+    v = fold(v - state->bias, bits);
+
+    i= state->count;
+    k=0;
+    while(i < state->error_sum){ //FIXME optimize
+        k++;
+        i += i;
+    }
+
+    assert(k<=8);
+
+#if 0 // JPEG LS
+    if(k==0 && 2*state->drift <= - state->count) code= v ^ (-1);
+    else                                         code= v;
+#else
+     code= v ^ ((2*state->drift + state->count)>>31);
+#endif
+
+//printf("v:%d/%d bias:%d error:%d drift:%d count:%d k:%d\n", v, code, state->bias, state->error_sum, state->drift, state->count, k);
+    set_sr_golomb(pb, code, k, 12, bits);
+
+    update_vlc_state(state, v);
+}
+
+static inline int get_vlc_symbol(GetBitContext *gb, VlcState * const state, int bits){
+    int k, i, v, ret;
+
+    i= state->count;
+    k=0;
+    while(i < state->error_sum){ //FIXME optimize
+        k++;
+        i += i;
+    }
+
+    assert(k<=8);
+
+    v= get_sr_golomb(gb, k, 12, bits);
+//printf("v:%d bias:%d error:%d drift:%d count:%d k:%d", v, state->bias, state->error_sum, state->drift, state->count, k);
+
+#if 0 // JPEG LS
+    if(k==0 && 2*state->drift <= - state->count) v ^= (-1);
+#else
+     v ^= ((2*state->drift + state->count)>>31);
+#endif
+
+    ret= fold(v + state->bias, bits);
+
+    update_vlc_state(state, v);
+//printf("final: %d\n", ret);
+    return ret;
+}
+
+#ifdef CONFIG_ENCODERS
+static inline int encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int plane_index, int bits){
+    PlaneContext * const p= &s->plane[plane_index];
+    RangeCoder * const c= &s->c;
+    int x;
+    int run_index= s->run_index;
+    int run_count=0;
+    int run_mode=0;
+
+    if(s->ac){
+        if(c->bytestream_end - c->bytestream < w*20){
+            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+            return -1;
+        }
+    }else{
+        if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < w*4){
+            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+            return -1;
+        }
+    }
+
+    for(x=0; x<w; x++){
+        int diff, context;
+
+        context= get_context(s, sample[0]+x, sample[1]+x, sample[2]+x);
+        diff= sample[0][x] - predict(sample[0]+x, sample[1]+x);
+
+        if(context < 0){
+            context = -context;
+            diff= -diff;
+        }
+
+        diff= fold(diff, bits);
+
+        if(s->ac){
+            put_symbol(c, p->state[context], diff, 1);
+        }else{
+            if(context == 0) run_mode=1;
+
+            if(run_mode){
+
+                if(diff){
+                    while(run_count >= 1<<log2_run[run_index]){
+                        run_count -= 1<<log2_run[run_index];
+                        run_index++;
+                        put_bits(&s->pb, 1, 1);
+                    }
+
+                    put_bits(&s->pb, 1 + log2_run[run_index], run_count);
+                    if(run_index) run_index--;
+                    run_count=0;
+                    run_mode=0;
+                    if(diff>0) diff--;
+                }else{
+                    run_count++;
+                }
+            }
+
+//            printf("count:%d index:%d, mode:%d, x:%d y:%d pos:%d\n", run_count, run_index, run_mode, x, y, (int)put_bits_count(&s->pb));
+
+            if(run_mode == 0)
+                put_vlc_symbol(&s->pb, &p->vlc_state[context], diff, bits);
+        }
+    }
+    if(run_mode){
+        while(run_count >= 1<<log2_run[run_index]){
+            run_count -= 1<<log2_run[run_index];
+            run_index++;
+            put_bits(&s->pb, 1, 1);
+        }
+
+        if(run_count)
+            put_bits(&s->pb, 1, 1);
+    }
+    s->run_index= run_index;
+
+    return 0;
+}
+
+static void encode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride, int plane_index){
+    int x,y,i;
+    const int ring_size= s->avctx->context_model ? 3 : 2;
+    int_fast16_t sample_buffer[ring_size][w+6], *sample[ring_size];
+    s->run_index=0;
+
+    memset(sample_buffer, 0, sizeof(sample_buffer));
+
+    for(y=0; y<h; y++){
+        for(i=0; i<ring_size; i++)
+            sample[i]= sample_buffer[(h+i-y)%ring_size]+3;
+
+        sample[0][-1]= sample[1][0  ];
+        sample[1][ w]= sample[1][w-1];
+//{START_TIMER
+        for(x=0; x<w; x++){
+            sample[0][x]= src[x + stride*y];
+        }
+        encode_line(s, w, sample, plane_index, 8);
+//STOP_TIMER("encode line")}
+    }
+}
+
+static void encode_rgb_frame(FFV1Context *s, uint32_t *src, int w, int h, int stride){
+    int x, y, p, i;
+    const int ring_size= s->avctx->context_model ? 3 : 2;
+    int_fast16_t sample_buffer[3][ring_size][w+6], *sample[3][ring_size];
+    s->run_index=0;
+
+    memset(sample_buffer, 0, sizeof(sample_buffer));
+
+    for(y=0; y<h; y++){
+        for(i=0; i<ring_size; i++)
+            for(p=0; p<3; p++)
+                sample[p][i]= sample_buffer[p][(h+i-y)%ring_size]+3;
+
+        for(x=0; x<w; x++){
+            int v= src[x + stride*y];
+            int b= v&0xFF;
+            int g= (v>>8)&0xFF;
+            int r= (v>>16)&0xFF;
+
+            b -= g;
+            r -= g;
+            g += (b + r)>>2;
+            b += 0x100;
+            r += 0x100;
+
+//            assert(g>=0 && b>=0 && r>=0);
+//            assert(g<256 && b<512 && r<512);
+            sample[0][0][x]= g;
+            sample[1][0][x]= b;
+            sample[2][0][x]= r;
+        }
+        for(p=0; p<3; p++){
+            sample[p][0][-1]= sample[p][1][0  ];
+            sample[p][1][ w]= sample[p][1][w-1];
+            encode_line(s, w, sample[p], FFMIN(p, 1), 9);
+        }
+    }
+}
+
+static void write_quant_table(RangeCoder *c, int16_t *quant_table){
+    int last=0;
+    int i;
+    uint8_t state[CONTEXT_SIZE];
+    memset(state, 128, sizeof(state));
+
+    for(i=1; i<128 ; i++){
+        if(quant_table[i] != quant_table[i-1]){
+            put_symbol(c, state, i-last-1, 0);
+            last= i;
+        }
+    }
+    put_symbol(c, state, i-last-1, 0);
+}
+
+static void write_header(FFV1Context *f){
+    uint8_t state[CONTEXT_SIZE];
+    int i;
+    RangeCoder * const c= &f->c;
+
+    memset(state, 128, sizeof(state));
+
+    put_symbol(c, state, f->version, 0);
+    put_symbol(c, state, f->avctx->coder_type, 0);
+    put_symbol(c, state, f->colorspace, 0); //YUV cs type
+    put_rac(c, state, 1); //chroma planes
+        put_symbol(c, state, f->chroma_h_shift, 0);
+        put_symbol(c, state, f->chroma_v_shift, 0);
+    put_rac(c, state, 0); //no transparency plane
+
+    for(i=0; i<5; i++)
+        write_quant_table(c, f->quant_table[i]);
+}
+#endif /* CONFIG_ENCODERS */
+
+static int common_init(AVCodecContext *avctx){
+    FFV1Context *s = avctx->priv_data;
+    int width, height;
+
+    s->avctx= avctx;
+    s->flags= avctx->flags;
+
+    dsputil_init(&s->dsp, avctx);
+
+    width= s->width= avctx->width;
+    height= s->height= avctx->height;
+
+    assert(width && height);
+
+    return 0;
+}
+
+#ifdef CONFIG_ENCODERS
+static int encode_init(AVCodecContext *avctx)
+{
+    FFV1Context *s = avctx->priv_data;
+    int i;
+
+    common_init(avctx);
+
+    s->version=0;
+    s->ac= avctx->coder_type;
+
+    s->plane_count=2;
+    for(i=0; i<256; i++){
+        s->quant_table[0][i]=           quant11[i];
+        s->quant_table[1][i]=        11*quant11[i];
+        if(avctx->context_model==0){
+            s->quant_table[2][i]=     11*11*quant11[i];
+            s->quant_table[3][i]=
+            s->quant_table[4][i]=0;
+        }else{
+            s->quant_table[2][i]=     11*11*quant5 [i];
+            s->quant_table[3][i]=   5*11*11*quant5 [i];
+            s->quant_table[4][i]= 5*5*11*11*quant5 [i];
+        }
+    }
+
+    for(i=0; i<s->plane_count; i++){
+        PlaneContext * const p= &s->plane[i];
+
+        if(avctx->context_model==0){
+            p->context_count= (11*11*11+1)/2;
+        }else{
+            p->context_count= (11*11*5*5*5+1)/2;
+        }
+
+        if(s->ac){
+            if(!p->state) p->state= av_malloc(CONTEXT_SIZE*p->context_count*sizeof(uint8_t));
+        }else{
+            if(!p->vlc_state) p->vlc_state= av_malloc(p->context_count*sizeof(VlcState));
+        }
+    }
+
+    avctx->coded_frame= &s->picture;
+    switch(avctx->pix_fmt){
+    case PIX_FMT_YUV444P:
+    case PIX_FMT_YUV422P:
+    case PIX_FMT_YUV420P:
+    case PIX_FMT_YUV411P:
+    case PIX_FMT_YUV410P:
+        s->colorspace= 0;
+        break;
+    case PIX_FMT_RGBA32:
+        s->colorspace= 1;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
+        return -1;
+    }
+    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
+
+    s->picture_number=0;
+
+    return 0;
+}
+#endif /* CONFIG_ENCODERS */
+
+
+static void clear_state(FFV1Context *f){
+    int i, j;
+
+    for(i=0; i<f->plane_count; i++){
+        PlaneContext *p= &f->plane[i];
+
+        p->interlace_bit_state[0]= 128;
+        p->interlace_bit_state[1]= 128;
+
+        for(j=0; j<p->context_count; j++){
+            if(f->ac){
+                memset(p->state[j], 128, sizeof(uint8_t)*CONTEXT_SIZE);
+            }else{
+                p->vlc_state[j].drift= 0;
+                p->vlc_state[j].error_sum= 4; //FFMAX((RANGE + 32)/64, 2);
+                p->vlc_state[j].bias= 0;
+                p->vlc_state[j].count= 1;
+            }
+        }
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
+    FFV1Context *f = avctx->priv_data;
+    RangeCoder * const c= &f->c;
+    AVFrame *pict = data;
+    const int width= f->width;
+    const int height= f->height;
+    AVFrame * const p= &f->picture;
+    int used_count= 0;
+    uint8_t keystate=128;
+
+    ff_init_range_encoder(c, buf, buf_size);
+//    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
+    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
+
+    *p = *pict;
+    p->pict_type= FF_I_TYPE;
+
+    if(avctx->gop_size==0 || f->picture_number % avctx->gop_size == 0){
+        put_rac(c, &keystate, 1);
+        p->key_frame= 1;
+        write_header(f);
+        clear_state(f);
+    }else{
+        put_rac(c, &keystate, 0);
+        p->key_frame= 0;
+    }
+
+    if(!f->ac){
+        used_count += ff_rac_terminate(c);
+//printf("pos=%d\n", used_count);
+        init_put_bits(&f->pb, buf + used_count, buf_size - used_count);
+    }
+
+    if(f->colorspace==0){
+        const int chroma_width = -((-width )>>f->chroma_h_shift);
+        const int chroma_height= -((-height)>>f->chroma_v_shift);
+
+        encode_plane(f, p->data[0], width, height, p->linesize[0], 0);
+
+        encode_plane(f, p->data[1], chroma_width, chroma_height, p->linesize[1], 1);
+        encode_plane(f, p->data[2], chroma_width, chroma_height, p->linesize[2], 1);
+    }else{
+        encode_rgb_frame(f, (uint32_t*)(p->data[0]), width, height, p->linesize[0]/4);
+    }
+    emms_c();
+
+    f->picture_number++;
+
+    if(f->ac){
+        return ff_rac_terminate(c);
+    }else{
+        flush_put_bits(&f->pb); //nicer padding FIXME
+        return used_count + (put_bits_count(&f->pb)+7)/8;
+    }
+}
+#endif /* CONFIG_ENCODERS */
+
+static int common_end(AVCodecContext *avctx){
+    FFV1Context *s = avctx->priv_data;
+    int i;
+
+    for(i=0; i<s->plane_count; i++){
+        PlaneContext *p= &s->plane[i];
+
+        av_freep(&p->state);
+    }
+
+    return 0;
+}
+
+static inline void decode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int plane_index, int bits){
+    PlaneContext * const p= &s->plane[plane_index];
+    RangeCoder * const c= &s->c;
+    int x;
+    int run_count=0;
+    int run_mode=0;
+    int run_index= s->run_index;
+
+    for(x=0; x<w; x++){
+        int diff, context, sign;
+
+        context= get_context(s, sample[1] + x, sample[0] + x, sample[1] + x);
+        if(context < 0){
+            context= -context;
+            sign=1;
+        }else
+            sign=0;
+
+
+        if(s->ac){
+            diff= get_symbol(c, p->state[context], 1);
+        }else{
+            if(context == 0 && run_mode==0) run_mode=1;
+
+            if(run_mode){
+                if(run_count==0 && run_mode==1){
+                    if(get_bits1(&s->gb)){
+                        run_count = 1<<log2_run[run_index];
+                        if(x + run_count <= w) run_index++;
+                    }else{
+                        if(log2_run[run_index]) run_count = get_bits(&s->gb, log2_run[run_index]);
+                        else run_count=0;
+                        if(run_index) run_index--;
+                        run_mode=2;
+                    }
+                }
+                run_count--;
+                if(run_count < 0){
+                    run_mode=0;
+                    run_count=0;
+                    diff= get_vlc_symbol(&s->gb, &p->vlc_state[context], bits);
+                    if(diff>=0) diff++;
+                }else
+                    diff=0;
+            }else
+                diff= get_vlc_symbol(&s->gb, &p->vlc_state[context], bits);
+
+//            printf("count:%d index:%d, mode:%d, x:%d y:%d pos:%d\n", run_count, run_index, run_mode, x, y, get_bits_count(&s->gb));
+        }
+
+        if(sign) diff= -diff;
+
+        sample[1][x]= (predict(sample[1] + x, sample[0] + x) + diff) & ((1<<bits)-1);
+    }
+    s->run_index= run_index;
+}
+
+static void decode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride, int plane_index){
+    int x, y;
+    int_fast16_t sample_buffer[2][w+6];
+    int_fast16_t *sample[2]= {sample_buffer[0]+3, sample_buffer[1]+3};
+
+    s->run_index=0;
+
+    memset(sample_buffer, 0, sizeof(sample_buffer));
+
+    for(y=0; y<h; y++){
+        int_fast16_t *temp= sample[0]; //FIXME try a normal buffer
+
+        sample[0]= sample[1];
+        sample[1]= temp;
+
+        sample[1][-1]= sample[0][0  ];
+        sample[0][ w]= sample[0][w-1];
+
+//{START_TIMER
+        decode_line(s, w, sample, plane_index, 8);
+        for(x=0; x<w; x++){
+            src[x + stride*y]= sample[1][x];
+        }
+//STOP_TIMER("decode-line")}
+    }
+}
+
+static void decode_rgb_frame(FFV1Context *s, uint32_t *src, int w, int h, int stride){
+    int x, y, p;
+    int_fast16_t sample_buffer[3][2][w+6];
+    int_fast16_t *sample[3][2]= {
+        {sample_buffer[0][0]+3, sample_buffer[0][1]+3},
+        {sample_buffer[1][0]+3, sample_buffer[1][1]+3},
+        {sample_buffer[2][0]+3, sample_buffer[2][1]+3}};
+
+    s->run_index=0;
+
+    memset(sample_buffer, 0, sizeof(sample_buffer));
+
+    for(y=0; y<h; y++){
+        for(p=0; p<3; p++){
+            int_fast16_t *temp= sample[p][0]; //FIXME try a normal buffer
+
+            sample[p][0]= sample[p][1];
+            sample[p][1]= temp;
+
+            sample[p][1][-1]= sample[p][0][0  ];
+            sample[p][0][ w]= sample[p][0][w-1];
+            decode_line(s, w, sample[p], FFMIN(p, 1), 9);
+        }
+        for(x=0; x<w; x++){
+            int g= sample[0][1][x];
+            int b= sample[1][1][x];
+            int r= sample[2][1][x];
+
+//            assert(g>=0 && b>=0 && r>=0);
+//            assert(g<256 && b<512 && r<512);
+
+            b -= 0x100;
+            r -= 0x100;
+            g -= (b + r)>>2;
+            b += g;
+            r += g;
+
+            src[x + stride*y]= b + (g<<8) + (r<<16);
+        }
+    }
+}
+
+static int read_quant_table(RangeCoder *c, int16_t *quant_table, int scale){
+    int v;
+    int i=0;
+    uint8_t state[CONTEXT_SIZE];
+
+    memset(state, 128, sizeof(state));
+
+    for(v=0; i<128 ; v++){
+        int len= get_symbol(c, state, 0) + 1;
+
+        if(len + i > 128) return -1;
+
+        while(len--){
+            quant_table[i] = scale*v;
+            i++;
+//printf("%2d ",v);
+//if(i%16==0) printf("\n");
+        }
+    }
+
+    for(i=1; i<128; i++){
+        quant_table[256-i]= -quant_table[i];
+    }
+    quant_table[128]= -quant_table[127];
+
+    return 2*v - 1;
+}
+
+static int read_header(FFV1Context *f){
+    uint8_t state[CONTEXT_SIZE];
+    int i, context_count;
+    RangeCoder * const c= &f->c;
+
+    memset(state, 128, sizeof(state));
+
+    f->version= get_symbol(c, state, 0);
+    f->ac= f->avctx->coder_type= get_symbol(c, state, 0);
+    f->colorspace= get_symbol(c, state, 0); //YUV cs type
+    get_rac(c, state); //no chroma = false
+    f->chroma_h_shift= get_symbol(c, state, 0);
+    f->chroma_v_shift= get_symbol(c, state, 0);
+    get_rac(c, state); //transparency plane
+    f->plane_count= 2;
+
+    if(f->colorspace==0){
+        switch(16*f->chroma_h_shift + f->chroma_v_shift){
+        case 0x00: f->avctx->pix_fmt= PIX_FMT_YUV444P; break;
+        case 0x10: f->avctx->pix_fmt= PIX_FMT_YUV422P; break;
+        case 0x11: f->avctx->pix_fmt= PIX_FMT_YUV420P; break;
+        case 0x20: f->avctx->pix_fmt= PIX_FMT_YUV411P; break;
+        case 0x22: f->avctx->pix_fmt= PIX_FMT_YUV410P; break;
+        default:
+            av_log(f->avctx, AV_LOG_ERROR, "format not supported\n");
+            return -1;
+        }
+    }else if(f->colorspace==1){
+        if(f->chroma_h_shift || f->chroma_v_shift){
+            av_log(f->avctx, AV_LOG_ERROR, "chroma subsampling not supported in this colorspace\n");
+            return -1;
+        }
+        f->avctx->pix_fmt= PIX_FMT_RGBA32;
+    }else{
+        av_log(f->avctx, AV_LOG_ERROR, "colorspace not supported\n");
+        return -1;
+    }
+
+//printf("%d %d %d\n", f->chroma_h_shift, f->chroma_v_shift,f->avctx->pix_fmt);
+
+    context_count=1;
+    for(i=0; i<5; i++){
+        context_count*= read_quant_table(c, f->quant_table[i], context_count);
+        if(context_count < 0 || context_count > 32768){
+            av_log(f->avctx, AV_LOG_ERROR, "read_quant_table error\n");
+            return -1;
+        }
+    }
+    context_count= (context_count+1)/2;
+
+    for(i=0; i<f->plane_count; i++){
+        PlaneContext * const p= &f->plane[i];
+
+        p->context_count= context_count;
+
+        if(f->ac){
+            if(!p->state) p->state= av_malloc(CONTEXT_SIZE*p->context_count*sizeof(uint8_t));
+        }else{
+            if(!p->vlc_state) p->vlc_state= av_malloc(p->context_count*sizeof(VlcState));
+        }
+    }
+
+    return 0;
+}
+
+static int decode_init(AVCodecContext *avctx)
+{
+//    FFV1Context *s = avctx->priv_data;
+
+    common_init(avctx);
+
+    return 0;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
+    FFV1Context *f = avctx->priv_data;
+    RangeCoder * const c= &f->c;
+    const int width= f->width;
+    const int height= f->height;
+    AVFrame * const p= &f->picture;
+    int bytes_read;
+    uint8_t keystate= 128;
+
+    AVFrame *picture = data;
+
+    ff_init_range_decoder(c, buf, buf_size);
+    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
+
+
+    p->pict_type= FF_I_TYPE; //FIXME I vs. P
+    if(get_rac(c, &keystate)){
+        p->key_frame= 1;
+        if(read_header(f) < 0)
+            return -1;
+        clear_state(f);
+    }else{
+        p->key_frame= 0;
+    }
+    if(!f->plane[0].state && !f->plane[0].vlc_state)
+        return -1;
+
+    p->reference= 0;
+    if(avctx->get_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    if(avctx->debug&FF_DEBUG_PICT_INFO)
+        av_log(avctx, AV_LOG_ERROR, "keyframe:%d coder:%d\n", p->key_frame, f->ac);
+
+    if(!f->ac){
+        bytes_read = c->bytestream - c->bytestream_start - 1;
+        if(bytes_read ==0) av_log(avctx, AV_LOG_ERROR, "error at end of AC stream\n"); //FIXME
+//printf("pos=%d\n", bytes_read);
+        init_get_bits(&f->gb, buf + bytes_read, buf_size - bytes_read);
+    } else {
+        bytes_read = 0; /* avoid warning */
+    }
+
+    if(f->colorspace==0){
+        const int chroma_width = -((-width )>>f->chroma_h_shift);
+        const int chroma_height= -((-height)>>f->chroma_v_shift);
+        decode_plane(f, p->data[0], width, height, p->linesize[0], 0);
+
+        decode_plane(f, p->data[1], chroma_width, chroma_height, p->linesize[1], 1);
+        decode_plane(f, p->data[2], chroma_width, chroma_height, p->linesize[2], 1);
+    }else{
+        decode_rgb_frame(f, (uint32_t*)p->data[0], width, height, p->linesize[0]/4);
+    }
+
+    emms_c();
+
+    f->picture_number++;
+
+    *picture= *p;
+
+    avctx->release_buffer(avctx, p); //FIXME
+
+    *data_size = sizeof(AVFrame);
+
+    if(f->ac){
+        bytes_read= c->bytestream - c->bytestream_start - 1;
+        if(bytes_read ==0) av_log(f->avctx, AV_LOG_ERROR, "error at end of frame\n");
+    }else{
+        bytes_read+= (get_bits_count(&f->gb)+7)/8;
+    }
+
+    return bytes_read;
+}
+
+AVCodec ffv1_decoder = {
+    "ffv1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_FFV1,
+    sizeof(FFV1Context),
+    decode_init,
+    NULL,
+    common_end,
+    decode_frame,
+    CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
+    NULL
+};
+
+#ifdef CONFIG_ENCODERS
+AVCodec ffv1_encoder = {
+    "ffv1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_FFV1,
+    sizeof(FFV1Context),
+    encode_init,
+    encode_frame,
+    common_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_YUV444P, PIX_FMT_YUV422P, PIX_FMT_YUV411P, PIX_FMT_YUV410P, PIX_FMT_RGBA32, -1},
+};
+#endif
diff --git a/contrib/ffmpeg/libavcodec/flac.c b/contrib/ffmpeg/libavcodec/flac.c
new file mode 100644
index 000000000..6c64ad0a1
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/flac.c
@@ -0,0 +1,752 @@
+/*
+ * FLAC (Free Lossless Audio Codec) decoder
+ * Copyright (c) 2003 Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file flac.c
+ * FLAC (Free Lossless Audio Codec) decoder
+ * @author Alex Beregszaszi
+ *
+ * For more information on the FLAC format, visit:
+ *  http://flac.sourceforge.net/
+ *
+ * This decoder can be used in 1 of 2 ways: Either raw FLAC data can be fed
+ * through, starting from the initial 'fLaC' signature; or by passing the
+ * 34-byte streaminfo structure through avctx->extradata[_size] followed
+ * by data starting with the 0xFFF8 marker.
+ */
+
+#include <limits.h>
+
+#define ALT_BITSTREAM_READER
+#include "avcodec.h"
+#include "bitstream.h"
+#include "golomb.h"
+#include "crc.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+#define MAX_CHANNELS 8
+#define MAX_BLOCKSIZE 65535
+#define FLAC_STREAMINFO_SIZE 34
+
+enum decorrelation_type {
+    INDEPENDENT,
+    LEFT_SIDE,
+    RIGHT_SIDE,
+    MID_SIDE,
+};
+
+typedef struct FLACContext {
+    AVCodecContext *avctx;
+    GetBitContext gb;
+
+    int min_blocksize, max_blocksize;
+    int min_framesize, max_framesize;
+    int samplerate, channels;
+    int blocksize/*, last_blocksize*/;
+    int bps, curr_bps;
+    enum decorrelation_type decorrelation;
+
+    int32_t *decoded[MAX_CHANNELS];
+    uint8_t *bitstream;
+    int bitstream_size;
+    int bitstream_index;
+    unsigned int allocated_bitstream_size;
+} FLACContext;
+
+#define METADATA_TYPE_STREAMINFO 0
+
+static int sample_rate_table[] =
+{ 0, 0, 0, 0,
+  8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000,
+  0, 0, 0, 0 };
+
+static int sample_size_table[] =
+{ 0, 8, 12, 0, 16, 20, 24, 0 };
+
+static int blocksize_table[] = {
+     0,    192, 576<<0, 576<<1, 576<<2, 576<<3,      0,      0,
+256<<0, 256<<1, 256<<2, 256<<3, 256<<4, 256<<5, 256<<6, 256<<7
+};
+
+static int64_t get_utf8(GetBitContext *gb){
+    int64_t val;
+    GET_UTF8(val, get_bits(gb, 8), return -1;)
+    return val;
+}
+
+static void metadata_streaminfo(FLACContext *s);
+static void allocate_buffers(FLACContext *s);
+static int metadata_parse(FLACContext *s);
+
+static int flac_decode_init(AVCodecContext * avctx)
+{
+    FLACContext *s = avctx->priv_data;
+    s->avctx = avctx;
+
+    if (avctx->extradata_size > 4) {
+        /* initialize based on the demuxer-supplied streamdata header */
+        init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8);
+        if (avctx->extradata_size == FLAC_STREAMINFO_SIZE) {
+            metadata_streaminfo(s);
+            allocate_buffers(s);
+        } else {
+            metadata_parse(s);
+        }
+    }
+
+    return 0;
+}
+
+static void dump_headers(FLACContext *s)
+{
+    av_log(s->avctx, AV_LOG_DEBUG, "  Blocksize: %d .. %d (%d)\n", s->min_blocksize, s->max_blocksize, s->blocksize);
+    av_log(s->avctx, AV_LOG_DEBUG, "  Framesize: %d .. %d\n", s->min_framesize, s->max_framesize);
+    av_log(s->avctx, AV_LOG_DEBUG, "  Samplerate: %d\n", s->samplerate);
+    av_log(s->avctx, AV_LOG_DEBUG, "  Channels: %d\n", s->channels);
+    av_log(s->avctx, AV_LOG_DEBUG, "  Bits: %d\n", s->bps);
+}
+
+static void allocate_buffers(FLACContext *s){
+    int i;
+
+    assert(s->max_blocksize);
+
+    if(s->max_framesize == 0 && s->max_blocksize){
+        s->max_framesize= (s->channels * s->bps * s->max_blocksize + 7)/ 8; //FIXME header overhead
+    }
+
+    for (i = 0; i < s->channels; i++)
+    {
+        s->decoded[i] = av_realloc(s->decoded[i], sizeof(int32_t)*s->max_blocksize);
+    }
+
+    s->bitstream= av_fast_realloc(s->bitstream, &s->allocated_bitstream_size, s->max_framesize);
+}
+
+static void metadata_streaminfo(FLACContext *s)
+{
+    /* mandatory streaminfo */
+    s->min_blocksize = get_bits(&s->gb, 16);
+    s->max_blocksize = get_bits(&s->gb, 16);
+
+    s->min_framesize = get_bits_long(&s->gb, 24);
+    s->max_framesize = get_bits_long(&s->gb, 24);
+
+    s->samplerate = get_bits_long(&s->gb, 20);
+    s->channels = get_bits(&s->gb, 3) + 1;
+    s->bps = get_bits(&s->gb, 5) + 1;
+
+    s->avctx->channels = s->channels;
+    s->avctx->sample_rate = s->samplerate;
+
+    skip_bits(&s->gb, 36); /* total num of samples */
+
+    skip_bits(&s->gb, 64); /* md5 sum */
+    skip_bits(&s->gb, 64); /* md5 sum */
+
+    dump_headers(s);
+}
+
+/**
+ * Parse a list of metadata blocks. This list of blocks must begin with
+ * the fLaC marker.
+ * @param s the flac decoding context containing the gb bit reader used to
+ *          parse metadata
+ * @return 1 if some metadata was read, 0 if no fLaC marker was found
+ */
+static int metadata_parse(FLACContext *s)
+{
+    int i, metadata_last, metadata_type, metadata_size, streaminfo_updated=0;
+
+    if (show_bits_long(&s->gb, 32) == MKBETAG('f','L','a','C')) {
+        skip_bits(&s->gb, 32);
+
+        av_log(s->avctx, AV_LOG_DEBUG, "STREAM HEADER\n");
+        do {
+            metadata_last = get_bits(&s->gb, 1);
+            metadata_type = get_bits(&s->gb, 7);
+            metadata_size = get_bits_long(&s->gb, 24);
+
+            av_log(s->avctx, AV_LOG_DEBUG,
+                   " metadata block: flag = %d, type = %d, size = %d\n",
+                   metadata_last, metadata_type, metadata_size);
+            if (metadata_size) {
+                switch (metadata_type) {
+                case METADATA_TYPE_STREAMINFO:
+                    metadata_streaminfo(s);
+                    streaminfo_updated = 1;
+                    break;
+
+                default:
+                    for (i=0; i<metadata_size; i++)
+                        skip_bits(&s->gb, 8);
+                }
+            }
+        } while (!metadata_last);
+
+        if (streaminfo_updated)
+            allocate_buffers(s);
+        return 1;
+    }
+    return 0;
+}
+
+static int decode_residuals(FLACContext *s, int channel, int pred_order)
+{
+    int i, tmp, partition, method_type, rice_order;
+    int sample = 0, samples;
+
+    method_type = get_bits(&s->gb, 2);
+    if (method_type != 0){
+        av_log(s->avctx, AV_LOG_DEBUG, "illegal residual coding method %d\n", method_type);
+        return -1;
+    }
+
+    rice_order = get_bits(&s->gb, 4);
+
+    samples= s->blocksize >> rice_order;
+
+    sample=
+    i= pred_order;
+    for (partition = 0; partition < (1 << rice_order); partition++)
+    {
+        tmp = get_bits(&s->gb, 4);
+        if (tmp == 15)
+        {
+            av_log(s->avctx, AV_LOG_DEBUG, "fixed len partition\n");
+            tmp = get_bits(&s->gb, 5);
+            for (; i < samples; i++, sample++)
+                s->decoded[channel][sample] = get_sbits(&s->gb, tmp);
+        }
+        else
+        {
+//            av_log(s->avctx, AV_LOG_DEBUG, "rice coded partition k=%d\n", tmp);
+            for (; i < samples; i++, sample++){
+                s->decoded[channel][sample] = get_sr_golomb_flac(&s->gb, tmp, INT_MAX, 0);
+            }
+        }
+        i= 0;
+    }
+
+//    av_log(s->avctx, AV_LOG_DEBUG, "partitions: %d, samples: %d\n", 1 << rice_order, sample);
+
+    return 0;
+}
+
+static int decode_subframe_fixed(FLACContext *s, int channel, int pred_order)
+{
+    int i;
+
+//    av_log(s->avctx, AV_LOG_DEBUG, "  SUBFRAME FIXED\n");
+
+    /* warm up samples */
+//    av_log(s->avctx, AV_LOG_DEBUG, "   warm up samples: %d\n", pred_order);
+
+    for (i = 0; i < pred_order; i++)
+    {
+        s->decoded[channel][i] = get_sbits(&s->gb, s->curr_bps);
+//        av_log(s->avctx, AV_LOG_DEBUG, "    %d: %d\n", i, s->decoded[channel][i]);
+    }
+
+    if (decode_residuals(s, channel, pred_order) < 0)
+        return -1;
+
+    switch(pred_order)
+    {
+        case 0:
+            break;
+        case 1:
+            for (i = pred_order; i < s->blocksize; i++)
+                s->decoded[channel][i] +=   s->decoded[channel][i-1];
+            break;
+        case 2:
+            for (i = pred_order; i < s->blocksize; i++)
+                s->decoded[channel][i] += 2*s->decoded[channel][i-1]
+                                          - s->decoded[channel][i-2];
+            break;
+        case 3:
+            for (i = pred_order; i < s->blocksize; i++)
+                s->decoded[channel][i] += 3*s->decoded[channel][i-1]
+                                        - 3*s->decoded[channel][i-2]
+                                        +   s->decoded[channel][i-3];
+            break;
+        case 4:
+            for (i = pred_order; i < s->blocksize; i++)
+                s->decoded[channel][i] += 4*s->decoded[channel][i-1]
+                                        - 6*s->decoded[channel][i-2]
+                                        + 4*s->decoded[channel][i-3]
+                                        -   s->decoded[channel][i-4];
+            break;
+        default:
+            av_log(s->avctx, AV_LOG_ERROR, "illegal pred order %d\n", pred_order);
+            return -1;
+    }
+
+    return 0;
+}
+
+static int decode_subframe_lpc(FLACContext *s, int channel, int pred_order)
+{
+    int i, j;
+    int coeff_prec, qlevel;
+    int coeffs[pred_order];
+
+//    av_log(s->avctx, AV_LOG_DEBUG, "  SUBFRAME LPC\n");
+
+    /* warm up samples */
+//    av_log(s->avctx, AV_LOG_DEBUG, "   warm up samples: %d\n", pred_order);
+
+    for (i = 0; i < pred_order; i++)
+    {
+        s->decoded[channel][i] = get_sbits(&s->gb, s->curr_bps);
+//        av_log(s->avctx, AV_LOG_DEBUG, "    %d: %d\n", i, s->decoded[channel][i]);
+    }
+
+    coeff_prec = get_bits(&s->gb, 4) + 1;
+    if (coeff_prec == 16)
+    {
+        av_log(s->avctx, AV_LOG_DEBUG, "invalid coeff precision\n");
+        return -1;
+    }
+//    av_log(s->avctx, AV_LOG_DEBUG, "   qlp coeff prec: %d\n", coeff_prec);
+    qlevel = get_sbits(&s->gb, 5);
+//    av_log(s->avctx, AV_LOG_DEBUG, "   quant level: %d\n", qlevel);
+    if(qlevel < 0){
+        av_log(s->avctx, AV_LOG_DEBUG, "qlevel %d not supported, maybe buggy stream\n", qlevel);
+        return -1;
+    }
+
+    for (i = 0; i < pred_order; i++)
+    {
+        coeffs[i] = get_sbits(&s->gb, coeff_prec);
+//        av_log(s->avctx, AV_LOG_DEBUG, "    %d: %d\n", i, coeffs[i]);
+    }
+
+    if (decode_residuals(s, channel, pred_order) < 0)
+        return -1;
+
+    if (s->bps > 16) {
+        int64_t sum;
+        for (i = pred_order; i < s->blocksize; i++)
+        {
+            sum = 0;
+            for (j = 0; j < pred_order; j++)
+                sum += (int64_t)coeffs[j] * s->decoded[channel][i-j-1];
+            s->decoded[channel][i] += sum >> qlevel;
+        }
+    } else {
+        int sum;
+        for (i = pred_order; i < s->blocksize; i++)
+        {
+            sum = 0;
+            for (j = 0; j < pred_order; j++)
+                sum += coeffs[j] * s->decoded[channel][i-j-1];
+            s->decoded[channel][i] += sum >> qlevel;
+        }
+    }
+
+    return 0;
+}
+
+static inline int decode_subframe(FLACContext *s, int channel)
+{
+    int type, wasted = 0;
+    int i, tmp;
+
+    s->curr_bps = s->bps;
+    if(channel == 0){
+        if(s->decorrelation == RIGHT_SIDE)
+            s->curr_bps++;
+    }else{
+        if(s->decorrelation == LEFT_SIDE || s->decorrelation == MID_SIDE)
+            s->curr_bps++;
+    }
+
+    if (get_bits1(&s->gb))
+    {
+        av_log(s->avctx, AV_LOG_ERROR, "invalid subframe padding\n");
+        return -1;
+    }
+    type = get_bits(&s->gb, 6);
+//    wasted = get_bits1(&s->gb);
+
+//    if (wasted)
+//    {
+//        while (!get_bits1(&s->gb))
+//            wasted++;
+//        if (wasted)
+//            wasted++;
+//        s->curr_bps -= wasted;
+//    }
+#if 0
+    wasted= 16 - av_log2(show_bits(&s->gb, 17));
+    skip_bits(&s->gb, wasted+1);
+    s->curr_bps -= wasted;
+#else
+    if (get_bits1(&s->gb))
+    {
+        wasted = 1;
+        while (!get_bits1(&s->gb))
+            wasted++;
+        s->curr_bps -= wasted;
+        av_log(s->avctx, AV_LOG_DEBUG, "%d wasted bits\n", wasted);
+    }
+#endif
+//FIXME use av_log2 for types
+    if (type == 0)
+    {
+        av_log(s->avctx, AV_LOG_DEBUG, "coding type: constant\n");
+        tmp = get_sbits(&s->gb, s->curr_bps);
+        for (i = 0; i < s->blocksize; i++)
+            s->decoded[channel][i] = tmp;
+    }
+    else if (type == 1)
+    {
+        av_log(s->avctx, AV_LOG_DEBUG, "coding type: verbatim\n");
+        for (i = 0; i < s->blocksize; i++)
+            s->decoded[channel][i] = get_sbits(&s->gb, s->curr_bps);
+    }
+    else if ((type >= 8) && (type <= 12))
+    {
+//        av_log(s->avctx, AV_LOG_DEBUG, "coding type: fixed\n");
+        if (decode_subframe_fixed(s, channel, type & ~0x8) < 0)
+            return -1;
+    }
+    else if (type >= 32)
+    {
+//        av_log(s->avctx, AV_LOG_DEBUG, "coding type: lpc\n");
+        if (decode_subframe_lpc(s, channel, (type & ~0x20)+1) < 0)
+            return -1;
+    }
+    else
+    {
+        av_log(s->avctx, AV_LOG_ERROR, "invalid coding type\n");
+        return -1;
+    }
+
+    if (wasted)
+    {
+        int i;
+        for (i = 0; i < s->blocksize; i++)
+            s->decoded[channel][i] <<= wasted;
+    }
+
+    return 0;
+}
+
+static int decode_frame(FLACContext *s)
+{
+    int blocksize_code, sample_rate_code, sample_size_code, assignment, i, crc8;
+    int decorrelation, bps, blocksize, samplerate;
+
+    blocksize_code = get_bits(&s->gb, 4);
+
+    sample_rate_code = get_bits(&s->gb, 4);
+
+    assignment = get_bits(&s->gb, 4); /* channel assignment */
+    if (assignment < 8 && s->channels == assignment+1)
+        decorrelation = INDEPENDENT;
+    else if (assignment >=8 && assignment < 11 && s->channels == 2)
+        decorrelation = LEFT_SIDE + assignment - 8;
+    else
+    {
+        av_log(s->avctx, AV_LOG_ERROR, "unsupported channel assignment %d (channels=%d)\n", assignment, s->channels);
+        return -1;
+    }
+
+    sample_size_code = get_bits(&s->gb, 3);
+    if(sample_size_code == 0)
+        bps= s->bps;
+    else if((sample_size_code != 3) && (sample_size_code != 7))
+        bps = sample_size_table[sample_size_code];
+    else
+    {
+        av_log(s->avctx, AV_LOG_ERROR, "invalid sample size code (%d)\n", sample_size_code);
+        return -1;
+    }
+
+    if (get_bits1(&s->gb))
+    {
+        av_log(s->avctx, AV_LOG_ERROR, "broken stream, invalid padding\n");
+        return -1;
+    }
+
+    if(get_utf8(&s->gb) < 0){
+        av_log(s->avctx, AV_LOG_ERROR, "utf8 fscked\n");
+        return -1;
+    }
+#if 0
+    if (/*((blocksize_code == 6) || (blocksize_code == 7)) &&*/
+        (s->min_blocksize != s->max_blocksize)){
+    }else{
+    }
+#endif
+
+    if (blocksize_code == 0)
+        blocksize = s->min_blocksize;
+    else if (blocksize_code == 6)
+        blocksize = get_bits(&s->gb, 8)+1;
+    else if (blocksize_code == 7)
+        blocksize = get_bits(&s->gb, 16)+1;
+    else
+        blocksize = blocksize_table[blocksize_code];
+
+    if(blocksize > s->max_blocksize){
+        av_log(s->avctx, AV_LOG_ERROR, "blocksize %d > %d\n", blocksize, s->max_blocksize);
+        return -1;
+    }
+
+    if (sample_rate_code == 0){
+        samplerate= s->samplerate;
+    }else if ((sample_rate_code > 3) && (sample_rate_code < 12))
+        samplerate = sample_rate_table[sample_rate_code];
+    else if (sample_rate_code == 12)
+        samplerate = get_bits(&s->gb, 8) * 1000;
+    else if (sample_rate_code == 13)
+        samplerate = get_bits(&s->gb, 16);
+    else if (sample_rate_code == 14)
+        samplerate = get_bits(&s->gb, 16) * 10;
+    else{
+        av_log(s->avctx, AV_LOG_ERROR, "illegal sample rate code %d\n", sample_rate_code);
+        return -1;
+    }
+
+    skip_bits(&s->gb, 8);
+    crc8= av_crc(av_crc07, 0, s->gb.buffer, get_bits_count(&s->gb)/8);
+    if(crc8){
+        av_log(s->avctx, AV_LOG_ERROR, "header crc mismatch crc=%2X\n", crc8);
+        return -1;
+    }
+
+    s->blocksize    = blocksize;
+    s->samplerate   = samplerate;
+    s->bps          = bps;
+    s->decorrelation= decorrelation;
+
+//    dump_headers(s);
+
+    /* subframes */
+    for (i = 0; i < s->channels; i++)
+    {
+//        av_log(s->avctx, AV_LOG_DEBUG, "decoded: %x residual: %x\n", s->decoded[i], s->residual[i]);
+        if (decode_subframe(s, i) < 0)
+            return -1;
+    }
+
+    align_get_bits(&s->gb);
+
+    /* frame footer */
+    skip_bits(&s->gb, 16); /* data crc */
+
+    return 0;
+}
+
+static inline int16_t shift_to_16_bits(int32_t data, int bps)
+{
+    if (bps == 24) {
+        return (data >> 8);
+    } else if (bps == 20) {
+        return (data >> 4);
+    } else {
+        return data;
+    }
+}
+
+static int flac_decode_frame(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    FLACContext *s = avctx->priv_data;
+    int tmp = 0, i, j = 0, input_buf_size = 0;
+    int16_t *samples = data;
+
+    if(s->max_framesize == 0){
+        s->max_framesize= 65536; // should hopefully be enough for the first header
+        s->bitstream= av_fast_realloc(s->bitstream, &s->allocated_bitstream_size, s->max_framesize);
+    }
+
+    if(1 && s->max_framesize){//FIXME truncated
+            buf_size= FFMAX(FFMIN(buf_size, s->max_framesize - s->bitstream_size), 0);
+            input_buf_size= buf_size;
+
+            if(s->bitstream_index + s->bitstream_size + buf_size > s->allocated_bitstream_size){
+//                printf("memmove\n");
+                memmove(s->bitstream, &s->bitstream[s->bitstream_index], s->bitstream_size);
+                s->bitstream_index=0;
+            }
+            memcpy(&s->bitstream[s->bitstream_index + s->bitstream_size], buf, buf_size);
+            buf= &s->bitstream[s->bitstream_index];
+            buf_size += s->bitstream_size;
+            s->bitstream_size= buf_size;
+
+            if(buf_size < s->max_framesize){
+//                printf("wanna more data ...\n");
+                return input_buf_size;
+            }
+    }
+
+    init_get_bits(&s->gb, buf, buf_size*8);
+
+    if (!metadata_parse(s))
+    {
+        tmp = show_bits(&s->gb, 16);
+        if(tmp != 0xFFF8){
+            av_log(s->avctx, AV_LOG_ERROR, "FRAME HEADER not here\n");
+            while(get_bits_count(&s->gb)/8+2 < buf_size && show_bits(&s->gb, 16) != 0xFFF8)
+                skip_bits(&s->gb, 8);
+            goto end; // we may not have enough bits left to decode a frame, so try next time
+        }
+        skip_bits(&s->gb, 16);
+        if (decode_frame(s) < 0){
+            av_log(s->avctx, AV_LOG_ERROR, "decode_frame() failed\n");
+            s->bitstream_size=0;
+            s->bitstream_index=0;
+            return -1;
+        }
+    }
+
+
+#if 0
+    /* fix the channel order here */
+    if (s->order == MID_SIDE)
+    {
+        short *left = samples;
+        short *right = samples + s->blocksize;
+        for (i = 0; i < s->blocksize; i += 2)
+        {
+            uint32_t x = s->decoded[0][i];
+            uint32_t y = s->decoded[0][i+1];
+
+            right[i] = x - (y / 2);
+            left[i] = right[i] + y;
+        }
+        *data_size = 2 * s->blocksize;
+    }
+    else
+    {
+    for (i = 0; i < s->channels; i++)
+    {
+        switch(s->order)
+        {
+            case INDEPENDENT:
+                for (j = 0; j < s->blocksize; j++)
+                    samples[(s->blocksize*i)+j] = s->decoded[i][j];
+                break;
+            case LEFT_SIDE:
+            case RIGHT_SIDE:
+                if (i == 0)
+                    for (j = 0; j < s->blocksize; j++)
+                        samples[(s->blocksize*i)+j] = s->decoded[0][j];
+                else
+                    for (j = 0; j < s->blocksize; j++)
+                        samples[(s->blocksize*i)+j] = s->decoded[0][j] - s->decoded[i][j];
+                break;
+//            case MID_SIDE:
+//                av_log(s->avctx, AV_LOG_DEBUG, "mid-side unsupported\n");
+        }
+        *data_size += s->blocksize;
+    }
+    }
+#else
+#define DECORRELATE(left, right)\
+            assert(s->channels == 2);\
+            for (i = 0; i < s->blocksize; i++)\
+            {\
+                int a= s->decoded[0][i];\
+                int b= s->decoded[1][i];\
+                *(samples++) = (left ) >> (16 - s->bps);\
+                *(samples++) = (right) >> (16 - s->bps);\
+            }\
+            break;
+
+    switch(s->decorrelation)
+    {
+        case INDEPENDENT:
+            for (j = 0; j < s->blocksize; j++)
+            {
+                for (i = 0; i < s->channels; i++)
+                    *(samples++) = shift_to_16_bits(s->decoded[i][j], s->bps);
+            }
+            break;
+        case LEFT_SIDE:
+            DECORRELATE(a,a-b)
+        case RIGHT_SIDE:
+            DECORRELATE(a+b,b)
+        case MID_SIDE:
+            DECORRELATE( (a-=b>>1) + b, a)
+    }
+#endif
+
+    *data_size = (int8_t *)samples - (int8_t *)data;
+//    av_log(s->avctx, AV_LOG_DEBUG, "data size: %d\n", *data_size);
+
+//    s->last_blocksize = s->blocksize;
+end:
+    i= (get_bits_count(&s->gb)+7)/8;;
+    if(i > buf_size){
+        av_log(s->avctx, AV_LOG_ERROR, "overread: %d\n", i - buf_size);
+        s->bitstream_size=0;
+        s->bitstream_index=0;
+        return -1;
+    }
+
+    if(s->bitstream_size){
+        s->bitstream_index += i;
+        s->bitstream_size  -= i;
+        return input_buf_size;
+    }else
+        return i;
+}
+
+static int flac_decode_close(AVCodecContext *avctx)
+{
+    FLACContext *s = avctx->priv_data;
+    int i;
+
+    for (i = 0; i < s->channels; i++)
+    {
+        av_freep(&s->decoded[i]);
+    }
+    av_freep(&s->bitstream);
+
+    return 0;
+}
+
+static void flac_flush(AVCodecContext *avctx){
+    FLACContext *s = avctx->priv_data;
+
+    s->bitstream_size=
+    s->bitstream_index= 0;
+}
+
+AVCodec flac_decoder = {
+    "flac",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_FLAC,
+    sizeof(FLACContext),
+    flac_decode_init,
+    NULL,
+    flac_decode_close,
+    flac_decode_frame,
+    .flush= flac_flush,
+};
diff --git a/contrib/ffmpeg/libavcodec/flacenc.c b/contrib/ffmpeg/libavcodec/flacenc.c
new file mode 100644
index 000000000..b7b7d0d8e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/flacenc.c
@@ -0,0 +1,1371 @@
+/**
+ * FLAC audio encoder
+ * Copyright (c) 2006  Justin Ruggles <jruggle@earthlink.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "bitstream.h"
+#include "crc.h"
+#include "golomb.h"
+#include "lls.h"
+
+#define FLAC_MAX_CH  8
+#define FLAC_MIN_BLOCKSIZE  16
+#define FLAC_MAX_BLOCKSIZE  65535
+
+#define FLAC_SUBFRAME_CONSTANT  0
+#define FLAC_SUBFRAME_VERBATIM  1
+#define FLAC_SUBFRAME_FIXED     8
+#define FLAC_SUBFRAME_LPC      32
+
+#define FLAC_CHMODE_NOT_STEREO      0
+#define FLAC_CHMODE_LEFT_RIGHT      1
+#define FLAC_CHMODE_LEFT_SIDE       8
+#define FLAC_CHMODE_RIGHT_SIDE      9
+#define FLAC_CHMODE_MID_SIDE       10
+
+#define ORDER_METHOD_EST     0
+#define ORDER_METHOD_2LEVEL  1
+#define ORDER_METHOD_4LEVEL  2
+#define ORDER_METHOD_8LEVEL  3
+#define ORDER_METHOD_SEARCH  4
+#define ORDER_METHOD_LOG     5
+
+#define FLAC_STREAMINFO_SIZE  34
+
+#define MIN_LPC_ORDER       1
+#define MAX_LPC_ORDER      32
+#define MAX_FIXED_ORDER     4
+#define MAX_PARTITION_ORDER 8
+#define MAX_PARTITIONS     (1 << MAX_PARTITION_ORDER)
+#define MAX_LPC_PRECISION  15
+#define MAX_LPC_SHIFT      15
+#define MAX_RICE_PARAM     14
+
+typedef struct CompressionOptions {
+    int compression_level;
+    int block_time_ms;
+    int use_lpc;
+    int lpc_coeff_precision;
+    int min_prediction_order;
+    int max_prediction_order;
+    int prediction_order_method;
+    int min_partition_order;
+    int max_partition_order;
+} CompressionOptions;
+
+typedef struct RiceContext {
+    int porder;
+    int params[MAX_PARTITIONS];
+} RiceContext;
+
+typedef struct FlacSubframe {
+    int type;
+    int type_code;
+    int obits;
+    int order;
+    int32_t coefs[MAX_LPC_ORDER];
+    int shift;
+    RiceContext rc;
+    int32_t samples[FLAC_MAX_BLOCKSIZE];
+    int32_t residual[FLAC_MAX_BLOCKSIZE];
+} FlacSubframe;
+
+typedef struct FlacFrame {
+    FlacSubframe subframes[FLAC_MAX_CH];
+    int blocksize;
+    int bs_code[2];
+    uint8_t crc8;
+    int ch_mode;
+} FlacFrame;
+
+typedef struct FlacEncodeContext {
+    PutBitContext pb;
+    int channels;
+    int ch_code;
+    int samplerate;
+    int sr_code[2];
+    int blocksize;
+    int max_framesize;
+    uint32_t frame_count;
+    FlacFrame frame;
+    CompressionOptions options;
+    AVCodecContext *avctx;
+} FlacEncodeContext;
+
+static const int flac_samplerates[16] = {
+    0, 0, 0, 0,
+    8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000,
+    0, 0, 0, 0
+};
+
+static const int flac_blocksizes[16] = {
+    0,
+    192,
+    576, 1152, 2304, 4608,
+    0, 0,
+    256, 512, 1024, 2048, 4096, 8192, 16384, 32768
+};
+
+/**
+ * Writes streaminfo metadata block to byte array
+ */
+static void write_streaminfo(FlacEncodeContext *s, uint8_t *header)
+{
+    PutBitContext pb;
+
+    memset(header, 0, FLAC_STREAMINFO_SIZE);
+    init_put_bits(&pb, header, FLAC_STREAMINFO_SIZE);
+
+    /* streaminfo metadata block */
+    put_bits(&pb, 16, s->blocksize);
+    put_bits(&pb, 16, s->blocksize);
+    put_bits(&pb, 24, 0);
+    put_bits(&pb, 24, s->max_framesize);
+    put_bits(&pb, 20, s->samplerate);
+    put_bits(&pb, 3, s->channels-1);
+    put_bits(&pb, 5, 15);       /* bits per sample - 1 */
+    flush_put_bits(&pb);
+    /* total samples = 0 */
+    /* MD5 signature = 0 */
+}
+
+/**
+ * Sets blocksize based on samplerate
+ * Chooses the closest predefined blocksize >= BLOCK_TIME_MS milliseconds
+ */
+static int select_blocksize(int samplerate, int block_time_ms)
+{
+    int i;
+    int target;
+    int blocksize;
+
+    assert(samplerate > 0);
+    blocksize = flac_blocksizes[1];
+    target = (samplerate * block_time_ms) / 1000;
+    for(i=0; i<16; i++) {
+        if(target >= flac_blocksizes[i] && flac_blocksizes[i] > blocksize) {
+            blocksize = flac_blocksizes[i];
+        }
+    }
+    return blocksize;
+}
+
+static int flac_encode_init(AVCodecContext *avctx)
+{
+    int freq = avctx->sample_rate;
+    int channels = avctx->channels;
+    FlacEncodeContext *s = avctx->priv_data;
+    int i, level;
+    uint8_t *streaminfo;
+
+    s->avctx = avctx;
+
+    if(avctx->sample_fmt != SAMPLE_FMT_S16) {
+        return -1;
+    }
+
+    if(channels < 1 || channels > FLAC_MAX_CH) {
+        return -1;
+    }
+    s->channels = channels;
+    s->ch_code = s->channels-1;
+
+    /* find samplerate in table */
+    if(freq < 1)
+        return -1;
+    for(i=4; i<12; i++) {
+        if(freq == flac_samplerates[i]) {
+            s->samplerate = flac_samplerates[i];
+            s->sr_code[0] = i;
+            s->sr_code[1] = 0;
+            break;
+        }
+    }
+    /* if not in table, samplerate is non-standard */
+    if(i == 12) {
+        if(freq % 1000 == 0 && freq < 255000) {
+            s->sr_code[0] = 12;
+            s->sr_code[1] = freq / 1000;
+        } else if(freq % 10 == 0 && freq < 655350) {
+            s->sr_code[0] = 14;
+            s->sr_code[1] = freq / 10;
+        } else if(freq < 65535) {
+            s->sr_code[0] = 13;
+            s->sr_code[1] = freq;
+        } else {
+            return -1;
+        }
+        s->samplerate = freq;
+    }
+
+    /* set compression option defaults based on avctx->compression_level */
+    if(avctx->compression_level < 0) {
+        s->options.compression_level = 5;
+    } else {
+        s->options.compression_level = avctx->compression_level;
+    }
+    av_log(avctx, AV_LOG_DEBUG, " compression: %d\n", s->options.compression_level);
+
+    level= s->options.compression_level;
+    if(level > 12) {
+        av_log(avctx, AV_LOG_ERROR, "invalid compression level: %d\n",
+               s->options.compression_level);
+        return -1;
+    }
+
+    s->options.block_time_ms       = ((int[]){ 27, 27, 27,105,105,105,105,105,105,105,105,105,105})[level];
+    s->options.use_lpc             = ((int[]){  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1})[level];
+    s->options.min_prediction_order= ((int[]){  2,  0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1})[level];
+    s->options.max_prediction_order= ((int[]){  3,  4,  4,  6,  8,  8,  8,  8, 12, 12, 12, 32, 32})[level];
+    s->options.prediction_order_method = ((int[]){ ORDER_METHOD_EST,    ORDER_METHOD_EST,    ORDER_METHOD_EST,
+                                                   ORDER_METHOD_EST,    ORDER_METHOD_EST,    ORDER_METHOD_EST,
+                                                   ORDER_METHOD_4LEVEL, ORDER_METHOD_LOG,    ORDER_METHOD_4LEVEL,
+                                                   ORDER_METHOD_LOG,    ORDER_METHOD_SEARCH, ORDER_METHOD_LOG,
+                                                   ORDER_METHOD_SEARCH})[level];
+    s->options.min_partition_order = ((int[]){  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0})[level];
+    s->options.max_partition_order = ((int[]){  2,  2,  3,  3,  3,  8,  8,  8,  8,  8,  8,  8,  8})[level];
+
+    /* set compression option overrides from AVCodecContext */
+    if(avctx->use_lpc >= 0) {
+        s->options.use_lpc = clip(avctx->use_lpc, 0, 11);
+    }
+    if(s->options.use_lpc == 1)
+        av_log(avctx, AV_LOG_DEBUG, " use lpc: Levinson-Durbin recursion with Welch window\n");
+    else if(s->options.use_lpc > 1)
+        av_log(avctx, AV_LOG_DEBUG, " use lpc: Cholesky factorization\n");
+
+    if(avctx->min_prediction_order >= 0) {
+        if(s->options.use_lpc) {
+            if(avctx->min_prediction_order < MIN_LPC_ORDER ||
+                    avctx->min_prediction_order > MAX_LPC_ORDER) {
+                av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n",
+                       avctx->min_prediction_order);
+                return -1;
+            }
+        } else {
+            if(avctx->min_prediction_order > MAX_FIXED_ORDER) {
+                av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n",
+                       avctx->min_prediction_order);
+                return -1;
+            }
+        }
+        s->options.min_prediction_order = avctx->min_prediction_order;
+    }
+    if(avctx->max_prediction_order >= 0) {
+        if(s->options.use_lpc) {
+            if(avctx->max_prediction_order < MIN_LPC_ORDER ||
+                    avctx->max_prediction_order > MAX_LPC_ORDER) {
+                av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n",
+                       avctx->max_prediction_order);
+                return -1;
+            }
+        } else {
+            if(avctx->max_prediction_order > MAX_FIXED_ORDER) {
+                av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n",
+                       avctx->max_prediction_order);
+                return -1;
+            }
+        }
+        s->options.max_prediction_order = avctx->max_prediction_order;
+    }
+    if(s->options.max_prediction_order < s->options.min_prediction_order) {
+        av_log(avctx, AV_LOG_ERROR, "invalid prediction orders: min=%d max=%d\n",
+               s->options.min_prediction_order, s->options.max_prediction_order);
+        return -1;
+    }
+    av_log(avctx, AV_LOG_DEBUG, " prediction order: %d, %d\n",
+           s->options.min_prediction_order, s->options.max_prediction_order);
+
+    if(avctx->prediction_order_method >= 0) {
+        if(avctx->prediction_order_method > ORDER_METHOD_LOG) {
+            av_log(avctx, AV_LOG_ERROR, "invalid prediction order method: %d\n",
+                   avctx->prediction_order_method);
+            return -1;
+        }
+        s->options.prediction_order_method = avctx->prediction_order_method;
+    }
+    switch(s->options.prediction_order_method) {
+        case ORDER_METHOD_EST:    av_log(avctx, AV_LOG_DEBUG, " order method: %s\n",
+                                         "estimate"); break;
+        case ORDER_METHOD_2LEVEL: av_log(avctx, AV_LOG_DEBUG, " order method: %s\n",
+                                         "2-level"); break;
+        case ORDER_METHOD_4LEVEL: av_log(avctx, AV_LOG_DEBUG, " order method: %s\n",
+                                         "4-level"); break;
+        case ORDER_METHOD_8LEVEL: av_log(avctx, AV_LOG_DEBUG, " order method: %s\n",
+                                         "8-level"); break;
+        case ORDER_METHOD_SEARCH: av_log(avctx, AV_LOG_DEBUG, " order method: %s\n",
+                                         "full search"); break;
+        case ORDER_METHOD_LOG:    av_log(avctx, AV_LOG_DEBUG, " order method: %s\n",
+                                         "log search"); break;
+    }
+
+    if(avctx->min_partition_order >= 0) {
+        if(avctx->min_partition_order > MAX_PARTITION_ORDER) {
+            av_log(avctx, AV_LOG_ERROR, "invalid min partition order: %d\n",
+                   avctx->min_partition_order);
+            return -1;
+        }
+        s->options.min_partition_order = avctx->min_partition_order;
+    }
+    if(avctx->max_partition_order >= 0) {
+        if(avctx->max_partition_order > MAX_PARTITION_ORDER) {
+            av_log(avctx, AV_LOG_ERROR, "invalid max partition order: %d\n",
+                   avctx->max_partition_order);
+            return -1;
+        }
+        s->options.max_partition_order = avctx->max_partition_order;
+    }
+    if(s->options.max_partition_order < s->options.min_partition_order) {
+        av_log(avctx, AV_LOG_ERROR, "invalid partition orders: min=%d max=%d\n",
+               s->options.min_partition_order, s->options.max_partition_order);
+        return -1;
+    }
+    av_log(avctx, AV_LOG_DEBUG, " partition order: %d, %d\n",
+           s->options.min_partition_order, s->options.max_partition_order);
+
+    if(avctx->frame_size > 0) {
+        if(avctx->frame_size < FLAC_MIN_BLOCKSIZE ||
+                avctx->frame_size > FLAC_MAX_BLOCKSIZE) {
+            av_log(avctx, AV_LOG_ERROR, "invalid block size: %d\n",
+                   avctx->frame_size);
+            return -1;
+        }
+        s->blocksize = avctx->frame_size;
+    } else {
+        s->blocksize = select_blocksize(s->samplerate, s->options.block_time_ms);
+        avctx->frame_size = s->blocksize;
+    }
+    av_log(avctx, AV_LOG_DEBUG, " block size: %d\n", s->blocksize);
+
+    /* set LPC precision */
+    if(avctx->lpc_coeff_precision > 0) {
+        if(avctx->lpc_coeff_precision > MAX_LPC_PRECISION) {
+            av_log(avctx, AV_LOG_ERROR, "invalid lpc coeff precision: %d\n",
+                   avctx->lpc_coeff_precision);
+            return -1;
+        }
+        s->options.lpc_coeff_precision = avctx->lpc_coeff_precision;
+    } else {
+        /* select LPC precision based on block size */
+        if(     s->blocksize <=   192) s->options.lpc_coeff_precision =  7;
+        else if(s->blocksize <=   384) s->options.lpc_coeff_precision =  8;
+        else if(s->blocksize <=   576) s->options.lpc_coeff_precision =  9;
+        else if(s->blocksize <=  1152) s->options.lpc_coeff_precision = 10;
+        else if(s->blocksize <=  2304) s->options.lpc_coeff_precision = 11;
+        else if(s->blocksize <=  4608) s->options.lpc_coeff_precision = 12;
+        else if(s->blocksize <=  8192) s->options.lpc_coeff_precision = 13;
+        else if(s->blocksize <= 16384) s->options.lpc_coeff_precision = 14;
+        else                           s->options.lpc_coeff_precision = 15;
+    }
+    av_log(avctx, AV_LOG_DEBUG, " lpc precision: %d\n",
+           s->options.lpc_coeff_precision);
+
+    /* set maximum encoded frame size in verbatim mode */
+    if(s->channels == 2) {
+        s->max_framesize = 14 + ((s->blocksize * 33 + 7) >> 3);
+    } else {
+        s->max_framesize = 14 + (s->blocksize * s->channels * 2);
+    }
+
+    streaminfo = av_malloc(FLAC_STREAMINFO_SIZE);
+    write_streaminfo(s, streaminfo);
+    avctx->extradata = streaminfo;
+    avctx->extradata_size = FLAC_STREAMINFO_SIZE;
+
+    s->frame_count = 0;
+
+    avctx->coded_frame = avcodec_alloc_frame();
+    avctx->coded_frame->key_frame = 1;
+
+    return 0;
+}
+
+static void init_frame(FlacEncodeContext *s)
+{
+    int i, ch;
+    FlacFrame *frame;
+
+    frame = &s->frame;
+
+    for(i=0; i<16; i++) {
+        if(s->blocksize == flac_blocksizes[i]) {
+            frame->blocksize = flac_blocksizes[i];
+            frame->bs_code[0] = i;
+            frame->bs_code[1] = 0;
+            break;
+        }
+    }
+    if(i == 16) {
+        frame->blocksize = s->blocksize;
+        if(frame->blocksize <= 256) {
+            frame->bs_code[0] = 6;
+            frame->bs_code[1] = frame->blocksize-1;
+        } else {
+            frame->bs_code[0] = 7;
+            frame->bs_code[1] = frame->blocksize-1;
+        }
+    }
+
+    for(ch=0; ch<s->channels; ch++) {
+        frame->subframes[ch].obits = 16;
+    }
+}
+
+/**
+ * Copy channel-interleaved input samples into separate subframes
+ */
+static void copy_samples(FlacEncodeContext *s, int16_t *samples)
+{
+    int i, j, ch;
+    FlacFrame *frame;
+
+    frame = &s->frame;
+    for(i=0,j=0; i<frame->blocksize; i++) {
+        for(ch=0; ch<s->channels; ch++,j++) {
+            frame->subframes[ch].samples[i] = samples[j];
+        }
+    }
+}
+
+
+#define rice_encode_count(sum, n, k) (((n)*((k)+1))+((sum-(n>>1))>>(k)))
+
+static int find_optimal_param(uint32_t sum, int n)
+{
+    int k, k_opt;
+    uint32_t nbits[MAX_RICE_PARAM+1];
+
+    k_opt = 0;
+    nbits[0] = UINT32_MAX;
+    for(k=0; k<=MAX_RICE_PARAM; k++) {
+        nbits[k] = rice_encode_count(sum, n, k);
+        if(nbits[k] < nbits[k_opt]) {
+            k_opt = k;
+        }
+    }
+    return k_opt;
+}
+
+static uint32_t calc_optimal_rice_params(RiceContext *rc, int porder,
+                                         uint32_t *sums, int n, int pred_order)
+{
+    int i;
+    int k, cnt, part;
+    uint32_t all_bits;
+
+    part = (1 << porder);
+    all_bits = 0;
+
+    cnt = (n >> porder) - pred_order;
+    for(i=0; i<part; i++) {
+        if(i == 1) cnt = (n >> porder);
+        k = find_optimal_param(sums[i], cnt);
+        rc->params[i] = k;
+        all_bits += rice_encode_count(sums[i], cnt, k);
+    }
+    all_bits += (4 * part);
+
+    rc->porder = porder;
+
+    return all_bits;
+}
+
+static void calc_sums(int pmin, int pmax, uint32_t *data, int n, int pred_order,
+                      uint32_t sums[][MAX_PARTITIONS])
+{
+    int i, j;
+    int parts;
+    uint32_t *res, *res_end;
+
+    /* sums for highest level */
+    parts = (1 << pmax);
+    res = &data[pred_order];
+    res_end = &data[n >> pmax];
+    for(i=0; i<parts; i++) {
+        sums[pmax][i] = 0;
+        while(res < res_end){
+            sums[pmax][i] += *(res++);
+        }
+        res_end+= n >> pmax;
+    }
+    /* sums for lower levels */
+    for(i=pmax-1; i>=pmin; i--) {
+        parts = (1 << i);
+        for(j=0; j<parts; j++) {
+            sums[i][j] = sums[i+1][2*j] + sums[i+1][2*j+1];
+        }
+    }
+}
+
+static uint32_t calc_rice_params(RiceContext *rc, int pmin, int pmax,
+                                 int32_t *data, int n, int pred_order)
+{
+    int i;
+    uint32_t bits[MAX_PARTITION_ORDER+1];
+    int opt_porder;
+    RiceContext tmp_rc;
+    uint32_t *udata;
+    uint32_t sums[MAX_PARTITION_ORDER+1][MAX_PARTITIONS];
+
+    assert(pmin >= 0 && pmin <= MAX_PARTITION_ORDER);
+    assert(pmax >= 0 && pmax <= MAX_PARTITION_ORDER);
+    assert(pmin <= pmax);
+
+    udata = av_malloc(n * sizeof(uint32_t));
+    for(i=0; i<n; i++) {
+        udata[i] = (2*data[i]) ^ (data[i]>>31);
+    }
+
+    calc_sums(pmin, pmax, udata, n, pred_order, sums);
+
+    opt_porder = pmin;
+    bits[pmin] = UINT32_MAX;
+    for(i=pmin; i<=pmax; i++) {
+        bits[i] = calc_optimal_rice_params(&tmp_rc, i, sums[i], n, pred_order);
+        if(bits[i] <= bits[opt_porder]) {
+            opt_porder = i;
+            *rc= tmp_rc;
+        }
+    }
+
+    av_freep(&udata);
+    return bits[opt_porder];
+}
+
+static int get_max_p_order(int max_porder, int n, int order)
+{
+    int porder = FFMIN(max_porder, av_log2(n^(n-1)));
+    if(order > 0)
+        porder = FFMIN(porder, av_log2(n/order));
+    return porder;
+}
+
+static uint32_t calc_rice_params_fixed(RiceContext *rc, int pmin, int pmax,
+                                       int32_t *data, int n, int pred_order,
+                                       int bps)
+{
+    uint32_t bits;
+    pmin = get_max_p_order(pmin, n, pred_order);
+    pmax = get_max_p_order(pmax, n, pred_order);
+    bits = pred_order*bps + 6;
+    bits += calc_rice_params(rc, pmin, pmax, data, n, pred_order);
+    return bits;
+}
+
+static uint32_t calc_rice_params_lpc(RiceContext *rc, int pmin, int pmax,
+                                     int32_t *data, int n, int pred_order,
+                                     int bps, int precision)
+{
+    uint32_t bits;
+    pmin = get_max_p_order(pmin, n, pred_order);
+    pmax = get_max_p_order(pmax, n, pred_order);
+    bits = pred_order*bps + 4 + 5 + pred_order*precision + 6;
+    bits += calc_rice_params(rc, pmin, pmax, data, n, pred_order);
+    return bits;
+}
+
+/**
+ * Apply Welch window function to audio block
+ */
+static void apply_welch_window(const int32_t *data, int len, double *w_data)
+{
+    int i, n2;
+    double w;
+    double c;
+
+    n2 = (len >> 1);
+    c = 2.0 / (len - 1.0);
+    for(i=0; i<n2; i++) {
+        w = c - i - 1.0;
+        w = 1.0 - (w * w);
+        w_data[i] = data[i] * w;
+        w_data[len-1-i] = data[len-1-i] * w;
+    }
+}
+
+/**
+ * Calculates autocorrelation data from audio samples
+ * A Welch window function is applied before calculation.
+ */
+static void compute_autocorr(const int32_t *data, int len, int lag,
+                             double *autoc)
+{
+    int i, lag_ptr;
+    double tmp[len + lag];
+    double *data1= tmp + lag;
+
+    apply_welch_window(data, len, data1);
+
+    for(i=0; i<lag; i++){
+        autoc[i] = 1.0;
+        data1[i-lag]= 0.0;
+    }
+
+    for(i=0; i<len; i++){
+        for(lag_ptr= i-lag; lag_ptr<=i; lag_ptr++){
+            autoc[i-lag_ptr] += data1[i] * data1[lag_ptr];
+        }
+    }
+}
+
+/**
+ * Levinson-Durbin recursion.
+ * Produces LPC coefficients from autocorrelation data.
+ */
+static void compute_lpc_coefs(const double *autoc, int max_order,
+                              double lpc[][MAX_LPC_ORDER], double *ref)
+{
+   int i, j, i2;
+   double r, err, tmp;
+   double lpc_tmp[MAX_LPC_ORDER];
+
+   for(i=0; i<max_order; i++) lpc_tmp[i] = 0;
+   err = autoc[0];
+
+   for(i=0; i<max_order; i++) {
+      r = -autoc[i+1];
+      for(j=0; j<i; j++) {
+          r -= lpc_tmp[j] * autoc[i-j];
+      }
+      r /= err;
+      ref[i] = fabs(r);
+
+      err *= 1.0 - (r * r);
+
+      i2 = (i >> 1);
+      lpc_tmp[i] = r;
+      for(j=0; j<i2; j++) {
+         tmp = lpc_tmp[j];
+         lpc_tmp[j] += r * lpc_tmp[i-1-j];
+         lpc_tmp[i-1-j] += r * tmp;
+      }
+      if(i & 1) {
+          lpc_tmp[j] += lpc_tmp[j] * r;
+      }
+
+      for(j=0; j<=i; j++) {
+          lpc[i][j] = -lpc_tmp[j];
+      }
+   }
+}
+
+/**
+ * Quantize LPC coefficients
+ */
+static void quantize_lpc_coefs(double *lpc_in, int order, int precision,
+                               int32_t *lpc_out, int *shift)
+{
+    int i;
+    double cmax, error;
+    int32_t qmax;
+    int sh;
+
+    /* define maximum levels */
+    qmax = (1 << (precision - 1)) - 1;
+
+    /* find maximum coefficient value */
+    cmax = 0.0;
+    for(i=0; i<order; i++) {
+        cmax= FFMAX(cmax, fabs(lpc_in[i]));
+    }
+
+    /* if maximum value quantizes to zero, return all zeros */
+    if(cmax * (1 << MAX_LPC_SHIFT) < 1.0) {
+        *shift = 0;
+        memset(lpc_out, 0, sizeof(int32_t) * order);
+        return;
+    }
+
+    /* calculate level shift which scales max coeff to available bits */
+    sh = MAX_LPC_SHIFT;
+    while((cmax * (1 << sh) > qmax) && (sh > 0)) {
+        sh--;
+    }
+
+    /* since negative shift values are unsupported in decoder, scale down
+       coefficients instead */
+    if(sh == 0 && cmax > qmax) {
+        double scale = ((double)qmax) / cmax;
+        for(i=0; i<order; i++) {
+            lpc_in[i] *= scale;
+        }
+    }
+
+    /* output quantized coefficients and level shift */
+    error=0;
+    for(i=0; i<order; i++) {
+        error += lpc_in[i] * (1 << sh);
+        lpc_out[i] = clip(lrintf(error), -qmax, qmax);
+        error -= lpc_out[i];
+    }
+    *shift = sh;
+}
+
+static int estimate_best_order(double *ref, int max_order)
+{
+    int i, est;
+
+    est = 1;
+    for(i=max_order-1; i>=0; i--) {
+        if(ref[i] > 0.10) {
+            est = i+1;
+            break;
+        }
+    }
+    return est;
+}
+
+/**
+ * Calculate LPC coefficients for multiple orders
+ */
+static int lpc_calc_coefs(const int32_t *samples, int blocksize, int max_order,
+                          int precision, int32_t coefs[][MAX_LPC_ORDER],
+                          int *shift, int use_lpc, int omethod)
+{
+    double autoc[MAX_LPC_ORDER+1];
+    double ref[MAX_LPC_ORDER];
+    double lpc[MAX_LPC_ORDER][MAX_LPC_ORDER];
+    int i, j, pass;
+    int opt_order;
+
+    assert(max_order >= MIN_LPC_ORDER && max_order <= MAX_LPC_ORDER);
+
+    if(use_lpc == 1){
+        compute_autocorr(samples, blocksize, max_order+1, autoc);
+
+        compute_lpc_coefs(autoc, max_order, lpc, ref);
+    }else{
+        LLSModel m[2];
+        double var[MAX_LPC_ORDER+1], eval, weight;
+
+        for(pass=0; pass<use_lpc-1; pass++){
+            av_init_lls(&m[pass&1], max_order);
+
+            weight=0;
+            for(i=max_order; i<blocksize; i++){
+                for(j=0; j<=max_order; j++)
+                    var[j]= samples[i-j];
+
+                if(pass){
+                    eval= av_evaluate_lls(&m[(pass-1)&1], var+1, max_order-1);
+                    eval= (512>>pass) + fabs(eval - var[0]);
+                    for(j=0; j<=max_order; j++)
+                        var[j]/= sqrt(eval);
+                    weight += 1/eval;
+                }else
+                    weight++;
+
+                av_update_lls(&m[pass&1], var, 1.0);
+            }
+            av_solve_lls(&m[pass&1], 0.001, 0);
+        }
+
+        for(i=0; i<max_order; i++){
+            for(j=0; j<max_order; j++)
+                lpc[i][j]= m[(pass-1)&1].coeff[i][j];
+            ref[i]= sqrt(m[(pass-1)&1].variance[i] / weight) * (blocksize - max_order) / 4000;
+        }
+        for(i=max_order-1; i>0; i--)
+            ref[i] = ref[i-1] - ref[i];
+    }
+    opt_order = max_order;
+
+    if(omethod == ORDER_METHOD_EST) {
+        opt_order = estimate_best_order(ref, max_order);
+        i = opt_order-1;
+        quantize_lpc_coefs(lpc[i], i+1, precision, coefs[i], &shift[i]);
+    } else {
+        for(i=0; i<max_order; i++) {
+            quantize_lpc_coefs(lpc[i], i+1, precision, coefs[i], &shift[i]);
+        }
+    }
+
+    return opt_order;
+}
+
+
+static void encode_residual_verbatim(int32_t *res, int32_t *smp, int n)
+{
+    assert(n > 0);
+    memcpy(res, smp, n * sizeof(int32_t));
+}
+
+static void encode_residual_fixed(int32_t *res, const int32_t *smp, int n,
+                                  int order)
+{
+    int i;
+
+    for(i=0; i<order; i++) {
+        res[i] = smp[i];
+    }
+
+    if(order==0){
+        for(i=order; i<n; i++)
+            res[i]= smp[i];
+    }else if(order==1){
+        for(i=order; i<n; i++)
+            res[i]= smp[i] - smp[i-1];
+    }else if(order==2){
+        for(i=order; i<n; i++)
+            res[i]= smp[i] - 2*smp[i-1] + smp[i-2];
+    }else if(order==3){
+        for(i=order; i<n; i++)
+            res[i]= smp[i] - 3*smp[i-1] + 3*smp[i-2] - smp[i-3];
+    }else{
+        for(i=order; i<n; i++)
+            res[i]= smp[i] - 4*smp[i-1] + 6*smp[i-2] - 4*smp[i-3] + smp[i-4];
+    }
+}
+
+static void encode_residual_lpc(int32_t *res, const int32_t *smp, int n,
+                                int order, const int32_t *coefs, int shift)
+{
+    int i, j;
+    int32_t pred;
+
+    for(i=0; i<order; i++) {
+        res[i] = smp[i];
+    }
+    for(i=order; i<n; i++) {
+        pred = 0;
+        for(j=0; j<order; j++) {
+            pred += coefs[j] * smp[i-j-1];
+        }
+        res[i] = smp[i] - (pred >> shift);
+    }
+}
+
+static int encode_residual(FlacEncodeContext *ctx, int ch)
+{
+    int i, n;
+    int min_order, max_order, opt_order, precision, omethod;
+    int min_porder, max_porder;
+    FlacFrame *frame;
+    FlacSubframe *sub;
+    int32_t coefs[MAX_LPC_ORDER][MAX_LPC_ORDER];
+    int shift[MAX_LPC_ORDER];
+    int32_t *res, *smp;
+
+    frame = &ctx->frame;
+    sub = &frame->subframes[ch];
+    res = sub->residual;
+    smp = sub->samples;
+    n = frame->blocksize;
+
+    /* CONSTANT */
+    for(i=1; i<n; i++) {
+        if(smp[i] != smp[0]) break;
+    }
+    if(i == n) {
+        sub->type = sub->type_code = FLAC_SUBFRAME_CONSTANT;
+        res[0] = smp[0];
+        return sub->obits;
+    }
+
+    /* VERBATIM */
+    if(n < 5) {
+        sub->type = sub->type_code = FLAC_SUBFRAME_VERBATIM;
+        encode_residual_verbatim(res, smp, n);
+        return sub->obits * n;
+    }
+
+    min_order = ctx->options.min_prediction_order;
+    max_order = ctx->options.max_prediction_order;
+    min_porder = ctx->options.min_partition_order;
+    max_porder = ctx->options.max_partition_order;
+    precision = ctx->options.lpc_coeff_precision;
+    omethod = ctx->options.prediction_order_method;
+
+    /* FIXED */
+    if(!ctx->options.use_lpc || max_order == 0 || (n <= max_order)) {
+        uint32_t bits[MAX_FIXED_ORDER+1];
+        if(max_order > MAX_FIXED_ORDER) max_order = MAX_FIXED_ORDER;
+        opt_order = 0;
+        bits[0] = UINT32_MAX;
+        for(i=min_order; i<=max_order; i++) {
+            encode_residual_fixed(res, smp, n, i);
+            bits[i] = calc_rice_params_fixed(&sub->rc, min_porder, max_porder, res,
+                                             n, i, sub->obits);
+            if(bits[i] < bits[opt_order]) {
+                opt_order = i;
+            }
+        }
+        sub->order = opt_order;
+        sub->type = FLAC_SUBFRAME_FIXED;
+        sub->type_code = sub->type | sub->order;
+        if(sub->order != max_order) {
+            encode_residual_fixed(res, smp, n, sub->order);
+            return calc_rice_params_fixed(&sub->rc, min_porder, max_porder, res, n,
+                                          sub->order, sub->obits);
+        }
+        return bits[sub->order];
+    }
+
+    /* LPC */
+    opt_order = lpc_calc_coefs(smp, n, max_order, precision, coefs, shift, ctx->options.use_lpc, omethod);
+
+    if(omethod == ORDER_METHOD_2LEVEL ||
+       omethod == ORDER_METHOD_4LEVEL ||
+       omethod == ORDER_METHOD_8LEVEL) {
+        int levels = 1 << omethod;
+        uint32_t bits[levels];
+        int order;
+        int opt_index = levels-1;
+        opt_order = max_order-1;
+        bits[opt_index] = UINT32_MAX;
+        for(i=levels-1; i>=0; i--) {
+            order = min_order + (((max_order-min_order+1) * (i+1)) / levels)-1;
+            if(order < 0) order = 0;
+            encode_residual_lpc(res, smp, n, order+1, coefs[order], shift[order]);
+            bits[i] = calc_rice_params_lpc(&sub->rc, min_porder, max_porder,
+                                           res, n, order+1, sub->obits, precision);
+            if(bits[i] < bits[opt_index]) {
+                opt_index = i;
+                opt_order = order;
+            }
+        }
+        opt_order++;
+    } else if(omethod == ORDER_METHOD_SEARCH) {
+        // brute-force optimal order search
+        uint32_t bits[MAX_LPC_ORDER];
+        opt_order = 0;
+        bits[0] = UINT32_MAX;
+        for(i=min_order-1; i<max_order; i++) {
+            encode_residual_lpc(res, smp, n, i+1, coefs[i], shift[i]);
+            bits[i] = calc_rice_params_lpc(&sub->rc, min_porder, max_porder,
+                                           res, n, i+1, sub->obits, precision);
+            if(bits[i] < bits[opt_order]) {
+                opt_order = i;
+            }
+        }
+        opt_order++;
+    } else if(omethod == ORDER_METHOD_LOG) {
+        uint32_t bits[MAX_LPC_ORDER];
+        int step;
+
+        opt_order= min_order - 1 + (max_order-min_order)/3;
+        memset(bits, -1, sizeof(bits));
+
+        for(step=16 ;step; step>>=1){
+            int last= opt_order;
+            for(i=last-step; i<=last+step; i+= step){
+                if(i<min_order-1 || i>=max_order || bits[i] < UINT32_MAX)
+                    continue;
+                encode_residual_lpc(res, smp, n, i+1, coefs[i], shift[i]);
+                bits[i] = calc_rice_params_lpc(&sub->rc, min_porder, max_porder,
+                                            res, n, i+1, sub->obits, precision);
+                if(bits[i] < bits[opt_order])
+                    opt_order= i;
+            }
+        }
+        opt_order++;
+    }
+
+    sub->order = opt_order;
+    sub->type = FLAC_SUBFRAME_LPC;
+    sub->type_code = sub->type | (sub->order-1);
+    sub->shift = shift[sub->order-1];
+    for(i=0; i<sub->order; i++) {
+        sub->coefs[i] = coefs[sub->order-1][i];
+    }
+    encode_residual_lpc(res, smp, n, sub->order, sub->coefs, sub->shift);
+    return calc_rice_params_lpc(&sub->rc, min_porder, max_porder, res, n, sub->order,
+                                sub->obits, precision);
+}
+
+static int encode_residual_v(FlacEncodeContext *ctx, int ch)
+{
+    int i, n;
+    FlacFrame *frame;
+    FlacSubframe *sub;
+    int32_t *res, *smp;
+
+    frame = &ctx->frame;
+    sub = &frame->subframes[ch];
+    res = sub->residual;
+    smp = sub->samples;
+    n = frame->blocksize;
+
+    /* CONSTANT */
+    for(i=1; i<n; i++) {
+        if(smp[i] != smp[0]) break;
+    }
+    if(i == n) {
+        sub->type = sub->type_code = FLAC_SUBFRAME_CONSTANT;
+        res[0] = smp[0];
+        return sub->obits;
+    }
+
+    /* VERBATIM */
+    sub->type = sub->type_code = FLAC_SUBFRAME_VERBATIM;
+    encode_residual_verbatim(res, smp, n);
+    return sub->obits * n;
+}
+
+static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch, int n)
+{
+    int i, best;
+    int32_t lt, rt;
+    uint64_t sum[4];
+    uint64_t score[4];
+    int k;
+
+    /* calculate sum of 2nd order residual for each channel */
+    sum[0] = sum[1] = sum[2] = sum[3] = 0;
+    for(i=2; i<n; i++) {
+        lt = left_ch[i] - 2*left_ch[i-1] + left_ch[i-2];
+        rt = right_ch[i] - 2*right_ch[i-1] + right_ch[i-2];
+        sum[2] += FFABS((lt + rt) >> 1);
+        sum[3] += FFABS(lt - rt);
+        sum[0] += FFABS(lt);
+        sum[1] += FFABS(rt);
+    }
+    /* estimate bit counts */
+    for(i=0; i<4; i++) {
+        k = find_optimal_param(2*sum[i], n);
+        sum[i] = rice_encode_count(2*sum[i], n, k);
+    }
+
+    /* calculate score for each mode */
+    score[0] = sum[0] + sum[1];
+    score[1] = sum[0] + sum[3];
+    score[2] = sum[1] + sum[3];
+    score[3] = sum[2] + sum[3];
+
+    /* return mode with lowest score */
+    best = 0;
+    for(i=1; i<4; i++) {
+        if(score[i] < score[best]) {
+            best = i;
+        }
+    }
+    if(best == 0) {
+        return FLAC_CHMODE_LEFT_RIGHT;
+    } else if(best == 1) {
+        return FLAC_CHMODE_LEFT_SIDE;
+    } else if(best == 2) {
+        return FLAC_CHMODE_RIGHT_SIDE;
+    } else {
+        return FLAC_CHMODE_MID_SIDE;
+    }
+}
+
+/**
+ * Perform stereo channel decorrelation
+ */
+static void channel_decorrelation(FlacEncodeContext *ctx)
+{
+    FlacFrame *frame;
+    int32_t *left, *right;
+    int i, n;
+
+    frame = &ctx->frame;
+    n = frame->blocksize;
+    left  = frame->subframes[0].samples;
+    right = frame->subframes[1].samples;
+
+    if(ctx->channels != 2) {
+        frame->ch_mode = FLAC_CHMODE_NOT_STEREO;
+        return;
+    }
+
+    frame->ch_mode = estimate_stereo_mode(left, right, n);
+
+    /* perform decorrelation and adjust bits-per-sample */
+    if(frame->ch_mode == FLAC_CHMODE_LEFT_RIGHT) {
+        return;
+    }
+    if(frame->ch_mode == FLAC_CHMODE_MID_SIDE) {
+        int32_t tmp;
+        for(i=0; i<n; i++) {
+            tmp = left[i];
+            left[i] = (tmp + right[i]) >> 1;
+            right[i] = tmp - right[i];
+        }
+        frame->subframes[1].obits++;
+    } else if(frame->ch_mode == FLAC_CHMODE_LEFT_SIDE) {
+        for(i=0; i<n; i++) {
+            right[i] = left[i] - right[i];
+        }
+        frame->subframes[1].obits++;
+    } else {
+        for(i=0; i<n; i++) {
+            left[i] -= right[i];
+        }
+        frame->subframes[0].obits++;
+    }
+}
+
+static void put_sbits(PutBitContext *pb, int bits, int32_t val)
+{
+    assert(bits >= 0 && bits <= 31);
+
+    put_bits(pb, bits, val & ((1<<bits)-1));
+}
+
+static void write_utf8(PutBitContext *pb, uint32_t val)
+{
+    uint8_t tmp;
+    PUT_UTF8(val, tmp, put_bits(pb, 8, tmp);)
+}
+
+static void output_frame_header(FlacEncodeContext *s)
+{
+    FlacFrame *frame;
+    int crc;
+
+    frame = &s->frame;
+
+    put_bits(&s->pb, 16, 0xFFF8);
+    put_bits(&s->pb, 4, frame->bs_code[0]);
+    put_bits(&s->pb, 4, s->sr_code[0]);
+    if(frame->ch_mode == FLAC_CHMODE_NOT_STEREO) {
+        put_bits(&s->pb, 4, s->ch_code);
+    } else {
+        put_bits(&s->pb, 4, frame->ch_mode);
+    }
+    put_bits(&s->pb, 3, 4); /* bits-per-sample code */
+    put_bits(&s->pb, 1, 0);
+    write_utf8(&s->pb, s->frame_count);
+    if(frame->bs_code[0] == 6) {
+        put_bits(&s->pb, 8, frame->bs_code[1]);
+    } else if(frame->bs_code[0] == 7) {
+        put_bits(&s->pb, 16, frame->bs_code[1]);
+    }
+    if(s->sr_code[0] == 12) {
+        put_bits(&s->pb, 8, s->sr_code[1]);
+    } else if(s->sr_code[0] > 12) {
+        put_bits(&s->pb, 16, s->sr_code[1]);
+    }
+    flush_put_bits(&s->pb);
+    crc = av_crc(av_crc07, 0, s->pb.buf, put_bits_count(&s->pb)>>3);
+    put_bits(&s->pb, 8, crc);
+}
+
+static void output_subframe_constant(FlacEncodeContext *s, int ch)
+{
+    FlacSubframe *sub;
+    int32_t res;
+
+    sub = &s->frame.subframes[ch];
+    res = sub->residual[0];
+    put_sbits(&s->pb, sub->obits, res);
+}
+
+static void output_subframe_verbatim(FlacEncodeContext *s, int ch)
+{
+    int i;
+    FlacFrame *frame;
+    FlacSubframe *sub;
+    int32_t res;
+
+    frame = &s->frame;
+    sub = &frame->subframes[ch];
+
+    for(i=0; i<frame->blocksize; i++) {
+        res = sub->residual[i];
+        put_sbits(&s->pb, sub->obits, res);
+    }
+}
+
+static void output_residual(FlacEncodeContext *ctx, int ch)
+{
+    int i, j, p, n, parts;
+    int k, porder, psize, res_cnt;
+    FlacFrame *frame;
+    FlacSubframe *sub;
+    int32_t *res;
+
+    frame = &ctx->frame;
+    sub = &frame->subframes[ch];
+    res = sub->residual;
+    n = frame->blocksize;
+
+    /* rice-encoded block */
+    put_bits(&ctx->pb, 2, 0);
+
+    /* partition order */
+    porder = sub->rc.porder;
+    psize = n >> porder;
+    parts = (1 << porder);
+    put_bits(&ctx->pb, 4, porder);
+    res_cnt = psize - sub->order;
+
+    /* residual */
+    j = sub->order;
+    for(p=0; p<parts; p++) {
+        k = sub->rc.params[p];
+        put_bits(&ctx->pb, 4, k);
+        if(p == 1) res_cnt = psize;
+        for(i=0; i<res_cnt && j<n; i++, j++) {
+            set_sr_golomb_flac(&ctx->pb, res[j], k, INT32_MAX, 0);
+        }
+    }
+}
+
+static void output_subframe_fixed(FlacEncodeContext *ctx, int ch)
+{
+    int i;
+    FlacFrame *frame;
+    FlacSubframe *sub;
+
+    frame = &ctx->frame;
+    sub = &frame->subframes[ch];
+
+    /* warm-up samples */
+    for(i=0; i<sub->order; i++) {
+        put_sbits(&ctx->pb, sub->obits, sub->residual[i]);
+    }
+
+    /* residual */
+    output_residual(ctx, ch);
+}
+
+static void output_subframe_lpc(FlacEncodeContext *ctx, int ch)
+{
+    int i, cbits;
+    FlacFrame *frame;
+    FlacSubframe *sub;
+
+    frame = &ctx->frame;
+    sub = &frame->subframes[ch];
+
+    /* warm-up samples */
+    for(i=0; i<sub->order; i++) {
+        put_sbits(&ctx->pb, sub->obits, sub->residual[i]);
+    }
+
+    /* LPC coefficients */
+    cbits = ctx->options.lpc_coeff_precision;
+    put_bits(&ctx->pb, 4, cbits-1);
+    put_sbits(&ctx->pb, 5, sub->shift);
+    for(i=0; i<sub->order; i++) {
+        put_sbits(&ctx->pb, cbits, sub->coefs[i]);
+    }
+
+    /* residual */
+    output_residual(ctx, ch);
+}
+
+static void output_subframes(FlacEncodeContext *s)
+{
+    FlacFrame *frame;
+    FlacSubframe *sub;
+    int ch;
+
+    frame = &s->frame;
+
+    for(ch=0; ch<s->channels; ch++) {
+        sub = &frame->subframes[ch];
+
+        /* subframe header */
+        put_bits(&s->pb, 1, 0);
+        put_bits(&s->pb, 6, sub->type_code);
+        put_bits(&s->pb, 1, 0); /* no wasted bits */
+
+        /* subframe */
+        if(sub->type == FLAC_SUBFRAME_CONSTANT) {
+            output_subframe_constant(s, ch);
+        } else if(sub->type == FLAC_SUBFRAME_VERBATIM) {
+            output_subframe_verbatim(s, ch);
+        } else if(sub->type == FLAC_SUBFRAME_FIXED) {
+            output_subframe_fixed(s, ch);
+        } else if(sub->type == FLAC_SUBFRAME_LPC) {
+            output_subframe_lpc(s, ch);
+        }
+    }
+}
+
+static void output_frame_footer(FlacEncodeContext *s)
+{
+    int crc;
+    flush_put_bits(&s->pb);
+    crc = bswap_16(av_crc(av_crc8005, 0, s->pb.buf, put_bits_count(&s->pb)>>3));
+    put_bits(&s->pb, 16, crc);
+    flush_put_bits(&s->pb);
+}
+
+static int flac_encode_frame(AVCodecContext *avctx, uint8_t *frame,
+                             int buf_size, void *data)
+{
+    int ch;
+    FlacEncodeContext *s;
+    int16_t *samples = data;
+    int out_bytes;
+
+    s = avctx->priv_data;
+
+    s->blocksize = avctx->frame_size;
+    init_frame(s);
+
+    copy_samples(s, samples);
+
+    channel_decorrelation(s);
+
+    for(ch=0; ch<s->channels; ch++) {
+        encode_residual(s, ch);
+    }
+    init_put_bits(&s->pb, frame, buf_size);
+    output_frame_header(s);
+    output_subframes(s);
+    output_frame_footer(s);
+    out_bytes = put_bits_count(&s->pb) >> 3;
+
+    if(out_bytes > s->max_framesize || out_bytes >= buf_size) {
+        /* frame too large. use verbatim mode */
+        for(ch=0; ch<s->channels; ch++) {
+            encode_residual_v(s, ch);
+        }
+        init_put_bits(&s->pb, frame, buf_size);
+        output_frame_header(s);
+        output_subframes(s);
+        output_frame_footer(s);
+        out_bytes = put_bits_count(&s->pb) >> 3;
+
+        if(out_bytes > s->max_framesize || out_bytes >= buf_size) {
+            /* still too large. must be an error. */
+            av_log(avctx, AV_LOG_ERROR, "error encoding frame\n");
+            return -1;
+        }
+    }
+
+    s->frame_count++;
+    return out_bytes;
+}
+
+static int flac_encode_close(AVCodecContext *avctx)
+{
+    av_freep(&avctx->extradata);
+    avctx->extradata_size = 0;
+    av_freep(&avctx->coded_frame);
+    return 0;
+}
+
+AVCodec flac_encoder = {
+    "flac",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_FLAC,
+    sizeof(FlacEncodeContext),
+    flac_encode_init,
+    flac_encode_frame,
+    flac_encode_close,
+    NULL,
+    .capabilities = CODEC_CAP_SMALL_LAST_FRAME,
+};
diff --git a/contrib/ffmpeg/libavcodec/flashsv.c b/contrib/ffmpeg/libavcodec/flashsv.c
new file mode 100644
index 000000000..fea8e2224
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/flashsv.c
@@ -0,0 +1,276 @@
+/*
+ * Flash Screen Video decoder
+ * Copyright (C) 2004 Alex Beregszaszi
+ * Copyright (C) 2006 Benjamin Larsson
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file flashsv.c
+ * Flash Screen Video decoder
+ * @author Alex Beregszaszi
+ * @author Benjamin Larsson
+ */
+
+/* Bitstream description
+ * The picture is divided into blocks that are zlib compressed.
+ *
+ * The decoder is fed complete frames, the frameheader contains:
+ * 4bits of block width
+ * 12bits of frame width
+ * 4bits of block height
+ * 12bits of frame height
+ *
+ * Directly after the header are the compressed blocks. The blocks
+ * have their compressed size represented with 16bits in the beginnig.
+ * If the size = 0 then the block is unchanged from the previous frame.
+ * All blocks are decompressed until the buffer is consumed.
+ *
+ * Encoding ideas, a basic encoder would just use a fixed block size.
+ * Block sizes can be multipels of 16, from 16 to 256. The blocks don't
+ * have to be quadratic. A brute force search with a set of diffrent
+ * block sizes should give a better result then to just use a fixed size.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "bitstream.h"
+
+#ifdef CONFIG_ZLIB
+#include <zlib.h>
+#endif
+
+typedef struct FlashSVContext {
+    AVCodecContext *avctx;
+    AVFrame frame;
+    int image_width, image_height;
+    int block_width, block_height;
+    uint8_t* tmpblock;
+    int block_size;
+#ifdef CONFIG_ZLIB
+    z_stream zstream;
+#endif
+} FlashSVContext;
+
+
+static void copy_region(uint8_t *sptr, uint8_t *dptr,
+        int dx, int dy, int h, int w, int stride)
+{
+    int i;
+
+    for (i = dx+h; i > dx; i--)
+    {
+        memcpy(dptr+(i*stride)+dy*3, sptr, w*3);
+        sptr += w*3;
+    }
+}
+
+
+static int flashsv_decode_init(AVCodecContext *avctx)
+{
+    FlashSVContext *s = (FlashSVContext *)avctx->priv_data;
+    int zret; // Zlib return code
+
+    s->avctx = avctx;
+#ifdef CONFIG_ZLIB
+    s->zstream.zalloc = Z_NULL;
+    s->zstream.zfree = Z_NULL;
+    s->zstream.opaque = Z_NULL;
+    zret = inflateInit(&(s->zstream));
+    if (zret != Z_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Inflate init error: %d\n", zret);
+        return 1;
+    }
+#else
+    av_log(avctx, AV_LOG_ERROR, "Zlib support not compiled. Needed for the decoder.\n");
+    return 1;
+#endif
+    avctx->pix_fmt = PIX_FMT_BGR24;
+    avctx->has_b_frames = 0;
+    s->frame.data[0] = NULL;
+
+    return 0;
+}
+
+
+static int flashsv_decode_frame(AVCodecContext *avctx,
+                                    void *data, int *data_size,
+                                    uint8_t *buf, int buf_size)
+{
+    FlashSVContext *s = (FlashSVContext *)avctx->priv_data;
+    int h_blocks, v_blocks, h_part, v_part, i, j;
+    GetBitContext gb;
+
+    /* no supplementary picture */
+    if (buf_size == 0)
+        return 0;
+
+    if(s->frame.data[0])
+            avctx->release_buffer(avctx, &s->frame);
+
+    init_get_bits(&gb, buf, buf_size * 8);
+
+    /* start to parse the bitstream */
+    s->block_width = 16* (get_bits(&gb, 4)+1);
+    s->image_width =     get_bits(&gb,12);
+    s->block_height= 16* (get_bits(&gb, 4)+1);
+    s->image_height=     get_bits(&gb,12);
+
+    /* calculate amount of blocks and the size of the border blocks */
+    h_blocks = s->image_width / s->block_width;
+    h_part = s->image_width % s->block_width;
+    v_blocks = s->image_height / s->block_height;
+    v_part = s->image_height % s->block_height;
+
+    /* the block size could change between frames, make sure the buffer
+     * is large enough, if not, get a larger one */
+    if(s->block_size < s->block_width*s->block_height) {
+        if (s->tmpblock != NULL)
+            av_free(s->tmpblock);
+        s->block_size = s->block_width*s->block_height;
+        if ((s->tmpblock = av_malloc(3*s->block_size)) == NULL) {
+            av_log(avctx, AV_LOG_ERROR, "Can't allocate decompression buffer.\n");
+            return -1;
+        }
+    }
+
+    /* init the image size once */
+    if((avctx->width==0) && (avctx->height==0)){
+        avctx->width = s->image_width;
+        avctx->height = s->image_height;
+    }
+
+    /* check for changes of image width and image height */
+    if ((avctx->width != s->image_width) || (avctx->height != s->image_height)) {
+        av_log(avctx, AV_LOG_ERROR, "Frame width or height differs from first frames!\n");
+        av_log(avctx, AV_LOG_ERROR, "fh = %d, fv %d  vs  ch = %d, cv = %d\n",avctx->height,
+        avctx->width,s->image_height,s->image_width);
+        return -1;
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "image: %dx%d block: %dx%d num: %dx%d part: %dx%d\n",
+        s->image_width, s->image_height, s->block_width, s->block_height,
+        h_blocks, v_blocks, h_part, v_part);
+
+    s->frame.reference = 1;
+    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID;
+    if (avctx->get_buffer(avctx, &s->frame) < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    /* loop over all block columns */
+    for (j = 0; j < v_blocks + (v_part?1:0); j++)
+    {
+
+        int hp = j*s->block_height; // horiz position in frame
+        int hs = (j<v_blocks)?s->block_height:v_part; // size of block
+
+
+        /* loop over all block rows */
+        for (i = 0; i < h_blocks + (h_part?1:0); i++)
+        {
+            int wp = i*s->block_width; // vert position in frame
+            int ws = (i<h_blocks)?s->block_width:h_part; // size of block
+
+            /* get the size of the compressed zlib chunk */
+            int size = get_bits(&gb, 16);
+
+            if (size == 0) {
+                /* no change, don't do anything */
+            } else {
+                /* decompress block */
+#ifdef CONFIG_ZLIB
+                int ret = inflateReset(&(s->zstream));
+                if (ret != Z_OK)
+                {
+                    av_log(avctx, AV_LOG_ERROR, "error in decompression (reset) of block %dx%d\n", i, j);
+                    /* return -1; */
+                }
+                s->zstream.next_in = buf+(get_bits_count(&gb)/8);
+                s->zstream.avail_in = size;
+                s->zstream.next_out = s->tmpblock;
+                s->zstream.avail_out = s->block_size*3;
+                ret = inflate(&(s->zstream), Z_FINISH);
+                if (ret == Z_DATA_ERROR)
+                {
+                    av_log(avctx, AV_LOG_ERROR, "Zlib resync occured\n");
+                    inflateSync(&(s->zstream));
+                    ret = inflate(&(s->zstream), Z_FINISH);
+                }
+
+                if ((ret != Z_OK) && (ret != Z_STREAM_END))
+                {
+                    av_log(avctx, AV_LOG_ERROR, "error in decompression of block %dx%d: %d\n", i, j, ret);
+                    /* return -1; */
+                }
+#else
+                av_log(avctx, AV_LOG_ERROR, "Zlib support not compiled in.\n");
+                return -1;
+#endif
+                copy_region(s->tmpblock, s->frame.data[0], s->image_height-(hp+hs+1), wp, hs, ws, s->frame.linesize[0]);
+                skip_bits(&gb, 8*size);   /* skip the consumed bits */
+            }
+        }
+    }
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->frame;
+
+    if ((get_bits_count(&gb)/8) != buf_size)
+        av_log(avctx, AV_LOG_ERROR, "buffer not fully consumed (%d != %d)\n",
+            buf_size, (get_bits_count(&gb)/8));
+
+    /* report that the buffer was completely consumed */
+    return buf_size;
+}
+
+
+static int flashsv_decode_end(AVCodecContext *avctx)
+{
+    FlashSVContext *s = (FlashSVContext *)avctx->priv_data;
+#ifdef CONFIG_ZLIB
+    inflateEnd(&(s->zstream));
+#endif
+    /* release the frame if needed */
+    if (s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    /* free the tmpblock */
+    if (s->tmpblock != NULL)
+        av_free(s->tmpblock);
+
+    return 0;
+}
+
+
+AVCodec flashsv_decoder = {
+    "flashsv",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_FLASHSV,
+    sizeof(FlashSVContext),
+    flashsv_decode_init,
+    NULL,
+    flashsv_decode_end,
+    flashsv_decode_frame,
+    CODEC_CAP_DR1,
+    .pix_fmts = (enum PixelFormat[]){PIX_FMT_BGR24, -1},
+};
diff --git a/contrib/ffmpeg/libavcodec/flicvideo.c b/contrib/ffmpeg/libavcodec/flicvideo.c
new file mode 100644
index 000000000..95cb26ce4
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/flicvideo.c
@@ -0,0 +1,754 @@
+/*
+ * FLI/FLC Animation Video Decoder
+ * Copyright (C) 2003, 2004 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file flic.c
+ * Autodesk Animator FLI/FLC Video Decoder
+ * by Mike Melanson (melanson@pcisys.net)
+ * for more information on the .fli/.flc file format and all of its many
+ * variations, visit:
+ *   http://www.compuphase.com/flic.htm
+ *
+ * This decoder outputs PAL8/RGB555/RGB565 and maybe one day RGB24
+ * colorspace data, depending on the FLC. To use this decoder, be
+ * sure that your demuxer sends the FLI file header to the decoder via
+ * the extradata chunk in AVCodecContext. The chunk should be 128 bytes
+ * large. The only exception is for FLI files from the game "Magic Carpet",
+ * in which the header is only 12 bytes.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "bswap.h"
+
+#define FLI_256_COLOR 4
+#define FLI_DELTA     7
+#define FLI_COLOR     11
+#define FLI_LC        12
+#define FLI_BLACK     13
+#define FLI_BRUN      15
+#define FLI_COPY      16
+#define FLI_MINI      18
+#define FLI_DTA_BRUN  25
+#define FLI_DTA_COPY  26
+#define FLI_DTA_LC    27
+
+#define FLI_TYPE_CODE     (0xAF11)
+#define FLC_FLX_TYPE_CODE (0xAF12)
+#define FLC_DTA_TYPE_CODE (0xAF44) /* Marks an "Extended FLC" comes from Dave's Targa Animator (DTA) */
+#define FLC_MAGIC_CARPET_SYNTHETIC_TYPE_CODE (0xAF13)
+
+#define CHECK_PIXEL_PTR(n) \
+    if (pixel_ptr + n > pixel_limit) { \
+        av_log (s->avctx, AV_LOG_INFO, "Problem: pixel_ptr >= pixel_limit (%d >= %d)\n", \
+        pixel_ptr + n, pixel_limit); \
+        return -1; \
+    } \
+
+typedef struct FlicDecodeContext {
+    AVCodecContext *avctx;
+    AVFrame frame;
+
+    unsigned int palette[256];
+    int new_palette;
+    int fli_type;  /* either 0xAF11 or 0xAF12, affects palette resolution */
+} FlicDecodeContext;
+
+static int flic_decode_init(AVCodecContext *avctx)
+{
+    FlicDecodeContext *s = (FlicDecodeContext *)avctx->priv_data;
+    unsigned char *fli_header = (unsigned char *)avctx->extradata;
+    int depth;
+
+    s->avctx = avctx;
+    avctx->has_b_frames = 0;
+
+    s->fli_type = LE_16(&fli_header[4]); /* Might be overridden if a Magic Carpet FLC */
+    depth       = LE_16(&fli_header[12]);
+
+    if (depth == 0) {
+      depth = 8; /* Some FLC generators set depth to zero, when they mean 8Bpp. Fix up here */
+    }
+
+    if (s->avctx->extradata_size == 12) {
+        /* special case for magic carpet FLIs */
+        s->fli_type = FLC_MAGIC_CARPET_SYNTHETIC_TYPE_CODE;
+    } else if (s->avctx->extradata_size != 128) {
+        av_log(avctx, AV_LOG_ERROR, "Expected extradata of 12 or 128 bytes\n");
+        return -1;
+    }
+
+    if ((s->fli_type == FLC_FLX_TYPE_CODE) && (depth == 16)) {
+        depth = 15; /* Original Autodesk FLX's say the depth is 16Bpp when it is really 15Bpp */
+    }
+
+    switch (depth) {
+        case 8  : avctx->pix_fmt = PIX_FMT_PAL8; break;
+        case 15 : avctx->pix_fmt = PIX_FMT_RGB555; break;
+        case 16 : avctx->pix_fmt = PIX_FMT_RGB565; break;
+        case 24 : avctx->pix_fmt = PIX_FMT_BGR24; /* Supposedly BGR, but havent any files to test with */
+                  av_log(avctx, AV_LOG_ERROR, "24Bpp FLC/FLX is unsupported due to no test files.\n");
+                  return -1;
+                  break;
+        default :
+                  av_log(avctx, AV_LOG_ERROR, "Unkown FLC/FLX depth of %d Bpp is unsupported.\n",depth);
+                  return -1;
+    }
+
+    s->frame.data[0] = NULL;
+    s->new_palette = 0;
+
+    return 0;
+}
+
+static int flic_decode_frame_8BPP(AVCodecContext *avctx,
+                                  void *data, int *data_size,
+                                  uint8_t *buf, int buf_size)
+{
+    FlicDecodeContext *s = (FlicDecodeContext *)avctx->priv_data;
+
+    int stream_ptr = 0;
+    int stream_ptr_after_color_chunk;
+    int pixel_ptr;
+    int palette_ptr;
+    unsigned char palette_idx1;
+    unsigned char palette_idx2;
+
+    unsigned int frame_size;
+    int num_chunks;
+
+    unsigned int chunk_size;
+    int chunk_type;
+
+    int i, j;
+
+    int color_packets;
+    int color_changes;
+    int color_shift;
+    unsigned char r, g, b;
+
+    int lines;
+    int compressed_lines;
+    int starting_line;
+    signed short line_packets;
+    int y_ptr;
+    int byte_run;
+    int pixel_skip;
+    int pixel_countdown;
+    unsigned char *pixels;
+    int pixel_limit;
+
+    s->frame.reference = 1;
+    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if (avctx->reget_buffer(avctx, &s->frame) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+        return -1;
+    }
+
+    pixels = s->frame.data[0];
+    pixel_limit = s->avctx->height * s->frame.linesize[0];
+
+    frame_size = LE_32(&buf[stream_ptr]);
+    stream_ptr += 6;  /* skip the magic number */
+    num_chunks = LE_16(&buf[stream_ptr]);
+    stream_ptr += 10;  /* skip padding */
+
+    frame_size -= 16;
+
+    /* iterate through the chunks */
+    while ((frame_size > 0) && (num_chunks > 0)) {
+        chunk_size = LE_32(&buf[stream_ptr]);
+        stream_ptr += 4;
+        chunk_type = LE_16(&buf[stream_ptr]);
+        stream_ptr += 2;
+
+        switch (chunk_type) {
+        case FLI_256_COLOR:
+        case FLI_COLOR:
+            stream_ptr_after_color_chunk = stream_ptr + chunk_size - 6;
+
+            /* check special case: If this file is from the Magic Carpet
+             * game and uses 6-bit colors even though it reports 256-color
+             * chunks in a 0xAF12-type file (fli_type is set to 0xAF13 during
+             * initialization) */
+            if ((chunk_type == FLI_256_COLOR) && (s->fli_type != FLC_MAGIC_CARPET_SYNTHETIC_TYPE_CODE))
+                color_shift = 0;
+            else
+                color_shift = 2;
+            /* set up the palette */
+            color_packets = LE_16(&buf[stream_ptr]);
+            stream_ptr += 2;
+            palette_ptr = 0;
+            for (i = 0; i < color_packets; i++) {
+                /* first byte is how many colors to skip */
+                palette_ptr += buf[stream_ptr++];
+
+                /* next byte indicates how many entries to change */
+                color_changes = buf[stream_ptr++];
+
+                /* if there are 0 color changes, there are actually 256 */
+                if (color_changes == 0)
+                    color_changes = 256;
+
+                for (j = 0; j < color_changes; j++) {
+                    unsigned int entry;
+
+                    /* wrap around, for good measure */
+                    if ((unsigned)palette_ptr >= 256)
+                        palette_ptr = 0;
+
+                    r = buf[stream_ptr++] << color_shift;
+                    g = buf[stream_ptr++] << color_shift;
+                    b = buf[stream_ptr++] << color_shift;
+                    entry = (r << 16) | (g << 8) | b;
+                    if (s->palette[palette_ptr] != entry)
+                        s->new_palette = 1;
+                    s->palette[palette_ptr++] = entry;
+                }
+            }
+
+            /* color chunks sometimes have weird 16-bit alignment issues;
+             * therefore, take the hardline approach and set the stream_ptr
+             * to the value calculated w.r.t. the size specified by the color
+             * chunk header */
+            stream_ptr = stream_ptr_after_color_chunk;
+
+            break;
+
+        case FLI_DELTA:
+            y_ptr = 0;
+            compressed_lines = LE_16(&buf[stream_ptr]);
+            stream_ptr += 2;
+            while (compressed_lines > 0) {
+                line_packets = LE_16(&buf[stream_ptr]);
+                stream_ptr += 2;
+                if ((line_packets & 0xC000) == 0xC000) {
+                    // line skip opcode
+                    line_packets = -line_packets;
+                    y_ptr += line_packets * s->frame.linesize[0];
+                } else if ((line_packets & 0xC000) == 0x4000) {
+                    av_log(avctx, AV_LOG_ERROR, "Undefined opcode (%x) in DELTA_FLI\n", line_packets);
+                } else if ((line_packets & 0xC000) == 0x8000) {
+                    // "last byte" opcode
+                    pixels[y_ptr + s->frame.linesize[0] - 1] = line_packets & 0xff;
+                } else {
+                    compressed_lines--;
+                    pixel_ptr = y_ptr;
+                    pixel_countdown = s->avctx->width;
+                    for (i = 0; i < line_packets; i++) {
+                        /* account for the skip bytes */
+                        pixel_skip = buf[stream_ptr++];
+                        pixel_ptr += pixel_skip;
+                        pixel_countdown -= pixel_skip;
+                        byte_run = (signed char)(buf[stream_ptr++]);
+                        if (byte_run < 0) {
+                            byte_run = -byte_run;
+                            palette_idx1 = buf[stream_ptr++];
+                            palette_idx2 = buf[stream_ptr++];
+                            CHECK_PIXEL_PTR(byte_run);
+                            for (j = 0; j < byte_run; j++, pixel_countdown -= 2) {
+                                pixels[pixel_ptr++] = palette_idx1;
+                                pixels[pixel_ptr++] = palette_idx2;
+                            }
+                        } else {
+                            CHECK_PIXEL_PTR(byte_run * 2);
+                            for (j = 0; j < byte_run * 2; j++, pixel_countdown--) {
+                                palette_idx1 = buf[stream_ptr++];
+                                pixels[pixel_ptr++] = palette_idx1;
+                            }
+                        }
+                    }
+
+                    y_ptr += s->frame.linesize[0];
+                }
+            }
+            break;
+
+        case FLI_LC:
+            /* line compressed */
+            starting_line = LE_16(&buf[stream_ptr]);
+            stream_ptr += 2;
+            y_ptr = 0;
+            y_ptr += starting_line * s->frame.linesize[0];
+
+            compressed_lines = LE_16(&buf[stream_ptr]);
+            stream_ptr += 2;
+            while (compressed_lines > 0) {
+                pixel_ptr = y_ptr;
+                pixel_countdown = s->avctx->width;
+                line_packets = buf[stream_ptr++];
+                if (line_packets > 0) {
+                    for (i = 0; i < line_packets; i++) {
+                        /* account for the skip bytes */
+                        pixel_skip = buf[stream_ptr++];
+                        pixel_ptr += pixel_skip;
+                        pixel_countdown -= pixel_skip;
+                        byte_run = (signed char)(buf[stream_ptr++]);
+                        if (byte_run > 0) {
+                            CHECK_PIXEL_PTR(byte_run);
+                            for (j = 0; j < byte_run; j++, pixel_countdown--) {
+                                palette_idx1 = buf[stream_ptr++];
+                                pixels[pixel_ptr++] = palette_idx1;
+                            }
+                        } else if (byte_run < 0) {
+                            byte_run = -byte_run;
+                            palette_idx1 = buf[stream_ptr++];
+                            CHECK_PIXEL_PTR(byte_run);
+                            for (j = 0; j < byte_run; j++, pixel_countdown--) {
+                                pixels[pixel_ptr++] = palette_idx1;
+                            }
+                        }
+                    }
+                }
+
+                y_ptr += s->frame.linesize[0];
+                compressed_lines--;
+            }
+            break;
+
+        case FLI_BLACK:
+            /* set the whole frame to color 0 (which is usually black) */
+            memset(pixels, 0,
+                s->frame.linesize[0] * s->avctx->height);
+            break;
+
+        case FLI_BRUN:
+            /* Byte run compression: This chunk type only occurs in the first
+             * FLI frame and it will update the entire frame. */
+            y_ptr = 0;
+            for (lines = 0; lines < s->avctx->height; lines++) {
+                pixel_ptr = y_ptr;
+                /* disregard the line packets; instead, iterate through all
+                 * pixels on a row */
+                stream_ptr++;
+                pixel_countdown = s->avctx->width;
+                while (pixel_countdown > 0) {
+                    byte_run = (signed char)(buf[stream_ptr++]);
+                    if (byte_run > 0) {
+                        palette_idx1 = buf[stream_ptr++];
+                        CHECK_PIXEL_PTR(byte_run);
+                        for (j = 0; j < byte_run; j++) {
+                            pixels[pixel_ptr++] = palette_idx1;
+                            pixel_countdown--;
+                            if (pixel_countdown < 0)
+                                av_log(avctx, AV_LOG_ERROR, "pixel_countdown < 0 (%d)\n",
+                                       pixel_countdown);
+                        }
+                    } else {  /* copy bytes if byte_run < 0 */
+                        byte_run = -byte_run;
+                        CHECK_PIXEL_PTR(byte_run);
+                        for (j = 0; j < byte_run; j++) {
+                            palette_idx1 = buf[stream_ptr++];
+                            pixels[pixel_ptr++] = palette_idx1;
+                            pixel_countdown--;
+                            if (pixel_countdown < 0)
+                                av_log(avctx, AV_LOG_ERROR, "pixel_countdown < 0 (%d)\n",
+                                       pixel_countdown);
+                        }
+                    }
+                }
+
+                y_ptr += s->frame.linesize[0];
+            }
+            break;
+
+        case FLI_COPY:
+            /* copy the chunk (uncompressed frame) */
+            if (chunk_size - 6 > s->avctx->width * s->avctx->height) {
+                av_log(avctx, AV_LOG_ERROR, "In chunk FLI_COPY : source data (%d bytes) " \
+                       "bigger than image, skipping chunk\n", chunk_size - 6);
+                stream_ptr += chunk_size - 6;
+            } else {
+                for (y_ptr = 0; y_ptr < s->frame.linesize[0] * s->avctx->height;
+                     y_ptr += s->frame.linesize[0]) {
+                    memcpy(&pixels[y_ptr], &buf[stream_ptr],
+                        s->avctx->width);
+                    stream_ptr += s->avctx->width;
+                }
+            }
+            break;
+
+        case FLI_MINI:
+            /* some sort of a thumbnail? disregard this chunk... */
+            stream_ptr += chunk_size - 6;
+            break;
+
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Unrecognized chunk type: %d\n", chunk_type);
+            break;
+        }
+
+        frame_size -= chunk_size;
+        num_chunks--;
+    }
+
+    /* by the end of the chunk, the stream ptr should equal the frame
+     * size (minus 1, possibly); if it doesn't, issue a warning */
+    if ((stream_ptr != buf_size) && (stream_ptr != buf_size - 1))
+        av_log(avctx, AV_LOG_ERROR, "Processed FLI chunk where chunk size = %d " \
+               "and final chunk ptr = %d\n", buf_size, stream_ptr);
+
+    /* make the palette available on the way out */
+    memcpy(s->frame.data[1], s->palette, AVPALETTE_SIZE);
+    if (s->new_palette) {
+        s->frame.palette_has_changed = 1;
+        s->new_palette = 0;
+    }
+
+    *data_size=sizeof(AVFrame);
+    *(AVFrame*)data = s->frame;
+
+    return buf_size;
+}
+
+static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
+                                      void *data, int *data_size,
+                                      uint8_t *buf, int buf_size)
+{
+    /* Note, the only difference between the 15Bpp and 16Bpp */
+    /* Format is the pixel format, the packets are processed the same. */
+    FlicDecodeContext *s = (FlicDecodeContext *)avctx->priv_data;
+
+    int stream_ptr = 0;
+    int pixel_ptr;
+    unsigned char palette_idx1;
+
+    unsigned int frame_size;
+    int num_chunks;
+
+    unsigned int chunk_size;
+    int chunk_type;
+
+    int i, j;
+
+    int lines;
+    int compressed_lines;
+    signed short line_packets;
+    int y_ptr;
+    int byte_run;
+    int pixel_skip;
+    int pixel_countdown;
+    unsigned char *pixels;
+    int pixel;
+    int pixel_limit;
+
+    s->frame.reference = 1;
+    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if (avctx->reget_buffer(avctx, &s->frame) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+        return -1;
+    }
+
+    pixels = s->frame.data[0];
+    pixel_limit = s->avctx->height * s->frame.linesize[0];
+
+    frame_size = LE_32(&buf[stream_ptr]);
+    stream_ptr += 6;  /* skip the magic number */
+    num_chunks = LE_16(&buf[stream_ptr]);
+    stream_ptr += 10;  /* skip padding */
+
+    frame_size -= 16;
+
+    /* iterate through the chunks */
+    while ((frame_size > 0) && (num_chunks > 0)) {
+        chunk_size = LE_32(&buf[stream_ptr]);
+        stream_ptr += 4;
+        chunk_type = LE_16(&buf[stream_ptr]);
+        stream_ptr += 2;
+
+        switch (chunk_type) {
+        case FLI_256_COLOR:
+        case FLI_COLOR:
+            /* For some reason, it seems that non-paletised flics do include one of these */
+            /* chunks in their first frame.  Why i do not know, it seems rather extraneous */
+/*            av_log(avctx, AV_LOG_ERROR, "Unexpected Palette chunk %d in non-paletised FLC\n",chunk_type);*/
+            stream_ptr = stream_ptr + chunk_size - 6;
+            break;
+
+        case FLI_DELTA:
+        case FLI_DTA_LC:
+            y_ptr = 0;
+            compressed_lines = LE_16(&buf[stream_ptr]);
+            stream_ptr += 2;
+            while (compressed_lines > 0) {
+                line_packets = LE_16(&buf[stream_ptr]);
+                stream_ptr += 2;
+                if (line_packets < 0) {
+                    line_packets = -line_packets;
+                    y_ptr += line_packets * s->frame.linesize[0];
+                } else {
+                    compressed_lines--;
+                    pixel_ptr = y_ptr;
+                    pixel_countdown = s->avctx->width;
+                    for (i = 0; i < line_packets; i++) {
+                        /* account for the skip bytes */
+                        pixel_skip = buf[stream_ptr++];
+                        pixel_ptr += (pixel_skip*2); /* Pixel is 2 bytes wide */
+                        pixel_countdown -= pixel_skip;
+                        byte_run = (signed char)(buf[stream_ptr++]);
+                        if (byte_run < 0) {
+                            byte_run = -byte_run;
+                            pixel    = LE_16(&buf[stream_ptr]);
+                            stream_ptr += 2;
+                            CHECK_PIXEL_PTR(byte_run);
+                            for (j = 0; j < byte_run; j++, pixel_countdown -= 2) {
+                                *((signed short*)(&pixels[pixel_ptr])) = pixel;
+                                pixel_ptr += 2;
+                            }
+                        } else {
+                            CHECK_PIXEL_PTR(byte_run);
+                            for (j = 0; j < byte_run; j++, pixel_countdown--) {
+                                *((signed short*)(&pixels[pixel_ptr])) = LE_16(&buf[stream_ptr]);
+                                stream_ptr += 2;
+                                pixel_ptr += 2;
+                            }
+                        }
+                    }
+
+                    y_ptr += s->frame.linesize[0];
+                }
+            }
+            break;
+
+        case FLI_LC:
+            av_log(avctx, AV_LOG_ERROR, "Unexpected FLI_LC chunk in non-paletised FLC\n");
+            stream_ptr = stream_ptr + chunk_size - 6;
+            break;
+
+        case FLI_BLACK:
+            /* set the whole frame to 0x0000 which is black in both 15Bpp and 16Bpp modes. */
+            memset(pixels, 0x0000,
+                   s->frame.linesize[0] * s->avctx->height);
+            break;
+
+        case FLI_BRUN:
+            y_ptr = 0;
+            for (lines = 0; lines < s->avctx->height; lines++) {
+                pixel_ptr = y_ptr;
+                /* disregard the line packets; instead, iterate through all
+                 * pixels on a row */
+                stream_ptr++;
+                pixel_countdown = (s->avctx->width * 2);
+
+                while (pixel_countdown > 0) {
+                    byte_run = (signed char)(buf[stream_ptr++]);
+                    if (byte_run > 0) {
+                        palette_idx1 = buf[stream_ptr++];
+                        CHECK_PIXEL_PTR(byte_run);
+                        for (j = 0; j < byte_run; j++) {
+                            pixels[pixel_ptr++] = palette_idx1;
+                            pixel_countdown--;
+                            if (pixel_countdown < 0)
+                                av_log(avctx, AV_LOG_ERROR, "pixel_countdown < 0 (%d)\n",
+                                       pixel_countdown);
+                        }
+                    } else {  /* copy bytes if byte_run < 0 */
+                        byte_run = -byte_run;
+                        CHECK_PIXEL_PTR(byte_run);
+                        for (j = 0; j < byte_run; j++) {
+                            palette_idx1 = buf[stream_ptr++];
+                            pixels[pixel_ptr++] = palette_idx1;
+                            pixel_countdown--;
+                            if (pixel_countdown < 0)
+                                av_log(avctx, AV_LOG_ERROR, "pixel_countdown < 0 (%d)\n",
+                                       pixel_countdown);
+                        }
+                    }
+                }
+
+                /* Now FLX is strange, in that it is "byte" as opposed to "pixel" run length compressed.
+                 * This doesnt give us any good oportunity to perform word endian conversion
+                 * during decompression. So if its requried (ie, this isnt a LE target, we do
+                 * a second pass over the line here, swapping the bytes.
+                 */
+                pixel = 0xFF00;
+                if (0xFF00 != LE_16(&pixel)) /* Check if its not an LE Target */
+                {
+                  pixel_ptr = y_ptr;
+                  pixel_countdown = s->avctx->width;
+                  while (pixel_countdown > 0) {
+                    *((signed short*)(&pixels[pixel_ptr])) = LE_16(&buf[pixel_ptr]);
+                    pixel_ptr += 2;
+                  }
+                }
+                y_ptr += s->frame.linesize[0];
+            }
+            break;
+
+        case FLI_DTA_BRUN:
+            y_ptr = 0;
+            for (lines = 0; lines < s->avctx->height; lines++) {
+                pixel_ptr = y_ptr;
+                /* disregard the line packets; instead, iterate through all
+                 * pixels on a row */
+                stream_ptr++;
+                pixel_countdown = s->avctx->width; /* Width is in pixels, not bytes */
+
+                while (pixel_countdown > 0) {
+                    byte_run = (signed char)(buf[stream_ptr++]);
+                    if (byte_run > 0) {
+                        pixel    = LE_16(&buf[stream_ptr]);
+                        stream_ptr += 2;
+                        CHECK_PIXEL_PTR(byte_run);
+                        for (j = 0; j < byte_run; j++) {
+                            *((signed short*)(&pixels[pixel_ptr])) = pixel;
+                            pixel_ptr += 2;
+                            pixel_countdown--;
+                            if (pixel_countdown < 0)
+                                av_log(avctx, AV_LOG_ERROR, "pixel_countdown < 0 (%d)\n",
+                                       pixel_countdown);
+                        }
+                    } else {  /* copy pixels if byte_run < 0 */
+                        byte_run = -byte_run;
+                        CHECK_PIXEL_PTR(byte_run);
+                        for (j = 0; j < byte_run; j++) {
+                            *((signed short*)(&pixels[pixel_ptr])) = LE_16(&buf[stream_ptr]);
+                            stream_ptr += 2;
+                            pixel_ptr  += 2;
+                            pixel_countdown--;
+                            if (pixel_countdown < 0)
+                                av_log(avctx, AV_LOG_ERROR, "pixel_countdown < 0 (%d)\n",
+                                       pixel_countdown);
+                        }
+                    }
+                }
+
+                y_ptr += s->frame.linesize[0];
+            }
+            break;
+
+        case FLI_COPY:
+        case FLI_DTA_COPY:
+            /* copy the chunk (uncompressed frame) */
+            if (chunk_size - 6 > (unsigned int)(s->avctx->width * s->avctx->height)*2) {
+                av_log(avctx, AV_LOG_ERROR, "In chunk FLI_COPY : source data (%d bytes) " \
+                       "bigger than image, skipping chunk\n", chunk_size - 6);
+                stream_ptr += chunk_size - 6;
+            } else {
+
+                for (y_ptr = 0; y_ptr < s->frame.linesize[0] * s->avctx->height;
+                     y_ptr += s->frame.linesize[0]) {
+
+                    pixel_countdown = s->avctx->width;
+                    pixel_ptr = 0;
+                    while (pixel_countdown > 0) {
+                      *((signed short*)(&pixels[y_ptr + pixel_ptr])) = LE_16(&buf[stream_ptr+pixel_ptr]);
+                      pixel_ptr += 2;
+                      pixel_countdown--;
+                    }
+                    stream_ptr += s->avctx->width*2;
+                }
+            }
+            break;
+
+        case FLI_MINI:
+            /* some sort of a thumbnail? disregard this chunk... */
+            stream_ptr += chunk_size - 6;
+            break;
+
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Unrecognized chunk type: %d\n", chunk_type);
+            break;
+        }
+
+        frame_size -= chunk_size;
+        num_chunks--;
+    }
+
+    /* by the end of the chunk, the stream ptr should equal the frame
+     * size (minus 1, possibly); if it doesn't, issue a warning */
+    if ((stream_ptr != buf_size) && (stream_ptr != buf_size - 1))
+        av_log(avctx, AV_LOG_ERROR, "Processed FLI chunk where chunk size = %d " \
+               "and final chunk ptr = %d\n", buf_size, stream_ptr);
+
+
+    *data_size=sizeof(AVFrame);
+    *(AVFrame*)data = s->frame;
+
+    return buf_size;
+}
+
+static int flic_decode_frame_24BPP(AVCodecContext *avctx,
+                                   void *data, int *data_size,
+                                   uint8_t *buf, int buf_size)
+{
+  av_log(avctx, AV_LOG_ERROR, "24Bpp FLC Unsupported due to lack of test files.\n");
+  return -1;
+}
+
+static int flic_decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             uint8_t *buf, int buf_size)
+{
+    if (avctx->pix_fmt == PIX_FMT_PAL8) {
+      return flic_decode_frame_8BPP(avctx, data, data_size,
+                                    buf, buf_size);
+    }
+    else if ((avctx->pix_fmt == PIX_FMT_RGB555) ||
+             (avctx->pix_fmt == PIX_FMT_RGB565)) {
+      return flic_decode_frame_15_16BPP(avctx, data, data_size,
+                                        buf, buf_size);
+    }
+    else if (avctx->pix_fmt == PIX_FMT_BGR24) {
+      return flic_decode_frame_24BPP(avctx, data, data_size,
+                                     buf, buf_size);
+    }
+
+    /* Shouldnt get  here, ever as the pix_fmt is processed */
+    /* in flic_decode_init and the above if should deal with */
+    /* the finite set of possibilites allowable by here. */
+    /* but in case we do, just error out. */
+    av_log(avctx, AV_LOG_ERROR, "Unknown Format of FLC. My Science cant explain how this happened\n");
+    return -1;
+}
+
+
+static int flic_decode_end(AVCodecContext *avctx)
+{
+    FlicDecodeContext *s = avctx->priv_data;
+
+    if (s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    return 0;
+}
+
+AVCodec flic_decoder = {
+    "flic",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_FLIC,
+    sizeof(FlicDecodeContext),
+    flic_decode_init,
+    NULL,
+    flic_decode_end,
+    flic_decode_frame,
+    CODEC_CAP_DR1,
+    NULL,
+    NULL,
+    NULL,
+    NULL
+};
diff --git a/contrib/ffmpeg/libavcodec/fraps.c b/contrib/ffmpeg/libavcodec/fraps.c
new file mode 100644
index 000000000..18d270049
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/fraps.c
@@ -0,0 +1,398 @@
+/*
+ * Fraps FPS1 decoder
+ * Copyright (c) 2005 Roine Gustafsson
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file fraps.c
+ * Lossless Fraps 'FPS1' decoder
+ * @author Roine Gustafsson <roine at users sf net>
+ * @author Konstantin Shishkov
+ *
+ * Codec algorithm for version 0 is taken from Transcode <www.transcoding.org>
+ *
+ * Version 2 files support by Konstantin Shishkov
+ */
+
+#include "avcodec.h"
+#include "bitstream.h"
+#include "dsputil.h"
+
+#define FPS_TAG MKTAG('F', 'P', 'S', 'x')
+
+/* symbol for Huffman tree node */
+#define HNODE -1
+
+/**
+ * Huffman node
+ * FIXME one day this should belong to one general framework
+ */
+typedef struct Node{
+    int16_t sym;
+    int16_t n0;
+    int count;
+}Node;
+
+/**
+ * local variable storage
+ */
+typedef struct FrapsContext{
+    AVCodecContext *avctx;
+    AVFrame frame;
+    Node nodes[512];
+    uint8_t *tmpbuf;
+    DSPContext dsp;
+} FrapsContext;
+
+
+/**
+ * initializes decoder
+ * @param avctx codec context
+ * @return 0 on success or negative if fails
+ */
+static int decode_init(AVCodecContext *avctx)
+{
+    FrapsContext * const s = avctx->priv_data;
+
+    avctx->coded_frame = (AVFrame*)&s->frame;
+    avctx->has_b_frames = 0;
+    avctx->pix_fmt= PIX_FMT_NONE; /* set in decode_frame */
+
+    s->avctx = avctx;
+    s->frame.data[0] = NULL;
+    s->tmpbuf = NULL;
+
+    dsputil_init(&s->dsp, avctx);
+
+    return 0;
+}
+
+/**
+ * Comparator - our nodes should ascend by count
+ * but with preserved symbol order
+ */
+static int huff_cmp(const Node *a, const Node *b){
+    return (a->count - b->count)*256 + a->sym - b->sym;
+}
+
+static void get_tree_codes(uint32_t *bits, int16_t *lens, uint8_t *xlat, Node *nodes, int node, uint32_t pfx, int pl, int *pos)
+{
+    int s;
+
+    s = nodes[node].sym;
+    if(s != HNODE || !nodes[node].count){
+        bits[*pos] = pfx;
+        lens[*pos] = pl;
+        xlat[*pos] = s;
+        (*pos)++;
+    }else{
+        pfx <<= 1;
+        pl++;
+        get_tree_codes(bits, lens, xlat, nodes, nodes[node].n0, pfx, pl, pos);
+        pfx |= 1;
+        get_tree_codes(bits, lens, xlat, nodes, nodes[node].n0+1, pfx, pl, pos);
+    }
+}
+
+static int build_huff_tree(VLC *vlc, Node *nodes, uint8_t *xlat)
+{
+    uint32_t bits[256];
+    int16_t lens[256];
+    int pos = 0;
+
+    get_tree_codes(bits, lens, xlat, nodes, 510, 0, 0, &pos);
+    return init_vlc(vlc, 9, pos, lens, 2, 2, bits, 4, 4, 0);
+}
+
+
+/**
+ * decode Fraps v2 packed plane
+ */
+static int fraps2_decode_plane(FrapsContext *s, uint8_t *dst, int stride, int w,
+                               int h, uint8_t *src, int size, int Uoff)
+{
+    int i, j;
+    int cur_node;
+    GetBitContext gb;
+    VLC vlc;
+    int64_t sum = 0;
+    uint8_t recode[256];
+
+    for(i = 0; i < 256; i++){
+        s->nodes[i].sym = i;
+        s->nodes[i].count = LE_32(src);
+        s->nodes[i].n0 = -2;
+        if(s->nodes[i].count < 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "Symbol count < 0\n");
+            return -1;
+        }
+        src += 4;
+        sum += s->nodes[i].count;
+    }
+    size -= 1024;
+
+    if(sum >> 31) {
+        av_log(s->avctx, AV_LOG_ERROR, "Too high symbol frequencies. Tree construction is not possible\n");
+        return -1;
+    }
+    qsort(s->nodes, 256, sizeof(Node), huff_cmp);
+    cur_node = 256;
+    for(i = 0; i < 511; i += 2){
+        s->nodes[cur_node].sym = HNODE;
+        s->nodes[cur_node].count = s->nodes[i].count + s->nodes[i+1].count;
+        s->nodes[cur_node].n0 = i;
+        for(j = cur_node; j > 0; j--){
+            if(s->nodes[j].count >= s->nodes[j - 1].count) break;
+            FFSWAP(Node, s->nodes[j], s->nodes[j - 1]);
+        }
+        cur_node++;
+    }
+    if(build_huff_tree(&vlc, s->nodes, recode) < 0){
+        av_log(s->avctx, AV_LOG_ERROR, "Error building tree\n");
+        return -1;
+    }
+    /* we have built Huffman table and are ready to decode plane */
+
+    /* convert bits so they may be used by standard bitreader */
+    s->dsp.bswap_buf(s->tmpbuf, src, size >> 2);
+
+    init_get_bits(&gb, s->tmpbuf, size * 8);
+    for(j = 0; j < h; j++){
+        for(i = 0; i < w; i++){
+            dst[i] = recode[get_vlc2(&gb, vlc.table, 9, 3)];
+            /* lines are stored as deltas between previous lines
+             * and we need to add 0x80 to the first lines of chroma planes
+             */
+            if(j) dst[i] += dst[i - stride];
+            else if(Uoff) dst[i] += 0x80;
+        }
+        dst += stride;
+    }
+    free_vlc(&vlc);
+    return 0;
+}
+
+/**
+ * decode a frame
+ * @param avctx codec context
+ * @param data output AVFrame
+ * @param data_size size of output data or 0 if no picture is returned
+ * @param buf input data frame
+ * @param buf_size size of input data frame
+ * @return number of consumed bytes on success or negative if decode fails
+ */
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    FrapsContext * const s = avctx->priv_data;
+    AVFrame *frame = data;
+    AVFrame * const f = (AVFrame*)&s->frame;
+    uint32_t header;
+    unsigned int version,header_size;
+    unsigned int x, y;
+    uint32_t *buf32;
+    uint32_t *luma1,*luma2,*cb,*cr;
+    uint32_t offs[4];
+    int i, is_chroma, planes;
+
+
+    header = LE_32(buf);
+    version = header & 0xff;
+    header_size = (header & (1<<30))? 8 : 4; /* bit 30 means pad to 8 bytes */
+
+    if (version > 2 && version != 4) {
+        av_log(avctx, AV_LOG_ERROR,
+               "This file is encoded with Fraps version %d. " \
+               "This codec can only decode version 0, 1, 2 and 4.\n", version);
+        return -1;
+    }
+
+    buf+=4;
+    if (header_size == 8)
+        buf+=4;
+
+    switch(version) {
+    case 0:
+    default:
+        /* Fraps v0 is a reordered YUV420 */
+        avctx->pix_fmt = PIX_FMT_YUV420P;
+
+        if ( (buf_size != avctx->width*avctx->height*3/2+header_size) &&
+             (buf_size != header_size) ) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Invalid frame length %d (should be %d)\n",
+                   buf_size, avctx->width*avctx->height*3/2+header_size);
+            return -1;
+        }
+
+        if (( (avctx->width % 8) != 0) || ( (avctx->height % 2) != 0 )) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid frame size %dx%d\n",
+                   avctx->width, avctx->height);
+            return -1;
+        }
+
+        f->reference = 1;
+        f->buffer_hints = FF_BUFFER_HINTS_VALID |
+                          FF_BUFFER_HINTS_PRESERVE |
+                          FF_BUFFER_HINTS_REUSABLE;
+        if (avctx->reget_buffer(avctx, f)) {
+            av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+            return -1;
+        }
+        /* bit 31 means same as previous pic */
+        f->pict_type = (header & (1<<31))? FF_P_TYPE : FF_I_TYPE;
+        f->key_frame = f->pict_type == FF_I_TYPE;
+
+        if (f->pict_type == FF_I_TYPE) {
+            buf32=(uint32_t*)buf;
+            for(y=0; y<avctx->height/2; y++){
+                luma1=(uint32_t*)&f->data[0][ y*2*f->linesize[0] ];
+                luma2=(uint32_t*)&f->data[0][ (y*2+1)*f->linesize[0] ];
+                cr=(uint32_t*)&f->data[1][ y*f->linesize[1] ];
+                cb=(uint32_t*)&f->data[2][ y*f->linesize[2] ];
+                for(x=0; x<avctx->width; x+=8){
+                    *(luma1++) = *(buf32++);
+                    *(luma1++) = *(buf32++);
+                    *(luma2++) = *(buf32++);
+                    *(luma2++) = *(buf32++);
+                    *(cr++) = *(buf32++);
+                    *(cb++) = *(buf32++);
+                }
+            }
+        }
+        break;
+
+    case 1:
+        /* Fraps v1 is an upside-down BGR24 */
+        avctx->pix_fmt = PIX_FMT_BGR24;
+
+        if ( (buf_size != avctx->width*avctx->height*3+header_size) &&
+             (buf_size != header_size) ) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Invalid frame length %d (should be %d)\n",
+                   buf_size, avctx->width*avctx->height*3+header_size);
+            return -1;
+        }
+
+        f->reference = 1;
+        f->buffer_hints = FF_BUFFER_HINTS_VALID |
+                          FF_BUFFER_HINTS_PRESERVE |
+                          FF_BUFFER_HINTS_REUSABLE;
+        if (avctx->reget_buffer(avctx, f)) {
+            av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+            return -1;
+        }
+        /* bit 31 means same as previous pic */
+        f->pict_type = (header & (1<<31))? FF_P_TYPE : FF_I_TYPE;
+        f->key_frame = f->pict_type == FF_I_TYPE;
+
+        if (f->pict_type == FF_I_TYPE) {
+            for(y=0; y<avctx->height; y++)
+                memcpy(&f->data[0][ (avctx->height-y)*f->linesize[0] ],
+                       &buf[y*avctx->width*3],
+                       f->linesize[0]);
+        }
+        break;
+
+    case 2:
+    case 4:
+        /**
+         * Fraps v2 is Huffman-coded YUV420 planes
+         * Fraps v4 is virtually the same
+         */
+        avctx->pix_fmt = PIX_FMT_YUV420P;
+        planes = 3;
+        f->reference = 1;
+        f->buffer_hints = FF_BUFFER_HINTS_VALID |
+                          FF_BUFFER_HINTS_PRESERVE |
+                          FF_BUFFER_HINTS_REUSABLE;
+        if (avctx->reget_buffer(avctx, f)) {
+            av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+            return -1;
+        }
+        /* skip frame */
+        if(buf_size == 8) {
+            f->pict_type = FF_P_TYPE;
+            f->key_frame = 0;
+            break;
+        }
+        f->pict_type = FF_I_TYPE;
+        f->key_frame = 1;
+        if ((LE_32(buf) != FPS_TAG)||(buf_size < (planes*1024 + 24))) {
+            av_log(avctx, AV_LOG_ERROR, "Fraps: error in data stream\n");
+            return -1;
+        }
+        for(i = 0; i < planes; i++) {
+            offs[i] = LE_32(buf + 4 + i * 4);
+            if(offs[i] >= buf_size || (i && offs[i] <= offs[i - 1] + 1024)) {
+                av_log(avctx, AV_LOG_ERROR, "Fraps: plane %i offset is out of bounds\n", i);
+                return -1;
+            }
+        }
+        offs[planes] = buf_size;
+        for(i = 0; i < planes; i++){
+            is_chroma = !!i;
+            s->tmpbuf = av_realloc(s->tmpbuf, offs[i + 1] - offs[i] - 1024 + FF_INPUT_BUFFER_PADDING_SIZE);
+            if(fraps2_decode_plane(s, f->data[i], f->linesize[i], avctx->width >> is_chroma,
+                    avctx->height >> is_chroma, buf + offs[i], offs[i + 1] - offs[i], is_chroma) < 0) {
+                av_log(avctx, AV_LOG_ERROR, "Error decoding plane %i\n", i);
+                return -1;
+            }
+        }
+        break;
+    }
+
+    *frame = *f;
+    *data_size = sizeof(AVFrame);
+
+    return buf_size;
+}
+
+
+/**
+ * closes decoder
+ * @param avctx codec context
+ * @return 0 on success or negative if fails
+ */
+static int decode_end(AVCodecContext *avctx)
+{
+    FrapsContext *s = (FrapsContext*)avctx->priv_data;
+
+    if (s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    av_freep(&s->tmpbuf);
+    return 0;
+}
+
+
+AVCodec fraps_decoder = {
+    "fraps",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_FRAPS,
+    sizeof(FrapsContext),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/g726.c b/contrib/ffmpeg/libavcodec/g726.c
new file mode 100644
index 000000000..c509292b6
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/g726.c
@@ -0,0 +1,429 @@
+/*
+ * G.726 ADPCM audio codec
+ * Copyright (c) 2004 Roman Shaposhnik.
+ *
+ * This is a very straightforward rendition of the G.726
+ * Section 4 "Computational Details".
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <limits.h>
+#include "avcodec.h"
+#include "common.h"
+#include "bitstream.h"
+
+/**
+ * G.726 11bit float.
+ * G.726 Standard uses rather odd 11bit floating point arithmentic for
+ * numerous occasions. It's a mistery to me why they did it this way
+ * instead of simply using 32bit integer arithmetic.
+ */
+typedef struct Float11 {
+    int sign;   /**< 1bit sign */
+    int exp;    /**< 4bit exponent */
+    int mant;   /**< 6bit mantissa */
+} Float11;
+
+static inline Float11* i2f(int16_t i, Float11* f)
+{
+    f->sign = (i < 0);
+    if (f->sign)
+        i = -i;
+    f->exp = av_log2_16bit(i) + !!i;
+    f->mant = i? (i<<6) >> f->exp : 1<<5;
+    return f;
+}
+
+static inline int16_t mult(Float11* f1, Float11* f2)
+{
+        int res, exp;
+
+        exp = f1->exp + f2->exp;
+        res = (((f1->mant * f2->mant) + 0x30) >> 4) << 7;
+        res = exp > 26 ? res << (exp - 26) : res >> (26 - exp);
+        return (f1->sign ^ f2->sign) ? -res : res;
+}
+
+static inline int sgn(int value)
+{
+    return (value < 0) ? -1 : 1;
+}
+
+typedef struct G726Tables {
+    int  bits;          /**< bits per sample */
+    int* quant;         /**< quantization table */
+    int* iquant;        /**< inverse quantization table */
+    int* W;             /**< special table #1 ;-) */
+    int* F;             /**< special table #2 */
+} G726Tables;
+
+typedef struct G726Context {
+    G726Tables* tbls;   /**< static tables needed for computation */
+
+    Float11 sr[2];      /**< prev. reconstructed samples */
+    Float11 dq[6];      /**< prev. difference */
+    int a[2];           /**< second order predictor coeffs */
+    int b[6];           /**< sixth order predictor coeffs */
+    int pk[2];          /**< signs of prev. 2 sez + dq */
+
+    int ap;             /**< scale factor control */
+    int yu;             /**< fast scale factor */
+    int yl;             /**< slow scale factor */
+    int dms;            /**< short average magnitude of F[i] */
+    int dml;            /**< long average magnitude of F[i] */
+    int td;             /**< tone detect */
+
+    int se;             /**< estimated signal for the next iteration */
+    int sez;            /**< estimated second order prediction */
+    int y;              /**< quantizer scaling factor for the next iteration */
+} G726Context;
+
+static int quant_tbl16[] =                  /**< 16kbit/s 2bits per sample */
+           { 260, INT_MAX };
+static int iquant_tbl16[] =
+           { 116, 365, 365, 116 };
+static int W_tbl16[] =
+           { -22, 439, 439, -22 };
+static int F_tbl16[] =
+           { 0, 7, 7, 0 };
+
+static int quant_tbl24[] =                  /**< 24kbit/s 3bits per sample */
+           {  7, 217, 330, INT_MAX };
+static int iquant_tbl24[] =
+           { INT_MIN, 135, 273, 373, 373, 273, 135, INT_MIN };
+static int W_tbl24[] =
+           { -4,  30, 137, 582, 582, 137,  30, -4 };
+static int F_tbl24[] =
+           { 0, 1, 2, 7, 7, 2, 1, 0 };
+
+static int quant_tbl32[] =                  /**< 32kbit/s 4bits per sample */
+           { -125,  79, 177, 245, 299, 348, 399, INT_MAX };
+static int iquant_tbl32[] =
+           { INT_MIN,   4, 135, 213, 273, 323, 373, 425,
+                 425, 373, 323, 273, 213, 135,   4, INT_MIN };
+static int W_tbl32[] =
+           { -12,  18,  41,  64, 112, 198, 355, 1122,
+            1122, 355, 198, 112,  64,  41,  18, -12};
+static int F_tbl32[] =
+           { 0, 0, 0, 1, 1, 1, 3, 7, 7, 3, 1, 1, 1, 0, 0, 0 };
+
+static int quant_tbl40[] =                  /**< 40kbit/s 5bits per sample */
+           { -122, -16,  67, 138, 197, 249, 297, 338,
+              377, 412, 444, 474, 501, 527, 552, INT_MAX };
+static int iquant_tbl40[] =
+           { INT_MIN, -66,  28, 104, 169, 224, 274, 318,
+                 358, 395, 429, 459, 488, 514, 539, 566,
+                 566, 539, 514, 488, 459, 429, 395, 358,
+                 318, 274, 224, 169, 104,  28, -66, INT_MIN };
+static int W_tbl40[] =
+           {   14,  14,  24,  39,  40,  41,   58,  100,
+              141, 179, 219, 280, 358, 440,  529,  696,
+              696, 529, 440, 358, 280, 219,  179,  141,
+              100,  58,  41,  40,  39,  24,   14,   14 };
+static int F_tbl40[] =
+           { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 6,
+             6, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
+
+static G726Tables G726Tables_pool[] =
+           {{ 2, quant_tbl16, iquant_tbl16, W_tbl16, F_tbl16 },
+            { 3, quant_tbl24, iquant_tbl24, W_tbl24, F_tbl24 },
+            { 4, quant_tbl32, iquant_tbl32, W_tbl32, F_tbl32 },
+            { 5, quant_tbl40, iquant_tbl40, W_tbl40, F_tbl40 }};
+
+
+/**
+ * Para 4.2.2 page 18: Adaptive quantizer.
+ */
+static inline uint8_t quant(G726Context* c, int d)
+{
+    int sign, exp, i, dln;
+
+    sign = i = 0;
+    if (d < 0) {
+        sign = 1;
+        d = -d;
+    }
+    exp = av_log2_16bit(d);
+    dln = ((exp<<7) + (((d<<7)>>exp)&0x7f)) - (c->y>>2);
+
+    while (c->tbls->quant[i] < INT_MAX && c->tbls->quant[i] < dln)
+        ++i;
+
+    if (sign)
+        i = ~i;
+    if (c->tbls->bits != 2 && i == 0) /* I'm not sure this is a good idea */
+        i = 0xff;
+
+    return i;
+}
+
+/**
+ * Para 4.2.3 page 22: Inverse adaptive quantizer.
+ */
+static inline int16_t inverse_quant(G726Context* c, int i)
+{
+    int dql, dex, dqt;
+
+    dql = c->tbls->iquant[i] + (c->y >> 2);
+    dex = (dql>>7) & 0xf;        /* 4bit exponent */
+    dqt = (1<<7) + (dql & 0x7f); /* log2 -> linear */
+    return (dql < 0) ? 0 : ((dqt<<7) >> (14-dex));
+}
+
+static inline int16_t g726_iterate(G726Context* c, int16_t I)
+{
+    int dq, re_signal, pk0, fa1, i, tr, ylint, ylfrac, thr2, al, dq0;
+    Float11 f;
+
+    dq = inverse_quant(c, I);
+    if (I >> (c->tbls->bits - 1))  /* get the sign */
+        dq = -dq;
+    re_signal = c->se + dq;
+
+    /* Transition detect */
+    ylint = (c->yl >> 15);
+    ylfrac = (c->yl >> 10) & 0x1f;
+    thr2 = (ylint > 9) ? 0x1f << 10 : (0x20 + ylfrac) << ylint;
+    if (c->td == 1 && abs(dq) > ((thr2+(thr2>>1))>>1))
+        tr = 1;
+    else
+        tr = 0;
+
+    /* Update second order predictor coefficient A2 and A1 */
+    pk0 = (c->sez + dq) ? sgn(c->sez + dq) : 0;
+    dq0 = dq ? sgn(dq) : 0;
+    if (tr) {
+        c->a[0] = 0;
+        c->a[1] = 0;
+        for (i=0; i<6; i++)
+            c->b[i] = 0;
+    } else {
+        /* This is a bit crazy, but it really is +255 not +256 */
+        fa1 = clip((-c->a[0]*c->pk[0]*pk0)>>5, -256, 255);
+
+        c->a[1] += 128*pk0*c->pk[1] + fa1 - (c->a[1]>>7);
+        c->a[1] = clip(c->a[1], -12288, 12288);
+        c->a[0] += 64*3*pk0*c->pk[0] - (c->a[0] >> 8);
+        c->a[0] = clip(c->a[0], -(15360 - c->a[1]), 15360 - c->a[1]);
+
+        for (i=0; i<6; i++)
+            c->b[i] += 128*dq0*sgn(-c->dq[i].sign) - (c->b[i]>>8);
+    }
+
+    /* Update Dq and Sr and Pk */
+    c->pk[1] = c->pk[0];
+    c->pk[0] = pk0 ? pk0 : 1;
+    c->sr[1] = c->sr[0];
+    i2f(re_signal, &c->sr[0]);
+    for (i=5; i>0; i--)
+        c->dq[i] = c->dq[i-1];
+    i2f(dq, &c->dq[0]);
+    c->dq[0].sign = I >> (c->tbls->bits - 1); /* Isn't it crazy ?!?! */
+
+    /* Update tone detect [I'm not sure 'tr == 0' is really needed] */
+    c->td = (tr == 0 && c->a[1] < -11776);
+
+    /* Update Ap */
+    c->dms += ((c->tbls->F[I]<<9) - c->dms) >> 5;
+    c->dml += ((c->tbls->F[I]<<11) - c->dml) >> 7;
+    if (tr)
+        c->ap = 256;
+    else if (c->y > 1535 && !c->td && (abs((c->dms << 2) - c->dml) < (c->dml >> 3)))
+        c->ap += (-c->ap) >> 4;
+    else
+        c->ap += (0x200 - c->ap) >> 4;
+
+    /* Update Yu and Yl */
+    c->yu = clip(c->y + (((c->tbls->W[I] << 5) - c->y) >> 5), 544, 5120);
+    c->yl += c->yu + ((-c->yl)>>6);
+
+    /* Next iteration for Y */
+    al = (c->ap >= 256) ? 1<<6 : c->ap >> 2;
+    c->y = (c->yl + (c->yu - (c->yl>>6))*al) >> 6;
+
+    /* Next iteration for SE and SEZ */
+    c->se = 0;
+    for (i=0; i<6; i++)
+        c->se += mult(i2f(c->b[i] >> 2, &f), &c->dq[i]);
+    c->sez = c->se >> 1;
+    for (i=0; i<2; i++)
+        c->se += mult(i2f(c->a[i] >> 2, &f), &c->sr[i]);
+    c->se >>= 1;
+
+    return clip(re_signal << 2, -0xffff, 0xffff);
+}
+
+static int g726_reset(G726Context* c, int bit_rate)
+{
+    int i;
+
+    c->tbls = &G726Tables_pool[bit_rate/8000 - 2];
+    for (i=0; i<2; i++) {
+        i2f(0, &c->sr[i]);
+        c->a[i] = 0;
+        c->pk[i] = 1;
+    }
+    for (i=0; i<6; i++) {
+        i2f(0, &c->dq[i]);
+        c->b[i] = 0;
+    }
+    c->ap = 0;
+    c->dms = 0;
+    c->dml = 0;
+    c->yu = 544;
+    c->yl = 34816;
+    c->td = 0;
+
+    c->se = 0;
+    c->sez = 0;
+    c->y = 544;
+
+    return 0;
+}
+
+static int16_t g726_decode(G726Context* c, int16_t i)
+{
+    return g726_iterate(c, i);
+}
+
+#ifdef CONFIG_ENCODERS
+static int16_t g726_encode(G726Context* c, int16_t sig)
+{
+    uint8_t i;
+
+    i = quant(c, sig/4 - c->se) & ((1<<c->tbls->bits) - 1);
+    g726_iterate(c, i);
+    return i;
+}
+#endif
+
+/* Interfacing to the libavcodec */
+
+typedef struct AVG726Context {
+    G726Context c;
+    int bits_left;
+    int bit_buffer;
+    int code_size;
+} AVG726Context;
+
+static int g726_init(AVCodecContext * avctx)
+{
+    AVG726Context* c = (AVG726Context*)avctx->priv_data;
+
+    if (avctx->channels != 1 ||
+        (avctx->bit_rate != 16000 && avctx->bit_rate != 24000 &&
+         avctx->bit_rate != 32000 && avctx->bit_rate != 40000)) {
+        av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n");
+        return -1;
+    }
+    if (avctx->sample_rate != 8000 && avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL) {
+        av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n");
+        return -1;
+    }
+    g726_reset(&c->c, avctx->bit_rate);
+    c->code_size = c->c.tbls->bits;
+    c->bit_buffer = 0;
+    c->bits_left = 0;
+
+    avctx->coded_frame = avcodec_alloc_frame();
+    if (!avctx->coded_frame)
+        return -ENOMEM;
+    avctx->coded_frame->key_frame = 1;
+
+    return 0;
+}
+
+static int g726_close(AVCodecContext *avctx)
+{
+    av_freep(&avctx->coded_frame);
+    return 0;
+}
+
+#ifdef CONFIG_ENCODERS
+static int g726_encode_frame(AVCodecContext *avctx,
+                            uint8_t *dst, int buf_size, void *data)
+{
+    AVG726Context *c = avctx->priv_data;
+    short *samples = data;
+    PutBitContext pb;
+
+    init_put_bits(&pb, dst, 1024*1024);
+
+    for (; buf_size; buf_size--)
+        put_bits(&pb, c->code_size, g726_encode(&c->c, *samples++));
+
+    flush_put_bits(&pb);
+
+    return put_bits_count(&pb)>>3;
+}
+#endif
+
+static int g726_decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             uint8_t *buf, int buf_size)
+{
+    AVG726Context *c = avctx->priv_data;
+    short *samples = data;
+    uint8_t code;
+    uint8_t mask;
+    GetBitContext gb;
+
+    if (!buf_size)
+        goto out;
+
+    mask = (1<<c->code_size) - 1;
+    init_get_bits(&gb, buf, buf_size * 8);
+    if (c->bits_left) {
+        int s = c->code_size - c->bits_left;;
+        code = (c->bit_buffer << s) | get_bits(&gb, s);
+        *samples++ = g726_decode(&c->c, code & mask);
+    }
+
+    while (get_bits_count(&gb) + c->code_size <= buf_size*8)
+        *samples++ = g726_decode(&c->c, get_bits(&gb, c->code_size) & mask);
+
+    c->bits_left = buf_size*8 - get_bits_count(&gb);
+    c->bit_buffer = get_bits(&gb, c->bits_left);
+
+out:
+    *data_size = (uint8_t*)samples - (uint8_t*)data;
+    return buf_size;
+}
+
+#ifdef CONFIG_ENCODERS
+AVCodec adpcm_g726_encoder = {
+    "g726",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_ADPCM_G726,
+    sizeof(AVG726Context),
+    g726_init,
+    g726_encode_frame,
+    g726_close,
+    NULL,
+};
+#endif //CONFIG_ENCODERS
+
+AVCodec adpcm_g726_decoder = {
+    "g726",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_ADPCM_G726,
+    sizeof(AVG726Context),
+    g726_init,
+    NULL,
+    g726_close,
+    g726_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/gif.c b/contrib/ffmpeg/libavcodec/gif.c
new file mode 100644
index 000000000..f67ab52c2
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/gif.c
@@ -0,0 +1,350 @@
+/*
+ * GIF encoder.
+ * Copyright (c) 2000 Fabrice Bellard.
+ * Copyright (c) 2002 Francois Revol.
+ * Copyright (c) 2006 Baptiste Coudurier.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * First version by Francois Revol revol@free.fr
+ *
+ * Features and limitations:
+ * - currently no compression is performed,
+ *   in fact the size of the data is 9/8 the size of the image in 8bpp
+ * - uses only a global standard palette
+ * - tested with IE 5.0, Opera for BeOS, NetPositive (BeOS), and Mozilla (BeOS).
+ *
+ * Reference documents:
+ * http://www.goice.co.jp/member/mo/formats/gif.html
+ * http://astronomy.swin.edu.au/pbourke/dataformats/gif/
+ * http://www.dcs.ed.ac.uk/home/mxr/gfx/2d/GIF89a.txt
+ *
+ * this url claims to have an LZW algorithm not covered by Unisys patent:
+ * http://www.msg.net/utility/whirlgif/gifencod.html
+ * could help reduce the size of the files _a lot_...
+ * some sites mentions an RLE type compression also.
+ */
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "bitstream.h"
+
+/* bitstream minipacket size */
+#define GIF_CHUNKS 100
+
+/* slows down the decoding (and some browsers don't like it) */
+/* update on the 'some browsers don't like it issue from above: this was probably due to missing 'Data Sub-block Terminator' (byte 19) in the app_header */
+#define GIF_ADD_APP_HEADER // required to enable looping of animated gif
+
+typedef struct {
+    unsigned char r;
+    unsigned char g;
+    unsigned char b;
+} rgb_triplet;
+
+/* we use the standard 216 color palette */
+
+/* this script was used to create the palette:
+ * for r in 00 33 66 99 cc ff; do for g in 00 33 66 99 cc ff; do echo -n "    "; for b in 00 33 66 99 cc ff; do
+ *   echo -n "{ 0x$r, 0x$g, 0x$b }, "; done; echo ""; done; done
+ */
+
+static const rgb_triplet gif_clut[216] = {
+    { 0x00, 0x00, 0x00 }, { 0x00, 0x00, 0x33 }, { 0x00, 0x00, 0x66 }, { 0x00, 0x00, 0x99 }, { 0x00, 0x00, 0xcc }, { 0x00, 0x00, 0xff },
+    { 0x00, 0x33, 0x00 }, { 0x00, 0x33, 0x33 }, { 0x00, 0x33, 0x66 }, { 0x00, 0x33, 0x99 }, { 0x00, 0x33, 0xcc }, { 0x00, 0x33, 0xff },
+    { 0x00, 0x66, 0x00 }, { 0x00, 0x66, 0x33 }, { 0x00, 0x66, 0x66 }, { 0x00, 0x66, 0x99 }, { 0x00, 0x66, 0xcc }, { 0x00, 0x66, 0xff },
+    { 0x00, 0x99, 0x00 }, { 0x00, 0x99, 0x33 }, { 0x00, 0x99, 0x66 }, { 0x00, 0x99, 0x99 }, { 0x00, 0x99, 0xcc }, { 0x00, 0x99, 0xff },
+    { 0x00, 0xcc, 0x00 }, { 0x00, 0xcc, 0x33 }, { 0x00, 0xcc, 0x66 }, { 0x00, 0xcc, 0x99 }, { 0x00, 0xcc, 0xcc }, { 0x00, 0xcc, 0xff },
+    { 0x00, 0xff, 0x00 }, { 0x00, 0xff, 0x33 }, { 0x00, 0xff, 0x66 }, { 0x00, 0xff, 0x99 }, { 0x00, 0xff, 0xcc }, { 0x00, 0xff, 0xff },
+    { 0x33, 0x00, 0x00 }, { 0x33, 0x00, 0x33 }, { 0x33, 0x00, 0x66 }, { 0x33, 0x00, 0x99 }, { 0x33, 0x00, 0xcc }, { 0x33, 0x00, 0xff },
+    { 0x33, 0x33, 0x00 }, { 0x33, 0x33, 0x33 }, { 0x33, 0x33, 0x66 }, { 0x33, 0x33, 0x99 }, { 0x33, 0x33, 0xcc }, { 0x33, 0x33, 0xff },
+    { 0x33, 0x66, 0x00 }, { 0x33, 0x66, 0x33 }, { 0x33, 0x66, 0x66 }, { 0x33, 0x66, 0x99 }, { 0x33, 0x66, 0xcc }, { 0x33, 0x66, 0xff },
+    { 0x33, 0x99, 0x00 }, { 0x33, 0x99, 0x33 }, { 0x33, 0x99, 0x66 }, { 0x33, 0x99, 0x99 }, { 0x33, 0x99, 0xcc }, { 0x33, 0x99, 0xff },
+    { 0x33, 0xcc, 0x00 }, { 0x33, 0xcc, 0x33 }, { 0x33, 0xcc, 0x66 }, { 0x33, 0xcc, 0x99 }, { 0x33, 0xcc, 0xcc }, { 0x33, 0xcc, 0xff },
+    { 0x33, 0xff, 0x00 }, { 0x33, 0xff, 0x33 }, { 0x33, 0xff, 0x66 }, { 0x33, 0xff, 0x99 }, { 0x33, 0xff, 0xcc }, { 0x33, 0xff, 0xff },
+    { 0x66, 0x00, 0x00 }, { 0x66, 0x00, 0x33 }, { 0x66, 0x00, 0x66 }, { 0x66, 0x00, 0x99 }, { 0x66, 0x00, 0xcc }, { 0x66, 0x00, 0xff },
+    { 0x66, 0x33, 0x00 }, { 0x66, 0x33, 0x33 }, { 0x66, 0x33, 0x66 }, { 0x66, 0x33, 0x99 }, { 0x66, 0x33, 0xcc }, { 0x66, 0x33, 0xff },
+    { 0x66, 0x66, 0x00 }, { 0x66, 0x66, 0x33 }, { 0x66, 0x66, 0x66 }, { 0x66, 0x66, 0x99 }, { 0x66, 0x66, 0xcc }, { 0x66, 0x66, 0xff },
+    { 0x66, 0x99, 0x00 }, { 0x66, 0x99, 0x33 }, { 0x66, 0x99, 0x66 }, { 0x66, 0x99, 0x99 }, { 0x66, 0x99, 0xcc }, { 0x66, 0x99, 0xff },
+    { 0x66, 0xcc, 0x00 }, { 0x66, 0xcc, 0x33 }, { 0x66, 0xcc, 0x66 }, { 0x66, 0xcc, 0x99 }, { 0x66, 0xcc, 0xcc }, { 0x66, 0xcc, 0xff },
+    { 0x66, 0xff, 0x00 }, { 0x66, 0xff, 0x33 }, { 0x66, 0xff, 0x66 }, { 0x66, 0xff, 0x99 }, { 0x66, 0xff, 0xcc }, { 0x66, 0xff, 0xff },
+    { 0x99, 0x00, 0x00 }, { 0x99, 0x00, 0x33 }, { 0x99, 0x00, 0x66 }, { 0x99, 0x00, 0x99 }, { 0x99, 0x00, 0xcc }, { 0x99, 0x00, 0xff },
+    { 0x99, 0x33, 0x00 }, { 0x99, 0x33, 0x33 }, { 0x99, 0x33, 0x66 }, { 0x99, 0x33, 0x99 }, { 0x99, 0x33, 0xcc }, { 0x99, 0x33, 0xff },
+    { 0x99, 0x66, 0x00 }, { 0x99, 0x66, 0x33 }, { 0x99, 0x66, 0x66 }, { 0x99, 0x66, 0x99 }, { 0x99, 0x66, 0xcc }, { 0x99, 0x66, 0xff },
+    { 0x99, 0x99, 0x00 }, { 0x99, 0x99, 0x33 }, { 0x99, 0x99, 0x66 }, { 0x99, 0x99, 0x99 }, { 0x99, 0x99, 0xcc }, { 0x99, 0x99, 0xff },
+    { 0x99, 0xcc, 0x00 }, { 0x99, 0xcc, 0x33 }, { 0x99, 0xcc, 0x66 }, { 0x99, 0xcc, 0x99 }, { 0x99, 0xcc, 0xcc }, { 0x99, 0xcc, 0xff },
+    { 0x99, 0xff, 0x00 }, { 0x99, 0xff, 0x33 }, { 0x99, 0xff, 0x66 }, { 0x99, 0xff, 0x99 }, { 0x99, 0xff, 0xcc }, { 0x99, 0xff, 0xff },
+    { 0xcc, 0x00, 0x00 }, { 0xcc, 0x00, 0x33 }, { 0xcc, 0x00, 0x66 }, { 0xcc, 0x00, 0x99 }, { 0xcc, 0x00, 0xcc }, { 0xcc, 0x00, 0xff },
+    { 0xcc, 0x33, 0x00 }, { 0xcc, 0x33, 0x33 }, { 0xcc, 0x33, 0x66 }, { 0xcc, 0x33, 0x99 }, { 0xcc, 0x33, 0xcc }, { 0xcc, 0x33, 0xff },
+    { 0xcc, 0x66, 0x00 }, { 0xcc, 0x66, 0x33 }, { 0xcc, 0x66, 0x66 }, { 0xcc, 0x66, 0x99 }, { 0xcc, 0x66, 0xcc }, { 0xcc, 0x66, 0xff },
+    { 0xcc, 0x99, 0x00 }, { 0xcc, 0x99, 0x33 }, { 0xcc, 0x99, 0x66 }, { 0xcc, 0x99, 0x99 }, { 0xcc, 0x99, 0xcc }, { 0xcc, 0x99, 0xff },
+    { 0xcc, 0xcc, 0x00 }, { 0xcc, 0xcc, 0x33 }, { 0xcc, 0xcc, 0x66 }, { 0xcc, 0xcc, 0x99 }, { 0xcc, 0xcc, 0xcc }, { 0xcc, 0xcc, 0xff },
+    { 0xcc, 0xff, 0x00 }, { 0xcc, 0xff, 0x33 }, { 0xcc, 0xff, 0x66 }, { 0xcc, 0xff, 0x99 }, { 0xcc, 0xff, 0xcc }, { 0xcc, 0xff, 0xff },
+    { 0xff, 0x00, 0x00 }, { 0xff, 0x00, 0x33 }, { 0xff, 0x00, 0x66 }, { 0xff, 0x00, 0x99 }, { 0xff, 0x00, 0xcc }, { 0xff, 0x00, 0xff },
+    { 0xff, 0x33, 0x00 }, { 0xff, 0x33, 0x33 }, { 0xff, 0x33, 0x66 }, { 0xff, 0x33, 0x99 }, { 0xff, 0x33, 0xcc }, { 0xff, 0x33, 0xff },
+    { 0xff, 0x66, 0x00 }, { 0xff, 0x66, 0x33 }, { 0xff, 0x66, 0x66 }, { 0xff, 0x66, 0x99 }, { 0xff, 0x66, 0xcc }, { 0xff, 0x66, 0xff },
+    { 0xff, 0x99, 0x00 }, { 0xff, 0x99, 0x33 }, { 0xff, 0x99, 0x66 }, { 0xff, 0x99, 0x99 }, { 0xff, 0x99, 0xcc }, { 0xff, 0x99, 0xff },
+    { 0xff, 0xcc, 0x00 }, { 0xff, 0xcc, 0x33 }, { 0xff, 0xcc, 0x66 }, { 0xff, 0xcc, 0x99 }, { 0xff, 0xcc, 0xcc }, { 0xff, 0xcc, 0xff },
+    { 0xff, 0xff, 0x00 }, { 0xff, 0xff, 0x33 }, { 0xff, 0xff, 0x66 }, { 0xff, 0xff, 0x99 }, { 0xff, 0xff, 0xcc }, { 0xff, 0xff, 0xff },
+};
+
+/* The GIF format uses reversed order for bitstreams... */
+/* at least they don't use PDP_ENDIAN :) */
+/* so we 'extend' PutBitContext. hmmm, OOP :) */
+/* seems this thing changed slightly since I wrote it... */
+
+#ifdef ALT_BITSTREAM_WRITER
+# error no ALT_BITSTREAM_WRITER support for now
+#endif
+
+static void gif_put_bits_rev(PutBitContext *s, int n, unsigned int value)
+{
+    unsigned int bit_buf;
+    int bit_cnt;
+
+    //    printf("put_bits=%d %x\n", n, value);
+    assert(n == 32 || value < (1U << n));
+
+    bit_buf = s->bit_buf;
+    bit_cnt = 32 - s->bit_left; /* XXX:lazyness... was = s->bit_cnt; */
+
+    //    printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf);
+    /* XXX: optimize */
+    if (n < (32-bit_cnt)) {
+        bit_buf |= value << (bit_cnt);
+        bit_cnt+=n;
+    } else {
+        bit_buf |= value << (bit_cnt);
+
+        *s->buf_ptr = bit_buf & 0xff;
+        s->buf_ptr[1] = (bit_buf >> 8) & 0xff;
+        s->buf_ptr[2] = (bit_buf >> 16) & 0xff;
+        s->buf_ptr[3] = (bit_buf >> 24) & 0xff;
+
+        //printf("bitbuf = %08x\n", bit_buf);
+        s->buf_ptr+=4;
+        if (s->buf_ptr >= s->buf_end)
+            puts("bit buffer overflow !!"); // should never happen ! who got rid of the callback ???
+//            flush_buffer_rev(s);
+        bit_cnt=bit_cnt + n - 32;
+        if (bit_cnt == 0) {
+            bit_buf = 0;
+        } else {
+            bit_buf = value >> (n - bit_cnt);
+        }
+    }
+
+    s->bit_buf = bit_buf;
+    s->bit_left = 32 - bit_cnt;
+}
+
+/* pad the end of the output stream with zeros */
+static void gif_flush_put_bits_rev(PutBitContext *s)
+{
+    while (s->bit_left < 32) {
+        /* XXX: should test end of buffer */
+        *s->buf_ptr++=s->bit_buf & 0xff;
+        s->bit_buf>>=8;
+        s->bit_left+=8;
+    }
+//    flush_buffer_rev(s);
+    s->bit_left=32;
+    s->bit_buf=0;
+}
+
+/* !RevPutBitContext */
+
+/* GIF header */
+static int gif_image_write_header(uint8_t **bytestream,
+                                  int width, int height, int loop_count,
+                                  uint32_t *palette)
+{
+    int i;
+    unsigned int v;
+
+    bytestream_put_buffer(bytestream, "GIF", 3);
+    bytestream_put_buffer(bytestream, "89a", 3);
+    bytestream_put_le16(bytestream, width);
+    bytestream_put_le16(bytestream, height);
+
+    bytestream_put_byte(bytestream, 0xf7); /* flags: global clut, 256 entries */
+    bytestream_put_byte(bytestream, 0x1f); /* background color index */
+    bytestream_put_byte(bytestream, 0); /* aspect ratio */
+
+    /* the global palette */
+    if (!palette) {
+        bytestream_put_buffer(bytestream, (const unsigned char *)gif_clut, 216*3);
+        for(i=0;i<((256-216)*3);i++)
+            bytestream_put_byte(bytestream, 0);
+    } else {
+        for(i=0;i<256;i++) {
+            v = palette[i];
+            bytestream_put_byte(bytestream, (v >> 16) & 0xff);
+            bytestream_put_byte(bytestream, (v >> 8) & 0xff);
+            bytestream_put_byte(bytestream, (v) & 0xff);
+        }
+    }
+
+        /*        update: this is the 'NETSCAPE EXTENSION' that allows for looped animated gif
+                see http://members.aol.com/royalef/gifabout.htm#net-extension
+
+                byte   1       : 33 (hex 0x21) GIF Extension code
+                byte   2       : 255 (hex 0xFF) Application Extension Label
+                byte   3       : 11 (hex (0x0B) Length of Application Block
+                                         (eleven bytes of data to follow)
+                bytes  4 to 11 : "NETSCAPE"
+                bytes 12 to 14 : "2.0"
+                byte  15       : 3 (hex 0x03) Length of Data Sub-Block
+                                         (three bytes of data to follow)
+                byte  16       : 1 (hex 0x01)
+                bytes 17 to 18 : 0 to 65535, an unsigned integer in
+                                         lo-hi byte format. This indicate the
+                                         number of iterations the loop should
+                                         be executed.
+                bytes 19       : 0 (hex 0x00) a Data Sub-block Terminator
+        */
+
+    /* application extension header */
+#ifdef GIF_ADD_APP_HEADER
+    if (loop_count >= 0 && loop_count <= 65535) {
+        bytestream_put_byte(bytestream, 0x21);
+        bytestream_put_byte(bytestream, 0xff);
+        bytestream_put_byte(bytestream, 0x0b);
+        bytestream_put_buffer(bytestream, "NETSCAPE2.0", 11);  // bytes 4 to 14
+        bytestream_put_byte(bytestream, 0x03); // byte 15
+        bytestream_put_byte(bytestream, 0x01); // byte 16
+        bytestream_put_le16(bytestream, (uint16_t)loop_count);
+        bytestream_put_byte(bytestream, 0x00); // byte 19
+    }
+#endif
+    return 0;
+}
+
+/* this is maybe slow, but allows for extensions */
+static inline unsigned char gif_clut_index(uint8_t r, uint8_t g, uint8_t b)
+{
+    return ((((r)/47)%6)*6*6+(((g)/47)%6)*6+(((b)/47)%6));
+}
+
+
+static int gif_image_write_image(uint8_t **bytestream,
+                                 int x1, int y1, int width, int height,
+                                 const uint8_t *buf, int linesize, int pix_fmt)
+{
+    PutBitContext p;
+    uint8_t buffer[200]; /* 100 * 9 / 8 = 113 */
+    int i, left, w, v;
+    const uint8_t *ptr;
+    /* image block */
+
+    bytestream_put_byte(bytestream, 0x2c);
+    bytestream_put_le16(bytestream, x1);
+    bytestream_put_le16(bytestream, y1);
+    bytestream_put_le16(bytestream, width);
+    bytestream_put_le16(bytestream, height);
+    bytestream_put_byte(bytestream, 0x00); /* flags */
+    /* no local clut */
+
+    bytestream_put_byte(bytestream, 0x08);
+
+    left= width * height;
+
+    init_put_bits(&p, buffer, 130);
+
+/*
+ * the thing here is the bitstream is written as little packets, with a size byte before
+ * but it's still the same bitstream between packets (no flush !)
+ */
+    ptr = buf;
+    w = width;
+    while(left>0) {
+
+        gif_put_bits_rev(&p, 9, 0x0100); /* clear code */
+
+        for(i=(left<GIF_CHUNKS)?left:GIF_CHUNKS;i;i--) {
+            if (pix_fmt == PIX_FMT_RGB24) {
+                v = gif_clut_index(ptr[0], ptr[1], ptr[2]);
+                ptr+=3;
+            } else {
+                v = *ptr++;
+            }
+            gif_put_bits_rev(&p, 9, v);
+            if (--w == 0) {
+                w = width;
+                buf += linesize;
+                ptr = buf;
+            }
+        }
+
+        if(left<=GIF_CHUNKS) {
+            gif_put_bits_rev(&p, 9, 0x101); /* end of stream */
+            gif_flush_put_bits_rev(&p);
+        }
+        if(pbBufPtr(&p) - p.buf > 0) {
+            bytestream_put_byte(bytestream, pbBufPtr(&p) - p.buf); /* byte count of the packet */
+            bytestream_put_buffer(bytestream, p.buf, pbBufPtr(&p) - p.buf); /* the actual buffer */
+            p.buf_ptr = p.buf; /* dequeue the bytes off the bitstream */
+        }
+        left-=GIF_CHUNKS;
+    }
+    bytestream_put_byte(bytestream, 0x00); /* end of image block */
+    bytestream_put_byte(bytestream, 0x3b);
+    return 0;
+}
+
+typedef struct {
+    int64_t time, file_time;
+    uint8_t buffer[100]; /* data chunks */
+    AVFrame picture;
+} GIFContext;
+
+static int gif_encode_init(AVCodecContext *avctx)
+{
+    GIFContext *s = avctx->priv_data;
+
+    avctx->coded_frame = &s->picture;
+    return 0;
+}
+
+/* better than nothing gif encoder */
+static int gif_encode_frame(AVCodecContext *avctx, unsigned char *outbuf, int buf_size, void *data)
+{
+    GIFContext *s = avctx->priv_data;
+    AVFrame *pict = data;
+    AVFrame *const p = (AVFrame *)&s->picture;
+    uint8_t *outbuf_ptr = outbuf;
+
+    *p = *pict;
+    p->pict_type = FF_I_TYPE;
+    p->key_frame = 1;
+    gif_image_write_header(&outbuf_ptr, avctx->width, avctx->height, -1, (uint32_t *)pict->data[1]);
+    gif_image_write_image(&outbuf_ptr, 0, 0, avctx->width, avctx->height, pict->data[0], pict->linesize[0], PIX_FMT_PAL8);
+    return outbuf_ptr - outbuf;
+}
+
+AVCodec gif_encoder = {
+    "gif",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_GIF,
+    sizeof(GIFContext),
+    gif_encode_init,
+    gif_encode_frame,
+    NULL, //encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_PAL8, -1},
+};
diff --git a/contrib/ffmpeg/libavcodec/gifdec.c b/contrib/ffmpeg/libavcodec/gifdec.c
new file mode 100644
index 000000000..5a5712299
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/gifdec.c
@@ -0,0 +1,339 @@
+/*
+ * GIF decoder
+ * Copyright (c) 2003 Fabrice Bellard.
+ * Copyright (c) 2006 Baptiste Coudurier.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+//#define DEBUG
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "lzw.h"
+
+#define GCE_DISPOSAL_NONE       0
+#define GCE_DISPOSAL_INPLACE    1
+#define GCE_DISPOSAL_BACKGROUND 2
+#define GCE_DISPOSAL_RESTORE    3
+
+typedef struct GifState {
+    AVFrame picture;
+    int screen_width;
+    int screen_height;
+    int bits_per_pixel;
+    int background_color_index;
+    int transparent_color_index;
+    int color_resolution;
+    uint32_t *image_palette;
+
+    /* after the frame is displayed, the disposal method is used */
+    int gce_disposal;
+    /* delay during which the frame is shown */
+    int gce_delay;
+
+    /* LZW compatible decoder */
+    uint8_t *bytestream;
+    LZWState *lzw;
+
+    /* aux buffers */
+    uint8_t global_palette[256 * 3];
+    uint8_t local_palette[256 * 3];
+} GifState;
+
+static const uint8_t gif87a_sig[6] = "GIF87a";
+static const uint8_t gif89a_sig[6] = "GIF89a";
+
+static int gif_read_image(GifState *s)
+{
+    int left, top, width, height, bits_per_pixel, code_size, flags;
+    int is_interleaved, has_local_palette, y, pass, y1, linesize, n, i;
+    uint8_t *ptr, *spal, *palette, *ptr1;
+
+    left = bytestream_get_le16(&s->bytestream);
+    top = bytestream_get_le16(&s->bytestream);
+    width = bytestream_get_le16(&s->bytestream);
+    height = bytestream_get_le16(&s->bytestream);
+    flags = bytestream_get_byte(&s->bytestream);
+    is_interleaved = flags & 0x40;
+    has_local_palette = flags & 0x80;
+    bits_per_pixel = (flags & 0x07) + 1;
+#ifdef DEBUG
+    dprintf("gif: image x=%d y=%d w=%d h=%d\n", left, top, width, height);
+#endif
+
+    if (has_local_palette) {
+        bytestream_get_buffer(&s->bytestream, s->local_palette, 3 * (1 << bits_per_pixel));
+        palette = s->local_palette;
+    } else {
+        palette = s->global_palette;
+        bits_per_pixel = s->bits_per_pixel;
+    }
+
+    /* verify that all the image is inside the screen dimensions */
+    if (left + width > s->screen_width ||
+        top + height > s->screen_height)
+        return -EINVAL;
+
+    /* build the palette */
+    n = (1 << bits_per_pixel);
+    spal = palette;
+    for(i = 0; i < n; i++) {
+        s->image_palette[i] = (0xff << 24) |
+            (spal[0] << 16) | (spal[1] << 8) | (spal[2]);
+        spal += 3;
+    }
+    for(; i < 256; i++)
+        s->image_palette[i] = (0xff << 24);
+    /* handle transparency */
+    if (s->transparent_color_index >= 0)
+        s->image_palette[s->transparent_color_index] = 0;
+
+    /* now get the image data */
+    code_size = bytestream_get_byte(&s->bytestream);
+    //TODO: add proper data size
+    ff_lzw_decode_init(s->lzw, code_size, s->bytestream, 0, FF_LZW_GIF);
+
+    /* read all the image */
+    linesize = s->picture.linesize[0];
+    ptr1 = s->picture.data[0] + top * linesize + left;
+    ptr = ptr1;
+    pass = 0;
+    y1 = 0;
+    for (y = 0; y < height; y++) {
+        ff_lzw_decode(s->lzw, ptr, width);
+        if (is_interleaved) {
+            switch(pass) {
+            default:
+            case 0:
+            case 1:
+                y1 += 8;
+                ptr += linesize * 8;
+                if (y1 >= height) {
+                    y1 = 4;
+                    if (pass == 0)
+                        ptr = ptr1 + linesize * 4;
+                    else
+                        ptr = ptr1 + linesize * 2;
+                    pass++;
+                }
+                break;
+            case 2:
+                y1 += 4;
+                ptr += linesize * 4;
+                if (y1 >= height) {
+                    y1 = 1;
+                    ptr = ptr1 + linesize;
+                    pass++;
+                }
+                break;
+            case 3:
+                y1 += 2;
+                ptr += linesize * 2;
+                break;
+            }
+        } else {
+            ptr += linesize;
+        }
+    }
+    /* read the garbage data until end marker is found */
+    ff_lzw_decode_tail(s->lzw);
+    s->bytestream = ff_lzw_cur_ptr(s->lzw);
+    return 0;
+}
+
+static int gif_read_extension(GifState *s)
+{
+    int ext_code, ext_len, i, gce_flags, gce_transparent_index;
+
+    /* extension */
+    ext_code = bytestream_get_byte(&s->bytestream);
+    ext_len = bytestream_get_byte(&s->bytestream);
+#ifdef DEBUG
+    dprintf("gif: ext_code=0x%x len=%d\n", ext_code, ext_len);
+#endif
+    switch(ext_code) {
+    case 0xf9:
+        if (ext_len != 4)
+            goto discard_ext;
+        s->transparent_color_index = -1;
+        gce_flags = bytestream_get_byte(&s->bytestream);
+        s->gce_delay = bytestream_get_le16(&s->bytestream);
+        gce_transparent_index = bytestream_get_byte(&s->bytestream);
+        if (gce_flags & 0x01)
+            s->transparent_color_index = gce_transparent_index;
+        else
+            s->transparent_color_index = -1;
+        s->gce_disposal = (gce_flags >> 2) & 0x7;
+#ifdef DEBUG
+        dprintf("gif: gce_flags=%x delay=%d tcolor=%d disposal=%d\n",
+               gce_flags, s->gce_delay,
+               s->transparent_color_index, s->gce_disposal);
+#endif
+        ext_len = bytestream_get_byte(&s->bytestream);
+        break;
+    }
+
+    /* NOTE: many extension blocks can come after */
+ discard_ext:
+    while (ext_len != 0) {
+        for (i = 0; i < ext_len; i++)
+            bytestream_get_byte(&s->bytestream);
+        ext_len = bytestream_get_byte(&s->bytestream);
+#ifdef DEBUG
+        dprintf("gif: ext_len1=%d\n", ext_len);
+#endif
+    }
+    return 0;
+}
+
+static int gif_read_header1(GifState *s)
+{
+    uint8_t sig[6];
+    int v, n;
+    int has_global_palette;
+
+    /* read gif signature */
+    bytestream_get_buffer(&s->bytestream, sig, 6);
+    if (memcmp(sig, gif87a_sig, 6) != 0 &&
+        memcmp(sig, gif89a_sig, 6) != 0)
+        return -1;
+
+    /* read screen header */
+    s->transparent_color_index = -1;
+    s->screen_width = bytestream_get_le16(&s->bytestream);
+    s->screen_height = bytestream_get_le16(&s->bytestream);
+    if(   (unsigned)s->screen_width  > 32767
+       || (unsigned)s->screen_height > 32767){
+        av_log(NULL, AV_LOG_ERROR, "picture size too large\n");
+        return -1;
+    }
+
+    v = bytestream_get_byte(&s->bytestream);
+    s->color_resolution = ((v & 0x70) >> 4) + 1;
+    has_global_palette = (v & 0x80);
+    s->bits_per_pixel = (v & 0x07) + 1;
+    s->background_color_index = bytestream_get_byte(&s->bytestream);
+    bytestream_get_byte(&s->bytestream);                /* ignored */
+#ifdef DEBUG
+    dprintf("gif: screen_w=%d screen_h=%d bpp=%d global_palette=%d\n",
+           s->screen_width, s->screen_height, s->bits_per_pixel,
+           has_global_palette);
+#endif
+    if (has_global_palette) {
+        n = 1 << s->bits_per_pixel;
+        bytestream_get_buffer(&s->bytestream, s->global_palette, n * 3);
+    }
+    return 0;
+}
+
+static int gif_parse_next_image(GifState *s)
+{
+    int ret, code;
+
+    for (;;) {
+        code = bytestream_get_byte(&s->bytestream);
+#ifdef DEBUG
+        dprintf("gif: code=%02x '%c'\n", code, code);
+#endif
+        switch (code) {
+        case ',':
+            if (gif_read_image(s) < 0)
+                return -1;
+            ret = 0;
+            goto the_end;
+        case ';':
+            /* end of image */
+            ret = -1;
+            goto the_end;
+        case '!':
+            if (gif_read_extension(s) < 0)
+                return -1;
+            break;
+        case EOF:
+        default:
+            /* error or errneous EOF */
+            ret = -1;
+            goto the_end;
+        }
+    }
+  the_end:
+    return ret;
+}
+
+static int gif_decode_init(AVCodecContext *avctx)
+{
+    GifState *s = avctx->priv_data;
+
+    avcodec_get_frame_defaults(&s->picture);
+    avctx->coded_frame= &s->picture;
+    s->picture.data[0] = NULL;
+    ff_lzw_decode_open(&s->lzw);
+    return 0;
+}
+
+static int gif_decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
+{
+    GifState *s = avctx->priv_data;
+    AVFrame *picture = data;
+    int ret;
+
+    s->bytestream = buf;
+    if (gif_read_header1(s) < 0)
+        return -1;
+
+    avctx->pix_fmt = PIX_FMT_PAL8;
+    if (avcodec_check_dimensions(avctx, s->screen_width, s->screen_height))
+        return -1;
+    avcodec_set_dimensions(avctx, s->screen_width, s->screen_height);
+
+    if (s->picture.data[0])
+        avctx->release_buffer(avctx, &s->picture);
+    if (avctx->get_buffer(avctx, &s->picture) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    s->image_palette = (uint32_t *)s->picture.data[1];
+    ret = gif_parse_next_image(s);
+    if (ret < 0)
+        return ret;
+
+    *picture = s->picture;
+    *data_size = sizeof(AVPicture);
+    return 0;
+}
+
+static int gif_decode_close(AVCodecContext *avctx)
+{
+    GifState *s = avctx->priv_data;
+
+    ff_lzw_decode_close(&s->lzw);
+    if(s->picture.data[0])
+        avctx->release_buffer(avctx, &s->picture);
+    return 0;
+}
+
+AVCodec gif_decoder = {
+    "gif",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_GIF,
+    sizeof(GifState),
+    gif_decode_init,
+    NULL,
+    gif_decode_close,
+    gif_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/golomb.c b/contrib/ffmpeg/libavcodec/golomb.c
new file mode 100644
index 000000000..50df4fc40
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/golomb.c
@@ -0,0 +1,156 @@
+/*
+ * exp golomb vlc stuff
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file golomb.c
+ * @brief
+ *     exp golomb vlc stuff
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "common.h"
+
+const uint8_t ff_golomb_vlc_len[512]={
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+};
+
+const uint8_t ff_ue_golomb_vlc_code[512]={
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,
+ 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9,10,10,10,10,11,11,11,11,12,12,12,12,13,13,13,13,14,14,14,14,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+const int8_t ff_se_golomb_vlc_code[512]={
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  8, -8,  9, -9, 10,-10, 11,-11, 12,-12, 13,-13, 14,-14, 15,-15,
+  4,  4,  4,  4, -4, -4, -4, -4,  5,  5,  5,  5, -5, -5, -5, -5,  6,  6,  6,  6, -6, -6, -6, -6,  7,  7,  7,  7, -7, -7, -7, -7,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+};
+
+
+const uint8_t ff_ue_golomb_len[256]={
+ 1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,11,
+11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,13,
+13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
+13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,15,
+15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,17,
+};
+
+const uint8_t ff_interleaved_golomb_vlc_len[256]={
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+};
+
+const uint8_t ff_interleaved_ue_golomb_vlc_code[256]={
+ 15,16,7, 7, 17,18,8, 8, 3, 3, 3, 3, 3, 3, 3, 3,
+ 19,20,9, 9, 21,22,10,10,4, 4, 4, 4, 4, 4, 4, 4,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 23,24,11,11,25,26,12,12,5, 5, 5, 5, 5, 5, 5, 5,
+ 27,28,13,13,29,30,14,14,6, 6, 6, 6, 6, 6, 6, 6,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+const int8_t ff_interleaved_se_golomb_vlc_code[256]={
+  8, -8,  4,  4,  9, -9, -4, -4,  2,  2,  2,  2,  2,  2,  2,  2,
+ 10,-10,  5,  5, 11,-11, -5, -5, -2, -2, -2, -2, -2, -2, -2, -2,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+ 12,-12,  6,  6, 13,-13, -6, -6,  3,  3,  3,  3,  3,  3,  3,  3,
+ 14,-14,  7,  7, 15,-15, -7, -7, -3, -3, -3, -3, -3, -3, -3, -3,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+};
diff --git a/contrib/ffmpeg/libavcodec/golomb.h b/contrib/ffmpeg/libavcodec/golomb.h
new file mode 100644
index 000000000..9bf7aec46
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/golomb.h
@@ -0,0 +1,479 @@
+/*
+ * exp golomb vlc stuff
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2004 Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file golomb.h
+ * @brief
+ *     exp golomb vlc stuff
+ * @author Michael Niedermayer <michaelni@gmx.at> and Alex Beregszaszi
+ */
+
+#define INVALID_VLC           0x80000000
+
+extern const uint8_t ff_golomb_vlc_len[512];
+extern const uint8_t ff_ue_golomb_vlc_code[512];
+extern const  int8_t ff_se_golomb_vlc_code[512];
+extern const uint8_t ff_ue_golomb_len[256];
+
+extern const uint8_t ff_interleaved_golomb_vlc_len[256];
+extern const uint8_t ff_interleaved_ue_golomb_vlc_code[256];
+extern const  int8_t ff_interleaved_se_golomb_vlc_code[256];
+
+
+ /**
+ * read unsigned exp golomb code.
+ */
+static inline int get_ue_golomb(GetBitContext *gb){
+    unsigned int buf;
+    int log;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf=GET_CACHE(re, gb);
+
+    if(buf >= (1<<27)){
+        buf >>= 32 - 9;
+        LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+
+        return ff_ue_golomb_vlc_code[buf];
+    }else{
+        log= 2*av_log2(buf) - 31;
+        buf>>= log;
+        buf--;
+        LAST_SKIP_BITS(re, gb, 32 - log);
+        CLOSE_READER(re, gb);
+
+        return buf;
+    }
+}
+
+static inline int svq3_get_ue_golomb(GetBitContext *gb){
+    uint32_t buf;
+    int log;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf=GET_CACHE(re, gb);
+
+    if(buf&0xAA800000){
+        buf >>= 32 - 8;
+        LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+
+        return ff_interleaved_ue_golomb_vlc_code[buf];
+    }else{
+        LAST_SKIP_BITS(re, gb, 8);
+        UPDATE_CACHE(re, gb);
+        buf |= 1 | (GET_CACHE(re, gb) >> 8);
+
+        if((buf & 0xAAAAAAAA) == 0)
+            return INVALID_VLC;
+
+        for(log=31; (buf & 0x80000000) == 0; log--){
+            buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
+        }
+
+        LAST_SKIP_BITS(re, gb, 63 - 2*log - 8);
+        CLOSE_READER(re, gb);
+
+        return ((buf << log) >> log) - 1;
+    }
+}
+
+/**
+ * read unsigned truncated exp golomb code.
+ */
+static inline int get_te0_golomb(GetBitContext *gb, int range){
+    assert(range >= 1);
+
+    if(range==1)      return 0;
+    else if(range==2) return get_bits1(gb)^1;
+    else              return get_ue_golomb(gb);
+}
+
+/**
+ * read unsigned truncated exp golomb code.
+ */
+static inline int get_te_golomb(GetBitContext *gb, int range){
+    assert(range >= 1);
+
+    if(range==2) return get_bits1(gb)^1;
+    else         return get_ue_golomb(gb);
+}
+
+
+/**
+ * read signed exp golomb code.
+ */
+static inline int get_se_golomb(GetBitContext *gb){
+    unsigned int buf;
+    int log;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf=GET_CACHE(re, gb);
+
+    if(buf >= (1<<27)){
+        buf >>= 32 - 9;
+        LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+
+        return ff_se_golomb_vlc_code[buf];
+    }else{
+        log= 2*av_log2(buf) - 31;
+        buf>>= log;
+
+        LAST_SKIP_BITS(re, gb, 32 - log);
+        CLOSE_READER(re, gb);
+
+        if(buf&1) buf= -(buf>>1);
+        else      buf=  (buf>>1);
+
+        return buf;
+    }
+}
+
+static inline int svq3_get_se_golomb(GetBitContext *gb){
+    unsigned int buf;
+    int log;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf=GET_CACHE(re, gb);
+
+    if(buf&0xAA800000){
+        buf >>= 32 - 8;
+        LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+
+        return ff_interleaved_se_golomb_vlc_code[buf];
+    }else{
+        LAST_SKIP_BITS(re, gb, 8);
+        UPDATE_CACHE(re, gb);
+        buf |= 1 | (GET_CACHE(re, gb) >> 8);
+
+        if((buf & 0xAAAAAAAA) == 0)
+            return INVALID_VLC;
+
+        for(log=31; (buf & 0x80000000) == 0; log--){
+            buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
+        }
+
+        LAST_SKIP_BITS(re, gb, 63 - 2*log - 8);
+        CLOSE_READER(re, gb);
+
+        return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1;
+    }
+}
+
+/**
+ * read unsigned golomb rice code (ffv1).
+ */
+static inline int get_ur_golomb(GetBitContext *gb, int k, int limit, int esc_len){
+    unsigned int buf;
+    int log;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf=GET_CACHE(re, gb);
+
+    log= av_log2(buf);
+
+    if(log > 31-limit){
+        buf >>= log - k;
+        buf += (30-log)<<k;
+        LAST_SKIP_BITS(re, gb, 32 + k - log);
+        CLOSE_READER(re, gb);
+
+        return buf;
+    }else{
+        buf >>= 32 - limit - esc_len;
+        LAST_SKIP_BITS(re, gb, esc_len + limit);
+        CLOSE_READER(re, gb);
+
+        return buf + limit - 1;
+    }
+}
+
+/**
+ * read unsigned golomb rice code (jpegls).
+ */
+static inline int get_ur_golomb_jpegls(GetBitContext *gb, int k, int limit, int esc_len){
+    unsigned int buf;
+    int log;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf=GET_CACHE(re, gb);
+
+    log= av_log2(buf);
+
+    if(log > 31-11){
+        buf >>= log - k;
+        buf += (30-log)<<k;
+        LAST_SKIP_BITS(re, gb, 32 + k - log);
+        CLOSE_READER(re, gb);
+
+        return buf;
+    }else{
+        int i;
+        for(i=0; SHOW_UBITS(re, gb, 1) == 0; i++){
+            LAST_SKIP_BITS(re, gb, 1);
+            UPDATE_CACHE(re, gb);
+        }
+        SKIP_BITS(re, gb, 1);
+
+        if(i < limit - 1){
+            if(k){
+                buf = SHOW_UBITS(re, gb, k);
+                LAST_SKIP_BITS(re, gb, k);
+            }else{
+                buf=0;
+            }
+
+            CLOSE_READER(re, gb);
+            return buf + (i<<k);
+        }else if(i == limit - 1){
+            buf = SHOW_UBITS(re, gb, esc_len);
+            LAST_SKIP_BITS(re, gb, esc_len);
+            CLOSE_READER(re, gb);
+
+            return buf + 1;
+        }else
+            return -1;
+    }
+}
+
+/**
+ * read signed golomb rice code (ffv1).
+ */
+static inline int get_sr_golomb(GetBitContext *gb, int k, int limit, int esc_len){
+    int v= get_ur_golomb(gb, k, limit, esc_len);
+
+    v++;
+    if (v&1) return v>>1;
+    else return -(v>>1);
+
+//    return (v>>1) ^ -(v&1);
+}
+
+/**
+ * read signed golomb rice code (flac).
+ */
+static inline int get_sr_golomb_flac(GetBitContext *gb, int k, int limit, int esc_len){
+    int v= get_ur_golomb_jpegls(gb, k, limit, esc_len);
+    return (v>>1) ^ -(v&1);
+}
+
+/**
+ * read unsigned golomb rice code (shorten).
+ */
+static inline unsigned int get_ur_golomb_shorten(GetBitContext *gb, int k){
+        return get_ur_golomb_jpegls(gb, k, INT_MAX, 0);
+}
+
+/**
+ * read signed golomb rice code (shorten).
+ */
+static inline int get_sr_golomb_shorten(GetBitContext* gb, int k)
+{
+    int uvar = get_ur_golomb_jpegls(gb, k + 1, INT_MAX, 0);
+    if (uvar & 1)
+        return ~(uvar >> 1);
+    else
+        return uvar >> 1;
+}
+
+
+
+#ifdef TRACE
+
+static inline int get_ue(GetBitContext *s, char *file, const char *func, int line){
+    int show= show_bits(s, 24);
+    int pos= get_bits_count(s);
+    int i= get_ue_golomb(s);
+    int len= get_bits_count(s) - pos;
+    int bits= show>>(24-len);
+
+    print_bin(bits, len);
+
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d ue  @%5d in %s %s:%d\n", bits, len, i, pos, file, func, line);
+
+    return i;
+}
+
+static inline int get_se(GetBitContext *s, char *file, const char *func, int line){
+    int show= show_bits(s, 24);
+    int pos= get_bits_count(s);
+    int i= get_se_golomb(s);
+    int len= get_bits_count(s) - pos;
+    int bits= show>>(24-len);
+
+    print_bin(bits, len);
+
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d se  @%5d in %s %s:%d\n", bits, len, i, pos, file, func, line);
+
+    return i;
+}
+
+static inline int get_te(GetBitContext *s, int r, char *file, const char *func, int line){
+    int show= show_bits(s, 24);
+    int pos= get_bits_count(s);
+    int i= get_te0_golomb(s, r);
+    int len= get_bits_count(s) - pos;
+    int bits= show>>(24-len);
+
+    print_bin(bits, len);
+
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d te  @%5d in %s %s:%d\n", bits, len, i, pos, file, func, line);
+
+    return i;
+}
+
+#define get_ue_golomb(a) get_ue(a, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_se_golomb(a) get_se(a, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_te_golomb(a, r) get_te(a, r, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_te0_golomb(a, r) get_te(a, r, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+
+#endif
+
+/**
+ * write unsigned exp golomb code.
+ */
+static inline void set_ue_golomb(PutBitContext *pb, int i){
+    int e;
+
+    assert(i>=0);
+
+#if 0
+    if(i=0){
+        put_bits(pb, 1, 1);
+        return;
+    }
+#endif
+    if(i<256)
+        put_bits(pb, ff_ue_golomb_len[i], i+1);
+    else{
+        e= av_log2(i+1);
+
+        put_bits(pb, 2*e+1, i+1);
+    }
+}
+
+/**
+ * write truncated unsigned exp golomb code.
+ */
+static inline void set_te_golomb(PutBitContext *pb, int i, int range){
+    assert(range >= 1);
+    assert(i<=range);
+
+    if(range==2) put_bits(pb, 1, i^1);
+    else         set_ue_golomb(pb, i);
+}
+
+/**
+ * write signed exp golomb code. 16 bits at most.
+ */
+static inline void set_se_golomb(PutBitContext *pb, int i){
+//    if (i>32767 || i<-32767)
+//        av_log(NULL,AV_LOG_ERROR,"value out of range %d\n", i);
+#if 0
+    if(i<=0) i= -2*i;
+    else     i=  2*i-1;
+#elif 1
+    i= 2*i-1;
+    if(i<0) i^= -1; //FIXME check if gcc does the right thing
+#else
+    i= 2*i-1;
+    i^= (i>>31);
+#endif
+    set_ue_golomb(pb, i);
+}
+
+/**
+ * write unsigned golomb rice code (ffv1).
+ */
+static inline void set_ur_golomb(PutBitContext *pb, int i, int k, int limit, int esc_len){
+    int e;
+
+    assert(i>=0);
+
+    e= i>>k;
+    if(e<limit){
+        put_bits(pb, e + k + 1, (1<<k) + (i&((1<<k)-1)));
+    }else{
+        put_bits(pb, limit + esc_len, i - limit + 1);
+    }
+}
+
+/**
+ * write unsigned golomb rice code (jpegls).
+ */
+static inline void set_ur_golomb_jpegls(PutBitContext *pb, int i, int k, int limit, int esc_len){
+    int e;
+
+    assert(i>=0);
+
+    e= (i>>k) + 1;
+    if(e<limit){
+        while(e > 31) {
+            put_bits(pb, 31, 0);
+            e -= 31;
+        }
+        put_bits(pb, e, 1);
+        if(k)
+            put_bits(pb, k, i&((1<<k)-1));
+    }else{
+        while(limit > 31) {
+            put_bits(pb, 31, 0);
+            limit -= 31;
+        }
+        put_bits(pb, limit  , 1);
+        put_bits(pb, esc_len, i - 1);
+    }
+}
+
+/**
+ * write signed golomb rice code (ffv1).
+ */
+static inline void set_sr_golomb(PutBitContext *pb, int i, int k, int limit, int esc_len){
+    int v;
+
+    v = -2*i-1;
+    v ^= (v>>31);
+
+    set_ur_golomb(pb, v, k, limit, esc_len);
+}
+
+/**
+ * write signed golomb rice code (flac).
+ */
+static inline void set_sr_golomb_flac(PutBitContext *pb, int i, int k, int limit, int esc_len){
+    int v;
+
+    v = -2*i-1;
+    v ^= (v>>31);
+
+    set_ur_golomb_jpegls(pb, v, k, limit, esc_len);
+}
diff --git a/contrib/ffmpeg/libavcodec/h261.c b/contrib/ffmpeg/libavcodec/h261.c
new file mode 100644
index 000000000..8d4ca08cd
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/h261.c
@@ -0,0 +1,1055 @@
+/*
+ * H261 decoder
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2004 Maarten Daniels
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h261.c
+ * h261codec.
+ */
+
+#include "common.h"
+#include "dsputil.h"
+#include "avcodec.h"
+#include "mpegvideo.h"
+#include "h261data.h"
+
+
+#define H261_MBA_VLC_BITS 9
+#define H261_MTYPE_VLC_BITS 6
+#define H261_MV_VLC_BITS 7
+#define H261_CBP_VLC_BITS 9
+#define TCOEFF_VLC_BITS 9
+
+#define MBA_STUFFING 33
+#define MBA_STARTCODE 34
+#define IS_FIL(a)    ((a)&MB_TYPE_H261_FIL)
+
+/**
+ * H261Context
+ */
+typedef struct H261Context{
+    MpegEncContext s;
+
+    int current_mba;
+    int previous_mba;
+    int mba_diff;
+    int mtype;
+    int current_mv_x;
+    int current_mv_y;
+    int gob_number;
+    int gob_start_code_skipped; // 1 if gob start code is already read before gob header is read
+}H261Context;
+
+void ff_h261_loop_filter(MpegEncContext *s){
+    H261Context * h= (H261Context*)s;
+    const int linesize  = s->linesize;
+    const int uvlinesize= s->uvlinesize;
+    uint8_t *dest_y = s->dest[0];
+    uint8_t *dest_cb= s->dest[1];
+    uint8_t *dest_cr= s->dest[2];
+
+    if(!(IS_FIL (h->mtype)))
+        return;
+
+    s->dsp.h261_loop_filter(dest_y                   , linesize);
+    s->dsp.h261_loop_filter(dest_y                + 8, linesize);
+    s->dsp.h261_loop_filter(dest_y + 8 * linesize    , linesize);
+    s->dsp.h261_loop_filter(dest_y + 8 * linesize + 8, linesize);
+    s->dsp.h261_loop_filter(dest_cb, uvlinesize);
+    s->dsp.h261_loop_filter(dest_cr, uvlinesize);
+}
+
+int ff_h261_get_picture_format(int width, int height){
+    // QCIF
+    if (width == 176 && height == 144)
+        return 0;
+    // CIF
+    else if (width == 352 && height == 288)
+        return 1;
+    // ERROR
+    else
+        return -1;
+}
+
+static void h261_encode_block(H261Context * h, DCTELEM * block,
+                              int n);
+static int h261_decode_block(H261Context *h, DCTELEM *block,
+                             int n, int coded);
+
+void ff_h261_encode_picture_header(MpegEncContext * s, int picture_number){
+    H261Context * h = (H261Context *) s;
+    int format, temp_ref;
+
+    align_put_bits(&s->pb);
+
+    /* Update the pointer to last GOB */
+    s->ptr_lastgob = pbBufPtr(&s->pb);
+
+    put_bits(&s->pb, 20, 0x10); /* PSC */
+
+    temp_ref= s->picture_number * (int64_t)30000 * s->avctx->time_base.num /
+                         (1001 * (int64_t)s->avctx->time_base.den); //FIXME maybe this should use a timestamp
+    put_bits(&s->pb, 5, temp_ref & 0x1f); /* TemporalReference */
+
+    put_bits(&s->pb, 1, 0); /* split screen off */
+    put_bits(&s->pb, 1, 0); /* camera  off */
+    put_bits(&s->pb, 1, 0); /* freeze picture release off */
+
+    format = ff_h261_get_picture_format(s->width, s->height);
+
+    put_bits(&s->pb, 1, format); /* 0 == QCIF, 1 == CIF */
+
+    put_bits(&s->pb, 1, 0); /* still image mode */
+    put_bits(&s->pb, 1, 0); /* reserved */
+
+    put_bits(&s->pb, 1, 0); /* no PEI */
+    if(format == 0)
+        h->gob_number = -1;
+    else
+        h->gob_number = 0;
+    h->current_mba = 0;
+}
+
+/**
+ * Encodes a group of blocks header.
+ */
+static void h261_encode_gob_header(MpegEncContext * s, int mb_line){
+    H261Context * h = (H261Context *)s;
+    if(ff_h261_get_picture_format(s->width, s->height) == 0){
+        h->gob_number+=2; // QCIF
+    }
+    else{
+        h->gob_number++; // CIF
+    }
+    put_bits(&s->pb, 16, 1); /* GBSC */
+    put_bits(&s->pb, 4, h->gob_number); /* GN */
+    put_bits(&s->pb, 5, s->qscale); /* GQUANT */
+    put_bits(&s->pb, 1, 0); /* no GEI */
+    h->current_mba = 0;
+    h->previous_mba = 0;
+    h->current_mv_x=0;
+    h->current_mv_y=0;
+}
+
+void ff_h261_reorder_mb_index(MpegEncContext* s){
+    int index= s->mb_x + s->mb_y*s->mb_width;
+
+    if(index % 33 == 0)
+        h261_encode_gob_header(s,0);
+
+    /* for CIF the GOB's are fragmented in the middle of a scanline
+       that's why we need to adjust the x and y index of the macroblocks */
+    if(ff_h261_get_picture_format(s->width,s->height) == 1){ // CIF
+        s->mb_x =     index % 11 ; index /= 11;
+        s->mb_y =     index %  3 ; index /=  3;
+        s->mb_x+= 11*(index %  2); index /=  2;
+        s->mb_y+=  3*index;
+
+        ff_init_block_index(s);
+        ff_update_block_index(s);
+    }
+}
+
+static void h261_encode_motion(H261Context * h, int val){
+    MpegEncContext * const s = &h->s;
+    int sign, code;
+    if(val==0){
+        code = 0;
+        put_bits(&s->pb,h261_mv_tab[code][1],h261_mv_tab[code][0]);
+    }
+    else{
+        if(val > 15)
+            val -=32;
+        if(val < -16)
+            val+=32;
+        sign = val < 0;
+        code = sign ? -val : val;
+        put_bits(&s->pb,h261_mv_tab[code][1],h261_mv_tab[code][0]);
+        put_bits(&s->pb,1,sign);
+    }
+}
+
+static inline int get_cbp(MpegEncContext * s,
+                      DCTELEM block[6][64])
+{
+    int i, cbp;
+    cbp= 0;
+    for (i = 0; i < 6; i++) {
+        if (s->block_last_index[i] >= 0)
+            cbp |= 1 << (5 - i);
+    }
+    return cbp;
+}
+void ff_h261_encode_mb(MpegEncContext * s,
+         DCTELEM block[6][64],
+         int motion_x, int motion_y)
+{
+    H261Context * h = (H261Context *)s;
+    int mvd, mv_diff_x, mv_diff_y, i, cbp;
+    cbp = 63; // avoid warning
+    mvd = 0;
+
+    h->current_mba++;
+    h->mtype = 0;
+
+    if (!s->mb_intra){
+        /* compute cbp */
+        cbp= get_cbp(s, block);
+
+        /* mvd indicates if this block is motion compensated */
+        mvd = motion_x | motion_y;
+
+        if((cbp | mvd | s->dquant ) == 0) {
+            /* skip macroblock */
+            s->skip_count++;
+            h->current_mv_x=0;
+            h->current_mv_y=0;
+            return;
+        }
+    }
+
+    /* MB is not skipped, encode MBA */
+    put_bits(&s->pb, h261_mba_bits[(h->current_mba-h->previous_mba)-1], h261_mba_code[(h->current_mba-h->previous_mba)-1]);
+
+    /* calculate MTYPE */
+    if(!s->mb_intra){
+        h->mtype++;
+
+        if(mvd || s->loop_filter)
+            h->mtype+=3;
+        if(s->loop_filter)
+            h->mtype+=3;
+        if(cbp || s->dquant)
+            h->mtype++;
+        assert(h->mtype > 1);
+    }
+
+    if(s->dquant)
+        h->mtype++;
+
+    put_bits(&s->pb, h261_mtype_bits[h->mtype], h261_mtype_code[h->mtype]);
+
+    h->mtype = h261_mtype_map[h->mtype];
+
+    if(IS_QUANT(h->mtype)){
+        ff_set_qscale(s,s->qscale+s->dquant);
+        put_bits(&s->pb, 5, s->qscale);
+    }
+
+    if(IS_16X16(h->mtype)){
+        mv_diff_x = (motion_x >> 1) - h->current_mv_x;
+        mv_diff_y = (motion_y >> 1) - h->current_mv_y;
+        h->current_mv_x = (motion_x >> 1);
+        h->current_mv_y = (motion_y >> 1);
+        h261_encode_motion(h,mv_diff_x);
+        h261_encode_motion(h,mv_diff_y);
+    }
+
+    h->previous_mba = h->current_mba;
+
+    if(HAS_CBP(h->mtype)){
+        assert(cbp>0);
+        put_bits(&s->pb,h261_cbp_tab[cbp-1][1],h261_cbp_tab[cbp-1][0]);
+    }
+    for(i=0; i<6; i++) {
+        /* encode each block */
+        h261_encode_block(h, block[i], i);
+    }
+
+    if ( ( h->current_mba == 11 ) || ( h->current_mba == 22 ) || ( h->current_mba == 33 ) || ( !IS_16X16 ( h->mtype ) )){
+        h->current_mv_x=0;
+        h->current_mv_y=0;
+    }
+}
+
+void ff_h261_encode_init(MpegEncContext *s){
+    static int done = 0;
+
+    if (!done) {
+        done = 1;
+        init_rl(&h261_rl_tcoeff, 1);
+    }
+
+    s->min_qcoeff= -127;
+    s->max_qcoeff=  127;
+    s->y_dc_scale_table=
+    s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+}
+
+
+/**
+ * encodes a 8x8 block.
+ * @param block the 8x8 block
+ * @param n block index (0-3 are luma, 4-5 are chroma)
+ */
+static void h261_encode_block(H261Context * h, DCTELEM * block, int n){
+    MpegEncContext * const s = &h->s;
+    int level, run, last, i, j, last_index, last_non_zero, sign, slevel, code;
+    RLTable *rl;
+
+    rl = &h261_rl_tcoeff;
+    if (s->mb_intra) {
+        /* DC coef */
+        level = block[0];
+        /* 255 cannot be represented, so we clamp */
+        if (level > 254) {
+            level = 254;
+            block[0] = 254;
+        }
+        /* 0 cannot be represented also */
+        else if (level < 1) {
+            level = 1;
+            block[0] = 1;
+        }
+        if (level == 128)
+            put_bits(&s->pb, 8, 0xff);
+        else
+            put_bits(&s->pb, 8, level);
+        i = 1;
+    } else if((block[0]==1 || block[0] == -1) && (s->block_last_index[n] > -1)){
+        //special case
+        put_bits(&s->pb,2,block[0]>0 ? 2 : 3 );
+        i = 1;
+    } else {
+        i = 0;
+    }
+
+    /* AC coefs */
+    last_index = s->block_last_index[n];
+    last_non_zero = i - 1;
+    for (; i <= last_index; i++) {
+        j = s->intra_scantable.permutated[i];
+        level = block[j];
+        if (level) {
+            run = i - last_non_zero - 1;
+            last = (i == last_index);
+            sign = 0;
+            slevel = level;
+            if (level < 0) {
+                sign = 1;
+                level = -level;
+            }
+            code = get_rl_index(rl, 0 /*no last in H.261, EOB is used*/, run, level);
+            if(run==0 && level < 16)
+            code+=1;
+            put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
+            if (code == rl->n) {
+                put_bits(&s->pb, 6, run);
+                assert(slevel != 0);
+                assert(level <= 127);
+                put_bits(&s->pb, 8, slevel & 0xff);
+            } else {
+                put_bits(&s->pb, 1, sign);
+            }
+            last_non_zero = i;
+        }
+    }
+    if(last_index > -1){
+        put_bits(&s->pb, rl->table_vlc[0][1], rl->table_vlc[0][0]);// END OF BLOCK
+    }
+}
+
+/***********************************************/
+/* decoding */
+
+static VLC h261_mba_vlc;
+static VLC h261_mtype_vlc;
+static VLC h261_mv_vlc;
+static VLC h261_cbp_vlc;
+
+static void h261_decode_init_vlc(H261Context *h){
+    static int done = 0;
+
+    if(!done){
+        done = 1;
+        init_vlc(&h261_mba_vlc, H261_MBA_VLC_BITS, 35,
+                 h261_mba_bits, 1, 1,
+                 h261_mba_code, 1, 1, 1);
+        init_vlc(&h261_mtype_vlc, H261_MTYPE_VLC_BITS, 10,
+                 h261_mtype_bits, 1, 1,
+                 h261_mtype_code, 1, 1, 1);
+        init_vlc(&h261_mv_vlc, H261_MV_VLC_BITS, 17,
+                 &h261_mv_tab[0][1], 2, 1,
+                 &h261_mv_tab[0][0], 2, 1, 1);
+        init_vlc(&h261_cbp_vlc, H261_CBP_VLC_BITS, 63,
+                 &h261_cbp_tab[0][1], 2, 1,
+                 &h261_cbp_tab[0][0], 2, 1, 1);
+        init_rl(&h261_rl_tcoeff, 1);
+        init_vlc_rl(&h261_rl_tcoeff, 1);
+    }
+}
+
+static int h261_decode_init(AVCodecContext *avctx){
+    H261Context *h= avctx->priv_data;
+    MpegEncContext * const s = &h->s;
+
+    // set defaults
+    MPV_decode_defaults(s);
+    s->avctx = avctx;
+
+    s->width  = s->avctx->coded_width;
+    s->height = s->avctx->coded_height;
+    s->codec_id = s->avctx->codec->id;
+
+    s->out_format = FMT_H261;
+    s->low_delay= 1;
+    avctx->pix_fmt= PIX_FMT_YUV420P;
+
+    s->codec_id= avctx->codec->id;
+
+    h261_decode_init_vlc(h);
+
+    h->gob_start_code_skipped = 0;
+
+    return 0;
+}
+
+/**
+ * decodes the group of blocks header or slice header.
+ * @return <0 if an error occured
+ */
+static int h261_decode_gob_header(H261Context *h){
+    unsigned int val;
+    MpegEncContext * const s = &h->s;
+
+    if ( !h->gob_start_code_skipped ){
+        /* Check for GOB Start Code */
+        val = show_bits(&s->gb, 15);
+        if(val)
+            return -1;
+
+        /* We have a GBSC */
+        skip_bits(&s->gb, 16);
+    }
+
+    h->gob_start_code_skipped = 0;
+
+    h->gob_number = get_bits(&s->gb, 4); /* GN */
+    s->qscale = get_bits(&s->gb, 5); /* GQUANT */
+
+    /* Check if gob_number is valid */
+    if (s->mb_height==18){ //cif
+        if ((h->gob_number<=0) || (h->gob_number>12))
+            return -1;
+    }
+    else{ //qcif
+        if ((h->gob_number!=1) && (h->gob_number!=3) && (h->gob_number!=5))
+            return -1;
+    }
+
+    /* GEI */
+    while (get_bits1(&s->gb) != 0) {
+        skip_bits(&s->gb, 8);
+    }
+
+    if(s->qscale==0)
+        return -1;
+
+    // For the first transmitted macroblock in a GOB, MBA is the absolute address. For
+    // subsequent macroblocks, MBA is the difference between the absolute addresses of
+    // the macroblock and the last transmitted macroblock.
+    h->current_mba = 0;
+    h->mba_diff = 0;
+
+    return 0;
+}
+
+/**
+ * decodes the group of blocks / video packet header.
+ * @return <0 if no resync found
+ */
+static int ff_h261_resync(H261Context *h){
+    MpegEncContext * const s = &h->s;
+    int left, ret;
+
+    if ( h->gob_start_code_skipped ){
+        ret= h261_decode_gob_header(h);
+        if(ret>=0)
+            return 0;
+    }
+    else{
+        if(show_bits(&s->gb, 15)==0){
+            ret= h261_decode_gob_header(h);
+            if(ret>=0)
+                return 0;
+        }
+        //ok, its not where its supposed to be ...
+        s->gb= s->last_resync_gb;
+        align_get_bits(&s->gb);
+        left= s->gb.size_in_bits - get_bits_count(&s->gb);
+
+        for(;left>15+1+4+5; left-=8){
+            if(show_bits(&s->gb, 15)==0){
+                GetBitContext bak= s->gb;
+
+                ret= h261_decode_gob_header(h);
+                if(ret>=0)
+                    return 0;
+
+                s->gb= bak;
+            }
+            skip_bits(&s->gb, 8);
+        }
+    }
+
+    return -1;
+}
+
+/**
+ * decodes skipped macroblocks
+ * @return 0
+ */
+static int h261_decode_mb_skipped(H261Context *h, int mba1, int mba2 )
+{
+    MpegEncContext * const s = &h->s;
+    int i;
+
+    s->mb_intra = 0;
+
+    for(i=mba1; i<mba2; i++){
+        int j, xy;
+
+        s->mb_x= ((h->gob_number-1) % 2) * 11 + i % 11;
+        s->mb_y= ((h->gob_number-1) / 2) * 3 + i / 11;
+        xy = s->mb_x + s->mb_y * s->mb_stride;
+        ff_init_block_index(s);
+        ff_update_block_index(s);
+
+        for(j=0;j<6;j++)
+            s->block_last_index[j] = -1;
+
+        s->mv_dir = MV_DIR_FORWARD;
+        s->mv_type = MV_TYPE_16X16;
+        s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
+        s->mv[0][0][0] = 0;
+        s->mv[0][0][1] = 0;
+        s->mb_skipped = 1;
+        h->mtype &= ~MB_TYPE_H261_FIL;
+
+        MPV_decode_mb(s, s->block);
+    }
+
+    return 0;
+}
+
+static int decode_mv_component(GetBitContext *gb, int v){
+    int mv_diff = get_vlc2(gb, h261_mv_vlc.table, H261_MV_VLC_BITS, 2);
+
+    /* check if mv_diff is valid */
+    if ( mv_diff < 0 )
+        return v;
+
+    mv_diff = mvmap[mv_diff];
+
+    if(mv_diff && !get_bits1(gb))
+        mv_diff= -mv_diff;
+
+    v += mv_diff;
+    if     (v <=-16) v+= 32;
+    else if(v >= 16) v-= 32;
+
+    return v;
+}
+
+static int h261_decode_mb(H261Context *h){
+    MpegEncContext * const s = &h->s;
+    int i, cbp, xy;
+
+    cbp = 63;
+    // Read mba
+    do{
+        h->mba_diff = get_vlc2(&s->gb, h261_mba_vlc.table, H261_MBA_VLC_BITS, 2);
+
+        /* Check for slice end */
+        /* NOTE: GOB can be empty (no MB data) or exist only of MBA_stuffing */
+        if (h->mba_diff == MBA_STARTCODE){ // start code
+            h->gob_start_code_skipped = 1;
+            return SLICE_END;
+        }
+    }
+    while( h->mba_diff == MBA_STUFFING ); // stuffing
+
+    if ( h->mba_diff < 0 ){
+        if ( get_bits_count(&s->gb) + 7 >= s->gb.size_in_bits )
+            return SLICE_END;
+
+        av_log(s->avctx, AV_LOG_ERROR, "illegal mba at %d %d\n", s->mb_x, s->mb_y);
+        return SLICE_ERROR;
+    }
+
+    h->mba_diff += 1;
+    h->current_mba += h->mba_diff;
+
+    if ( h->current_mba > MBA_STUFFING )
+        return SLICE_ERROR;
+
+    s->mb_x= ((h->gob_number-1) % 2) * 11 + ((h->current_mba-1) % 11);
+    s->mb_y= ((h->gob_number-1) / 2) * 3 + ((h->current_mba-1) / 11);
+    xy = s->mb_x + s->mb_y * s->mb_stride;
+    ff_init_block_index(s);
+    ff_update_block_index(s);
+
+    // Read mtype
+    h->mtype = get_vlc2(&s->gb, h261_mtype_vlc.table, H261_MTYPE_VLC_BITS, 2);
+    h->mtype = h261_mtype_map[h->mtype];
+
+    // Read mquant
+    if ( IS_QUANT ( h->mtype ) ){
+        ff_set_qscale(s, get_bits(&s->gb, 5));
+    }
+
+    s->mb_intra = IS_INTRA4x4(h->mtype);
+
+    // Read mv
+    if ( IS_16X16 ( h->mtype ) ){
+        // Motion vector data is included for all MC macroblocks. MVD is obtained from the macroblock vector by subtracting the
+        // vector of the preceding macroblock. For this calculation the vector of the preceding macroblock is regarded as zero in the
+        // following three situations:
+        // 1) evaluating MVD for macroblocks 1, 12 and 23;
+        // 2) evaluating MVD for macroblocks in which MBA does not represent a difference of 1;
+        // 3) MTYPE of the previous macroblock was not MC.
+        if ( ( h->current_mba == 1 ) || ( h->current_mba == 12 ) || ( h->current_mba == 23 ) ||
+             ( h->mba_diff != 1))
+        {
+            h->current_mv_x = 0;
+            h->current_mv_y = 0;
+        }
+
+        h->current_mv_x= decode_mv_component(&s->gb, h->current_mv_x);
+        h->current_mv_y= decode_mv_component(&s->gb, h->current_mv_y);
+    }else{
+        h->current_mv_x = 0;
+        h->current_mv_y = 0;
+    }
+
+    // Read cbp
+    if ( HAS_CBP( h->mtype ) ){
+        cbp = get_vlc2(&s->gb, h261_cbp_vlc.table, H261_CBP_VLC_BITS, 2) + 1;
+    }
+
+    if(s->mb_intra){
+        s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
+        goto intra;
+    }
+
+    //set motion vectors
+    s->mv_dir = MV_DIR_FORWARD;
+    s->mv_type = MV_TYPE_16X16;
+    s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0;
+    s->mv[0][0][0] = h->current_mv_x * 2;//gets divided by 2 in motion compensation
+    s->mv[0][0][1] = h->current_mv_y * 2;
+
+intra:
+    /* decode each block */
+    if(s->mb_intra || HAS_CBP(h->mtype)){
+        s->dsp.clear_blocks(s->block[0]);
+        for (i = 0; i < 6; i++) {
+            if (h261_decode_block(h, s->block[i], i, cbp&32) < 0){
+                return SLICE_ERROR;
+            }
+            cbp+=cbp;
+        }
+    }else{
+        for (i = 0; i < 6; i++)
+            s->block_last_index[i]= -1;
+    }
+
+    MPV_decode_mb(s, s->block);
+
+    return SLICE_OK;
+}
+
+/**
+ * decodes a macroblock
+ * @return <0 if an error occured
+ */
+static int h261_decode_block(H261Context * h, DCTELEM * block,
+                             int n, int coded)
+{
+    MpegEncContext * const s = &h->s;
+    int code, level, i, j, run;
+    RLTable *rl = &h261_rl_tcoeff;
+    const uint8_t *scan_table;
+
+    // For the variable length encoding there are two code tables, one being used for
+    // the first transmitted LEVEL in INTER, INTER+MC and INTER+MC+FIL blocks, the second
+    // for all other LEVELs except the first one in INTRA blocks which is fixed length
+    // coded with 8 bits.
+    // NOTE: the two code tables only differ in one VLC so we handle that manually.
+    scan_table = s->intra_scantable.permutated;
+    if (s->mb_intra){
+        /* DC coef */
+        level = get_bits(&s->gb, 8);
+        // 0 (00000000b) and -128 (10000000b) are FORBIDDEN
+        if((level&0x7F) == 0){
+            av_log(s->avctx, AV_LOG_ERROR, "illegal dc %d at %d %d\n", level, s->mb_x, s->mb_y);
+            return -1;
+        }
+        // The code 1000 0000 is not used, the reconstruction level of 1024 being coded as 1111 1111.
+        if (level == 255)
+            level = 128;
+        block[0] = level;
+        i = 1;
+    }else if(coded){
+        // Run  Level   Code
+        // EOB                  Not possible for first level when cbp is available (that's why the table is different)
+        // 0    1               1s
+        // *    *               0*
+        int check = show_bits(&s->gb, 2);
+        i = 0;
+        if ( check & 0x2 ){
+            skip_bits(&s->gb, 2);
+            block[0] = ( check & 0x1 ) ? -1 : 1;
+            i = 1;
+        }
+    }else{
+        i = 0;
+    }
+    if(!coded){
+        s->block_last_index[n] = i - 1;
+        return 0;
+    }
+    for(;;){
+        code = get_vlc2(&s->gb, rl->vlc.table, TCOEFF_VLC_BITS, 2);
+        if (code < 0){
+            av_log(s->avctx, AV_LOG_ERROR, "illegal ac vlc code at %dx%d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+        if (code == rl->n) {
+            /* escape */
+            // The remaining combinations of (run, level) are encoded with a 20-bit word consisting of 6 bits escape, 6 bits run and 8 bits level.
+            run = get_bits(&s->gb, 6);
+            level = get_sbits(&s->gb, 8);
+        }else if(code == 0){
+            break;
+        }else{
+            run = rl->table_run[code];
+            level = rl->table_level[code];
+            if (get_bits1(&s->gb))
+                level = -level;
+        }
+        i += run;
+        if (i >= 64){
+            av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+        j = scan_table[i];
+        block[j] = level;
+        i++;
+    }
+    s->block_last_index[n] = i-1;
+    return 0;
+}
+
+/**
+ * decodes the H261 picture header.
+ * @return <0 if no startcode found
+ */
+static int h261_decode_picture_header(H261Context *h){
+    MpegEncContext * const s = &h->s;
+    int format, i;
+    uint32_t startcode= 0;
+
+    for(i= s->gb.size_in_bits - get_bits_count(&s->gb); i>24; i-=1){
+        startcode = ((startcode << 1) | get_bits(&s->gb, 1)) & 0x000FFFFF;
+
+        if(startcode == 0x10)
+            break;
+    }
+
+    if (startcode != 0x10){
+        av_log(s->avctx, AV_LOG_ERROR, "Bad picture start code\n");
+        return -1;
+    }
+
+    /* temporal reference */
+    i= get_bits(&s->gb, 5); /* picture timestamp */
+    if(i < (s->picture_number&31))
+        i += 32;
+    s->picture_number = (s->picture_number&~31) + i;
+
+    s->avctx->time_base= (AVRational){1001, 30000};
+    s->current_picture.pts= s->picture_number;
+
+
+    /* PTYPE starts here */
+    skip_bits1(&s->gb); /* split screen off */
+    skip_bits1(&s->gb); /* camera  off */
+    skip_bits1(&s->gb); /* freeze picture release off */
+
+    format = get_bits1(&s->gb);
+
+    //only 2 formats possible
+    if (format == 0){//QCIF
+        s->width = 176;
+        s->height = 144;
+        s->mb_width = 11;
+        s->mb_height = 9;
+    }else{//CIF
+        s->width = 352;
+        s->height = 288;
+        s->mb_width = 22;
+        s->mb_height = 18;
+    }
+
+    s->mb_num = s->mb_width * s->mb_height;
+
+    skip_bits1(&s->gb); /* still image mode off */
+    skip_bits1(&s->gb); /* Reserved */
+
+    /* PEI */
+    while (get_bits1(&s->gb) != 0){
+        skip_bits(&s->gb, 8);
+    }
+
+    // h261 has no I-FRAMES, but if we pass I_TYPE for the first frame, the codec crashes if it does
+    // not contain all I-blocks (e.g. when a packet is lost)
+    s->pict_type = P_TYPE;
+
+    h->gob_number = 0;
+    return 0;
+}
+
+static int h261_decode_gob(H261Context *h){
+    MpegEncContext * const s = &h->s;
+
+    ff_set_qscale(s, s->qscale);
+
+    /* decode mb's */
+    while(h->current_mba <= MBA_STUFFING)
+    {
+        int ret;
+        /* DCT & quantize */
+        ret= h261_decode_mb(h);
+        if(ret<0){
+            if(ret==SLICE_END){
+                h261_decode_mb_skipped(h, h->current_mba, 33);
+                return 0;
+            }
+            av_log(s->avctx, AV_LOG_ERROR, "Error at MB: %d\n", s->mb_x + s->mb_y*s->mb_stride);
+            return -1;
+        }
+
+        h261_decode_mb_skipped(h, h->current_mba-h->mba_diff, h->current_mba-1);
+    }
+
+    return -1;
+}
+
+#ifdef CONFIG_H261_PARSER
+static int h261_find_frame_end(ParseContext *pc, AVCodecContext* avctx, const uint8_t *buf, int buf_size){
+    int vop_found, i, j;
+    uint32_t state;
+
+    vop_found= pc->frame_start_found;
+    state= pc->state;
+
+    for(i=0; i<buf_size && !vop_found; i++){
+        state= (state<<8) | buf[i];
+        for(j=0; j<8; j++){
+            if(((state>>j)&0xFFFFF) == 0x00010){
+                vop_found=1;
+                break;
+            }
+        }
+    }
+    if(vop_found){
+        for(; i<buf_size; i++){
+            state= (state<<8) | buf[i];
+            for(j=0; j<8; j++){
+                if(((state>>j)&0xFFFFF) == 0x00010){
+                    pc->frame_start_found=0;
+                    pc->state= state>>(2*8);
+                    return i-1;
+                }
+            }
+        }
+    }
+
+    pc->frame_start_found= vop_found;
+    pc->state= state;
+    return END_NOT_FOUND;
+}
+
+static int h261_parse(AVCodecParserContext *s,
+                      AVCodecContext *avctx,
+                      uint8_t **poutbuf, int *poutbuf_size,
+                      const uint8_t *buf, int buf_size)
+{
+    ParseContext *pc = s->priv_data;
+    int next;
+
+    next= h261_find_frame_end(pc,avctx, buf, buf_size);
+    if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
+        *poutbuf = NULL;
+        *poutbuf_size = 0;
+        return buf_size;
+    }
+    *poutbuf = (uint8_t *)buf;
+    *poutbuf_size = buf_size;
+    return next;
+}
+#endif
+
+/**
+ * returns the number of bytes consumed for building the current frame
+ */
+static int get_consumed_bytes(MpegEncContext *s, int buf_size){
+    int pos= get_bits_count(&s->gb)>>3;
+    if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
+    if(pos+10>buf_size) pos=buf_size; // oops ;)
+
+    return pos;
+}
+
+static int h261_decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             uint8_t *buf, int buf_size)
+{
+    H261Context *h= avctx->priv_data;
+    MpegEncContext *s = &h->s;
+    int ret;
+    AVFrame *pict = data;
+
+#ifdef DEBUG
+    av_log(avctx, AV_LOG_DEBUG, "*****frame %d size=%d\n", avctx->frame_number, buf_size);
+    av_log(avctx, AV_LOG_DEBUG, "bytes=%x %x %x %x\n", buf[0], buf[1], buf[2], buf[3]);
+#endif
+    s->flags= avctx->flags;
+    s->flags2= avctx->flags2;
+
+    h->gob_start_code_skipped=0;
+
+retry:
+
+    init_get_bits(&s->gb, buf, buf_size*8);
+
+    if(!s->context_initialized){
+        if (MPV_common_init(s) < 0) //we need the idct permutaton for reading a custom matrix
+            return -1;
+    }
+
+    //we need to set current_picture_ptr before reading the header, otherwise we cant store anyting im there
+    if(s->current_picture_ptr==NULL || s->current_picture_ptr->data[0]){
+        int i= ff_find_unused_picture(s, 0);
+        s->current_picture_ptr= &s->picture[i];
+    }
+
+    ret = h261_decode_picture_header(h);
+
+    /* skip if the header was thrashed */
+    if (ret < 0){
+        av_log(s->avctx, AV_LOG_ERROR, "header damaged\n");
+        return -1;
+    }
+
+    if (s->width != avctx->coded_width || s->height != avctx->coded_height){
+        ParseContext pc= s->parse_context; //FIXME move these demuxng hack to avformat
+        s->parse_context.buffer=0;
+        MPV_common_end(s);
+        s->parse_context= pc;
+    }
+    if (!s->context_initialized) {
+        avcodec_set_dimensions(avctx, s->width, s->height);
+
+        goto retry;
+    }
+
+    // for hurry_up==5
+    s->current_picture.pict_type= s->pict_type;
+    s->current_picture.key_frame= s->pict_type == I_TYPE;
+
+    /* skip everything if we are in a hurry>=5 */
+    if(avctx->hurry_up>=5) return get_consumed_bytes(s, buf_size);
+    if(  (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==B_TYPE)
+       ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=I_TYPE)
+       || avctx->skip_frame >= AVDISCARD_ALL)
+        return get_consumed_bytes(s, buf_size);
+
+    if(MPV_frame_start(s, avctx) < 0)
+        return -1;
+
+    ff_er_frame_start(s);
+
+    /* decode each macroblock */
+    s->mb_x=0;
+    s->mb_y=0;
+
+    while(h->gob_number < (s->mb_height==18 ? 12 : 5)){
+        if(ff_h261_resync(h)<0)
+            break;
+        h261_decode_gob(h);
+    }
+    MPV_frame_end(s);
+
+assert(s->current_picture.pict_type == s->current_picture_ptr->pict_type);
+assert(s->current_picture.pict_type == s->pict_type);
+    *pict= *(AVFrame*)s->current_picture_ptr;
+    ff_print_debug_info(s, pict);
+
+    *data_size = sizeof(AVFrame);
+
+    return get_consumed_bytes(s, buf_size);
+}
+
+static int h261_decode_end(AVCodecContext *avctx)
+{
+    H261Context *h= avctx->priv_data;
+    MpegEncContext *s = &h->s;
+
+    MPV_common_end(s);
+    return 0;
+}
+
+#ifdef CONFIG_ENCODERS
+AVCodec h261_encoder = {
+    "h261",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_H261,
+    sizeof(H261Context),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+};
+#endif
+
+AVCodec h261_decoder = {
+    "h261",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_H261,
+    sizeof(H261Context),
+    h261_decode_init,
+    NULL,
+    h261_decode_end,
+    h261_decode_frame,
+    CODEC_CAP_DR1,
+};
+
+#ifdef CONFIG_H261_PARSER
+AVCodecParser h261_parser = {
+    { CODEC_ID_H261 },
+    sizeof(ParseContext),
+    NULL,
+    h261_parse,
+    ff_parse_close,
+};
+#endif
diff --git a/contrib/ffmpeg/libavcodec/h261data.h b/contrib/ffmpeg/libavcodec/h261data.h
new file mode 100644
index 000000000..2a93b73e3
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/h261data.h
@@ -0,0 +1,157 @@
+/*
+ * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * copyright (c) 2004 Maarten Daniels
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h261data.h
+ * H.261 tables.
+ */
+#define MB_TYPE_H261_FIL 0x800000
+
+// H.261 VLC table for macroblock addressing
+static const uint8_t h261_mba_code[35] = {
+     1,  3,  2,  3,
+     2,  3,  2,  7,
+     6, 11, 10,  9,
+     8,  7,  6, 23,
+    22, 21, 20, 19,
+    18, 35, 34, 33,
+    32, 31, 30, 29,
+    28, 27, 26, 25,
+    24,
+    15,           //(MBA stuffing)
+    1             //(start code)
+};
+
+static const uint8_t h261_mba_bits[35] = {
+     1,  3,  3,  4,
+     4,  5,  5,  7,
+     7,  8,  8,  8,
+     8,  8,  8, 10,
+    10, 10, 10, 10,
+    10, 11, 11, 11,
+    11, 11, 11, 11,
+    11, 11, 11, 11,
+    11,
+    11,           //(MBA stuffing)
+    16            //(start code)
+};
+
+//H.261 VLC table for macroblock type
+static const uint8_t h261_mtype_code[10] = {
+    1,  1,  1,  1,
+    1,  1,  1,  1,
+    1,  1
+};
+
+static const uint8_t h261_mtype_bits[10] = {
+    4,  7,  1,  5,
+    9,  8, 10,  3,
+    2,  6
+};
+
+static const int h261_mtype_map[10]= {
+        MB_TYPE_INTRA4x4,
+        MB_TYPE_INTRA4x4  |  MB_TYPE_QUANT,
+                                               MB_TYPE_CBP,
+                             MB_TYPE_QUANT  |  MB_TYPE_CBP,
+                                                               MB_TYPE_16x16,
+                                               MB_TYPE_CBP  |  MB_TYPE_16x16,
+                             MB_TYPE_QUANT  |  MB_TYPE_CBP  |  MB_TYPE_16x16,
+                                                               MB_TYPE_16x16  |  MB_TYPE_H261_FIL,
+                                               MB_TYPE_CBP  |  MB_TYPE_16x16  |  MB_TYPE_H261_FIL,
+                             MB_TYPE_QUANT  |  MB_TYPE_CBP  |  MB_TYPE_16x16  |  MB_TYPE_H261_FIL
+};
+
+//H.261 VLC table for motion vectors
+static const uint8_t h261_mv_tab[17][2] = {
+    {1,1}, {1,2}, {1,3}, {1,4}, {3,6}, {5,7}, {4,7}, {3,7},
+    {11,9}, {10,9}, {9,9}, {17,10}, {16,10}, {15,10}, {14,10}, {13,10}, {12,10}
+};
+
+static const int mvmap[17] =
+{
+    0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16
+};
+
+//H.261 VLC table for coded block pattern
+static const uint8_t h261_cbp_tab[63][2] =
+{
+    {11,5}, {9,5}, {13,6}, {13,4}, {23,7}, {19,7}, {31,8}, {12,4},
+    {22,7}, {18,7}, {30,8}, {19,5}, {27,8}, {23,8}, {19,8}, {11,4},
+    {21,7}, {17,7}, {29,8}, {17,5}, {25,8}, {21,8}, {17,8}, {15,6},
+    {15,8}, {13,8}, {3,9}, {15,5}, {11,8}, {7,8}, {7,9}, {10,4},
+    {20,7}, {16,7}, {28,8}, {14,6}, {14,8}, {12,8}, {2,9}, {16,5},
+    {24,8}, {20,8}, {16,8}, {14,5}, {10,8}, {6,8}, {6,9}, {18,5},
+    {26,8}, {22,8}, {18,8}, {13,5}, {9,8}, {5,8}, {5,9}, {12,5},
+    {8,8}, {4,8}, {4,9}, {7,3}, {10,5}, {8,5}, {12,6}
+};
+
+//H.261 VLC table for transform coefficients
+static const uint16_t h261_tcoeff_vlc[65][2] = {
+{ 0x2, 2 }, { 0x3, 2 },{ 0x4, 4 },{ 0x5, 5 },
+{ 0x6, 7 },{ 0x26, 8 },{ 0x21, 8 },{ 0xa, 10 },
+{ 0x1d, 12 },{ 0x18, 12 },{ 0x13, 12 },{ 0x10 , 12 },
+{ 0x1a, 13},{ 0x19, 13 }, { 0x18, 13 }, { 0x17, 13 },
+{ 0x3, 3 }, { 0x6, 6 }, { 0x25 , 8 }, { 0xc, 10 },
+{ 0x1b, 12 }, { 0x16, 13 }, { 0x15, 13 }, { 0x5, 4},
+{ 0x4, 7}, { 0xb, 10 }, { 0x14, 12 }, { 0x14, 13 },
+{ 0x7, 5 }, { 0x24, 8 }, { 0x1c, 12 }, { 0x13, 13 },
+{ 0x6, 5 }, { 0xf, 10 }, { 0x12, 12}, { 0x7, 6},
+{ 0x9 , 10 }, { 0x12, 13 }, { 0x5, 6 }, { 0x1e, 12 },
+{ 0x4, 6 }, { 0x15, 12 }, { 0x7, 7 }, { 0x11, 12},
+{ 0x5, 7 }, { 0x11, 13 }, { 0x27, 8 }, { 0x10, 13 },
+{ 0x23, 8 }, { 0x22, 8 }, { 0x20, 8 }, { 0xe , 10 },
+{ 0xd, 10 }, { 0x8, 10 },{ 0x1f, 12 }, { 0x1a, 12 },
+{ 0x19, 12 }, { 0x17, 12 }, { 0x16, 12}, { 0x1f, 13},
+{ 0x1e, 13 }, { 0x1d, 13 }, { 0x1c, 13}, { 0x1b, 13},
+{ 0x1, 6 }                                             //escape
+};
+
+static const int8_t h261_tcoeff_level[64] = {
+    0,  1,  2,  3,  4,  5,  6,  7,
+    8,  9, 10, 11, 12, 13, 14, 15,
+    1,  2,  3,  4,  5,  6,  7,  1,
+    2,  3,  4,  5,  1,  2,  3,  4,
+    1,  2,  3,  1,  2,  3,  1,  2,
+    1,  2,  1,  2,  1,  2,  1,  2,
+    1,  1,  1,  1,  1,  1,  1,  1,
+    1,  1,  1,  1,  1,  1,  1,  1
+};
+
+static const int8_t h261_tcoeff_run[64] = {
+    0,
+    0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  1,
+    1,  1,  1,  1,  1,  1,  2,  2,
+    2,  2,  2,  3,  3,  3,  3,  4,
+    4,  4,  5,  5,  5,  6,  6,  7,
+    7,  8,  8,  9,  9, 10, 10, 11,
+   12, 13, 14, 15, 16, 17, 18, 19,
+   20, 21, 22, 23, 24, 25, 26
+};
+
+static RLTable h261_rl_tcoeff = {
+    64,
+    64,
+    h261_tcoeff_vlc,
+    h261_tcoeff_run,
+    h261_tcoeff_level,
+};
diff --git a/contrib/ffmpeg/libavcodec/h263.c b/contrib/ffmpeg/libavcodec/h263.c
new file mode 100644
index 000000000..ba51c245a
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/h263.c
@@ -0,0 +1,6294 @@
+/*
+ * H263/MPEG4 backend for ffmpeg encoder and decoder
+ * Copyright (c) 2000,2001 Fabrice Bellard.
+ * H263+ support.
+ * Copyright (c) 2001 Juan J. Sierralta P.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * ac prediction encoding, b-frame support, error resilience, optimizations,
+ * qpel decoding, gmc decoding, interlaced decoding,
+ * by Michael Niedermayer <michaelni@gmx.at>
+ */
+
+/**
+ * @file h263.c
+ * h263/mpeg4 codec.
+ */
+
+//#define DEBUG
+#include <limits.h>
+
+#include "common.h"
+#include "dsputil.h"
+#include "avcodec.h"
+#include "mpegvideo.h"
+#include "h263data.h"
+#include "mpeg4data.h"
+
+//#undef NDEBUG
+//#include <assert.h>
+
+#define INTRA_MCBPC_VLC_BITS 6
+#define INTER_MCBPC_VLC_BITS 7
+#define CBPY_VLC_BITS 6
+#define MV_VLC_BITS 9
+#define DC_VLC_BITS 9
+#define SPRITE_TRAJ_VLC_BITS 6
+#define MB_TYPE_B_VLC_BITS 4
+#define TEX_VLC_BITS 9
+#define H263_MBTYPE_B_VLC_BITS 6
+#define CBPC_B_VLC_BITS 3
+
+#ifdef CONFIG_ENCODERS
+static void h263_encode_block(MpegEncContext * s, DCTELEM * block,
+                              int n);
+static void h263p_encode_umotion(MpegEncContext * s, int val);
+static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block,
+                               int n, int dc, uint8_t *scan_table,
+                               PutBitContext *dc_pb, PutBitContext *ac_pb);
+static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
+                                  uint8_t *scan_table);
+#endif
+
+static int h263_decode_motion(MpegEncContext * s, int pred, int fcode);
+static int h263p_decode_umotion(MpegEncContext * s, int pred);
+static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
+                             int n, int coded);
+static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
+static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
+                              int n, int coded, int intra, int rvlc);
+#ifdef CONFIG_ENCODERS
+static int h263_pred_dc(MpegEncContext * s, int n, int16_t **dc_val_ptr);
+static void mpeg4_encode_visual_object_header(MpegEncContext * s);
+static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_number);
+#endif //CONFIG_ENCODERS
+static void mpeg4_decode_sprite_trajectory(MpegEncContext * s, GetBitContext *gb);
+static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *dir_ptr, int encoding);
+
+#ifdef CONFIG_ENCODERS
+static uint8_t uni_DCtab_lum_len[512];
+static uint8_t uni_DCtab_chrom_len[512];
+static uint16_t uni_DCtab_lum_bits[512];
+static uint16_t uni_DCtab_chrom_bits[512];
+
+static uint8_t (*mv_penalty)[MAX_MV*2+1]= NULL;
+static uint8_t fcode_tab[MAX_MV*2+1];
+static uint8_t umv_fcode_tab[MAX_MV*2+1];
+
+static uint32_t uni_mpeg4_intra_rl_bits[64*64*2*2];
+static uint8_t  uni_mpeg4_intra_rl_len [64*64*2*2];
+static uint32_t uni_mpeg4_inter_rl_bits[64*64*2*2];
+static uint8_t  uni_mpeg4_inter_rl_len [64*64*2*2];
+static uint8_t  uni_h263_intra_aic_rl_len [64*64*2*2];
+static uint8_t  uni_h263_inter_rl_len [64*64*2*2];
+//#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128 + (run)*256 + (level))
+//#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128*64 + (run) + (level)*64)
+#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128*64 + (run)*128 + (level))
+
+/* mpeg4
+inter
+max level: 24/6
+max run: 53/63
+
+intra
+max level: 53/16
+max run: 29/41
+*/
+#endif
+
+#if 0 //3IV1 is quite rare and it slows things down a tiny bit
+#define IS_3IV1 s->codec_tag == ff_get_fourcc("3IV1")
+#else
+#define IS_3IV1 0
+#endif
+
+int h263_get_picture_format(int width, int height)
+{
+    int format;
+
+    if (width == 128 && height == 96)
+        format = 1;
+    else if (width == 176 && height == 144)
+        format = 2;
+    else if (width == 352 && height == 288)
+        format = 3;
+    else if (width == 704 && height == 576)
+        format = 4;
+    else if (width == 1408 && height == 1152)
+        format = 5;
+    else
+        format = 7;
+    return format;
+}
+
+#ifdef CONFIG_ENCODERS
+
+static void aspect_to_info(MpegEncContext * s, AVRational aspect){
+    int i;
+
+    if(aspect.num==0) aspect= (AVRational){1,1};
+
+    for(i=1; i<6; i++){
+        if(av_cmp_q(pixel_aspect[i], aspect) == 0){
+            s->aspect_ratio_info=i;
+            return;
+        }
+    }
+
+    s->aspect_ratio_info= FF_ASPECT_EXTENDED;
+}
+
+void ff_flv_encode_picture_header(MpegEncContext * s, int picture_number)
+{
+      int format;
+
+      align_put_bits(&s->pb);
+
+      put_bits(&s->pb, 17, 1);
+      put_bits(&s->pb, 5, (s->h263_flv-1)); /* 0: h263 escape codes 1: 11-bit escape codes */
+      put_bits(&s->pb, 8, (((int64_t)s->picture_number * 30 * s->avctx->time_base.num) / //FIXME use timestamp
+                           s->avctx->time_base.den) & 0xff); /* TemporalReference */
+      if (s->width == 352 && s->height == 288)
+        format = 2;
+      else if (s->width == 176 && s->height == 144)
+        format = 3;
+      else if (s->width == 128 && s->height == 96)
+        format = 4;
+      else if (s->width == 320 && s->height == 240)
+        format = 5;
+      else if (s->width == 160 && s->height == 120)
+        format = 6;
+      else if (s->width <= 255 && s->height <= 255)
+        format = 0; /* use 1 byte width & height */
+      else
+        format = 1; /* use 2 bytes width & height */
+      put_bits(&s->pb, 3, format); /* PictureSize */
+      if (format == 0) {
+        put_bits(&s->pb, 8, s->width);
+        put_bits(&s->pb, 8, s->height);
+      } else if (format == 1) {
+        put_bits(&s->pb, 16, s->width);
+        put_bits(&s->pb, 16, s->height);
+      }
+      put_bits(&s->pb, 2, s->pict_type == P_TYPE); /* PictureType */
+      put_bits(&s->pb, 1, 1); /* DeblockingFlag: on */
+      put_bits(&s->pb, 5, s->qscale); /* Quantizer */
+      put_bits(&s->pb, 1, 0); /* ExtraInformation */
+
+      if(s->h263_aic){
+        s->y_dc_scale_table=
+          s->c_dc_scale_table= ff_aic_dc_scale_table;
+      }else{
+        s->y_dc_scale_table=
+          s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+      }
+}
+
+void h263_encode_picture_header(MpegEncContext * s, int picture_number)
+{
+    int format, coded_frame_rate, coded_frame_rate_base, i, temp_ref;
+    int best_clock_code=1;
+    int best_divisor=60;
+    int best_error= INT_MAX;
+
+    if(s->h263_plus){
+        for(i=0; i<2; i++){
+            int div, error;
+            div= (s->avctx->time_base.num*1800000LL + 500LL*s->avctx->time_base.den) / ((1000LL+i)*s->avctx->time_base.den);
+            div= clip(1, div, 127);
+            error= FFABS(s->avctx->time_base.num*1800000LL - (1000LL+i)*s->avctx->time_base.den*div);
+            if(error < best_error){
+                best_error= error;
+                best_divisor= div;
+                best_clock_code= i;
+            }
+        }
+    }
+    s->custom_pcf= best_clock_code!=1 || best_divisor!=60;
+    coded_frame_rate= 1800000;
+    coded_frame_rate_base= (1000+best_clock_code)*best_divisor;
+
+    align_put_bits(&s->pb);
+
+    /* Update the pointer to last GOB */
+    s->ptr_lastgob = pbBufPtr(&s->pb);
+    put_bits(&s->pb, 22, 0x20); /* PSC */
+    temp_ref= s->picture_number * (int64_t)coded_frame_rate * s->avctx->time_base.num / //FIXME use timestamp
+                         (coded_frame_rate_base * (int64_t)s->avctx->time_base.den);
+    put_bits(&s->pb, 8, temp_ref & 0xff); /* TemporalReference */
+
+    put_bits(&s->pb, 1, 1);     /* marker */
+    put_bits(&s->pb, 1, 0);     /* h263 id */
+    put_bits(&s->pb, 1, 0);     /* split screen off */
+    put_bits(&s->pb, 1, 0);     /* camera  off */
+    put_bits(&s->pb, 1, 0);     /* freeze picture release off */
+
+    format = h263_get_picture_format(s->width, s->height);
+    if (!s->h263_plus) {
+        /* H.263v1 */
+        put_bits(&s->pb, 3, format);
+        put_bits(&s->pb, 1, (s->pict_type == P_TYPE));
+        /* By now UMV IS DISABLED ON H.263v1, since the restrictions
+        of H.263v1 UMV implies to check the predicted MV after
+        calculation of the current MB to see if we're on the limits */
+        put_bits(&s->pb, 1, 0);         /* Unrestricted Motion Vector: off */
+        put_bits(&s->pb, 1, 0);         /* SAC: off */
+        put_bits(&s->pb, 1, s->obmc);   /* Advanced Prediction */
+        put_bits(&s->pb, 1, 0);         /* only I/P frames, no PB frame */
+        put_bits(&s->pb, 5, s->qscale);
+        put_bits(&s->pb, 1, 0);         /* Continuous Presence Multipoint mode: off */
+    } else {
+        int ufep=1;
+        /* H.263v2 */
+        /* H.263 Plus PTYPE */
+
+        put_bits(&s->pb, 3, 7);
+        put_bits(&s->pb,3,ufep); /* Update Full Extended PTYPE */
+        if (format == 7)
+            put_bits(&s->pb,3,6); /* Custom Source Format */
+        else
+            put_bits(&s->pb, 3, format);
+
+        put_bits(&s->pb,1, s->custom_pcf);
+        put_bits(&s->pb,1, s->umvplus); /* Unrestricted Motion Vector */
+        put_bits(&s->pb,1,0); /* SAC: off */
+        put_bits(&s->pb,1,s->obmc); /* Advanced Prediction Mode */
+        put_bits(&s->pb,1,s->h263_aic); /* Advanced Intra Coding */
+        put_bits(&s->pb,1,s->loop_filter); /* Deblocking Filter */
+        put_bits(&s->pb,1,s->h263_slice_structured); /* Slice Structured */
+        put_bits(&s->pb,1,0); /* Reference Picture Selection: off */
+        put_bits(&s->pb,1,0); /* Independent Segment Decoding: off */
+        put_bits(&s->pb,1,s->alt_inter_vlc); /* Alternative Inter VLC */
+        put_bits(&s->pb,1,s->modified_quant); /* Modified Quantization: */
+        put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
+        put_bits(&s->pb,3,0); /* Reserved */
+
+        put_bits(&s->pb, 3, s->pict_type == P_TYPE);
+
+        put_bits(&s->pb,1,0); /* Reference Picture Resampling: off */
+        put_bits(&s->pb,1,0); /* Reduced-Resolution Update: off */
+        put_bits(&s->pb,1,s->no_rounding); /* Rounding Type */
+        put_bits(&s->pb,2,0); /* Reserved */
+        put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
+
+        /* This should be here if PLUSPTYPE */
+        put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
+
+                if (format == 7) {
+            /* Custom Picture Format (CPFMT) */
+            aspect_to_info(s, s->avctx->sample_aspect_ratio);
+
+            put_bits(&s->pb,4,s->aspect_ratio_info);
+            put_bits(&s->pb,9,(s->width >> 2) - 1);
+            put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
+            put_bits(&s->pb,9,(s->height >> 2));
+            if (s->aspect_ratio_info == FF_ASPECT_EXTENDED){
+                put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.num);
+                put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den);
+            }
+        }
+        if(s->custom_pcf){
+            if(ufep){
+                put_bits(&s->pb, 1, best_clock_code);
+                put_bits(&s->pb, 7, best_divisor);
+            }
+            put_bits(&s->pb, 2, (temp_ref>>8)&3);
+        }
+
+        /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */
+        if (s->umvplus)
+//            put_bits(&s->pb,1,1); /* Limited according tables of Annex D */
+//FIXME check actual requested range
+            put_bits(&s->pb,2,1); /* unlimited */
+        if(s->h263_slice_structured)
+            put_bits(&s->pb,2,0); /* no weird submodes */
+
+        put_bits(&s->pb, 5, s->qscale);
+    }
+
+    put_bits(&s->pb, 1, 0);     /* no PEI */
+
+    if(s->h263_slice_structured){
+        put_bits(&s->pb, 1, 1);
+
+        assert(s->mb_x == 0 && s->mb_y == 0);
+        ff_h263_encode_mba(s);
+
+        put_bits(&s->pb, 1, 1);
+    }
+
+    if(s->h263_aic){
+         s->y_dc_scale_table=
+         s->c_dc_scale_table= ff_aic_dc_scale_table;
+    }else{
+        s->y_dc_scale_table=
+        s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+    }
+}
+
+/**
+ * Encodes a group of blocks header.
+ */
+void h263_encode_gob_header(MpegEncContext * s, int mb_line)
+{
+    put_bits(&s->pb, 17, 1); /* GBSC */
+
+    if(s->h263_slice_structured){
+        put_bits(&s->pb, 1, 1);
+
+        ff_h263_encode_mba(s);
+
+        if(s->mb_num > 1583)
+            put_bits(&s->pb, 1, 1);
+        put_bits(&s->pb, 5, s->qscale); /* GQUANT */
+        put_bits(&s->pb, 1, 1);
+        put_bits(&s->pb, 2, s->pict_type == I_TYPE); /* GFID */
+    }else{
+        int gob_number= mb_line / s->gob_index;
+
+        put_bits(&s->pb, 5, gob_number); /* GN */
+        put_bits(&s->pb, 2, s->pict_type == I_TYPE); /* GFID */
+        put_bits(&s->pb, 5, s->qscale); /* GQUANT */
+    }
+}
+
+static inline int get_block_rate(MpegEncContext * s, DCTELEM block[64], int block_last_index, uint8_t scantable[64]){
+    int last=0;
+    int j;
+    int rate=0;
+
+    for(j=1; j<=block_last_index; j++){
+        const int index= scantable[j];
+        int level= block[index];
+        if(level){
+            level+= 64;
+            if((level&(~127)) == 0){
+                if(j<block_last_index) rate+= s->intra_ac_vlc_length     [UNI_AC_ENC_INDEX(j-last-1, level)];
+                else                   rate+= s->intra_ac_vlc_last_length[UNI_AC_ENC_INDEX(j-last-1, level)];
+            }else
+                rate += s->ac_esc_length;
+            level-= 64;
+
+            last= j;
+        }
+    }
+
+    return rate;
+}
+
+static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int dir[6], uint8_t *st[6], int zigzag_last_index[6])
+{
+    int score= 0;
+    int i, n;
+    int8_t * const qscale_table= s->current_picture.qscale_table;
+
+    memcpy(zigzag_last_index, s->block_last_index, sizeof(int)*6);
+
+    for(n=0; n<6; n++){
+        int16_t *ac_val, *ac_val1;
+
+        score -= get_block_rate(s, block[n], s->block_last_index[n], s->intra_scantable.permutated);
+
+        ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
+        ac_val1= ac_val;
+        if(dir[n]){
+            const int xy= s->mb_x + s->mb_y*s->mb_stride - s->mb_stride;
+            /* top prediction */
+            ac_val-= s->block_wrap[n]*16;
+            if(s->mb_y==0 || s->qscale == qscale_table[xy] || n==2 || n==3){
+                /* same qscale */
+                for(i=1; i<8; i++){
+                    const int level= block[n][s->dsp.idct_permutation[i   ]];
+                    block[n][s->dsp.idct_permutation[i   ]] = level - ac_val[i+8];
+                    ac_val1[i  ]=    block[n][s->dsp.idct_permutation[i<<3]];
+                    ac_val1[i+8]= level;
+                }
+            }else{
+                /* different qscale, we must rescale */
+                for(i=1; i<8; i++){
+                    const int level= block[n][s->dsp.idct_permutation[i   ]];
+                    block[n][s->dsp.idct_permutation[i   ]] = level - ROUNDED_DIV(ac_val[i + 8]*qscale_table[xy], s->qscale);
+                    ac_val1[i  ]=    block[n][s->dsp.idct_permutation[i<<3]];
+                    ac_val1[i+8]= level;
+                }
+            }
+            st[n]= s->intra_h_scantable.permutated;
+        }else{
+            const int xy= s->mb_x-1 + s->mb_y*s->mb_stride;
+            /* left prediction */
+            ac_val-= 16;
+            if(s->mb_x==0 || s->qscale == qscale_table[xy] || n==1 || n==3){
+                /* same qscale */
+                for(i=1; i<8; i++){
+                    const int level= block[n][s->dsp.idct_permutation[i<<3]];
+                    block[n][s->dsp.idct_permutation[i<<3]]= level - ac_val[i];
+                    ac_val1[i  ]= level;
+                    ac_val1[i+8]=    block[n][s->dsp.idct_permutation[i   ]];
+                }
+            }else{
+                /* different qscale, we must rescale */
+                for(i=1; i<8; i++){
+                    const int level= block[n][s->dsp.idct_permutation[i<<3]];
+                    block[n][s->dsp.idct_permutation[i<<3]]= level - ROUNDED_DIV(ac_val[i]*qscale_table[xy], s->qscale);
+                    ac_val1[i  ]= level;
+                    ac_val1[i+8]=    block[n][s->dsp.idct_permutation[i   ]];
+                }
+            }
+            st[n]= s->intra_v_scantable.permutated;
+        }
+
+        for(i=63; i>0; i--) //FIXME optimize
+            if(block[n][ st[n][i] ]) break;
+        s->block_last_index[n]= i;
+
+        score += get_block_rate(s, block[n], s->block_last_index[n], st[n]);
+    }
+
+    return score < 0;
+}
+
+static inline void restore_ac_coeffs(MpegEncContext * s, DCTELEM block[6][64], int dir[6], uint8_t *st[6], int zigzag_last_index[6])
+{
+    int i, n;
+    memcpy(s->block_last_index, zigzag_last_index, sizeof(int)*6);
+
+    for(n=0; n<6; n++){
+        int16_t *ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
+
+        st[n]= s->intra_scantable.permutated;
+        if(dir[n]){
+            /* top prediction */
+            for(i=1; i<8; i++){
+                block[n][s->dsp.idct_permutation[i   ]] = ac_val[i+8];
+            }
+        }else{
+            /* left prediction */
+            for(i=1; i<8; i++){
+                block[n][s->dsp.idct_permutation[i<<3]]= ac_val[i  ];
+            }
+        }
+    }
+}
+
+/**
+ * modify qscale so that encoding is acually possible in h263 (limit difference to -2..2)
+ */
+void ff_clean_h263_qscales(MpegEncContext *s){
+    int i;
+    int8_t * const qscale_table= s->current_picture.qscale_table;
+
+    for(i=1; i<s->mb_num; i++){
+        if(qscale_table[ s->mb_index2xy[i] ] - qscale_table[ s->mb_index2xy[i-1] ] >2)
+            qscale_table[ s->mb_index2xy[i] ]= qscale_table[ s->mb_index2xy[i-1] ]+2;
+    }
+    for(i=s->mb_num-2; i>=0; i--){
+        if(qscale_table[ s->mb_index2xy[i] ] - qscale_table[ s->mb_index2xy[i+1] ] >2)
+            qscale_table[ s->mb_index2xy[i] ]= qscale_table[ s->mb_index2xy[i+1] ]+2;
+    }
+
+    if(s->codec_id != CODEC_ID_H263P){
+        for(i=1; i<s->mb_num; i++){
+            int mb_xy= s->mb_index2xy[i];
+
+            if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V)){
+                s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_INTER4V;
+                s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_INTER;
+            }
+        }
+    }
+}
+
+/**
+ * modify mb_type & qscale so that encoding is acually possible in mpeg4
+ */
+void ff_clean_mpeg4_qscales(MpegEncContext *s){
+    int i;
+    int8_t * const qscale_table= s->current_picture.qscale_table;
+
+    ff_clean_h263_qscales(s);
+
+    if(s->pict_type== B_TYPE){
+        int odd=0;
+        /* ok, come on, this isn't funny anymore, there's more code for handling this mpeg4 mess than for the actual adaptive quantization */
+
+        for(i=0; i<s->mb_num; i++){
+            int mb_xy= s->mb_index2xy[i];
+            odd += qscale_table[mb_xy]&1;
+        }
+
+        if(2*odd > s->mb_num) odd=1;
+        else                  odd=0;
+
+        for(i=0; i<s->mb_num; i++){
+            int mb_xy= s->mb_index2xy[i];
+            if((qscale_table[mb_xy]&1) != odd)
+                qscale_table[mb_xy]++;
+            if(qscale_table[mb_xy] > 31)
+                qscale_table[mb_xy]= 31;
+        }
+
+        for(i=1; i<s->mb_num; i++){
+            int mb_xy= s->mb_index2xy[i];
+            if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_DIRECT)){
+                s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_DIRECT;
+                s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_BIDIR;
+            }
+        }
+    }
+}
+
+#endif //CONFIG_ENCODERS
+
+#define tab_size ((signed)(sizeof(s->direct_scale_mv[0])/sizeof(int16_t)))
+#define tab_bias (tab_size/2)
+
+void ff_mpeg4_init_direct_mv(MpegEncContext *s){
+    int i;
+    for(i=0; i<tab_size; i++){
+        s->direct_scale_mv[0][i] = (i-tab_bias)*s->pb_time/s->pp_time;
+        s->direct_scale_mv[1][i] = (i-tab_bias)*(s->pb_time-s->pp_time)/s->pp_time;
+    }
+}
+
+static inline void ff_mpeg4_set_one_direct_mv(MpegEncContext *s, int mx, int my, int i){
+    int xy= s->block_index[i];
+    uint16_t time_pp= s->pp_time;
+    uint16_t time_pb= s->pb_time;
+    int p_mx, p_my;
+
+    p_mx= s->next_picture.motion_val[0][xy][0];
+    if((unsigned)(p_mx + tab_bias) < tab_size){
+        s->mv[0][i][0] = s->direct_scale_mv[0][p_mx + tab_bias] + mx;
+        s->mv[1][i][0] = mx ? s->mv[0][i][0] - p_mx
+                            : s->direct_scale_mv[1][p_mx + tab_bias];
+    }else{
+        s->mv[0][i][0] = p_mx*time_pb/time_pp + mx;
+        s->mv[1][i][0] = mx ? s->mv[0][i][0] - p_mx
+                            : p_mx*(time_pb - time_pp)/time_pp;
+    }
+    p_my= s->next_picture.motion_val[0][xy][1];
+    if((unsigned)(p_my + tab_bias) < tab_size){
+        s->mv[0][i][1] = s->direct_scale_mv[0][p_my + tab_bias] + my;
+        s->mv[1][i][1] = my ? s->mv[0][i][1] - p_my
+                            : s->direct_scale_mv[1][p_my + tab_bias];
+    }else{
+        s->mv[0][i][1] = p_my*time_pb/time_pp + my;
+        s->mv[1][i][1] = my ? s->mv[0][i][1] - p_my
+                            : p_my*(time_pb - time_pp)/time_pp;
+    }
+}
+
+#undef tab_size
+#undef tab_bias
+
+/**
+ *
+ * @return the mb_type
+ */
+int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
+    const int mb_index= s->mb_x + s->mb_y*s->mb_stride;
+    const int colocated_mb_type= s->next_picture.mb_type[mb_index];
+    uint16_t time_pp= s->pp_time;
+    uint16_t time_pb= s->pb_time;
+    int i;
+
+    //FIXME avoid divides
+    // try special case with shifts for 1 and 3 B-frames?
+
+    if(IS_8X8(colocated_mb_type)){
+        s->mv_type = MV_TYPE_8X8;
+        for(i=0; i<4; i++){
+            ff_mpeg4_set_one_direct_mv(s, mx, my, i);
+        }
+        return MB_TYPE_DIRECT2 | MB_TYPE_8x8 | MB_TYPE_L0L1;
+    } else if(IS_INTERLACED(colocated_mb_type)){
+        s->mv_type = MV_TYPE_FIELD;
+        for(i=0; i<2; i++){
+            int field_select= s->next_picture.ref_index[0][s->block_index[2*i]];
+            s->field_select[0][i]= field_select;
+            s->field_select[1][i]= i;
+            if(s->top_field_first){
+                time_pp= s->pp_field_time - field_select + i;
+                time_pb= s->pb_field_time - field_select + i;
+            }else{
+                time_pp= s->pp_field_time + field_select - i;
+                time_pb= s->pb_field_time + field_select - i;
+            }
+            s->mv[0][i][0] = s->p_field_mv_table[i][0][mb_index][0]*time_pb/time_pp + mx;
+            s->mv[0][i][1] = s->p_field_mv_table[i][0][mb_index][1]*time_pb/time_pp + my;
+            s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->p_field_mv_table[i][0][mb_index][0]
+                                : s->p_field_mv_table[i][0][mb_index][0]*(time_pb - time_pp)/time_pp;
+            s->mv[1][i][1] = my ? s->mv[0][i][1] - s->p_field_mv_table[i][0][mb_index][1]
+                                : s->p_field_mv_table[i][0][mb_index][1]*(time_pb - time_pp)/time_pp;
+        }
+        return MB_TYPE_DIRECT2 | MB_TYPE_16x8 | MB_TYPE_L0L1 | MB_TYPE_INTERLACED;
+    }else{
+        ff_mpeg4_set_one_direct_mv(s, mx, my, 0);
+        s->mv[0][1][0] = s->mv[0][2][0] = s->mv[0][3][0] = s->mv[0][0][0];
+        s->mv[0][1][1] = s->mv[0][2][1] = s->mv[0][3][1] = s->mv[0][0][1];
+        s->mv[1][1][0] = s->mv[1][2][0] = s->mv[1][3][0] = s->mv[1][0][0];
+        s->mv[1][1][1] = s->mv[1][2][1] = s->mv[1][3][1] = s->mv[1][0][1];
+        if((s->avctx->workaround_bugs & FF_BUG_DIRECT_BLOCKSIZE) || !s->quarter_sample)
+            s->mv_type= MV_TYPE_16X16;
+        else
+            s->mv_type= MV_TYPE_8X8;
+        return MB_TYPE_DIRECT2 | MB_TYPE_16x16 | MB_TYPE_L0L1; //Note see prev line
+    }
+}
+
+void ff_h263_update_motion_val(MpegEncContext * s){
+    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
+               //FIXME a lot of that is only needed for !low_delay
+    const int wrap = s->b8_stride;
+    const int xy = s->block_index[0];
+
+    s->current_picture.mbskip_table[mb_xy]= s->mb_skipped;
+
+    if(s->mv_type != MV_TYPE_8X8){
+        int motion_x, motion_y;
+        if (s->mb_intra) {
+            motion_x = 0;
+            motion_y = 0;
+        } else if (s->mv_type == MV_TYPE_16X16) {
+            motion_x = s->mv[0][0][0];
+            motion_y = s->mv[0][0][1];
+        } else /*if (s->mv_type == MV_TYPE_FIELD)*/ {
+            int i;
+            motion_x = s->mv[0][0][0] + s->mv[0][1][0];
+            motion_y = s->mv[0][0][1] + s->mv[0][1][1];
+            motion_x = (motion_x>>1) | (motion_x&1);
+            for(i=0; i<2; i++){
+                s->p_field_mv_table[i][0][mb_xy][0]= s->mv[0][i][0];
+                s->p_field_mv_table[i][0][mb_xy][1]= s->mv[0][i][1];
+            }
+            s->current_picture.ref_index[0][xy           ]=
+            s->current_picture.ref_index[0][xy        + 1]= s->field_select[0][0];
+            s->current_picture.ref_index[0][xy + wrap    ]=
+            s->current_picture.ref_index[0][xy + wrap + 1]= s->field_select[0][1];
+        }
+
+        /* no update if 8X8 because it has been done during parsing */
+        s->current_picture.motion_val[0][xy][0] = motion_x;
+        s->current_picture.motion_val[0][xy][1] = motion_y;
+        s->current_picture.motion_val[0][xy + 1][0] = motion_x;
+        s->current_picture.motion_val[0][xy + 1][1] = motion_y;
+        s->current_picture.motion_val[0][xy + wrap][0] = motion_x;
+        s->current_picture.motion_val[0][xy + wrap][1] = motion_y;
+        s->current_picture.motion_val[0][xy + 1 + wrap][0] = motion_x;
+        s->current_picture.motion_val[0][xy + 1 + wrap][1] = motion_y;
+    }
+
+    if(s->encoding){ //FIXME encoding MUST be cleaned up
+        if (s->mv_type == MV_TYPE_8X8)
+            s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_8x8;
+        else if(s->mb_intra)
+            s->current_picture.mb_type[mb_xy]= MB_TYPE_INTRA;
+        else
+            s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_16x16;
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+
+static inline int h263_get_motion_length(MpegEncContext * s, int val, int f_code){
+    int l, bit_size, code;
+
+    if (val == 0) {
+        return mvtab[0][1];
+    } else {
+        bit_size = f_code - 1;
+        /* modulo encoding */
+        l= INT_BIT - 6 - bit_size;
+        val = (val<<l)>>l;
+        val--;
+        code = (val >> bit_size) + 1;
+
+        return mvtab[code][1] + 1 + bit_size;
+    }
+}
+
+static inline void ff_h263_encode_motion_vector(MpegEncContext * s, int x, int y, int f_code){
+    if(s->flags2 & CODEC_FLAG2_NO_OUTPUT){
+        skip_put_bits(&s->pb,
+            h263_get_motion_length(s, x, f_code)
+           +h263_get_motion_length(s, y, f_code));
+    }else{
+        ff_h263_encode_motion(s, x, f_code);
+        ff_h263_encode_motion(s, y, f_code);
+    }
+}
+
+static inline int get_p_cbp(MpegEncContext * s,
+                      DCTELEM block[6][64],
+                      int motion_x, int motion_y){
+    int cbp, i;
+
+    if(s->flags & CODEC_FLAG_CBP_RD){
+        int best_cbpy_score= INT_MAX;
+        int best_cbpc_score= INT_MAX;
+        int cbpc = (-1), cbpy= (-1);
+        const int offset= (s->mv_type==MV_TYPE_16X16 ? 0 : 16) + (s->dquant ? 8 : 0);
+        const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
+
+        for(i=0; i<4; i++){
+            int score= inter_MCBPC_bits[i + offset] * lambda;
+            if(i&1) score += s->coded_score[5];
+            if(i&2) score += s->coded_score[4];
+
+            if(score < best_cbpc_score){
+                best_cbpc_score= score;
+                cbpc= i;
+            }
+        }
+
+        for(i=0; i<16; i++){
+            int score= cbpy_tab[i ^ 0xF][1] * lambda;
+            if(i&1) score += s->coded_score[3];
+            if(i&2) score += s->coded_score[2];
+            if(i&4) score += s->coded_score[1];
+            if(i&8) score += s->coded_score[0];
+
+            if(score < best_cbpy_score){
+                best_cbpy_score= score;
+                cbpy= i;
+            }
+        }
+        cbp= cbpc + 4*cbpy;
+        if ((motion_x | motion_y | s->dquant) == 0 && s->mv_type==MV_TYPE_16X16){
+            if(best_cbpy_score + best_cbpc_score + 2*lambda >= 0)
+                cbp= 0;
+        }
+
+        for (i = 0; i < 6; i++) {
+            if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){
+                s->block_last_index[i]= -1;
+                memset(s->block[i], 0, sizeof(DCTELEM)*64);
+            }
+        }
+    }else{
+        cbp= 0;
+        for (i = 0; i < 6; i++) {
+            if (s->block_last_index[i] >= 0)
+                cbp |= 1 << (5 - i);
+        }
+    }
+    return cbp;
+}
+
+static inline int get_b_cbp(MpegEncContext * s, DCTELEM block[6][64],
+                            int motion_x, int motion_y, int mb_type){
+    int cbp=0, i;
+
+    if(s->flags & CODEC_FLAG_CBP_RD){
+        int score=0;
+        const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
+
+        for(i=0; i<6; i++){
+            if(s->coded_score[i] < 0){
+                score += s->coded_score[i];
+                cbp |= 1 << (5 - i);
+            }
+        }
+
+        if(cbp){
+            int zero_score= -6;
+            if ((motion_x | motion_y | s->dquant | mb_type) == 0){
+                zero_score-= 4; //2*MV + mb_type + cbp bit
+            }
+
+            zero_score*= lambda;
+            if(zero_score <= score){
+                cbp=0;
+            }
+        }
+
+        for (i = 0; i < 6; i++) {
+            if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){
+                s->block_last_index[i]= -1;
+                memset(s->block[i], 0, sizeof(DCTELEM)*64);
+            }
+        }
+    }else{
+        for (i = 0; i < 6; i++) {
+            if (s->block_last_index[i] >= 0)
+                cbp |= 1 << (5 - i);
+        }
+    }
+    return cbp;
+}
+
+static inline void mpeg4_encode_blocks(MpegEncContext * s, DCTELEM block[6][64], int intra_dc[6],
+                               uint8_t **scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb){
+    int i;
+
+    if(scan_table){
+        if(s->flags2 & CODEC_FLAG2_NO_OUTPUT){
+            for (i = 0; i < 6; i++) {
+                skip_put_bits(&s->pb, mpeg4_get_block_length(s, block[i], i, intra_dc[i], scan_table[i]));
+            }
+        }else{
+            /* encode each block */
+            for (i = 0; i < 6; i++) {
+                mpeg4_encode_block(s, block[i], i, intra_dc[i], scan_table[i], dc_pb, ac_pb);
+            }
+        }
+    }else{
+        if(s->flags2 & CODEC_FLAG2_NO_OUTPUT){
+            for (i = 0; i < 6; i++) {
+                skip_put_bits(&s->pb, mpeg4_get_block_length(s, block[i], i, 0, s->intra_scantable.permutated));
+            }
+        }else{
+            /* encode each block */
+            for (i = 0; i < 6; i++) {
+                mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, dc_pb, ac_pb);
+            }
+        }
+    }
+}
+
+void mpeg4_encode_mb(MpegEncContext * s,
+                    DCTELEM block[6][64],
+                    int motion_x, int motion_y)
+{
+    int cbpc, cbpy, pred_x, pred_y;
+    PutBitContext * const pb2    = s->data_partitioning                         ? &s->pb2    : &s->pb;
+    PutBitContext * const tex_pb = s->data_partitioning && s->pict_type!=B_TYPE ? &s->tex_pb : &s->pb;
+    PutBitContext * const dc_pb  = s->data_partitioning && s->pict_type!=I_TYPE ? &s->pb2    : &s->pb;
+    const int interleaved_stats= (s->flags&CODEC_FLAG_PASS1) && !s->data_partitioning ? 1 : 0;
+    const int dquant_code[5]= {1,0,9,2,3};
+
+    //    printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
+    if (!s->mb_intra) {
+        int i, cbp;
+
+        if(s->pict_type==B_TYPE){
+            static const int mb_type_table[8]= {-1, 2, 3, 1,-1,-1,-1, 0}; /* convert from mv_dir to type */
+            int mb_type=  mb_type_table[s->mv_dir];
+
+            if(s->mb_x==0){
+                for(i=0; i<2; i++){
+                    s->last_mv[i][0][0]=
+                    s->last_mv[i][0][1]=
+                    s->last_mv[i][1][0]=
+                    s->last_mv[i][1][1]= 0;
+                }
+            }
+
+            assert(s->dquant>=-2 && s->dquant<=2);
+            assert((s->dquant&1)==0);
+            assert(mb_type>=0);
+
+            /* nothing to do if this MB was skipped in the next P Frame */
+            if(s->next_picture.mbskip_table[s->mb_y * s->mb_stride + s->mb_x]){ //FIXME avoid DCT & ...
+                s->skip_count++;
+                s->mv[0][0][0]=
+                s->mv[0][0][1]=
+                s->mv[1][0][0]=
+                s->mv[1][0][1]= 0;
+                s->mv_dir= MV_DIR_FORWARD; //doesn't matter
+                s->qscale -= s->dquant;
+//                s->mb_skipped=1;
+
+                return;
+            }
+
+            cbp= get_b_cbp(s, block, motion_x, motion_y, mb_type);
+
+            if ((cbp | motion_x | motion_y | mb_type) ==0) {
+                /* direct MB with MV={0,0} */
+                assert(s->dquant==0);
+
+                put_bits(&s->pb, 1, 1); /* mb not coded modb1=1 */
+
+                if(interleaved_stats){
+                    s->misc_bits++;
+                    s->last_bits++;
+                }
+                s->skip_count++;
+                return;
+            }
+
+            put_bits(&s->pb, 1, 0);     /* mb coded modb1=0 */
+            put_bits(&s->pb, 1, cbp ? 0 : 1); /* modb2 */ //FIXME merge
+            put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we don't need it :)
+            if(cbp) put_bits(&s->pb, 6, cbp);
+
+            if(cbp && mb_type){
+                if(s->dquant)
+                    put_bits(&s->pb, 2, (s->dquant>>2)+3);
+                else
+                    put_bits(&s->pb, 1, 0);
+            }else
+                s->qscale -= s->dquant;
+
+            if(!s->progressive_sequence){
+                if(cbp)
+                    put_bits(&s->pb, 1, s->interlaced_dct);
+                if(mb_type) // not direct mode
+                    put_bits(&s->pb, 1, s->mv_type == MV_TYPE_FIELD);
+            }
+
+            if(interleaved_stats){
+                s->misc_bits+= get_bits_diff(s);
+            }
+
+            if(mb_type == 0){
+                assert(s->mv_dir & MV_DIRECT);
+                ff_h263_encode_motion_vector(s, motion_x, motion_y, 1);
+                s->b_count++;
+                s->f_count++;
+            }else{
+                assert(mb_type > 0 && mb_type < 4);
+                if(s->mv_type != MV_TYPE_FIELD){
+                    if(s->mv_dir & MV_DIR_FORWARD){
+                        ff_h263_encode_motion_vector(s, s->mv[0][0][0] - s->last_mv[0][0][0],
+                                                        s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
+                        s->last_mv[0][0][0]= s->last_mv[0][1][0]= s->mv[0][0][0];
+                        s->last_mv[0][0][1]= s->last_mv[0][1][1]= s->mv[0][0][1];
+                        s->f_count++;
+                    }
+                    if(s->mv_dir & MV_DIR_BACKWARD){
+                        ff_h263_encode_motion_vector(s, s->mv[1][0][0] - s->last_mv[1][0][0],
+                                                        s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
+                        s->last_mv[1][0][0]= s->last_mv[1][1][0]= s->mv[1][0][0];
+                        s->last_mv[1][0][1]= s->last_mv[1][1][1]= s->mv[1][0][1];
+                        s->b_count++;
+                    }
+                }else{
+                    if(s->mv_dir & MV_DIR_FORWARD){
+                        put_bits(&s->pb, 1, s->field_select[0][0]);
+                        put_bits(&s->pb, 1, s->field_select[0][1]);
+                    }
+                    if(s->mv_dir & MV_DIR_BACKWARD){
+                        put_bits(&s->pb, 1, s->field_select[1][0]);
+                        put_bits(&s->pb, 1, s->field_select[1][1]);
+                    }
+                    if(s->mv_dir & MV_DIR_FORWARD){
+                        for(i=0; i<2; i++){
+                            ff_h263_encode_motion_vector(s, s->mv[0][i][0] - s->last_mv[0][i][0]  ,
+                                                            s->mv[0][i][1] - s->last_mv[0][i][1]/2, s->f_code);
+                            s->last_mv[0][i][0]= s->mv[0][i][0];
+                            s->last_mv[0][i][1]= s->mv[0][i][1]*2;
+                        }
+                        s->f_count++;
+                    }
+                    if(s->mv_dir & MV_DIR_BACKWARD){
+                        for(i=0; i<2; i++){
+                            ff_h263_encode_motion_vector(s, s->mv[1][i][0] - s->last_mv[1][i][0]  ,
+                                                            s->mv[1][i][1] - s->last_mv[1][i][1]/2, s->b_code);
+                            s->last_mv[1][i][0]= s->mv[1][i][0];
+                            s->last_mv[1][i][1]= s->mv[1][i][1]*2;
+                        }
+                        s->b_count++;
+                    }
+                }
+            }
+
+            if(interleaved_stats){
+                s->mv_bits+= get_bits_diff(s);
+            }
+
+            mpeg4_encode_blocks(s, block, NULL, NULL, NULL, &s->pb);
+
+            if(interleaved_stats){
+                s->p_tex_bits+= get_bits_diff(s);
+            }
+
+        }else{ /* s->pict_type==B_TYPE */
+            cbp= get_p_cbp(s, block, motion_x, motion_y);
+
+            if ((cbp | motion_x | motion_y | s->dquant) == 0 && s->mv_type==MV_TYPE_16X16) {
+                /* check if the B frames can skip it too, as we must skip it if we skip here
+                   why didn't they just compress the skip-mb bits instead of reusing them ?! */
+                if(s->max_b_frames>0){
+                    int i;
+                    int x,y, offset;
+                    uint8_t *p_pic;
+
+                    x= s->mb_x*16;
+                    y= s->mb_y*16;
+                    if(x+16 > s->width)  x= s->width-16;
+                    if(y+16 > s->height) y= s->height-16;
+
+                    offset= x + y*s->linesize;
+                    p_pic= s->new_picture.data[0] + offset;
+
+                    s->mb_skipped=1;
+                    for(i=0; i<s->max_b_frames; i++){
+                        uint8_t *b_pic;
+                        int diff;
+                        Picture *pic= s->reordered_input_picture[i+1];
+
+                        if(pic==NULL || pic->pict_type!=B_TYPE) break;
+
+                        b_pic= pic->data[0] + offset;
+                        if(pic->type != FF_BUFFER_TYPE_SHARED)
+                            b_pic+= INPLACE_OFFSET;
+                        diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16);
+                        if(diff>s->qscale*70){ //FIXME check that 70 is optimal
+                            s->mb_skipped=0;
+                            break;
+                        }
+                    }
+                }else
+                    s->mb_skipped=1;
+
+                if(s->mb_skipped==1){
+                    /* skip macroblock */
+                    put_bits(&s->pb, 1, 1);
+
+                    if(interleaved_stats){
+                        s->misc_bits++;
+                        s->last_bits++;
+                    }
+                    s->skip_count++;
+
+                    return;
+                }
+            }
+
+            put_bits(&s->pb, 1, 0);     /* mb coded */
+            cbpc = cbp & 3;
+            cbpy = cbp >> 2;
+            cbpy ^= 0xf;
+            if(s->mv_type==MV_TYPE_16X16){
+                if(s->dquant) cbpc+= 8;
+                put_bits(&s->pb,
+                        inter_MCBPC_bits[cbpc],
+                        inter_MCBPC_code[cbpc]);
+
+                put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+                if(s->dquant)
+                    put_bits(pb2, 2, dquant_code[s->dquant+2]);
+
+                if(!s->progressive_sequence){
+                    if(cbp)
+                        put_bits(pb2, 1, s->interlaced_dct);
+                    put_bits(pb2, 1, 0);
+                }
+
+                if(interleaved_stats){
+                    s->misc_bits+= get_bits_diff(s);
+                }
+
+                /* motion vectors: 16x16 mode */
+                h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
+
+                ff_h263_encode_motion_vector(s, motion_x - pred_x,
+                                                motion_y - pred_y, s->f_code);
+            }else if(s->mv_type==MV_TYPE_FIELD){
+                if(s->dquant) cbpc+= 8;
+                put_bits(&s->pb,
+                        inter_MCBPC_bits[cbpc],
+                        inter_MCBPC_code[cbpc]);
+
+                put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+                if(s->dquant)
+                    put_bits(pb2, 2, dquant_code[s->dquant+2]);
+
+                assert(!s->progressive_sequence);
+                if(cbp)
+                    put_bits(pb2, 1, s->interlaced_dct);
+                put_bits(pb2, 1, 1);
+
+                if(interleaved_stats){
+                    s->misc_bits+= get_bits_diff(s);
+                }
+
+                /* motion vectors: 16x8 interlaced mode */
+                h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
+                pred_y /=2;
+
+                put_bits(&s->pb, 1, s->field_select[0][0]);
+                put_bits(&s->pb, 1, s->field_select[0][1]);
+
+                ff_h263_encode_motion_vector(s, s->mv[0][0][0] - pred_x,
+                                                s->mv[0][0][1] - pred_y, s->f_code);
+                ff_h263_encode_motion_vector(s, s->mv[0][1][0] - pred_x,
+                                                s->mv[0][1][1] - pred_y, s->f_code);
+            }else{
+                assert(s->mv_type==MV_TYPE_8X8);
+                put_bits(&s->pb,
+                        inter_MCBPC_bits[cbpc+16],
+                        inter_MCBPC_code[cbpc+16]);
+                put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+
+                if(!s->progressive_sequence){
+                    if(cbp)
+                        put_bits(pb2, 1, s->interlaced_dct);
+                }
+
+                if(interleaved_stats){
+                    s->misc_bits+= get_bits_diff(s);
+                }
+
+                for(i=0; i<4; i++){
+                    /* motion vectors: 8x8 mode*/
+                    h263_pred_motion(s, i, 0, &pred_x, &pred_y);
+
+                    ff_h263_encode_motion_vector(s, s->current_picture.motion_val[0][ s->block_index[i] ][0] - pred_x,
+                                                    s->current_picture.motion_val[0][ s->block_index[i] ][1] - pred_y, s->f_code);
+                }
+            }
+
+            if(interleaved_stats){
+                s->mv_bits+= get_bits_diff(s);
+            }
+
+            mpeg4_encode_blocks(s, block, NULL, NULL, NULL, tex_pb);
+
+            if(interleaved_stats){
+                s->p_tex_bits+= get_bits_diff(s);
+            }
+            s->f_count++;
+        }
+    } else {
+        int cbp;
+        int dc_diff[6];   //dc values with the dc prediction subtracted
+        int dir[6];  //prediction direction
+        int zigzag_last_index[6];
+        uint8_t *scan_table[6];
+        int i;
+
+        for(i=0; i<6; i++){
+            dc_diff[i]= ff_mpeg4_pred_dc(s, i, block[i][0], &dir[i], 1);
+        }
+
+        if(s->flags & CODEC_FLAG_AC_PRED){
+            s->ac_pred= decide_ac_pred(s, block, dir, scan_table, zigzag_last_index);
+            if(!s->ac_pred)
+                restore_ac_coeffs(s, block, dir, scan_table, zigzag_last_index);
+        }else{
+            for(i=0; i<6; i++)
+                scan_table[i]= s->intra_scantable.permutated;
+        }
+
+        /* compute cbp */
+        cbp = 0;
+        for (i = 0; i < 6; i++) {
+            if (s->block_last_index[i] >= 1)
+                cbp |= 1 << (5 - i);
+        }
+
+        cbpc = cbp & 3;
+        if (s->pict_type == I_TYPE) {
+            if(s->dquant) cbpc+=4;
+            put_bits(&s->pb,
+                intra_MCBPC_bits[cbpc],
+                intra_MCBPC_code[cbpc]);
+        } else {
+            if(s->dquant) cbpc+=8;
+            put_bits(&s->pb, 1, 0);     /* mb coded */
+            put_bits(&s->pb,
+                inter_MCBPC_bits[cbpc + 4],
+                inter_MCBPC_code[cbpc + 4]);
+        }
+        put_bits(pb2, 1, s->ac_pred);
+        cbpy = cbp >> 2;
+        put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+        if(s->dquant)
+            put_bits(dc_pb, 2, dquant_code[s->dquant+2]);
+
+        if(!s->progressive_sequence){
+            put_bits(dc_pb, 1, s->interlaced_dct);
+        }
+
+        if(interleaved_stats){
+            s->misc_bits+= get_bits_diff(s);
+        }
+
+        mpeg4_encode_blocks(s, block, dc_diff, scan_table, dc_pb, tex_pb);
+
+        if(interleaved_stats){
+            s->i_tex_bits+= get_bits_diff(s);
+        }
+        s->i_count++;
+
+        /* restore ac coeffs & last_index stuff if we messed them up with the prediction */
+        if(s->ac_pred)
+            restore_ac_coeffs(s, block, dir, scan_table, zigzag_last_index);
+    }
+}
+
+void h263_encode_mb(MpegEncContext * s,
+                    DCTELEM block[6][64],
+                    int motion_x, int motion_y)
+{
+    int cbpc, cbpy, i, cbp, pred_x, pred_y;
+    int16_t pred_dc;
+    int16_t rec_intradc[6];
+    int16_t *dc_ptr[6];
+    const int interleaved_stats= (s->flags&CODEC_FLAG_PASS1);
+    const int dquant_code[5]= {1,0,9,2,3};
+
+    //printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
+    if (!s->mb_intra) {
+        /* compute cbp */
+        cbp= get_p_cbp(s, block, motion_x, motion_y);
+
+        if ((cbp | motion_x | motion_y | s->dquant | (s->mv_type - MV_TYPE_16X16)) == 0) {
+            /* skip macroblock */
+            put_bits(&s->pb, 1, 1);
+            if(interleaved_stats){
+                s->misc_bits++;
+                s->last_bits++;
+            }
+            s->skip_count++;
+
+            return;
+        }
+        put_bits(&s->pb, 1, 0);         /* mb coded */
+
+        cbpc = cbp & 3;
+        cbpy = cbp >> 2;
+        if(s->alt_inter_vlc==0 || cbpc!=3)
+            cbpy ^= 0xF;
+        if(s->dquant) cbpc+= 8;
+        if(s->mv_type==MV_TYPE_16X16){
+            put_bits(&s->pb,
+                    inter_MCBPC_bits[cbpc],
+                    inter_MCBPC_code[cbpc]);
+
+            put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+            if(s->dquant)
+                put_bits(&s->pb, 2, dquant_code[s->dquant+2]);
+
+            if(interleaved_stats){
+                s->misc_bits+= get_bits_diff(s);
+            }
+
+            /* motion vectors: 16x16 mode */
+            h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
+
+            if (!s->umvplus) {
+                ff_h263_encode_motion_vector(s, motion_x - pred_x,
+                                                motion_y - pred_y, 1);
+            }
+            else {
+                h263p_encode_umotion(s, motion_x - pred_x);
+                h263p_encode_umotion(s, motion_y - pred_y);
+                if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1))
+                    /* To prevent Start Code emulation */
+                    put_bits(&s->pb,1,1);
+            }
+        }else{
+            put_bits(&s->pb,
+                    inter_MCBPC_bits[cbpc+16],
+                    inter_MCBPC_code[cbpc+16]);
+            put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+            if(s->dquant)
+                put_bits(&s->pb, 2, dquant_code[s->dquant+2]);
+
+            if(interleaved_stats){
+                s->misc_bits+= get_bits_diff(s);
+            }
+
+            for(i=0; i<4; i++){
+                /* motion vectors: 8x8 mode*/
+                h263_pred_motion(s, i, 0, &pred_x, &pred_y);
+
+                motion_x= s->current_picture.motion_val[0][ s->block_index[i] ][0];
+                motion_y= s->current_picture.motion_val[0][ s->block_index[i] ][1];
+                if (!s->umvplus) {
+                    ff_h263_encode_motion_vector(s, motion_x - pred_x,
+                                                    motion_y - pred_y, 1);
+                }
+                else {
+                    h263p_encode_umotion(s, motion_x - pred_x);
+                    h263p_encode_umotion(s, motion_y - pred_y);
+                    if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1))
+                        /* To prevent Start Code emulation */
+                        put_bits(&s->pb,1,1);
+                }
+            }
+        }
+
+        if(interleaved_stats){
+            s->mv_bits+= get_bits_diff(s);
+        }
+    } else {
+        assert(s->mb_intra);
+
+        cbp = 0;
+        if (s->h263_aic) {
+            /* Predict DC */
+            for(i=0; i<6; i++) {
+                int16_t level = block[i][0];
+                int scale;
+
+                if(i<4) scale= s->y_dc_scale;
+                else    scale= s->c_dc_scale;
+
+                pred_dc = h263_pred_dc(s, i, &dc_ptr[i]);
+                level -= pred_dc;
+                /* Quant */
+                if (level >= 0)
+                    level = (level + (scale>>1))/scale;
+                else
+                    level = (level - (scale>>1))/scale;
+
+                /* AIC can change CBP */
+                if (level == 0 && s->block_last_index[i] == 0)
+                    s->block_last_index[i] = -1;
+
+                if(!s->modified_quant){
+                    if (level < -127)
+                        level = -127;
+                    else if (level > 127)
+                        level = 127;
+                }
+
+                block[i][0] = level;
+                /* Reconstruction */
+                rec_intradc[i] = scale*level + pred_dc;
+                /* Oddify */
+                rec_intradc[i] |= 1;
+                //if ((rec_intradc[i] % 2) == 0)
+                //    rec_intradc[i]++;
+                /* Clipping */
+                if (rec_intradc[i] < 0)
+                    rec_intradc[i] = 0;
+                else if (rec_intradc[i] > 2047)
+                    rec_intradc[i] = 2047;
+
+                /* Update AC/DC tables */
+                *dc_ptr[i] = rec_intradc[i];
+                if (s->block_last_index[i] >= 0)
+                    cbp |= 1 << (5 - i);
+            }
+        }else{
+            for(i=0; i<6; i++) {
+                /* compute cbp */
+                if (s->block_last_index[i] >= 1)
+                    cbp |= 1 << (5 - i);
+            }
+        }
+
+        cbpc = cbp & 3;
+        if (s->pict_type == I_TYPE) {
+            if(s->dquant) cbpc+=4;
+            put_bits(&s->pb,
+                intra_MCBPC_bits[cbpc],
+                intra_MCBPC_code[cbpc]);
+        } else {
+            if(s->dquant) cbpc+=8;
+            put_bits(&s->pb, 1, 0);     /* mb coded */
+            put_bits(&s->pb,
+                inter_MCBPC_bits[cbpc + 4],
+                inter_MCBPC_code[cbpc + 4]);
+        }
+        if (s->h263_aic) {
+            /* XXX: currently, we do not try to use ac prediction */
+            put_bits(&s->pb, 1, 0);     /* no AC prediction */
+        }
+        cbpy = cbp >> 2;
+        put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+        if(s->dquant)
+            put_bits(&s->pb, 2, dquant_code[s->dquant+2]);
+
+        if(interleaved_stats){
+            s->misc_bits+= get_bits_diff(s);
+        }
+    }
+
+    for(i=0; i<6; i++) {
+        /* encode each block */
+        h263_encode_block(s, block[i], i);
+
+        /* Update INTRADC for decoding */
+        if (s->h263_aic && s->mb_intra) {
+            block[i][0] = rec_intradc[i];
+
+        }
+    }
+
+    if(interleaved_stats){
+        if (!s->mb_intra) {
+            s->p_tex_bits+= get_bits_diff(s);
+            s->f_count++;
+        }else{
+            s->i_tex_bits+= get_bits_diff(s);
+            s->i_count++;
+        }
+    }
+}
+#endif
+
+void ff_h263_loop_filter(MpegEncContext * s){
+    int qp_c;
+    const int linesize  = s->linesize;
+    const int uvlinesize= s->uvlinesize;
+    const int xy = s->mb_y * s->mb_stride + s->mb_x;
+    uint8_t *dest_y = s->dest[0];
+    uint8_t *dest_cb= s->dest[1];
+    uint8_t *dest_cr= s->dest[2];
+
+//    if(s->pict_type==B_TYPE && !s->readable) return;
+
+    /*
+       Diag Top
+       Left Center
+    */
+    if(!IS_SKIP(s->current_picture.mb_type[xy])){
+        qp_c= s->qscale;
+        s->dsp.h263_v_loop_filter(dest_y+8*linesize  , linesize, qp_c);
+        s->dsp.h263_v_loop_filter(dest_y+8*linesize+8, linesize, qp_c);
+    }else
+        qp_c= 0;
+
+    if(s->mb_y){
+        int qp_dt, qp_t, qp_tc;
+
+        if(IS_SKIP(s->current_picture.mb_type[xy-s->mb_stride]))
+            qp_t=0;
+        else
+            qp_t= s->current_picture.qscale_table[xy-s->mb_stride];
+
+        if(qp_c)
+            qp_tc= qp_c;
+        else
+            qp_tc= qp_t;
+
+        if(qp_tc){
+            const int chroma_qp= s->chroma_qscale_table[qp_tc];
+            s->dsp.h263_v_loop_filter(dest_y  ,   linesize, qp_tc);
+            s->dsp.h263_v_loop_filter(dest_y+8,   linesize, qp_tc);
+
+            s->dsp.h263_v_loop_filter(dest_cb , uvlinesize, chroma_qp);
+            s->dsp.h263_v_loop_filter(dest_cr , uvlinesize, chroma_qp);
+        }
+
+        if(qp_t)
+            s->dsp.h263_h_loop_filter(dest_y-8*linesize+8  ,   linesize, qp_t);
+
+        if(s->mb_x){
+            if(qp_t || IS_SKIP(s->current_picture.mb_type[xy-1-s->mb_stride]))
+                qp_dt= qp_t;
+            else
+                qp_dt= s->current_picture.qscale_table[xy-1-s->mb_stride];
+
+            if(qp_dt){
+                const int chroma_qp= s->chroma_qscale_table[qp_dt];
+                s->dsp.h263_h_loop_filter(dest_y -8*linesize  ,   linesize, qp_dt);
+                s->dsp.h263_h_loop_filter(dest_cb-8*uvlinesize, uvlinesize, chroma_qp);
+                s->dsp.h263_h_loop_filter(dest_cr-8*uvlinesize, uvlinesize, chroma_qp);
+            }
+        }
+    }
+
+    if(qp_c){
+        s->dsp.h263_h_loop_filter(dest_y +8,   linesize, qp_c);
+        if(s->mb_y + 1 == s->mb_height)
+            s->dsp.h263_h_loop_filter(dest_y+8*linesize+8,   linesize, qp_c);
+    }
+
+    if(s->mb_x){
+        int qp_lc;
+        if(qp_c || IS_SKIP(s->current_picture.mb_type[xy-1]))
+            qp_lc= qp_c;
+        else
+            qp_lc= s->current_picture.qscale_table[xy-1];
+
+        if(qp_lc){
+            s->dsp.h263_h_loop_filter(dest_y,   linesize, qp_lc);
+            if(s->mb_y + 1 == s->mb_height){
+                const int chroma_qp= s->chroma_qscale_table[qp_lc];
+                s->dsp.h263_h_loop_filter(dest_y +8*  linesize,   linesize, qp_lc);
+                s->dsp.h263_h_loop_filter(dest_cb             , uvlinesize, chroma_qp);
+                s->dsp.h263_h_loop_filter(dest_cr             , uvlinesize, chroma_qp);
+            }
+        }
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+static int h263_pred_dc(MpegEncContext * s, int n, int16_t **dc_val_ptr)
+{
+    int x, y, wrap, a, c, pred_dc, scale;
+    int16_t *dc_val;
+
+    /* find prediction */
+    if (n < 4) {
+        x = 2 * s->mb_x + (n & 1);
+        y = 2 * s->mb_y + ((n & 2) >> 1);
+        wrap = s->b8_stride;
+        dc_val = s->dc_val[0];
+        scale = s->y_dc_scale;
+    } else {
+        x = s->mb_x;
+        y = s->mb_y;
+        wrap = s->mb_stride;
+        dc_val = s->dc_val[n - 4 + 1];
+        scale = s->c_dc_scale;
+    }
+    /* B C
+     * A X
+     */
+    a = dc_val[(x - 1) + (y) * wrap];
+    c = dc_val[(x) + (y - 1) * wrap];
+
+    /* No prediction outside GOB boundary */
+    if(s->first_slice_line && n!=3){
+        if(n!=2) c= 1024;
+        if(n!=1 && s->mb_x == s->resync_mb_x) a= 1024;
+    }
+    pred_dc = 1024;
+    /* just DC prediction */
+    if (a != 1024 && c != 1024)
+        pred_dc = (a + c) >> 1;
+    else if (a != 1024)
+        pred_dc = a;
+    else
+        pred_dc = c;
+
+    /* we assume pred is positive */
+    //pred_dc = (pred_dc + (scale >> 1)) / scale;
+    *dc_val_ptr = &dc_val[x + y * wrap];
+    return pred_dc;
+}
+#endif /* CONFIG_ENCODERS */
+
+static void h263_pred_acdc(MpegEncContext * s, DCTELEM *block, int n)
+{
+    int x, y, wrap, a, c, pred_dc, scale, i;
+    int16_t *dc_val, *ac_val, *ac_val1;
+
+    /* find prediction */
+    if (n < 4) {
+        x = 2 * s->mb_x + (n & 1);
+        y = 2 * s->mb_y + (n>> 1);
+        wrap = s->b8_stride;
+        dc_val = s->dc_val[0];
+        ac_val = s->ac_val[0][0];
+        scale = s->y_dc_scale;
+    } else {
+        x = s->mb_x;
+        y = s->mb_y;
+        wrap = s->mb_stride;
+        dc_val = s->dc_val[n - 4 + 1];
+        ac_val = s->ac_val[n - 4 + 1][0];
+        scale = s->c_dc_scale;
+    }
+
+    ac_val += ((y) * wrap + (x)) * 16;
+    ac_val1 = ac_val;
+
+    /* B C
+     * A X
+     */
+    a = dc_val[(x - 1) + (y) * wrap];
+    c = dc_val[(x) + (y - 1) * wrap];
+
+    /* No prediction outside GOB boundary */
+    if(s->first_slice_line && n!=3){
+        if(n!=2) c= 1024;
+        if(n!=1 && s->mb_x == s->resync_mb_x) a= 1024;
+    }
+
+    if (s->ac_pred) {
+        pred_dc = 1024;
+        if (s->h263_aic_dir) {
+            /* left prediction */
+            if (a != 1024) {
+                ac_val -= 16;
+                for(i=1;i<8;i++) {
+                    block[s->dsp.idct_permutation[i<<3]] += ac_val[i];
+                }
+                pred_dc = a;
+            }
+        } else {
+            /* top prediction */
+            if (c != 1024) {
+                ac_val -= 16 * wrap;
+                for(i=1;i<8;i++) {
+                    block[s->dsp.idct_permutation[i   ]] += ac_val[i + 8];
+                }
+                pred_dc = c;
+            }
+        }
+    } else {
+        /* just DC prediction */
+        if (a != 1024 && c != 1024)
+            pred_dc = (a + c) >> 1;
+        else if (a != 1024)
+            pred_dc = a;
+        else
+            pred_dc = c;
+    }
+
+    /* we assume pred is positive */
+    block[0]=block[0]*scale + pred_dc;
+
+    if (block[0] < 0)
+        block[0] = 0;
+    else
+        block[0] |= 1;
+
+    /* Update AC/DC tables */
+    dc_val[(x) + (y) * wrap] = block[0];
+
+    /* left copy */
+    for(i=1;i<8;i++)
+        ac_val1[i    ] = block[s->dsp.idct_permutation[i<<3]];
+    /* top copy */
+    for(i=1;i<8;i++)
+        ac_val1[8 + i] = block[s->dsp.idct_permutation[i   ]];
+}
+
+int16_t *h263_pred_motion(MpegEncContext * s, int block, int dir,
+                        int *px, int *py)
+{
+    int wrap;
+    int16_t *A, *B, *C, (*mot_val)[2];
+    static const int off[4]= {2, 1, 1, -1};
+
+    wrap = s->b8_stride;
+    mot_val = s->current_picture.motion_val[dir] + s->block_index[block];
+
+    A = mot_val[ - 1];
+    /* special case for first (slice) line */
+    if (s->first_slice_line && block<3) {
+        // we can't just change some MVs to simulate that as we need them for the B frames (and ME)
+        // and if we ever support non rectangular objects than we need to do a few ifs here anyway :(
+        if(block==0){ //most common case
+            if(s->mb_x  == s->resync_mb_x){ //rare
+                *px= *py = 0;
+            }else if(s->mb_x + 1 == s->resync_mb_x && s->h263_pred){ //rare
+                C = mot_val[off[block] - wrap];
+                if(s->mb_x==0){
+                    *px = C[0];
+                    *py = C[1];
+                }else{
+                    *px = mid_pred(A[0], 0, C[0]);
+                    *py = mid_pred(A[1], 0, C[1]);
+                }
+            }else{
+                *px = A[0];
+                *py = A[1];
+            }
+        }else if(block==1){
+            if(s->mb_x + 1 == s->resync_mb_x && s->h263_pred){ //rare
+                C = mot_val[off[block] - wrap];
+                *px = mid_pred(A[0], 0, C[0]);
+                *py = mid_pred(A[1], 0, C[1]);
+            }else{
+                *px = A[0];
+                *py = A[1];
+            }
+        }else{ /* block==2*/
+            B = mot_val[ - wrap];
+            C = mot_val[off[block] - wrap];
+            if(s->mb_x == s->resync_mb_x) //rare
+                A[0]=A[1]=0;
+
+            *px = mid_pred(A[0], B[0], C[0]);
+            *py = mid_pred(A[1], B[1], C[1]);
+        }
+    } else {
+        B = mot_val[ - wrap];
+        C = mot_val[off[block] - wrap];
+        *px = mid_pred(A[0], B[0], C[0]);
+        *py = mid_pred(A[1], B[1], C[1]);
+    }
+    return *mot_val;
+}
+
+#ifdef CONFIG_ENCODERS
+void ff_h263_encode_motion(MpegEncContext * s, int val, int f_code)
+{
+    int range, l, bit_size, sign, code, bits;
+
+    if (val == 0) {
+        /* zero vector */
+        code = 0;
+        put_bits(&s->pb, mvtab[code][1], mvtab[code][0]);
+    } else {
+        bit_size = f_code - 1;
+        range = 1 << bit_size;
+        /* modulo encoding */
+        l= INT_BIT - 6 - bit_size;
+        val = (val<<l)>>l;
+        sign = val>>31;
+        val= (val^sign)-sign;
+        sign&=1;
+
+        val--;
+        code = (val >> bit_size) + 1;
+        bits = val & (range - 1);
+
+        put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign);
+        if (bit_size > 0) {
+            put_bits(&s->pb, bit_size, bits);
+        }
+    }
+}
+
+/* Encode MV differences on H.263+ with Unrestricted MV mode */
+static void h263p_encode_umotion(MpegEncContext * s, int val)
+{
+    short sval = 0;
+    short i = 0;
+    short n_bits = 0;
+    short temp_val;
+    int code = 0;
+    int tcode;
+
+    if ( val == 0)
+        put_bits(&s->pb, 1, 1);
+    else if (val == 1)
+        put_bits(&s->pb, 3, 0);
+    else if (val == -1)
+        put_bits(&s->pb, 3, 2);
+    else {
+
+        sval = ((val < 0) ? (short)(-val):(short)val);
+        temp_val = sval;
+
+        while (temp_val != 0) {
+            temp_val = temp_val >> 1;
+            n_bits++;
+        }
+
+        i = n_bits - 1;
+        while (i > 0) {
+            tcode = (sval & (1 << (i-1))) >> (i-1);
+            tcode = (tcode << 1) | 1;
+            code = (code << 2) | tcode;
+            i--;
+        }
+        code = ((code << 1) | (val < 0)) << 1;
+        put_bits(&s->pb, (2*n_bits)+1, code);
+        //printf("\nVal = %d\tCode = %d", sval, code);
+    }
+}
+
+static void init_mv_penalty_and_fcode(MpegEncContext *s)
+{
+    int f_code;
+    int mv;
+
+    if(mv_penalty==NULL)
+        mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
+
+    for(f_code=1; f_code<=MAX_FCODE; f_code++){
+        for(mv=-MAX_MV; mv<=MAX_MV; mv++){
+            int len;
+
+            if(mv==0) len= mvtab[0][1];
+            else{
+                int val, bit_size, range, code;
+
+                bit_size = f_code - 1;
+                range = 1 << bit_size;
+
+                val=mv;
+                if (val < 0)
+                    val = -val;
+                val--;
+                code = (val >> bit_size) + 1;
+                if(code<33){
+                    len= mvtab[code][1] + 1 + bit_size;
+                }else{
+                    len= mvtab[32][1] + av_log2(code>>5) + 2 + bit_size;
+                }
+            }
+
+            mv_penalty[f_code][mv+MAX_MV]= len;
+        }
+    }
+
+    for(f_code=MAX_FCODE; f_code>0; f_code--){
+        for(mv=-(16<<f_code); mv<(16<<f_code); mv++){
+            fcode_tab[mv+MAX_MV]= f_code;
+        }
+    }
+
+    for(mv=0; mv<MAX_MV*2+1; mv++){
+        umv_fcode_tab[mv]= 1;
+    }
+}
+#endif
+
+#ifdef CONFIG_ENCODERS
+
+static void init_uni_dc_tab(void)
+{
+    int level, uni_code, uni_len;
+
+    for(level=-256; level<256; level++){
+        int size, v, l;
+        /* find number of bits */
+        size = 0;
+        v = abs(level);
+        while (v) {
+            v >>= 1;
+            size++;
+        }
+
+        if (level < 0)
+            l= (-level) ^ ((1 << size) - 1);
+        else
+            l= level;
+
+        /* luminance */
+        uni_code= DCtab_lum[size][0];
+        uni_len = DCtab_lum[size][1];
+
+        if (size > 0) {
+            uni_code<<=size; uni_code|=l;
+            uni_len+=size;
+            if (size > 8){
+                uni_code<<=1; uni_code|=1;
+                uni_len++;
+            }
+        }
+        uni_DCtab_lum_bits[level+256]= uni_code;
+        uni_DCtab_lum_len [level+256]= uni_len;
+
+        /* chrominance */
+        uni_code= DCtab_chrom[size][0];
+        uni_len = DCtab_chrom[size][1];
+
+        if (size > 0) {
+            uni_code<<=size; uni_code|=l;
+            uni_len+=size;
+            if (size > 8){
+                uni_code<<=1; uni_code|=1;
+                uni_len++;
+            }
+        }
+        uni_DCtab_chrom_bits[level+256]= uni_code;
+        uni_DCtab_chrom_len [level+256]= uni_len;
+
+    }
+}
+
+#endif //CONFIG_ENCODERS
+
+#ifdef CONFIG_ENCODERS
+static void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_tab){
+    int slevel, run, last;
+
+    assert(MAX_LEVEL >= 64);
+    assert(MAX_RUN   >= 63);
+
+    for(slevel=-64; slevel<64; slevel++){
+        if(slevel==0) continue;
+        for(run=0; run<64; run++){
+            for(last=0; last<=1; last++){
+                const int index= UNI_MPEG4_ENC_INDEX(last, run, slevel+64);
+                int level= slevel < 0 ? -slevel : slevel;
+                int sign= slevel < 0 ? 1 : 0;
+                int bits, len, code;
+                int level1, run1;
+
+                len_tab[index]= 100;
+
+                /* ESC0 */
+                code= get_rl_index(rl, last, run, level);
+                bits= rl->table_vlc[code][0];
+                len=  rl->table_vlc[code][1];
+                bits=bits*2+sign; len++;
+
+                if(code!=rl->n && len < len_tab[index]){
+                    bits_tab[index]= bits;
+                    len_tab [index]= len;
+                }
+#if 1
+                /* ESC1 */
+                bits= rl->table_vlc[rl->n][0];
+                len=  rl->table_vlc[rl->n][1];
+                bits=bits*2;    len++; //esc1
+                level1= level - rl->max_level[last][run];
+                if(level1>0){
+                    code= get_rl_index(rl, last, run, level1);
+                    bits<<= rl->table_vlc[code][1];
+                    len  += rl->table_vlc[code][1];
+                    bits += rl->table_vlc[code][0];
+                    bits=bits*2+sign; len++;
+
+                    if(code!=rl->n && len < len_tab[index]){
+                        bits_tab[index]= bits;
+                        len_tab [index]= len;
+                    }
+                }
+#endif
+#if 1
+                /* ESC2 */
+                bits= rl->table_vlc[rl->n][0];
+                len=  rl->table_vlc[rl->n][1];
+                bits=bits*4+2;    len+=2; //esc2
+                run1 = run - rl->max_run[last][level] - 1;
+                if(run1>=0){
+                    code= get_rl_index(rl, last, run1, level);
+                    bits<<= rl->table_vlc[code][1];
+                    len  += rl->table_vlc[code][1];
+                    bits += rl->table_vlc[code][0];
+                    bits=bits*2+sign; len++;
+
+                    if(code!=rl->n && len < len_tab[index]){
+                        bits_tab[index]= bits;
+                        len_tab [index]= len;
+                    }
+                }
+#endif
+                /* ESC3 */
+                bits= rl->table_vlc[rl->n][0];
+                len = rl->table_vlc[rl->n][1];
+                bits=bits*4+3;    len+=2; //esc3
+                bits=bits*2+last; len++;
+                bits=bits*64+run; len+=6;
+                bits=bits*2+1;    len++;  //marker
+                bits=bits*4096+(slevel&0xfff); len+=12;
+                bits=bits*2+1;    len++;  //marker
+
+                if(len < len_tab[index]){
+                    bits_tab[index]= bits;
+                    len_tab [index]= len;
+                }
+            }
+        }
+    }
+}
+
+static void init_uni_h263_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_tab){
+    int slevel, run, last;
+
+    assert(MAX_LEVEL >= 64);
+    assert(MAX_RUN   >= 63);
+
+    for(slevel=-64; slevel<64; slevel++){
+        if(slevel==0) continue;
+        for(run=0; run<64; run++){
+            for(last=0; last<=1; last++){
+                const int index= UNI_MPEG4_ENC_INDEX(last, run, slevel+64);
+                int level= slevel < 0 ? -slevel : slevel;
+                int sign= slevel < 0 ? 1 : 0;
+                int bits, len, code;
+
+                len_tab[index]= 100;
+
+                /* ESC0 */
+                code= get_rl_index(rl, last, run, level);
+                bits= rl->table_vlc[code][0];
+                len=  rl->table_vlc[code][1];
+                bits=bits*2+sign; len++;
+
+                if(code!=rl->n && len < len_tab[index]){
+                    if(bits_tab) bits_tab[index]= bits;
+                    len_tab [index]= len;
+                }
+                /* ESC */
+                bits= rl->table_vlc[rl->n][0];
+                len = rl->table_vlc[rl->n][1];
+                bits=bits*2+last; len++;
+                bits=bits*64+run; len+=6;
+                bits=bits*256+(level&0xff); len+=8;
+
+                if(len < len_tab[index]){
+                    if(bits_tab) bits_tab[index]= bits;
+                    len_tab [index]= len;
+                }
+            }
+        }
+    }
+}
+
+void h263_encode_init(MpegEncContext *s)
+{
+    static int done = 0;
+
+    if (!done) {
+        done = 1;
+
+        init_uni_dc_tab();
+
+        init_rl(&rl_inter, 1);
+        init_rl(&rl_intra, 1);
+        init_rl(&rl_intra_aic, 1);
+
+        init_uni_mpeg4_rl_tab(&rl_intra, uni_mpeg4_intra_rl_bits, uni_mpeg4_intra_rl_len);
+        init_uni_mpeg4_rl_tab(&rl_inter, uni_mpeg4_inter_rl_bits, uni_mpeg4_inter_rl_len);
+
+        init_uni_h263_rl_tab(&rl_intra_aic, NULL, uni_h263_intra_aic_rl_len);
+        init_uni_h263_rl_tab(&rl_inter    , NULL, uni_h263_inter_rl_len);
+
+        init_mv_penalty_and_fcode(s);
+    }
+    s->me.mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p
+
+    s->intra_ac_vlc_length     =s->inter_ac_vlc_length     = uni_h263_inter_rl_len;
+    s->intra_ac_vlc_last_length=s->inter_ac_vlc_last_length= uni_h263_inter_rl_len + 128*64;
+    if(s->h263_aic){
+        s->intra_ac_vlc_length     = uni_h263_intra_aic_rl_len;
+        s->intra_ac_vlc_last_length= uni_h263_intra_aic_rl_len + 128*64;
+    }
+    s->ac_esc_length= 7+1+6+8;
+
+    // use fcodes >1 only for mpeg4 & h263 & h263p FIXME
+    switch(s->codec_id){
+    case CODEC_ID_MPEG4:
+        s->fcode_tab= fcode_tab;
+        s->min_qcoeff= -2048;
+        s->max_qcoeff=  2047;
+        s->intra_ac_vlc_length     = uni_mpeg4_intra_rl_len;
+        s->intra_ac_vlc_last_length= uni_mpeg4_intra_rl_len + 128*64;
+        s->inter_ac_vlc_length     = uni_mpeg4_inter_rl_len;
+        s->inter_ac_vlc_last_length= uni_mpeg4_inter_rl_len + 128*64;
+        s->luma_dc_vlc_length= uni_DCtab_lum_len;
+        s->chroma_dc_vlc_length= uni_DCtab_chrom_len;
+        s->ac_esc_length= 7+2+1+6+1+12+1;
+        s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table;
+        s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table;
+
+        if(s->flags & CODEC_FLAG_GLOBAL_HEADER){
+
+            s->avctx->extradata= av_malloc(1024);
+            init_put_bits(&s->pb, s->avctx->extradata, 1024);
+
+            if(!(s->workaround_bugs & FF_BUG_MS))
+                mpeg4_encode_visual_object_header(s);
+            mpeg4_encode_vol_header(s, 0, 0);
+
+//            ff_mpeg4_stuffing(&s->pb); ?
+            flush_put_bits(&s->pb);
+            s->avctx->extradata_size= (put_bits_count(&s->pb)+7)>>3;
+        }
+
+        break;
+    case CODEC_ID_H263P:
+        if(s->umvplus)
+            s->fcode_tab= umv_fcode_tab;
+        if(s->modified_quant){
+            s->min_qcoeff= -2047;
+            s->max_qcoeff=  2047;
+        }else{
+            s->min_qcoeff= -127;
+            s->max_qcoeff=  127;
+        }
+        break;
+        //Note for mpeg4 & h263 the dc-scale table will be set per frame as needed later
+    case CODEC_ID_FLV1:
+        if (s->h263_flv > 1) {
+            s->min_qcoeff= -1023;
+            s->max_qcoeff=  1023;
+        } else {
+            s->min_qcoeff= -127;
+            s->max_qcoeff=  127;
+        }
+        s->y_dc_scale_table=
+        s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+        break;
+    default: //nothing needed - default table already set in mpegvideo.c
+        s->min_qcoeff= -127;
+        s->max_qcoeff=  127;
+        s->y_dc_scale_table=
+        s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+    }
+}
+
+/**
+ * encodes a 8x8 block.
+ * @param block the 8x8 block
+ * @param n block index (0-3 are luma, 4-5 are chroma)
+ */
+static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
+{
+    int level, run, last, i, j, last_index, last_non_zero, sign, slevel, code;
+    RLTable *rl;
+
+    rl = &rl_inter;
+    if (s->mb_intra && !s->h263_aic) {
+        /* DC coef */
+        level = block[0];
+        /* 255 cannot be represented, so we clamp */
+        if (level > 254) {
+            level = 254;
+            block[0] = 254;
+        }
+        /* 0 cannot be represented also */
+        else if (level < 1) {
+            level = 1;
+            block[0] = 1;
+        }
+        if (level == 128) //FIXME check rv10
+            put_bits(&s->pb, 8, 0xff);
+        else
+            put_bits(&s->pb, 8, level);
+        i = 1;
+    } else {
+        i = 0;
+        if (s->h263_aic && s->mb_intra)
+            rl = &rl_intra_aic;
+
+        if(s->alt_inter_vlc && !s->mb_intra){
+            int aic_vlc_bits=0;
+            int inter_vlc_bits=0;
+            int wrong_pos=-1;
+            int aic_code;
+
+            last_index = s->block_last_index[n];
+            last_non_zero = i - 1;
+            for (; i <= last_index; i++) {
+                j = s->intra_scantable.permutated[i];
+                level = block[j];
+                if (level) {
+                    run = i - last_non_zero - 1;
+                    last = (i == last_index);
+
+                    if(level<0) level= -level;
+
+                    code = get_rl_index(rl, last, run, level);
+                    aic_code = get_rl_index(&rl_intra_aic, last, run, level);
+                    inter_vlc_bits += rl->table_vlc[code][1]+1;
+                    aic_vlc_bits   += rl_intra_aic.table_vlc[aic_code][1]+1;
+
+                    if (code == rl->n) {
+                        inter_vlc_bits += 1+6+8-1;
+                    }
+                    if (aic_code == rl_intra_aic.n) {
+                        aic_vlc_bits += 1+6+8-1;
+                        wrong_pos += run + 1;
+                    }else
+                        wrong_pos += wrong_run[aic_code];
+                    last_non_zero = i;
+                }
+            }
+            i = 0;
+            if(aic_vlc_bits < inter_vlc_bits && wrong_pos > 63)
+                rl = &rl_intra_aic;
+        }
+    }
+
+    /* AC coefs */
+    last_index = s->block_last_index[n];
+    last_non_zero = i - 1;
+    for (; i <= last_index; i++) {
+        j = s->intra_scantable.permutated[i];
+        level = block[j];
+        if (level) {
+            run = i - last_non_zero - 1;
+            last = (i == last_index);
+            sign = 0;
+            slevel = level;
+            if (level < 0) {
+                sign = 1;
+                level = -level;
+            }
+            code = get_rl_index(rl, last, run, level);
+            put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
+            if (code == rl->n) {
+              if(s->h263_flv <= 1){
+                put_bits(&s->pb, 1, last);
+                put_bits(&s->pb, 6, run);
+
+                assert(slevel != 0);
+
+                if(level < 128)
+                    put_bits(&s->pb, 8, slevel & 0xff);
+                else{
+                    put_bits(&s->pb, 8, 128);
+                    put_bits(&s->pb, 5, slevel & 0x1f);
+                    put_bits(&s->pb, 6, (slevel>>5)&0x3f);
+                }
+              }else{
+                if(level < 64) { // 7-bit level
+                        put_bits(&s->pb, 1, 0);
+                        put_bits(&s->pb, 1, last);
+                        put_bits(&s->pb, 6, run);
+
+                        put_bits(&s->pb, 7, slevel & 0x7f);
+                    } else {
+                        /* 11-bit level */
+                        put_bits(&s->pb, 1, 1);
+                        put_bits(&s->pb, 1, last);
+                        put_bits(&s->pb, 6, run);
+
+                        put_bits(&s->pb, 11, slevel & 0x7ff);
+                    }
+              }
+            } else {
+                put_bits(&s->pb, 1, sign);
+            }
+            last_non_zero = i;
+        }
+    }
+}
+#endif
+
+#ifdef CONFIG_ENCODERS
+
+/***************************************************/
+/**
+ * add mpeg4 stuffing bits (01...1)
+ */
+void ff_mpeg4_stuffing(PutBitContext * pbc)
+{
+    int length;
+    put_bits(pbc, 1, 0);
+    length= (-put_bits_count(pbc))&7;
+    if(length) put_bits(pbc, length, (1<<length)-1);
+}
+
+/* must be called before writing the header */
+void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){
+    int time_div, time_mod;
+
+    assert(s->current_picture_ptr->pts != AV_NOPTS_VALUE);
+    s->time= s->current_picture_ptr->pts*s->avctx->time_base.num;
+
+    time_div= s->time/s->avctx->time_base.den;
+    time_mod= s->time%s->avctx->time_base.den;
+
+    if(s->pict_type==B_TYPE){
+        s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
+        assert(s->pb_time > 0 && s->pb_time < s->pp_time);
+        ff_mpeg4_init_direct_mv(s);
+    }else{
+        s->last_time_base= s->time_base;
+        s->time_base= time_div;
+        s->pp_time= s->time - s->last_non_b_time;
+        s->last_non_b_time= s->time;
+        assert(picture_number==0 || s->pp_time > 0);
+    }
+}
+
+static void mpeg4_encode_gop_header(MpegEncContext * s){
+    int hours, minutes, seconds;
+    int64_t time;
+
+    put_bits(&s->pb, 16, 0);
+    put_bits(&s->pb, 16, GOP_STARTCODE);
+
+    time= s->current_picture_ptr->pts;
+    if(s->reordered_input_picture[1])
+        time= FFMIN(time, s->reordered_input_picture[1]->pts);
+    time= time*s->avctx->time_base.num;
+
+    seconds= time/s->avctx->time_base.den;
+    minutes= seconds/60; seconds %= 60;
+    hours= minutes/60; minutes %= 60;
+    hours%=24;
+
+    put_bits(&s->pb, 5, hours);
+    put_bits(&s->pb, 6, minutes);
+    put_bits(&s->pb, 1, 1);
+    put_bits(&s->pb, 6, seconds);
+
+    put_bits(&s->pb, 1, !!(s->flags&CODEC_FLAG_CLOSED_GOP));
+    put_bits(&s->pb, 1, 0); //broken link == NO
+
+    s->last_time_base= time / s->avctx->time_base.den;
+
+    ff_mpeg4_stuffing(&s->pb);
+}
+
+static void mpeg4_encode_visual_object_header(MpegEncContext * s){
+    int profile_and_level_indication;
+    int vo_ver_id;
+
+    if(s->avctx->profile != FF_PROFILE_UNKNOWN){
+        profile_and_level_indication = s->avctx->profile << 4;
+    }else if(s->max_b_frames || s->quarter_sample){
+        profile_and_level_indication= 0xF0; // adv simple
+    }else{
+        profile_and_level_indication= 0x00; // simple
+    }
+
+    if(s->avctx->level != FF_LEVEL_UNKNOWN){
+        profile_and_level_indication |= s->avctx->level;
+    }else{
+        profile_and_level_indication |= 1; //level 1
+    }
+
+    if(profile_and_level_indication>>4 == 0xF){
+        vo_ver_id= 5;
+    }else{
+        vo_ver_id= 1;
+    }
+
+    //FIXME levels
+
+    put_bits(&s->pb, 16, 0);
+    put_bits(&s->pb, 16, VOS_STARTCODE);
+
+    put_bits(&s->pb, 8, profile_and_level_indication);
+
+    put_bits(&s->pb, 16, 0);
+    put_bits(&s->pb, 16, VISUAL_OBJ_STARTCODE);
+
+    put_bits(&s->pb, 1, 1);
+        put_bits(&s->pb, 4, vo_ver_id);
+        put_bits(&s->pb, 3, 1); //priority
+
+    put_bits(&s->pb, 4, 1); //visual obj type== video obj
+
+    put_bits(&s->pb, 1, 0); //video signal type == no clue //FIXME
+
+    ff_mpeg4_stuffing(&s->pb);
+}
+
+static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_number)
+{
+    int vo_ver_id;
+
+    if(s->max_b_frames || s->quarter_sample){
+        vo_ver_id= 5;
+        s->vo_type= ADV_SIMPLE_VO_TYPE;
+    }else{
+        vo_ver_id= 1;
+        s->vo_type= SIMPLE_VO_TYPE;
+    }
+
+    put_bits(&s->pb, 16, 0);
+    put_bits(&s->pb, 16, 0x100 + vo_number);        /* video obj */
+    put_bits(&s->pb, 16, 0);
+    put_bits(&s->pb, 16, 0x120 + vol_number);       /* video obj layer */
+
+    put_bits(&s->pb, 1, 0);             /* random access vol */
+    put_bits(&s->pb, 8, s->vo_type);    /* video obj type indication */
+    if(s->workaround_bugs & FF_BUG_MS) {
+        put_bits(&s->pb, 1, 0);         /* is obj layer id= no */
+    } else {
+        put_bits(&s->pb, 1, 1);         /* is obj layer id= yes */
+        put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */
+        put_bits(&s->pb, 3, 1);         /* is obj layer priority */
+    }
+
+    aspect_to_info(s, s->avctx->sample_aspect_ratio);
+
+    put_bits(&s->pb, 4, s->aspect_ratio_info);/* aspect ratio info */
+    if (s->aspect_ratio_info == FF_ASPECT_EXTENDED){
+        put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.num);
+        put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den);
+    }
+
+    if(s->workaround_bugs & FF_BUG_MS) { //
+        put_bits(&s->pb, 1, 0);         /* vol control parameters= no @@@ */
+    } else {
+        put_bits(&s->pb, 1, 1);         /* vol control parameters= yes */
+        put_bits(&s->pb, 2, 1);         /* chroma format YUV 420/YV12 */
+        put_bits(&s->pb, 1, s->low_delay);
+        put_bits(&s->pb, 1, 0);         /* vbv parameters= no */
+    }
+
+    put_bits(&s->pb, 2, RECT_SHAPE);    /* vol shape= rectangle */
+    put_bits(&s->pb, 1, 1);             /* marker bit */
+
+    put_bits(&s->pb, 16, s->avctx->time_base.den);
+    if (s->time_increment_bits < 1)
+        s->time_increment_bits = 1;
+    put_bits(&s->pb, 1, 1);             /* marker bit */
+    put_bits(&s->pb, 1, 0);             /* fixed vop rate=no */
+    put_bits(&s->pb, 1, 1);             /* marker bit */
+    put_bits(&s->pb, 13, s->width);     /* vol width */
+    put_bits(&s->pb, 1, 1);             /* marker bit */
+    put_bits(&s->pb, 13, s->height);    /* vol height */
+    put_bits(&s->pb, 1, 1);             /* marker bit */
+    put_bits(&s->pb, 1, s->progressive_sequence ? 0 : 1);
+    put_bits(&s->pb, 1, 1);             /* obmc disable */
+    if (vo_ver_id == 1) {
+        put_bits(&s->pb, 1, s->vol_sprite_usage);       /* sprite enable */
+    }else{
+        put_bits(&s->pb, 2, s->vol_sprite_usage);       /* sprite enable */
+    }
+
+    put_bits(&s->pb, 1, 0);             /* not 8 bit == false */
+    put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/
+
+    if(s->mpeg_quant){
+        ff_write_quant_matrix(&s->pb, s->avctx->intra_matrix);
+        ff_write_quant_matrix(&s->pb, s->avctx->inter_matrix);
+    }
+
+    if (vo_ver_id != 1)
+        put_bits(&s->pb, 1, s->quarter_sample);
+    put_bits(&s->pb, 1, 1);             /* complexity estimation disable */
+    s->resync_marker= s->rtp_mode;
+    put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */
+    put_bits(&s->pb, 1, s->data_partitioning ? 1 : 0);
+    if(s->data_partitioning){
+        put_bits(&s->pb, 1, 0);         /* no rvlc */
+    }
+
+    if (vo_ver_id != 1){
+        put_bits(&s->pb, 1, 0);         /* newpred */
+        put_bits(&s->pb, 1, 0);         /* reduced res vop */
+    }
+    put_bits(&s->pb, 1, 0);             /* scalability */
+
+    ff_mpeg4_stuffing(&s->pb);
+
+    /* user data */
+    if(!(s->flags & CODEC_FLAG_BITEXACT)){
+        put_bits(&s->pb, 16, 0);
+        put_bits(&s->pb, 16, 0x1B2);    /* user_data */
+        ff_put_string(&s->pb, LIBAVCODEC_IDENT, 0);
+    }
+}
+
+/* write mpeg4 VOP header */
+void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
+{
+    int time_incr;
+    int time_div, time_mod;
+
+    if(s->pict_type==I_TYPE){
+        if(!(s->flags&CODEC_FLAG_GLOBAL_HEADER)){
+            if(s->strict_std_compliance < FF_COMPLIANCE_VERY_STRICT) //HACK, the reference sw is buggy
+                mpeg4_encode_visual_object_header(s);
+            if(s->strict_std_compliance < FF_COMPLIANCE_VERY_STRICT || picture_number==0) //HACK, the reference sw is buggy
+                mpeg4_encode_vol_header(s, 0, 0);
+        }
+        if(!(s->workaround_bugs & FF_BUG_MS))
+            mpeg4_encode_gop_header(s);
+    }
+
+    s->partitioned_frame= s->data_partitioning && s->pict_type!=B_TYPE;
+
+//printf("num:%d rate:%d base:%d\n", s->picture_number, s->time_base.den, FRAME_RATE_BASE);
+
+    put_bits(&s->pb, 16, 0);                /* vop header */
+    put_bits(&s->pb, 16, VOP_STARTCODE);    /* vop header */
+    put_bits(&s->pb, 2, s->pict_type - 1);  /* pict type: I = 0 , P = 1 */
+
+    assert(s->time>=0);
+    time_div= s->time/s->avctx->time_base.den;
+    time_mod= s->time%s->avctx->time_base.den;
+    time_incr= time_div - s->last_time_base;
+    assert(time_incr >= 0);
+    while(time_incr--)
+        put_bits(&s->pb, 1, 1);
+
+    put_bits(&s->pb, 1, 0);
+
+    put_bits(&s->pb, 1, 1);                             /* marker */
+    put_bits(&s->pb, s->time_increment_bits, time_mod); /* time increment */
+    put_bits(&s->pb, 1, 1);                             /* marker */
+    put_bits(&s->pb, 1, 1);                             /* vop coded */
+    if (    s->pict_type == P_TYPE
+        || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) {
+        put_bits(&s->pb, 1, s->no_rounding);    /* rounding type */
+    }
+    put_bits(&s->pb, 3, 0);     /* intra dc VLC threshold */
+    if(!s->progressive_sequence){
+         put_bits(&s->pb, 1, s->current_picture_ptr->top_field_first);
+         put_bits(&s->pb, 1, s->alternate_scan);
+    }
+    //FIXME sprite stuff
+
+    put_bits(&s->pb, 5, s->qscale);
+
+    if (s->pict_type != I_TYPE)
+        put_bits(&s->pb, 3, s->f_code); /* fcode_for */
+    if (s->pict_type == B_TYPE)
+        put_bits(&s->pb, 3, s->b_code); /* fcode_back */
+    //    printf("****frame %d\n", picture_number);
+}
+
+#endif //CONFIG_ENCODERS
+
+/**
+ * set qscale and update qscale dependant variables.
+ */
+void ff_set_qscale(MpegEncContext * s, int qscale)
+{
+    if (qscale < 1)
+        qscale = 1;
+    else if (qscale > 31)
+        qscale = 31;
+
+    s->qscale = qscale;
+    s->chroma_qscale= s->chroma_qscale_table[qscale];
+
+    s->y_dc_scale= s->y_dc_scale_table[ qscale ];
+    s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
+}
+
+/**
+ * predicts the dc.
+ * encoding quantized level -> quantized diff
+ * decoding quantized diff -> quantized level
+ * @param n block index (0-3 are luma, 4-5 are chroma)
+ * @param dir_ptr pointer to an integer where the prediction direction will be stored
+ */
+static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *dir_ptr, int encoding)
+{
+    int a, b, c, wrap, pred, scale, ret;
+    int16_t *dc_val;
+
+    /* find prediction */
+    if (n < 4) {
+        scale = s->y_dc_scale;
+    } else {
+        scale = s->c_dc_scale;
+    }
+    if(IS_3IV1)
+        scale= 8;
+
+    wrap= s->block_wrap[n];
+    dc_val = s->dc_val[0] + s->block_index[n];
+
+    /* B C
+     * A X
+     */
+    a = dc_val[ - 1];
+    b = dc_val[ - 1 - wrap];
+    c = dc_val[ - wrap];
+
+    /* outside slice handling (we can't do that by memset as we need the dc for error resilience) */
+    if(s->first_slice_line && n!=3){
+        if(n!=2) b=c= 1024;
+        if(n!=1 && s->mb_x == s->resync_mb_x) b=a= 1024;
+    }
+    if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1){
+        if(n==0 || n==4 || n==5)
+            b=1024;
+    }
+
+    if (abs(a - b) < abs(b - c)) {
+        pred = c;
+        *dir_ptr = 1; /* top */
+    } else {
+        pred = a;
+        *dir_ptr = 0; /* left */
+    }
+    /* we assume pred is positive */
+    pred = FASTDIV((pred + (scale >> 1)), scale);
+
+    if(encoding){
+        ret = level - pred;
+    }else{
+        level += pred;
+        ret= level;
+        if(s->error_resilience>=3){
+            if(level<0){
+                av_log(s->avctx, AV_LOG_ERROR, "dc<0 at %dx%d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+            if(level*scale > 2048 + scale){
+                av_log(s->avctx, AV_LOG_ERROR, "dc overflow at %dx%d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+        }
+    }
+    level *=scale;
+    if(level&(~2047)){
+        if(level<0)
+            level=0;
+        else if(!(s->workaround_bugs&FF_BUG_DC_CLIP))
+            level=2047;
+    }
+    dc_val[0]= level;
+
+    return ret;
+}
+
+/**
+ * predicts the ac.
+ * @param n block index (0-3 are luma, 4-5 are chroma)
+ * @param dir the ac prediction direction
+ */
+void mpeg4_pred_ac(MpegEncContext * s, DCTELEM *block, int n,
+                   int dir)
+{
+    int i;
+    int16_t *ac_val, *ac_val1;
+    int8_t * const qscale_table= s->current_picture.qscale_table;
+
+    /* find prediction */
+    ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
+    ac_val1 = ac_val;
+    if (s->ac_pred) {
+        if (dir == 0) {
+            const int xy= s->mb_x-1 + s->mb_y*s->mb_stride;
+            /* left prediction */
+            ac_val -= 16;
+
+            if(s->mb_x==0 || s->qscale == qscale_table[xy] || n==1 || n==3){
+                /* same qscale */
+                for(i=1;i<8;i++) {
+                    block[s->dsp.idct_permutation[i<<3]] += ac_val[i];
+                }
+            }else{
+                /* different qscale, we must rescale */
+                for(i=1;i<8;i++) {
+                    block[s->dsp.idct_permutation[i<<3]] += ROUNDED_DIV(ac_val[i]*qscale_table[xy], s->qscale);
+                }
+            }
+        } else {
+            const int xy= s->mb_x + s->mb_y*s->mb_stride - s->mb_stride;
+            /* top prediction */
+            ac_val -= 16 * s->block_wrap[n];
+
+            if(s->mb_y==0 || s->qscale == qscale_table[xy] || n==2 || n==3){
+                /* same qscale */
+                for(i=1;i<8;i++) {
+                    block[s->dsp.idct_permutation[i]] += ac_val[i + 8];
+                }
+            }else{
+                /* different qscale, we must rescale */
+                for(i=1;i<8;i++) {
+                    block[s->dsp.idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8]*qscale_table[xy], s->qscale);
+                }
+            }
+        }
+    }
+    /* left copy */
+    for(i=1;i<8;i++)
+        ac_val1[i    ] = block[s->dsp.idct_permutation[i<<3]];
+
+    /* top copy */
+    for(i=1;i<8;i++)
+        ac_val1[8 + i] = block[s->dsp.idct_permutation[i   ]];
+
+}
+
+#ifdef CONFIG_ENCODERS
+
+/**
+ * encodes the dc value.
+ * @param n block index (0-3 are luma, 4-5 are chroma)
+ */
+static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
+{
+#if 1
+//    if(level<-255 || level>255) printf("dc overflow\n");
+    level+=256;
+    if (n < 4) {
+        /* luminance */
+        put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]);
+    } else {
+        /* chrominance */
+        put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]);
+    }
+#else
+    int size, v;
+    /* find number of bits */
+    size = 0;
+    v = abs(level);
+    while (v) {
+        v >>= 1;
+        size++;
+    }
+
+    if (n < 4) {
+        /* luminance */
+        put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]);
+    } else {
+        /* chrominance */
+        put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]);
+    }
+
+    /* encode remaining bits */
+    if (size > 0) {
+        if (level < 0)
+            level = (-level) ^ ((1 << size) - 1);
+        put_bits(&s->pb, size, level);
+        if (size > 8)
+            put_bits(&s->pb, 1, 1);
+    }
+#endif
+}
+
+static inline int mpeg4_get_dc_length(int level, int n){
+    if (n < 4) {
+        return uni_DCtab_lum_len[level + 256];
+    } else {
+        return uni_DCtab_chrom_len[level + 256];
+    }
+}
+
+/**
+ * encodes a 8x8 block
+ * @param n block index (0-3 are luma, 4-5 are chroma)
+ */
+static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
+                               uint8_t *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb)
+{
+    int i, last_non_zero;
+#if 0 //variables for the outcommented version
+    int code, sign, last;
+#endif
+    const RLTable *rl;
+    uint32_t *bits_tab;
+    uint8_t *len_tab;
+    const int last_index = s->block_last_index[n];
+
+    if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
+        /* mpeg4 based DC predictor */
+        mpeg4_encode_dc(dc_pb, intra_dc, n);
+        if(last_index<1) return;
+        i = 1;
+        rl = &rl_intra;
+        bits_tab= uni_mpeg4_intra_rl_bits;
+        len_tab = uni_mpeg4_intra_rl_len;
+    } else {
+        if(last_index<0) return;
+        i = 0;
+        rl = &rl_inter;
+        bits_tab= uni_mpeg4_inter_rl_bits;
+        len_tab = uni_mpeg4_inter_rl_len;
+    }
+
+    /* AC coefs */
+    last_non_zero = i - 1;
+#if 1
+    for (; i < last_index; i++) {
+        int level = block[ scan_table[i] ];
+        if (level) {
+            int run = i - last_non_zero - 1;
+            level+=64;
+            if((level&(~127)) == 0){
+                const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
+                put_bits(ac_pb, len_tab[index], bits_tab[index]);
+            }else{ //ESC3
+                put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(0<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1);
+            }
+            last_non_zero = i;
+        }
+    }
+    /*if(i<=last_index)*/{
+        int level = block[ scan_table[i] ];
+        int run = i - last_non_zero - 1;
+        level+=64;
+        if((level&(~127)) == 0){
+            const int index= UNI_MPEG4_ENC_INDEX(1, run, level);
+            put_bits(ac_pb, len_tab[index], bits_tab[index]);
+        }else{ //ESC3
+            put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(1<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1);
+        }
+    }
+#else
+    for (; i <= last_index; i++) {
+        const int slevel = block[ scan_table[i] ];
+        if (slevel) {
+            int level;
+            int run = i - last_non_zero - 1;
+            last = (i == last_index);
+            sign = 0;
+            level = slevel;
+            if (level < 0) {
+                sign = 1;
+                level = -level;
+            }
+            code = get_rl_index(rl, last, run, level);
+            put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
+            if (code == rl->n) {
+                int level1, run1;
+                level1 = level - rl->max_level[last][run];
+                if (level1 < 1)
+                    goto esc2;
+                code = get_rl_index(rl, last, run, level1);
+                if (code == rl->n) {
+                esc2:
+                    put_bits(ac_pb, 1, 1);
+                    if (level > MAX_LEVEL)
+                        goto esc3;
+                    run1 = run - rl->max_run[last][level] - 1;
+                    if (run1 < 0)
+                        goto esc3;
+                    code = get_rl_index(rl, last, run1, level);
+                    if (code == rl->n) {
+                    esc3:
+                        /* third escape */
+                        put_bits(ac_pb, 1, 1);
+                        put_bits(ac_pb, 1, last);
+                        put_bits(ac_pb, 6, run);
+                        put_bits(ac_pb, 1, 1);
+                        put_bits(ac_pb, 12, slevel & 0xfff);
+                        put_bits(ac_pb, 1, 1);
+                    } else {
+                        /* second escape */
+                        put_bits(ac_pb, 1, 0);
+                        put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
+                        put_bits(ac_pb, 1, sign);
+                    }
+                } else {
+                    /* first escape */
+                    put_bits(ac_pb, 1, 0);
+                    put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
+                    put_bits(ac_pb, 1, sign);
+                }
+            } else {
+                put_bits(ac_pb, 1, sign);
+            }
+            last_non_zero = i;
+        }
+    }
+#endif
+}
+
+static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
+                               uint8_t *scan_table)
+{
+    int i, last_non_zero;
+    const RLTable *rl;
+    uint8_t *len_tab;
+    const int last_index = s->block_last_index[n];
+    int len=0;
+
+    if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
+        /* mpeg4 based DC predictor */
+        len += mpeg4_get_dc_length(intra_dc, n);
+        if(last_index<1) return len;
+        i = 1;
+        rl = &rl_intra;
+        len_tab = uni_mpeg4_intra_rl_len;
+    } else {
+        if(last_index<0) return 0;
+        i = 0;
+        rl = &rl_inter;
+        len_tab = uni_mpeg4_inter_rl_len;
+    }
+
+    /* AC coefs */
+    last_non_zero = i - 1;
+    for (; i < last_index; i++) {
+        int level = block[ scan_table[i] ];
+        if (level) {
+            int run = i - last_non_zero - 1;
+            level+=64;
+            if((level&(~127)) == 0){
+                const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
+                len += len_tab[index];
+            }else{ //ESC3
+                len += 7+2+1+6+1+12+1;
+            }
+            last_non_zero = i;
+        }
+    }
+    /*if(i<=last_index)*/{
+        int level = block[ scan_table[i] ];
+        int run = i - last_non_zero - 1;
+        level+=64;
+        if((level&(~127)) == 0){
+            const int index= UNI_MPEG4_ENC_INDEX(1, run, level);
+            len += len_tab[index];
+        }else{ //ESC3
+            len += 7+2+1+6+1+12+1;
+        }
+    }
+
+    return len;
+}
+
+#endif
+
+
+/***********************************************/
+/* decoding */
+
+static VLC intra_MCBPC_vlc;
+static VLC inter_MCBPC_vlc;
+static VLC cbpy_vlc;
+static VLC mv_vlc;
+static VLC dc_lum, dc_chrom;
+static VLC sprite_trajectory;
+static VLC mb_type_b_vlc;
+static VLC h263_mbtype_b_vlc;
+static VLC cbpc_b_vlc;
+
+void init_vlc_rl(RLTable *rl, int use_static)
+{
+    int i, q;
+
+    /* Return if static table is already initialized */
+    if(use_static && rl->rl_vlc[0])
+        return;
+
+    init_vlc(&rl->vlc, 9, rl->n + 1,
+             &rl->table_vlc[0][1], 4, 2,
+             &rl->table_vlc[0][0], 4, 2, use_static);
+
+
+    for(q=0; q<32; q++){
+        int qmul= q*2;
+        int qadd= (q-1)|1;
+
+        if(q==0){
+            qmul=1;
+            qadd=0;
+        }
+        if(use_static)
+            rl->rl_vlc[q]= av_mallocz_static(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
+        else
+            rl->rl_vlc[q]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
+        for(i=0; i<rl->vlc.table_size; i++){
+            int code= rl->vlc.table[i][0];
+            int len = rl->vlc.table[i][1];
+            int level, run;
+
+            if(len==0){ // illegal code
+                run= 66;
+                level= MAX_LEVEL;
+            }else if(len<0){ //more bits needed
+                run= 0;
+                level= code;
+            }else{
+                if(code==rl->n){ //esc
+                    run= 66;
+                    level= 0;
+                }else{
+                    run=   rl->table_run  [code] + 1;
+                    level= rl->table_level[code] * qmul + qadd;
+                    if(code >= rl->last) run+=192;
+                }
+            }
+            rl->rl_vlc[q][i].len= len;
+            rl->rl_vlc[q][i].level= level;
+            rl->rl_vlc[q][i].run= run;
+        }
+    }
+}
+
+/* init vlcs */
+
+/* XXX: find a better solution to handle static init */
+void h263_decode_init_vlc(MpegEncContext *s)
+{
+    static int done = 0;
+
+    if (!done) {
+        done = 1;
+
+        init_vlc(&intra_MCBPC_vlc, INTRA_MCBPC_VLC_BITS, 9,
+                 intra_MCBPC_bits, 1, 1,
+                 intra_MCBPC_code, 1, 1, 1);
+        init_vlc(&inter_MCBPC_vlc, INTER_MCBPC_VLC_BITS, 28,
+                 inter_MCBPC_bits, 1, 1,
+                 inter_MCBPC_code, 1, 1, 1);
+        init_vlc(&cbpy_vlc, CBPY_VLC_BITS, 16,
+                 &cbpy_tab[0][1], 2, 1,
+                 &cbpy_tab[0][0], 2, 1, 1);
+        init_vlc(&mv_vlc, MV_VLC_BITS, 33,
+                 &mvtab[0][1], 2, 1,
+                 &mvtab[0][0], 2, 1, 1);
+        init_rl(&rl_inter, 1);
+        init_rl(&rl_intra, 1);
+        init_rl(&rvlc_rl_inter, 1);
+        init_rl(&rvlc_rl_intra, 1);
+        init_rl(&rl_intra_aic, 1);
+        init_vlc_rl(&rl_inter, 1);
+        init_vlc_rl(&rl_intra, 1);
+        init_vlc_rl(&rvlc_rl_inter, 1);
+        init_vlc_rl(&rvlc_rl_intra, 1);
+        init_vlc_rl(&rl_intra_aic, 1);
+        init_vlc(&dc_lum, DC_VLC_BITS, 10 /* 13 */,
+                 &DCtab_lum[0][1], 2, 1,
+                 &DCtab_lum[0][0], 2, 1, 1);
+        init_vlc(&dc_chrom, DC_VLC_BITS, 10 /* 13 */,
+                 &DCtab_chrom[0][1], 2, 1,
+                 &DCtab_chrom[0][0], 2, 1, 1);
+        init_vlc(&sprite_trajectory, SPRITE_TRAJ_VLC_BITS, 15,
+                 &sprite_trajectory_tab[0][1], 4, 2,
+                 &sprite_trajectory_tab[0][0], 4, 2, 1);
+        init_vlc(&mb_type_b_vlc, MB_TYPE_B_VLC_BITS, 4,
+                 &mb_type_b_tab[0][1], 2, 1,
+                 &mb_type_b_tab[0][0], 2, 1, 1);
+        init_vlc(&h263_mbtype_b_vlc, H263_MBTYPE_B_VLC_BITS, 15,
+                 &h263_mbtype_b_tab[0][1], 2, 1,
+                 &h263_mbtype_b_tab[0][0], 2, 1, 1);
+        init_vlc(&cbpc_b_vlc, CBPC_B_VLC_BITS, 4,
+                 &cbpc_b_tab[0][1], 2, 1,
+                 &cbpc_b_tab[0][0], 2, 1, 1);
+    }
+}
+
+/**
+ * Get the GOB height based on picture height.
+ */
+int ff_h263_get_gob_height(MpegEncContext *s){
+    if (s->height <= 400)
+        return 1;
+    else if (s->height <= 800)
+        return  2;
+    else
+        return 4;
+}
+
+int ff_h263_decode_mba(MpegEncContext *s)
+{
+    int i, mb_pos;
+
+    for(i=0; i<6; i++){
+        if(s->mb_num-1 <= ff_mba_max[i]) break;
+    }
+    mb_pos= get_bits(&s->gb, ff_mba_length[i]);
+    s->mb_x= mb_pos % s->mb_width;
+    s->mb_y= mb_pos / s->mb_width;
+
+    return mb_pos;
+}
+
+void ff_h263_encode_mba(MpegEncContext *s)
+{
+    int i, mb_pos;
+
+    for(i=0; i<6; i++){
+        if(s->mb_num-1 <= ff_mba_max[i]) break;
+    }
+    mb_pos= s->mb_x + s->mb_width*s->mb_y;
+    put_bits(&s->pb, ff_mba_length[i], mb_pos);
+}
+
+/**
+ * decodes the group of blocks header or slice header.
+ * @return <0 if an error occured
+ */
+static int h263_decode_gob_header(MpegEncContext *s)
+{
+    unsigned int val, gfid, gob_number;
+    int left;
+
+    /* Check for GOB Start Code */
+    val = show_bits(&s->gb, 16);
+    if(val)
+        return -1;
+
+        /* We have a GBSC probably with GSTUFF */
+    skip_bits(&s->gb, 16); /* Drop the zeros */
+    left= s->gb.size_in_bits - get_bits_count(&s->gb);
+    //MN: we must check the bits left or we might end in a infinite loop (or segfault)
+    for(;left>13; left--){
+        if(get_bits1(&s->gb)) break; /* Seek the '1' bit */
+    }
+    if(left<=13)
+        return -1;
+
+    if(s->h263_slice_structured){
+        if(get_bits1(&s->gb)==0)
+            return -1;
+
+        ff_h263_decode_mba(s);
+
+        if(s->mb_num > 1583)
+            if(get_bits1(&s->gb)==0)
+                return -1;
+
+        s->qscale = get_bits(&s->gb, 5); /* SQUANT */
+        if(get_bits1(&s->gb)==0)
+            return -1;
+        gfid = get_bits(&s->gb, 2); /* GFID */
+    }else{
+        gob_number = get_bits(&s->gb, 5); /* GN */
+        s->mb_x= 0;
+        s->mb_y= s->gob_index* gob_number;
+        gfid = get_bits(&s->gb, 2); /* GFID */
+        s->qscale = get_bits(&s->gb, 5); /* GQUANT */
+    }
+
+    if(s->mb_y >= s->mb_height)
+        return -1;
+
+    if(s->qscale==0)
+        return -1;
+
+    return 0;
+}
+
+static inline void memsetw(short *tab, int val, int n)
+{
+    int i;
+    for(i=0;i<n;i++)
+        tab[i] = val;
+}
+
+#ifdef CONFIG_ENCODERS
+
+void ff_mpeg4_init_partitions(MpegEncContext *s)
+{
+    uint8_t *start= pbBufPtr(&s->pb);
+    uint8_t *end= s->pb.buf_end;
+    int size= end - start;
+    int pb_size = (((long)start + size/3)&(~3)) - (long)start;
+    int tex_size= (size - 2*pb_size)&(~3);
+
+    set_put_bits_buffer_size(&s->pb, pb_size);
+    init_put_bits(&s->tex_pb, start + pb_size           , tex_size);
+    init_put_bits(&s->pb2   , start + pb_size + tex_size, pb_size);
+}
+
+void ff_mpeg4_merge_partitions(MpegEncContext *s)
+{
+    const int pb2_len   = put_bits_count(&s->pb2   );
+    const int tex_pb_len= put_bits_count(&s->tex_pb);
+    const int bits= put_bits_count(&s->pb);
+
+    if(s->pict_type==I_TYPE){
+        put_bits(&s->pb, 19, DC_MARKER);
+        s->misc_bits+=19 + pb2_len + bits - s->last_bits;
+        s->i_tex_bits+= tex_pb_len;
+    }else{
+        put_bits(&s->pb, 17, MOTION_MARKER);
+        s->misc_bits+=17 + pb2_len;
+        s->mv_bits+= bits - s->last_bits;
+        s->p_tex_bits+= tex_pb_len;
+    }
+
+    flush_put_bits(&s->pb2);
+    flush_put_bits(&s->tex_pb);
+
+    set_put_bits_buffer_size(&s->pb, s->pb2.buf_end - s->pb.buf);
+    ff_copy_bits(&s->pb, s->pb2.buf   , pb2_len);
+    ff_copy_bits(&s->pb, s->tex_pb.buf, tex_pb_len);
+    s->last_bits= put_bits_count(&s->pb);
+}
+
+#endif //CONFIG_ENCODERS
+
+int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s){
+    switch(s->pict_type){
+        case I_TYPE:
+            return 16;
+        case P_TYPE:
+        case S_TYPE:
+            return s->f_code+15;
+        case B_TYPE:
+            return FFMAX(FFMAX(s->f_code, s->b_code)+15, 17);
+        default:
+            return -1;
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+
+void ff_mpeg4_encode_video_packet_header(MpegEncContext *s)
+{
+    int mb_num_bits= av_log2(s->mb_num - 1) + 1;
+
+    put_bits(&s->pb, ff_mpeg4_get_video_packet_prefix_length(s), 0);
+    put_bits(&s->pb, 1, 1);
+
+    put_bits(&s->pb, mb_num_bits, s->mb_x + s->mb_y*s->mb_width);
+    put_bits(&s->pb, s->quant_precision, s->qscale);
+    put_bits(&s->pb, 1, 0); /* no HEC */
+}
+
+#endif //CONFIG_ENCODERS
+
+/**
+ * check if the next stuff is a resync marker or the end.
+ * @return 0 if not
+ */
+static inline int mpeg4_is_resync(MpegEncContext *s){
+    int bits_count= get_bits_count(&s->gb);
+    int v= show_bits(&s->gb, 16);
+
+    if(s->workaround_bugs&FF_BUG_NO_PADDING){
+        return 0;
+    }
+
+    while(v<=0xFF){
+        if(s->pict_type==B_TYPE || (v>>(8-s->pict_type)!=1) || s->partitioned_frame)
+            break;
+        skip_bits(&s->gb, 8+s->pict_type);
+        bits_count+= 8+s->pict_type;
+        v= show_bits(&s->gb, 16);
+    }
+
+    if(bits_count + 8 >= s->gb.size_in_bits){
+        v>>=8;
+        v|= 0x7F >> (7-(bits_count&7));
+
+        if(v==0x7F)
+            return 1;
+    }else{
+        if(v == ff_mpeg4_resync_prefix[bits_count&7]){
+            int len;
+            GetBitContext gb= s->gb;
+
+            skip_bits(&s->gb, 1);
+            align_get_bits(&s->gb);
+
+            for(len=0; len<32; len++){
+                if(get_bits1(&s->gb)) break;
+            }
+
+            s->gb= gb;
+
+            if(len>=ff_mpeg4_get_video_packet_prefix_length(s))
+                return 1;
+        }
+    }
+    return 0;
+}
+
+/**
+ * decodes the next video packet.
+ * @return <0 if something went wrong
+ */
+static int mpeg4_decode_video_packet_header(MpegEncContext *s)
+{
+    int mb_num_bits= av_log2(s->mb_num - 1) + 1;
+    int header_extension=0, mb_num, len;
+
+    /* is there enough space left for a video packet + header */
+    if( get_bits_count(&s->gb) > s->gb.size_in_bits-20) return -1;
+
+    for(len=0; len<32; len++){
+        if(get_bits1(&s->gb)) break;
+    }
+
+    if(len!=ff_mpeg4_get_video_packet_prefix_length(s)){
+        av_log(s->avctx, AV_LOG_ERROR, "marker does not match f_code\n");
+        return -1;
+    }
+
+    if(s->shape != RECT_SHAPE){
+        header_extension= get_bits1(&s->gb);
+        //FIXME more stuff here
+    }
+
+    mb_num= get_bits(&s->gb, mb_num_bits);
+    if(mb_num>=s->mb_num){
+        av_log(s->avctx, AV_LOG_ERROR, "illegal mb_num in video packet (%d %d) \n", mb_num, s->mb_num);
+        return -1;
+    }
+    if(s->pict_type == B_TYPE){
+        while(s->next_picture.mbskip_table[ s->mb_index2xy[ mb_num ] ]) mb_num++;
+        if(mb_num >= s->mb_num) return -1; // slice contains just skipped MBs which where allready decoded
+    }
+
+    s->mb_x= mb_num % s->mb_width;
+    s->mb_y= mb_num / s->mb_width;
+
+    if(s->shape != BIN_ONLY_SHAPE){
+        int qscale= get_bits(&s->gb, s->quant_precision);
+        if(qscale)
+            s->chroma_qscale=s->qscale= qscale;
+    }
+
+    if(s->shape == RECT_SHAPE){
+        header_extension= get_bits1(&s->gb);
+    }
+    if(header_extension){
+        int time_increment;
+        int time_incr=0;
+
+        while (get_bits1(&s->gb) != 0)
+            time_incr++;
+
+        check_marker(&s->gb, "before time_increment in video packed header");
+        time_increment= get_bits(&s->gb, s->time_increment_bits);
+        check_marker(&s->gb, "before vop_coding_type in video packed header");
+
+        skip_bits(&s->gb, 2); /* vop coding type */
+        //FIXME not rect stuff here
+
+        if(s->shape != BIN_ONLY_SHAPE){
+            skip_bits(&s->gb, 3); /* intra dc vlc threshold */
+//FIXME don't just ignore everything
+            if(s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
+                mpeg4_decode_sprite_trajectory(s, &s->gb);
+                av_log(s->avctx, AV_LOG_ERROR, "untested\n");
+            }
+
+            //FIXME reduced res stuff here
+
+            if (s->pict_type != I_TYPE) {
+                int f_code = get_bits(&s->gb, 3);       /* fcode_for */
+                if(f_code==0){
+                    av_log(s->avctx, AV_LOG_ERROR, "Error, video packet header damaged (f_code=0)\n");
+                }
+            }
+            if (s->pict_type == B_TYPE) {
+                int b_code = get_bits(&s->gb, 3);
+                if(b_code==0){
+                    av_log(s->avctx, AV_LOG_ERROR, "Error, video packet header damaged (b_code=0)\n");
+                }
+            }
+        }
+    }
+    //FIXME new-pred stuff
+
+//printf("parse ok %d %d %d %d\n", mb_num, s->mb_x + s->mb_y*s->mb_width, get_bits_count(gb), get_bits_count(&s->gb));
+
+    return 0;
+}
+
+void ff_mpeg4_clean_buffers(MpegEncContext *s)
+{
+    int c_wrap, c_xy, l_wrap, l_xy;
+
+    l_wrap= s->b8_stride;
+    l_xy= (2*s->mb_y-1)*l_wrap + s->mb_x*2 - 1;
+    c_wrap= s->mb_stride;
+    c_xy= (s->mb_y-1)*c_wrap + s->mb_x - 1;
+
+#if 0
+    /* clean DC */
+    memsetw(s->dc_val[0] + l_xy, 1024, l_wrap*2+1);
+    memsetw(s->dc_val[1] + c_xy, 1024, c_wrap+1);
+    memsetw(s->dc_val[2] + c_xy, 1024, c_wrap+1);
+#endif
+
+    /* clean AC */
+    memset(s->ac_val[0] + l_xy, 0, (l_wrap*2+1)*16*sizeof(int16_t));
+    memset(s->ac_val[1] + c_xy, 0, (c_wrap  +1)*16*sizeof(int16_t));
+    memset(s->ac_val[2] + c_xy, 0, (c_wrap  +1)*16*sizeof(int16_t));
+
+    /* clean MV */
+    // we can't clear the MVs as they might be needed by a b frame
+//    memset(s->motion_val + l_xy, 0, (l_wrap*2+1)*2*sizeof(int16_t));
+//    memset(s->motion_val, 0, 2*sizeof(int16_t)*(2 + s->mb_width*2)*(2 + s->mb_height*2));
+    s->last_mv[0][0][0]=
+    s->last_mv[0][0][1]=
+    s->last_mv[1][0][0]=
+    s->last_mv[1][0][1]= 0;
+}
+
+/**
+ * decodes the group of blocks / video packet header.
+ * @return <0 if no resync found
+ */
+int ff_h263_resync(MpegEncContext *s){
+    int left, ret;
+
+    if(s->codec_id==CODEC_ID_MPEG4){
+        skip_bits1(&s->gb);
+        align_get_bits(&s->gb);
+    }
+
+    if(show_bits(&s->gb, 16)==0){
+        if(s->codec_id==CODEC_ID_MPEG4)
+            ret= mpeg4_decode_video_packet_header(s);
+        else
+            ret= h263_decode_gob_header(s);
+        if(ret>=0)
+            return 0;
+    }
+    //ok, it's not where its supposed to be ...
+    s->gb= s->last_resync_gb;
+    align_get_bits(&s->gb);
+    left= s->gb.size_in_bits - get_bits_count(&s->gb);
+
+    for(;left>16+1+5+5; left-=8){
+        if(show_bits(&s->gb, 16)==0){
+            GetBitContext bak= s->gb;
+
+            if(s->codec_id==CODEC_ID_MPEG4)
+                ret= mpeg4_decode_video_packet_header(s);
+            else
+                ret= h263_decode_gob_header(s);
+            if(ret>=0)
+                return 0;
+
+            s->gb= bak;
+        }
+        skip_bits(&s->gb, 8);
+    }
+
+    return -1;
+}
+
+/**
+ * gets the average motion vector for a GMC MB.
+ * @param n either 0 for the x component or 1 for y
+ * @returns the average MV for a GMC MB
+ */
+static inline int get_amv(MpegEncContext *s, int n){
+    int x, y, mb_v, sum, dx, dy, shift;
+    int len = 1 << (s->f_code + 4);
+    const int a= s->sprite_warping_accuracy;
+
+    if(s->workaround_bugs & FF_BUG_AMV)
+        len >>= s->quarter_sample;
+
+    if(s->real_sprite_warping_points==1){
+        if(s->divx_version==500 && s->divx_build==413)
+            sum= s->sprite_offset[0][n] / (1<<(a - s->quarter_sample));
+        else
+            sum= RSHIFT(s->sprite_offset[0][n]<<s->quarter_sample, a);
+    }else{
+        dx= s->sprite_delta[n][0];
+        dy= s->sprite_delta[n][1];
+        shift= s->sprite_shift[0];
+        if(n) dy -= 1<<(shift + a + 1);
+        else  dx -= 1<<(shift + a + 1);
+        mb_v= s->sprite_offset[0][n] + dx*s->mb_x*16 + dy*s->mb_y*16;
+
+        sum=0;
+        for(y=0; y<16; y++){
+            int v;
+
+            v= mb_v + dy*y;
+            //XXX FIXME optimize
+            for(x=0; x<16; x++){
+                sum+= v>>shift;
+                v+= dx;
+            }
+        }
+        sum= RSHIFT(sum, a+8-s->quarter_sample);
+    }
+
+    if      (sum < -len) sum= -len;
+    else if (sum >= len) sum= len-1;
+
+    return sum;
+}
+
+/**
+ * decodes first partition.
+ * @return number of MBs decoded or <0 if an error occured
+ */
+static int mpeg4_decode_partition_a(MpegEncContext *s){
+    int mb_num;
+    static const int8_t quant_tab[4] = { -1, -2, 1, 2 };
+
+    /* decode first partition */
+    mb_num=0;
+    s->first_slice_line=1;
+    for(; s->mb_y<s->mb_height; s->mb_y++){
+        ff_init_block_index(s);
+        for(; s->mb_x<s->mb_width; s->mb_x++){
+            const int xy= s->mb_x + s->mb_y*s->mb_stride;
+            int cbpc;
+            int dir=0;
+
+            mb_num++;
+            ff_update_block_index(s);
+            if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1)
+                s->first_slice_line=0;
+
+            if(s->pict_type==I_TYPE){
+                int i;
+
+                do{
+                    if(show_bits_long(&s->gb, 19)==DC_MARKER){
+                        return mb_num-1;
+                    }
+
+                    cbpc = get_vlc2(&s->gb, intra_MCBPC_vlc.table, INTRA_MCBPC_VLC_BITS, 2);
+                    if (cbpc < 0){
+                        av_log(s->avctx, AV_LOG_ERROR, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
+                        return -1;
+                    }
+                }while(cbpc == 8);
+
+                s->cbp_table[xy]= cbpc & 3;
+                s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
+                s->mb_intra = 1;
+
+                if(cbpc & 4) {
+                    ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]);
+                }
+                s->current_picture.qscale_table[xy]= s->qscale;
+
+                s->mbintra_table[xy]= 1;
+                for(i=0; i<6; i++){
+                    int dc_pred_dir;
+                    int dc= mpeg4_decode_dc(s, i, &dc_pred_dir);
+                    if(dc < 0){
+                        av_log(s->avctx, AV_LOG_ERROR, "DC corrupted at %d %d\n", s->mb_x, s->mb_y);
+                        return -1;
+                    }
+                    dir<<=1;
+                    if(dc_pred_dir) dir|=1;
+                }
+                s->pred_dir_table[xy]= dir;
+            }else{ /* P/S_TYPE */
+                int mx, my, pred_x, pred_y, bits;
+                int16_t * const mot_val= s->current_picture.motion_val[0][s->block_index[0]];
+                const int stride= s->b8_stride*2;
+
+try_again:
+                bits= show_bits(&s->gb, 17);
+                if(bits==MOTION_MARKER){
+                    return mb_num-1;
+                }
+                skip_bits1(&s->gb);
+                if(bits&0x10000){
+                    /* skip mb */
+                    if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
+                        s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_GMC | MB_TYPE_L0;
+                        mx= get_amv(s, 0);
+                        my= get_amv(s, 1);
+                    }else{
+                        s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
+                        mx=my=0;
+                    }
+                    mot_val[0       ]= mot_val[2       ]=
+                    mot_val[0+stride]= mot_val[2+stride]= mx;
+                    mot_val[1       ]= mot_val[3       ]=
+                    mot_val[1+stride]= mot_val[3+stride]= my;
+
+                    if(s->mbintra_table[xy])
+                        ff_clean_intra_table_entries(s);
+                    continue;
+                }
+
+                cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2);
+                if (cbpc < 0){
+                    av_log(s->avctx, AV_LOG_ERROR, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
+                    return -1;
+                }
+                if(cbpc == 20)
+                    goto try_again;
+
+                s->cbp_table[xy]= cbpc&(8+3); //8 is dquant
+
+                s->mb_intra = ((cbpc & 4) != 0);
+
+                if(s->mb_intra){
+                    s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
+                    s->mbintra_table[xy]= 1;
+                    mot_val[0       ]= mot_val[2       ]=
+                    mot_val[0+stride]= mot_val[2+stride]= 0;
+                    mot_val[1       ]= mot_val[3       ]=
+                    mot_val[1+stride]= mot_val[3+stride]= 0;
+                }else{
+                    if(s->mbintra_table[xy])
+                        ff_clean_intra_table_entries(s);
+
+                    if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0)
+                        s->mcsel= get_bits1(&s->gb);
+                    else s->mcsel= 0;
+
+                    if ((cbpc & 16) == 0) {
+                        /* 16x16 motion prediction */
+
+                        h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
+                        if(!s->mcsel){
+                            mx = h263_decode_motion(s, pred_x, s->f_code);
+                            if (mx >= 0xffff)
+                                return -1;
+
+                            my = h263_decode_motion(s, pred_y, s->f_code);
+                            if (my >= 0xffff)
+                                return -1;
+                            s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0;
+                        } else {
+                            mx = get_amv(s, 0);
+                            my = get_amv(s, 1);
+                            s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_GMC | MB_TYPE_L0;
+                        }
+
+                        mot_val[0       ]= mot_val[2       ] =
+                        mot_val[0+stride]= mot_val[2+stride]= mx;
+                        mot_val[1       ]= mot_val[3       ]=
+                        mot_val[1+stride]= mot_val[3+stride]= my;
+                    } else {
+                        int i;
+                        s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0;
+                        for(i=0;i<4;i++) {
+                            int16_t *mot_val= h263_pred_motion(s, i, 0, &pred_x, &pred_y);
+                            mx = h263_decode_motion(s, pred_x, s->f_code);
+                            if (mx >= 0xffff)
+                                return -1;
+
+                            my = h263_decode_motion(s, pred_y, s->f_code);
+                            if (my >= 0xffff)
+                                return -1;
+                            mot_val[0] = mx;
+                            mot_val[1] = my;
+                        }
+                    }
+                }
+            }
+        }
+        s->mb_x= 0;
+    }
+
+    return mb_num;
+}
+
+/**
+ * decode second partition.
+ * @return <0 if an error occured
+ */
+static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){
+    int mb_num=0;
+    static const int8_t quant_tab[4] = { -1, -2, 1, 2 };
+
+    s->mb_x= s->resync_mb_x;
+    s->first_slice_line=1;
+    for(s->mb_y= s->resync_mb_y; mb_num < mb_count; s->mb_y++){
+        ff_init_block_index(s);
+        for(; mb_num < mb_count && s->mb_x<s->mb_width; s->mb_x++){
+            const int xy= s->mb_x + s->mb_y*s->mb_stride;
+
+            mb_num++;
+            ff_update_block_index(s);
+            if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1)
+                s->first_slice_line=0;
+
+            if(s->pict_type==I_TYPE){
+                int ac_pred= get_bits1(&s->gb);
+                int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
+                if(cbpy<0){
+                    av_log(s->avctx, AV_LOG_ERROR, "cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
+                    return -1;
+                }
+
+                s->cbp_table[xy]|= cbpy<<2;
+                s->current_picture.mb_type[xy] |= ac_pred*MB_TYPE_ACPRED;
+            }else{ /* P || S_TYPE */
+                if(IS_INTRA(s->current_picture.mb_type[xy])){
+                    int dir=0,i;
+                    int ac_pred = get_bits1(&s->gb);
+                    int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
+
+                    if(cbpy<0){
+                        av_log(s->avctx, AV_LOG_ERROR, "I cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
+                        return -1;
+                    }
+
+                    if(s->cbp_table[xy] & 8) {
+                        ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]);
+                    }
+                    s->current_picture.qscale_table[xy]= s->qscale;
+
+                    for(i=0; i<6; i++){
+                        int dc_pred_dir;
+                        int dc= mpeg4_decode_dc(s, i, &dc_pred_dir);
+                        if(dc < 0){
+                            av_log(s->avctx, AV_LOG_ERROR, "DC corrupted at %d %d\n", s->mb_x, s->mb_y);
+                            return -1;
+                        }
+                        dir<<=1;
+                        if(dc_pred_dir) dir|=1;
+                    }
+                    s->cbp_table[xy]&= 3; //remove dquant
+                    s->cbp_table[xy]|= cbpy<<2;
+                    s->current_picture.mb_type[xy] |= ac_pred*MB_TYPE_ACPRED;
+                    s->pred_dir_table[xy]= dir;
+                }else if(IS_SKIP(s->current_picture.mb_type[xy])){
+                    s->current_picture.qscale_table[xy]= s->qscale;
+                    s->cbp_table[xy]= 0;
+                }else{
+                    int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
+
+                    if(cbpy<0){
+                        av_log(s->avctx, AV_LOG_ERROR, "P cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
+                        return -1;
+                    }
+
+                    if(s->cbp_table[xy] & 8) {
+                        ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]);
+                    }
+                    s->current_picture.qscale_table[xy]= s->qscale;
+
+                    s->cbp_table[xy]&= 3; //remove dquant
+                    s->cbp_table[xy]|= (cbpy^0xf)<<2;
+                }
+            }
+        }
+        if(mb_num >= mb_count) return 0;
+        s->mb_x= 0;
+    }
+    return 0;
+}
+
+/**
+ * decodes the first & second partition
+ * @return <0 if error (and sets error type in the error_status_table)
+ */
+int ff_mpeg4_decode_partitions(MpegEncContext *s)
+{
+    int mb_num;
+    const int part_a_error= s->pict_type==I_TYPE ? (DC_ERROR|MV_ERROR) : MV_ERROR;
+    const int part_a_end  = s->pict_type==I_TYPE ? (DC_END  |MV_END)   : MV_END;
+
+    mb_num= mpeg4_decode_partition_a(s);
+    if(mb_num<0){
+        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, part_a_error);
+        return -1;
+    }
+
+    if(s->resync_mb_x + s->resync_mb_y*s->mb_width + mb_num > s->mb_num){
+        av_log(s->avctx, AV_LOG_ERROR, "slice below monitor ...\n");
+        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, part_a_error);
+        return -1;
+    }
+
+    s->mb_num_left= mb_num;
+
+    if(s->pict_type==I_TYPE){
+        while(show_bits(&s->gb, 9) == 1)
+            skip_bits(&s->gb, 9);
+        if(get_bits_long(&s->gb, 19)!=DC_MARKER){
+            av_log(s->avctx, AV_LOG_ERROR, "marker missing after first I partition at %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+    }else{
+        while(show_bits(&s->gb, 10) == 1)
+            skip_bits(&s->gb, 10);
+        if(get_bits(&s->gb, 17)!=MOTION_MARKER){
+            av_log(s->avctx, AV_LOG_ERROR, "marker missing after first P partition at %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+    }
+    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, part_a_end);
+
+    if( mpeg4_decode_partition_b(s, mb_num) < 0){
+        if(s->pict_type==P_TYPE)
+            ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, DC_ERROR);
+        return -1;
+    }else{
+        if(s->pict_type==P_TYPE)
+            ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, DC_END);
+    }
+
+    return 0;
+}
+
+/**
+ * decode partition C of one MB.
+ * @return <0 if an error occured
+ */
+static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
+{
+    int cbp, mb_type;
+    const int xy= s->mb_x + s->mb_y*s->mb_stride;
+
+    mb_type= s->current_picture.mb_type[xy];
+    cbp = s->cbp_table[xy];
+
+    s->use_intra_dc_vlc= s->qscale < s->intra_dc_threshold;
+
+    if(s->current_picture.qscale_table[xy] != s->qscale){
+        ff_set_qscale(s, s->current_picture.qscale_table[xy] );
+    }
+
+    if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) {
+        int i;
+        for(i=0; i<4; i++){
+            s->mv[0][i][0] = s->current_picture.motion_val[0][ s->block_index[i] ][0];
+            s->mv[0][i][1] = s->current_picture.motion_val[0][ s->block_index[i] ][1];
+        }
+        s->mb_intra = IS_INTRA(mb_type);
+
+        if (IS_SKIP(mb_type)) {
+            /* skip mb */
+            for(i=0;i<6;i++)
+                s->block_last_index[i] = -1;
+            s->mv_dir = MV_DIR_FORWARD;
+            s->mv_type = MV_TYPE_16X16;
+            if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
+                s->mcsel=1;
+                s->mb_skipped = 0;
+            }else{
+                s->mcsel=0;
+                s->mb_skipped = 1;
+            }
+        }else if(s->mb_intra){
+            s->ac_pred = IS_ACPRED(s->current_picture.mb_type[xy]);
+        }else if(!s->mb_intra){
+//            s->mcsel= 0; //FIXME do we need to init that
+
+            s->mv_dir = MV_DIR_FORWARD;
+            if (IS_8X8(mb_type)) {
+                s->mv_type = MV_TYPE_8X8;
+            } else {
+                s->mv_type = MV_TYPE_16X16;
+            }
+        }
+    } else { /* I-Frame */
+        s->mb_intra = 1;
+        s->ac_pred = IS_ACPRED(s->current_picture.mb_type[xy]);
+    }
+
+    if (!IS_SKIP(mb_type)) {
+        int i;
+        s->dsp.clear_blocks(s->block[0]);
+        /* decode each block */
+        for (i = 0; i < 6; i++) {
+            if(mpeg4_decode_block(s, block[i], i, cbp&32, s->mb_intra, s->rvlc) < 0){
+                av_log(s->avctx, AV_LOG_ERROR, "texture corrupted at %d %d %d\n", s->mb_x, s->mb_y, s->mb_intra);
+                return -1;
+            }
+            cbp+=cbp;
+        }
+    }
+
+    /* per-MB end of slice check */
+
+    if(--s->mb_num_left <= 0){
+//printf("%06X %d\n", show_bits(&s->gb, 24), s->gb.size_in_bits - get_bits_count(&s->gb));
+        if(mpeg4_is_resync(s))
+            return SLICE_END;
+        else
+            return SLICE_NOEND;
+    }else{
+        if(mpeg4_is_resync(s)){
+            const int delta= s->mb_x + 1 == s->mb_width ? 2 : 1;
+            if(s->cbp_table[xy+delta])
+                return SLICE_END;
+        }
+        return SLICE_OK;
+    }
+}
+
+/**
+ * read the next MVs for OBMC. yes this is a ugly hack, feel free to send a patch :)
+ */
+static void preview_obmc(MpegEncContext *s){
+    GetBitContext gb= s->gb;
+
+    int cbpc, i, pred_x, pred_y, mx, my;
+    int16_t *mot_val;
+    const int xy= s->mb_x + 1 + s->mb_y * s->mb_stride;
+    const int stride= s->b8_stride*2;
+
+    for(i=0; i<4; i++)
+        s->block_index[i]+= 2;
+    for(i=4; i<6; i++)
+        s->block_index[i]+= 1;
+    s->mb_x++;
+
+    assert(s->pict_type == P_TYPE);
+
+    do{
+        if (get_bits1(&s->gb)) {
+            /* skip mb */
+            mot_val = s->current_picture.motion_val[0][ s->block_index[0] ];
+            mot_val[0       ]= mot_val[2       ]=
+            mot_val[0+stride]= mot_val[2+stride]= 0;
+            mot_val[1       ]= mot_val[3       ]=
+            mot_val[1+stride]= mot_val[3+stride]= 0;
+
+            s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
+            goto end;
+        }
+        cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2);
+    }while(cbpc == 20);
+
+    if(cbpc & 4){
+        s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
+    }else{
+        get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
+        if (cbpc & 8) {
+            if(s->modified_quant){
+                if(get_bits1(&s->gb)) skip_bits(&s->gb, 1);
+                else                  skip_bits(&s->gb, 5);
+            }else
+                skip_bits(&s->gb, 2);
+        }
+
+        if ((cbpc & 16) == 0) {
+                s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0;
+                /* 16x16 motion prediction */
+                mot_val= h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
+                if (s->umvplus)
+                   mx = h263p_decode_umotion(s, pred_x);
+                else
+                   mx = h263_decode_motion(s, pred_x, 1);
+
+                if (s->umvplus)
+                   my = h263p_decode_umotion(s, pred_y);
+                else
+                   my = h263_decode_motion(s, pred_y, 1);
+
+                mot_val[0       ]= mot_val[2       ]=
+                mot_val[0+stride]= mot_val[2+stride]= mx;
+                mot_val[1       ]= mot_val[3       ]=
+                mot_val[1+stride]= mot_val[3+stride]= my;
+        } else {
+            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0;
+            for(i=0;i<4;i++) {
+                mot_val = h263_pred_motion(s, i, 0, &pred_x, &pred_y);
+                if (s->umvplus)
+                  mx = h263p_decode_umotion(s, pred_x);
+                else
+                  mx = h263_decode_motion(s, pred_x, 1);
+
+                if (s->umvplus)
+                  my = h263p_decode_umotion(s, pred_y);
+                else
+                  my = h263_decode_motion(s, pred_y, 1);
+                if (s->umvplus && (mx - pred_x) == 1 && (my - pred_y) == 1)
+                  skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */
+                mot_val[0] = mx;
+                mot_val[1] = my;
+            }
+        }
+    }
+end:
+
+    for(i=0; i<4; i++)
+        s->block_index[i]-= 2;
+    for(i=4; i<6; i++)
+        s->block_index[i]-= 1;
+    s->mb_x--;
+
+    s->gb= gb;
+}
+
+static void h263_decode_dquant(MpegEncContext *s){
+    static const int8_t quant_tab[4] = { -1, -2, 1, 2 };
+
+    if(s->modified_quant){
+        if(get_bits1(&s->gb))
+            s->qscale= modified_quant_tab[get_bits1(&s->gb)][ s->qscale ];
+        else
+            s->qscale= get_bits(&s->gb, 5);
+    }else
+        s->qscale += quant_tab[get_bits(&s->gb, 2)];
+    ff_set_qscale(s, s->qscale);
+}
+
+int ff_h263_decode_mb(MpegEncContext *s,
+                      DCTELEM block[6][64])
+{
+    int cbpc, cbpy, i, cbp, pred_x, pred_y, mx, my, dquant;
+    int16_t *mot_val;
+    const int xy= s->mb_x + s->mb_y * s->mb_stride;
+
+    assert(!s->h263_pred);
+
+    if (s->pict_type == P_TYPE) {
+        do{
+            if (get_bits1(&s->gb)) {
+                /* skip mb */
+                s->mb_intra = 0;
+                for(i=0;i<6;i++)
+                    s->block_last_index[i] = -1;
+                s->mv_dir = MV_DIR_FORWARD;
+                s->mv_type = MV_TYPE_16X16;
+                s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
+                s->mv[0][0][0] = 0;
+                s->mv[0][0][1] = 0;
+                s->mb_skipped = !(s->obmc | s->loop_filter);
+                goto end;
+            }
+            cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2);
+            //fprintf(stderr, "\tCBPC: %d", cbpc);
+            if (cbpc < 0){
+                av_log(s->avctx, AV_LOG_ERROR, "cbpc damaged at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+        }while(cbpc == 20);
+
+        s->dsp.clear_blocks(s->block[0]);
+
+        dquant = cbpc & 8;
+        s->mb_intra = ((cbpc & 4) != 0);
+        if (s->mb_intra) goto intra;
+
+        cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
+
+        if(s->alt_inter_vlc==0 || (cbpc & 3)!=3)
+            cbpy ^= 0xF;
+
+        cbp = (cbpc & 3) | (cbpy << 2);
+        if (dquant) {
+            h263_decode_dquant(s);
+        }
+
+        s->mv_dir = MV_DIR_FORWARD;
+        if ((cbpc & 16) == 0) {
+            s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0;
+            /* 16x16 motion prediction */
+            s->mv_type = MV_TYPE_16X16;
+            h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
+            if (s->umvplus)
+               mx = h263p_decode_umotion(s, pred_x);
+            else
+               mx = h263_decode_motion(s, pred_x, 1);
+
+            if (mx >= 0xffff)
+                return -1;
+
+            if (s->umvplus)
+               my = h263p_decode_umotion(s, pred_y);
+            else
+               my = h263_decode_motion(s, pred_y, 1);
+
+            if (my >= 0xffff)
+                return -1;
+            s->mv[0][0][0] = mx;
+            s->mv[0][0][1] = my;
+
+            if (s->umvplus && (mx - pred_x) == 1 && (my - pred_y) == 1)
+               skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */
+        } else {
+            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0;
+            s->mv_type = MV_TYPE_8X8;
+            for(i=0;i<4;i++) {
+                mot_val = h263_pred_motion(s, i, 0, &pred_x, &pred_y);
+                if (s->umvplus)
+                  mx = h263p_decode_umotion(s, pred_x);
+                else
+                  mx = h263_decode_motion(s, pred_x, 1);
+                if (mx >= 0xffff)
+                    return -1;
+
+                if (s->umvplus)
+                  my = h263p_decode_umotion(s, pred_y);
+                else
+                  my = h263_decode_motion(s, pred_y, 1);
+                if (my >= 0xffff)
+                    return -1;
+                s->mv[0][i][0] = mx;
+                s->mv[0][i][1] = my;
+                if (s->umvplus && (mx - pred_x) == 1 && (my - pred_y) == 1)
+                  skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */
+                mot_val[0] = mx;
+                mot_val[1] = my;
+            }
+        }
+
+        /* decode each block */
+        for (i = 0; i < 6; i++) {
+            if (h263_decode_block(s, block[i], i, cbp&32) < 0)
+                return -1;
+            cbp+=cbp;
+        }
+
+        if(s->obmc){
+            if(s->pict_type == P_TYPE && s->mb_x+1<s->mb_width && s->mb_num_left != 1)
+                preview_obmc(s);
+        }
+    } else if(s->pict_type==B_TYPE) {
+        int mb_type;
+        const int stride= s->b8_stride;
+        int16_t *mot_val0 = s->current_picture.motion_val[0][ 2*(s->mb_x + s->mb_y*stride) ];
+        int16_t *mot_val1 = s->current_picture.motion_val[1][ 2*(s->mb_x + s->mb_y*stride) ];
+//        const int mv_xy= s->mb_x + 1 + s->mb_y * s->mb_stride;
+
+        //FIXME ugly
+        mot_val0[0       ]= mot_val0[2       ]= mot_val0[0+2*stride]= mot_val0[2+2*stride]=
+        mot_val0[1       ]= mot_val0[3       ]= mot_val0[1+2*stride]= mot_val0[3+2*stride]=
+        mot_val1[0       ]= mot_val1[2       ]= mot_val1[0+2*stride]= mot_val1[2+2*stride]=
+        mot_val1[1       ]= mot_val1[3       ]= mot_val1[1+2*stride]= mot_val1[3+2*stride]= 0;
+
+        do{
+            mb_type= get_vlc2(&s->gb, h263_mbtype_b_vlc.table, H263_MBTYPE_B_VLC_BITS, 2);
+            if (mb_type < 0){
+                av_log(s->avctx, AV_LOG_ERROR, "b mb_type damaged at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+
+            mb_type= h263_mb_type_b_map[ mb_type ];
+        }while(!mb_type);
+
+        s->mb_intra = IS_INTRA(mb_type);
+        if(HAS_CBP(mb_type)){
+            s->dsp.clear_blocks(s->block[0]);
+            cbpc = get_vlc2(&s->gb, cbpc_b_vlc.table, CBPC_B_VLC_BITS, 1);
+            if(s->mb_intra){
+                dquant = IS_QUANT(mb_type);
+                goto intra;
+            }
+
+            cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
+
+            if (cbpy < 0){
+                av_log(s->avctx, AV_LOG_ERROR, "b cbpy damaged at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+
+            if(s->alt_inter_vlc==0 || (cbpc & 3)!=3)
+                cbpy ^= 0xF;
+
+            cbp = (cbpc & 3) | (cbpy << 2);
+        }else
+            cbp=0;
+
+        assert(!s->mb_intra);
+
+        if(IS_QUANT(mb_type)){
+            h263_decode_dquant(s);
+        }
+
+        if(IS_DIRECT(mb_type)){
+            s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+            mb_type |= ff_mpeg4_set_direct_mv(s, 0, 0);
+        }else{
+            s->mv_dir = 0;
+            s->mv_type= MV_TYPE_16X16;
+//FIXME UMV
+
+            if(USES_LIST(mb_type, 0)){
+                int16_t *mot_val= h263_pred_motion(s, 0, 0, &mx, &my);
+                s->mv_dir = MV_DIR_FORWARD;
+
+                mx = h263_decode_motion(s, mx, 1);
+                my = h263_decode_motion(s, my, 1);
+
+                s->mv[0][0][0] = mx;
+                s->mv[0][0][1] = my;
+                mot_val[0       ]= mot_val[2       ]= mot_val[0+2*stride]= mot_val[2+2*stride]= mx;
+                mot_val[1       ]= mot_val[3       ]= mot_val[1+2*stride]= mot_val[3+2*stride]= my;
+            }
+
+            if(USES_LIST(mb_type, 1)){
+                int16_t *mot_val= h263_pred_motion(s, 0, 1, &mx, &my);
+                s->mv_dir |= MV_DIR_BACKWARD;
+
+                mx = h263_decode_motion(s, mx, 1);
+                my = h263_decode_motion(s, my, 1);
+
+                s->mv[1][0][0] = mx;
+                s->mv[1][0][1] = my;
+                mot_val[0       ]= mot_val[2       ]= mot_val[0+2*stride]= mot_val[2+2*stride]= mx;
+                mot_val[1       ]= mot_val[3       ]= mot_val[1+2*stride]= mot_val[3+2*stride]= my;
+            }
+        }
+
+        s->current_picture.mb_type[xy]= mb_type;
+
+        /* decode each block */
+        for (i = 0; i < 6; i++) {
+            if (h263_decode_block(s, block[i], i, cbp&32) < 0)
+                return -1;
+            cbp+=cbp;
+        }
+    } else { /* I-Frame */
+        do{
+            cbpc = get_vlc2(&s->gb, intra_MCBPC_vlc.table, INTRA_MCBPC_VLC_BITS, 2);
+            if (cbpc < 0){
+                av_log(s->avctx, AV_LOG_ERROR, "I cbpc damaged at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+        }while(cbpc == 8);
+
+        s->dsp.clear_blocks(s->block[0]);
+
+        dquant = cbpc & 4;
+        s->mb_intra = 1;
+intra:
+        s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
+        if (s->h263_aic) {
+            s->ac_pred = get_bits1(&s->gb);
+            if(s->ac_pred){
+                s->current_picture.mb_type[xy]= MB_TYPE_INTRA | MB_TYPE_ACPRED;
+
+                s->h263_aic_dir = get_bits1(&s->gb);
+            }
+        }else
+            s->ac_pred = 0;
+
+        cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
+        if(cbpy<0){
+            av_log(s->avctx, AV_LOG_ERROR, "I cbpy damaged at %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+        cbp = (cbpc & 3) | (cbpy << 2);
+        if (dquant) {
+            h263_decode_dquant(s);
+        }
+
+        /* decode each block */
+        for (i = 0; i < 6; i++) {
+            if (h263_decode_block(s, block[i], i, cbp&32) < 0)
+                return -1;
+            cbp+=cbp;
+        }
+    }
+end:
+
+        /* per-MB end of slice check */
+    {
+        int v= show_bits(&s->gb, 16);
+
+        if(get_bits_count(&s->gb) + 16 > s->gb.size_in_bits){
+            v>>= get_bits_count(&s->gb) + 16 - s->gb.size_in_bits;
+        }
+
+        if(v==0)
+            return SLICE_END;
+    }
+
+    return SLICE_OK;
+}
+
+int ff_mpeg4_decode_mb(MpegEncContext *s,
+                      DCTELEM block[6][64])
+{
+    int cbpc, cbpy, i, cbp, pred_x, pred_y, mx, my, dquant;
+    int16_t *mot_val;
+    static int8_t quant_tab[4] = { -1, -2, 1, 2 };
+    const int xy= s->mb_x + s->mb_y * s->mb_stride;
+
+    assert(s->h263_pred);
+
+    if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) {
+        do{
+            if (get_bits1(&s->gb)) {
+                /* skip mb */
+                s->mb_intra = 0;
+                for(i=0;i<6;i++)
+                    s->block_last_index[i] = -1;
+                s->mv_dir = MV_DIR_FORWARD;
+                s->mv_type = MV_TYPE_16X16;
+                if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
+                    s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_GMC | MB_TYPE_16x16 | MB_TYPE_L0;
+                    s->mcsel=1;
+                    s->mv[0][0][0]= get_amv(s, 0);
+                    s->mv[0][0][1]= get_amv(s, 1);
+
+                    s->mb_skipped = 0;
+                }else{
+                    s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
+                    s->mcsel=0;
+                    s->mv[0][0][0] = 0;
+                    s->mv[0][0][1] = 0;
+                    s->mb_skipped = 1;
+                }
+                goto end;
+            }
+            cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2);
+            //fprintf(stderr, "\tCBPC: %d", cbpc);
+            if (cbpc < 0){
+                av_log(s->avctx, AV_LOG_ERROR, "cbpc damaged at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+        }while(cbpc == 20);
+
+        s->dsp.clear_blocks(s->block[0]);
+        dquant = cbpc & 8;
+        s->mb_intra = ((cbpc & 4) != 0);
+        if (s->mb_intra) goto intra;
+
+        if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0)
+            s->mcsel= get_bits1(&s->gb);
+        else s->mcsel= 0;
+        cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1) ^ 0x0F;
+
+        cbp = (cbpc & 3) | (cbpy << 2);
+        if (dquant) {
+            ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]);
+        }
+        if((!s->progressive_sequence) && (cbp || (s->workaround_bugs&FF_BUG_XVID_ILACE)))
+            s->interlaced_dct= get_bits1(&s->gb);
+
+        s->mv_dir = MV_DIR_FORWARD;
+        if ((cbpc & 16) == 0) {
+            if(s->mcsel){
+                s->current_picture.mb_type[xy]= MB_TYPE_GMC | MB_TYPE_16x16 | MB_TYPE_L0;
+                /* 16x16 global motion prediction */
+                s->mv_type = MV_TYPE_16X16;
+                mx= get_amv(s, 0);
+                my= get_amv(s, 1);
+                s->mv[0][0][0] = mx;
+                s->mv[0][0][1] = my;
+            }else if((!s->progressive_sequence) && get_bits1(&s->gb)){
+                s->current_picture.mb_type[xy]= MB_TYPE_16x8 | MB_TYPE_L0 | MB_TYPE_INTERLACED;
+                /* 16x8 field motion prediction */
+                s->mv_type= MV_TYPE_FIELD;
+
+                s->field_select[0][0]= get_bits1(&s->gb);
+                s->field_select[0][1]= get_bits1(&s->gb);
+
+                h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
+
+                for(i=0; i<2; i++){
+                    mx = h263_decode_motion(s, pred_x, s->f_code);
+                    if (mx >= 0xffff)
+                        return -1;
+
+                    my = h263_decode_motion(s, pred_y/2, s->f_code);
+                    if (my >= 0xffff)
+                        return -1;
+
+                    s->mv[0][i][0] = mx;
+                    s->mv[0][i][1] = my;
+                }
+            }else{
+                s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0;
+                /* 16x16 motion prediction */
+                s->mv_type = MV_TYPE_16X16;
+                h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
+                mx = h263_decode_motion(s, pred_x, s->f_code);
+
+                if (mx >= 0xffff)
+                    return -1;
+
+                my = h263_decode_motion(s, pred_y, s->f_code);
+
+                if (my >= 0xffff)
+                    return -1;
+                s->mv[0][0][0] = mx;
+                s->mv[0][0][1] = my;
+            }
+        } else {
+            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0;
+            s->mv_type = MV_TYPE_8X8;
+            for(i=0;i<4;i++) {
+                mot_val = h263_pred_motion(s, i, 0, &pred_x, &pred_y);
+                mx = h263_decode_motion(s, pred_x, s->f_code);
+                if (mx >= 0xffff)
+                    return -1;
+
+                my = h263_decode_motion(s, pred_y, s->f_code);
+                if (my >= 0xffff)
+                    return -1;
+                s->mv[0][i][0] = mx;
+                s->mv[0][i][1] = my;
+                mot_val[0] = mx;
+                mot_val[1] = my;
+            }
+        }
+    } else if(s->pict_type==B_TYPE) {
+        int modb1; // first bit of modb
+        int modb2; // second bit of modb
+        int mb_type;
+
+        s->mb_intra = 0; //B-frames never contain intra blocks
+        s->mcsel=0;      //     ...               true gmc blocks
+
+        if(s->mb_x==0){
+            for(i=0; i<2; i++){
+                s->last_mv[i][0][0]=
+                s->last_mv[i][0][1]=
+                s->last_mv[i][1][0]=
+                s->last_mv[i][1][1]= 0;
+            }
+        }
+
+        /* if we skipped it in the future P Frame than skip it now too */
+        s->mb_skipped= s->next_picture.mbskip_table[s->mb_y * s->mb_stride + s->mb_x]; // Note, skiptab=0 if last was GMC
+
+        if(s->mb_skipped){
+                /* skip mb */
+            for(i=0;i<6;i++)
+                s->block_last_index[i] = -1;
+
+            s->mv_dir = MV_DIR_FORWARD;
+            s->mv_type = MV_TYPE_16X16;
+            s->mv[0][0][0] = 0;
+            s->mv[0][0][1] = 0;
+            s->mv[1][0][0] = 0;
+            s->mv[1][0][1] = 0;
+            s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
+            goto end;
+        }
+
+        modb1= get_bits1(&s->gb);
+        if(modb1){
+            mb_type= MB_TYPE_DIRECT2 | MB_TYPE_SKIP | MB_TYPE_L0L1; //like MB_TYPE_B_DIRECT but no vectors coded
+            cbp=0;
+        }else{
+            modb2= get_bits1(&s->gb);
+            mb_type= get_vlc2(&s->gb, mb_type_b_vlc.table, MB_TYPE_B_VLC_BITS, 1);
+            if(mb_type<0){
+                av_log(s->avctx, AV_LOG_ERROR, "illegal MB_type\n");
+                return -1;
+            }
+            mb_type= mb_type_b_map[ mb_type ];
+            if(modb2) cbp= 0;
+            else{
+                s->dsp.clear_blocks(s->block[0]);
+                cbp= get_bits(&s->gb, 6);
+            }
+
+            if ((!IS_DIRECT(mb_type)) && cbp) {
+                if(get_bits1(&s->gb)){
+                    ff_set_qscale(s, s->qscale + get_bits1(&s->gb)*4 - 2);
+                }
+            }
+
+            if(!s->progressive_sequence){
+                if(cbp)
+                    s->interlaced_dct= get_bits1(&s->gb);
+
+                if(!IS_DIRECT(mb_type) && get_bits1(&s->gb)){
+                    mb_type |= MB_TYPE_16x8 | MB_TYPE_INTERLACED;
+                    mb_type &= ~MB_TYPE_16x16;
+
+                    if(USES_LIST(mb_type, 0)){
+                        s->field_select[0][0]= get_bits1(&s->gb);
+                        s->field_select[0][1]= get_bits1(&s->gb);
+                    }
+                    if(USES_LIST(mb_type, 1)){
+                        s->field_select[1][0]= get_bits1(&s->gb);
+                        s->field_select[1][1]= get_bits1(&s->gb);
+                    }
+                }
+            }
+
+            s->mv_dir = 0;
+            if((mb_type & (MB_TYPE_DIRECT2|MB_TYPE_INTERLACED)) == 0){
+                s->mv_type= MV_TYPE_16X16;
+
+                if(USES_LIST(mb_type, 0)){
+                    s->mv_dir = MV_DIR_FORWARD;
+
+                    mx = h263_decode_motion(s, s->last_mv[0][0][0], s->f_code);
+                    my = h263_decode_motion(s, s->last_mv[0][0][1], s->f_code);
+                    s->last_mv[0][1][0]= s->last_mv[0][0][0]= s->mv[0][0][0] = mx;
+                    s->last_mv[0][1][1]= s->last_mv[0][0][1]= s->mv[0][0][1] = my;
+                }
+
+                if(USES_LIST(mb_type, 1)){
+                    s->mv_dir |= MV_DIR_BACKWARD;
+
+                    mx = h263_decode_motion(s, s->last_mv[1][0][0], s->b_code);
+                    my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code);
+                    s->last_mv[1][1][0]= s->last_mv[1][0][0]= s->mv[1][0][0] = mx;
+                    s->last_mv[1][1][1]= s->last_mv[1][0][1]= s->mv[1][0][1] = my;
+                }
+            }else if(!IS_DIRECT(mb_type)){
+                s->mv_type= MV_TYPE_FIELD;
+
+                if(USES_LIST(mb_type, 0)){
+                    s->mv_dir = MV_DIR_FORWARD;
+
+                    for(i=0; i<2; i++){
+                        mx = h263_decode_motion(s, s->last_mv[0][i][0]  , s->f_code);
+                        my = h263_decode_motion(s, s->last_mv[0][i][1]/2, s->f_code);
+                        s->last_mv[0][i][0]=  s->mv[0][i][0] = mx;
+                        s->last_mv[0][i][1]= (s->mv[0][i][1] = my)*2;
+                    }
+                }
+
+                if(USES_LIST(mb_type, 1)){
+                    s->mv_dir |= MV_DIR_BACKWARD;
+
+                    for(i=0; i<2; i++){
+                        mx = h263_decode_motion(s, s->last_mv[1][i][0]  , s->b_code);
+                        my = h263_decode_motion(s, s->last_mv[1][i][1]/2, s->b_code);
+                        s->last_mv[1][i][0]=  s->mv[1][i][0] = mx;
+                        s->last_mv[1][i][1]= (s->mv[1][i][1] = my)*2;
+                    }
+                }
+            }
+        }
+
+        if(IS_DIRECT(mb_type)){
+            if(IS_SKIP(mb_type))
+                mx=my=0;
+            else{
+                mx = h263_decode_motion(s, 0, 1);
+                my = h263_decode_motion(s, 0, 1);
+            }
+
+            s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+            mb_type |= ff_mpeg4_set_direct_mv(s, mx, my);
+        }
+        s->current_picture.mb_type[xy]= mb_type;
+    } else { /* I-Frame */
+        do{
+            cbpc = get_vlc2(&s->gb, intra_MCBPC_vlc.table, INTRA_MCBPC_VLC_BITS, 2);
+            if (cbpc < 0){
+                av_log(s->avctx, AV_LOG_ERROR, "I cbpc damaged at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+        }while(cbpc == 8);
+
+        dquant = cbpc & 4;
+        s->mb_intra = 1;
+intra:
+        s->ac_pred = get_bits1(&s->gb);
+        if(s->ac_pred)
+            s->current_picture.mb_type[xy]= MB_TYPE_INTRA | MB_TYPE_ACPRED;
+        else
+            s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
+
+        cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
+        if(cbpy<0){
+            av_log(s->avctx, AV_LOG_ERROR, "I cbpy damaged at %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+        cbp = (cbpc & 3) | (cbpy << 2);
+
+        s->use_intra_dc_vlc= s->qscale < s->intra_dc_threshold;
+
+        if (dquant) {
+            ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]);
+        }
+
+        if(!s->progressive_sequence)
+            s->interlaced_dct= get_bits1(&s->gb);
+
+        s->dsp.clear_blocks(s->block[0]);
+        /* decode each block */
+        for (i = 0; i < 6; i++) {
+            if (mpeg4_decode_block(s, block[i], i, cbp&32, 1, 0) < 0)
+                return -1;
+            cbp+=cbp;
+        }
+        goto end;
+    }
+
+    /* decode each block */
+    for (i = 0; i < 6; i++) {
+        if (mpeg4_decode_block(s, block[i], i, cbp&32, 0, 0) < 0)
+            return -1;
+        cbp+=cbp;
+    }
+end:
+
+        /* per-MB end of slice check */
+    if(s->codec_id==CODEC_ID_MPEG4){
+        if(mpeg4_is_resync(s)){
+            const int delta= s->mb_x + 1 == s->mb_width ? 2 : 1;
+            if(s->pict_type==B_TYPE && s->next_picture.mbskip_table[xy + delta])
+                return SLICE_OK;
+            return SLICE_END;
+        }
+    }
+
+    return SLICE_OK;
+}
+
+static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
+{
+    int code, val, sign, shift, l;
+    code = get_vlc2(&s->gb, mv_vlc.table, MV_VLC_BITS, 2);
+
+    if (code == 0)
+        return pred;
+    if (code < 0)
+        return 0xffff;
+
+    sign = get_bits1(&s->gb);
+    shift = f_code - 1;
+    val = code;
+    if (shift) {
+        val = (val - 1) << shift;
+        val |= get_bits(&s->gb, shift);
+        val++;
+    }
+    if (sign)
+        val = -val;
+    val += pred;
+
+    /* modulo decoding */
+    if (!s->h263_long_vectors) {
+        l = INT_BIT - 5 - f_code;
+        val = (val<<l)>>l;
+    } else {
+        /* horrible h263 long vector mode */
+        if (pred < -31 && val < -63)
+            val += 64;
+        if (pred > 32 && val > 63)
+            val -= 64;
+
+    }
+    return val;
+}
+
+/* Decodes RVLC of H.263+ UMV */
+static int h263p_decode_umotion(MpegEncContext * s, int pred)
+{
+   int code = 0, sign;
+
+   if (get_bits1(&s->gb)) /* Motion difference = 0 */
+      return pred;
+
+   code = 2 + get_bits1(&s->gb);
+
+   while (get_bits1(&s->gb))
+   {
+      code <<= 1;
+      code += get_bits1(&s->gb);
+   }
+   sign = code & 1;
+   code >>= 1;
+
+   code = (sign) ? (pred - code) : (pred + code);
+#ifdef DEBUG
+   av_log( s->avctx, AV_LOG_DEBUG,"H.263+ UMV Motion = %d\n", code);
+#endif
+   return code;
+
+}
+
+static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
+                             int n, int coded)
+{
+    int code, level, i, j, last, run;
+    RLTable *rl = &rl_inter;
+    const uint8_t *scan_table;
+    GetBitContext gb= s->gb;
+
+    scan_table = s->intra_scantable.permutated;
+    if (s->h263_aic && s->mb_intra) {
+        rl = &rl_intra_aic;
+        i = 0;
+        if (s->ac_pred) {
+            if (s->h263_aic_dir)
+                scan_table = s->intra_v_scantable.permutated; /* left */
+            else
+                scan_table = s->intra_h_scantable.permutated; /* top */
+        }
+    } else if (s->mb_intra) {
+        /* DC coef */
+        if(s->codec_id == CODEC_ID_RV10){
+#ifdef CONFIG_RV10_DECODER
+          if (s->rv10_version == 3 && s->pict_type == I_TYPE) {
+            int component, diff;
+            component = (n <= 3 ? 0 : n - 4 + 1);
+            level = s->last_dc[component];
+            if (s->rv10_first_dc_coded[component]) {
+                diff = rv_decode_dc(s, n);
+                if (diff == 0xffff)
+                    return -1;
+                level += diff;
+                level = level & 0xff; /* handle wrap round */
+                s->last_dc[component] = level;
+            } else {
+                s->rv10_first_dc_coded[component] = 1;
+            }
+          } else {
+                level = get_bits(&s->gb, 8);
+                if (level == 255)
+                    level = 128;
+          }
+#endif
+        }else{
+            level = get_bits(&s->gb, 8);
+            if((level&0x7F) == 0){
+                av_log(s->avctx, AV_LOG_ERROR, "illegal dc %d at %d %d\n", level, s->mb_x, s->mb_y);
+                if(s->error_resilience >= FF_ER_COMPLIANT)
+                    return -1;
+            }
+            if (level == 255)
+                level = 128;
+        }
+        block[0] = level;
+        i = 1;
+    } else {
+        i = 0;
+    }
+    if (!coded) {
+        if (s->mb_intra && s->h263_aic)
+            goto not_coded;
+        s->block_last_index[n] = i - 1;
+        return 0;
+    }
+retry:
+    for(;;) {
+        code = get_vlc2(&s->gb, rl->vlc.table, TEX_VLC_BITS, 2);
+        if (code < 0){
+            av_log(s->avctx, AV_LOG_ERROR, "illegal ac vlc code at %dx%d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+        if (code == rl->n) {
+            /* escape */
+            if (s->h263_flv > 1) {
+                int is11 = get_bits1(&s->gb);
+                last = get_bits1(&s->gb);
+                run = get_bits(&s->gb, 6);
+                if(is11){
+                    level = get_sbits(&s->gb, 11);
+                } else {
+                    level = get_sbits(&s->gb, 7);
+                }
+            } else {
+                last = get_bits1(&s->gb);
+                run = get_bits(&s->gb, 6);
+                level = (int8_t)get_bits(&s->gb, 8);
+                if(level == -128){
+                    if (s->codec_id == CODEC_ID_RV10) {
+                        /* XXX: should patch encoder too */
+                        level = get_sbits(&s->gb, 12);
+                    }else{
+                        level = get_bits(&s->gb, 5);
+                        level |= get_sbits(&s->gb, 6)<<5;
+                    }
+                }
+            }
+        } else {
+            run = rl->table_run[code];
+            level = rl->table_level[code];
+            last = code >= rl->last;
+            if (get_bits1(&s->gb))
+                level = -level;
+        }
+        i += run;
+        if (i >= 64){
+            if(s->alt_inter_vlc && rl == &rl_inter && !s->mb_intra){
+                //looks like a hack but no, it's the way its supposed to work ...
+                rl = &rl_intra_aic;
+                i = 0;
+                s->gb= gb;
+                memset(block, 0, sizeof(DCTELEM)*64);
+                goto retry;
+            }
+            av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra);
+            return -1;
+        }
+        j = scan_table[i];
+        block[j] = level;
+        if (last)
+            break;
+        i++;
+    }
+not_coded:
+    if (s->mb_intra && s->h263_aic) {
+        h263_pred_acdc(s, block, n);
+        i = 63;
+    }
+    s->block_last_index[n] = i;
+    return 0;
+}
+
+/**
+ * decodes the dc value.
+ * @param n block index (0-3 are luma, 4-5 are chroma)
+ * @param dir_ptr the prediction direction will be stored here
+ * @return the quantized dc
+ */
+static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
+{
+    int level, code;
+
+    if (n < 4)
+        code = get_vlc2(&s->gb, dc_lum.table, DC_VLC_BITS, 1);
+    else
+        code = get_vlc2(&s->gb, dc_chrom.table, DC_VLC_BITS, 1);
+    if (code < 0 || code > 9 /* && s->nbit<9 */){
+        av_log(s->avctx, AV_LOG_ERROR, "illegal dc vlc\n");
+        return -1;
+    }
+    if (code == 0) {
+        level = 0;
+    } else {
+        if(IS_3IV1){
+            if(code==1)
+                level= 2*get_bits1(&s->gb)-1;
+            else{
+                if(get_bits1(&s->gb))
+                    level = get_bits(&s->gb, code-1) + (1<<(code-1));
+                else
+                    level = -get_bits(&s->gb, code-1) - (1<<(code-1));
+            }
+        }else{
+            level = get_xbits(&s->gb, code);
+        }
+
+        if (code > 8){
+            if(get_bits1(&s->gb)==0){ /* marker */
+                if(s->error_resilience>=2){
+                    av_log(s->avctx, AV_LOG_ERROR, "dc marker bit missing\n");
+                    return -1;
+                }
+            }
+        }
+    }
+
+    return ff_mpeg4_pred_dc(s, n, level, dir_ptr, 0);
+}
+
+/**
+ * decodes a block.
+ * @return <0 if an error occured
+ */
+static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
+                              int n, int coded, int intra, int rvlc)
+{
+    int level, i, last, run;
+    int dc_pred_dir;
+    RLTable * rl;
+    RL_VLC_ELEM * rl_vlc;
+    const uint8_t * scan_table;
+    int qmul, qadd;
+
+    //Note intra & rvlc should be optimized away if this is inlined
+
+    if(intra) {
+      if(s->use_intra_dc_vlc){
+        /* DC coef */
+        if(s->partitioned_frame){
+            level = s->dc_val[0][ s->block_index[n] ];
+            if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale);
+            else    level= FASTDIV((level + (s->c_dc_scale>>1)), s->c_dc_scale);
+            dc_pred_dir= (s->pred_dir_table[s->mb_x + s->mb_y*s->mb_stride]<<n)&32;
+        }else{
+            level = mpeg4_decode_dc(s, n, &dc_pred_dir);
+            if (level < 0)
+                return -1;
+        }
+        block[0] = level;
+        i = 0;
+      }else{
+            i = -1;
+            ff_mpeg4_pred_dc(s, n, 0, &dc_pred_dir, 0);
+      }
+        if (!coded)
+            goto not_coded;
+
+        if(rvlc){
+            rl = &rvlc_rl_intra;
+            rl_vlc = rvlc_rl_intra.rl_vlc[0];
+        }else{
+            rl = &rl_intra;
+            rl_vlc = rl_intra.rl_vlc[0];
+        }
+        if (s->ac_pred) {
+            if (dc_pred_dir == 0)
+                scan_table = s->intra_v_scantable.permutated; /* left */
+            else
+                scan_table = s->intra_h_scantable.permutated; /* top */
+        } else {
+            scan_table = s->intra_scantable.permutated;
+        }
+        qmul=1;
+        qadd=0;
+    } else {
+        i = -1;
+        if (!coded) {
+            s->block_last_index[n] = i;
+            return 0;
+        }
+        if(rvlc) rl = &rvlc_rl_inter;
+        else     rl = &rl_inter;
+
+        scan_table = s->intra_scantable.permutated;
+
+        if(s->mpeg_quant){
+            qmul=1;
+            qadd=0;
+            if(rvlc){
+                rl_vlc = rvlc_rl_inter.rl_vlc[0];
+            }else{
+                rl_vlc = rl_inter.rl_vlc[0];
+            }
+        }else{
+            qmul = s->qscale << 1;
+            qadd = (s->qscale - 1) | 1;
+            if(rvlc){
+                rl_vlc = rvlc_rl_inter.rl_vlc[s->qscale];
+            }else{
+                rl_vlc = rl_inter.rl_vlc[s->qscale];
+            }
+        }
+    }
+  {
+    OPEN_READER(re, &s->gb);
+    for(;;) {
+        UPDATE_CACHE(re, &s->gb);
+        GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 0);
+        if (level==0) {
+          /* escape */
+          if(rvlc){
+                if(SHOW_UBITS(re, &s->gb, 1)==0){
+                    av_log(s->avctx, AV_LOG_ERROR, "1. marker bit missing in rvlc esc\n");
+                    return -1;
+                }; SKIP_CACHE(re, &s->gb, 1);
+
+                last=  SHOW_UBITS(re, &s->gb, 1); SKIP_CACHE(re, &s->gb, 1);
+                run=   SHOW_UBITS(re, &s->gb, 6); LAST_SKIP_CACHE(re, &s->gb, 6);
+                SKIP_COUNTER(re, &s->gb, 1+1+6);
+                UPDATE_CACHE(re, &s->gb);
+
+                if(SHOW_UBITS(re, &s->gb, 1)==0){
+                    av_log(s->avctx, AV_LOG_ERROR, "2. marker bit missing in rvlc esc\n");
+                    return -1;
+                }; SKIP_CACHE(re, &s->gb, 1);
+
+                level= SHOW_UBITS(re, &s->gb, 11); SKIP_CACHE(re, &s->gb, 11);
+
+                if(SHOW_UBITS(re, &s->gb, 5)!=0x10){
+                    av_log(s->avctx, AV_LOG_ERROR, "reverse esc missing\n");
+                    return -1;
+                }; SKIP_CACHE(re, &s->gb, 5);
+
+                level=  level * qmul + qadd;
+                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); LAST_SKIP_CACHE(re, &s->gb, 1);
+                SKIP_COUNTER(re, &s->gb, 1+11+5+1);
+
+                i+= run + 1;
+                if(last) i+=192;
+          }else{
+            int cache;
+            cache= GET_CACHE(re, &s->gb);
+
+            if(IS_3IV1)
+                cache ^= 0xC0000000;
+
+            if (cache&0x80000000) {
+                if (cache&0x40000000) {
+                    /* third escape */
+                    SKIP_CACHE(re, &s->gb, 2);
+                    last=  SHOW_UBITS(re, &s->gb, 1); SKIP_CACHE(re, &s->gb, 1);
+                    run=   SHOW_UBITS(re, &s->gb, 6); LAST_SKIP_CACHE(re, &s->gb, 6);
+                    SKIP_COUNTER(re, &s->gb, 2+1+6);
+                    UPDATE_CACHE(re, &s->gb);
+
+                    if(IS_3IV1){
+                        level= SHOW_SBITS(re, &s->gb, 12); LAST_SKIP_BITS(re, &s->gb, 12);
+                    }else{
+                        if(SHOW_UBITS(re, &s->gb, 1)==0){
+                            av_log(s->avctx, AV_LOG_ERROR, "1. marker bit missing in 3. esc\n");
+                            return -1;
+                        }; SKIP_CACHE(re, &s->gb, 1);
+
+                        level= SHOW_SBITS(re, &s->gb, 12); SKIP_CACHE(re, &s->gb, 12);
+
+                        if(SHOW_UBITS(re, &s->gb, 1)==0){
+                            av_log(s->avctx, AV_LOG_ERROR, "2. marker bit missing in 3. esc\n");
+                            return -1;
+                        }; LAST_SKIP_CACHE(re, &s->gb, 1);
+
+                        SKIP_COUNTER(re, &s->gb, 1+12+1);
+                    }
+
+#if 0
+                    if(s->error_resilience >= FF_ER_COMPLIANT){
+                        const int abs_level= FFABS(level);
+                        if(abs_level<=MAX_LEVEL && run<=MAX_RUN){
+                            const int run1= run - rl->max_run[last][abs_level] - 1;
+                            if(abs_level <= rl->max_level[last][run]){
+                                av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, vlc encoding possible\n");
+                                return -1;
+                            }
+                            if(s->error_resilience > FF_ER_COMPLIANT){
+                                if(abs_level <= rl->max_level[last][run]*2){
+                                    av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 1 encoding possible\n");
+                                    return -1;
+                                }
+                                if(run1 >= 0 && abs_level <= rl->max_level[last][run1]){
+                                    av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 2 encoding possible\n");
+                                    return -1;
+                                }
+                            }
+                        }
+                    }
+#endif
+                    if (level>0) level= level * qmul + qadd;
+                    else         level= level * qmul - qadd;
+
+                    if((unsigned)(level + 2048) > 4095){
+                        if(s->error_resilience > FF_ER_COMPLIANT){
+                            if(level > 2560 || level<-2560){
+                                av_log(s->avctx, AV_LOG_ERROR, "|level| overflow in 3. esc, qp=%d\n", s->qscale);
+                                return -1;
+                            }
+                        }
+                        level= level<0 ? -2048 : 2047;
+                    }
+
+                    i+= run + 1;
+                    if(last) i+=192;
+                } else {
+                    /* second escape */
+#if MIN_CACHE_BITS < 20
+                    LAST_SKIP_BITS(re, &s->gb, 2);
+                    UPDATE_CACHE(re, &s->gb);
+#else
+                    SKIP_BITS(re, &s->gb, 2);
+#endif
+                    GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 1);
+                    i+= run + rl->max_run[run>>7][level/qmul] +1; //FIXME opt indexing
+                    level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                    LAST_SKIP_BITS(re, &s->gb, 1);
+                }
+            } else {
+                /* first escape */
+#if MIN_CACHE_BITS < 19
+                LAST_SKIP_BITS(re, &s->gb, 1);
+                UPDATE_CACHE(re, &s->gb);
+#else
+                SKIP_BITS(re, &s->gb, 1);
+#endif
+                GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 1);
+                i+= run;
+                level = level + rl->max_level[run>>7][(run-1)&63] * qmul;//FIXME opt indexing
+                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                LAST_SKIP_BITS(re, &s->gb, 1);
+            }
+          }
+        } else {
+            i+= run;
+            level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+            LAST_SKIP_BITS(re, &s->gb, 1);
+        }
+        if (i > 62){
+            i-= 192;
+            if(i&(~63)){
+                av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+
+            block[scan_table[i]] = level;
+            break;
+        }
+
+        block[scan_table[i]] = level;
+    }
+    CLOSE_READER(re, &s->gb);
+  }
+ not_coded:
+    if (intra) {
+        if(!s->use_intra_dc_vlc){
+            block[0] = ff_mpeg4_pred_dc(s, n, block[0], &dc_pred_dir, 0);
+
+            i -= i>>31; //if(i == -1) i=0;
+        }
+
+        mpeg4_pred_ac(s, block, n, dc_pred_dir);
+        if (s->ac_pred) {
+            i = 63; /* XXX: not optimal */
+        }
+    }
+    s->block_last_index[n] = i;
+    return 0;
+}
+
+/* most is hardcoded. should extend to handle all h263 streams */
+int h263_decode_picture_header(MpegEncContext *s)
+{
+    int format, width, height, i;
+    uint32_t startcode;
+
+    align_get_bits(&s->gb);
+
+    startcode= get_bits(&s->gb, 22-8);
+
+    for(i= s->gb.size_in_bits - get_bits_count(&s->gb); i>24; i-=8) {
+        startcode = ((startcode << 8) | get_bits(&s->gb, 8)) & 0x003FFFFF;
+
+        if(startcode == 0x20)
+            break;
+    }
+
+    if (startcode != 0x20) {
+        av_log(s->avctx, AV_LOG_ERROR, "Bad picture start code\n");
+        return -1;
+    }
+    /* temporal reference */
+    i = get_bits(&s->gb, 8); /* picture timestamp */
+    if( (s->picture_number&~0xFF)+i < s->picture_number)
+        i+= 256;
+    s->current_picture_ptr->pts=
+    s->picture_number= (s->picture_number&~0xFF) + i;
+
+    /* PTYPE starts here */
+    if (get_bits1(&s->gb) != 1) {
+        /* marker */
+        av_log(s->avctx, AV_LOG_ERROR, "Bad marker\n");
+        return -1;
+    }
+    if (get_bits1(&s->gb) != 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n");
+        return -1;      /* h263 id */
+    }
+    skip_bits1(&s->gb);         /* split screen off */
+    skip_bits1(&s->gb);         /* camera  off */
+    skip_bits1(&s->gb);         /* freeze picture release off */
+
+    format = get_bits(&s->gb, 3);
+    /*
+        0    forbidden
+        1    sub-QCIF
+        10   QCIF
+        7       extended PTYPE (PLUSPTYPE)
+    */
+
+    if (format != 7 && format != 6) {
+        s->h263_plus = 0;
+        /* H.263v1 */
+        width = h263_format[format][0];
+        height = h263_format[format][1];
+        if (!width)
+            return -1;
+
+        s->pict_type = I_TYPE + get_bits1(&s->gb);
+
+        s->h263_long_vectors = get_bits1(&s->gb);
+
+        if (get_bits1(&s->gb) != 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "H263 SAC not supported\n");
+            return -1; /* SAC: off */
+        }
+        s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */
+        s->unrestricted_mv = s->h263_long_vectors || s->obmc;
+
+        if (get_bits1(&s->gb) != 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "H263 PB frame not supported\n");
+            return -1; /* not PB frame */
+        }
+        s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
+        skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
+
+        s->width = width;
+        s->height = height;
+        s->avctx->sample_aspect_ratio= (AVRational){12,11};
+        s->avctx->time_base= (AVRational){1001, 30000};
+    } else {
+        int ufep;
+
+        /* H.263v2 */
+        s->h263_plus = 1;
+        ufep = get_bits(&s->gb, 3); /* Update Full Extended PTYPE */
+
+        /* ufep other than 0 and 1 are reserved */
+        if (ufep == 1) {
+            /* OPPTYPE */
+            format = get_bits(&s->gb, 3);
+            dprintf("ufep=1, format: %d\n", format);
+            s->custom_pcf= get_bits1(&s->gb);
+            s->umvplus = get_bits(&s->gb, 1); /* Unrestricted Motion Vector */
+            if (get_bits1(&s->gb) != 0) {
+                av_log(s->avctx, AV_LOG_ERROR, "Syntax-based Arithmetic Coding (SAC) not supported\n");
+            }
+            s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */
+            s->h263_aic = get_bits1(&s->gb); /* Advanced Intra Coding (AIC) */
+            s->loop_filter= get_bits1(&s->gb);
+            s->unrestricted_mv = s->umvplus || s->obmc || s->loop_filter;
+
+            s->h263_slice_structured= get_bits1(&s->gb);
+            if (get_bits1(&s->gb) != 0) {
+                av_log(s->avctx, AV_LOG_ERROR, "Reference Picture Selection not supported\n");
+            }
+            if (get_bits1(&s->gb) != 0) {
+                av_log(s->avctx, AV_LOG_ERROR, "Independent Segment Decoding not supported\n");
+            }
+            s->alt_inter_vlc= get_bits1(&s->gb);
+            s->modified_quant= get_bits1(&s->gb);
+            if(s->modified_quant)
+                s->chroma_qscale_table= ff_h263_chroma_qscale_table;
+
+            skip_bits(&s->gb, 1); /* Prevent start code emulation */
+
+            skip_bits(&s->gb, 3); /* Reserved */
+        } else if (ufep != 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "Bad UFEP type (%d)\n", ufep);
+            return -1;
+        }
+
+        /* MPPTYPE */
+        s->pict_type = get_bits(&s->gb, 3);
+        switch(s->pict_type){
+        case 0: s->pict_type= I_TYPE;break;
+        case 1: s->pict_type= P_TYPE;break;
+        case 3: s->pict_type= B_TYPE;break;
+        case 7: s->pict_type= I_TYPE;break; //ZYGO
+        default:
+            return -1;
+        }
+        skip_bits(&s->gb, 2);
+        s->no_rounding = get_bits1(&s->gb);
+        skip_bits(&s->gb, 4);
+
+        /* Get the picture dimensions */
+        if (ufep) {
+            if (format == 6) {
+                /* Custom Picture Format (CPFMT) */
+                s->aspect_ratio_info = get_bits(&s->gb, 4);
+                dprintf("aspect: %d\n", s->aspect_ratio_info);
+                /* aspect ratios:
+                0 - forbidden
+                1 - 1:1
+                2 - 12:11 (CIF 4:3)
+                3 - 10:11 (525-type 4:3)
+                4 - 16:11 (CIF 16:9)
+                5 - 40:33 (525-type 16:9)
+                6-14 - reserved
+                */
+                width = (get_bits(&s->gb, 9) + 1) * 4;
+                skip_bits1(&s->gb);
+                height = get_bits(&s->gb, 9) * 4;
+                dprintf("\nH.263+ Custom picture: %dx%d\n",width,height);
+                if (s->aspect_ratio_info == FF_ASPECT_EXTENDED) {
+                    /* aspected dimensions */
+                    s->avctx->sample_aspect_ratio.num= get_bits(&s->gb, 8);
+                    s->avctx->sample_aspect_ratio.den= get_bits(&s->gb, 8);
+                }else{
+                    s->avctx->sample_aspect_ratio= pixel_aspect[s->aspect_ratio_info];
+                }
+            } else {
+                width = h263_format[format][0];
+                height = h263_format[format][1];
+                s->avctx->sample_aspect_ratio= (AVRational){12,11};
+            }
+            if ((width == 0) || (height == 0))
+                return -1;
+            s->width = width;
+            s->height = height;
+
+            if(s->custom_pcf){
+                int gcd;
+                s->avctx->time_base.den= 1800000;
+                s->avctx->time_base.num= 1000 + get_bits1(&s->gb);
+                s->avctx->time_base.num*= get_bits(&s->gb, 7);
+                if(s->avctx->time_base.num == 0){
+                    av_log(s, AV_LOG_ERROR, "zero framerate\n");
+                    return -1;
+                }
+                gcd= ff_gcd(s->avctx->time_base.den, s->avctx->time_base.num);
+                s->avctx->time_base.den /= gcd;
+                s->avctx->time_base.num /= gcd;
+//                av_log(s->avctx, AV_LOG_DEBUG, "%d/%d\n", s->avctx->time_base.den, s->avctx->time_base.num);
+            }else{
+                s->avctx->time_base= (AVRational){1001, 30000};
+            }
+        }
+
+        if(s->custom_pcf){
+            skip_bits(&s->gb, 2); //extended Temporal reference
+        }
+
+        if (ufep) {
+            if (s->umvplus) {
+                if(get_bits1(&s->gb)==0) /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */
+                    skip_bits1(&s->gb);
+            }
+            if(s->h263_slice_structured){
+                if (get_bits1(&s->gb) != 0) {
+                    av_log(s->avctx, AV_LOG_ERROR, "rectangular slices not supported\n");
+                }
+                if (get_bits1(&s->gb) != 0) {
+                    av_log(s->avctx, AV_LOG_ERROR, "unordered slices not supported\n");
+                }
+            }
+        }
+
+        s->qscale = get_bits(&s->gb, 5);
+    }
+
+    s->mb_width = (s->width  + 15) / 16;
+    s->mb_height = (s->height  + 15) / 16;
+    s->mb_num = s->mb_width * s->mb_height;
+
+    /* PEI */
+    while (get_bits1(&s->gb) != 0) {
+        skip_bits(&s->gb, 8);
+    }
+
+    if(s->h263_slice_structured){
+        if (get_bits1(&s->gb) != 1) {
+            av_log(s->avctx, AV_LOG_ERROR, "SEPB1 marker missing\n");
+            return -1;
+        }
+
+        ff_h263_decode_mba(s);
+
+        if (get_bits1(&s->gb) != 1) {
+            av_log(s->avctx, AV_LOG_ERROR, "SEPB2 marker missing\n");
+            return -1;
+        }
+    }
+    s->f_code = 1;
+
+    if(s->h263_aic){
+         s->y_dc_scale_table=
+         s->c_dc_scale_table= ff_aic_dc_scale_table;
+    }else{
+        s->y_dc_scale_table=
+        s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+    }
+
+     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
+         av_log(s->avctx, AV_LOG_DEBUG, "qp:%d %c size:%d rnd:%d%s%s%s%s%s%s%s%s%s %d/%d\n",
+         s->qscale, av_get_pict_type_char(s->pict_type),
+         s->gb.size_in_bits, 1-s->no_rounding,
+         s->obmc ? " AP" : "",
+         s->umvplus ? " UMV" : "",
+         s->h263_long_vectors ? " LONG" : "",
+         s->h263_plus ? " +" : "",
+         s->h263_aic ? " AIC" : "",
+         s->alt_inter_vlc ? " AIV" : "",
+         s->modified_quant ? " MQ" : "",
+         s->loop_filter ? " LOOP" : "",
+         s->h263_slice_structured ? " SS" : "",
+         s->avctx->time_base.den, s->avctx->time_base.num
+         );
+     }
+#if 1
+    if (s->pict_type == I_TYPE && s->codec_tag == ff_get_fourcc("ZYGO")){
+        int i,j;
+        for(i=0; i<85; i++) av_log(s->avctx, AV_LOG_DEBUG, "%d", get_bits1(&s->gb));
+        av_log(s->avctx, AV_LOG_DEBUG, "\n");
+        for(i=0; i<13; i++){
+            for(j=0; j<3; j++){
+                int v= get_bits(&s->gb, 8);
+                v |= get_sbits(&s->gb, 8)<<8;
+                av_log(s->avctx, AV_LOG_DEBUG, " %5d", v);
+            }
+            av_log(s->avctx, AV_LOG_DEBUG, "\n");
+        }
+        for(i=0; i<50; i++) av_log(s->avctx, AV_LOG_DEBUG, "%d", get_bits1(&s->gb));
+    }
+#endif
+
+    return 0;
+}
+
+static void mpeg4_decode_sprite_trajectory(MpegEncContext * s, GetBitContext *gb)
+{
+    int i;
+    int a= 2<<s->sprite_warping_accuracy;
+    int rho= 3-s->sprite_warping_accuracy;
+    int r=16/a;
+    const int vop_ref[4][2]= {{0,0}, {s->width,0}, {0, s->height}, {s->width, s->height}}; // only true for rectangle shapes
+    int d[4][2]={{0,0}, {0,0}, {0,0}, {0,0}};
+    int sprite_ref[4][2];
+    int virtual_ref[2][2];
+    int w2, h2, w3, h3;
+    int alpha=0, beta=0;
+    int w= s->width;
+    int h= s->height;
+    int min_ab;
+
+    for(i=0; i<s->num_sprite_warping_points; i++){
+        int length;
+        int x=0, y=0;
+
+        length= get_vlc2(gb, sprite_trajectory.table, SPRITE_TRAJ_VLC_BITS, 3);
+        if(length){
+            x= get_xbits(gb, length);
+        }
+        if(!(s->divx_version==500 && s->divx_build==413)) skip_bits1(gb); /* marker bit */
+
+        length= get_vlc2(gb, sprite_trajectory.table, SPRITE_TRAJ_VLC_BITS, 3);
+        if(length){
+            y=get_xbits(gb, length);
+        }
+        skip_bits1(gb); /* marker bit */
+//printf("%d %d %d %d\n", x, y, i, s->sprite_warping_accuracy);
+        d[i][0]= x;
+        d[i][1]= y;
+    }
+
+    while((1<<alpha)<w) alpha++;
+    while((1<<beta )<h) beta++; // there seems to be a typo in the mpeg4 std for the definition of w' and h'
+    w2= 1<<alpha;
+    h2= 1<<beta;
+
+// Note, the 4th point isn't used for GMC
+    if(s->divx_version==500 && s->divx_build==413){
+        sprite_ref[0][0]= a*vop_ref[0][0] + d[0][0];
+        sprite_ref[0][1]= a*vop_ref[0][1] + d[0][1];
+        sprite_ref[1][0]= a*vop_ref[1][0] + d[0][0] + d[1][0];
+        sprite_ref[1][1]= a*vop_ref[1][1] + d[0][1] + d[1][1];
+        sprite_ref[2][0]= a*vop_ref[2][0] + d[0][0] + d[2][0];
+        sprite_ref[2][1]= a*vop_ref[2][1] + d[0][1] + d[2][1];
+    } else {
+        sprite_ref[0][0]= (a>>1)*(2*vop_ref[0][0] + d[0][0]);
+        sprite_ref[0][1]= (a>>1)*(2*vop_ref[0][1] + d[0][1]);
+        sprite_ref[1][0]= (a>>1)*(2*vop_ref[1][0] + d[0][0] + d[1][0]);
+        sprite_ref[1][1]= (a>>1)*(2*vop_ref[1][1] + d[0][1] + d[1][1]);
+        sprite_ref[2][0]= (a>>1)*(2*vop_ref[2][0] + d[0][0] + d[2][0]);
+        sprite_ref[2][1]= (a>>1)*(2*vop_ref[2][1] + d[0][1] + d[2][1]);
+    }
+/*    sprite_ref[3][0]= (a>>1)*(2*vop_ref[3][0] + d[0][0] + d[1][0] + d[2][0] + d[3][0]);
+    sprite_ref[3][1]= (a>>1)*(2*vop_ref[3][1] + d[0][1] + d[1][1] + d[2][1] + d[3][1]); */
+
+// this is mostly identical to the mpeg4 std (and is totally unreadable because of that ...)
+// perhaps it should be reordered to be more readable ...
+// the idea behind this virtual_ref mess is to be able to use shifts later per pixel instead of divides
+// so the distance between points is converted from w&h based to w2&h2 based which are of the 2^x form
+    virtual_ref[0][0]= 16*(vop_ref[0][0] + w2)
+        + ROUNDED_DIV(((w - w2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + w2*(r*sprite_ref[1][0] - 16*vop_ref[1][0])),w);
+    virtual_ref[0][1]= 16*vop_ref[0][1]
+        + ROUNDED_DIV(((w - w2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + w2*(r*sprite_ref[1][1] - 16*vop_ref[1][1])),w);
+    virtual_ref[1][0]= 16*vop_ref[0][0]
+        + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + h2*(r*sprite_ref[2][0] - 16*vop_ref[2][0])),h);
+    virtual_ref[1][1]= 16*(vop_ref[0][1] + h2)
+        + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + h2*(r*sprite_ref[2][1] - 16*vop_ref[2][1])),h);
+
+    switch(s->num_sprite_warping_points)
+    {
+        case 0:
+            s->sprite_offset[0][0]= 0;
+            s->sprite_offset[0][1]= 0;
+            s->sprite_offset[1][0]= 0;
+            s->sprite_offset[1][1]= 0;
+            s->sprite_delta[0][0]= a;
+            s->sprite_delta[0][1]= 0;
+            s->sprite_delta[1][0]= 0;
+            s->sprite_delta[1][1]= a;
+            s->sprite_shift[0]= 0;
+            s->sprite_shift[1]= 0;
+            break;
+        case 1: //GMC only
+            s->sprite_offset[0][0]= sprite_ref[0][0] - a*vop_ref[0][0];
+            s->sprite_offset[0][1]= sprite_ref[0][1] - a*vop_ref[0][1];
+            s->sprite_offset[1][0]= ((sprite_ref[0][0]>>1)|(sprite_ref[0][0]&1)) - a*(vop_ref[0][0]/2);
+            s->sprite_offset[1][1]= ((sprite_ref[0][1]>>1)|(sprite_ref[0][1]&1)) - a*(vop_ref[0][1]/2);
+            s->sprite_delta[0][0]= a;
+            s->sprite_delta[0][1]= 0;
+            s->sprite_delta[1][0]= 0;
+            s->sprite_delta[1][1]= a;
+            s->sprite_shift[0]= 0;
+            s->sprite_shift[1]= 0;
+            break;
+        case 2:
+            s->sprite_offset[0][0]= (sprite_ref[0][0]<<(alpha+rho))
+                                                  + (-r*sprite_ref[0][0] + virtual_ref[0][0])*(-vop_ref[0][0])
+                                                  + ( r*sprite_ref[0][1] - virtual_ref[0][1])*(-vop_ref[0][1])
+                                                  + (1<<(alpha+rho-1));
+            s->sprite_offset[0][1]= (sprite_ref[0][1]<<(alpha+rho))
+                                                  + (-r*sprite_ref[0][1] + virtual_ref[0][1])*(-vop_ref[0][0])
+                                                  + (-r*sprite_ref[0][0] + virtual_ref[0][0])*(-vop_ref[0][1])
+                                                  + (1<<(alpha+rho-1));
+            s->sprite_offset[1][0]= ( (-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][0] + 1)
+                                     +( r*sprite_ref[0][1] - virtual_ref[0][1])*(-2*vop_ref[0][1] + 1)
+                                     +2*w2*r*sprite_ref[0][0]
+                                     - 16*w2
+                                     + (1<<(alpha+rho+1)));
+            s->sprite_offset[1][1]= ( (-r*sprite_ref[0][1] + virtual_ref[0][1])*(-2*vop_ref[0][0] + 1)
+                                     +(-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][1] + 1)
+                                     +2*w2*r*sprite_ref[0][1]
+                                     - 16*w2
+                                     + (1<<(alpha+rho+1)));
+            s->sprite_delta[0][0]=   (-r*sprite_ref[0][0] + virtual_ref[0][0]);
+            s->sprite_delta[0][1]=   (+r*sprite_ref[0][1] - virtual_ref[0][1]);
+            s->sprite_delta[1][0]=   (-r*sprite_ref[0][1] + virtual_ref[0][1]);
+            s->sprite_delta[1][1]=   (-r*sprite_ref[0][0] + virtual_ref[0][0]);
+
+            s->sprite_shift[0]= alpha+rho;
+            s->sprite_shift[1]= alpha+rho+2;
+            break;
+        case 3:
+            min_ab= FFMIN(alpha, beta);
+            w3= w2>>min_ab;
+            h3= h2>>min_ab;
+            s->sprite_offset[0][0]=  (sprite_ref[0][0]<<(alpha+beta+rho-min_ab))
+                                   + (-r*sprite_ref[0][0] + virtual_ref[0][0])*h3*(-vop_ref[0][0])
+                                   + (-r*sprite_ref[0][0] + virtual_ref[1][0])*w3*(-vop_ref[0][1])
+                                   + (1<<(alpha+beta+rho-min_ab-1));
+            s->sprite_offset[0][1]=  (sprite_ref[0][1]<<(alpha+beta+rho-min_ab))
+                                   + (-r*sprite_ref[0][1] + virtual_ref[0][1])*h3*(-vop_ref[0][0])
+                                   + (-r*sprite_ref[0][1] + virtual_ref[1][1])*w3*(-vop_ref[0][1])
+                                   + (1<<(alpha+beta+rho-min_ab-1));
+            s->sprite_offset[1][0]=  (-r*sprite_ref[0][0] + virtual_ref[0][0])*h3*(-2*vop_ref[0][0] + 1)
+                                   + (-r*sprite_ref[0][0] + virtual_ref[1][0])*w3*(-2*vop_ref[0][1] + 1)
+                                   + 2*w2*h3*r*sprite_ref[0][0]
+                                   - 16*w2*h3
+                                   + (1<<(alpha+beta+rho-min_ab+1));
+            s->sprite_offset[1][1]=  (-r*sprite_ref[0][1] + virtual_ref[0][1])*h3*(-2*vop_ref[0][0] + 1)
+                                   + (-r*sprite_ref[0][1] + virtual_ref[1][1])*w3*(-2*vop_ref[0][1] + 1)
+                                   + 2*w2*h3*r*sprite_ref[0][1]
+                                   - 16*w2*h3
+                                   + (1<<(alpha+beta+rho-min_ab+1));
+            s->sprite_delta[0][0]=   (-r*sprite_ref[0][0] + virtual_ref[0][0])*h3;
+            s->sprite_delta[0][1]=   (-r*sprite_ref[0][0] + virtual_ref[1][0])*w3;
+            s->sprite_delta[1][0]=   (-r*sprite_ref[0][1] + virtual_ref[0][1])*h3;
+            s->sprite_delta[1][1]=   (-r*sprite_ref[0][1] + virtual_ref[1][1])*w3;
+
+            s->sprite_shift[0]= alpha + beta + rho - min_ab;
+            s->sprite_shift[1]= alpha + beta + rho - min_ab + 2;
+            break;
+    }
+    /* try to simplify the situation */
+    if(   s->sprite_delta[0][0] == a<<s->sprite_shift[0]
+       && s->sprite_delta[0][1] == 0
+       && s->sprite_delta[1][0] == 0
+       && s->sprite_delta[1][1] == a<<s->sprite_shift[0])
+    {
+        s->sprite_offset[0][0]>>=s->sprite_shift[0];
+        s->sprite_offset[0][1]>>=s->sprite_shift[0];
+        s->sprite_offset[1][0]>>=s->sprite_shift[1];
+        s->sprite_offset[1][1]>>=s->sprite_shift[1];
+        s->sprite_delta[0][0]= a;
+        s->sprite_delta[0][1]= 0;
+        s->sprite_delta[1][0]= 0;
+        s->sprite_delta[1][1]= a;
+        s->sprite_shift[0]= 0;
+        s->sprite_shift[1]= 0;
+        s->real_sprite_warping_points=1;
+    }
+    else{
+        int shift_y= 16 - s->sprite_shift[0];
+        int shift_c= 16 - s->sprite_shift[1];
+//printf("shifts %d %d\n", shift_y, shift_c);
+        for(i=0; i<2; i++){
+            s->sprite_offset[0][i]<<= shift_y;
+            s->sprite_offset[1][i]<<= shift_c;
+            s->sprite_delta[0][i]<<= shift_y;
+            s->sprite_delta[1][i]<<= shift_y;
+            s->sprite_shift[i]= 16;
+        }
+        s->real_sprite_warping_points= s->num_sprite_warping_points;
+    }
+#if 0
+printf("vop:%d:%d %d:%d %d:%d, sprite:%d:%d %d:%d %d:%d, virtual: %d:%d %d:%d\n",
+    vop_ref[0][0], vop_ref[0][1],
+    vop_ref[1][0], vop_ref[1][1],
+    vop_ref[2][0], vop_ref[2][1],
+    sprite_ref[0][0], sprite_ref[0][1],
+    sprite_ref[1][0], sprite_ref[1][1],
+    sprite_ref[2][0], sprite_ref[2][1],
+    virtual_ref[0][0], virtual_ref[0][1],
+    virtual_ref[1][0], virtual_ref[1][1]
+    );
+
+printf("offset: %d:%d , delta: %d %d %d %d, shift %d\n",
+    s->sprite_offset[0][0], s->sprite_offset[0][1],
+    s->sprite_delta[0][0], s->sprite_delta[0][1],
+    s->sprite_delta[1][0], s->sprite_delta[1][1],
+    s->sprite_shift[0]
+    );
+#endif
+}
+
+static int mpeg4_decode_gop_header(MpegEncContext * s, GetBitContext *gb){
+    int hours, minutes, seconds;
+
+    hours= get_bits(gb, 5);
+    minutes= get_bits(gb, 6);
+    skip_bits1(gb);
+    seconds= get_bits(gb, 6);
+
+    s->time_base= seconds + 60*(minutes + 60*hours);
+
+    skip_bits1(gb);
+    skip_bits1(gb);
+
+    return 0;
+}
+
+static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
+    int width, height, vo_ver_id;
+
+    /* vol header */
+    skip_bits(gb, 1); /* random access */
+    s->vo_type= get_bits(gb, 8);
+    if (get_bits1(gb) != 0) { /* is_ol_id */
+        vo_ver_id = get_bits(gb, 4); /* vo_ver_id */
+        skip_bits(gb, 3); /* vo_priority */
+    } else {
+        vo_ver_id = 1;
+    }
+//printf("vo type:%d\n",s->vo_type);
+    s->aspect_ratio_info= get_bits(gb, 4);
+    if(s->aspect_ratio_info == FF_ASPECT_EXTENDED){
+        s->avctx->sample_aspect_ratio.num= get_bits(gb, 8); // par_width
+        s->avctx->sample_aspect_ratio.den= get_bits(gb, 8); // par_height
+    }else{
+        s->avctx->sample_aspect_ratio= pixel_aspect[s->aspect_ratio_info];
+    }
+
+    if ((s->vol_control_parameters=get_bits1(gb))) { /* vol control parameter */
+        int chroma_format= get_bits(gb, 2);
+        if(chroma_format!=1){
+            av_log(s->avctx, AV_LOG_ERROR, "illegal chroma format\n");
+        }
+        s->low_delay= get_bits1(gb);
+        if(get_bits1(gb)){ /* vbv parameters */
+            get_bits(gb, 15);   /* first_half_bitrate */
+            skip_bits1(gb);     /* marker */
+            get_bits(gb, 15);   /* latter_half_bitrate */
+            skip_bits1(gb);     /* marker */
+            get_bits(gb, 15);   /* first_half_vbv_buffer_size */
+            skip_bits1(gb);     /* marker */
+            get_bits(gb, 3);    /* latter_half_vbv_buffer_size */
+            get_bits(gb, 11);   /* first_half_vbv_occupancy */
+            skip_bits1(gb);     /* marker */
+            get_bits(gb, 15);   /* latter_half_vbv_occupancy */
+            skip_bits1(gb);     /* marker */
+        }
+    }else{
+        // set low delay flag only once the smartest? low delay detection won't be overriden
+        if(s->picture_number==0)
+            s->low_delay=0;
+    }
+
+    s->shape = get_bits(gb, 2); /* vol shape */
+    if(s->shape != RECT_SHAPE) av_log(s->avctx, AV_LOG_ERROR, "only rectangular vol supported\n");
+    if(s->shape == GRAY_SHAPE && vo_ver_id != 1){
+        av_log(s->avctx, AV_LOG_ERROR, "Gray shape not supported\n");
+        skip_bits(gb, 4);  //video_object_layer_shape_extension
+    }
+
+    check_marker(gb, "before time_increment_resolution");
+
+    s->avctx->time_base.den = get_bits(gb, 16);
+    if(!s->avctx->time_base.den){
+        av_log(s->avctx, AV_LOG_ERROR, "time_base.den==0\n");
+        return -1;
+    }
+
+    s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
+    if (s->time_increment_bits < 1)
+        s->time_increment_bits = 1;
+
+    check_marker(gb, "before fixed_vop_rate");
+
+    if (get_bits1(gb) != 0) {   /* fixed_vop_rate  */
+        s->avctx->time_base.num = get_bits(gb, s->time_increment_bits);
+    }else
+        s->avctx->time_base.num = 1;
+
+    s->t_frame=0;
+
+    if (s->shape != BIN_ONLY_SHAPE) {
+        if (s->shape == RECT_SHAPE) {
+            skip_bits1(gb);   /* marker */
+            width = get_bits(gb, 13);
+            skip_bits1(gb);   /* marker */
+            height = get_bits(gb, 13);
+            skip_bits1(gb);   /* marker */
+            if(width && height && !(s->width && s->codec_tag == ff_get_fourcc("MP4S"))){ /* they should be non zero but who knows ... */
+                s->width = width;
+                s->height = height;
+//                printf("width/height: %d %d\n", width, height);
+            }
+        }
+
+        s->progressive_sequence=
+        s->progressive_frame= get_bits1(gb)^1;
+        s->interlaced_dct=0;
+        if(!get_bits1(gb) && (s->avctx->debug & FF_DEBUG_PICT_INFO))
+            av_log(s->avctx, AV_LOG_INFO, "MPEG4 OBMC not supported (very likely buggy encoder)\n");   /* OBMC Disable */
+        if (vo_ver_id == 1) {
+            s->vol_sprite_usage = get_bits1(gb); /* vol_sprite_usage */
+        } else {
+            s->vol_sprite_usage = get_bits(gb, 2); /* vol_sprite_usage */
+        }
+        if(s->vol_sprite_usage==STATIC_SPRITE) av_log(s->avctx, AV_LOG_ERROR, "Static Sprites not supported\n");
+        if(s->vol_sprite_usage==STATIC_SPRITE || s->vol_sprite_usage==GMC_SPRITE){
+            if(s->vol_sprite_usage==STATIC_SPRITE){
+                s->sprite_width = get_bits(gb, 13);
+                skip_bits1(gb); /* marker */
+                s->sprite_height= get_bits(gb, 13);
+                skip_bits1(gb); /* marker */
+                s->sprite_left  = get_bits(gb, 13);
+                skip_bits1(gb); /* marker */
+                s->sprite_top   = get_bits(gb, 13);
+                skip_bits1(gb); /* marker */
+            }
+            s->num_sprite_warping_points= get_bits(gb, 6);
+            s->sprite_warping_accuracy = get_bits(gb, 2);
+            s->sprite_brightness_change= get_bits1(gb);
+            if(s->vol_sprite_usage==STATIC_SPRITE)
+                s->low_latency_sprite= get_bits1(gb);
+        }
+        // FIXME sadct disable bit if verid!=1 && shape not rect
+
+        if (get_bits1(gb) == 1) {   /* not_8_bit */
+            s->quant_precision = get_bits(gb, 4); /* quant_precision */
+            if(get_bits(gb, 4)!=8) av_log(s->avctx, AV_LOG_ERROR, "N-bit not supported\n"); /* bits_per_pixel */
+            if(s->quant_precision!=5) av_log(s->avctx, AV_LOG_ERROR, "quant precision %d\n", s->quant_precision);
+        } else {
+            s->quant_precision = 5;
+        }
+
+        // FIXME a bunch of grayscale shape things
+
+        if((s->mpeg_quant=get_bits1(gb))){ /* vol_quant_type */
+            int i, v;
+
+            /* load default matrixes */
+            for(i=0; i<64; i++){
+                int j= s->dsp.idct_permutation[i];
+                v= ff_mpeg4_default_intra_matrix[i];
+                s->intra_matrix[j]= v;
+                s->chroma_intra_matrix[j]= v;
+
+                v= ff_mpeg4_default_non_intra_matrix[i];
+                s->inter_matrix[j]= v;
+                s->chroma_inter_matrix[j]= v;
+            }
+
+            /* load custom intra matrix */
+            if(get_bits1(gb)){
+                int last=0;
+                for(i=0; i<64; i++){
+                    int j;
+                    v= get_bits(gb, 8);
+                    if(v==0) break;
+
+                    last= v;
+                    j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
+                    s->intra_matrix[j]= v;
+                    s->chroma_intra_matrix[j]= v;
+                }
+
+                /* replicate last value */
+                for(; i<64; i++){
+                    int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
+                    s->intra_matrix[j]= last;
+                    s->chroma_intra_matrix[j]= last;
+                }
+            }
+
+            /* load custom non intra matrix */
+            if(get_bits1(gb)){
+                int last=0;
+                for(i=0; i<64; i++){
+                    int j;
+                    v= get_bits(gb, 8);
+                    if(v==0) break;
+
+                    last= v;
+                    j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
+                    s->inter_matrix[j]= v;
+                    s->chroma_inter_matrix[j]= v;
+                }
+
+                /* replicate last value */
+                for(; i<64; i++){
+                    int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
+                    s->inter_matrix[j]= last;
+                    s->chroma_inter_matrix[j]= last;
+                }
+            }
+
+            // FIXME a bunch of grayscale shape things
+        }
+
+        if(vo_ver_id != 1)
+             s->quarter_sample= get_bits1(gb);
+        else s->quarter_sample=0;
+
+        if(!get_bits1(gb)) av_log(s->avctx, AV_LOG_ERROR, "Complexity estimation not supported\n");
+
+        s->resync_marker= !get_bits1(gb); /* resync_marker_disabled */
+
+        s->data_partitioning= get_bits1(gb);
+        if(s->data_partitioning){
+            s->rvlc= get_bits1(gb);
+        }
+
+        if(vo_ver_id != 1) {
+            s->new_pred= get_bits1(gb);
+            if(s->new_pred){
+                av_log(s->avctx, AV_LOG_ERROR, "new pred not supported\n");
+                skip_bits(gb, 2); /* requested upstream message type */
+                skip_bits1(gb); /* newpred segment type */
+            }
+            s->reduced_res_vop= get_bits1(gb);
+            if(s->reduced_res_vop) av_log(s->avctx, AV_LOG_ERROR, "reduced resolution VOP not supported\n");
+        }
+        else{
+            s->new_pred=0;
+            s->reduced_res_vop= 0;
+        }
+
+        s->scalability= get_bits1(gb);
+
+        if (s->scalability) {
+            GetBitContext bak= *gb;
+            int ref_layer_id;
+            int ref_layer_sampling_dir;
+            int h_sampling_factor_n;
+            int h_sampling_factor_m;
+            int v_sampling_factor_n;
+            int v_sampling_factor_m;
+
+            s->hierachy_type= get_bits1(gb);
+            ref_layer_id= get_bits(gb, 4);
+            ref_layer_sampling_dir= get_bits1(gb);
+            h_sampling_factor_n= get_bits(gb, 5);
+            h_sampling_factor_m= get_bits(gb, 5);
+            v_sampling_factor_n= get_bits(gb, 5);
+            v_sampling_factor_m= get_bits(gb, 5);
+            s->enhancement_type= get_bits1(gb);
+
+            if(   h_sampling_factor_n==0 || h_sampling_factor_m==0
+               || v_sampling_factor_n==0 || v_sampling_factor_m==0){
+
+//                fprintf(stderr, "illegal scalability header (VERY broken encoder), trying to workaround\n");
+                s->scalability=0;
+
+                *gb= bak;
+            }else
+                av_log(s->avctx, AV_LOG_ERROR, "scalability not supported\n");
+
+            // bin shape stuff FIXME
+        }
+    }
+    return 0;
+}
+
+/**
+ * decodes the user data stuff in the header.
+ * allso inits divx/xvid/lavc_version/build
+ */
+static int decode_user_data(MpegEncContext *s, GetBitContext *gb){
+    char buf[256];
+    int i;
+    int e;
+    int ver = 0, build = 0, ver2 = 0, ver3 = 0;
+    char last;
+
+    for(i=0; i<255 && get_bits_count(gb) < gb->size_in_bits; i++){
+        if(show_bits(gb, 23) == 0) break;
+        buf[i]= get_bits(gb, 8);
+    }
+    buf[i]=0;
+
+    /* divx detection */
+    e=sscanf(buf, "DivX%dBuild%d%c", &ver, &build, &last);
+    if(e<2)
+        e=sscanf(buf, "DivX%db%d%c", &ver, &build, &last);
+    if(e>=2){
+        s->divx_version= ver;
+        s->divx_build= build;
+        s->divx_packed= e==3 && last=='p';
+    }
+
+    /* ffmpeg detection */
+    e=sscanf(buf, "FFmpe%*[^b]b%d", &build)+3;
+    if(e!=4)
+        e=sscanf(buf, "FFmpeg v%d.%d.%d / libavcodec build: %d", &ver, &ver2, &ver3, &build);
+    if(e!=4){
+        e=sscanf(buf, "Lavc%d.%d.%d", &ver, &ver2, &ver3)+1;
+        if (e>1)
+            build= (ver<<16) + (ver2<<8) + ver3;
+    }
+    if(e!=4){
+        if(strcmp(buf, "ffmpeg")==0){
+            s->lavc_build= 4600;
+        }
+    }
+    if(e==4){
+        s->lavc_build= build;
+    }
+
+    /* xvid detection */
+    e=sscanf(buf, "XviD%d", &build);
+    if(e==1){
+        s->xvid_build= build;
+    }
+
+//printf("User Data: %s\n", buf);
+    return 0;
+}
+
+static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
+    int time_incr, time_increment;
+
+    s->pict_type = get_bits(gb, 2) + I_TYPE;        /* pict type: I = 0 , P = 1 */
+    if(s->pict_type==B_TYPE && s->low_delay && s->vol_control_parameters==0 && !(s->flags & CODEC_FLAG_LOW_DELAY)){
+        av_log(s->avctx, AV_LOG_ERROR, "low_delay flag incorrectly, clearing it\n");
+        s->low_delay=0;
+    }
+
+    s->partitioned_frame= s->data_partitioning && s->pict_type!=B_TYPE;
+    if(s->partitioned_frame)
+        s->decode_mb= mpeg4_decode_partitioned_mb;
+    else
+        s->decode_mb= ff_mpeg4_decode_mb;
+
+    time_incr=0;
+    while (get_bits1(gb) != 0)
+        time_incr++;
+
+    check_marker(gb, "before time_increment");
+
+    if(s->time_increment_bits==0 || !(show_bits(gb, s->time_increment_bits+1)&1)){
+        av_log(s->avctx, AV_LOG_ERROR, "hmm, seems the headers are not complete, trying to guess time_increment_bits\n");
+
+        for(s->time_increment_bits=1 ;s->time_increment_bits<16; s->time_increment_bits++){
+            if(show_bits(gb, s->time_increment_bits+1)&1) break;
+        }
+
+        av_log(s->avctx, AV_LOG_ERROR, "my guess is %d bits ;)\n",s->time_increment_bits);
+    }
+
+    if(IS_3IV1) time_increment= get_bits1(gb); //FIXME investigate further
+    else time_increment= get_bits(gb, s->time_increment_bits);
+
+//    printf("%d %X\n", s->time_increment_bits, time_increment);
+//av_log(s->avctx, AV_LOG_DEBUG, " type:%d modulo_time_base:%d increment:%d t_frame %d\n", s->pict_type, time_incr, time_increment, s->t_frame);
+    if(s->pict_type!=B_TYPE){
+        s->last_time_base= s->time_base;
+        s->time_base+= time_incr;
+        s->time= s->time_base*s->avctx->time_base.den + time_increment;
+        if(s->workaround_bugs&FF_BUG_UMP4){
+            if(s->time < s->last_non_b_time){
+//                fprintf(stderr, "header is not mpeg4 compatible, broken encoder, trying to workaround\n");
+                s->time_base++;
+                s->time+= s->avctx->time_base.den;
+            }
+        }
+        s->pp_time= s->time - s->last_non_b_time;
+        s->last_non_b_time= s->time;
+    }else{
+        s->time= (s->last_time_base + time_incr)*s->avctx->time_base.den + time_increment;
+        s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
+        if(s->pp_time <=s->pb_time || s->pp_time <= s->pp_time - s->pb_time || s->pp_time<=0){
+//            printf("messed up order, maybe after seeking? skipping current b frame\n");
+            return FRAME_SKIPPED;
+        }
+        ff_mpeg4_init_direct_mv(s);
+
+        if(s->t_frame==0) s->t_frame= s->pb_time;
+        if(s->t_frame==0) s->t_frame=1; // 1/0 protection
+        s->pp_field_time= (  ROUNDED_DIV(s->last_non_b_time, s->t_frame)
+                           - ROUNDED_DIV(s->last_non_b_time - s->pp_time, s->t_frame))*2;
+        s->pb_field_time= (  ROUNDED_DIV(s->time, s->t_frame)
+                           - ROUNDED_DIV(s->last_non_b_time - s->pp_time, s->t_frame))*2;
+    }
+//av_log(s->avctx, AV_LOG_DEBUG, "last nonb %"PRId64" last_base %d time %"PRId64" pp %d pb %d t %d ppf %d pbf %d\n", s->last_non_b_time, s->last_time_base, s->time, s->pp_time, s->pb_time, s->t_frame, s->pp_field_time, s->pb_field_time);
+
+    if(s->avctx->time_base.num)
+        s->current_picture_ptr->pts= (s->time + s->avctx->time_base.num/2) / s->avctx->time_base.num;
+    else
+        s->current_picture_ptr->pts= AV_NOPTS_VALUE;
+    if(s->avctx->debug&FF_DEBUG_PTS)
+        av_log(s->avctx, AV_LOG_DEBUG, "MPEG4 PTS: %"PRId64"\n", s->current_picture_ptr->pts);
+
+    check_marker(gb, "before vop_coded");
+
+    /* vop coded */
+    if (get_bits1(gb) != 1){
+        if(s->avctx->debug&FF_DEBUG_PICT_INFO)
+            av_log(s->avctx, AV_LOG_ERROR, "vop not coded\n");
+        return FRAME_SKIPPED;
+    }
+//printf("time %d %d %d || %"PRId64" %"PRId64" %"PRId64"\n", s->time_increment_bits, s->avctx->time_base.den, s->time_base,
+//s->time, s->last_non_b_time, s->last_non_b_time - s->pp_time);
+    if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE
+                          || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) {
+        /* rounding type for motion estimation */
+        s->no_rounding = get_bits1(gb);
+    } else {
+        s->no_rounding = 0;
+    }
+//FIXME reduced res stuff
+
+     if (s->shape != RECT_SHAPE) {
+         if (s->vol_sprite_usage != 1 || s->pict_type != I_TYPE) {
+             int width, height, hor_spat_ref, ver_spat_ref;
+
+             width = get_bits(gb, 13);
+             skip_bits1(gb);   /* marker */
+             height = get_bits(gb, 13);
+             skip_bits1(gb);   /* marker */
+             hor_spat_ref = get_bits(gb, 13); /* hor_spat_ref */
+             skip_bits1(gb);   /* marker */
+             ver_spat_ref = get_bits(gb, 13); /* ver_spat_ref */
+         }
+         skip_bits1(gb); /* change_CR_disable */
+
+         if (get_bits1(gb) != 0) {
+             skip_bits(gb, 8); /* constant_alpha_value */
+         }
+     }
+//FIXME complexity estimation stuff
+
+     if (s->shape != BIN_ONLY_SHAPE) {
+         s->intra_dc_threshold= mpeg4_dc_threshold[ get_bits(gb, 3) ];
+         if(!s->progressive_sequence){
+             s->top_field_first= get_bits1(gb);
+             s->alternate_scan= get_bits1(gb);
+         }else
+             s->alternate_scan= 0;
+     }
+
+     if(s->alternate_scan){
+         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
+         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
+         ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_vertical_scan);
+         ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
+     } else{
+         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
+         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
+         ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
+         ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
+     }
+
+     if(s->pict_type == S_TYPE && (s->vol_sprite_usage==STATIC_SPRITE || s->vol_sprite_usage==GMC_SPRITE)){
+         mpeg4_decode_sprite_trajectory(s, gb);
+         if(s->sprite_brightness_change) av_log(s->avctx, AV_LOG_ERROR, "sprite_brightness_change not supported\n");
+         if(s->vol_sprite_usage==STATIC_SPRITE) av_log(s->avctx, AV_LOG_ERROR, "static sprite not supported\n");
+     }
+
+     if (s->shape != BIN_ONLY_SHAPE) {
+         s->chroma_qscale= s->qscale = get_bits(gb, s->quant_precision);
+         if(s->qscale==0){
+             av_log(s->avctx, AV_LOG_ERROR, "Error, header damaged or not MPEG4 header (qscale=0)\n");
+             return -1; // makes no sense to continue, as there is nothing left from the image then
+         }
+
+         if (s->pict_type != I_TYPE) {
+             s->f_code = get_bits(gb, 3);       /* fcode_for */
+             if(s->f_code==0){
+                 av_log(s->avctx, AV_LOG_ERROR, "Error, header damaged or not MPEG4 header (f_code=0)\n");
+                 return -1; // makes no sense to continue, as the MV decoding will break very quickly
+             }
+         }else
+             s->f_code=1;
+
+         if (s->pict_type == B_TYPE) {
+             s->b_code = get_bits(gb, 3);
+         }else
+             s->b_code=1;
+
+         if(s->avctx->debug&FF_DEBUG_PICT_INFO){
+             av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%d,%d %s size:%d pro:%d alt:%d top:%d %spel part:%d resync:%d w:%d a:%d rnd:%d vot:%d%s dc:%d\n",
+                 s->qscale, s->f_code, s->b_code,
+                 s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")),
+                 gb->size_in_bits,s->progressive_sequence, s->alternate_scan, s->top_field_first,
+                 s->quarter_sample ? "q" : "h", s->data_partitioning, s->resync_marker, s->num_sprite_warping_points,
+                 s->sprite_warping_accuracy, 1-s->no_rounding, s->vo_type, s->vol_control_parameters ? " VOLC" : " ", s->intra_dc_threshold);
+         }
+
+         if(!s->scalability){
+             if (s->shape!=RECT_SHAPE && s->pict_type!=I_TYPE) {
+                 skip_bits1(gb); // vop shape coding type
+             }
+         }else{
+             if(s->enhancement_type){
+                 int load_backward_shape= get_bits1(gb);
+                 if(load_backward_shape){
+                     av_log(s->avctx, AV_LOG_ERROR, "load backward shape isn't supported\n");
+                 }
+             }
+             skip_bits(gb, 2); //ref_select_code
+         }
+     }
+     /* detect buggy encoders which don't set the low_delay flag (divx4/xvid/opendivx)*/
+     // note we cannot detect divx5 without b-frames easily (although it's buggy too)
+     if(s->vo_type==0 && s->vol_control_parameters==0 && s->divx_version==0 && s->picture_number==0){
+         av_log(s->avctx, AV_LOG_ERROR, "looks like this file was encoded with (divx4/(old)xvid/opendivx) -> forcing low_delay flag\n");
+         s->low_delay=1;
+     }
+
+     s->picture_number++; // better than pic number==0 always ;)
+
+     s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; //FIXME add short header support
+     s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table;
+
+     if(s->workaround_bugs&FF_BUG_EDGE){
+         s->h_edge_pos= s->width;
+         s->v_edge_pos= s->height;
+     }
+     return 0;
+}
+
+/**
+ * decode mpeg4 headers
+ * @return <0 if no VOP found (or a damaged one)
+ *         FRAME_SKIPPED if a not coded VOP is found
+ *         0 if a VOP is found
+ */
+int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb)
+{
+    int startcode, v;
+
+    /* search next start code */
+    align_get_bits(gb);
+
+    if(s->codec_tag == ff_get_fourcc("WV1F") && show_bits(gb, 24) == 0x575630){
+        skip_bits(gb, 24);
+        if(get_bits(gb, 8) == 0xF0)
+            return decode_vop_header(s, gb);
+    }
+
+    startcode = 0xff;
+    for(;;) {
+        if(get_bits_count(gb) >= gb->size_in_bits){
+            if(gb->size_in_bits==8 && (s->divx_version || s->xvid_build)){
+                av_log(s->avctx, AV_LOG_ERROR, "frame skip %d\n", gb->size_in_bits);
+                return FRAME_SKIPPED; //divx bug
+            }else
+                return -1; //end of stream
+        }
+
+        /* use the bits after the test */
+        v = get_bits(gb, 8);
+        startcode = ((startcode << 8) | v) & 0xffffffff;
+
+        if((startcode&0xFFFFFF00) != 0x100)
+            continue; //no startcode
+
+        if(s->avctx->debug&FF_DEBUG_STARTCODE){
+            av_log(s->avctx, AV_LOG_DEBUG, "startcode: %3X ", startcode);
+            if     (startcode<=0x11F) av_log(s->avctx, AV_LOG_DEBUG, "Video Object Start");
+            else if(startcode<=0x12F) av_log(s->avctx, AV_LOG_DEBUG, "Video Object Layer Start");
+            else if(startcode<=0x13F) av_log(s->avctx, AV_LOG_DEBUG, "Reserved");
+            else if(startcode<=0x15F) av_log(s->avctx, AV_LOG_DEBUG, "FGS bp start");
+            else if(startcode<=0x1AF) av_log(s->avctx, AV_LOG_DEBUG, "Reserved");
+            else if(startcode==0x1B0) av_log(s->avctx, AV_LOG_DEBUG, "Visual Object Seq Start");
+            else if(startcode==0x1B1) av_log(s->avctx, AV_LOG_DEBUG, "Visual Object Seq End");
+            else if(startcode==0x1B2) av_log(s->avctx, AV_LOG_DEBUG, "User Data");
+            else if(startcode==0x1B3) av_log(s->avctx, AV_LOG_DEBUG, "Group of VOP start");
+            else if(startcode==0x1B4) av_log(s->avctx, AV_LOG_DEBUG, "Video Session Error");
+            else if(startcode==0x1B5) av_log(s->avctx, AV_LOG_DEBUG, "Visual Object Start");
+            else if(startcode==0x1B6) av_log(s->avctx, AV_LOG_DEBUG, "Video Object Plane start");
+            else if(startcode==0x1B7) av_log(s->avctx, AV_LOG_DEBUG, "slice start");
+            else if(startcode==0x1B8) av_log(s->avctx, AV_LOG_DEBUG, "extension start");
+            else if(startcode==0x1B9) av_log(s->avctx, AV_LOG_DEBUG, "fgs start");
+            else if(startcode==0x1BA) av_log(s->avctx, AV_LOG_DEBUG, "FBA Object start");
+            else if(startcode==0x1BB) av_log(s->avctx, AV_LOG_DEBUG, "FBA Object Plane start");
+            else if(startcode==0x1BC) av_log(s->avctx, AV_LOG_DEBUG, "Mesh Object start");
+            else if(startcode==0x1BD) av_log(s->avctx, AV_LOG_DEBUG, "Mesh Object Plane start");
+            else if(startcode==0x1BE) av_log(s->avctx, AV_LOG_DEBUG, "Still Texture Object start");
+            else if(startcode==0x1BF) av_log(s->avctx, AV_LOG_DEBUG, "Texture Spatial Layer start");
+            else if(startcode==0x1C0) av_log(s->avctx, AV_LOG_DEBUG, "Texture SNR Layer start");
+            else if(startcode==0x1C1) av_log(s->avctx, AV_LOG_DEBUG, "Texture Tile start");
+            else if(startcode==0x1C2) av_log(s->avctx, AV_LOG_DEBUG, "Texture Shape Layer start");
+            else if(startcode==0x1C3) av_log(s->avctx, AV_LOG_DEBUG, "stuffing start");
+            else if(startcode<=0x1C5) av_log(s->avctx, AV_LOG_DEBUG, "reserved");
+            else if(startcode<=0x1FF) av_log(s->avctx, AV_LOG_DEBUG, "System start");
+            av_log(s->avctx, AV_LOG_DEBUG, " at %d\n", get_bits_count(gb));
+        }
+
+        if(startcode >= 0x120 && startcode <= 0x12F){
+            if(decode_vol_header(s, gb) < 0)
+                return -1;
+        }
+        else if(startcode == USER_DATA_STARTCODE){
+            decode_user_data(s, gb);
+        }
+        else if(startcode == GOP_STARTCODE){
+            mpeg4_decode_gop_header(s, gb);
+        }
+        else if(startcode == VOP_STARTCODE){
+            return decode_vop_header(s, gb);
+        }
+
+        align_get_bits(gb);
+        startcode = 0xff;
+    }
+}
+
+/* don't understand why they choose a different header ! */
+int intel_h263_decode_picture_header(MpegEncContext *s)
+{
+    int format;
+
+    /* picture header */
+    if (get_bits_long(&s->gb, 22) != 0x20) {
+        av_log(s->avctx, AV_LOG_ERROR, "Bad picture start code\n");
+        return -1;
+    }
+    s->picture_number = get_bits(&s->gb, 8); /* picture timestamp */
+
+    if (get_bits1(&s->gb) != 1) {
+        av_log(s->avctx, AV_LOG_ERROR, "Bad marker\n");
+        return -1;      /* marker */
+    }
+    if (get_bits1(&s->gb) != 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n");
+        return -1;      /* h263 id */
+    }
+    skip_bits1(&s->gb);         /* split screen off */
+    skip_bits1(&s->gb);         /* camera  off */
+    skip_bits1(&s->gb);         /* freeze picture release off */
+
+    format = get_bits(&s->gb, 3);
+    if (format != 7) {
+        av_log(s->avctx, AV_LOG_ERROR, "Intel H263 free format not supported\n");
+        return -1;
+    }
+    s->h263_plus = 0;
+
+    s->pict_type = I_TYPE + get_bits1(&s->gb);
+
+    s->unrestricted_mv = get_bits1(&s->gb);
+    s->h263_long_vectors = s->unrestricted_mv;
+
+    if (get_bits1(&s->gb) != 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "SAC not supported\n");
+        return -1;      /* SAC: off */
+    }
+    if (get_bits1(&s->gb) != 0) {
+        s->obmc= 1;
+        av_log(s->avctx, AV_LOG_ERROR, "Advanced Prediction Mode not supported\n");
+//        return -1;      /* advanced prediction mode: off */
+    }
+    if (get_bits1(&s->gb) != 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "PB frame mode no supported\n");
+        return -1;      /* PB frame mode */
+    }
+
+    /* skip unknown header garbage */
+    skip_bits(&s->gb, 41);
+
+    s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
+    skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
+
+    /* PEI */
+    while (get_bits1(&s->gb) != 0) {
+        skip_bits(&s->gb, 8);
+    }
+    s->f_code = 1;
+
+    s->y_dc_scale_table=
+    s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+
+    return 0;
+}
+
+int flv_h263_decode_picture_header(MpegEncContext *s)
+{
+    int format, width, height;
+
+    /* picture header */
+    if (get_bits_long(&s->gb, 17) != 1) {
+        av_log(s->avctx, AV_LOG_ERROR, "Bad picture start code\n");
+        return -1;
+    }
+    format = get_bits(&s->gb, 5);
+    if (format != 0 && format != 1) {
+        av_log(s->avctx, AV_LOG_ERROR, "Bad picture format\n");
+        return -1;
+    }
+    s->h263_flv = format+1;
+    s->picture_number = get_bits(&s->gb, 8); /* picture timestamp */
+    format = get_bits(&s->gb, 3);
+    switch (format) {
+    case 0:
+        width = get_bits(&s->gb, 8);
+        height = get_bits(&s->gb, 8);
+        break;
+    case 1:
+        width = get_bits(&s->gb, 16);
+        height = get_bits(&s->gb, 16);
+        break;
+    case 2:
+        width = 352;
+        height = 288;
+        break;
+    case 3:
+        width = 176;
+        height = 144;
+        break;
+    case 4:
+        width = 128;
+        height = 96;
+        break;
+    case 5:
+        width = 320;
+        height = 240;
+        break;
+    case 6:
+        width = 160;
+        height = 120;
+        break;
+    default:
+        width = height = 0;
+        break;
+    }
+    if(avcodec_check_dimensions(s->avctx, width, height))
+        return -1;
+    s->width = width;
+    s->height = height;
+
+    s->pict_type = I_TYPE + get_bits(&s->gb, 2);
+    s->dropable= s->pict_type > P_TYPE;
+    if (s->dropable)
+        s->pict_type = P_TYPE;
+
+    skip_bits1(&s->gb); /* deblocking flag */
+    s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
+
+    s->h263_plus = 0;
+
+    s->unrestricted_mv = 1;
+    s->h263_long_vectors = 0;
+
+    /* PEI */
+    while (get_bits1(&s->gb) != 0) {
+        skip_bits(&s->gb, 8);
+    }
+    s->f_code = 1;
+
+    if(s->avctx->debug & FF_DEBUG_PICT_INFO){
+        av_log(s->avctx, AV_LOG_DEBUG, "%c esc_type:%d, qp:%d num:%d\n",
+               s->dropable ? 'D' : av_get_pict_type_char(s->pict_type), s->h263_flv-1, s->qscale, s->picture_number);
+    }
+
+    s->y_dc_scale_table=
+    s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+
+    return 0;
+}
diff --git a/contrib/ffmpeg/libavcodec/h263data.h b/contrib/ffmpeg/libavcodec/h263data.h
new file mode 100644
index 000000000..5eddc3b54
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/h263data.h
@@ -0,0 +1,308 @@
+/*
+ * copyright (c) 2000,2001 Fabrice Bellard
+ * H263+ support
+ * copyright (c) 2001 Juan J. Sierralta P.
+ * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h263data.h
+ * H.263 tables.
+ */
+
+
+/* intra MCBPC, mb_type = (intra), then (intraq) */
+const uint8_t intra_MCBPC_code[9] = { 1, 1, 2, 3, 1, 1, 2, 3, 1 };
+const uint8_t intra_MCBPC_bits[9] = { 1, 3, 3, 3, 4, 6, 6, 6, 9 };
+
+/* inter MCBPC, mb_type = (inter), (intra), (interq), (intraq), (inter4v) */
+/* Changed the tables for interq and inter4v+q, following the standard ** Juanjo ** */
+const uint8_t inter_MCBPC_code[28] = {
+    1, 3, 2, 5,
+    3, 4, 3, 3,
+    3, 7, 6, 5,
+    4, 4, 3, 2,
+    2, 5, 4, 5,
+    1, 0, 0, 0, /* Stuffing */
+    2, 12, 14, 15,
+};
+const uint8_t inter_MCBPC_bits[28] = {
+    1, 4, 4, 6, /* inter  */
+    5, 8, 8, 7, /* intra  */
+    3, 7, 7, 9, /* interQ */
+    6, 9, 9, 9, /* intraQ */
+    3, 7, 7, 8, /* inter4 */
+    9, 0, 0, 0, /* Stuffing */
+    11, 13, 13, 13,/* inter4Q*/
+};
+
+static const uint8_t h263_mbtype_b_tab[15][2] = {
+ {1, 1},
+ {3, 3},
+ {1, 5},
+ {4, 4},
+ {5, 4},
+ {6, 6},
+ {2, 4},
+ {3, 4},
+ {7, 6},
+ {4, 6},
+ {5, 6},
+ {1, 6},
+ {1,10},
+ {1, 7},
+ {1, 8},
+};
+
+static const int h263_mb_type_b_map[15]= {
+    MB_TYPE_DIRECT2 | MB_TYPE_L0L1,
+    MB_TYPE_DIRECT2 | MB_TYPE_L0L1 | MB_TYPE_CBP,
+    MB_TYPE_DIRECT2 | MB_TYPE_L0L1 | MB_TYPE_CBP | MB_TYPE_QUANT,
+                      MB_TYPE_L0                                 | MB_TYPE_16x16,
+                      MB_TYPE_L0   | MB_TYPE_CBP                 | MB_TYPE_16x16,
+                      MB_TYPE_L0   | MB_TYPE_CBP | MB_TYPE_QUANT | MB_TYPE_16x16,
+                      MB_TYPE_L1                                 | MB_TYPE_16x16,
+                      MB_TYPE_L1   | MB_TYPE_CBP                 | MB_TYPE_16x16,
+                      MB_TYPE_L1   | MB_TYPE_CBP | MB_TYPE_QUANT | MB_TYPE_16x16,
+                      MB_TYPE_L0L1                               | MB_TYPE_16x16,
+                      MB_TYPE_L0L1 | MB_TYPE_CBP                 | MB_TYPE_16x16,
+                      MB_TYPE_L0L1 | MB_TYPE_CBP | MB_TYPE_QUANT | MB_TYPE_16x16,
+    0, //stuffing
+    MB_TYPE_INTRA4x4                | MB_TYPE_CBP,
+    MB_TYPE_INTRA4x4                | MB_TYPE_CBP | MB_TYPE_QUANT,
+};
+
+static const uint8_t cbpc_b_tab[4][2] = {
+{0, 1},
+{2, 2},
+{7, 3},
+{6, 3},
+};
+
+const uint8_t cbpy_tab[16][2] =
+{
+  {3,4}, {5,5}, {4,5}, {9,4}, {3,5}, {7,4}, {2,6}, {11,4},
+  {2,5}, {3,6}, {5,4}, {10,4}, {4,4}, {8,4}, {6,4}, {3,2}
+};
+
+const uint8_t mvtab[33][2] =
+{
+  {1,1}, {1,2}, {1,3}, {1,4}, {3,6}, {5,7}, {4,7}, {3,7},
+  {11,9}, {10,9}, {9,9}, {17,10}, {16,10}, {15,10}, {14,10}, {13,10},
+  {12,10}, {11,10}, {10,10}, {9,10}, {8,10}, {7,10}, {6,10}, {5,10},
+  {4,10}, {7,11}, {6,11}, {5,11}, {4,11}, {3,11}, {2,11}, {3,12},
+  {2,12}
+};
+
+/* third non intra table */
+const uint16_t inter_vlc[103][2] = {
+{ 0x2, 2 },{ 0xf, 4 },{ 0x15, 6 },{ 0x17, 7 },
+{ 0x1f, 8 },{ 0x25, 9 },{ 0x24, 9 },{ 0x21, 10 },
+{ 0x20, 10 },{ 0x7, 11 },{ 0x6, 11 },{ 0x20, 11 },
+{ 0x6, 3 },{ 0x14, 6 },{ 0x1e, 8 },{ 0xf, 10 },
+{ 0x21, 11 },{ 0x50, 12 },{ 0xe, 4 },{ 0x1d, 8 },
+{ 0xe, 10 },{ 0x51, 12 },{ 0xd, 5 },{ 0x23, 9 },
+{ 0xd, 10 },{ 0xc, 5 },{ 0x22, 9 },{ 0x52, 12 },
+{ 0xb, 5 },{ 0xc, 10 },{ 0x53, 12 },{ 0x13, 6 },
+{ 0xb, 10 },{ 0x54, 12 },{ 0x12, 6 },{ 0xa, 10 },
+{ 0x11, 6 },{ 0x9, 10 },{ 0x10, 6 },{ 0x8, 10 },
+{ 0x16, 7 },{ 0x55, 12 },{ 0x15, 7 },{ 0x14, 7 },
+{ 0x1c, 8 },{ 0x1b, 8 },{ 0x21, 9 },{ 0x20, 9 },
+{ 0x1f, 9 },{ 0x1e, 9 },{ 0x1d, 9 },{ 0x1c, 9 },
+{ 0x1b, 9 },{ 0x1a, 9 },{ 0x22, 11 },{ 0x23, 11 },
+{ 0x56, 12 },{ 0x57, 12 },{ 0x7, 4 },{ 0x19, 9 },
+{ 0x5, 11 },{ 0xf, 6 },{ 0x4, 11 },{ 0xe, 6 },
+{ 0xd, 6 },{ 0xc, 6 },{ 0x13, 7 },{ 0x12, 7 },
+{ 0x11, 7 },{ 0x10, 7 },{ 0x1a, 8 },{ 0x19, 8 },
+{ 0x18, 8 },{ 0x17, 8 },{ 0x16, 8 },{ 0x15, 8 },
+{ 0x14, 8 },{ 0x13, 8 },{ 0x18, 9 },{ 0x17, 9 },
+{ 0x16, 9 },{ 0x15, 9 },{ 0x14, 9 },{ 0x13, 9 },
+{ 0x12, 9 },{ 0x11, 9 },{ 0x7, 10 },{ 0x6, 10 },
+{ 0x5, 10 },{ 0x4, 10 },{ 0x24, 11 },{ 0x25, 11 },
+{ 0x26, 11 },{ 0x27, 11 },{ 0x58, 12 },{ 0x59, 12 },
+{ 0x5a, 12 },{ 0x5b, 12 },{ 0x5c, 12 },{ 0x5d, 12 },
+{ 0x5e, 12 },{ 0x5f, 12 },{ 0x3, 7 },
+};
+
+const int8_t inter_level[102] = {
+  1,  2,  3,  4,  5,  6,  7,  8,
+  9, 10, 11, 12,  1,  2,  3,  4,
+  5,  6,  1,  2,  3,  4,  1,  2,
+  3,  1,  2,  3,  1,  2,  3,  1,
+  2,  3,  1,  2,  1,  2,  1,  2,
+  1,  2,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  2,  3,  1,  2,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,
+};
+
+const int8_t inter_run[102] = {
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  1,  1,  1,  1,
+  1,  1,  2,  2,  2,  2,  3,  3,
+  3,  4,  4,  4,  5,  5,  5,  6,
+  6,  6,  7,  7,  8,  8,  9,  9,
+ 10, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26,  0,  0,  0,  1,  1,  2,
+  3,  4,  5,  6,  7,  8,  9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26,
+ 27, 28, 29, 30, 31, 32, 33, 34,
+ 35, 36, 37, 38, 39, 40,
+};
+
+static RLTable rl_inter = {
+    102,
+    58,
+    inter_vlc,
+    inter_run,
+    inter_level,
+};
+
+static const uint16_t intra_vlc_aic[103][2] = {
+{  0x2,  2 }, {  0x6,  3 }, {  0xe,  4 }, {  0xc,  5 },
+{  0xd,  5 }, { 0x10,  6 }, { 0x11,  6 }, { 0x12,  6 },
+{ 0x16,  7 }, { 0x1b,  8 }, { 0x20,  9 }, { 0x21,  9 },
+{ 0x1a,  9 }, { 0x1b,  9 }, { 0x1c,  9 }, { 0x1d,  9 },
+{ 0x1e,  9 }, { 0x1f,  9 }, { 0x23, 11 }, { 0x22, 11 },
+{ 0x57, 12 }, { 0x56, 12 }, { 0x55, 12 }, { 0x54, 12 },
+{ 0x53, 12 }, {  0xf,  4 }, { 0x14,  6 }, { 0x14,  7 },
+{ 0x1e,  8 }, {  0xf, 10 }, { 0x21, 11 }, { 0x50, 12 },
+{  0xb,  5 }, { 0x15,  7 }, {  0xe, 10 }, {  0x9, 10 },
+{ 0x15,  6 }, { 0x1d,  8 }, {  0xd, 10 }, { 0x51, 12 },
+{ 0x13,  6 }, { 0x23,  9 }, {  0x7, 11 }, { 0x17,  7 },
+{ 0x22,  9 }, { 0x52, 12 }, { 0x1c,  8 }, {  0xc, 10 },
+{ 0x1f,  8 }, {  0xb, 10 }, { 0x25,  9 }, {  0xa, 10 },
+{ 0x24,  9 }, {  0x6, 11 }, { 0x21, 10 }, { 0x20, 10 },
+{  0x8, 10 }, { 0x20, 11 }, {  0x7,  4 }, {  0xc,  6 },
+{ 0x10,  7 }, { 0x13,  8 }, { 0x11,  9 }, { 0x12,  9 },
+{  0x4, 10 }, { 0x27, 11 }, { 0x26, 11 }, { 0x5f, 12 },
+{  0xf,  6 }, { 0x13,  9 }, {  0x5, 10 }, { 0x25, 11 },
+{  0xe,  6 }, { 0x14,  9 }, { 0x24, 11 }, {  0xd,  6 },
+{  0x6, 10 }, { 0x5e, 12 }, { 0x11,  7 }, {  0x7, 10 },
+{ 0x13,  7 }, { 0x5d, 12 }, { 0x12,  7 }, { 0x5c, 12 },
+{ 0x14,  8 }, { 0x5b, 12 }, { 0x15,  8 }, { 0x1a,  8 },
+{ 0x19,  8 }, { 0x18,  8 }, { 0x17,  8 }, { 0x16,  8 },
+{ 0x19,  9 }, { 0x15,  9 }, { 0x16,  9 }, { 0x18,  9 },
+{ 0x17,  9 }, {  0x4, 11 }, {  0x5, 11 }, { 0x58, 12 },
+{ 0x59, 12 }, { 0x5a, 12 }, {  0x3,  7 },
+};
+
+static const int8_t intra_run_aic[102] = {
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  1,  1,  1,  1,  1,  1,  1,
+ 2,  2,  2,  2,  3,  3,  3,  3,
+ 4,  4,  4,  5,  5,  5,  6,  6,
+ 7,  7,  8,  8,  9,  9, 10, 11,
+12, 13,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  0,  1,  1,  1,  1,
+ 2,  2,  2,  3,  3,  3,  4,  4,
+ 5,  5,  6,  6,  7,  7,  8,  9,
+10, 11, 12, 13, 14, 15, 16, 17,
+18, 19, 20, 21, 22, 23,
+};
+
+static const int8_t intra_level_aic[102] = {
+ 1,  2,  3,  4,  5,  6,  7,  8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+17, 18, 19, 20, 21, 22, 23, 24,
+25,  1,  2,  3,  4,  5,  6,  7,
+ 1,  2,  3,  4,  1,  2,  3,  4,
+ 1,  2,  3,  1,  2,  3,  1,  2,
+ 1,  2,  1,  2,  1,  2,  1,  1,
+ 1,  1,  1,  2,  3,  4,  5,  6,
+ 7,  8,  9, 10,  1,  2,  3,  4,
+ 1,  2,  3,  1,  2,  3,  1,  2,
+ 1,  2,  1,  2,  1,  2,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,
+};
+
+static RLTable rl_intra_aic = {
+    102,
+    58,
+    intra_vlc_aic,
+    intra_run_aic,
+    intra_level_aic,
+};
+
+static const uint8_t wrong_run[102] = {
+ 1,  2,  3,  5,  4, 10,  9,  8,
+11, 15, 17, 16, 23, 22, 21, 20,
+19, 18, 25, 24, 27, 26, 11,  7,
+ 6,  1,  2, 13,  2,  2,  2,  2,
+ 6, 12,  3,  9,  1,  3,  4,  3,
+ 7,  4,  1,  1,  5,  5, 14,  6,
+ 1,  7,  1,  8,  1,  1,  1,  1,
+10,  1,  1,  5,  9, 17, 25, 24,
+29, 33, 32, 41,  2, 23, 28, 31,
+ 3, 22, 30,  4, 27, 40,  8, 26,
+ 6, 39,  7, 38, 16, 37, 15, 10,
+11, 12, 13, 14,  1, 21, 20, 18,
+19,  2,  1, 34, 35, 36
+};
+
+static const uint16_t h263_format[8][2] = {
+    { 0, 0 },
+    { 128, 96 },
+    { 176, 144 },
+    { 352, 288 },
+    { 704, 576 },
+    { 1408, 1152 },
+};
+
+const uint8_t ff_aic_dc_scale_table[32]={
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    0, 2, 4, 6, 8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62
+};
+
+static const uint8_t modified_quant_tab[2][32]={
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+{
+    0, 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9,10,11,12,13,14,15,16,17,18,18,19,20,21,22,23,24,25,26,27,28
+},{
+    0, 2, 3, 4, 5, 6, 7, 8, 9,10,11,13,14,15,16,17,18,19,20,21,22,24,25,26,27,28,29,30,31,31,31,26
+}
+};
+
+const uint8_t ff_h263_chroma_qscale_table[32]={
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    0, 1, 2, 3, 4, 5, 6, 6, 7, 8, 9, 9,10,10,11,11,12,12,12,13,13,13,14,14,14,14,14,15,15,15,15,15
+};
+
+const uint16_t ff_mba_max[6]={
+     47,  98, 395,1583,6335,9215
+};
+
+const uint8_t ff_mba_length[7]={
+      6,   7,   9,  11,  13,  14,  14
+};
+
+const uint8_t ff_h263_loop_filter_strength[32]={
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9,10,10,10,11,11,11,12,12,12
+};
+
diff --git a/contrib/ffmpeg/libavcodec/h263dec.c b/contrib/ffmpeg/libavcodec/h263dec.c
new file mode 100644
index 000000000..66370c179
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/h263dec.c
@@ -0,0 +1,913 @@
+/*
+ * H.263 decoder
+ * Copyright (c) 2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h263dec.c
+ * H.263 decoder.
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+//#define DEBUG
+//#define PRINT_FRAME_TIME
+
+int ff_h263_decode_init(AVCodecContext *avctx)
+{
+    MpegEncContext *s = avctx->priv_data;
+
+    s->avctx = avctx;
+    s->out_format = FMT_H263;
+
+    s->width  = avctx->coded_width;
+    s->height = avctx->coded_height;
+    s->workaround_bugs= avctx->workaround_bugs;
+
+    // set defaults
+    MPV_decode_defaults(s);
+    s->quant_precision=5;
+    s->decode_mb= ff_h263_decode_mb;
+    s->low_delay= 1;
+    avctx->pix_fmt= PIX_FMT_YUV420P;
+    s->unrestricted_mv= 1;
+
+    /* select sub codec */
+    switch(avctx->codec->id) {
+    case CODEC_ID_H263:
+        s->unrestricted_mv= 0;
+        break;
+    case CODEC_ID_MPEG4:
+        s->decode_mb= ff_mpeg4_decode_mb;
+        s->time_increment_bits = 4; /* default value for broken headers */
+        s->h263_pred = 1;
+        s->low_delay = 0; //default, might be overriden in the vol header during header parsing
+        break;
+    case CODEC_ID_MSMPEG4V1:
+        s->h263_msmpeg4 = 1;
+        s->h263_pred = 1;
+        s->msmpeg4_version=1;
+        break;
+    case CODEC_ID_MSMPEG4V2:
+        s->h263_msmpeg4 = 1;
+        s->h263_pred = 1;
+        s->msmpeg4_version=2;
+        break;
+    case CODEC_ID_MSMPEG4V3:
+        s->h263_msmpeg4 = 1;
+        s->h263_pred = 1;
+        s->msmpeg4_version=3;
+        break;
+    case CODEC_ID_WMV1:
+        s->h263_msmpeg4 = 1;
+        s->h263_pred = 1;
+        s->msmpeg4_version=4;
+        break;
+    case CODEC_ID_WMV2:
+        s->h263_msmpeg4 = 1;
+        s->h263_pred = 1;
+        s->msmpeg4_version=5;
+        break;
+    case CODEC_ID_VC1:
+    case CODEC_ID_WMV3:
+        s->h263_msmpeg4 = 1;
+        s->h263_pred = 1;
+        s->msmpeg4_version=6;
+        break;
+    case CODEC_ID_H263I:
+        break;
+    case CODEC_ID_FLV1:
+        s->h263_flv = 1;
+        break;
+    default:
+        return -1;
+    }
+    s->codec_id= avctx->codec->id;
+
+    /* for h263, we allocate the images after having read the header */
+    if (avctx->codec->id != CODEC_ID_H263 && avctx->codec->id != CODEC_ID_MPEG4)
+        if (MPV_common_init(s) < 0)
+            return -1;
+
+    if (s->h263_msmpeg4)
+        ff_msmpeg4_decode_init(s);
+    else
+        h263_decode_init_vlc(s);
+
+    return 0;
+}
+
+int ff_h263_decode_end(AVCodecContext *avctx)
+{
+    MpegEncContext *s = avctx->priv_data;
+
+    MPV_common_end(s);
+    return 0;
+}
+
+/**
+ * returns the number of bytes consumed for building the current frame
+ */
+static int get_consumed_bytes(MpegEncContext *s, int buf_size){
+    int pos= (get_bits_count(&s->gb)+7)>>3;
+
+    if(s->divx_packed){
+        //we would have to scan through the whole buf to handle the weird reordering ...
+        return buf_size;
+    }else if(s->flags&CODEC_FLAG_TRUNCATED){
+        pos -= s->parse_context.last_index;
+        if(pos<0) pos=0; // padding is not really read so this might be -1
+        return pos;
+    }else{
+        if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
+        if(pos+10>buf_size) pos=buf_size; // oops ;)
+
+        return pos;
+    }
+}
+
+static int decode_slice(MpegEncContext *s){
+    const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
+    const int mb_size= 16>>s->avctx->lowres;
+    s->last_resync_gb= s->gb;
+    s->first_slice_line= 1;
+
+    s->resync_mb_x= s->mb_x;
+    s->resync_mb_y= s->mb_y;
+
+    ff_set_qscale(s, s->qscale);
+
+    if(s->partitioned_frame){
+        const int qscale= s->qscale;
+
+        if(s->codec_id==CODEC_ID_MPEG4){
+            if(ff_mpeg4_decode_partitions(s) < 0)
+                return -1;
+        }
+
+        /* restore variables which were modified */
+        s->first_slice_line=1;
+        s->mb_x= s->resync_mb_x;
+        s->mb_y= s->resync_mb_y;
+        ff_set_qscale(s, qscale);
+    }
+
+    for(; s->mb_y < s->mb_height; s->mb_y++) {
+        /* per-row end of slice checks */
+        if(s->msmpeg4_version){
+            if(s->resync_mb_y + s->slice_height == s->mb_y){
+                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, AC_END|DC_END|MV_END);
+
+                return 0;
+            }
+        }
+
+        if(s->msmpeg4_version==1){
+            s->last_dc[0]=
+            s->last_dc[1]=
+            s->last_dc[2]= 128;
+        }
+
+        ff_init_block_index(s);
+        for(; s->mb_x < s->mb_width; s->mb_x++) {
+            int ret;
+
+            ff_update_block_index(s);
+
+            if(s->resync_mb_x == s->mb_x && s->resync_mb_y+1 == s->mb_y){
+                s->first_slice_line=0;
+            }
+
+            /* DCT & quantize */
+
+            s->mv_dir = MV_DIR_FORWARD;
+            s->mv_type = MV_TYPE_16X16;
+//            s->mb_skipped = 0;
+//printf("%d %d %06X\n", ret, get_bits_count(&s->gb), show_bits(&s->gb, 24));
+            ret= s->decode_mb(s, s->block);
+
+            if (s->pict_type!=B_TYPE)
+                ff_h263_update_motion_val(s);
+
+            if(ret<0){
+                const int xy= s->mb_x + s->mb_y*s->mb_stride;
+                if(ret==SLICE_END){
+                    MPV_decode_mb(s, s->block);
+                    if(s->loop_filter)
+                        ff_h263_loop_filter(s);
+
+//printf("%d %d %d %06X\n", s->mb_x, s->mb_y, s->gb.size*8 - get_bits_count(&s->gb), show_bits(&s->gb, 24));
+                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+
+                    s->padding_bug_score--;
+
+                    if(++s->mb_x >= s->mb_width){
+                        s->mb_x=0;
+                        ff_draw_horiz_band(s, s->mb_y*mb_size, mb_size);
+                        s->mb_y++;
+                    }
+                    return 0;
+                }else if(ret==SLICE_NOEND){
+                    av_log(s->avctx, AV_LOG_ERROR, "Slice mismatch at MB: %d\n", xy);
+                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x+1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+                    return -1;
+                }
+                av_log(s->avctx, AV_LOG_ERROR, "Error at MB: %d\n", xy);
+                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
+
+                return -1;
+            }
+
+            MPV_decode_mb(s, s->block);
+            if(s->loop_filter)
+                ff_h263_loop_filter(s);
+        }
+
+        ff_draw_horiz_band(s, s->mb_y*mb_size, mb_size);
+
+        s->mb_x= 0;
+    }
+
+    assert(s->mb_x==0 && s->mb_y==s->mb_height);
+
+    /* try to detect the padding bug */
+    if(      s->codec_id==CODEC_ID_MPEG4
+       &&   (s->workaround_bugs&FF_BUG_AUTODETECT)
+       &&    s->gb.size_in_bits - get_bits_count(&s->gb) >=0
+       &&    s->gb.size_in_bits - get_bits_count(&s->gb) < 48
+//       &&   !s->resync_marker
+       &&   !s->data_partitioning){
+
+        const int bits_count= get_bits_count(&s->gb);
+        const int bits_left = s->gb.size_in_bits - bits_count;
+
+        if(bits_left==0){
+            s->padding_bug_score+=16;
+        } else if(bits_left != 1){
+            int v= show_bits(&s->gb, 8);
+            v|= 0x7F >> (7-(bits_count&7));
+
+            if(v==0x7F && bits_left<=8)
+                s->padding_bug_score--;
+            else if(v==0x7F && ((get_bits_count(&s->gb)+8)&8) && bits_left<=16)
+                s->padding_bug_score+= 4;
+            else
+                s->padding_bug_score++;
+        }
+    }
+
+    if(s->workaround_bugs&FF_BUG_AUTODETECT){
+        if(s->padding_bug_score > -2 && !s->data_partitioning /*&& (s->divx_version || !s->resync_marker)*/)
+            s->workaround_bugs |=  FF_BUG_NO_PADDING;
+        else
+            s->workaround_bugs &= ~FF_BUG_NO_PADDING;
+    }
+
+    // handle formats which don't have unique end markers
+    if(s->msmpeg4_version || (s->workaround_bugs&FF_BUG_NO_PADDING)){ //FIXME perhaps solve this more cleanly
+        int left= s->gb.size_in_bits - get_bits_count(&s->gb);
+        int max_extra=7;
+
+        /* no markers in M$ crap */
+        if(s->msmpeg4_version && s->pict_type==I_TYPE)
+            max_extra+= 17;
+
+        /* buggy padding but the frame should still end approximately at the bitstream end */
+        if((s->workaround_bugs&FF_BUG_NO_PADDING) && s->error_resilience>=3)
+            max_extra+= 48;
+        else if((s->workaround_bugs&FF_BUG_NO_PADDING))
+            max_extra+= 256*256*256*64;
+
+        if(left>max_extra){
+            av_log(s->avctx, AV_LOG_ERROR, "discarding %d junk bits at end, next would be %X\n", left, show_bits(&s->gb, 24));
+        }
+        else if(left<0){
+            av_log(s->avctx, AV_LOG_ERROR, "overreading %d bits\n", -left);
+        }else
+            ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, AC_END|DC_END|MV_END);
+
+        return 0;
+    }
+
+    av_log(s->avctx, AV_LOG_ERROR, "slice end not reached but screenspace end (%d left %06X, score= %d)\n",
+            s->gb.size_in_bits - get_bits_count(&s->gb),
+            show_bits(&s->gb, 24), s->padding_bug_score);
+
+    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+
+    return -1;
+}
+
+/**
+ * finds the end of the current frame in the bitstream.
+ * @return the position of the first byte of the next frame, or -1
+ */
+int ff_mpeg4_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){
+    int vop_found, i;
+    uint32_t state;
+
+    vop_found= pc->frame_start_found;
+    state= pc->state;
+
+    i=0;
+    if(!vop_found){
+        for(i=0; i<buf_size; i++){
+            state= (state<<8) | buf[i];
+            if(state == 0x1B6){
+                i++;
+                vop_found=1;
+                break;
+            }
+        }
+    }
+
+    if(vop_found){
+        /* EOF considered as end of frame */
+        if (buf_size == 0)
+            return 0;
+        for(; i<buf_size; i++){
+            state= (state<<8) | buf[i];
+            if((state&0xFFFFFF00) == 0x100){
+                pc->frame_start_found=0;
+                pc->state=-1;
+                return i-3;
+            }
+        }
+    }
+    pc->frame_start_found= vop_found;
+    pc->state= state;
+    return END_NOT_FOUND;
+}
+
+static int h263_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){
+    int vop_found, i;
+    uint32_t state;
+
+    vop_found= pc->frame_start_found;
+    state= pc->state;
+
+    i=0;
+    if(!vop_found){
+        for(i=0; i<buf_size; i++){
+            state= (state<<8) | buf[i];
+            if(state>>(32-22) == 0x20){
+                i++;
+                vop_found=1;
+                break;
+            }
+        }
+    }
+
+    if(vop_found){
+      for(; i<buf_size; i++){
+        state= (state<<8) | buf[i];
+        if(state>>(32-22) == 0x20){
+            pc->frame_start_found=0;
+            pc->state=-1;
+            return i-3;
+        }
+      }
+    }
+    pc->frame_start_found= vop_found;
+    pc->state= state;
+
+    return END_NOT_FOUND;
+}
+
+#ifdef CONFIG_H263_PARSER
+static int h263_parse(AVCodecParserContext *s,
+                           AVCodecContext *avctx,
+                           uint8_t **poutbuf, int *poutbuf_size,
+                           const uint8_t *buf, int buf_size)
+{
+    ParseContext *pc = s->priv_data;
+    int next;
+
+    next= h263_find_frame_end(pc, buf, buf_size);
+
+    if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
+        *poutbuf = NULL;
+        *poutbuf_size = 0;
+        return buf_size;
+    }
+
+    *poutbuf = (uint8_t *)buf;
+    *poutbuf_size = buf_size;
+    return next;
+}
+#endif
+
+int ff_h263_decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             uint8_t *buf, int buf_size)
+{
+    MpegEncContext *s = avctx->priv_data;
+    int ret;
+    AVFrame *pict = data;
+
+#ifdef PRINT_FRAME_TIME
+uint64_t time= rdtsc();
+#endif
+#ifdef DEBUG
+    av_log(avctx, AV_LOG_DEBUG, "*****frame %d size=%d\n", avctx->frame_number, buf_size);
+    av_log(avctx, AV_LOG_DEBUG, "bytes=%x %x %x %x\n", buf[0], buf[1], buf[2], buf[3]);
+#endif
+    s->flags= avctx->flags;
+    s->flags2= avctx->flags2;
+
+    /* no supplementary picture */
+    if (buf_size == 0) {
+        /* special case for last picture */
+        if (s->low_delay==0 && s->next_picture_ptr) {
+            *pict= *(AVFrame*)s->next_picture_ptr;
+            s->next_picture_ptr= NULL;
+
+            *data_size = sizeof(AVFrame);
+        }
+
+        return 0;
+    }
+
+    if(s->flags&CODEC_FLAG_TRUNCATED){
+        int next;
+
+        if(s->codec_id==CODEC_ID_MPEG4){
+            next= ff_mpeg4_find_frame_end(&s->parse_context, buf, buf_size);
+        }else if(s->codec_id==CODEC_ID_H263){
+            next= h263_find_frame_end(&s->parse_context, buf, buf_size);
+        }else{
+            av_log(s->avctx, AV_LOG_ERROR, "this codec does not support truncated bitstreams\n");
+            return -1;
+        }
+
+        if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
+            return buf_size;
+    }
+
+
+retry:
+
+    if(s->bitstream_buffer_size && (s->divx_packed || buf_size<20)){ //divx 5.01+/xvid frame reorder
+        init_get_bits(&s->gb, s->bitstream_buffer, s->bitstream_buffer_size*8);
+    }else
+        init_get_bits(&s->gb, buf, buf_size*8);
+    s->bitstream_buffer_size=0;
+
+    if (!s->context_initialized) {
+        if (MPV_common_init(s) < 0) //we need the idct permutaton for reading a custom matrix
+            return -1;
+    }
+
+    //we need to set current_picture_ptr before reading the header, otherwise we cant store anyting im there
+    if(s->current_picture_ptr==NULL || s->current_picture_ptr->data[0]){
+        int i= ff_find_unused_picture(s, 0);
+        s->current_picture_ptr= &s->picture[i];
+    }
+
+    /* let's go :-) */
+    if (s->msmpeg4_version==5) {
+        ret= ff_wmv2_decode_picture_header(s);
+    } else if (s->msmpeg4_version) {
+        ret = msmpeg4_decode_picture_header(s);
+    } else if (s->h263_pred) {
+        if(s->avctx->extradata_size && s->picture_number==0){
+            GetBitContext gb;
+
+            init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size*8);
+            ret = ff_mpeg4_decode_picture_header(s, &gb);
+        }
+        ret = ff_mpeg4_decode_picture_header(s, &s->gb);
+
+        if(s->flags& CODEC_FLAG_LOW_DELAY)
+            s->low_delay=1;
+    } else if (s->codec_id == CODEC_ID_H263I) {
+        ret = intel_h263_decode_picture_header(s);
+    } else if (s->h263_flv) {
+        ret = flv_h263_decode_picture_header(s);
+    } else {
+        ret = h263_decode_picture_header(s);
+    }
+
+    if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_size);
+
+    /* skip if the header was thrashed */
+    if (ret < 0){
+        av_log(s->avctx, AV_LOG_ERROR, "header damaged\n");
+        return -1;
+    }
+
+    avctx->has_b_frames= !s->low_delay;
+
+    if(s->xvid_build==0 && s->divx_version==0 && s->lavc_build==0){
+        if(s->stream_codec_tag == ff_get_fourcc("XVID") ||
+           s->codec_tag == ff_get_fourcc("XVID") || s->codec_tag == ff_get_fourcc("XVIX") ||
+           s->codec_tag == ff_get_fourcc("RMP4"))
+            s->xvid_build= -1;
+#if 0
+        if(s->codec_tag == ff_get_fourcc("DIVX") && s->vo_type==0 && s->vol_control_parameters==1
+           && s->padding_bug_score > 0 && s->low_delay) // XVID with modified fourcc
+            s->xvid_build= -1;
+#endif
+    }
+
+    if(s->xvid_build==0 && s->divx_version==0 && s->lavc_build==0){
+        if(s->codec_tag == ff_get_fourcc("DIVX") && s->vo_type==0 && s->vol_control_parameters==0)
+            s->divx_version= 400; //divx 4
+    }
+
+    if(s->xvid_build && s->divx_version){
+        s->divx_version=
+        s->divx_build= 0;
+    }
+
+    if(s->workaround_bugs&FF_BUG_AUTODETECT){
+        if(s->codec_tag == ff_get_fourcc("XVIX"))
+            s->workaround_bugs|= FF_BUG_XVID_ILACE;
+
+        if(s->codec_tag == ff_get_fourcc("UMP4")){
+            s->workaround_bugs|= FF_BUG_UMP4;
+        }
+
+        if(s->divx_version>=500){
+            s->workaround_bugs|= FF_BUG_QPEL_CHROMA;
+        }
+
+        if(s->divx_version>502){
+            s->workaround_bugs|= FF_BUG_QPEL_CHROMA2;
+        }
+
+        if(s->xvid_build && s->xvid_build<=3)
+            s->padding_bug_score= 256*256*256*64;
+
+        if(s->xvid_build && s->xvid_build<=1)
+            s->workaround_bugs|= FF_BUG_QPEL_CHROMA;
+
+        if(s->xvid_build && s->xvid_build<=12)
+            s->workaround_bugs|= FF_BUG_EDGE;
+
+        if(s->xvid_build && s->xvid_build<=32)
+            s->workaround_bugs|= FF_BUG_DC_CLIP;
+
+#define SET_QPEL_FUNC(postfix1, postfix2) \
+    s->dsp.put_ ## postfix1 = ff_put_ ## postfix2;\
+    s->dsp.put_no_rnd_ ## postfix1 = ff_put_no_rnd_ ## postfix2;\
+    s->dsp.avg_ ## postfix1 = ff_avg_ ## postfix2;
+
+        if(s->lavc_build && s->lavc_build<4653)
+            s->workaround_bugs|= FF_BUG_STD_QPEL;
+
+        if(s->lavc_build && s->lavc_build<4655)
+            s->workaround_bugs|= FF_BUG_DIRECT_BLOCKSIZE;
+
+        if(s->lavc_build && s->lavc_build<4670){
+            s->workaround_bugs|= FF_BUG_EDGE;
+        }
+
+        if(s->lavc_build && s->lavc_build<=4712)
+            s->workaround_bugs|= FF_BUG_DC_CLIP;
+
+        if(s->divx_version)
+            s->workaround_bugs|= FF_BUG_DIRECT_BLOCKSIZE;
+//printf("padding_bug_score: %d\n", s->padding_bug_score);
+        if(s->divx_version==501 && s->divx_build==20020416)
+            s->padding_bug_score= 256*256*256*64;
+
+        if(s->divx_version && s->divx_version<500){
+            s->workaround_bugs|= FF_BUG_EDGE;
+        }
+
+        if(s->divx_version)
+            s->workaround_bugs|= FF_BUG_HPEL_CHROMA;
+#if 0
+        if(s->divx_version==500)
+            s->padding_bug_score= 256*256*256*64;
+
+        /* very ugly XVID padding bug detection FIXME/XXX solve this differently
+         * lets hope this at least works
+         */
+        if(   s->resync_marker==0 && s->data_partitioning==0 && s->divx_version==0
+           && s->codec_id==CODEC_ID_MPEG4 && s->vo_type==0)
+            s->workaround_bugs|= FF_BUG_NO_PADDING;
+
+        if(s->lavc_build && s->lavc_build<4609) //FIXME not sure about the version num but a 4609 file seems ok
+            s->workaround_bugs|= FF_BUG_NO_PADDING;
+#endif
+    }
+
+    if(s->workaround_bugs& FF_BUG_STD_QPEL){
+        SET_QPEL_FUNC(qpel_pixels_tab[0][ 5], qpel16_mc11_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[0][ 7], qpel16_mc31_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[0][ 9], qpel16_mc12_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[0][11], qpel16_mc32_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[0][13], qpel16_mc13_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[0][15], qpel16_mc33_old_c)
+
+        SET_QPEL_FUNC(qpel_pixels_tab[1][ 5], qpel8_mc11_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[1][ 7], qpel8_mc31_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[1][ 9], qpel8_mc12_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[1][11], qpel8_mc32_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_old_c)
+    }
+
+    if(avctx->debug & FF_DEBUG_BUGS)
+        av_log(s->avctx, AV_LOG_DEBUG, "bugs: %X lavc_build:%d xvid_build:%d divx_version:%d divx_build:%d %s\n",
+               s->workaround_bugs, s->lavc_build, s->xvid_build, s->divx_version, s->divx_build,
+               s->divx_packed ? "p" : "");
+
+#if 0 // dump bits per frame / qp / complexity
+{
+    static FILE *f=NULL;
+    if(!f) f=fopen("rate_qp_cplx.txt", "w");
+    fprintf(f, "%d %d %f\n", buf_size, s->qscale, buf_size*(double)s->qscale);
+}
+#endif
+
+#if defined(HAVE_MMX) && defined(CONFIG_GPL)
+    if(s->codec_id == CODEC_ID_MPEG4 && s->xvid_build && avctx->idct_algo == FF_IDCT_AUTO && (mm_flags & MM_MMX)){
+        avctx->idct_algo= FF_IDCT_XVIDMMX;
+        avctx->coded_width= 0; // force reinit
+//        dsputil_init(&s->dsp, avctx);
+        s->picture_number=0;
+    }
+#endif
+
+        /* After H263 & mpeg4 header decode we have the height, width,*/
+        /* and other parameters. So then we could init the picture   */
+        /* FIXME: By the way H263 decoder is evolving it should have */
+        /* an H263EncContext                                         */
+
+    if (   s->width  != avctx->coded_width
+        || s->height != avctx->coded_height) {
+        /* H.263 could change picture size any time */
+        ParseContext pc= s->parse_context; //FIXME move these demuxng hack to avformat
+        s->parse_context.buffer=0;
+        MPV_common_end(s);
+        s->parse_context= pc;
+    }
+    if (!s->context_initialized) {
+        avcodec_set_dimensions(avctx, s->width, s->height);
+
+        goto retry;
+    }
+
+    if((s->codec_id==CODEC_ID_H263 || s->codec_id==CODEC_ID_H263P))
+        s->gob_index = ff_h263_get_gob_height(s);
+
+    // for hurry_up==5
+    s->current_picture.pict_type= s->pict_type;
+    s->current_picture.key_frame= s->pict_type == I_TYPE;
+
+    /* skip B-frames if we don't have reference frames */
+    if(s->last_picture_ptr==NULL && (s->pict_type==B_TYPE || s->dropable)) return get_consumed_bytes(s, buf_size);
+    /* skip b frames if we are in a hurry */
+    if(avctx->hurry_up && s->pict_type==B_TYPE) return get_consumed_bytes(s, buf_size);
+    if(   (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==B_TYPE)
+       || (avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=I_TYPE)
+       ||  avctx->skip_frame >= AVDISCARD_ALL)
+        return get_consumed_bytes(s, buf_size);
+    /* skip everything if we are in a hurry>=5 */
+    if(avctx->hurry_up>=5) return get_consumed_bytes(s, buf_size);
+
+    if(s->next_p_frame_damaged){
+        if(s->pict_type==B_TYPE)
+            return get_consumed_bytes(s, buf_size);
+        else
+            s->next_p_frame_damaged=0;
+    }
+
+    if((s->avctx->flags2 & CODEC_FLAG2_FAST) && s->pict_type==B_TYPE){
+        s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
+        s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
+    }else if((!s->no_rounding) || s->pict_type==B_TYPE){
+        s->me.qpel_put= s->dsp.put_qpel_pixels_tab;
+        s->me.qpel_avg= s->dsp.avg_qpel_pixels_tab;
+    }else{
+        s->me.qpel_put= s->dsp.put_no_rnd_qpel_pixels_tab;
+        s->me.qpel_avg= s->dsp.avg_qpel_pixels_tab;
+    }
+
+    if(MPV_frame_start(s, avctx) < 0)
+        return -1;
+
+#ifdef DEBUG
+    av_log(avctx, AV_LOG_DEBUG, "qscale=%d\n", s->qscale);
+#endif
+
+    ff_er_frame_start(s);
+
+    //the second part of the wmv2 header contains the MB skip bits which are stored in current_picture->mb_type
+    //which isnt available before MPV_frame_start()
+    if (s->msmpeg4_version==5){
+        if(ff_wmv2_decode_secondary_picture_header(s) < 0)
+            return -1;
+    }
+
+    /* decode each macroblock */
+    s->mb_x=0;
+    s->mb_y=0;
+
+    decode_slice(s);
+    while(s->mb_y<s->mb_height){
+        if(s->msmpeg4_version){
+            if(s->mb_x!=0 || (s->mb_y%s->slice_height)!=0 || get_bits_count(&s->gb) > s->gb.size_in_bits)
+                break;
+        }else{
+            if(ff_h263_resync(s)<0)
+                break;
+        }
+
+        if(s->msmpeg4_version<4 && s->h263_pred)
+            ff_mpeg4_clean_buffers(s);
+
+        decode_slice(s);
+    }
+
+    if (s->h263_msmpeg4 && s->msmpeg4_version<4 && s->pict_type==I_TYPE)
+        if(msmpeg4_decode_ext_header(s, buf_size) < 0){
+            s->error_status_table[s->mb_num-1]= AC_ERROR|DC_ERROR|MV_ERROR;
+        }
+
+    /* divx 5.01+ bistream reorder stuff */
+    if(s->codec_id==CODEC_ID_MPEG4 && s->bitstream_buffer_size==0 && s->divx_packed){
+        int current_pos= get_bits_count(&s->gb)>>3;
+        int startcode_found=0;
+
+        if(buf_size - current_pos > 5){
+            int i;
+            for(i=current_pos; i<buf_size-3; i++){
+                if(buf[i]==0 && buf[i+1]==0 && buf[i+2]==1 && buf[i+3]==0xB6){
+                    startcode_found=1;
+                    break;
+                }
+            }
+        }
+        if(s->gb.buffer == s->bitstream_buffer && buf_size>20){ //xvid style
+            startcode_found=1;
+            current_pos=0;
+        }
+
+        if(startcode_found){
+            s->bitstream_buffer= av_fast_realloc(
+                s->bitstream_buffer,
+                &s->allocated_bitstream_buffer_size,
+                buf_size - current_pos + FF_INPUT_BUFFER_PADDING_SIZE);
+            memcpy(s->bitstream_buffer, buf + current_pos, buf_size - current_pos);
+            s->bitstream_buffer_size= buf_size - current_pos;
+        }
+    }
+
+    ff_er_frame_end(s);
+
+    MPV_frame_end(s);
+
+assert(s->current_picture.pict_type == s->current_picture_ptr->pict_type);
+assert(s->current_picture.pict_type == s->pict_type);
+    if (s->pict_type == B_TYPE || s->low_delay) {
+        *pict= *(AVFrame*)s->current_picture_ptr;
+    } else if (s->last_picture_ptr != NULL) {
+        *pict= *(AVFrame*)s->last_picture_ptr;
+    }
+
+    if(s->last_picture_ptr || s->low_delay){
+        *data_size = sizeof(AVFrame);
+        ff_print_debug_info(s, pict);
+    }
+
+    /* Return the Picture timestamp as the frame number */
+    /* we substract 1 because it is added on utils.c    */
+    avctx->frame_number = s->picture_number - 1;
+
+#ifdef PRINT_FRAME_TIME
+av_log(avctx, AV_LOG_DEBUG, "%"PRId64"\n", rdtsc()-time);
+#endif
+
+    return get_consumed_bytes(s, buf_size);
+}
+
+AVCodec mpeg4_decoder = {
+    "mpeg4",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MPEG4,
+    sizeof(MpegEncContext),
+    ff_h263_decode_init,
+    NULL,
+    ff_h263_decode_end,
+    ff_h263_decode_frame,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
+    .flush= ff_mpeg_flush,
+};
+
+AVCodec h263_decoder = {
+    "h263",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_H263,
+    sizeof(MpegEncContext),
+    ff_h263_decode_init,
+    NULL,
+    ff_h263_decode_end,
+    ff_h263_decode_frame,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
+    .flush= ff_mpeg_flush,
+};
+
+AVCodec msmpeg4v1_decoder = {
+    "msmpeg4v1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MSMPEG4V1,
+    sizeof(MpegEncContext),
+    ff_h263_decode_init,
+    NULL,
+    ff_h263_decode_end,
+    ff_h263_decode_frame,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+};
+
+AVCodec msmpeg4v2_decoder = {
+    "msmpeg4v2",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MSMPEG4V2,
+    sizeof(MpegEncContext),
+    ff_h263_decode_init,
+    NULL,
+    ff_h263_decode_end,
+    ff_h263_decode_frame,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+};
+
+AVCodec msmpeg4v3_decoder = {
+    "msmpeg4",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MSMPEG4V3,
+    sizeof(MpegEncContext),
+    ff_h263_decode_init,
+    NULL,
+    ff_h263_decode_end,
+    ff_h263_decode_frame,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+};
+
+AVCodec wmv1_decoder = {
+    "wmv1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_WMV1,
+    sizeof(MpegEncContext),
+    ff_h263_decode_init,
+    NULL,
+    ff_h263_decode_end,
+    ff_h263_decode_frame,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+};
+
+AVCodec h263i_decoder = {
+    "h263i",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_H263I,
+    sizeof(MpegEncContext),
+    ff_h263_decode_init,
+    NULL,
+    ff_h263_decode_end,
+    ff_h263_decode_frame,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+};
+
+AVCodec flv_decoder = {
+    "flv",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_FLV1,
+    sizeof(MpegEncContext),
+    ff_h263_decode_init,
+    NULL,
+    ff_h263_decode_end,
+    ff_h263_decode_frame,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1
+};
+
+#ifdef CONFIG_H263_PARSER
+AVCodecParser h263_parser = {
+    { CODEC_ID_H263 },
+    sizeof(ParseContext),
+    NULL,
+    h263_parse,
+    ff_parse_close,
+};
+#endif
diff --git a/contrib/ffmpeg/libavcodec/h264.c b/contrib/ffmpeg/libavcodec/h264.c
new file mode 100644
index 000000000..ad23ae120
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/h264.c
@@ -0,0 +1,8655 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file h264.c
+ * H.264 / AVC / MPEG4 part10 codec.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "common.h"
+#include "dsputil.h"
+#include "avcodec.h"
+#include "mpegvideo.h"
+#include "h264data.h"
+#include "golomb.h"
+
+#include "cabac.h"
+
+//#undef NDEBUG
+#include <assert.h>
+
+#define interlaced_dct interlaced_dct_is_a_bad_name
+#define mb_intra mb_intra_isnt_initalized_see_mb_type
+
+#define LUMA_DC_BLOCK_INDEX   25
+#define CHROMA_DC_BLOCK_INDEX 26
+
+#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
+#define COEFF_TOKEN_VLC_BITS           8
+#define TOTAL_ZEROS_VLC_BITS           9
+#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
+#define RUN_VLC_BITS                   3
+#define RUN7_VLC_BITS                  6
+
+#define MAX_SPS_COUNT 32
+#define MAX_PPS_COUNT 256
+
+#define MAX_MMCO_COUNT 66
+
+/* Compiling in interlaced support reduces the speed
+ * of progressive decoding by about 2%. */
+#define ALLOW_INTERLACE
+
+#ifdef ALLOW_INTERLACE
+#define MB_MBAFF h->mb_mbaff
+#define MB_FIELD h->mb_field_decoding_flag
+#define FRAME_MBAFF h->mb_aff_frame
+#else
+#define MB_MBAFF 0
+#define MB_FIELD 0
+#define FRAME_MBAFF 0
+#undef  IS_INTERLACED
+#define IS_INTERLACED(mb_type) 0
+#endif
+
+/**
+ * Sequence parameter set
+ */
+typedef struct SPS{
+
+    int profile_idc;
+    int level_idc;
+    int transform_bypass;              ///< qpprime_y_zero_transform_bypass_flag
+    int log2_max_frame_num;            ///< log2_max_frame_num_minus4 + 4
+    int poc_type;                      ///< pic_order_cnt_type
+    int log2_max_poc_lsb;              ///< log2_max_pic_order_cnt_lsb_minus4
+    int delta_pic_order_always_zero_flag;
+    int offset_for_non_ref_pic;
+    int offset_for_top_to_bottom_field;
+    int poc_cycle_length;              ///< num_ref_frames_in_pic_order_cnt_cycle
+    int ref_frame_count;               ///< num_ref_frames
+    int gaps_in_frame_num_allowed_flag;
+    int mb_width;                      ///< frame_width_in_mbs_minus1 + 1
+    int mb_height;                     ///< frame_height_in_mbs_minus1 + 1
+    int frame_mbs_only_flag;
+    int mb_aff;                        ///<mb_adaptive_frame_field_flag
+    int direct_8x8_inference_flag;
+    int crop;                   ///< frame_cropping_flag
+    int crop_left;              ///< frame_cropping_rect_left_offset
+    int crop_right;             ///< frame_cropping_rect_right_offset
+    int crop_top;               ///< frame_cropping_rect_top_offset
+    int crop_bottom;            ///< frame_cropping_rect_bottom_offset
+    int vui_parameters_present_flag;
+    AVRational sar;
+    int timing_info_present_flag;
+    uint32_t num_units_in_tick;
+    uint32_t time_scale;
+    int fixed_frame_rate_flag;
+    short offset_for_ref_frame[256]; //FIXME dyn aloc?
+    int bitstream_restriction_flag;
+    int num_reorder_frames;
+    int scaling_matrix_present;
+    uint8_t scaling_matrix4[6][16];
+    uint8_t scaling_matrix8[2][64];
+}SPS;
+
+/**
+ * Picture parameter set
+ */
+typedef struct PPS{
+    int sps_id;
+    int cabac;                  ///< entropy_coding_mode_flag
+    int pic_order_present;      ///< pic_order_present_flag
+    int slice_group_count;      ///< num_slice_groups_minus1 + 1
+    int mb_slice_group_map_type;
+    int ref_count[2];           ///< num_ref_idx_l0/1_active_minus1 + 1
+    int weighted_pred;          ///< weighted_pred_flag
+    int weighted_bipred_idc;
+    int init_qp;                ///< pic_init_qp_minus26 + 26
+    int init_qs;                ///< pic_init_qs_minus26 + 26
+    int chroma_qp_index_offset;
+    int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
+    int constrained_intra_pred; ///< constrained_intra_pred_flag
+    int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
+    int transform_8x8_mode;     ///< transform_8x8_mode_flag
+    uint8_t scaling_matrix4[6][16];
+    uint8_t scaling_matrix8[2][64];
+}PPS;
+
+/**
+ * Memory management control operation opcode.
+ */
+typedef enum MMCOOpcode{
+    MMCO_END=0,
+    MMCO_SHORT2UNUSED,
+    MMCO_LONG2UNUSED,
+    MMCO_SHORT2LONG,
+    MMCO_SET_MAX_LONG,
+    MMCO_RESET,
+    MMCO_LONG,
+} MMCOOpcode;
+
+/**
+ * Memory management control operation.
+ */
+typedef struct MMCO{
+    MMCOOpcode opcode;
+    int short_frame_num;
+    int long_index;
+} MMCO;
+
+/**
+ * H264Context
+ */
+typedef struct H264Context{
+    MpegEncContext s;
+    int nal_ref_idc;
+    int nal_unit_type;
+#define NAL_SLICE                1
+#define NAL_DPA                  2
+#define NAL_DPB                  3
+#define NAL_DPC                  4
+#define NAL_IDR_SLICE            5
+#define NAL_SEI                  6
+#define NAL_SPS                  7
+#define NAL_PPS                  8
+#define NAL_AUD                  9
+#define NAL_END_SEQUENCE        10
+#define NAL_END_STREAM          11
+#define NAL_FILLER_DATA         12
+#define NAL_SPS_EXT             13
+#define NAL_AUXILIARY_SLICE     19
+    uint8_t *rbsp_buffer;
+    unsigned int rbsp_buffer_size;
+
+    /**
+      * Used to parse AVC variant of h264
+      */
+    int is_avc; ///< this flag is != 0 if codec is avc1
+    int got_avcC; ///< flag used to parse avcC data only once
+    int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
+
+    int chroma_qp; //QPc
+
+    int prev_mb_skipped;
+    int next_mb_skipped;
+
+    //prediction stuff
+    int chroma_pred_mode;
+    int intra16x16_pred_mode;
+
+    int top_mb_xy;
+    int left_mb_xy[2];
+
+    int8_t intra4x4_pred_mode_cache[5*8];
+    int8_t (*intra4x4_pred_mode)[8];
+    void (*pred4x4  [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
+    void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
+    void (*pred8x8  [4+3])(uint8_t *src, int stride);
+    void (*pred16x16[4+3])(uint8_t *src, int stride);
+    unsigned int topleft_samples_available;
+    unsigned int top_samples_available;
+    unsigned int topright_samples_available;
+    unsigned int left_samples_available;
+    uint8_t (*top_borders[2])[16+2*8];
+    uint8_t left_border[2*(17+2*9)];
+
+    /**
+     * non zero coeff count cache.
+     * is 64 if not available.
+     */
+    DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
+    uint8_t (*non_zero_count)[16];
+
+    /**
+     * Motion vector cache.
+     */
+    DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
+    DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
+#define LIST_NOT_USED -1 //FIXME rename?
+#define PART_NOT_AVAILABLE -2
+
+    /**
+     * is 1 if the specific list MV&references are set to 0,0,-2.
+     */
+    int mv_cache_clean[2];
+
+    /**
+     * number of neighbors (top and/or left) that used 8x8 dct
+     */
+    int neighbor_transform_size;
+
+    /**
+     * block_offset[ 0..23] for frame macroblocks
+     * block_offset[24..47] for field macroblocks
+     */
+    int block_offset[2*(16+8)];
+
+    uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
+    uint32_t *mb2b8_xy;
+    int b_stride; //FIXME use s->b4_stride
+    int b8_stride;
+
+    int mb_linesize;   ///< may be equal to s->linesize or s->linesize*2, for mbaff
+    int mb_uvlinesize;
+
+    int emu_edge_width;
+    int emu_edge_height;
+
+    int halfpel_flag;
+    int thirdpel_flag;
+
+    int unknown_svq3_flag;
+    int next_slice_index;
+
+    SPS sps_buffer[MAX_SPS_COUNT];
+    SPS sps; ///< current sps
+
+    PPS pps_buffer[MAX_PPS_COUNT];
+    /**
+     * current pps
+     */
+    PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
+
+    uint32_t dequant4_buffer[6][52][16];
+    uint32_t dequant8_buffer[2][52][64];
+    uint32_t (*dequant4_coeff[6])[16];
+    uint32_t (*dequant8_coeff[2])[64];
+    int dequant_coeff_pps;     ///< reinit tables when pps changes
+
+    int slice_num;
+    uint8_t *slice_table_base;
+    uint8_t *slice_table;      ///< slice_table_base + 2*mb_stride + 1
+    int slice_type;
+    int slice_type_fixed;
+
+    //interlacing specific flags
+    int mb_aff_frame;
+    int mb_field_decoding_flag;
+    int mb_mbaff;              ///< mb_aff_frame && mb_field_decoding_flag
+
+    int sub_mb_type[4];
+
+    //POC stuff
+    int poc_lsb;
+    int poc_msb;
+    int delta_poc_bottom;
+    int delta_poc[2];
+    int frame_num;
+    int prev_poc_msb;             ///< poc_msb of the last reference pic for POC type 0
+    int prev_poc_lsb;             ///< poc_lsb of the last reference pic for POC type 0
+    int frame_num_offset;         ///< for POC type 2
+    int prev_frame_num_offset;    ///< for POC type 2
+    int prev_frame_num;           ///< frame_num of the last pic for POC type 1/2
+
+    /**
+     * frame_num for frames or 2*frame_num for field pics.
+     */
+    int curr_pic_num;
+
+    /**
+     * max_frame_num or 2*max_frame_num for field pics.
+     */
+    int max_pic_num;
+
+    //Weighted pred stuff
+    int use_weight;
+    int use_weight_chroma;
+    int luma_log2_weight_denom;
+    int chroma_log2_weight_denom;
+    int luma_weight[2][48];
+    int luma_offset[2][48];
+    int chroma_weight[2][48][2];
+    int chroma_offset[2][48][2];
+    int implicit_weight[48][48];
+
+    //deblock
+    int deblocking_filter;         ///< disable_deblocking_filter_idc with 1<->0
+    int slice_alpha_c0_offset;
+    int slice_beta_offset;
+
+    int redundant_pic_count;
+
+    int direct_spatial_mv_pred;
+    int dist_scale_factor[16];
+    int dist_scale_factor_field[32];
+    int map_col_to_list0[2][16];
+    int map_col_to_list0_field[2][32];
+
+    /**
+     * num_ref_idx_l0/1_active_minus1 + 1
+     */
+    int ref_count[2];            ///< counts frames or fields, depending on current mb mode
+    Picture *short_ref[32];
+    Picture *long_ref[32];
+    Picture default_ref_list[2][32];
+    Picture ref_list[2][48];     ///< 0..15: frame refs, 16..47: mbaff field refs
+    Picture *delayed_pic[16]; //FIXME size?
+    Picture *delayed_output_pic;
+
+    /**
+     * memory management control operations buffer.
+     */
+    MMCO mmco[MAX_MMCO_COUNT];
+    int mmco_index;
+
+    int long_ref_count;  ///< number of actual long term references
+    int short_ref_count; ///< number of actual short term references
+
+    //data partitioning
+    GetBitContext intra_gb;
+    GetBitContext inter_gb;
+    GetBitContext *intra_gb_ptr;
+    GetBitContext *inter_gb_ptr;
+
+    DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
+
+    /**
+     * Cabac
+     */
+    CABACContext cabac;
+    uint8_t      cabac_state[460];
+    int          cabac_init_idc;
+
+    /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
+    uint16_t     *cbp_table;
+    int cbp;
+    int top_cbp;
+    int left_cbp;
+    /* chroma_pred_mode for i4x4 or i16x16, else 0 */
+    uint8_t     *chroma_pred_mode_table;
+    int         last_qscale_diff;
+    int16_t     (*mvd_table[2])[2];
+    DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
+    uint8_t     *direct_table;
+    uint8_t     direct_cache[5*8];
+
+    uint8_t zigzag_scan[16];
+    uint8_t zigzag_scan8x8[64];
+    uint8_t zigzag_scan8x8_cavlc[64];
+    uint8_t field_scan[16];
+    uint8_t field_scan8x8[64];
+    uint8_t field_scan8x8_cavlc[64];
+    const uint8_t *zigzag_scan_q0;
+    const uint8_t *zigzag_scan8x8_q0;
+    const uint8_t *zigzag_scan8x8_cavlc_q0;
+    const uint8_t *field_scan_q0;
+    const uint8_t *field_scan8x8_q0;
+    const uint8_t *field_scan8x8_cavlc_q0;
+
+    int x264_build;
+}H264Context;
+
+static VLC coeff_token_vlc[4];
+static VLC chroma_dc_coeff_token_vlc;
+
+static VLC total_zeros_vlc[15];
+static VLC chroma_dc_total_zeros_vlc[3];
+
+static VLC run_vlc[6];
+static VLC run7_vlc;
+
+static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
+static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
+static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
+static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
+
+static always_inline uint32_t pack16to32(int a, int b){
+#ifdef WORDS_BIGENDIAN
+   return (b&0xFFFF) + (a<<16);
+#else
+   return (a&0xFFFF) + (b<<16);
+#endif
+}
+
+/**
+ * fill a rectangle.
+ * @param h height of the rectangle, should be a constant
+ * @param w width of the rectangle, should be a constant
+ * @param size the size of val (1 or 4), should be a constant
+ */
+static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
+    uint8_t *p= (uint8_t*)vp;
+    assert(size==1 || size==4);
+    assert(w<=4);
+
+    w      *= size;
+    stride *= size;
+
+    assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
+    assert((stride&(w-1))==0);
+    if(w==2){
+        const uint16_t v= size==4 ? val : val*0x0101;
+        *(uint16_t*)(p + 0*stride)= v;
+        if(h==1) return;
+        *(uint16_t*)(p + 1*stride)= v;
+        if(h==2) return;
+        *(uint16_t*)(p + 2*stride)=
+        *(uint16_t*)(p + 3*stride)= v;
+    }else if(w==4){
+        const uint32_t v= size==4 ? val : val*0x01010101;
+        *(uint32_t*)(p + 0*stride)= v;
+        if(h==1) return;
+        *(uint32_t*)(p + 1*stride)= v;
+        if(h==2) return;
+        *(uint32_t*)(p + 2*stride)=
+        *(uint32_t*)(p + 3*stride)= v;
+    }else if(w==8){
+    //gcc can't optimize 64bit math on x86_32
+#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
+        const uint64_t v= val*0x0100000001ULL;
+        *(uint64_t*)(p + 0*stride)= v;
+        if(h==1) return;
+        *(uint64_t*)(p + 1*stride)= v;
+        if(h==2) return;
+        *(uint64_t*)(p + 2*stride)=
+        *(uint64_t*)(p + 3*stride)= v;
+    }else if(w==16){
+        const uint64_t v= val*0x0100000001ULL;
+        *(uint64_t*)(p + 0+0*stride)=
+        *(uint64_t*)(p + 8+0*stride)=
+        *(uint64_t*)(p + 0+1*stride)=
+        *(uint64_t*)(p + 8+1*stride)= v;
+        if(h==2) return;
+        *(uint64_t*)(p + 0+2*stride)=
+        *(uint64_t*)(p + 8+2*stride)=
+        *(uint64_t*)(p + 0+3*stride)=
+        *(uint64_t*)(p + 8+3*stride)= v;
+#else
+        *(uint32_t*)(p + 0+0*stride)=
+        *(uint32_t*)(p + 4+0*stride)= val;
+        if(h==1) return;
+        *(uint32_t*)(p + 0+1*stride)=
+        *(uint32_t*)(p + 4+1*stride)= val;
+        if(h==2) return;
+        *(uint32_t*)(p + 0+2*stride)=
+        *(uint32_t*)(p + 4+2*stride)=
+        *(uint32_t*)(p + 0+3*stride)=
+        *(uint32_t*)(p + 4+3*stride)= val;
+    }else if(w==16){
+        *(uint32_t*)(p + 0+0*stride)=
+        *(uint32_t*)(p + 4+0*stride)=
+        *(uint32_t*)(p + 8+0*stride)=
+        *(uint32_t*)(p +12+0*stride)=
+        *(uint32_t*)(p + 0+1*stride)=
+        *(uint32_t*)(p + 4+1*stride)=
+        *(uint32_t*)(p + 8+1*stride)=
+        *(uint32_t*)(p +12+1*stride)= val;
+        if(h==2) return;
+        *(uint32_t*)(p + 0+2*stride)=
+        *(uint32_t*)(p + 4+2*stride)=
+        *(uint32_t*)(p + 8+2*stride)=
+        *(uint32_t*)(p +12+2*stride)=
+        *(uint32_t*)(p + 0+3*stride)=
+        *(uint32_t*)(p + 4+3*stride)=
+        *(uint32_t*)(p + 8+3*stride)=
+        *(uint32_t*)(p +12+3*stride)= val;
+#endif
+    }else
+        assert(0);
+    assert(h==4);
+}
+
+static void fill_caches(H264Context *h, int mb_type, int for_deblock){
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
+    int topleft_xy, top_xy, topright_xy, left_xy[2];
+    int topleft_type, top_type, topright_type, left_type[2];
+    int left_block[8];
+    int i;
+
+    //FIXME deblocking could skip the intra and nnz parts.
+    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
+        return;
+
+    //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
+
+    top_xy     = mb_xy  - s->mb_stride;
+    topleft_xy = top_xy - 1;
+    topright_xy= top_xy + 1;
+    left_xy[1] = left_xy[0] = mb_xy-1;
+    left_block[0]= 0;
+    left_block[1]= 1;
+    left_block[2]= 2;
+    left_block[3]= 3;
+    left_block[4]= 7;
+    left_block[5]= 10;
+    left_block[6]= 8;
+    left_block[7]= 11;
+    if(FRAME_MBAFF){
+        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
+        const int top_pair_xy      = pair_xy     - s->mb_stride;
+        const int topleft_pair_xy  = top_pair_xy - 1;
+        const int topright_pair_xy = top_pair_xy + 1;
+        const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
+        const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
+        const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
+        const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
+        const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
+        const int bottom = (s->mb_y & 1);
+        tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
+        if (bottom
+                ? !curr_mb_frame_flag // bottom macroblock
+                : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
+                ) {
+            top_xy -= s->mb_stride;
+        }
+        if (bottom
+                ? !curr_mb_frame_flag // bottom macroblock
+                : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
+                ) {
+            topleft_xy -= s->mb_stride;
+        }
+        if (bottom
+                ? !curr_mb_frame_flag // bottom macroblock
+                : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
+                ) {
+            topright_xy -= s->mb_stride;
+        }
+        if (left_mb_frame_flag != curr_mb_frame_flag) {
+            left_xy[1] = left_xy[0] = pair_xy - 1;
+            if (curr_mb_frame_flag) {
+                if (bottom) {
+                    left_block[0]= 2;
+                    left_block[1]= 2;
+                    left_block[2]= 3;
+                    left_block[3]= 3;
+                    left_block[4]= 8;
+                    left_block[5]= 11;
+                    left_block[6]= 8;
+                    left_block[7]= 11;
+                } else {
+                    left_block[0]= 0;
+                    left_block[1]= 0;
+                    left_block[2]= 1;
+                    left_block[3]= 1;
+                    left_block[4]= 7;
+                    left_block[5]= 10;
+                    left_block[6]= 7;
+                    left_block[7]= 10;
+                }
+            } else {
+                left_xy[1] += s->mb_stride;
+                //left_block[0]= 0;
+                left_block[1]= 2;
+                left_block[2]= 0;
+                left_block[3]= 2;
+                //left_block[4]= 7;
+                left_block[5]= 10;
+                left_block[6]= 7;
+                left_block[7]= 10;
+            }
+        }
+    }
+
+    h->top_mb_xy = top_xy;
+    h->left_mb_xy[0] = left_xy[0];
+    h->left_mb_xy[1] = left_xy[1];
+    if(for_deblock){
+        topleft_type = 0;
+        topright_type = 0;
+        top_type     = h->slice_table[top_xy     ] < 255 ? s->current_picture.mb_type[top_xy]     : 0;
+        left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
+        left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
+
+        if(FRAME_MBAFF && !IS_INTRA(mb_type)){
+            int list;
+            int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
+            for(i=0; i<16; i++)
+                h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
+            for(list=0; list<1+(h->slice_type==B_TYPE); list++){
+                if(USES_LIST(mb_type,list)){
+                    uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
+                    uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
+                    int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
+                    for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
+                        dst[0] = src[0];
+                        dst[1] = src[1];
+                        dst[2] = src[2];
+                        dst[3] = src[3];
+                    }
+                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
+                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
+                    ref += h->b8_stride;
+                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
+                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
+                }else{
+                    fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
+                    fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
+                }
+            }
+        }
+    }else{
+        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
+        top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
+        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
+        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
+        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
+    }
+
+    if(IS_INTRA(mb_type)){
+        h->topleft_samples_available=
+        h->top_samples_available=
+        h->left_samples_available= 0xFFFF;
+        h->topright_samples_available= 0xEEEA;
+
+        if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
+            h->topleft_samples_available= 0xB3FF;
+            h->top_samples_available= 0x33FF;
+            h->topright_samples_available= 0x26EA;
+        }
+        for(i=0; i<2; i++){
+            if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
+                h->topleft_samples_available&= 0xDF5F;
+                h->left_samples_available&= 0x5F5F;
+            }
+        }
+
+        if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
+            h->topleft_samples_available&= 0x7FFF;
+
+        if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
+            h->topright_samples_available&= 0xFBFF;
+
+        if(IS_INTRA4x4(mb_type)){
+            if(IS_INTRA4x4(top_type)){
+                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
+                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
+                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
+                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
+            }else{
+                int pred;
+                if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
+                    pred= -1;
+                else{
+                    pred= 2;
+                }
+                h->intra4x4_pred_mode_cache[4+8*0]=
+                h->intra4x4_pred_mode_cache[5+8*0]=
+                h->intra4x4_pred_mode_cache[6+8*0]=
+                h->intra4x4_pred_mode_cache[7+8*0]= pred;
+            }
+            for(i=0; i<2; i++){
+                if(IS_INTRA4x4(left_type[i])){
+                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
+                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
+                }else{
+                    int pred;
+                    if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
+                        pred= -1;
+                    else{
+                        pred= 2;
+                    }
+                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
+                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
+                }
+            }
+        }
+    }
+
+
+/*
+0 . T T. T T T T
+1 L . .L . . . .
+2 L . .L . . . .
+3 . T TL . . . .
+4 L . .L . . . .
+5 L . .. . . . .
+*/
+//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
+    if(top_type){
+        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
+        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
+        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
+        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
+
+        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
+        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
+
+        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
+        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
+
+    }else{
+        h->non_zero_count_cache[4+8*0]=
+        h->non_zero_count_cache[5+8*0]=
+        h->non_zero_count_cache[6+8*0]=
+        h->non_zero_count_cache[7+8*0]=
+
+        h->non_zero_count_cache[1+8*0]=
+        h->non_zero_count_cache[2+8*0]=
+
+        h->non_zero_count_cache[1+8*3]=
+        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
+
+    }
+
+    for (i=0; i<2; i++) {
+        if(left_type[i]){
+            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
+            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
+            h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
+            h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
+        }else{
+            h->non_zero_count_cache[3+8*1 + 2*8*i]=
+            h->non_zero_count_cache[3+8*2 + 2*8*i]=
+            h->non_zero_count_cache[0+8*1 +   8*i]=
+            h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
+        }
+    }
+
+    if( h->pps.cabac ) {
+        // top_cbp
+        if(top_type) {
+            h->top_cbp = h->cbp_table[top_xy];
+        } else if(IS_INTRA(mb_type)) {
+            h->top_cbp = 0x1C0;
+        } else {
+            h->top_cbp = 0;
+        }
+        // left_cbp
+        if (left_type[0]) {
+            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
+        } else if(IS_INTRA(mb_type)) {
+            h->left_cbp = 0x1C0;
+        } else {
+            h->left_cbp = 0;
+        }
+        if (left_type[0]) {
+            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
+        }
+        if (left_type[1]) {
+            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
+        }
+    }
+
+#if 1
+    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
+        int list;
+        for(list=0; list<1+(h->slice_type==B_TYPE); list++){
+            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
+                /*if(!h->mv_cache_clean[list]){
+                    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
+                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
+                    h->mv_cache_clean[list]= 1;
+                }*/
+                continue;
+            }
+            h->mv_cache_clean[list]= 0;
+
+            if(USES_LIST(top_type, list)){
+                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
+                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
+                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
+                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
+                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
+                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
+                h->ref_cache[list][scan8[0] + 0 - 1*8]=
+                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
+                h->ref_cache[list][scan8[0] + 2 - 1*8]=
+                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
+            }else{
+                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
+                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
+                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
+                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
+                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
+            }
+
+            //FIXME unify cleanup or sth
+            if(USES_LIST(left_type[0], list)){
+                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
+                const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
+                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
+                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
+                h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
+                h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
+            }else{
+                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
+                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
+                h->ref_cache[list][scan8[0] - 1 + 0*8]=
+                h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+            }
+
+            if(USES_LIST(left_type[1], list)){
+                const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
+                const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
+                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
+                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
+                h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
+                h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
+            }else{
+                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
+                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
+                h->ref_cache[list][scan8[0] - 1 + 2*8]=
+                h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+                assert((!left_type[0]) == (!left_type[1]));
+            }
+
+            if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
+                continue;
+
+            if(USES_LIST(topleft_type, list)){
+                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
+                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
+                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
+                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
+            }else{
+                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
+                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+            }
+
+            if(USES_LIST(topright_type, list)){
+                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
+                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
+                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
+                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
+            }else{
+                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
+                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+            }
+
+            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
+                continue;
+
+            h->ref_cache[list][scan8[5 ]+1] =
+            h->ref_cache[list][scan8[7 ]+1] =
+            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
+            h->ref_cache[list][scan8[4 ]] =
+            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
+            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
+            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
+            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
+            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
+            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
+
+            if( h->pps.cabac ) {
+                /* XXX beurk, Load mvd */
+                if(USES_LIST(top_type, list)){
+                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
+                }else{
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
+                }
+                if(USES_LIST(left_type[0], list)){
+                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
+                }else{
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
+                }
+                if(USES_LIST(left_type[1], list)){
+                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
+                }else{
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
+                }
+                *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
+                *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
+                *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
+                *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
+                *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
+
+                if(h->slice_type == B_TYPE){
+                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
+
+                    if(IS_DIRECT(top_type)){
+                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
+                    }else if(IS_8X8(top_type)){
+                        int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
+                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
+                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
+                    }else{
+                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
+                    }
+
+                    if(IS_DIRECT(left_type[0]))
+                        h->direct_cache[scan8[0] - 1 + 0*8]= 1;
+                    else if(IS_8X8(left_type[0]))
+                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
+                    else
+                        h->direct_cache[scan8[0] - 1 + 0*8]= 0;
+
+                    if(IS_DIRECT(left_type[1]))
+                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
+                    else if(IS_8X8(left_type[1]))
+                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
+                    else
+                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;
+                }
+            }
+
+            if(FRAME_MBAFF){
+#define MAP_MVS\
+                    MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
+                    MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
+                    MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
+                    MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
+                    MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
+                    MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
+                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
+                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
+                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
+                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
+                if(MB_FIELD){
+#define MAP_F2F(idx, mb_type)\
+                    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
+                        h->ref_cache[list][idx] <<= 1;\
+                        h->mv_cache[list][idx][1] /= 2;\
+                        h->mvd_cache[list][idx][1] /= 2;\
+                    }
+                    MAP_MVS
+#undef MAP_F2F
+                }else{
+#define MAP_F2F(idx, mb_type)\
+                    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
+                        h->ref_cache[list][idx] >>= 1;\
+                        h->mv_cache[list][idx][1] <<= 1;\
+                        h->mvd_cache[list][idx][1] <<= 1;\
+                    }
+                    MAP_MVS
+#undef MAP_F2F
+                }
+            }
+        }
+    }
+#endif
+
+    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
+}
+
+static inline void write_back_intra_pred_mode(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
+
+    h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
+    h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
+    h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
+    h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
+    h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
+    h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
+    h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
+}
+
+/**
+ * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
+ */
+static inline int check_intra4x4_pred_mode(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
+    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
+    int i;
+
+    if(!(h->top_samples_available&0x8000)){
+        for(i=0; i<4; i++){
+            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
+            if(status<0){
+                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
+                return -1;
+            } else if(status){
+                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
+            }
+        }
+    }
+
+    if(!(h->left_samples_available&0x8000)){
+        for(i=0; i<4; i++){
+            int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
+            if(status<0){
+                av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
+                return -1;
+            } else if(status){
+                h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
+            }
+        }
+    }
+
+    return 0;
+} //FIXME cleanup like next
+
+/**
+ * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
+ */
+static inline int check_intra_pred_mode(H264Context *h, int mode){
+    MpegEncContext * const s = &h->s;
+    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
+    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
+
+    if(mode < 0 || mode > 6) {
+        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
+        return -1;
+    }
+
+    if(!(h->top_samples_available&0x8000)){
+        mode= top[ mode ];
+        if(mode<0){
+            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+    }
+
+    if(!(h->left_samples_available&0x8000)){
+        mode= left[ mode ];
+        if(mode<0){
+            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+    }
+
+    return mode;
+}
+
+/**
+ * gets the predicted intra4x4 prediction mode.
+ */
+static inline int pred_intra_mode(H264Context *h, int n){
+    const int index8= scan8[n];
+    const int left= h->intra4x4_pred_mode_cache[index8 - 1];
+    const int top = h->intra4x4_pred_mode_cache[index8 - 8];
+    const int min= FFMIN(left, top);
+
+    tprintf("mode:%d %d min:%d\n", left ,top, min);
+
+    if(min<0) return DC_PRED;
+    else      return min;
+}
+
+static inline void write_back_non_zero_count(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
+
+    h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
+    h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
+    h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
+    h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
+    h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
+    h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
+    h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
+
+    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
+    h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
+    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
+
+    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
+    h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
+    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
+
+    if(FRAME_MBAFF){
+        // store all luma nnzs, for deblocking
+        int v = 0, i;
+        for(i=0; i<16; i++)
+            v += (!!h->non_zero_count_cache[scan8[i]]) << i;
+        *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
+    }
+}
+
+/**
+ * gets the predicted number of non zero coefficients.
+ * @param n block index
+ */
+static inline int pred_non_zero_count(H264Context *h, int n){
+    const int index8= scan8[n];
+    const int left= h->non_zero_count_cache[index8 - 1];
+    const int top = h->non_zero_count_cache[index8 - 8];
+    int i= left + top;
+
+    if(i<64) i= (i+1)>>1;
+
+    tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
+
+    return i&31;
+}
+
+static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
+    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
+
+    /* there is no consistent mapping of mvs to neighboring locations that will
+     * make mbaff happy, so we can't move all this logic to fill_caches */
+    if(FRAME_MBAFF){
+        MpegEncContext *s = &h->s;
+        const uint32_t *mb_types = s->current_picture_ptr->mb_type;
+        const int16_t *mv;
+        *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
+        *C = h->mv_cache[list][scan8[0]-2];
+
+        if(!MB_FIELD
+           && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
+            int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
+            if(IS_INTERLACED(mb_types[topright_xy])){
+#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
+                const int x4 = X4, y4 = Y4;\
+                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
+                if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
+                    return LIST_NOT_USED;\
+                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
+                h->mv_cache[list][scan8[0]-2][0] = mv[0];\
+                h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
+                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
+
+                SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
+            }
+        }
+        if(topright_ref == PART_NOT_AVAILABLE
+           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
+           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
+            if(!MB_FIELD
+               && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
+                SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
+            }
+            if(MB_FIELD
+               && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
+               && i >= scan8[0]+8){
+                // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
+                SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
+            }
+        }
+#undef SET_DIAG_MV
+    }
+
+    if(topright_ref != PART_NOT_AVAILABLE){
+        *C= h->mv_cache[list][ i - 8 + part_width ];
+        return topright_ref;
+    }else{
+        tprintf("topright MV not available\n");
+
+        *C= h->mv_cache[list][ i - 8 - 1 ];
+        return h->ref_cache[list][ i - 8 - 1 ];
+    }
+}
+
+/**
+ * gets the predicted MV.
+ * @param n the block index
+ * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
+ * @param mx the x component of the predicted motion vector
+ * @param my the y component of the predicted motion vector
+ */
+static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
+    const int index8= scan8[n];
+    const int top_ref=      h->ref_cache[list][ index8 - 8 ];
+    const int left_ref=     h->ref_cache[list][ index8 - 1 ];
+    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
+    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
+    const int16_t * C;
+    int diagonal_ref, match_count;
+
+    assert(part_width==1 || part_width==2 || part_width==4);
+
+/* mv_cache
+  B . . A T T T T
+  U . . L . . , .
+  U . . L . . . .
+  U . . L . . , .
+  . . . L . . . .
+*/
+
+    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
+    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
+    tprintf("pred_motion match_count=%d\n", match_count);
+    if(match_count > 1){ //most common
+        *mx= mid_pred(A[0], B[0], C[0]);
+        *my= mid_pred(A[1], B[1], C[1]);
+    }else if(match_count==1){
+        if(left_ref==ref){
+            *mx= A[0];
+            *my= A[1];
+        }else if(top_ref==ref){
+            *mx= B[0];
+            *my= B[1];
+        }else{
+            *mx= C[0];
+            *my= C[1];
+        }
+    }else{
+        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
+            *mx= A[0];
+            *my= A[1];
+        }else{
+            *mx= mid_pred(A[0], B[0], C[0]);
+            *my= mid_pred(A[1], B[1], C[1]);
+        }
+    }
+
+    tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
+}
+
+/**
+ * gets the directionally predicted 16x8 MV.
+ * @param n the block index
+ * @param mx the x component of the predicted motion vector
+ * @param my the y component of the predicted motion vector
+ */
+static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
+    if(n==0){
+        const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
+        const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
+
+        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
+
+        if(top_ref == ref){
+            *mx= B[0];
+            *my= B[1];
+            return;
+        }
+    }else{
+        const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
+        const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
+
+        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
+
+        if(left_ref == ref){
+            *mx= A[0];
+            *my= A[1];
+            return;
+        }
+    }
+
+    //RARE
+    pred_motion(h, n, 4, list, ref, mx, my);
+}
+
+/**
+ * gets the directionally predicted 8x16 MV.
+ * @param n the block index
+ * @param mx the x component of the predicted motion vector
+ * @param my the y component of the predicted motion vector
+ */
+static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
+    if(n==0){
+        const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
+        const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
+
+        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
+
+        if(left_ref == ref){
+            *mx= A[0];
+            *my= A[1];
+            return;
+        }
+    }else{
+        const int16_t * C;
+        int diagonal_ref;
+
+        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
+
+        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
+
+        if(diagonal_ref == ref){
+            *mx= C[0];
+            *my= C[1];
+            return;
+        }
+    }
+
+    //RARE
+    pred_motion(h, n, 2, list, ref, mx, my);
+}
+
+static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
+    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
+    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
+
+    tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
+
+    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
+       || (top_ref == 0  && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
+       || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
+
+        *mx = *my = 0;
+        return;
+    }
+
+    pred_motion(h, 0, 4, 0, 0, mx, my);
+
+    return;
+}
+
+static inline void direct_dist_scale_factor(H264Context * const h){
+    const int poc = h->s.current_picture_ptr->poc;
+    const int poc1 = h->ref_list[1][0].poc;
+    int i;
+    for(i=0; i<h->ref_count[0]; i++){
+        int poc0 = h->ref_list[0][i].poc;
+        int td = clip(poc1 - poc0, -128, 127);
+        if(td == 0 /* FIXME || pic0 is a long-term ref */){
+            h->dist_scale_factor[i] = 256;
+        }else{
+            int tb = clip(poc - poc0, -128, 127);
+            int tx = (16384 + (FFABS(td) >> 1)) / td;
+            h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
+        }
+    }
+    if(FRAME_MBAFF){
+        for(i=0; i<h->ref_count[0]; i++){
+            h->dist_scale_factor_field[2*i] =
+            h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
+        }
+    }
+}
+static inline void direct_ref_list_init(H264Context * const h){
+    MpegEncContext * const s = &h->s;
+    Picture * const ref1 = &h->ref_list[1][0];
+    Picture * const cur = s->current_picture_ptr;
+    int list, i, j;
+    if(cur->pict_type == I_TYPE)
+        cur->ref_count[0] = 0;
+    if(cur->pict_type != B_TYPE)
+        cur->ref_count[1] = 0;
+    for(list=0; list<2; list++){
+        cur->ref_count[list] = h->ref_count[list];
+        for(j=0; j<h->ref_count[list]; j++)
+            cur->ref_poc[list][j] = h->ref_list[list][j].poc;
+    }
+    if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
+        return;
+    for(list=0; list<2; list++){
+        for(i=0; i<ref1->ref_count[list]; i++){
+            const int poc = ref1->ref_poc[list][i];
+            h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
+            for(j=0; j<h->ref_count[list]; j++)
+                if(h->ref_list[list][j].poc == poc){
+                    h->map_col_to_list0[list][i] = j;
+                    break;
+                }
+        }
+    }
+    if(FRAME_MBAFF){
+        for(list=0; list<2; list++){
+            for(i=0; i<ref1->ref_count[list]; i++){
+                j = h->map_col_to_list0[list][i];
+                h->map_col_to_list0_field[list][2*i] = 2*j;
+                h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
+            }
+        }
+    }
+}
+
+static inline void pred_direct_motion(H264Context * const h, int *mb_type){
+    MpegEncContext * const s = &h->s;
+    const int mb_xy =   s->mb_x +   s->mb_y*s->mb_stride;
+    const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
+    const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
+    const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
+    const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
+    const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
+    const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
+    const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
+    const int is_b8x8 = IS_8X8(*mb_type);
+    int sub_mb_type;
+    int i8, i4;
+
+#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
+    if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
+        /* FIXME save sub mb types from previous frames (or derive from MVs)
+         * so we know exactly what block size to use */
+        sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
+        *mb_type =    MB_TYPE_8x8|MB_TYPE_L0L1;
+    }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
+        sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
+        *mb_type =    MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
+    }else{
+        sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
+        *mb_type =    MB_TYPE_8x8|MB_TYPE_L0L1;
+    }
+    if(!is_b8x8)
+        *mb_type |= MB_TYPE_DIRECT2;
+    if(MB_FIELD)
+        *mb_type |= MB_TYPE_INTERLACED;
+
+    tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
+
+    if(h->direct_spatial_mv_pred){
+        int ref[2];
+        int mv[2][2];
+        int list;
+
+        /* FIXME interlacing + spatial direct uses wrong colocated block positions */
+
+        /* ref = min(neighbors) */
+        for(list=0; list<2; list++){
+            int refa = h->ref_cache[list][scan8[0] - 1];
+            int refb = h->ref_cache[list][scan8[0] - 8];
+            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
+            if(refc == -2)
+                refc = h->ref_cache[list][scan8[0] - 8 - 1];
+            ref[list] = refa;
+            if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
+                ref[list] = refb;
+            if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
+                ref[list] = refc;
+            if(ref[list] < 0)
+                ref[list] = -1;
+        }
+
+        if(ref[0] < 0 && ref[1] < 0){
+            ref[0] = ref[1] = 0;
+            mv[0][0] = mv[0][1] =
+            mv[1][0] = mv[1][1] = 0;
+        }else{
+            for(list=0; list<2; list++){
+                if(ref[list] >= 0)
+                    pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
+                else
+                    mv[list][0] = mv[list][1] = 0;
+            }
+        }
+
+        if(ref[1] < 0){
+            *mb_type &= ~MB_TYPE_P0L1;
+            sub_mb_type &= ~MB_TYPE_P0L1;
+        }else if(ref[0] < 0){
+            *mb_type &= ~MB_TYPE_P0L0;
+            sub_mb_type &= ~MB_TYPE_P0L0;
+        }
+
+        if(IS_16X16(*mb_type)){
+            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
+            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
+            if(!IS_INTRA(mb_type_col)
+               && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
+                   || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
+                       && (h->x264_build>33 || !h->x264_build)))){
+                if(ref[0] > 0)
+                    fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
+                else
+                    fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
+                if(ref[1] > 0)
+                    fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
+                else
+                    fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
+            }else{
+                fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
+                fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
+            }
+        }else{
+            for(i8=0; i8<4; i8++){
+                const int x8 = i8&1;
+                const int y8 = i8>>1;
+
+                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
+                    continue;
+                h->sub_mb_type[i8] = sub_mb_type;
+
+                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
+                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
+                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
+                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
+
+                /* col_zero_flag */
+                if(!IS_INTRA(mb_type_col) && (   l1ref0[x8 + y8*h->b8_stride] == 0
+                                              || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
+                                                  && (h->x264_build>33 || !h->x264_build)))){
+                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
+                    if(IS_SUB_8X8(sub_mb_type)){
+                        const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
+                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
+                            if(ref[0] == 0)
+                                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
+                            if(ref[1] == 0)
+                                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
+                        }
+                    }else
+                    for(i4=0; i4<4; i4++){
+                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
+                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
+                            if(ref[0] == 0)
+                                *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
+                            if(ref[1] == 0)
+                                *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
+                        }
+                    }
+                }
+            }
+        }
+    }else{ /* direct temporal mv pred */
+        const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
+        const int *dist_scale_factor = h->dist_scale_factor;
+
+        if(FRAME_MBAFF){
+            if(IS_INTERLACED(*mb_type)){
+                map_col_to_list0[0] = h->map_col_to_list0_field[0];
+                map_col_to_list0[1] = h->map_col_to_list0_field[1];
+                dist_scale_factor = h->dist_scale_factor_field;
+            }
+            if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
+                /* FIXME assumes direct_8x8_inference == 1 */
+                const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
+                int mb_types_col[2];
+                int y_shift;
+
+                *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
+                         | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
+                         | (*mb_type & MB_TYPE_INTERLACED);
+                sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
+
+                if(IS_INTERLACED(*mb_type)){
+                    /* frame to field scaling */
+                    mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
+                    mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
+                    if(s->mb_y&1){
+                        l1ref0 -= 2*h->b8_stride;
+                        l1ref1 -= 2*h->b8_stride;
+                        l1mv0 -= 4*h->b_stride;
+                        l1mv1 -= 4*h->b_stride;
+                    }
+                    y_shift = 0;
+
+                    if(   (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
+                       && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
+                       && !is_b8x8)
+                        *mb_type |= MB_TYPE_16x8;
+                    else
+                        *mb_type |= MB_TYPE_8x8;
+                }else{
+                    /* field to frame scaling */
+                    /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
+                     * but in MBAFF, top and bottom POC are equal */
+                    int dy = (s->mb_y&1) ? 1 : 2;
+                    mb_types_col[0] =
+                    mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
+                    l1ref0 += dy*h->b8_stride;
+                    l1ref1 += dy*h->b8_stride;
+                    l1mv0 += 2*dy*h->b_stride;
+                    l1mv1 += 2*dy*h->b_stride;
+                    y_shift = 2;
+
+                    if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
+                       && !is_b8x8)
+                        *mb_type |= MB_TYPE_16x16;
+                    else
+                        *mb_type |= MB_TYPE_8x8;
+                }
+
+                for(i8=0; i8<4; i8++){
+                    const int x8 = i8&1;
+                    const int y8 = i8>>1;
+                    int ref0, scale;
+                    const int16_t (*l1mv)[2]= l1mv0;
+
+                    if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
+                        continue;
+                    h->sub_mb_type[i8] = sub_mb_type;
+
+                    fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
+                    if(IS_INTRA(mb_types_col[y8])){
+                        fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
+                        fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
+                        fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
+                        continue;
+                    }
+
+                    ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
+                    if(ref0 >= 0)
+                        ref0 = map_col_to_list0[0][ref0*2>>y_shift];
+                    else{
+                        ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
+                        l1mv= l1mv1;
+                    }
+                    scale = dist_scale_factor[ref0];
+                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
+
+                    {
+                        const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
+                        int my_col = (mv_col[1]<<y_shift)/2;
+                        int mx = (scale * mv_col[0] + 128) >> 8;
+                        int my = (scale * my_col + 128) >> 8;
+                        fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
+                        fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
+                    }
+                }
+                return;
+            }
+        }
+
+        /* one-to-one mv scaling */
+
+        if(IS_16X16(*mb_type)){
+            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
+            if(IS_INTRA(mb_type_col)){
+                fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
+                fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
+                fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
+            }else{
+                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
+                                                : map_col_to_list0[1][l1ref1[0]];
+                const int scale = dist_scale_factor[ref0];
+                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
+                int mv_l0[2];
+                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
+                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
+                fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
+                fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
+                fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
+            }
+        }else{
+            for(i8=0; i8<4; i8++){
+                const int x8 = i8&1;
+                const int y8 = i8>>1;
+                int ref0, scale;
+                const int16_t (*l1mv)[2]= l1mv0;
+
+                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
+                    continue;
+                h->sub_mb_type[i8] = sub_mb_type;
+                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
+                if(IS_INTRA(mb_type_col)){
+                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
+                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
+                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
+                    continue;
+                }
+
+                ref0 = l1ref0[x8 + y8*h->b8_stride];
+                if(ref0 >= 0)
+                    ref0 = map_col_to_list0[0][ref0];
+                else{
+                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
+                    l1mv= l1mv1;
+                }
+                scale = dist_scale_factor[ref0];
+
+                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
+                if(IS_SUB_8X8(sub_mb_type)){
+                    const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
+                    int mx = (scale * mv_col[0] + 128) >> 8;
+                    int my = (scale * mv_col[1] + 128) >> 8;
+                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
+                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
+                }else
+                for(i4=0; i4<4; i4++){
+                    const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
+                    int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
+                    mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
+                    mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
+                    *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
+                        pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
+                }
+            }
+        }
+    }
+}
+
+static inline void write_back_motion(H264Context *h, int mb_type){
+    MpegEncContext * const s = &h->s;
+    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
+    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
+    int list;
+
+    if(!USES_LIST(mb_type, 0))
+        fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
+
+    for(list=0; list<2; list++){
+        int y;
+        if(!USES_LIST(mb_type, list))
+            continue;
+
+        for(y=0; y<4; y++){
+            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
+            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
+        }
+        if( h->pps.cabac ) {
+            if(IS_SKIP(mb_type))
+                fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
+            else
+            for(y=0; y<4; y++){
+                *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
+                *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
+            }
+        }
+
+        {
+            int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
+            ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
+            ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
+            ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
+            ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
+        }
+    }
+
+    if(h->slice_type == B_TYPE && h->pps.cabac){
+        if(IS_8X8(mb_type)){
+            uint8_t *direct_table = &h->direct_table[b8_xy];
+            direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
+            direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
+            direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
+        }
+    }
+}
+
+/**
+ * Decodes a network abstraction layer unit.
+ * @param consumed is the number of bytes used as input
+ * @param length is the length of the array
+ * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
+ * @returns decoded bytes, might be src+1 if no escapes
+ */
+static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
+    int i, si, di;
+    uint8_t *dst;
+
+//    src[0]&0x80;                //forbidden bit
+    h->nal_ref_idc= src[0]>>5;
+    h->nal_unit_type= src[0]&0x1F;
+
+    src++; length--;
+#if 0
+    for(i=0; i<length; i++)
+        printf("%2X ", src[i]);
+#endif
+    for(i=0; i+1<length; i+=2){
+        if(src[i]) continue;
+        if(i>0 && src[i-1]==0) i--;
+        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
+            if(src[i+2]!=3){
+                /* startcode, so we must be past the end */
+                length=i;
+            }
+            break;
+        }
+    }
+
+    if(i>=length-1){ //no escaped 0
+        *dst_length= length;
+        *consumed= length+1; //+1 for the header
+        return src;
+    }
+
+    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
+    dst= h->rbsp_buffer;
+
+//printf("decoding esc\n");
+    si=di=0;
+    while(si<length){
+        //remove escapes (very rare 1:2^22)
+        if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
+            if(src[si+2]==3){ //escape
+                dst[di++]= 0;
+                dst[di++]= 0;
+                si+=3;
+                continue;
+            }else //next start code
+                break;
+        }
+
+        dst[di++]= src[si++];
+    }
+
+    *dst_length= di;
+    *consumed= si + 1;//+1 for the header
+//FIXME store exact number of bits in the getbitcontext (its needed for decoding)
+    return dst;
+}
+
+#if 0
+/**
+ * @param src the data which should be escaped
+ * @param dst the target buffer, dst+1 == src is allowed as a special case
+ * @param length the length of the src data
+ * @param dst_length the length of the dst array
+ * @returns length of escaped data in bytes or -1 if an error occured
+ */
+static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
+    int i, escape_count, si, di;
+    uint8_t *temp;
+
+    assert(length>=0);
+    assert(dst_length>0);
+
+    dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
+
+    if(length==0) return 1;
+
+    escape_count= 0;
+    for(i=0; i<length; i+=2){
+        if(src[i]) continue;
+        if(i>0 && src[i-1]==0)
+            i--;
+        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
+            escape_count++;
+            i+=2;
+        }
+    }
+
+    if(escape_count==0){
+        if(dst+1 != src)
+            memcpy(dst+1, src, length);
+        return length + 1;
+    }
+
+    if(length + escape_count + 1> dst_length)
+        return -1;
+
+    //this should be damn rare (hopefully)
+
+    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
+    temp= h->rbsp_buffer;
+//printf("encoding esc\n");
+
+    si= 0;
+    di= 0;
+    while(si < length){
+        if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
+            temp[di++]= 0; si++;
+            temp[di++]= 0; si++;
+            temp[di++]= 3;
+            temp[di++]= src[si++];
+        }
+        else
+            temp[di++]= src[si++];
+    }
+    memcpy(dst+1, temp, length+escape_count);
+
+    assert(di == length+escape_count);
+
+    return di + 1;
+}
+
+/**
+ * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
+ */
+static void encode_rbsp_trailing(PutBitContext *pb){
+    int length;
+    put_bits(pb, 1, 1);
+    length= (-put_bits_count(pb))&7;
+    if(length) put_bits(pb, length, 0);
+}
+#endif
+
+/**
+ * identifies the exact end of the bitstream
+ * @return the length of the trailing, or 0 if damaged
+ */
+static int decode_rbsp_trailing(uint8_t *src){
+    int v= *src;
+    int r;
+
+    tprintf("rbsp trailing %X\n", v);
+
+    for(r=1; r<9; r++){
+        if(v&1) return r;
+        v>>=1;
+    }
+    return 0;
+}
+
+/**
+ * idct tranforms the 16 dc values and dequantize them.
+ * @param qp quantization parameter
+ */
+static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
+#define stride 16
+    int i;
+    int temp[16]; //FIXME check if this is a good idea
+    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
+    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
+
+//memset(block, 64, 2*256);
+//return;
+    for(i=0; i<4; i++){
+        const int offset= y_offset[i];
+        const int z0= block[offset+stride*0] + block[offset+stride*4];
+        const int z1= block[offset+stride*0] - block[offset+stride*4];
+        const int z2= block[offset+stride*1] - block[offset+stride*5];
+        const int z3= block[offset+stride*1] + block[offset+stride*5];
+
+        temp[4*i+0]= z0+z3;
+        temp[4*i+1]= z1+z2;
+        temp[4*i+2]= z1-z2;
+        temp[4*i+3]= z0-z3;
+    }
+
+    for(i=0; i<4; i++){
+        const int offset= x_offset[i];
+        const int z0= temp[4*0+i] + temp[4*2+i];
+        const int z1= temp[4*0+i] - temp[4*2+i];
+        const int z2= temp[4*1+i] - temp[4*3+i];
+        const int z3= temp[4*1+i] + temp[4*3+i];
+
+        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
+        block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
+        block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
+        block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
+    }
+}
+
+#if 0
+/**
+ * dct tranforms the 16 dc values.
+ * @param qp quantization parameter ??? FIXME
+ */
+static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
+//    const int qmul= dequant_coeff[qp][0];
+    int i;
+    int temp[16]; //FIXME check if this is a good idea
+    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
+    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
+
+    for(i=0; i<4; i++){
+        const int offset= y_offset[i];
+        const int z0= block[offset+stride*0] + block[offset+stride*4];
+        const int z1= block[offset+stride*0] - block[offset+stride*4];
+        const int z2= block[offset+stride*1] - block[offset+stride*5];
+        const int z3= block[offset+stride*1] + block[offset+stride*5];
+
+        temp[4*i+0]= z0+z3;
+        temp[4*i+1]= z1+z2;
+        temp[4*i+2]= z1-z2;
+        temp[4*i+3]= z0-z3;
+    }
+
+    for(i=0; i<4; i++){
+        const int offset= x_offset[i];
+        const int z0= temp[4*0+i] + temp[4*2+i];
+        const int z1= temp[4*0+i] - temp[4*2+i];
+        const int z2= temp[4*1+i] - temp[4*3+i];
+        const int z3= temp[4*1+i] + temp[4*3+i];
+
+        block[stride*0 +offset]= (z0 + z3)>>1;
+        block[stride*2 +offset]= (z1 + z2)>>1;
+        block[stride*8 +offset]= (z1 - z2)>>1;
+        block[stride*10+offset]= (z0 - z3)>>1;
+    }
+}
+#endif
+
+#undef xStride
+#undef stride
+
+static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
+    const int stride= 16*2;
+    const int xStride= 16;
+    int a,b,c,d,e;
+
+    a= block[stride*0 + xStride*0];
+    b= block[stride*0 + xStride*1];
+    c= block[stride*1 + xStride*0];
+    d= block[stride*1 + xStride*1];
+
+    e= a-b;
+    a= a+b;
+    b= c-d;
+    c= c+d;
+
+    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
+    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
+    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
+    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
+}
+
+#if 0
+static void chroma_dc_dct_c(DCTELEM *block){
+    const int stride= 16*2;
+    const int xStride= 16;
+    int a,b,c,d,e;
+
+    a= block[stride*0 + xStride*0];
+    b= block[stride*0 + xStride*1];
+    c= block[stride*1 + xStride*0];
+    d= block[stride*1 + xStride*1];
+
+    e= a-b;
+    a= a+b;
+    b= c-d;
+    c= c+d;
+
+    block[stride*0 + xStride*0]= (a+c);
+    block[stride*0 + xStride*1]= (e+b);
+    block[stride*1 + xStride*0]= (a-c);
+    block[stride*1 + xStride*1]= (e-b);
+}
+#endif
+
+/**
+ * gets the chroma qp.
+ */
+static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
+
+    return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
+}
+
+
+#if 0
+static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
+    int i;
+    //FIXME try int temp instead of block
+
+    for(i=0; i<4; i++){
+        const int d0= src1[0 + i*stride] - src2[0 + i*stride];
+        const int d1= src1[1 + i*stride] - src2[1 + i*stride];
+        const int d2= src1[2 + i*stride] - src2[2 + i*stride];
+        const int d3= src1[3 + i*stride] - src2[3 + i*stride];
+        const int z0= d0 + d3;
+        const int z3= d0 - d3;
+        const int z1= d1 + d2;
+        const int z2= d1 - d2;
+
+        block[0 + 4*i]=   z0 +   z1;
+        block[1 + 4*i]= 2*z3 +   z2;
+        block[2 + 4*i]=   z0 -   z1;
+        block[3 + 4*i]=   z3 - 2*z2;
+    }
+
+    for(i=0; i<4; i++){
+        const int z0= block[0*4 + i] + block[3*4 + i];
+        const int z3= block[0*4 + i] - block[3*4 + i];
+        const int z1= block[1*4 + i] + block[2*4 + i];
+        const int z2= block[1*4 + i] - block[2*4 + i];
+
+        block[0*4 + i]=   z0 +   z1;
+        block[1*4 + i]= 2*z3 +   z2;
+        block[2*4 + i]=   z0 -   z1;
+        block[3*4 + i]=   z3 - 2*z2;
+    }
+}
+#endif
+
+//FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
+//FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
+static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
+    int i;
+    const int * const quant_table= quant_coeff[qscale];
+    const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
+    const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
+    const unsigned int threshold2= (threshold1<<1);
+    int last_non_zero;
+
+    if(seperate_dc){
+        if(qscale<=18){
+            //avoid overflows
+            const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
+            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
+            const unsigned int dc_threshold2= (dc_threshold1<<1);
+
+            int level= block[0]*quant_coeff[qscale+18][0];
+            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
+                if(level>0){
+                    level= (dc_bias + level)>>(QUANT_SHIFT-2);
+                    block[0]= level;
+                }else{
+                    level= (dc_bias - level)>>(QUANT_SHIFT-2);
+                    block[0]= -level;
+                }
+//                last_non_zero = i;
+            }else{
+                block[0]=0;
+            }
+        }else{
+            const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
+            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
+            const unsigned int dc_threshold2= (dc_threshold1<<1);
+
+            int level= block[0]*quant_table[0];
+            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
+                if(level>0){
+                    level= (dc_bias + level)>>(QUANT_SHIFT+1);
+                    block[0]= level;
+                }else{
+                    level= (dc_bias - level)>>(QUANT_SHIFT+1);
+                    block[0]= -level;
+                }
+//                last_non_zero = i;
+            }else{
+                block[0]=0;
+            }
+        }
+        last_non_zero= 0;
+        i=1;
+    }else{
+        last_non_zero= -1;
+        i=0;
+    }
+
+    for(; i<16; i++){
+        const int j= scantable[i];
+        int level= block[j]*quant_table[j];
+
+//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
+//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
+        if(((unsigned)(level+threshold1))>threshold2){
+            if(level>0){
+                level= (bias + level)>>QUANT_SHIFT;
+                block[j]= level;
+            }else{
+                level= (bias - level)>>QUANT_SHIFT;
+                block[j]= -level;
+            }
+            last_non_zero = i;
+        }else{
+            block[j]=0;
+        }
+    }
+
+    return last_non_zero;
+}
+
+static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
+    const uint32_t a= ((uint32_t*)(src-stride))[0];
+    ((uint32_t*)(src+0*stride))[0]= a;
+    ((uint32_t*)(src+1*stride))[0]= a;
+    ((uint32_t*)(src+2*stride))[0]= a;
+    ((uint32_t*)(src+3*stride))[0]= a;
+}
+
+static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
+    ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
+    ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
+    ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
+    ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
+}
+
+static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
+    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
+                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
+
+    ((uint32_t*)(src+0*stride))[0]=
+    ((uint32_t*)(src+1*stride))[0]=
+    ((uint32_t*)(src+2*stride))[0]=
+    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
+}
+
+static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
+    const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
+
+    ((uint32_t*)(src+0*stride))[0]=
+    ((uint32_t*)(src+1*stride))[0]=
+    ((uint32_t*)(src+2*stride))[0]=
+    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
+}
+
+static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
+    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
+
+    ((uint32_t*)(src+0*stride))[0]=
+    ((uint32_t*)(src+1*stride))[0]=
+    ((uint32_t*)(src+2*stride))[0]=
+    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
+}
+
+static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
+    ((uint32_t*)(src+0*stride))[0]=
+    ((uint32_t*)(src+1*stride))[0]=
+    ((uint32_t*)(src+2*stride))[0]=
+    ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
+}
+
+
+#define LOAD_TOP_RIGHT_EDGE\
+    const int t4= topright[0];\
+    const int t5= topright[1];\
+    const int t6= topright[2];\
+    const int t7= topright[3];\
+
+#define LOAD_LEFT_EDGE\
+    const int l0= src[-1+0*stride];\
+    const int l1= src[-1+1*stride];\
+    const int l2= src[-1+2*stride];\
+    const int l3= src[-1+3*stride];\
+
+#define LOAD_TOP_EDGE\
+    const int t0= src[ 0-1*stride];\
+    const int t1= src[ 1-1*stride];\
+    const int t2= src[ 2-1*stride];\
+    const int t3= src[ 3-1*stride];\
+
+static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
+    const int lt= src[-1-1*stride];
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+
+    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
+    src[0+2*stride]=
+    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
+    src[0+1*stride]=
+    src[1+2*stride]=
+    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
+    src[0+0*stride]=
+    src[1+1*stride]=
+    src[2+2*stride]=
+    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
+    src[1+0*stride]=
+    src[2+1*stride]=
+    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
+    src[2+0*stride]=
+    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+}
+
+static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+//    LOAD_LEFT_EDGE
+
+    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
+    src[1+0*stride]=
+    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
+    src[2+0*stride]=
+    src[1+1*stride]=
+    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
+    src[3+0*stride]=
+    src[2+1*stride]=
+    src[1+2*stride]=
+    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
+    src[3+1*stride]=
+    src[2+2*stride]=
+    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
+    src[3+2*stride]=
+    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
+    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
+}
+
+static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
+    const int lt= src[-1-1*stride];
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+    const __attribute__((unused)) int unu= l3;
+
+    src[0+0*stride]=
+    src[1+2*stride]=(lt + t0 + 1)>>1;
+    src[1+0*stride]=
+    src[2+2*stride]=(t0 + t1 + 1)>>1;
+    src[2+0*stride]=
+    src[3+2*stride]=(t1 + t2 + 1)>>1;
+    src[3+0*stride]=(t2 + t3 + 1)>>1;
+    src[0+1*stride]=
+    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
+    src[1+1*stride]=
+    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
+    src[2+1*stride]=
+    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
+    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+}
+
+static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+    const __attribute__((unused)) int unu= t7;
+
+    src[0+0*stride]=(t0 + t1 + 1)>>1;
+    src[1+0*stride]=
+    src[0+2*stride]=(t1 + t2 + 1)>>1;
+    src[2+0*stride]=
+    src[1+2*stride]=(t2 + t3 + 1)>>1;
+    src[3+0*stride]=
+    src[2+2*stride]=(t3 + t4+ 1)>>1;
+    src[3+2*stride]=(t4 + t5+ 1)>>1;
+    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[1+1*stride]=
+    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+    src[2+1*stride]=
+    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
+    src[3+1*stride]=
+    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
+    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
+}
+
+static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
+    LOAD_LEFT_EDGE
+
+    src[0+0*stride]=(l0 + l1 + 1)>>1;
+    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+    src[2+0*stride]=
+    src[0+1*stride]=(l1 + l2 + 1)>>1;
+    src[3+0*stride]=
+    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
+    src[2+1*stride]=
+    src[0+2*stride]=(l2 + l3 + 1)>>1;
+    src[3+1*stride]=
+    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
+    src[3+2*stride]=
+    src[1+3*stride]=
+    src[0+3*stride]=
+    src[2+2*stride]=
+    src[2+3*stride]=
+    src[3+3*stride]=l3;
+}
+
+static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
+    const int lt= src[-1-1*stride];
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+    const __attribute__((unused)) int unu= t3;
+
+    src[0+0*stride]=
+    src[2+1*stride]=(lt + l0 + 1)>>1;
+    src[1+0*stride]=
+    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
+    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
+    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[0+1*stride]=
+    src[2+2*stride]=(l0 + l1 + 1)>>1;
+    src[1+1*stride]=
+    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
+    src[0+2*stride]=
+    src[2+3*stride]=(l1 + l2+ 1)>>1;
+    src[1+2*stride]=
+    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+    src[0+3*stride]=(l2 + l3 + 1)>>1;
+    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
+}
+
+static void pred16x16_vertical_c(uint8_t *src, int stride){
+    int i;
+    const uint32_t a= ((uint32_t*)(src-stride))[0];
+    const uint32_t b= ((uint32_t*)(src-stride))[1];
+    const uint32_t c= ((uint32_t*)(src-stride))[2];
+    const uint32_t d= ((uint32_t*)(src-stride))[3];
+
+    for(i=0; i<16; i++){
+        ((uint32_t*)(src+i*stride))[0]= a;
+        ((uint32_t*)(src+i*stride))[1]= b;
+        ((uint32_t*)(src+i*stride))[2]= c;
+        ((uint32_t*)(src+i*stride))[3]= d;
+    }
+}
+
+static void pred16x16_horizontal_c(uint8_t *src, int stride){
+    int i;
+
+    for(i=0; i<16; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]=
+        ((uint32_t*)(src+i*stride))[2]=
+        ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
+    }
+}
+
+static void pred16x16_dc_c(uint8_t *src, int stride){
+    int i, dc=0;
+
+    for(i=0;i<16; i++){
+        dc+= src[-1+i*stride];
+    }
+
+    for(i=0;i<16; i++){
+        dc+= src[i-stride];
+    }
+
+    dc= 0x01010101*((dc + 16)>>5);
+
+    for(i=0; i<16; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]=
+        ((uint32_t*)(src+i*stride))[2]=
+        ((uint32_t*)(src+i*stride))[3]= dc;
+    }
+}
+
+static void pred16x16_left_dc_c(uint8_t *src, int stride){
+    int i, dc=0;
+
+    for(i=0;i<16; i++){
+        dc+= src[-1+i*stride];
+    }
+
+    dc= 0x01010101*((dc + 8)>>4);
+
+    for(i=0; i<16; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]=
+        ((uint32_t*)(src+i*stride))[2]=
+        ((uint32_t*)(src+i*stride))[3]= dc;
+    }
+}
+
+static void pred16x16_top_dc_c(uint8_t *src, int stride){
+    int i, dc=0;
+
+    for(i=0;i<16; i++){
+        dc+= src[i-stride];
+    }
+    dc= 0x01010101*((dc + 8)>>4);
+
+    for(i=0; i<16; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]=
+        ((uint32_t*)(src+i*stride))[2]=
+        ((uint32_t*)(src+i*stride))[3]= dc;
+    }
+}
+
+static void pred16x16_128_dc_c(uint8_t *src, int stride){
+    int i;
+
+    for(i=0; i<16; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]=
+        ((uint32_t*)(src+i*stride))[2]=
+        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
+    }
+}
+
+static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
+  int i, j, k;
+  int a;
+  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+  const uint8_t * const src0 = src+7-stride;
+  const uint8_t *src1 = src+8*stride-1;
+  const uint8_t *src2 = src1-2*stride;      // == src+6*stride-1;
+  int H = src0[1] - src0[-1];
+  int V = src1[0] - src2[ 0];
+  for(k=2; k<=8; ++k) {
+    src1 += stride; src2 -= stride;
+    H += k*(src0[k] - src0[-k]);
+    V += k*(src1[0] - src2[ 0]);
+  }
+  if(svq3){
+    H = ( 5*(H/4) ) / 16;
+    V = ( 5*(V/4) ) / 16;
+
+    /* required for 100% accuracy */
+    i = H; H = V; V = i;
+  }else{
+    H = ( 5*H+32 ) >> 6;
+    V = ( 5*V+32 ) >> 6;
+  }
+
+  a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
+  for(j=16; j>0; --j) {
+    int b = a;
+    a += V;
+    for(i=-16; i<0; i+=4) {
+      src[16+i] = cm[ (b    ) >> 5 ];
+      src[17+i] = cm[ (b+  H) >> 5 ];
+      src[18+i] = cm[ (b+2*H) >> 5 ];
+      src[19+i] = cm[ (b+3*H) >> 5 ];
+      b += 4*H;
+    }
+    src += stride;
+  }
+}
+
+static void pred16x16_plane_c(uint8_t *src, int stride){
+    pred16x16_plane_compat_c(src, stride, 0);
+}
+
+static void pred8x8_vertical_c(uint8_t *src, int stride){
+    int i;
+    const uint32_t a= ((uint32_t*)(src-stride))[0];
+    const uint32_t b= ((uint32_t*)(src-stride))[1];
+
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]= a;
+        ((uint32_t*)(src+i*stride))[1]= b;
+    }
+}
+
+static void pred8x8_horizontal_c(uint8_t *src, int stride){
+    int i;
+
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
+    }
+}
+
+static void pred8x8_128_dc_c(uint8_t *src, int stride){
+    int i;
+
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
+    }
+}
+
+static void pred8x8_left_dc_c(uint8_t *src, int stride){
+    int i;
+    int dc0, dc2;
+
+    dc0=dc2=0;
+    for(i=0;i<4; i++){
+        dc0+= src[-1+i*stride];
+        dc2+= src[-1+(i+4)*stride];
+    }
+    dc0= 0x01010101*((dc0 + 2)>>2);
+    dc2= 0x01010101*((dc2 + 2)>>2);
+
+    for(i=0; i<4; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= dc0;
+    }
+    for(i=4; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= dc2;
+    }
+}
+
+static void pred8x8_top_dc_c(uint8_t *src, int stride){
+    int i;
+    int dc0, dc1;
+
+    dc0=dc1=0;
+    for(i=0;i<4; i++){
+        dc0+= src[i-stride];
+        dc1+= src[4+i-stride];
+    }
+    dc0= 0x01010101*((dc0 + 2)>>2);
+    dc1= 0x01010101*((dc1 + 2)>>2);
+
+    for(i=0; i<4; i++){
+        ((uint32_t*)(src+i*stride))[0]= dc0;
+        ((uint32_t*)(src+i*stride))[1]= dc1;
+    }
+    for(i=4; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]= dc0;
+        ((uint32_t*)(src+i*stride))[1]= dc1;
+    }
+}
+
+
+static void pred8x8_dc_c(uint8_t *src, int stride){
+    int i;
+    int dc0, dc1, dc2, dc3;
+
+    dc0=dc1=dc2=0;
+    for(i=0;i<4; i++){
+        dc0+= src[-1+i*stride] + src[i-stride];
+        dc1+= src[4+i-stride];
+        dc2+= src[-1+(i+4)*stride];
+    }
+    dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
+    dc0= 0x01010101*((dc0 + 4)>>3);
+    dc1= 0x01010101*((dc1 + 2)>>2);
+    dc2= 0x01010101*((dc2 + 2)>>2);
+
+    for(i=0; i<4; i++){
+        ((uint32_t*)(src+i*stride))[0]= dc0;
+        ((uint32_t*)(src+i*stride))[1]= dc1;
+    }
+    for(i=4; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]= dc2;
+        ((uint32_t*)(src+i*stride))[1]= dc3;
+    }
+}
+
+static void pred8x8_plane_c(uint8_t *src, int stride){
+  int j, k;
+  int a;
+  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+  const uint8_t * const src0 = src+3-stride;
+  const uint8_t *src1 = src+4*stride-1;
+  const uint8_t *src2 = src1-2*stride;      // == src+2*stride-1;
+  int H = src0[1] - src0[-1];
+  int V = src1[0] - src2[ 0];
+  for(k=2; k<=4; ++k) {
+    src1 += stride; src2 -= stride;
+    H += k*(src0[k] - src0[-k]);
+    V += k*(src1[0] - src2[ 0]);
+  }
+  H = ( 17*H+16 ) >> 5;
+  V = ( 17*V+16 ) >> 5;
+
+  a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
+  for(j=8; j>0; --j) {
+    int b = a;
+    a += V;
+    src[0] = cm[ (b    ) >> 5 ];
+    src[1] = cm[ (b+  H) >> 5 ];
+    src[2] = cm[ (b+2*H) >> 5 ];
+    src[3] = cm[ (b+3*H) >> 5 ];
+    src[4] = cm[ (b+4*H) >> 5 ];
+    src[5] = cm[ (b+5*H) >> 5 ];
+    src[6] = cm[ (b+6*H) >> 5 ];
+    src[7] = cm[ (b+7*H) >> 5 ];
+    src += stride;
+  }
+}
+
+#define SRC(x,y) src[(x)+(y)*stride]
+#define PL(y) \
+    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_LEFT \
+    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
+                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
+    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
+    const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
+
+#define PT(x) \
+    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_TOP \
+    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
+                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
+    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
+    const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
+                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
+
+#define PTR(x) \
+    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_TOPRIGHT \
+    int t8, t9, t10, t11, t12, t13, t14, t15; \
+    if(has_topright) { \
+        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
+        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
+    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
+
+#define PREDICT_8x8_LOAD_TOPLEFT \
+    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
+
+#define PREDICT_8x8_DC(v) \
+    int y; \
+    for( y = 0; y < 8; y++ ) { \
+        ((uint32_t*)src)[0] = \
+        ((uint32_t*)src)[1] = v; \
+        src += stride; \
+    }
+
+static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_DC(0x80808080);
+}
+static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_LEFT;
+    const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
+    PREDICT_8x8_DC(dc);
+}
+static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_TOP;
+    const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
+    PREDICT_8x8_DC(dc);
+}
+static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOP;
+    const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
+                         +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
+    PREDICT_8x8_DC(dc);
+}
+static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_LEFT;
+#define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
+               ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
+    ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
+#undef ROW
+}
+static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    int y;
+    PREDICT_8x8_LOAD_TOP;
+    src[0] = t0;
+    src[1] = t1;
+    src[2] = t2;
+    src[3] = t3;
+    src[4] = t4;
+    src[5] = t5;
+    src[6] = t6;
+    src[7] = t7;
+    for( y = 1; y < 8; y++ )
+        *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
+}
+static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_TOPRIGHT;
+    SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
+    SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
+    SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
+    SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
+    SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
+    SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
+    SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
+    SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
+    SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
+    SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
+}
+static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOPLEFT;
+    SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
+    SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
+    SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
+    SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
+    SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
+    SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
+    SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
+    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
+    SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
+    SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
+
+}
+static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOPLEFT;
+    SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
+    SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
+    SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
+    SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
+    SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
+    SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
+    SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
+    SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
+    SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
+    SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
+    SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
+    SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
+    SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
+    SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
+    SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
+    SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
+    SRC(7,0)= (t6 + t7 + 1) >> 1;
+}
+static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOPLEFT;
+    SRC(0,7)= (l6 + l7 + 1) >> 1;
+    SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
+    SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
+    SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
+    SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
+    SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
+    SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
+    SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
+    SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
+    SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
+    SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
+    SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
+    SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
+    SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
+    SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
+    SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
+    SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
+    SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
+    SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
+    SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
+    SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
+    SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
+}
+static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_TOPRIGHT;
+    SRC(0,0)= (t0 + t1 + 1) >> 1;
+    SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
+    SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
+    SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
+    SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
+    SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
+    SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
+    SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
+    SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
+    SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
+    SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
+    SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
+    SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
+    SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
+    SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
+    SRC(7,6)= (t10 + t11 + 1) >> 1;
+    SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
+}
+static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_LOAD_LEFT;
+    SRC(0,0)= (l0 + l1 + 1) >> 1;
+    SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
+    SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
+    SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
+    SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
+    SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
+    SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
+    SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
+    SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
+    SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
+    SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
+    SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
+    SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
+    SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
+    SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
+    SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
+    SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
+    SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
+}
+#undef PREDICT_8x8_LOAD_LEFT
+#undef PREDICT_8x8_LOAD_TOP
+#undef PREDICT_8x8_LOAD_TOPLEFT
+#undef PREDICT_8x8_LOAD_TOPRIGHT
+#undef PREDICT_8x8_DC
+#undef PTR
+#undef PT
+#undef PL
+#undef SRC
+
+static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
+                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                           int src_x_offset, int src_y_offset,
+                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
+    MpegEncContext * const s = &h->s;
+    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
+    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
+    const int luma_xy= (mx&3) + ((my&3)<<2);
+    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
+    uint8_t * src_cb, * src_cr;
+    int extra_width= h->emu_edge_width;
+    int extra_height= h->emu_edge_height;
+    int emu=0;
+    const int full_mx= mx>>2;
+    const int full_my= my>>2;
+    const int pic_width  = 16*s->mb_width;
+    const int pic_height = 16*s->mb_height >> MB_MBAFF;
+
+    if(!pic->data[0])
+        return;
+
+    if(mx&7) extra_width -= 3;
+    if(my&7) extra_height -= 3;
+
+    if(   full_mx < 0-extra_width
+       || full_my < 0-extra_height
+       || full_mx + 16/*FIXME*/ > pic_width + extra_width
+       || full_my + 16/*FIXME*/ > pic_height + extra_height){
+        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
+            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
+        emu=1;
+    }
+
+    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
+    if(!square){
+        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
+    }
+
+    if(s->flags&CODEC_FLAG_GRAY) return;
+
+    if(MB_MBAFF){
+        // chroma offset when predicting from a field of opposite parity
+        my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
+        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
+    }
+    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
+    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
+
+    if(emu){
+        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
+            src_cb= s->edge_emu_buffer;
+    }
+    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
+
+    if(emu){
+        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
+            src_cr= s->edge_emu_buffer;
+    }
+    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
+}
+
+static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
+                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                           int x_offset, int y_offset,
+                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
+                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
+                           int list0, int list1){
+    MpegEncContext * const s = &h->s;
+    qpel_mc_func *qpix_op=  qpix_put;
+    h264_chroma_mc_func chroma_op= chroma_put;
+
+    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
+    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
+    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
+    x_offset += 8*s->mb_x;
+    y_offset += 8*(s->mb_y >> MB_MBAFF);
+
+    if(list0){
+        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
+        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
+                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                           qpix_op, chroma_op);
+
+        qpix_op=  qpix_avg;
+        chroma_op= chroma_avg;
+    }
+
+    if(list1){
+        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
+        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
+                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                           qpix_op, chroma_op);
+    }
+}
+
+static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
+                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                           int x_offset, int y_offset,
+                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
+                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
+                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
+                           int list0, int list1){
+    MpegEncContext * const s = &h->s;
+
+    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
+    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
+    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
+    x_offset += 8*s->mb_x;
+    y_offset += 8*(s->mb_y >> MB_MBAFF);
+
+    if(list0 && list1){
+        /* don't optimize for luma-only case, since B-frames usually
+         * use implicit weights => chroma too. */
+        uint8_t *tmp_cb = s->obmc_scratchpad;
+        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
+        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
+        int refn0 = h->ref_cache[0][ scan8[n] ];
+        int refn1 = h->ref_cache[1][ scan8[n] ];
+
+        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
+                    dest_y, dest_cb, dest_cr,
+                    x_offset, y_offset, qpix_put, chroma_put);
+        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
+                    tmp_y, tmp_cb, tmp_cr,
+                    x_offset, y_offset, qpix_put, chroma_put);
+
+        if(h->use_weight == 2){
+            int weight0 = h->implicit_weight[refn0][refn1];
+            int weight1 = 64 - weight0;
+            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
+            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
+            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
+        }else{
+            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
+                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
+                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
+            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
+                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
+                            h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
+            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
+                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
+                            h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
+        }
+    }else{
+        int list = list1 ? 1 : 0;
+        int refn = h->ref_cache[list][ scan8[n] ];
+        Picture *ref= &h->ref_list[list][refn];
+        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
+                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                    qpix_put, chroma_put);
+
+        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
+                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
+        if(h->use_weight_chroma){
+            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
+                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
+            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
+                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
+        }
+    }
+}
+
+static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
+                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                           int x_offset, int y_offset,
+                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
+                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
+                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
+                           int list0, int list1){
+    if((h->use_weight==2 && list0 && list1
+        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
+       || h->use_weight==1)
+        mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
+                         x_offset, y_offset, qpix_put, chroma_put,
+                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
+    else
+        mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
+                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
+}
+
+static inline void prefetch_motion(H264Context *h, int list){
+    /* fetch pixels for estimated mv 4 macroblocks ahead
+     * optimized for 64byte cache lines */
+    MpegEncContext * const s = &h->s;
+    const int refn = h->ref_cache[list][scan8[0]];
+    if(refn >= 0){
+        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
+        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
+        uint8_t **src= h->ref_list[list][refn].data;
+        int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
+        s->dsp.prefetch(src[0]+off, s->linesize, 4);
+        off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
+        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
+    }
+}
+
+static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
+                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
+                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
+    const int mb_type= s->current_picture.mb_type[mb_xy];
+
+    assert(IS_INTER(mb_type));
+
+    prefetch_motion(h, 0);
+
+    if(IS_16X16(mb_type)){
+        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
+                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
+                &weight_op[0], &weight_avg[0],
+                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
+    }else if(IS_16X8(mb_type)){
+        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
+                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
+                &weight_op[1], &weight_avg[1],
+                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
+        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
+                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
+                &weight_op[1], &weight_avg[1],
+                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
+    }else if(IS_8X16(mb_type)){
+        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
+                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
+                &weight_op[2], &weight_avg[2],
+                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
+        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
+                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
+                &weight_op[2], &weight_avg[2],
+                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
+    }else{
+        int i;
+
+        assert(IS_8X8(mb_type));
+
+        for(i=0; i<4; i++){
+            const int sub_mb_type= h->sub_mb_type[i];
+            const int n= 4*i;
+            int x_offset= (i&1)<<2;
+            int y_offset= (i&2)<<1;
+
+            if(IS_SUB_8X8(sub_mb_type)){
+                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
+                    &weight_op[3], &weight_avg[3],
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+            }else if(IS_SUB_8X4(sub_mb_type)){
+                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
+                    &weight_op[4], &weight_avg[4],
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
+                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
+                    &weight_op[4], &weight_avg[4],
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+            }else if(IS_SUB_4X8(sub_mb_type)){
+                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
+                    &weight_op[5], &weight_avg[5],
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
+                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
+                    &weight_op[5], &weight_avg[5],
+                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+            }else{
+                int j;
+                assert(IS_SUB_4X4(sub_mb_type));
+                for(j=0; j<4; j++){
+                    int sub_x_offset= x_offset + 2*(j&1);
+                    int sub_y_offset= y_offset +   (j&2);
+                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
+                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
+                        &weight_op[6], &weight_avg[6],
+                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
+                }
+            }
+        }
+    }
+
+    prefetch_motion(h, 1);
+}
+
+static void decode_init_vlc(){
+    static int done = 0;
+
+    if (!done) {
+        int i;
+        done = 1;
+
+        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
+                 &chroma_dc_coeff_token_len [0], 1, 1,
+                 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
+
+        for(i=0; i<4; i++){
+            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
+                     &coeff_token_len [i][0], 1, 1,
+                     &coeff_token_bits[i][0], 1, 1, 1);
+        }
+
+        for(i=0; i<3; i++){
+            init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
+                     &chroma_dc_total_zeros_len [i][0], 1, 1,
+                     &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
+        }
+        for(i=0; i<15; i++){
+            init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
+                     &total_zeros_len [i][0], 1, 1,
+                     &total_zeros_bits[i][0], 1, 1, 1);
+        }
+
+        for(i=0; i<6; i++){
+            init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
+                     &run_len [i][0], 1, 1,
+                     &run_bits[i][0], 1, 1, 1);
+        }
+        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
+                 &run_len [6][0], 1, 1,
+                 &run_bits[6][0], 1, 1, 1);
+    }
+}
+
+/**
+ * Sets the intra prediction function pointers.
+ */
+static void init_pred_ptrs(H264Context *h){
+//    MpegEncContext * const s = &h->s;
+
+    h->pred4x4[VERT_PRED           ]= pred4x4_vertical_c;
+    h->pred4x4[HOR_PRED            ]= pred4x4_horizontal_c;
+    h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
+    h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
+    h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
+    h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
+    h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
+    h->pred4x4[VERT_LEFT_PRED      ]= pred4x4_vertical_left_c;
+    h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_c;
+    h->pred4x4[LEFT_DC_PRED        ]= pred4x4_left_dc_c;
+    h->pred4x4[TOP_DC_PRED         ]= pred4x4_top_dc_c;
+    h->pred4x4[DC_128_PRED         ]= pred4x4_128_dc_c;
+
+    h->pred8x8l[VERT_PRED           ]= pred8x8l_vertical_c;
+    h->pred8x8l[HOR_PRED            ]= pred8x8l_horizontal_c;
+    h->pred8x8l[DC_PRED             ]= pred8x8l_dc_c;
+    h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
+    h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
+    h->pred8x8l[VERT_RIGHT_PRED     ]= pred8x8l_vertical_right_c;
+    h->pred8x8l[HOR_DOWN_PRED       ]= pred8x8l_horizontal_down_c;
+    h->pred8x8l[VERT_LEFT_PRED      ]= pred8x8l_vertical_left_c;
+    h->pred8x8l[HOR_UP_PRED         ]= pred8x8l_horizontal_up_c;
+    h->pred8x8l[LEFT_DC_PRED        ]= pred8x8l_left_dc_c;
+    h->pred8x8l[TOP_DC_PRED         ]= pred8x8l_top_dc_c;
+    h->pred8x8l[DC_128_PRED         ]= pred8x8l_128_dc_c;
+
+    h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_c;
+    h->pred8x8[VERT_PRED8x8   ]= pred8x8_vertical_c;
+    h->pred8x8[HOR_PRED8x8    ]= pred8x8_horizontal_c;
+    h->pred8x8[PLANE_PRED8x8  ]= pred8x8_plane_c;
+    h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
+    h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
+    h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
+
+    h->pred16x16[DC_PRED8x8     ]= pred16x16_dc_c;
+    h->pred16x16[VERT_PRED8x8   ]= pred16x16_vertical_c;
+    h->pred16x16[HOR_PRED8x8    ]= pred16x16_horizontal_c;
+    h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_c;
+    h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
+    h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
+    h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
+}
+
+static void free_tables(H264Context *h){
+    av_freep(&h->intra4x4_pred_mode);
+    av_freep(&h->chroma_pred_mode_table);
+    av_freep(&h->cbp_table);
+    av_freep(&h->mvd_table[0]);
+    av_freep(&h->mvd_table[1]);
+    av_freep(&h->direct_table);
+    av_freep(&h->non_zero_count);
+    av_freep(&h->slice_table_base);
+    av_freep(&h->top_borders[1]);
+    av_freep(&h->top_borders[0]);
+    h->slice_table= NULL;
+
+    av_freep(&h->mb2b_xy);
+    av_freep(&h->mb2b8_xy);
+
+    av_freep(&h->s.obmc_scratchpad);
+}
+
+static void init_dequant8_coeff_table(H264Context *h){
+    int i,q,x;
+    const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
+    h->dequant8_coeff[0] = h->dequant8_buffer[0];
+    h->dequant8_coeff[1] = h->dequant8_buffer[1];
+
+    for(i=0; i<2; i++ ){
+        if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
+            h->dequant8_coeff[1] = h->dequant8_buffer[0];
+            break;
+        }
+
+        for(q=0; q<52; q++){
+            int shift = div6[q];
+            int idx = rem6[q];
+            for(x=0; x<64; x++)
+                h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
+                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
+                    h->pps.scaling_matrix8[i][x]) << shift;
+        }
+    }
+}
+
+static void init_dequant4_coeff_table(H264Context *h){
+    int i,j,q,x;
+    const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
+    for(i=0; i<6; i++ ){
+        h->dequant4_coeff[i] = h->dequant4_buffer[i];
+        for(j=0; j<i; j++){
+            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
+                h->dequant4_coeff[i] = h->dequant4_buffer[j];
+                break;
+            }
+        }
+        if(j<i)
+            continue;
+
+        for(q=0; q<52; q++){
+            int shift = div6[q] + 2;
+            int idx = rem6[q];
+            for(x=0; x<16; x++)
+                h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
+                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
+                    h->pps.scaling_matrix4[i][x]) << shift;
+        }
+    }
+}
+
+static void init_dequant_tables(H264Context *h){
+    int i,x;
+    init_dequant4_coeff_table(h);
+    if(h->pps.transform_8x8_mode)
+        init_dequant8_coeff_table(h);
+    if(h->sps.transform_bypass){
+        for(i=0; i<6; i++)
+            for(x=0; x<16; x++)
+                h->dequant4_coeff[i][0][x] = 1<<6;
+        if(h->pps.transform_8x8_mode)
+            for(i=0; i<2; i++)
+                for(x=0; x<64; x++)
+                    h->dequant8_coeff[i][0][x] = 1<<6;
+    }
+}
+
+
+/**
+ * allocates tables.
+ * needs width/height
+ */
+static int alloc_tables(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    const int big_mb_num= s->mb_stride * (s->mb_height+1);
+    int x,y;
+
+    CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
+
+    CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
+    CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
+    CHECKED_ALLOCZ(h->top_borders[0]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
+    CHECKED_ALLOCZ(h->top_borders[1]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
+    CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
+
+    if( h->pps.cabac ) {
+        CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
+        CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
+        CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
+        CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
+    }
+
+    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(uint8_t));
+    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
+
+    CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
+    CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
+    for(y=0; y<s->mb_height; y++){
+        for(x=0; x<s->mb_width; x++){
+            const int mb_xy= x + y*s->mb_stride;
+            const int b_xy = 4*x + 4*y*h->b_stride;
+            const int b8_xy= 2*x + 2*y*h->b8_stride;
+
+            h->mb2b_xy [mb_xy]= b_xy;
+            h->mb2b8_xy[mb_xy]= b8_xy;
+        }
+    }
+
+    s->obmc_scratchpad = NULL;
+
+    if(!h->dequant4_coeff[0])
+        init_dequant_tables(h);
+
+    return 0;
+fail:
+    free_tables(h);
+    return -1;
+}
+
+static void common_init(H264Context *h){
+    MpegEncContext * const s = &h->s;
+
+    s->width = s->avctx->width;
+    s->height = s->avctx->height;
+    s->codec_id= s->avctx->codec->id;
+
+    init_pred_ptrs(h);
+
+    h->dequant_coeff_pps= -1;
+    s->unrestricted_mv=1;
+    s->decode=1; //FIXME
+
+    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
+    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
+}
+
+static int decode_init(AVCodecContext *avctx){
+    H264Context *h= avctx->priv_data;
+    MpegEncContext * const s = &h->s;
+
+    MPV_decode_defaults(s);
+
+    s->avctx = avctx;
+    common_init(h);
+
+    s->out_format = FMT_H264;
+    s->workaround_bugs= avctx->workaround_bugs;
+
+    // set defaults
+//    s->decode_mb= ff_h263_decode_mb;
+    s->low_delay= 1;
+    avctx->pix_fmt= PIX_FMT_YUV420P;
+
+    decode_init_vlc();
+
+    if(avctx->extradata_size > 0 && avctx->extradata &&
+       *(char *)avctx->extradata == 1){
+        h->is_avc = 1;
+        h->got_avcC = 0;
+    } else {
+        h->is_avc = 0;
+    }
+
+    return 0;
+}
+
+static int frame_start(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    int i;
+
+    if(MPV_frame_start(s, s->avctx) < 0)
+        return -1;
+    ff_er_frame_start(s);
+
+    assert(s->linesize && s->uvlinesize);
+
+    for(i=0; i<16; i++){
+        h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
+    }
+    for(i=0; i<4; i++){
+        h->block_offset[16+i]=
+        h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[24+16+i]=
+        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+    }
+
+    /* can't be in alloc_tables because linesize isn't known there.
+     * FIXME: redo bipred weight to not require extra buffer? */
+    if(!s->obmc_scratchpad)
+        s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
+
+    /* some macroblocks will be accessed before they're available */
+    if(FRAME_MBAFF)
+        memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
+
+//    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
+    return 0;
+}
+
+static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
+    MpegEncContext * const s = &h->s;
+    int i;
+
+    src_y  -=   linesize;
+    src_cb -= uvlinesize;
+    src_cr -= uvlinesize;
+
+    // There are two lines saved, the line above the the top macroblock of a pair,
+    // and the line above the bottom macroblock
+    h->left_border[0]= h->top_borders[0][s->mb_x][15];
+    for(i=1; i<17; i++){
+        h->left_border[i]= src_y[15+i*  linesize];
+    }
+
+    *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
+    *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
+
+    if(!(s->flags&CODEC_FLAG_GRAY)){
+        h->left_border[17  ]= h->top_borders[0][s->mb_x][16+7];
+        h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
+        for(i=1; i<9; i++){
+            h->left_border[i+17  ]= src_cb[7+i*uvlinesize];
+            h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
+        }
+        *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
+        *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
+    }
+}
+
+static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
+    MpegEncContext * const s = &h->s;
+    int temp8, i;
+    uint64_t temp64;
+    int deblock_left = (s->mb_x > 0);
+    int deblock_top  = (s->mb_y > 0);
+
+    src_y  -=   linesize + 1;
+    src_cb -= uvlinesize + 1;
+    src_cr -= uvlinesize + 1;
+
+#define XCHG(a,b,t,xchg)\
+t= a;\
+if(xchg)\
+    a= b;\
+b= t;
+
+    if(deblock_left){
+        for(i = !deblock_top; i<17; i++){
+            XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
+        }
+    }
+
+    if(deblock_top){
+        XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
+        XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
+        if(s->mb_x+1 < s->mb_width){
+            XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
+        }
+    }
+
+    if(!(s->flags&CODEC_FLAG_GRAY)){
+        if(deblock_left){
+            for(i = !deblock_top; i<9; i++){
+                XCHG(h->left_border[i+17  ], src_cb[i*uvlinesize], temp8, xchg);
+                XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
+            }
+        }
+        if(deblock_top){
+            XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
+            XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
+        }
+    }
+}
+
+static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
+    MpegEncContext * const s = &h->s;
+    int i;
+
+    src_y  -= 2 *   linesize;
+    src_cb -= 2 * uvlinesize;
+    src_cr -= 2 * uvlinesize;
+
+    // There are two lines saved, the line above the the top macroblock of a pair,
+    // and the line above the bottom macroblock
+    h->left_border[0]= h->top_borders[0][s->mb_x][15];
+    h->left_border[1]= h->top_borders[1][s->mb_x][15];
+    for(i=2; i<34; i++){
+        h->left_border[i]= src_y[15+i*  linesize];
+    }
+
+    *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  32*linesize);
+    *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
+    *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y +  33*linesize);
+    *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
+
+    if(!(s->flags&CODEC_FLAG_GRAY)){
+        h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7];
+        h->left_border[34+   1]= h->top_borders[1][s->mb_x][16+7];
+        h->left_border[34+18  ]= h->top_borders[0][s->mb_x][24+7];
+        h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
+        for(i=2; i<18; i++){
+            h->left_border[i+34   ]= src_cb[7+i*uvlinesize];
+            h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
+        }
+        *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
+        *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
+        *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
+        *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
+    }
+}
+
+static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
+    MpegEncContext * const s = &h->s;
+    int temp8, i;
+    uint64_t temp64;
+    int deblock_left = (s->mb_x > 0);
+    int deblock_top  = (s->mb_y > 1);
+
+    tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
+
+    src_y  -= 2 *   linesize + 1;
+    src_cb -= 2 * uvlinesize + 1;
+    src_cr -= 2 * uvlinesize + 1;
+
+#define XCHG(a,b,t,xchg)\
+t= a;\
+if(xchg)\
+    a= b;\
+b= t;
+
+    if(deblock_left){
+        for(i = (!deblock_top)<<1; i<34; i++){
+            XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
+        }
+    }
+
+    if(deblock_top){
+        XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
+        XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
+        XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
+        XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
+        if(s->mb_x+1 < s->mb_width){
+            XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
+            XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
+        }
+    }
+
+    if(!(s->flags&CODEC_FLAG_GRAY)){
+        if(deblock_left){
+            for(i = (!deblock_top) << 1; i<18; i++){
+                XCHG(h->left_border[i+34   ], src_cb[i*uvlinesize], temp8, xchg);
+                XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
+            }
+        }
+        if(deblock_top){
+            XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
+            XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
+            XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
+            XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
+        }
+    }
+}
+
+static void hl_decode_mb(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    const int mb_x= s->mb_x;
+    const int mb_y= s->mb_y;
+    const int mb_xy= mb_x + mb_y*s->mb_stride;
+    const int mb_type= s->current_picture.mb_type[mb_xy];
+    uint8_t  *dest_y, *dest_cb, *dest_cr;
+    int linesize, uvlinesize /*dct_offset*/;
+    int i;
+    int *block_offset = &h->block_offset[0];
+    const unsigned int bottom = mb_y & 1;
+    const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
+    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
+    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
+
+    if(!s->decode)
+        return;
+
+    dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
+    dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
+    dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
+
+    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
+    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
+
+    if (MB_FIELD) {
+        linesize   = h->mb_linesize   = s->linesize * 2;
+        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
+        block_offset = &h->block_offset[24];
+        if(mb_y&1){ //FIXME move out of this func?
+            dest_y -= s->linesize*15;
+            dest_cb-= s->uvlinesize*7;
+            dest_cr-= s->uvlinesize*7;
+        }
+        if(FRAME_MBAFF) {
+            int list;
+            for(list=0; list<2; list++){
+                if(!USES_LIST(mb_type, list))
+                    continue;
+                if(IS_16X16(mb_type)){
+                    int8_t *ref = &h->ref_cache[list][scan8[0]];
+                    fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
+                }else{
+                    for(i=0; i<16; i+=4){
+                        //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
+                        int ref = h->ref_cache[list][scan8[i]];
+                        if(ref >= 0)
+                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
+                    }
+                }
+            }
+        }
+    } else {
+        linesize   = h->mb_linesize   = s->linesize;
+        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
+//        dct_offset = s->linesize * 16;
+    }
+
+    if(transform_bypass){
+        idct_dc_add =
+        idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
+    }else if(IS_8x8DCT(mb_type)){
+        idct_dc_add = s->dsp.h264_idct8_dc_add;
+        idct_add = s->dsp.h264_idct8_add;
+    }else{
+        idct_dc_add = s->dsp.h264_idct_dc_add;
+        idct_add = s->dsp.h264_idct_add;
+    }
+
+    if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
+       && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
+        int mbt_y = mb_y&~1;
+        uint8_t *top_y  = s->current_picture.data[0] + (mbt_y * 16* s->linesize  ) + mb_x * 16;
+        uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
+        uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
+        xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
+    }
+
+    if (IS_INTRA_PCM(mb_type)) {
+        unsigned int x, y;
+
+        // The pixels are stored in h->mb array in the same order as levels,
+        // copy them in output in the correct order.
+        for(i=0; i<16; i++) {
+            for (y=0; y<4; y++) {
+                for (x=0; x<4; x++) {
+                    *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
+                }
+            }
+        }
+        for(i=16; i<16+4; i++) {
+            for (y=0; y<4; y++) {
+                for (x=0; x<4; x++) {
+                    *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
+                }
+            }
+        }
+        for(i=20; i<20+4; i++) {
+            for (y=0; y<4; y++) {
+                for (x=0; x<4; x++) {
+                    *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
+                }
+            }
+        }
+    } else {
+        if(IS_INTRA(mb_type)){
+            if(h->deblocking_filter && !FRAME_MBAFF)
+                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
+
+            if(!(s->flags&CODEC_FLAG_GRAY)){
+                h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
+                h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
+            }
+
+            if(IS_INTRA4x4(mb_type)){
+                if(!s->encoding){
+                    if(IS_8x8DCT(mb_type)){
+                        for(i=0; i<16; i+=4){
+                            uint8_t * const ptr= dest_y + block_offset[i];
+                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
+                            const int nnz = h->non_zero_count_cache[ scan8[i] ];
+                            h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
+                                                   (h->topright_samples_available<<(i+1))&0x8000, linesize);
+                            if(nnz){
+                                if(nnz == 1 && h->mb[i*16])
+                                    idct_dc_add(ptr, h->mb + i*16, linesize);
+                                else
+                                    idct_add(ptr, h->mb + i*16, linesize);
+                            }
+                        }
+                    }else
+                    for(i=0; i<16; i++){
+                        uint8_t * const ptr= dest_y + block_offset[i];
+                        uint8_t *topright;
+                        const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
+                        int nnz, tr;
+
+                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
+                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
+                            assert(mb_y || linesize <= block_offset[i]);
+                            if(!topright_avail){
+                                tr= ptr[3 - linesize]*0x01010101;
+                                topright= (uint8_t*) &tr;
+                            }else
+                                topright= ptr + 4 - linesize;
+                        }else
+                            topright= NULL;
+
+                        h->pred4x4[ dir ](ptr, topright, linesize);
+                        nnz = h->non_zero_count_cache[ scan8[i] ];
+                        if(nnz){
+                            if(s->codec_id == CODEC_ID_H264){
+                                if(nnz == 1 && h->mb[i*16])
+                                    idct_dc_add(ptr, h->mb + i*16, linesize);
+                                else
+                                    idct_add(ptr, h->mb + i*16, linesize);
+                            }else
+                                svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
+                        }
+                    }
+                }
+            }else{
+                h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
+                if(s->codec_id == CODEC_ID_H264){
+                    if(!transform_bypass)
+                        h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
+                }else
+                    svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
+            }
+            if(h->deblocking_filter && !FRAME_MBAFF)
+                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
+        }else if(s->codec_id == CODEC_ID_H264){
+            hl_motion(h, dest_y, dest_cb, dest_cr,
+                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
+                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+                      s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
+        }
+
+
+        if(!IS_INTRA4x4(mb_type)){
+            if(s->codec_id == CODEC_ID_H264){
+                if(IS_INTRA16x16(mb_type)){
+                    for(i=0; i<16; i++){
+                        if(h->non_zero_count_cache[ scan8[i] ])
+                            idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
+                        else if(h->mb[i*16])
+                            idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
+                    }
+                }else{
+                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
+                    for(i=0; i<16; i+=di){
+                        int nnz = h->non_zero_count_cache[ scan8[i] ];
+                        if(nnz){
+                            if(nnz==1 && h->mb[i*16])
+                                idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
+                            else
+                                idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
+                        }
+                    }
+                }
+            }else{
+                for(i=0; i<16; i++){
+                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
+                        uint8_t * const ptr= dest_y + block_offset[i];
+                        svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
+                    }
+                }
+            }
+        }
+
+        if(!(s->flags&CODEC_FLAG_GRAY)){
+            uint8_t *dest[2] = {dest_cb, dest_cr};
+            if(transform_bypass){
+                idct_add = idct_dc_add = s->dsp.add_pixels4;
+            }else{
+                idct_add = s->dsp.h264_idct_add;
+                idct_dc_add = s->dsp.h264_idct_dc_add;
+                chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
+                chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
+            }
+            if(s->codec_id == CODEC_ID_H264){
+                for(i=16; i<16+8; i++){
+                    if(h->non_zero_count_cache[ scan8[i] ])
+                        idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
+                    else if(h->mb[i*16])
+                        idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
+                }
+            }else{
+                for(i=16; i<16+8; i++){
+                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                        uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
+                        svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
+                    }
+                }
+            }
+        }
+    }
+    if(h->deblocking_filter) {
+        if (FRAME_MBAFF) {
+            //FIXME try deblocking one mb at a time?
+            // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
+            const int mb_y = s->mb_y - 1;
+            uint8_t  *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
+            const int mb_xy= mb_x + mb_y*s->mb_stride;
+            const int mb_type_top   = s->current_picture.mb_type[mb_xy];
+            const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
+            if (!bottom) return;
+            pair_dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
+            pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
+            pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
+
+            if(IS_INTRA(mb_type_top | mb_type_bottom))
+                xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
+
+            backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
+            // deblock a pair
+            // top
+            s->mb_y--;
+            tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
+            fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
+            h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
+            filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
+            // bottom
+            s->mb_y++;
+            tprintf("call mbaff filter_mb\n");
+            fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
+            h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
+            filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
+        } else {
+            tprintf("call filter_mb\n");
+            backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
+            fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
+            filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
+        }
+    }
+}
+
+/**
+ * fills the default_ref_list.
+ */
+static int fill_default_ref_list(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    int i;
+    int smallest_poc_greater_than_current = -1;
+    Picture sorted_short_ref[32];
+
+    if(h->slice_type==B_TYPE){
+        int out_i;
+        int limit= INT_MIN;
+
+        /* sort frame according to poc in B slice */
+        for(out_i=0; out_i<h->short_ref_count; out_i++){
+            int best_i=INT_MIN;
+            int best_poc=INT_MAX;
+
+            for(i=0; i<h->short_ref_count; i++){
+                const int poc= h->short_ref[i]->poc;
+                if(poc > limit && poc < best_poc){
+                    best_poc= poc;
+                    best_i= i;
+                }
+            }
+
+            assert(best_i != INT_MIN);
+
+            limit= best_poc;
+            sorted_short_ref[out_i]= *h->short_ref[best_i];
+            tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
+            if (-1 == smallest_poc_greater_than_current) {
+                if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
+                    smallest_poc_greater_than_current = out_i;
+                }
+            }
+        }
+    }
+
+    if(s->picture_structure == PICT_FRAME){
+        if(h->slice_type==B_TYPE){
+            int list;
+            tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
+
+            // find the largest poc
+            for(list=0; list<2; list++){
+                int index = 0;
+                int j= -99;
+                int step= list ? -1 : 1;
+
+                for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
+                    while(j<0 || j>= h->short_ref_count){
+                        if(j != -99 && step == (list ? -1 : 1))
+                            return -1;
+                        step = -step;
+                        j= smallest_poc_greater_than_current + (step>>1);
+                    }
+                    if(sorted_short_ref[j].reference != 3) continue;
+                    h->default_ref_list[list][index  ]= sorted_short_ref[j];
+                    h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
+                }
+
+                for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
+                    if(h->long_ref[i] == NULL) continue;
+                    if(h->long_ref[i]->reference != 3) continue;
+
+                    h->default_ref_list[ list ][index  ]= *h->long_ref[i];
+                    h->default_ref_list[ list ][index++].pic_id= i;;
+                }
+
+                if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
+                    // swap the two first elements of L1 when
+                    // L0 and L1 are identical
+                    Picture temp= h->default_ref_list[1][0];
+                    h->default_ref_list[1][0] = h->default_ref_list[1][1];
+                    h->default_ref_list[1][1] = temp;
+                }
+
+                if(index < h->ref_count[ list ])
+                    memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
+            }
+        }else{
+            int index=0;
+            for(i=0; i<h->short_ref_count; i++){
+                if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
+                h->default_ref_list[0][index  ]= *h->short_ref[i];
+                h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
+            }
+            for(i = 0; i < 16; i++){
+                if(h->long_ref[i] == NULL) continue;
+                if(h->long_ref[i]->reference != 3) continue;
+                h->default_ref_list[0][index  ]= *h->long_ref[i];
+                h->default_ref_list[0][index++].pic_id= i;;
+            }
+            if(index < h->ref_count[0])
+                memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
+        }
+    }else{ //FIELD
+        if(h->slice_type==B_TYPE){
+        }else{
+            //FIXME second field balh
+        }
+    }
+#ifdef TRACE
+    for (i=0; i<h->ref_count[0]; i++) {
+        tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
+    }
+    if(h->slice_type==B_TYPE){
+        for (i=0; i<h->ref_count[1]; i++) {
+            tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
+        }
+    }
+#endif
+    return 0;
+}
+
+static void print_short_term(H264Context *h);
+static void print_long_term(H264Context *h);
+
+static int decode_ref_pic_list_reordering(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    int list, index;
+
+    print_short_term(h);
+    print_long_term(h);
+    if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
+
+    for(list=0; list<2; list++){
+        memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
+
+        if(get_bits1(&s->gb)){
+            int pred= h->curr_pic_num;
+
+            for(index=0; ; index++){
+                int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
+                int pic_id;
+                int i;
+                Picture *ref = NULL;
+
+                if(reordering_of_pic_nums_idc==3)
+                    break;
+
+                if(index >= h->ref_count[list]){
+                    av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
+                    return -1;
+                }
+
+                if(reordering_of_pic_nums_idc<3){
+                    if(reordering_of_pic_nums_idc<2){
+                        const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
+
+                        if(abs_diff_pic_num >= h->max_pic_num){
+                            av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
+                            return -1;
+                        }
+
+                        if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
+                        else                                pred+= abs_diff_pic_num;
+                        pred &= h->max_pic_num - 1;
+
+                        for(i= h->short_ref_count-1; i>=0; i--){
+                            ref = h->short_ref[i];
+                            assert(ref->reference == 3);
+                            assert(!ref->long_ref);
+                            if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
+                                break;
+                        }
+                        if(i>=0)
+                            ref->pic_id= ref->frame_num;
+                    }else{
+                        pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
+                        ref = h->long_ref[pic_id];
+                        ref->pic_id= pic_id;
+                        assert(ref->reference == 3);
+                        assert(ref->long_ref);
+                        i=0;
+                    }
+
+                    if (i < 0) {
+                        av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
+                        memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
+                    } else {
+                        for(i=index; i+1<h->ref_count[list]; i++){
+                            if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
+                                break;
+                        }
+                        for(; i > index; i--){
+                            h->ref_list[list][i]= h->ref_list[list][i-1];
+                        }
+                        h->ref_list[list][index]= *ref;
+                    }
+                }else{
+                    av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
+                    return -1;
+                }
+            }
+        }
+
+        if(h->slice_type!=B_TYPE) break;
+    }
+    for(list=0; list<2; list++){
+        for(index= 0; index < h->ref_count[list]; index++){
+            if(!h->ref_list[list][index].data[0])
+                h->ref_list[list][index]= s->current_picture;
+        }
+        if(h->slice_type!=B_TYPE) break;
+    }
+
+    if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
+        direct_dist_scale_factor(h);
+    direct_ref_list_init(h);
+    return 0;
+}
+
+static void fill_mbaff_ref_list(H264Context *h){
+    int list, i, j;
+    for(list=0; list<2; list++){
+        for(i=0; i<h->ref_count[list]; i++){
+            Picture *frame = &h->ref_list[list][i];
+            Picture *field = &h->ref_list[list][16+2*i];
+            field[0] = *frame;
+            for(j=0; j<3; j++)
+                field[0].linesize[j] <<= 1;
+            field[1] = field[0];
+            for(j=0; j<3; j++)
+                field[1].data[j] += frame->linesize[j];
+
+            h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
+            h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
+            for(j=0; j<2; j++){
+                h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
+                h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
+            }
+        }
+    }
+    for(j=0; j<h->ref_count[1]; j++){
+        for(i=0; i<h->ref_count[0]; i++)
+            h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
+        memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
+        memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
+    }
+}
+
+static int pred_weight_table(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    int list, i;
+    int luma_def, chroma_def;
+
+    h->use_weight= 0;
+    h->use_weight_chroma= 0;
+    h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
+    h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
+    luma_def = 1<<h->luma_log2_weight_denom;
+    chroma_def = 1<<h->chroma_log2_weight_denom;
+
+    for(list=0; list<2; list++){
+        for(i=0; i<h->ref_count[list]; i++){
+            int luma_weight_flag, chroma_weight_flag;
+
+            luma_weight_flag= get_bits1(&s->gb);
+            if(luma_weight_flag){
+                h->luma_weight[list][i]= get_se_golomb(&s->gb);
+                h->luma_offset[list][i]= get_se_golomb(&s->gb);
+                if(   h->luma_weight[list][i] != luma_def
+                   || h->luma_offset[list][i] != 0)
+                    h->use_weight= 1;
+            }else{
+                h->luma_weight[list][i]= luma_def;
+                h->luma_offset[list][i]= 0;
+            }
+
+            chroma_weight_flag= get_bits1(&s->gb);
+            if(chroma_weight_flag){
+                int j;
+                for(j=0; j<2; j++){
+                    h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
+                    h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
+                    if(   h->chroma_weight[list][i][j] != chroma_def
+                       || h->chroma_offset[list][i][j] != 0)
+                        h->use_weight_chroma= 1;
+                }
+            }else{
+                int j;
+                for(j=0; j<2; j++){
+                    h->chroma_weight[list][i][j]= chroma_def;
+                    h->chroma_offset[list][i][j]= 0;
+                }
+            }
+        }
+        if(h->slice_type != B_TYPE) break;
+    }
+    h->use_weight= h->use_weight || h->use_weight_chroma;
+    return 0;
+}
+
+static void implicit_weight_table(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    int ref0, ref1;
+    int cur_poc = s->current_picture_ptr->poc;
+
+    if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
+       && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
+        h->use_weight= 0;
+        h->use_weight_chroma= 0;
+        return;
+    }
+
+    h->use_weight= 2;
+    h->use_weight_chroma= 2;
+    h->luma_log2_weight_denom= 5;
+    h->chroma_log2_weight_denom= 5;
+
+    for(ref0=0; ref0 < h->ref_count[0]; ref0++){
+        int poc0 = h->ref_list[0][ref0].poc;
+        for(ref1=0; ref1 < h->ref_count[1]; ref1++){
+            int poc1 = h->ref_list[1][ref1].poc;
+            int td = clip(poc1 - poc0, -128, 127);
+            if(td){
+                int tb = clip(cur_poc - poc0, -128, 127);
+                int tx = (16384 + (FFABS(td) >> 1)) / td;
+                int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
+                if(dist_scale_factor < -64 || dist_scale_factor > 128)
+                    h->implicit_weight[ref0][ref1] = 32;
+                else
+                    h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
+            }else
+                h->implicit_weight[ref0][ref1] = 32;
+        }
+    }
+}
+
+static inline void unreference_pic(H264Context *h, Picture *pic){
+    int i;
+    pic->reference=0;
+    if(pic == h->delayed_output_pic)
+        pic->reference=1;
+    else{
+        for(i = 0; h->delayed_pic[i]; i++)
+            if(pic == h->delayed_pic[i]){
+                pic->reference=1;
+                break;
+            }
+    }
+}
+
+/**
+ * instantaneous decoder refresh.
+ */
+static void idr(H264Context *h){
+    int i;
+
+    for(i=0; i<16; i++){
+        if (h->long_ref[i] != NULL) {
+            unreference_pic(h, h->long_ref[i]);
+            h->long_ref[i]= NULL;
+        }
+    }
+    h->long_ref_count=0;
+
+    for(i=0; i<h->short_ref_count; i++){
+        unreference_pic(h, h->short_ref[i]);
+        h->short_ref[i]= NULL;
+    }
+    h->short_ref_count=0;
+}
+
+/* forget old pics after a seek */
+static void flush_dpb(AVCodecContext *avctx){
+    H264Context *h= avctx->priv_data;
+    int i;
+    for(i=0; i<16; i++) {
+        if(h->delayed_pic[i])
+            h->delayed_pic[i]->reference= 0;
+        h->delayed_pic[i]= NULL;
+    }
+    if(h->delayed_output_pic)
+        h->delayed_output_pic->reference= 0;
+    h->delayed_output_pic= NULL;
+    idr(h);
+    if(h->s.current_picture_ptr)
+        h->s.current_picture_ptr->reference= 0;
+}
+
+/**
+ *
+ * @return the removed picture or NULL if an error occurs
+ */
+static Picture * remove_short(H264Context *h, int frame_num){
+    MpegEncContext * const s = &h->s;
+    int i;
+
+    if(s->avctx->debug&FF_DEBUG_MMCO)
+        av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
+
+    for(i=0; i<h->short_ref_count; i++){
+        Picture *pic= h->short_ref[i];
+        if(s->avctx->debug&FF_DEBUG_MMCO)
+            av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
+        if(pic->frame_num == frame_num){
+            h->short_ref[i]= NULL;
+            memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
+            h->short_ref_count--;
+            return pic;
+        }
+    }
+    return NULL;
+}
+
+/**
+ *
+ * @return the removed picture or NULL if an error occurs
+ */
+static Picture * remove_long(H264Context *h, int i){
+    Picture *pic;
+
+    pic= h->long_ref[i];
+    h->long_ref[i]= NULL;
+    if(pic) h->long_ref_count--;
+
+    return pic;
+}
+
+/**
+ * print short term list
+ */
+static void print_short_term(H264Context *h) {
+    uint32_t i;
+    if(h->s.avctx->debug&FF_DEBUG_MMCO) {
+        av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
+        for(i=0; i<h->short_ref_count; i++){
+            Picture *pic= h->short_ref[i];
+            av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
+        }
+    }
+}
+
+/**
+ * print long term list
+ */
+static void print_long_term(H264Context *h) {
+    uint32_t i;
+    if(h->s.avctx->debug&FF_DEBUG_MMCO) {
+        av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
+        for(i = 0; i < 16; i++){
+            Picture *pic= h->long_ref[i];
+            if (pic) {
+                av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
+            }
+        }
+    }
+}
+
+/**
+ * Executes the reference picture marking (memory management control operations).
+ */
+static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
+    MpegEncContext * const s = &h->s;
+    int i, j;
+    int current_is_long=0;
+    Picture *pic;
+
+    if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
+        av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
+
+    for(i=0; i<mmco_count; i++){
+        if(s->avctx->debug&FF_DEBUG_MMCO)
+            av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
+
+        switch(mmco[i].opcode){
+        case MMCO_SHORT2UNUSED:
+            pic= remove_short(h, mmco[i].short_frame_num);
+            if(pic)
+                unreference_pic(h, pic);
+            else if(s->avctx->debug&FF_DEBUG_MMCO)
+                av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
+            break;
+        case MMCO_SHORT2LONG:
+            pic= remove_long(h, mmco[i].long_index);
+            if(pic) unreference_pic(h, pic);
+
+            h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
+            h->long_ref[ mmco[i].long_index ]->long_ref=1;
+            h->long_ref_count++;
+            break;
+        case MMCO_LONG2UNUSED:
+            pic= remove_long(h, mmco[i].long_index);
+            if(pic)
+                unreference_pic(h, pic);
+            else if(s->avctx->debug&FF_DEBUG_MMCO)
+                av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
+            break;
+        case MMCO_LONG:
+            pic= remove_long(h, mmco[i].long_index);
+            if(pic) unreference_pic(h, pic);
+
+            h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
+            h->long_ref[ mmco[i].long_index ]->long_ref=1;
+            h->long_ref_count++;
+
+            current_is_long=1;
+            break;
+        case MMCO_SET_MAX_LONG:
+            assert(mmco[i].long_index <= 16);
+            // just remove the long term which index is greater than new max
+            for(j = mmco[i].long_index; j<16; j++){
+                pic = remove_long(h, j);
+                if (pic) unreference_pic(h, pic);
+            }
+            break;
+        case MMCO_RESET:
+            while(h->short_ref_count){
+                pic= remove_short(h, h->short_ref[0]->frame_num);
+                unreference_pic(h, pic);
+            }
+            for(j = 0; j < 16; j++) {
+                pic= remove_long(h, j);
+                if(pic) unreference_pic(h, pic);
+            }
+            break;
+        default: assert(0);
+        }
+    }
+
+    if(!current_is_long){
+        pic= remove_short(h, s->current_picture_ptr->frame_num);
+        if(pic){
+            unreference_pic(h, pic);
+            av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
+        }
+
+        if(h->short_ref_count)
+            memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
+
+        h->short_ref[0]= s->current_picture_ptr;
+        h->short_ref[0]->long_ref=0;
+        h->short_ref_count++;
+    }
+
+    print_short_term(h);
+    print_long_term(h);
+    return 0;
+}
+
+static int decode_ref_pic_marking(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    int i;
+
+    if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
+        s->broken_link= get_bits1(&s->gb) -1;
+        h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
+        if(h->mmco[0].long_index == -1)
+            h->mmco_index= 0;
+        else{
+            h->mmco[0].opcode= MMCO_LONG;
+            h->mmco_index= 1;
+        }
+    }else{
+        if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
+            for(i= 0; i<MAX_MMCO_COUNT; i++) {
+                MMCOOpcode opcode= get_ue_golomb(&s->gb);;
+
+                h->mmco[i].opcode= opcode;
+                if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
+                    h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
+/*                    if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
+                        av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
+                        return -1;
+                    }*/
+                }
+                if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
+                    h->mmco[i].long_index= get_ue_golomb(&s->gb);
+                    if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
+                        av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
+                        return -1;
+                    }
+                }
+
+                if(opcode > MMCO_LONG){
+                    av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
+                    return -1;
+                }
+                if(opcode == MMCO_END)
+                    break;
+            }
+            h->mmco_index= i;
+        }else{
+            assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
+
+            if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
+                h->mmco[0].opcode= MMCO_SHORT2UNUSED;
+                h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
+                h->mmco_index= 1;
+            }else
+                h->mmco_index= 0;
+        }
+    }
+
+    return 0;
+}
+
+static int init_poc(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
+    int field_poc[2];
+
+    if(h->nal_unit_type == NAL_IDR_SLICE){
+        h->frame_num_offset= 0;
+    }else{
+        if(h->frame_num < h->prev_frame_num)
+            h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
+        else
+            h->frame_num_offset= h->prev_frame_num_offset;
+    }
+
+    if(h->sps.poc_type==0){
+        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
+
+        if(h->nal_unit_type == NAL_IDR_SLICE){
+             h->prev_poc_msb=
+             h->prev_poc_lsb= 0;
+        }
+
+        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
+            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
+        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
+            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
+        else
+            h->poc_msb = h->prev_poc_msb;
+//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
+        field_poc[0] =
+        field_poc[1] = h->poc_msb + h->poc_lsb;
+        if(s->picture_structure == PICT_FRAME)
+            field_poc[1] += h->delta_poc_bottom;
+    }else if(h->sps.poc_type==1){
+        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
+        int i;
+
+        if(h->sps.poc_cycle_length != 0)
+            abs_frame_num = h->frame_num_offset + h->frame_num;
+        else
+            abs_frame_num = 0;
+
+        if(h->nal_ref_idc==0 && abs_frame_num > 0)
+            abs_frame_num--;
+
+        expected_delta_per_poc_cycle = 0;
+        for(i=0; i < h->sps.poc_cycle_length; i++)
+            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
+
+        if(abs_frame_num > 0){
+            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
+            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
+
+            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
+            for(i = 0; i <= frame_num_in_poc_cycle; i++)
+                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
+        } else
+            expectedpoc = 0;
+
+        if(h->nal_ref_idc == 0)
+            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
+
+        field_poc[0] = expectedpoc + h->delta_poc[0];
+        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
+
+        if(s->picture_structure == PICT_FRAME)
+            field_poc[1] += h->delta_poc[1];
+    }else{
+        int poc;
+        if(h->nal_unit_type == NAL_IDR_SLICE){
+            poc= 0;
+        }else{
+            if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
+            else               poc= 2*(h->frame_num_offset + h->frame_num) - 1;
+        }
+        field_poc[0]= poc;
+        field_poc[1]= poc;
+    }
+
+    if(s->picture_structure != PICT_BOTTOM_FIELD)
+        s->current_picture_ptr->field_poc[0]= field_poc[0];
+    if(s->picture_structure != PICT_TOP_FIELD)
+        s->current_picture_ptr->field_poc[1]= field_poc[1];
+    if(s->picture_structure == PICT_FRAME) // FIXME field pix?
+        s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
+
+    return 0;
+}
+
+/**
+ * decodes a slice header.
+ * this will allso call MPV_common_init() and frame_start() as needed
+ */
+static int decode_slice_header(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    int first_mb_in_slice, pps_id;
+    int num_ref_idx_active_override_flag;
+    static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
+    int slice_type;
+    int default_ref_list_done = 0;
+
+    s->current_picture.reference= h->nal_ref_idc != 0;
+    s->dropable= h->nal_ref_idc == 0;
+
+    first_mb_in_slice= get_ue_golomb(&s->gb);
+
+    slice_type= get_ue_golomb(&s->gb);
+    if(slice_type > 9){
+        av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
+        return -1;
+    }
+    if(slice_type > 4){
+        slice_type -= 5;
+        h->slice_type_fixed=1;
+    }else
+        h->slice_type_fixed=0;
+
+    slice_type= slice_type_map[ slice_type ];
+    if (slice_type == I_TYPE
+        || (h->slice_num != 0 && slice_type == h->slice_type) ) {
+        default_ref_list_done = 1;
+    }
+    h->slice_type= slice_type;
+
+    s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
+
+    pps_id= get_ue_golomb(&s->gb);
+    if(pps_id>255){
+        av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
+        return -1;
+    }
+    h->pps= h->pps_buffer[pps_id];
+    if(h->pps.slice_group_count == 0){
+        av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
+        return -1;
+    }
+
+    h->sps= h->sps_buffer[ h->pps.sps_id ];
+    if(h->sps.log2_max_frame_num == 0){
+        av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
+        return -1;
+    }
+
+    if(h->dequant_coeff_pps != pps_id){
+        h->dequant_coeff_pps = pps_id;
+        init_dequant_tables(h);
+    }
+
+    s->mb_width= h->sps.mb_width;
+    s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
+
+    h->b_stride=  s->mb_width*4;
+    h->b8_stride= s->mb_width*2;
+
+    s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
+    if(h->sps.frame_mbs_only_flag)
+        s->height= 16*s->mb_height - 2*(h->sps.crop_top  + h->sps.crop_bottom);
+    else
+        s->height= 16*s->mb_height - 4*(h->sps.crop_top  + h->sps.crop_bottom); //FIXME recheck
+
+    if (s->context_initialized
+        && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
+        free_tables(h);
+        MPV_common_end(s);
+    }
+    if (!s->context_initialized) {
+        if (MPV_common_init(s) < 0)
+            return -1;
+
+        if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
+            memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
+            memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
+        }else{
+            int i;
+            for(i=0; i<16; i++){
+#define T(x) (x>>2) | ((x<<2) & 0xF)
+                h->zigzag_scan[i] = T(zigzag_scan[i]);
+                h-> field_scan[i] = T( field_scan[i]);
+#undef T
+            }
+        }
+        if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
+            memcpy(h->zigzag_scan8x8,       zigzag_scan8x8,       64*sizeof(uint8_t));
+            memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
+            memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
+            memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
+        }else{
+            int i;
+            for(i=0; i<64; i++){
+#define T(x) (x>>3) | ((x&7)<<3)
+                h->zigzag_scan8x8[i]       = T(zigzag_scan8x8[i]);
+                h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
+                h->field_scan8x8[i]        = T(field_scan8x8[i]);
+                h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
+#undef T
+            }
+        }
+        if(h->sps.transform_bypass){ //FIXME same ugly
+            h->zigzag_scan_q0          = zigzag_scan;
+            h->zigzag_scan8x8_q0       = zigzag_scan8x8;
+            h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
+            h->field_scan_q0           = field_scan;
+            h->field_scan8x8_q0        = field_scan8x8;
+            h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
+        }else{
+            h->zigzag_scan_q0          = h->zigzag_scan;
+            h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
+            h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
+            h->field_scan_q0           = h->field_scan;
+            h->field_scan8x8_q0        = h->field_scan8x8;
+            h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
+        }
+
+        alloc_tables(h);
+
+        s->avctx->width = s->width;
+        s->avctx->height = s->height;
+        s->avctx->sample_aspect_ratio= h->sps.sar;
+        if(!s->avctx->sample_aspect_ratio.den)
+            s->avctx->sample_aspect_ratio.den = 1;
+
+        if(h->sps.timing_info_present_flag){
+            s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
+            if(h->x264_build > 0 && h->x264_build < 44)
+                s->avctx->time_base.den *= 2;
+            av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
+                      s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
+        }
+    }
+
+    if(h->slice_num == 0){
+        if(frame_start(h) < 0)
+            return -1;
+    }
+
+    s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
+    h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
+
+    h->mb_mbaff = 0;
+    h->mb_aff_frame = 0;
+    if(h->sps.frame_mbs_only_flag){
+        s->picture_structure= PICT_FRAME;
+    }else{
+        if(get_bits1(&s->gb)) { //field_pic_flag
+            s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
+            av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
+        } else {
+            s->picture_structure= PICT_FRAME;
+            h->mb_aff_frame = h->sps.mb_aff;
+        }
+    }
+
+    s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
+    s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
+    if(s->mb_y >= s->mb_height){
+        return -1;
+    }
+
+    if(s->picture_structure==PICT_FRAME){
+        h->curr_pic_num=   h->frame_num;
+        h->max_pic_num= 1<< h->sps.log2_max_frame_num;
+    }else{
+        h->curr_pic_num= 2*h->frame_num;
+        h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
+    }
+
+    if(h->nal_unit_type == NAL_IDR_SLICE){
+        get_ue_golomb(&s->gb); /* idr_pic_id */
+    }
+
+    if(h->sps.poc_type==0){
+        h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
+
+        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
+            h->delta_poc_bottom= get_se_golomb(&s->gb);
+        }
+    }
+
+    if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
+        h->delta_poc[0]= get_se_golomb(&s->gb);
+
+        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
+            h->delta_poc[1]= get_se_golomb(&s->gb);
+    }
+
+    init_poc(h);
+
+    if(h->pps.redundant_pic_cnt_present){
+        h->redundant_pic_count= get_ue_golomb(&s->gb);
+    }
+
+    //set defaults, might be overriden a few line later
+    h->ref_count[0]= h->pps.ref_count[0];
+    h->ref_count[1]= h->pps.ref_count[1];
+
+    if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
+        if(h->slice_type == B_TYPE){
+            h->direct_spatial_mv_pred= get_bits1(&s->gb);
+            if(h->sps.mb_aff && h->direct_spatial_mv_pred)
+                av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
+        }
+        num_ref_idx_active_override_flag= get_bits1(&s->gb);
+
+        if(num_ref_idx_active_override_flag){
+            h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
+            if(h->slice_type==B_TYPE)
+                h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
+
+            if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
+                av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
+                return -1;
+            }
+        }
+    }
+
+    if(!default_ref_list_done){
+        fill_default_ref_list(h);
+    }
+
+    if(decode_ref_pic_list_reordering(h) < 0)
+        return -1;
+
+    if(   (h->pps.weighted_pred          && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
+       || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
+        pred_weight_table(h);
+    else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
+        implicit_weight_table(h);
+    else
+        h->use_weight = 0;
+
+    if(s->current_picture.reference)
+        decode_ref_pic_marking(h);
+
+    if(FRAME_MBAFF)
+        fill_mbaff_ref_list(h);
+
+    if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
+        h->cabac_init_idc = get_ue_golomb(&s->gb);
+
+    h->last_qscale_diff = 0;
+    s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
+    if(s->qscale<0 || s->qscale>51){
+        av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
+        return -1;
+    }
+    h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
+    //FIXME qscale / qp ... stuff
+    if(h->slice_type == SP_TYPE){
+        get_bits1(&s->gb); /* sp_for_switch_flag */
+    }
+    if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
+        get_se_golomb(&s->gb); /* slice_qs_delta */
+    }
+
+    h->deblocking_filter = 1;
+    h->slice_alpha_c0_offset = 0;
+    h->slice_beta_offset = 0;
+    if( h->pps.deblocking_filter_parameters_present ) {
+        h->deblocking_filter= get_ue_golomb(&s->gb);
+        if(h->deblocking_filter < 2)
+            h->deblocking_filter^= 1; // 1<->0
+
+        if( h->deblocking_filter ) {
+            h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
+            h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
+        }
+    }
+    if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
+       ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
+       ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type == B_TYPE)
+       ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
+        h->deblocking_filter= 0;
+
+#if 0 //FMO
+    if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
+        slice_group_change_cycle= get_bits(&s->gb, ?);
+#endif
+
+    h->slice_num++;
+
+    h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
+    h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
+
+    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
+        av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
+               h->slice_num,
+               (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
+               first_mb_in_slice,
+               av_get_pict_type_char(h->slice_type),
+               pps_id, h->frame_num,
+               s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
+               h->ref_count[0], h->ref_count[1],
+               s->qscale,
+               h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
+               h->use_weight,
+               h->use_weight==1 && h->use_weight_chroma ? "c" : ""
+               );
+    }
+
+    if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
+        s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
+        s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
+    }else{
+        s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
+        s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
+    }
+
+    return 0;
+}
+
+/**
+ *
+ */
+static inline int get_level_prefix(GetBitContext *gb){
+    unsigned int buf;
+    int log;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf=GET_CACHE(re, gb);
+
+    log= 32 - av_log2(buf);
+#ifdef TRACE
+    print_bin(buf>>(32-log), log);
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
+#endif
+
+    LAST_SKIP_BITS(re, gb, log);
+    CLOSE_READER(re, gb);
+
+    return log-1;
+}
+
+static inline int get_dct8x8_allowed(H264Context *h){
+    int i;
+    for(i=0; i<4; i++){
+        if(!IS_SUB_8X8(h->sub_mb_type[i])
+           || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
+            return 0;
+    }
+    return 1;
+}
+
+/**
+ * decodes a residual block.
+ * @param n block index
+ * @param scantable scantable
+ * @param max_coeff number of coefficients in the block
+ * @return <0 if an error occured
+ */
+static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
+    MpegEncContext * const s = &h->s;
+    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
+    int level[16];
+    int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
+
+    //FIXME put trailing_onex into the context
+
+    if(n == CHROMA_DC_BLOCK_INDEX){
+        coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
+        total_coeff= coeff_token>>2;
+    }else{
+        if(n == LUMA_DC_BLOCK_INDEX){
+            total_coeff= pred_non_zero_count(h, 0);
+            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
+            total_coeff= coeff_token>>2;
+        }else{
+            total_coeff= pred_non_zero_count(h, n);
+            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
+            total_coeff= coeff_token>>2;
+            h->non_zero_count_cache[ scan8[n] ]= total_coeff;
+        }
+    }
+
+    //FIXME set last_non_zero?
+
+    if(total_coeff==0)
+        return 0;
+
+    trailing_ones= coeff_token&3;
+    tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
+    assert(total_coeff<=16);
+
+    for(i=0; i<trailing_ones; i++){
+        level[i]= 1 - 2*get_bits1(gb);
+    }
+
+    if(i<total_coeff) {
+        int level_code, mask;
+        int suffix_length = total_coeff > 10 && trailing_ones < 3;
+        int prefix= get_level_prefix(gb);
+
+        //first coefficient has suffix_length equal to 0 or 1
+        if(prefix<14){ //FIXME try to build a large unified VLC table for all this
+            if(suffix_length)
+                level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
+            else
+                level_code= (prefix<<suffix_length); //part
+        }else if(prefix==14){
+            if(suffix_length)
+                level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
+            else
+                level_code= prefix + get_bits(gb, 4); //part
+        }else if(prefix==15){
+            level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
+            if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
+        }else{
+            av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+
+        if(trailing_ones < 3) level_code += 2;
+
+        suffix_length = 1;
+        if(level_code > 5)
+            suffix_length++;
+        mask= -(level_code&1);
+        level[i]= (((2+level_code)>>1) ^ mask) - mask;
+        i++;
+
+        //remaining coefficients have suffix_length > 0
+        for(;i<total_coeff;i++) {
+            static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
+            prefix = get_level_prefix(gb);
+            if(prefix<15){
+                level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
+            }else if(prefix==15){
+                level_code =  (prefix<<suffix_length) + get_bits(gb, 12);
+            }else{
+                av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+            mask= -(level_code&1);
+            level[i]= (((2+level_code)>>1) ^ mask) - mask;
+            if(level_code > suffix_limit[suffix_length])
+                suffix_length++;
+        }
+    }
+
+    if(total_coeff == max_coeff)
+        zeros_left=0;
+    else{
+        if(n == CHROMA_DC_BLOCK_INDEX)
+            zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
+        else
+            zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
+    }
+
+    coeff_num = zeros_left + total_coeff - 1;
+    j = scantable[coeff_num];
+    if(n > 24){
+        block[j] = level[0];
+        for(i=1;i<total_coeff;i++) {
+            if(zeros_left <= 0)
+                run_before = 0;
+            else if(zeros_left < 7){
+                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
+            }else{
+                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
+            }
+            zeros_left -= run_before;
+            coeff_num -= 1 + run_before;
+            j= scantable[ coeff_num ];
+
+            block[j]= level[i];
+        }
+    }else{
+        block[j] = (level[0] * qmul[j] + 32)>>6;
+        for(i=1;i<total_coeff;i++) {
+            if(zeros_left <= 0)
+                run_before = 0;
+            else if(zeros_left < 7){
+                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
+            }else{
+                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
+            }
+            zeros_left -= run_before;
+            coeff_num -= 1 + run_before;
+            j= scantable[ coeff_num ];
+
+            block[j]= (level[i] * qmul[j] + 32)>>6;
+        }
+    }
+
+    if(zeros_left<0){
+        av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
+        return -1;
+    }
+
+    return 0;
+}
+
+static void predict_field_decoding_flag(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
+    int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
+                ? s->current_picture.mb_type[mb_xy-1]
+                : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
+                ? s->current_picture.mb_type[mb_xy-s->mb_stride]
+                : 0;
+    h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
+}
+
+/**
+ * decodes a P_SKIP or B_SKIP macroblock
+ */
+static void decode_mb_skip(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
+    int mb_type=0;
+
+    memset(h->non_zero_count[mb_xy], 0, 16);
+    memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
+
+    if(MB_FIELD)
+        mb_type|= MB_TYPE_INTERLACED;
+
+    if( h->slice_type == B_TYPE )
+    {
+        // just for fill_caches. pred_direct_motion will set the real mb_type
+        mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
+
+        fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
+        pred_direct_motion(h, &mb_type);
+        mb_type|= MB_TYPE_SKIP;
+    }
+    else
+    {
+        int mx, my;
+        mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
+
+        fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
+        pred_pskip_motion(h, &mx, &my);
+        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
+        fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
+    }
+
+    write_back_motion(h, mb_type);
+    s->current_picture.mb_type[mb_xy]= mb_type;
+    s->current_picture.qscale_table[mb_xy]= s->qscale;
+    h->slice_table[ mb_xy ]= h->slice_num;
+    h->prev_mb_skipped= 1;
+}
+
+/**
+ * decodes a macroblock
+ * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
+ */
+static int decode_mb_cavlc(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
+    int mb_type, partition_count, cbp;
+    int dct8x8_allowed= h->pps.transform_8x8_mode;
+
+    s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
+
+    tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
+    cbp = 0; /* avoid warning. FIXME: find a solution without slowing
+                down the code */
+    if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
+        if(s->mb_skip_run==-1)
+            s->mb_skip_run= get_ue_golomb(&s->gb);
+
+        if (s->mb_skip_run--) {
+            if(FRAME_MBAFF && (s->mb_y&1) == 0){
+                if(s->mb_skip_run==0)
+                    h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
+                else
+                    predict_field_decoding_flag(h);
+            }
+            decode_mb_skip(h);
+            return 0;
+        }
+    }
+    if(FRAME_MBAFF){
+        if( (s->mb_y&1) == 0 )
+            h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
+    }else
+        h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
+
+    h->prev_mb_skipped= 0;
+
+    mb_type= get_ue_golomb(&s->gb);
+    if(h->slice_type == B_TYPE){
+        if(mb_type < 23){
+            partition_count= b_mb_type_info[mb_type].partition_count;
+            mb_type=         b_mb_type_info[mb_type].type;
+        }else{
+            mb_type -= 23;
+            goto decode_intra_mb;
+        }
+    }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
+        if(mb_type < 5){
+            partition_count= p_mb_type_info[mb_type].partition_count;
+            mb_type=         p_mb_type_info[mb_type].type;
+        }else{
+            mb_type -= 5;
+            goto decode_intra_mb;
+        }
+    }else{
+       assert(h->slice_type == I_TYPE);
+decode_intra_mb:
+        if(mb_type > 25){
+            av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
+            return -1;
+        }
+        partition_count=0;
+        cbp= i_mb_type_info[mb_type].cbp;
+        h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
+        mb_type= i_mb_type_info[mb_type].type;
+    }
+
+    if(MB_FIELD)
+        mb_type |= MB_TYPE_INTERLACED;
+
+    h->slice_table[ mb_xy ]= h->slice_num;
+
+    if(IS_INTRA_PCM(mb_type)){
+        unsigned int x, y;
+
+        // we assume these blocks are very rare so we dont optimize it
+        align_get_bits(&s->gb);
+
+        // The pixels are stored in the same order as levels in h->mb array.
+        for(y=0; y<16; y++){
+            const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
+            for(x=0; x<16; x++){
+                tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
+                h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
+            }
+        }
+        for(y=0; y<8; y++){
+            const int index= 256 + 4*(y&3) + 32*(y>>2);
+            for(x=0; x<8; x++){
+                tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
+                h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
+            }
+        }
+        for(y=0; y<8; y++){
+            const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
+            for(x=0; x<8; x++){
+                tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
+                h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
+            }
+        }
+
+        // In deblocking, the quantizer is 0
+        s->current_picture.qscale_table[mb_xy]= 0;
+        h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
+        // All coeffs are present
+        memset(h->non_zero_count[mb_xy], 16, 16);
+
+        s->current_picture.mb_type[mb_xy]= mb_type;
+        return 0;
+    }
+
+    if(MB_MBAFF){
+        h->ref_count[0] <<= 1;
+        h->ref_count[1] <<= 1;
+    }
+
+    fill_caches(h, mb_type, 0);
+
+    //mb_pred
+    if(IS_INTRA(mb_type)){
+//            init_top_left_availability(h);
+            if(IS_INTRA4x4(mb_type)){
+                int i;
+                int di = 1;
+                if(dct8x8_allowed && get_bits1(&s->gb)){
+                    mb_type |= MB_TYPE_8x8DCT;
+                    di = 4;
+                }
+
+//                fill_intra4x4_pred_table(h);
+                for(i=0; i<16; i+=di){
+                    int mode= pred_intra_mode(h, i);
+
+                    if(!get_bits1(&s->gb)){
+                        const int rem_mode= get_bits(&s->gb, 3);
+                        mode = rem_mode + (rem_mode >= mode);
+                    }
+
+                    if(di==4)
+                        fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
+                    else
+                        h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
+                }
+                write_back_intra_pred_mode(h);
+                if( check_intra4x4_pred_mode(h) < 0)
+                    return -1;
+            }else{
+                h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
+                if(h->intra16x16_pred_mode < 0)
+                    return -1;
+            }
+            h->chroma_pred_mode= get_ue_golomb(&s->gb);
+
+            h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
+            if(h->chroma_pred_mode < 0)
+                return -1;
+    }else if(partition_count==4){
+        int i, j, sub_partition_count[4], list, ref[2][4];
+
+        if(h->slice_type == B_TYPE){
+            for(i=0; i<4; i++){
+                h->sub_mb_type[i]= get_ue_golomb(&s->gb);
+                if(h->sub_mb_type[i] >=13){
+                    av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
+                    return -1;
+                }
+                sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
+                h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
+            }
+            if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
+               || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
+                pred_direct_motion(h, &mb_type);
+                h->ref_cache[0][scan8[4]] =
+                h->ref_cache[1][scan8[4]] =
+                h->ref_cache[0][scan8[12]] =
+                h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
+            }
+        }else{
+            assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
+            for(i=0; i<4; i++){
+                h->sub_mb_type[i]= get_ue_golomb(&s->gb);
+                if(h->sub_mb_type[i] >=4){
+                    av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
+                    return -1;
+                }
+                sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
+                h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
+            }
+        }
+
+        for(list=0; list<2; list++){
+            int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
+            if(ref_count == 0) continue;
+            for(i=0; i<4; i++){
+                if(IS_DIRECT(h->sub_mb_type[i])) continue;
+                if(IS_DIR(h->sub_mb_type[i], 0, list)){
+                    ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
+                }else{
+                 //FIXME
+                    ref[list][i] = -1;
+                }
+            }
+        }
+
+        if(dct8x8_allowed)
+            dct8x8_allowed = get_dct8x8_allowed(h);
+
+        for(list=0; list<2; list++){
+            const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
+            if(ref_count == 0) continue;
+
+            for(i=0; i<4; i++){
+                if(IS_DIRECT(h->sub_mb_type[i])) {
+                    h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
+                    continue;
+                }
+                h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
+                h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
+
+                if(IS_DIR(h->sub_mb_type[i], 0, list)){
+                    const int sub_mb_type= h->sub_mb_type[i];
+                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
+                    for(j=0; j<sub_partition_count[i]; j++){
+                        int mx, my;
+                        const int index= 4*i + block_width*j;
+                        int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
+                        pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
+                        mx += get_se_golomb(&s->gb);
+                        my += get_se_golomb(&s->gb);
+                        tprintf("final mv:%d %d\n", mx, my);
+
+                        if(IS_SUB_8X8(sub_mb_type)){
+                            mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
+                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
+                            mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
+                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
+                        }else if(IS_SUB_8X4(sub_mb_type)){
+                            mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
+                            mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
+                        }else if(IS_SUB_4X8(sub_mb_type)){
+                            mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
+                            mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
+                        }else{
+                            assert(IS_SUB_4X4(sub_mb_type));
+                            mv_cache[ 0 ][0]= mx;
+                            mv_cache[ 0 ][1]= my;
+                        }
+                    }
+                }else{
+                    uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
+                    p[0] = p[1]=
+                    p[8] = p[9]= 0;
+                }
+            }
+        }
+    }else if(IS_DIRECT(mb_type)){
+        pred_direct_motion(h, &mb_type);
+        dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
+    }else{
+        int list, mx, my, i;
+         //FIXME we should set ref_idx_l? to 0 if we use that later ...
+        if(IS_16X16(mb_type)){
+            for(list=0; list<2; list++){
+                if(h->ref_count[list]>0){
+                    if(IS_DIR(mb_type, 0, list)){
+                        const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
+                        fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
+                    }else
+                        fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
+                }
+            }
+            for(list=0; list<2; list++){
+                if(IS_DIR(mb_type, 0, list)){
+                    pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
+                    mx += get_se_golomb(&s->gb);
+                    my += get_se_golomb(&s->gb);
+                    tprintf("final mv:%d %d\n", mx, my);
+
+                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
+                }else
+                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
+            }
+        }
+        else if(IS_16X8(mb_type)){
+            for(list=0; list<2; list++){
+                if(h->ref_count[list]>0){
+                    for(i=0; i<2; i++){
+                        if(IS_DIR(mb_type, i, list)){
+                            const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
+                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
+                        }else
+                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
+                    }
+                }
+            }
+            for(list=0; list<2; list++){
+                for(i=0; i<2; i++){
+                    if(IS_DIR(mb_type, i, list)){
+                        pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
+                        mx += get_se_golomb(&s->gb);
+                        my += get_se_golomb(&s->gb);
+                        tprintf("final mv:%d %d\n", mx, my);
+
+                        fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
+                    }else
+                        fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
+                }
+            }
+        }else{
+            assert(IS_8X16(mb_type));
+            for(list=0; list<2; list++){
+                if(h->ref_count[list]>0){
+                    for(i=0; i<2; i++){
+                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
+                            const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
+                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
+                        }else
+                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
+                    }
+                }
+            }
+            for(list=0; list<2; list++){
+                for(i=0; i<2; i++){
+                    if(IS_DIR(mb_type, i, list)){
+                        pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
+                        mx += get_se_golomb(&s->gb);
+                        my += get_se_golomb(&s->gb);
+                        tprintf("final mv:%d %d\n", mx, my);
+
+                        fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
+                    }else
+                        fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
+                }
+            }
+        }
+    }
+
+    if(IS_INTER(mb_type))
+        write_back_motion(h, mb_type);
+
+    if(!IS_INTRA16x16(mb_type)){
+        cbp= get_ue_golomb(&s->gb);
+        if(cbp > 47){
+            av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
+            return -1;
+        }
+
+        if(IS_INTRA4x4(mb_type))
+            cbp= golomb_to_intra4x4_cbp[cbp];
+        else
+            cbp= golomb_to_inter_cbp[cbp];
+    }
+    h->cbp = cbp;
+
+    if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
+        if(get_bits1(&s->gb))
+            mb_type |= MB_TYPE_8x8DCT;
+    }
+    s->current_picture.mb_type[mb_xy]= mb_type;
+
+    if(cbp || IS_INTRA16x16(mb_type)){
+        int i8x8, i4x4, chroma_idx;
+        int chroma_qp, dquant;
+        GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
+        const uint8_t *scan, *scan8x8, *dc_scan;
+
+//        fill_non_zero_count_cache(h);
+
+        if(IS_INTERLACED(mb_type)){
+            scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
+            scan= s->qscale ? h->field_scan : h->field_scan_q0;
+            dc_scan= luma_dc_field_scan;
+        }else{
+            scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
+            scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
+            dc_scan= luma_dc_zigzag_scan;
+        }
+
+        dquant= get_se_golomb(&s->gb);
+
+        if( dquant > 25 || dquant < -26 ){
+            av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
+            return -1;
+        }
+
+        s->qscale += dquant;
+        if(((unsigned)s->qscale) > 51){
+            if(s->qscale<0) s->qscale+= 52;
+            else            s->qscale-= 52;
+        }
+
+        h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
+        if(IS_INTRA16x16(mb_type)){
+            if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
+                return -1; //FIXME continue if partitioned and other return -1 too
+            }
+
+            assert((cbp&15) == 0 || (cbp&15) == 15);
+
+            if(cbp&15){
+                for(i8x8=0; i8x8<4; i8x8++){
+                    for(i4x4=0; i4x4<4; i4x4++){
+                        const int index= i4x4 + 4*i8x8;
+                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
+                            return -1;
+                        }
+                    }
+                }
+            }else{
+                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
+            }
+        }else{
+            for(i8x8=0; i8x8<4; i8x8++){
+                if(cbp & (1<<i8x8)){
+                    if(IS_8x8DCT(mb_type)){
+                        DCTELEM *buf = &h->mb[64*i8x8];
+                        uint8_t *nnz;
+                        for(i4x4=0; i4x4<4; i4x4++){
+                            if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
+                                                h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
+                                return -1;
+                        }
+                        nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
+                        nnz[0] += nnz[1] + nnz[8] + nnz[9];
+                    }else{
+                        for(i4x4=0; i4x4<4; i4x4++){
+                            const int index= i4x4 + 4*i8x8;
+
+                            if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
+                                return -1;
+                            }
+                        }
+                    }
+                }else{
+                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
+                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
+                }
+            }
+        }
+
+        if(cbp&0x30){
+            for(chroma_idx=0; chroma_idx<2; chroma_idx++)
+                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
+                    return -1;
+                }
+        }
+
+        if(cbp&0x20){
+            for(chroma_idx=0; chroma_idx<2; chroma_idx++){
+                for(i4x4=0; i4x4<4; i4x4++){
+                    const int index= 16 + 4*chroma_idx + i4x4;
+                    if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
+                        return -1;
+                    }
+                }
+            }
+        }else{
+            uint8_t * const nnz= &h->non_zero_count_cache[0];
+            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
+            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
+        }
+    }else{
+        uint8_t * const nnz= &h->non_zero_count_cache[0];
+        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
+        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
+        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
+    }
+    s->current_picture.qscale_table[mb_xy]= s->qscale;
+    write_back_non_zero_count(h);
+
+    if(MB_MBAFF){
+        h->ref_count[0] >>= 1;
+        h->ref_count[1] >>= 1;
+    }
+
+    return 0;
+}
+
+static int decode_cabac_field_decoding_flag(H264Context *h) {
+    MpegEncContext * const s = &h->s;
+    const int mb_x = s->mb_x;
+    const int mb_y = s->mb_y & ~1;
+    const int mba_xy = mb_x - 1 +  mb_y   *s->mb_stride;
+    const int mbb_xy = mb_x     + (mb_y-2)*s->mb_stride;
+
+    unsigned int ctx = 0;
+
+    if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
+        ctx += 1;
+    }
+    if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
+        ctx += 1;
+    }
+
+    return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
+}
+
+static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
+    uint8_t *state= &h->cabac_state[ctx_base];
+    int mb_type;
+
+    if(intra_slice){
+        MpegEncContext * const s = &h->s;
+        const int mba_xy = h->left_mb_xy[0];
+        const int mbb_xy = h->top_mb_xy;
+        int ctx=0;
+        if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
+            ctx++;
+        if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
+            ctx++;
+        if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
+            return 0;   /* I4x4 */
+        state += 2;
+    }else{
+        if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
+            return 0;   /* I4x4 */
+    }
+
+    if( get_cabac_terminate( &h->cabac ) )
+        return 25;  /* PCM */
+
+    mb_type = 1; /* I16x16 */
+    mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
+    if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
+        mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
+    mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
+    mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
+    return mb_type;
+}
+
+static int decode_cabac_mb_type( H264Context *h ) {
+    MpegEncContext * const s = &h->s;
+
+    if( h->slice_type == I_TYPE ) {
+        return decode_cabac_intra_mb_type(h, 3, 1);
+    } else if( h->slice_type == P_TYPE ) {
+        if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
+            /* P-type */
+            if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
+                /* P_L0_D16x16, P_8x8 */
+                return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
+            } else {
+                /* P_L0_D8x16, P_L0_D16x8 */
+                return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
+            }
+        } else {
+            return decode_cabac_intra_mb_type(h, 17, 0) + 5;
+        }
+    } else if( h->slice_type == B_TYPE ) {
+        const int mba_xy = h->left_mb_xy[0];
+        const int mbb_xy = h->top_mb_xy;
+        int ctx = 0;
+        int bits;
+
+        if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
+            ctx++;
+        if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
+            ctx++;
+
+        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
+            return 0; /* B_Direct_16x16 */
+
+        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
+            return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
+        }
+
+        bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
+        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
+        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
+        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
+        if( bits < 8 )
+            return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
+        else if( bits == 13 ) {
+            return decode_cabac_intra_mb_type(h, 32, 0) + 23;
+        } else if( bits == 14 )
+            return 11; /* B_L1_L0_8x16 */
+        else if( bits == 15 )
+            return 22; /* B_8x8 */
+
+        bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
+        return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
+    } else {
+        /* TODO SI/SP frames? */
+        return -1;
+    }
+}
+
+static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
+    MpegEncContext * const s = &h->s;
+    int mba_xy, mbb_xy;
+    int ctx = 0;
+
+    if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
+        int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
+        mba_xy = mb_xy - 1;
+        if( (mb_y&1)
+            && h->slice_table[mba_xy] == h->slice_num
+            && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
+            mba_xy += s->mb_stride;
+        if( MB_FIELD ){
+            mbb_xy = mb_xy - s->mb_stride;
+            if( !(mb_y&1)
+                && h->slice_table[mbb_xy] == h->slice_num
+                && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
+                mbb_xy -= s->mb_stride;
+        }else
+            mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
+    }else{
+        int mb_xy = mb_x + mb_y*s->mb_stride;
+        mba_xy = mb_xy - 1;
+        mbb_xy = mb_xy - s->mb_stride;
+    }
+
+    if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
+        ctx++;
+    if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
+        ctx++;
+
+    if( h->slice_type == B_TYPE )
+        ctx += 13;
+    return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
+}
+
+static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
+    int mode = 0;
+
+    if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
+        return pred_mode;
+
+    mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
+    mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
+    mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
+
+    if( mode >= pred_mode )
+        return mode + 1;
+    else
+        return mode;
+}
+
+static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
+    const int mba_xy = h->left_mb_xy[0];
+    const int mbb_xy = h->top_mb_xy;
+
+    int ctx = 0;
+
+    /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
+    if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
+        ctx++;
+
+    if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
+        ctx++;
+
+    if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
+        return 0;
+
+    if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
+        return 1;
+    if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
+        return 2;
+    else
+        return 3;
+}
+
+static const uint8_t block_idx_x[16] = {
+    0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
+};
+static const uint8_t block_idx_y[16] = {
+    0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
+};
+static const uint8_t block_idx_xy[4][4] = {
+    { 0, 2, 8,  10},
+    { 1, 3, 9,  11},
+    { 4, 6, 12, 14},
+    { 5, 7, 13, 15}
+};
+
+static int decode_cabac_mb_cbp_luma( H264Context *h) {
+    int cbp = 0;
+    int cbp_b = -1;
+    int i8x8;
+
+    if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
+        cbp_b = h->top_cbp;
+        tprintf("cbp_b = top_cbp = %x\n", cbp_b);
+    }
+
+    for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
+        int cbp_a = -1;
+        int x, y;
+        int ctx = 0;
+
+        x = block_idx_x[4*i8x8];
+        y = block_idx_y[4*i8x8];
+
+        if( x > 0 )
+            cbp_a = cbp;
+        else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
+            cbp_a = h->left_cbp;
+            tprintf("cbp_a = left_cbp = %x\n", cbp_a);
+        }
+
+        if( y > 0 )
+            cbp_b = cbp;
+
+        /* No need to test for skip as we put 0 for skip block */
+        /* No need to test for IPCM as we put 1 for IPCM block */
+        if( cbp_a >= 0 ) {
+            int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
+            if( ((cbp_a >> i8x8a)&0x01) == 0 )
+                ctx++;
+        }
+
+        if( cbp_b >= 0 ) {
+            int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
+            if( ((cbp_b >> i8x8b)&0x01) == 0 )
+                ctx += 2;
+        }
+
+        if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
+            cbp |= 1 << i8x8;
+        }
+    }
+    return cbp;
+}
+static int decode_cabac_mb_cbp_chroma( H264Context *h) {
+    int ctx;
+    int cbp_a, cbp_b;
+
+    cbp_a = (h->left_cbp>>4)&0x03;
+    cbp_b = (h-> top_cbp>>4)&0x03;
+
+    ctx = 0;
+    if( cbp_a > 0 ) ctx++;
+    if( cbp_b > 0 ) ctx += 2;
+    if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
+        return 0;
+
+    ctx = 4;
+    if( cbp_a == 2 ) ctx++;
+    if( cbp_b == 2 ) ctx += 2;
+    return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
+}
+static int decode_cabac_mb_dqp( H264Context *h) {
+    MpegEncContext * const s = &h->s;
+    int mbn_xy;
+    int   ctx = 0;
+    int   val = 0;
+
+    if( s->mb_x > 0 )
+        mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
+    else
+        mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
+
+    if( h->last_qscale_diff != 0 )
+        ctx++;
+
+    while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
+        if( ctx < 2 )
+            ctx = 2;
+        else
+            ctx = 3;
+        val++;
+        if(val > 102) //prevent infinite loop
+            return INT_MIN;
+    }
+
+    if( val&0x01 )
+        return (val + 1)/2;
+    else
+        return -(val + 1)/2;
+}
+static int decode_cabac_p_mb_sub_type( H264Context *h ) {
+    if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
+        return 0;   /* 8x8 */
+    if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
+        return 1;   /* 8x4 */
+    if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
+        return 2;   /* 4x8 */
+    return 3;       /* 4x4 */
+}
+static int decode_cabac_b_mb_sub_type( H264Context *h ) {
+    int type;
+    if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
+        return 0;   /* B_Direct_8x8 */
+    if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
+        return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
+    type = 3;
+    if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
+        if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
+            return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
+        type += 4;
+    }
+    type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
+    type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
+    return type;
+}
+
+static inline int decode_cabac_mb_transform_size( H264Context *h ) {
+    return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
+}
+
+static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
+    int refa = h->ref_cache[list][scan8[n] - 1];
+    int refb = h->ref_cache[list][scan8[n] - 8];
+    int ref  = 0;
+    int ctx  = 0;
+
+    if( h->slice_type == B_TYPE) {
+        if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
+            ctx++;
+        if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
+            ctx += 2;
+    } else {
+        if( refa > 0 )
+            ctx++;
+        if( refb > 0 )
+            ctx += 2;
+    }
+
+    while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
+        ref++;
+        if( ctx < 4 )
+            ctx = 4;
+        else
+            ctx = 5;
+    }
+    return ref;
+}
+
+static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
+    int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
+               abs( h->mvd_cache[list][scan8[n] - 8][l] );
+    int ctxbase = (l == 0) ? 40 : 47;
+    int ctx, mvd;
+
+    if( amvd < 3 )
+        ctx = 0;
+    else if( amvd > 32 )
+        ctx = 2;
+    else
+        ctx = 1;
+
+    if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
+        return 0;
+
+    mvd= 1;
+    ctx= 3;
+    while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
+        mvd++;
+        if( ctx < 6 )
+            ctx++;
+    }
+
+    if( mvd >= 9 ) {
+        int k = 3;
+        while( get_cabac_bypass( &h->cabac ) ) {
+            mvd += 1 << k;
+            k++;
+        }
+        while( k-- ) {
+            if( get_cabac_bypass( &h->cabac ) )
+                mvd += 1 << k;
+        }
+    }
+    return get_cabac_bypass_sign( &h->cabac, -mvd );
+}
+
+static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
+    int nza, nzb;
+    int ctx = 0;
+
+    if( cat == 0 ) {
+        nza = h->left_cbp&0x100;
+        nzb = h-> top_cbp&0x100;
+    } else if( cat == 1 || cat == 2 ) {
+        nza = h->non_zero_count_cache[scan8[idx] - 1];
+        nzb = h->non_zero_count_cache[scan8[idx] - 8];
+    } else if( cat == 3 ) {
+        nza = (h->left_cbp>>(6+idx))&0x01;
+        nzb = (h-> top_cbp>>(6+idx))&0x01;
+    } else {
+        assert(cat == 4);
+        nza = h->non_zero_count_cache[scan8[16+idx] - 1];
+        nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
+    }
+
+    if( nza > 0 )
+        ctx++;
+
+    if( nzb > 0 )
+        ctx += 2;
+
+    return ctx + 4 * cat;
+}
+
+static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = {
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
+};
+
+static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
+    const int mb_xy  = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
+    static const int significant_coeff_flag_offset[2][6] = {
+      { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
+      { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
+    };
+    static const int last_coeff_flag_offset[2][6] = {
+      { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
+      { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
+    };
+    static const int coeff_abs_level_m1_offset[6] = {
+        227+0, 227+10, 227+20, 227+30, 227+39, 426
+    };
+    static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
+      { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
+        4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
+        7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
+       12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
+      { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
+        6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
+        9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
+        9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
+    };
+
+    int index[64];
+
+    int last;
+    int coeff_count = 0;
+
+    int abslevel1 = 1;
+    int abslevelgt1 = 0;
+
+    uint8_t *significant_coeff_ctx_base;
+    uint8_t *last_coeff_ctx_base;
+    uint8_t *abs_level_m1_ctx_base;
+
+#ifndef ARCH_X86
+#define CABAC_ON_STACK
+#endif
+#ifdef CABAC_ON_STACK
+#define CC &cc
+    CABACContext cc;
+    cc.range     = h->cabac.range;
+    cc.low       = h->cabac.low;
+    cc.bytestream= h->cabac.bytestream;
+#else
+#define CC &h->cabac
+#endif
+
+
+    /* cat: 0-> DC 16x16  n = 0
+     *      1-> AC 16x16  n = luma4x4idx
+     *      2-> Luma4x4   n = luma4x4idx
+     *      3-> DC Chroma n = iCbCr
+     *      4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
+     *      5-> Luma8x8   n = 4 * luma8x8idx
+     */
+
+    /* read coded block flag */
+    if( cat != 5 ) {
+        if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
+            if( cat == 1 || cat == 2 )
+                h->non_zero_count_cache[scan8[n]] = 0;
+            else if( cat == 4 )
+                h->non_zero_count_cache[scan8[16+n]] = 0;
+#ifdef CABAC_ON_STACK
+            h->cabac.range     = cc.range     ;
+            h->cabac.low       = cc.low       ;
+            h->cabac.bytestream= cc.bytestream;
+#endif
+            return 0;
+        }
+    }
+
+    significant_coeff_ctx_base = h->cabac_state
+        + significant_coeff_flag_offset[MB_FIELD][cat];
+    last_coeff_ctx_base = h->cabac_state
+        + last_coeff_flag_offset[MB_FIELD][cat];
+    abs_level_m1_ctx_base = h->cabac_state
+        + coeff_abs_level_m1_offset[cat];
+
+    if( cat == 5 ) {
+#define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
+        for(last= 0; last < coefs; last++) { \
+            uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
+            if( get_cabac( CC, sig_ctx )) { \
+                uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
+                index[coeff_count++] = last; \
+                if( get_cabac( CC, last_ctx ) ) { \
+                    last= max_coeff; \
+                    break; \
+                } \
+            } \
+        }\
+        if( last == max_coeff -1 ) {\
+            index[coeff_count++] = last;\
+        }
+        const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
+#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
+        coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
+    } else {
+        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
+#else
+        DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
+    } else {
+        DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
+#endif
+    }
+    assert(coeff_count > 0);
+
+    if( cat == 0 )
+        h->cbp_table[mb_xy] |= 0x100;
+    else if( cat == 1 || cat == 2 )
+        h->non_zero_count_cache[scan8[n]] = coeff_count;
+    else if( cat == 3 )
+        h->cbp_table[mb_xy] |= 0x40 << n;
+    else if( cat == 4 )
+        h->non_zero_count_cache[scan8[16+n]] = coeff_count;
+    else {
+        assert( cat == 5 );
+        fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
+    }
+
+    for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
+        uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
+        int j= scantable[index[coeff_count]];
+
+        if( get_cabac( CC, ctx ) == 0 ) {
+            if( !qmul ) {
+                block[j] = get_cabac_bypass_sign( CC, -1);
+            }else{
+                block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
+            }
+
+            abslevel1++;
+        } else {
+            int coeff_abs = 2;
+            ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
+            while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
+                coeff_abs++;
+            }
+
+            if( coeff_abs >= 15 ) {
+                int j = 0;
+                while( get_cabac_bypass( CC ) ) {
+                    j++;
+                }
+
+                coeff_abs=1;
+                while( j-- ) {
+                    coeff_abs += coeff_abs + get_cabac_bypass( CC );
+                }
+                coeff_abs+= 14;
+            }
+
+            if( !qmul ) {
+                if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
+                else                                block[j] =  coeff_abs;
+            }else{
+                if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
+                else                                block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
+            }
+
+            abslevelgt1++;
+        }
+    }
+#ifdef CABAC_ON_STACK
+            h->cabac.range     = cc.range     ;
+            h->cabac.low       = cc.low       ;
+            h->cabac.bytestream= cc.bytestream;
+#endif
+    return 0;
+}
+
+static void inline compute_mb_neighbors(H264Context *h)
+{
+    MpegEncContext * const s = &h->s;
+    const int mb_xy  = s->mb_x + s->mb_y*s->mb_stride;
+    h->top_mb_xy     = mb_xy - s->mb_stride;
+    h->left_mb_xy[0] = mb_xy - 1;
+    if(FRAME_MBAFF){
+        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
+        const int top_pair_xy      = pair_xy     - s->mb_stride;
+        const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
+        const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
+        const int curr_mb_frame_flag = !MB_FIELD;
+        const int bottom = (s->mb_y & 1);
+        if (bottom
+                ? !curr_mb_frame_flag // bottom macroblock
+                : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
+                ) {
+            h->top_mb_xy -= s->mb_stride;
+        }
+        if (left_mb_frame_flag != curr_mb_frame_flag) {
+            h->left_mb_xy[0] = pair_xy - 1;
+        }
+    }
+    return;
+}
+
+/**
+ * decodes a macroblock
+ * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
+ */
+static int decode_mb_cabac(H264Context *h) {
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
+    int mb_type, partition_count, cbp = 0;
+    int dct8x8_allowed= h->pps.transform_8x8_mode;
+
+    s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
+
+    tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
+    if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
+        int skip;
+        /* a skipped mb needs the aff flag from the following mb */
+        if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
+            predict_field_decoding_flag(h);
+        if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
+            skip = h->next_mb_skipped;
+        else
+            skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
+        /* read skip flags */
+        if( skip ) {
+            if( FRAME_MBAFF && (s->mb_y&1)==0 ){
+                s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
+                h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
+                if(h->next_mb_skipped)
+                    predict_field_decoding_flag(h);
+                else
+                    h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
+            }
+
+            decode_mb_skip(h);
+
+            h->cbp_table[mb_xy] = 0;
+            h->chroma_pred_mode_table[mb_xy] = 0;
+            h->last_qscale_diff = 0;
+
+            return 0;
+
+        }
+    }
+    if(FRAME_MBAFF){
+        if( (s->mb_y&1) == 0 )
+            h->mb_mbaff =
+            h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
+    }else
+        h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
+
+    h->prev_mb_skipped = 0;
+
+    compute_mb_neighbors(h);
+    if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
+        av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
+        return -1;
+    }
+
+    if( h->slice_type == B_TYPE ) {
+        if( mb_type < 23 ){
+            partition_count= b_mb_type_info[mb_type].partition_count;
+            mb_type=         b_mb_type_info[mb_type].type;
+        }else{
+            mb_type -= 23;
+            goto decode_intra_mb;
+        }
+    } else if( h->slice_type == P_TYPE ) {
+        if( mb_type < 5) {
+            partition_count= p_mb_type_info[mb_type].partition_count;
+            mb_type=         p_mb_type_info[mb_type].type;
+        } else {
+            mb_type -= 5;
+            goto decode_intra_mb;
+        }
+    } else {
+       assert(h->slice_type == I_TYPE);
+decode_intra_mb:
+        partition_count = 0;
+        cbp= i_mb_type_info[mb_type].cbp;
+        h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
+        mb_type= i_mb_type_info[mb_type].type;
+    }
+    if(MB_FIELD)
+        mb_type |= MB_TYPE_INTERLACED;
+
+    h->slice_table[ mb_xy ]= h->slice_num;
+
+    if(IS_INTRA_PCM(mb_type)) {
+        const uint8_t *ptr;
+        unsigned int x, y;
+
+        // We assume these blocks are very rare so we dont optimize it.
+        // FIXME The two following lines get the bitstream position in the cabac
+        // decode, I think it should be done by a function in cabac.h (or cabac.c).
+        ptr= h->cabac.bytestream;
+        if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
+
+        // The pixels are stored in the same order as levels in h->mb array.
+        for(y=0; y<16; y++){
+            const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
+            for(x=0; x<16; x++){
+                tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
+                h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
+            }
+        }
+        for(y=0; y<8; y++){
+            const int index= 256 + 4*(y&3) + 32*(y>>2);
+            for(x=0; x<8; x++){
+                tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
+                h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
+            }
+        }
+        for(y=0; y<8; y++){
+            const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
+            for(x=0; x<8; x++){
+                tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
+                h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
+            }
+        }
+
+        ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
+
+        // All blocks are present
+        h->cbp_table[mb_xy] = 0x1ef;
+        h->chroma_pred_mode_table[mb_xy] = 0;
+        // In deblocking, the quantizer is 0
+        s->current_picture.qscale_table[mb_xy]= 0;
+        h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
+        // All coeffs are present
+        memset(h->non_zero_count[mb_xy], 16, 16);
+        s->current_picture.mb_type[mb_xy]= mb_type;
+        return 0;
+    }
+
+    if(MB_MBAFF){
+        h->ref_count[0] <<= 1;
+        h->ref_count[1] <<= 1;
+    }
+
+    fill_caches(h, mb_type, 0);
+
+    if( IS_INTRA( mb_type ) ) {
+        int i;
+        if( IS_INTRA4x4( mb_type ) ) {
+            if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
+                mb_type |= MB_TYPE_8x8DCT;
+                for( i = 0; i < 16; i+=4 ) {
+                    int pred = pred_intra_mode( h, i );
+                    int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
+                    fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
+                }
+            } else {
+                for( i = 0; i < 16; i++ ) {
+                    int pred = pred_intra_mode( h, i );
+                    h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
+
+                //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
+                }
+            }
+            write_back_intra_pred_mode(h);
+            if( check_intra4x4_pred_mode(h) < 0 ) return -1;
+        } else {
+            h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
+            if( h->intra16x16_pred_mode < 0 ) return -1;
+        }
+        h->chroma_pred_mode_table[mb_xy] =
+            h->chroma_pred_mode          = decode_cabac_mb_chroma_pre_mode( h );
+
+        h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
+        if( h->chroma_pred_mode < 0 ) return -1;
+    } else if( partition_count == 4 ) {
+        int i, j, sub_partition_count[4], list, ref[2][4];
+
+        if( h->slice_type == B_TYPE ) {
+            for( i = 0; i < 4; i++ ) {
+                h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
+                sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
+                h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
+            }
+            if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
+                          h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
+                pred_direct_motion(h, &mb_type);
+                if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
+                    for( i = 0; i < 4; i++ )
+                        if( IS_DIRECT(h->sub_mb_type[i]) )
+                            fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
+                }
+            }
+        } else {
+            for( i = 0; i < 4; i++ ) {
+                h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
+                sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
+                h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
+            }
+        }
+
+        for( list = 0; list < 2; list++ ) {
+            if( h->ref_count[list] > 0 ) {
+                for( i = 0; i < 4; i++ ) {
+                    if(IS_DIRECT(h->sub_mb_type[i])) continue;
+                    if(IS_DIR(h->sub_mb_type[i], 0, list)){
+                        if( h->ref_count[list] > 1 )
+                            ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
+                        else
+                            ref[list][i] = 0;
+                    } else {
+                        ref[list][i] = -1;
+                    }
+                                                       h->ref_cache[list][ scan8[4*i]+1 ]=
+                    h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
+                }
+            }
+        }
+
+        if(dct8x8_allowed)
+            dct8x8_allowed = get_dct8x8_allowed(h);
+
+        for(list=0; list<2; list++){
+            for(i=0; i<4; i++){
+                if(IS_DIRECT(h->sub_mb_type[i])){
+                    fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
+                    continue;
+                }
+                h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ];
+
+                if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
+                    const int sub_mb_type= h->sub_mb_type[i];
+                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
+                    for(j=0; j<sub_partition_count[i]; j++){
+                        int mpx, mpy;
+                        int mx, my;
+                        const int index= 4*i + block_width*j;
+                        int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
+                        int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
+                        pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
+
+                        mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
+                        my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
+                        tprintf("final mv:%d %d\n", mx, my);
+
+                        if(IS_SUB_8X8(sub_mb_type)){
+                            mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
+                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
+                            mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
+                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
+
+                            mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
+                            mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
+                            mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
+                            mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
+                        }else if(IS_SUB_8X4(sub_mb_type)){
+                            mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
+                            mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
+
+                            mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
+                            mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
+                        }else if(IS_SUB_4X8(sub_mb_type)){
+                            mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
+                            mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
+
+                            mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
+                            mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
+                        }else{
+                            assert(IS_SUB_4X4(sub_mb_type));
+                            mv_cache[ 0 ][0]= mx;
+                            mv_cache[ 0 ][1]= my;
+
+                            mvd_cache[ 0 ][0]= mx - mpx;
+                            mvd_cache[ 0 ][1]= my - mpy;
+                        }
+                    }
+                }else{
+                    uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
+                    uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
+                    p[0] = p[1] = p[8] = p[9] = 0;
+                    pd[0]= pd[1]= pd[8]= pd[9]= 0;
+                }
+            }
+        }
+    } else if( IS_DIRECT(mb_type) ) {
+        pred_direct_motion(h, &mb_type);
+        fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
+        fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
+        dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
+    } else {
+        int list, mx, my, i, mpx, mpy;
+        if(IS_16X16(mb_type)){
+            for(list=0; list<2; list++){
+                if(IS_DIR(mb_type, 0, list)){
+                    if(h->ref_count[list] > 0 ){
+                        const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
+                        fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
+                    }
+                }else
+                    fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
+            }
+            for(list=0; list<2; list++){
+                if(IS_DIR(mb_type, 0, list)){
+                    pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
+
+                    mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
+                    my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
+                    tprintf("final mv:%d %d\n", mx, my);
+
+                    fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
+                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
+                }else
+                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
+            }
+        }
+        else if(IS_16X8(mb_type)){
+            for(list=0; list<2; list++){
+                if(h->ref_count[list]>0){
+                    for(i=0; i<2; i++){
+                        if(IS_DIR(mb_type, i, list)){
+                            const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
+                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
+                        }else
+                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
+                    }
+                }
+            }
+            for(list=0; list<2; list++){
+                for(i=0; i<2; i++){
+                    if(IS_DIR(mb_type, i, list)){
+                        pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
+                        mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
+                        my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
+                        tprintf("final mv:%d %d\n", mx, my);
+
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
+                        fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
+                    }else{
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
+                        fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
+                    }
+                }
+            }
+        }else{
+            assert(IS_8X16(mb_type));
+            for(list=0; list<2; list++){
+                if(h->ref_count[list]>0){
+                    for(i=0; i<2; i++){
+                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
+                            const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
+                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
+                        }else
+                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
+                    }
+                }
+            }
+            for(list=0; list<2; list++){
+                for(i=0; i<2; i++){
+                    if(IS_DIR(mb_type, i, list)){
+                        pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
+                        mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
+                        my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
+
+                        tprintf("final mv:%d %d\n", mx, my);
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
+                        fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
+                    }else{
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
+                        fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
+                    }
+                }
+            }
+        }
+    }
+
+   if( IS_INTER( mb_type ) ) {
+        h->chroma_pred_mode_table[mb_xy] = 0;
+        write_back_motion( h, mb_type );
+   }
+
+    if( !IS_INTRA16x16( mb_type ) ) {
+        cbp  = decode_cabac_mb_cbp_luma( h );
+        cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
+    }
+
+    h->cbp_table[mb_xy] = h->cbp = cbp;
+
+    if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
+        if( decode_cabac_mb_transform_size( h ) )
+            mb_type |= MB_TYPE_8x8DCT;
+    }
+    s->current_picture.mb_type[mb_xy]= mb_type;
+
+    if( cbp || IS_INTRA16x16( mb_type ) ) {
+        const uint8_t *scan, *scan8x8, *dc_scan;
+        int dqp;
+
+        if(IS_INTERLACED(mb_type)){
+            scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
+            scan= s->qscale ? h->field_scan : h->field_scan_q0;
+            dc_scan= luma_dc_field_scan;
+        }else{
+            scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
+            scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
+            dc_scan= luma_dc_zigzag_scan;
+        }
+
+        h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
+        if( dqp == INT_MIN ){
+            av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+        s->qscale += dqp;
+        if(((unsigned)s->qscale) > 51){
+            if(s->qscale<0) s->qscale+= 52;
+            else            s->qscale-= 52;
+        }
+        h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
+
+        if( IS_INTRA16x16( mb_type ) ) {
+            int i;
+            //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
+            if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
+                return -1;
+            if( cbp&15 ) {
+                for( i = 0; i < 16; i++ ) {
+                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
+                    if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
+                        return -1;
+                }
+            } else {
+                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
+            }
+        } else {
+            int i8x8, i4x4;
+            for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
+                if( cbp & (1<<i8x8) ) {
+                    if( IS_8x8DCT(mb_type) ) {
+                        if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
+                            scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
+                            return -1;
+                    } else
+                    for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
+                        const int index = 4*i8x8 + i4x4;
+                        //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
+//START_TIMER
+                        if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
+                            return -1;
+//STOP_TIMER("decode_residual")
+                    }
+                } else {
+                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
+                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
+                }
+            }
+        }
+
+        if( cbp&0x30 ){
+            int c;
+            for( c = 0; c < 2; c++ ) {
+                //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
+                if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
+                    return -1;
+            }
+        }
+
+        if( cbp&0x20 ) {
+            int c, i;
+            for( c = 0; c < 2; c++ ) {
+                for( i = 0; i < 4; i++ ) {
+                    const int index = 16 + 4 * c + i;
+                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
+                    if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
+                        return -1;
+                }
+            }
+        } else {
+            uint8_t * const nnz= &h->non_zero_count_cache[0];
+            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
+            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
+        }
+    } else {
+        uint8_t * const nnz= &h->non_zero_count_cache[0];
+        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
+        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
+        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
+        h->last_qscale_diff = 0;
+    }
+
+    s->current_picture.qscale_table[mb_xy]= s->qscale;
+    write_back_non_zero_count(h);
+
+    if(MB_MBAFF){
+        h->ref_count[0] >>= 1;
+        h->ref_count[1] >>= 1;
+    }
+
+    return 0;
+}
+
+
+static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
+    int i, d;
+    const int index_a = qp + h->slice_alpha_c0_offset;
+    const int alpha = (alpha_table+52)[index_a];
+    const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
+
+    if( bS[0] < 4 ) {
+        int8_t tc[4];
+        for(i=0; i<4; i++)
+            tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
+        h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
+    } else {
+        /* 16px edge length, because bS=4 is triggered by being at
+         * the edge of an intra MB, so all 4 bS are the same */
+            for( d = 0; d < 16; d++ ) {
+                const int p0 = pix[-1];
+                const int p1 = pix[-2];
+                const int p2 = pix[-3];
+
+                const int q0 = pix[0];
+                const int q1 = pix[1];
+                const int q2 = pix[2];
+
+                if( FFABS( p0 - q0 ) < alpha &&
+                    FFABS( p1 - p0 ) < beta &&
+                    FFABS( q1 - q0 ) < beta ) {
+
+                    if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
+                        if( FFABS( p2 - p0 ) < beta)
+                        {
+                            const int p3 = pix[-4];
+                            /* p0', p1', p2' */
+                            pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
+                            pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
+                            pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
+                        } else {
+                            /* p0' */
+                            pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                        }
+                        if( FFABS( q2 - q0 ) < beta)
+                        {
+                            const int q3 = pix[3];
+                            /* q0', q1', q2' */
+                            pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
+                            pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
+                            pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
+                        } else {
+                            /* q0' */
+                            pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                        }
+                    }else{
+                        /* p0', q0' */
+                        pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                        pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                    }
+                    tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
+                }
+                pix += stride;
+            }
+    }
+}
+static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
+    int i;
+    const int index_a = qp + h->slice_alpha_c0_offset;
+    const int alpha = (alpha_table+52)[index_a];
+    const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
+
+    if( bS[0] < 4 ) {
+        int8_t tc[4];
+        for(i=0; i<4; i++)
+            tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
+        h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
+    } else {
+        h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
+    }
+}
+
+static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
+    int i;
+    for( i = 0; i < 16; i++, pix += stride) {
+        int index_a;
+        int alpha;
+        int beta;
+
+        int qp_index;
+        int bS_index = (i >> 1);
+        if (!MB_FIELD) {
+            bS_index &= ~1;
+            bS_index |= (i & 1);
+        }
+
+        if( bS[bS_index] == 0 ) {
+            continue;
+        }
+
+        qp_index = MB_FIELD ? (i >> 3) : (i & 1);
+        index_a = qp[qp_index] + h->slice_alpha_c0_offset;
+        alpha = (alpha_table+52)[index_a];
+        beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
+
+        if( bS[bS_index] < 4 ) {
+            const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
+            const int p0 = pix[-1];
+            const int p1 = pix[-2];
+            const int p2 = pix[-3];
+            const int q0 = pix[0];
+            const int q1 = pix[1];
+            const int q2 = pix[2];
+
+            if( FFABS( p0 - q0 ) < alpha &&
+                FFABS( p1 - p0 ) < beta &&
+                FFABS( q1 - q0 ) < beta ) {
+                int tc = tc0;
+                int i_delta;
+
+                if( FFABS( p2 - p0 ) < beta ) {
+                    pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
+                    tc++;
+                }
+                if( FFABS( q2 - q0 ) < beta ) {
+                    pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
+                    tc++;
+                }
+
+                i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+                pix[-1] = clip_uint8( p0 + i_delta );    /* p0' */
+                pix[0]  = clip_uint8( q0 - i_delta );    /* q0' */
+                tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
+            }
+        }else{
+            const int p0 = pix[-1];
+            const int p1 = pix[-2];
+            const int p2 = pix[-3];
+
+            const int q0 = pix[0];
+            const int q1 = pix[1];
+            const int q2 = pix[2];
+
+            if( FFABS( p0 - q0 ) < alpha &&
+                FFABS( p1 - p0 ) < beta &&
+                FFABS( q1 - q0 ) < beta ) {
+
+                if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
+                    if( FFABS( p2 - p0 ) < beta)
+                    {
+                        const int p3 = pix[-4];
+                        /* p0', p1', p2' */
+                        pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
+                        pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
+                        pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
+                    } else {
+                        /* p0' */
+                        pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                    }
+                    if( FFABS( q2 - q0 ) < beta)
+                    {
+                        const int q3 = pix[3];
+                        /* q0', q1', q2' */
+                        pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
+                        pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
+                        pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
+                    } else {
+                        /* q0' */
+                        pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                    }
+                }else{
+                    /* p0', q0' */
+                    pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                    pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                }
+                tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
+            }
+        }
+    }
+}
+static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
+    int i;
+    for( i = 0; i < 8; i++, pix += stride) {
+        int index_a;
+        int alpha;
+        int beta;
+
+        int qp_index;
+        int bS_index = i;
+
+        if( bS[bS_index] == 0 ) {
+            continue;
+        }
+
+        qp_index = MB_FIELD ? (i >> 2) : (i & 1);
+        index_a = qp[qp_index] + h->slice_alpha_c0_offset;
+        alpha = (alpha_table+52)[index_a];
+        beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
+
+        if( bS[bS_index] < 4 ) {
+            const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
+            const int p0 = pix[-1];
+            const int p1 = pix[-2];
+            const int q0 = pix[0];
+            const int q1 = pix[1];
+
+            if( FFABS( p0 - q0 ) < alpha &&
+                FFABS( p1 - p0 ) < beta &&
+                FFABS( q1 - q0 ) < beta ) {
+                const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+
+                pix[-1] = clip_uint8( p0 + i_delta );    /* p0' */
+                pix[0]  = clip_uint8( q0 - i_delta );    /* q0' */
+                tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
+            }
+        }else{
+            const int p0 = pix[-1];
+            const int p1 = pix[-2];
+            const int q0 = pix[0];
+            const int q1 = pix[1];
+
+            if( FFABS( p0 - q0 ) < alpha &&
+                FFABS( p1 - p0 ) < beta &&
+                FFABS( q1 - q0 ) < beta ) {
+
+                pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
+                pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
+                tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
+            }
+        }
+    }
+}
+
+static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
+    int i, d;
+    const int index_a = qp + h->slice_alpha_c0_offset;
+    const int alpha = (alpha_table+52)[index_a];
+    const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
+    const int pix_next  = stride;
+
+    if( bS[0] < 4 ) {
+        int8_t tc[4];
+        for(i=0; i<4; i++)
+            tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
+        h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
+    } else {
+        /* 16px edge length, see filter_mb_edgev */
+            for( d = 0; d < 16; d++ ) {
+                const int p0 = pix[-1*pix_next];
+                const int p1 = pix[-2*pix_next];
+                const int p2 = pix[-3*pix_next];
+                const int q0 = pix[0];
+                const int q1 = pix[1*pix_next];
+                const int q2 = pix[2*pix_next];
+
+                if( FFABS( p0 - q0 ) < alpha &&
+                    FFABS( p1 - p0 ) < beta &&
+                    FFABS( q1 - q0 ) < beta ) {
+
+                    const int p3 = pix[-4*pix_next];
+                    const int q3 = pix[ 3*pix_next];
+
+                    if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
+                        if( FFABS( p2 - p0 ) < beta) {
+                            /* p0', p1', p2' */
+                            pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
+                            pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
+                            pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
+                        } else {
+                            /* p0' */
+                            pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                        }
+                        if( FFABS( q2 - q0 ) < beta) {
+                            /* q0', q1', q2' */
+                            pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
+                            pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
+                            pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
+                        } else {
+                            /* q0' */
+                            pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                        }
+                    }else{
+                        /* p0', q0' */
+                        pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                        pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                    }
+                    tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
+                }
+                pix++;
+            }
+    }
+}
+
+static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
+    int i;
+    const int index_a = qp + h->slice_alpha_c0_offset;
+    const int alpha = (alpha_table+52)[index_a];
+    const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
+
+    if( bS[0] < 4 ) {
+        int8_t tc[4];
+        for(i=0; i<4; i++)
+            tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
+        h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
+    } else {
+        h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
+    }
+}
+
+static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
+    MpegEncContext * const s = &h->s;
+    int mb_xy, mb_type;
+    int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
+
+    if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
+        filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
+        return;
+    }
+    assert(!FRAME_MBAFF);
+
+    mb_xy = mb_x + mb_y*s->mb_stride;
+    mb_type = s->current_picture.mb_type[mb_xy];
+    qp = s->current_picture.qscale_table[mb_xy];
+    qp0 = s->current_picture.qscale_table[mb_xy-1];
+    qp1 = s->current_picture.qscale_table[h->top_mb_xy];
+    qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
+    qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
+    qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
+    qp0 = (qp + qp0 + 1) >> 1;
+    qp1 = (qp + qp1 + 1) >> 1;
+    qpc0 = (qpc + qpc0 + 1) >> 1;
+    qpc1 = (qpc + qpc1 + 1) >> 1;
+    qp_thresh = 15 - h->slice_alpha_c0_offset;
+    if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
+       qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
+        return;
+
+    if( IS_INTRA(mb_type) ) {
+        int16_t bS4[4] = {4,4,4,4};
+        int16_t bS3[4] = {3,3,3,3};
+        if( IS_8x8DCT(mb_type) ) {
+            filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
+            filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
+            filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
+            filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
+        } else {
+            filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
+            filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
+            filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
+            filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
+            filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
+            filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
+            filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
+            filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
+        }
+        filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
+        filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
+        filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
+        filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
+        filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
+        filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
+        filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
+        filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
+        return;
+    } else {
+        DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
+        uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
+        int edges;
+        if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
+            edges = 4;
+            bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
+        } else {
+            int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
+                             (mb_type & MB_TYPE_16x8) ? 1 : 0;
+            int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
+                             && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
+                             ? 3 : 0;
+            int step = IS_8x8DCT(mb_type) ? 2 : 1;
+            edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
+            s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
+                                              (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
+        }
+        if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
+            bSv[0][0] = 0x0004000400040004ULL;
+        if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
+            bSv[1][0] = 0x0004000400040004ULL;
+
+#define FILTER(hv,dir,edge)\
+        if(bSv[dir][edge]) {\
+            filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
+            if(!(edge&1)) {\
+                filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
+                filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
+            }\
+        }
+        if( edges == 1 ) {
+            FILTER(v,0,0);
+            FILTER(h,1,0);
+        } else if( IS_8x8DCT(mb_type) ) {
+            FILTER(v,0,0);
+            FILTER(v,0,2);
+            FILTER(h,1,0);
+            FILTER(h,1,2);
+        } else {
+            FILTER(v,0,0);
+            FILTER(v,0,1);
+            FILTER(v,0,2);
+            FILTER(v,0,3);
+            FILTER(h,1,0);
+            FILTER(h,1,1);
+            FILTER(h,1,2);
+            FILTER(h,1,3);
+        }
+#undef FILTER
+    }
+}
+
+static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= mb_x + mb_y*s->mb_stride;
+    const int mb_type = s->current_picture.mb_type[mb_xy];
+    const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
+    int first_vertical_edge_done = 0;
+    int dir;
+    /* FIXME: A given frame may occupy more than one position in
+     * the reference list. So ref2frm should be populated with
+     * frame numbers, not indices. */
+    static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
+                                    16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
+
+    //for sufficiently low qp, filtering wouldn't do anything
+    //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
+    if(!FRAME_MBAFF){
+        int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
+        int qp = s->current_picture.qscale_table[mb_xy];
+        if(qp <= qp_thresh
+           && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
+           && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
+            return;
+        }
+    }
+
+    if (FRAME_MBAFF
+            // left mb is in picture
+            && h->slice_table[mb_xy-1] != 255
+            // and current and left pair do not have the same interlaced type
+            && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
+            // and left mb is in the same slice if deblocking_filter == 2
+            && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
+        /* First vertical edge is different in MBAFF frames
+         * There are 8 different bS to compute and 2 different Qp
+         */
+        const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
+        const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
+        int16_t bS[8];
+        int qp[2];
+        int chroma_qp[2];
+        int mb_qp, mbn0_qp, mbn1_qp;
+        int i;
+        first_vertical_edge_done = 1;
+
+        if( IS_INTRA(mb_type) )
+            bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
+        else {
+            for( i = 0; i < 8; i++ ) {
+                int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
+
+                if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
+                    bS[i] = 4;
+                else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
+                         /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
+                         h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
+                    bS[i] = 2;
+                else
+                    bS[i] = 1;
+            }
+        }
+
+        mb_qp = s->current_picture.qscale_table[mb_xy];
+        mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
+        mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
+        qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
+        chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
+                         get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
+        qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
+        chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
+                         get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
+
+        /* Filter edge */
+        tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
+        { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
+        filter_mb_mbaff_edgev ( h, &img_y [0], linesize,   bS, qp );
+        filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
+        filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
+    }
+    /* dir : 0 -> vertical edge, 1 -> horizontal edge */
+    for( dir = 0; dir < 2; dir++ )
+    {
+        int edge;
+        const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
+        const int mbm_type = s->current_picture.mb_type[mbm_xy];
+        int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
+
+        const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
+                                  == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
+        // how often to recheck mv-based bS when iterating between edges
+        const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
+                              (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
+        // how often to recheck mv-based bS when iterating along each edge
+        const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
+
+        if (first_vertical_edge_done) {
+            start = 1;
+            first_vertical_edge_done = 0;
+        }
+
+        if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
+            start = 1;
+
+        if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
+            && !IS_INTERLACED(mb_type)
+            && IS_INTERLACED(mbm_type)
+            ) {
+            // This is a special case in the norm where the filtering must
+            // be done twice (one each of the field) even if we are in a
+            // frame macroblock.
+            //
+            static const int nnz_idx[4] = {4,5,6,3};
+            unsigned int tmp_linesize   = 2 *   linesize;
+            unsigned int tmp_uvlinesize = 2 * uvlinesize;
+            int mbn_xy = mb_xy - 2 * s->mb_stride;
+            int qp, chroma_qp;
+            int i, j;
+            int16_t bS[4];
+
+            for(j=0; j<2; j++, mbn_xy += s->mb_stride){
+                if( IS_INTRA(mb_type) ||
+                    IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
+                    bS[0] = bS[1] = bS[2] = bS[3] = 3;
+                } else {
+                    const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
+                    for( i = 0; i < 4; i++ ) {
+                        if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
+                            mbn_nnz[nnz_idx[i]] != 0 )
+                            bS[i] = 2;
+                        else
+                            bS[i] = 1;
+                    }
+                }
+                // Do not use s->qscale as luma quantizer because it has not the same
+                // value in IPCM macroblocks.
+                qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
+                tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
+                { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
+                filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
+                chroma_qp = ( h->chroma_qp +
+                              get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
+                filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
+                filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
+            }
+
+            start = 1;
+        }
+
+        /* Calculate bS */
+        for( edge = start; edge < edges; edge++ ) {
+            /* mbn_xy: neighbor macroblock */
+            const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
+            const int mbn_type = s->current_picture.mb_type[mbn_xy];
+            int16_t bS[4];
+            int qp;
+
+            if( (edge&1) && IS_8x8DCT(mb_type) )
+                continue;
+
+            if( IS_INTRA(mb_type) ||
+                IS_INTRA(mbn_type) ) {
+                int value;
+                if (edge == 0) {
+                    if (   (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
+                        || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
+                    ) {
+                        value = 4;
+                    } else {
+                        value = 3;
+                    }
+                } else {
+                    value = 3;
+                }
+                bS[0] = bS[1] = bS[2] = bS[3] = value;
+            } else {
+                int i, l;
+                int mv_done;
+
+                if( edge & mask_edge ) {
+                    bS[0] = bS[1] = bS[2] = bS[3] = 0;
+                    mv_done = 1;
+                }
+                else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
+                    bS[0] = bS[1] = bS[2] = bS[3] = 1;
+                    mv_done = 1;
+                }
+                else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
+                    int b_idx= 8 + 4 + edge * (dir ? 8:1);
+                    int bn_idx= b_idx - (dir ? 8:1);
+                    int v = 0;
+                    for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
+                        v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
+                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
+                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
+                    }
+                    bS[0] = bS[1] = bS[2] = bS[3] = v;
+                    mv_done = 1;
+                }
+                else
+                    mv_done = 0;
+
+                for( i = 0; i < 4; i++ ) {
+                    int x = dir == 0 ? edge : i;
+                    int y = dir == 0 ? i    : edge;
+                    int b_idx= 8 + 4 + x + 8*y;
+                    int bn_idx= b_idx - (dir ? 8:1);
+
+                    if( h->non_zero_count_cache[b_idx] != 0 ||
+                        h->non_zero_count_cache[bn_idx] != 0 ) {
+                        bS[i] = 2;
+                    }
+                    else if(!mv_done)
+                    {
+                        bS[i] = 0;
+                        for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
+                            if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
+                                FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
+                                FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
+                                bS[i] = 1;
+                                break;
+                            }
+                        }
+                    }
+                }
+
+                if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
+                    continue;
+            }
+
+            /* Filter edge */
+            // Do not use s->qscale as luma quantizer because it has not the same
+            // value in IPCM macroblocks.
+            qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
+            //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
+            tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
+            { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
+            if( dir == 0 ) {
+                filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
+                if( (edge&1) == 0 ) {
+                    int chroma_qp = ( h->chroma_qp +
+                                      get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
+                    filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
+                    filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
+                }
+            } else {
+                filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
+                if( (edge&1) == 0 ) {
+                    int chroma_qp = ( h->chroma_qp +
+                                      get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
+                    filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
+                    filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
+                }
+            }
+        }
+    }
+}
+
+static int decode_slice(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
+
+    s->mb_skip_run= -1;
+
+    if( h->pps.cabac ) {
+        int i;
+
+        /* realign */
+        align_get_bits( &s->gb );
+
+        /* init cabac */
+        ff_init_cabac_states( &h->cabac);
+        ff_init_cabac_decoder( &h->cabac,
+                               s->gb.buffer + get_bits_count(&s->gb)/8,
+                               ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
+        /* calculate pre-state */
+        for( i= 0; i < 460; i++ ) {
+            int pre;
+            if( h->slice_type == I_TYPE )
+                pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
+            else
+                pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
+
+            if( pre <= 63 )
+                h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
+            else
+                h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
+        }
+
+        for(;;){
+//START_TIMER
+            int ret = decode_mb_cabac(h);
+            int eos;
+//STOP_TIMER("decode_mb_cabac")
+
+            if(ret>=0) hl_decode_mb(h);
+
+            if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
+                s->mb_y++;
+
+                if(ret>=0) ret = decode_mb_cabac(h);
+
+                if(ret>=0) hl_decode_mb(h);
+                s->mb_y--;
+            }
+            eos = get_cabac_terminate( &h->cabac );
+
+            if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
+                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
+                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
+                return -1;
+            }
+
+            if( ++s->mb_x >= s->mb_width ) {
+                s->mb_x = 0;
+                ff_draw_horiz_band(s, 16*s->mb_y, 16);
+                ++s->mb_y;
+                if(FRAME_MBAFF) {
+                    ++s->mb_y;
+                }
+            }
+
+            if( eos || s->mb_y >= s->mb_height ) {
+                tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
+                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+                return 0;
+            }
+        }
+
+    } else {
+        for(;;){
+            int ret = decode_mb_cavlc(h);
+
+            if(ret>=0) hl_decode_mb(h);
+
+            if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
+                s->mb_y++;
+                ret = decode_mb_cavlc(h);
+
+                if(ret>=0) hl_decode_mb(h);
+                s->mb_y--;
+            }
+
+            if(ret<0){
+                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
+                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
+
+                return -1;
+            }
+
+            if(++s->mb_x >= s->mb_width){
+                s->mb_x=0;
+                ff_draw_horiz_band(s, 16*s->mb_y, 16);
+                ++s->mb_y;
+                if(FRAME_MBAFF) {
+                    ++s->mb_y;
+                }
+                if(s->mb_y >= s->mb_height){
+                    tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
+
+                    if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
+                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+
+                        return 0;
+                    }else{
+                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+
+                        return -1;
+                    }
+                }
+            }
+
+            if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
+                tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
+                if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
+                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+
+                    return 0;
+                }else{
+                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
+
+                    return -1;
+                }
+            }
+        }
+    }
+
+#if 0
+    for(;s->mb_y < s->mb_height; s->mb_y++){
+        for(;s->mb_x < s->mb_width; s->mb_x++){
+            int ret= decode_mb(h);
+
+            hl_decode_mb(h);
+
+            if(ret<0){
+                av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
+                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
+
+                return -1;
+            }
+
+            if(++s->mb_x >= s->mb_width){
+                s->mb_x=0;
+                if(++s->mb_y >= s->mb_height){
+                    if(get_bits_count(s->gb) == s->gb.size_in_bits){
+                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+
+                        return 0;
+                    }else{
+                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+
+                        return -1;
+                    }
+                }
+            }
+
+            if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
+                if(get_bits_count(s->gb) == s->gb.size_in_bits){
+                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+
+                    return 0;
+                }else{
+                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
+
+                    return -1;
+                }
+            }
+        }
+        s->mb_x=0;
+        ff_draw_horiz_band(s, 16*s->mb_y, 16);
+    }
+#endif
+    return -1; //not reached
+}
+
+static int decode_unregistered_user_data(H264Context *h, int size){
+    MpegEncContext * const s = &h->s;
+    uint8_t user_data[16+256];
+    int e, build, i;
+
+    if(size<16)
+        return -1;
+
+    for(i=0; i<sizeof(user_data)-1 && i<size; i++){
+        user_data[i]= get_bits(&s->gb, 8);
+    }
+
+    user_data[i]= 0;
+    e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
+    if(e==1 && build>=0)
+        h->x264_build= build;
+
+    if(s->avctx->debug & FF_DEBUG_BUGS)
+        av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
+
+    for(; i<size; i++)
+        skip_bits(&s->gb, 8);
+
+    return 0;
+}
+
+static int decode_sei(H264Context *h){
+    MpegEncContext * const s = &h->s;
+
+    while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
+        int size, type;
+
+        type=0;
+        do{
+            type+= show_bits(&s->gb, 8);
+        }while(get_bits(&s->gb, 8) == 255);
+
+        size=0;
+        do{
+            size+= show_bits(&s->gb, 8);
+        }while(get_bits(&s->gb, 8) == 255);
+
+        switch(type){
+        case 5:
+            if(decode_unregistered_user_data(h, size) < 0)
+                return -1;
+            break;
+        default:
+            skip_bits(&s->gb, 8*size);
+        }
+
+        //FIXME check bits here
+        align_get_bits(&s->gb);
+    }
+
+    return 0;
+}
+
+static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
+    MpegEncContext * const s = &h->s;
+    int cpb_count, i;
+    cpb_count = get_ue_golomb(&s->gb) + 1;
+    get_bits(&s->gb, 4); /* bit_rate_scale */
+    get_bits(&s->gb, 4); /* cpb_size_scale */
+    for(i=0; i<cpb_count; i++){
+        get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
+        get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
+        get_bits1(&s->gb);     /* cbr_flag */
+    }
+    get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
+    get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
+    get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
+    get_bits(&s->gb, 5); /* time_offset_length */
+}
+
+static inline int decode_vui_parameters(H264Context *h, SPS *sps){
+    MpegEncContext * const s = &h->s;
+    int aspect_ratio_info_present_flag, aspect_ratio_idc;
+    int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
+
+    aspect_ratio_info_present_flag= get_bits1(&s->gb);
+
+    if( aspect_ratio_info_present_flag ) {
+        aspect_ratio_idc= get_bits(&s->gb, 8);
+        if( aspect_ratio_idc == EXTENDED_SAR ) {
+            sps->sar.num= get_bits(&s->gb, 16);
+            sps->sar.den= get_bits(&s->gb, 16);
+        }else if(aspect_ratio_idc < 14){
+            sps->sar=  pixel_aspect[aspect_ratio_idc];
+        }else{
+            av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
+            return -1;
+        }
+    }else{
+        sps->sar.num=
+        sps->sar.den= 0;
+    }
+//            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
+
+    if(get_bits1(&s->gb)){      /* overscan_info_present_flag */
+        get_bits1(&s->gb);      /* overscan_appropriate_flag */
+    }
+
+    if(get_bits1(&s->gb)){      /* video_signal_type_present_flag */
+        get_bits(&s->gb, 3);    /* video_format */
+        get_bits1(&s->gb);      /* video_full_range_flag */
+        if(get_bits1(&s->gb)){  /* colour_description_present_flag */
+            get_bits(&s->gb, 8); /* colour_primaries */
+            get_bits(&s->gb, 8); /* transfer_characteristics */
+            get_bits(&s->gb, 8); /* matrix_coefficients */
+        }
+    }
+
+    if(get_bits1(&s->gb)){      /* chroma_location_info_present_flag */
+        get_ue_golomb(&s->gb);  /* chroma_sample_location_type_top_field */
+        get_ue_golomb(&s->gb);  /* chroma_sample_location_type_bottom_field */
+    }
+
+    sps->timing_info_present_flag = get_bits1(&s->gb);
+    if(sps->timing_info_present_flag){
+        sps->num_units_in_tick = get_bits_long(&s->gb, 32);
+        sps->time_scale = get_bits_long(&s->gb, 32);
+        sps->fixed_frame_rate_flag = get_bits1(&s->gb);
+    }
+
+    nal_hrd_parameters_present_flag = get_bits1(&s->gb);
+    if(nal_hrd_parameters_present_flag)
+        decode_hrd_parameters(h, sps);
+    vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
+    if(vcl_hrd_parameters_present_flag)
+        decode_hrd_parameters(h, sps);
+    if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
+        get_bits1(&s->gb);     /* low_delay_hrd_flag */
+    get_bits1(&s->gb);         /* pic_struct_present_flag */
+
+    sps->bitstream_restriction_flag = get_bits1(&s->gb);
+    if(sps->bitstream_restriction_flag){
+        get_bits1(&s->gb);     /* motion_vectors_over_pic_boundaries_flag */
+        get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
+        get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
+        get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
+        get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
+        sps->num_reorder_frames = get_ue_golomb(&s->gb);
+        get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
+    }
+
+    return 0;
+}
+
+static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
+                                const uint8_t *jvt_list, const uint8_t *fallback_list){
+    MpegEncContext * const s = &h->s;
+    int i, last = 8, next = 8;
+    const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
+    if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
+        memcpy(factors, fallback_list, size*sizeof(uint8_t));
+    else
+    for(i=0;i<size;i++){
+        if(next)
+            next = (last + get_se_golomb(&s->gb)) & 0xff;
+        if(!i && !next){ /* matrix not written, we use the preset one */
+            memcpy(factors, jvt_list, size*sizeof(uint8_t));
+            break;
+        }
+        last = factors[scan[i]] = next ? next : last;
+    }
+}
+
+static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
+                                   uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
+    MpegEncContext * const s = &h->s;
+    int fallback_sps = !is_sps && sps->scaling_matrix_present;
+    const uint8_t *fallback[4] = {
+        fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
+        fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
+        fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
+        fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
+    };
+    if(get_bits1(&s->gb)){
+        sps->scaling_matrix_present |= is_sps;
+        decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
+        decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
+        decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
+        decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
+        decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
+        decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
+        if(is_sps || pps->transform_8x8_mode){
+            decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
+            decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
+        }
+    } else if(fallback_sps) {
+        memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
+        memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
+    }
+}
+
+static inline int decode_seq_parameter_set(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    int profile_idc, level_idc;
+    int sps_id, i;
+    SPS *sps;
+
+    profile_idc= get_bits(&s->gb, 8);
+    get_bits1(&s->gb);   //constraint_set0_flag
+    get_bits1(&s->gb);   //constraint_set1_flag
+    get_bits1(&s->gb);   //constraint_set2_flag
+    get_bits1(&s->gb);   //constraint_set3_flag
+    get_bits(&s->gb, 4); // reserved
+    level_idc= get_bits(&s->gb, 8);
+    sps_id= get_ue_golomb(&s->gb);
+
+    sps= &h->sps_buffer[ sps_id ];
+    sps->profile_idc= profile_idc;
+    sps->level_idc= level_idc;
+
+    if(sps->profile_idc >= 100){ //high profile
+        if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
+            get_bits1(&s->gb);  //residual_color_transform_flag
+        get_ue_golomb(&s->gb);  //bit_depth_luma_minus8
+        get_ue_golomb(&s->gb);  //bit_depth_chroma_minus8
+        sps->transform_bypass = get_bits1(&s->gb);
+        decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
+    }else
+        sps->scaling_matrix_present = 0;
+
+    sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
+    sps->poc_type= get_ue_golomb(&s->gb);
+
+    if(sps->poc_type == 0){ //FIXME #define
+        sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
+    } else if(sps->poc_type == 1){//FIXME #define
+        sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
+        sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
+        sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
+        sps->poc_cycle_length= get_ue_golomb(&s->gb);
+
+        for(i=0; i<sps->poc_cycle_length; i++)
+            sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
+    }
+    if(sps->poc_type > 2){
+        av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
+        return -1;
+    }
+
+    sps->ref_frame_count= get_ue_golomb(&s->gb);
+    if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
+        av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
+    }
+    sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
+    sps->mb_width= get_ue_golomb(&s->gb) + 1;
+    sps->mb_height= get_ue_golomb(&s->gb) + 1;
+    if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
+       avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
+        return -1;
+
+    sps->frame_mbs_only_flag= get_bits1(&s->gb);
+    if(!sps->frame_mbs_only_flag)
+        sps->mb_aff= get_bits1(&s->gb);
+    else
+        sps->mb_aff= 0;
+
+    sps->direct_8x8_inference_flag= get_bits1(&s->gb);
+
+#ifndef ALLOW_INTERLACE
+    if(sps->mb_aff)
+        av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
+#endif
+    if(!sps->direct_8x8_inference_flag && sps->mb_aff)
+        av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
+
+    sps->crop= get_bits1(&s->gb);
+    if(sps->crop){
+        sps->crop_left  = get_ue_golomb(&s->gb);
+        sps->crop_right = get_ue_golomb(&s->gb);
+        sps->crop_top   = get_ue_golomb(&s->gb);
+        sps->crop_bottom= get_ue_golomb(&s->gb);
+        if(sps->crop_left || sps->crop_top){
+            av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
+        }
+    }else{
+        sps->crop_left  =
+        sps->crop_right =
+        sps->crop_top   =
+        sps->crop_bottom= 0;
+    }
+
+    sps->vui_parameters_present_flag= get_bits1(&s->gb);
+    if( sps->vui_parameters_present_flag )
+        decode_vui_parameters(h, sps);
+
+    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
+        av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
+               sps_id, sps->profile_idc, sps->level_idc,
+               sps->poc_type,
+               sps->ref_frame_count,
+               sps->mb_width, sps->mb_height,
+               sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
+               sps->direct_8x8_inference_flag ? "8B8" : "",
+               sps->crop_left, sps->crop_right,
+               sps->crop_top, sps->crop_bottom,
+               sps->vui_parameters_present_flag ? "VUI" : ""
+               );
+    }
+    return 0;
+}
+
+static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
+    MpegEncContext * const s = &h->s;
+    int pps_id= get_ue_golomb(&s->gb);
+    PPS *pps= &h->pps_buffer[pps_id];
+
+    pps->sps_id= get_ue_golomb(&s->gb);
+    pps->cabac= get_bits1(&s->gb);
+    pps->pic_order_present= get_bits1(&s->gb);
+    pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
+    if(pps->slice_group_count > 1 ){
+        pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
+        av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
+        switch(pps->mb_slice_group_map_type){
+        case 0:
+#if 0
+|   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |        |
+|    run_length[ i ]                                |1  |ue(v)   |
+#endif
+            break;
+        case 2:
+#if 0
+|   for( i = 0; i < num_slice_groups_minus1; i++ )  |   |        |
+|{                                                  |   |        |
+|    top_left_mb[ i ]                               |1  |ue(v)   |
+|    bottom_right_mb[ i ]                           |1  |ue(v)   |
+|   }                                               |   |        |
+#endif
+            break;
+        case 3:
+        case 4:
+        case 5:
+#if 0
+|   slice_group_change_direction_flag               |1  |u(1)    |
+|   slice_group_change_rate_minus1                  |1  |ue(v)   |
+#endif
+            break;
+        case 6:
+#if 0
+|   slice_group_id_cnt_minus1                       |1  |ue(v)   |
+|   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |        |
+|)                                                  |   |        |
+|    slice_group_id[ i ]                            |1  |u(v)    |
+#endif
+            break;
+        }
+    }
+    pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
+    pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
+    if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
+        av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
+        return -1;
+    }
+
+    pps->weighted_pred= get_bits1(&s->gb);
+    pps->weighted_bipred_idc= get_bits(&s->gb, 2);
+    pps->init_qp= get_se_golomb(&s->gb) + 26;
+    pps->init_qs= get_se_golomb(&s->gb) + 26;
+    pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
+    pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
+    pps->constrained_intra_pred= get_bits1(&s->gb);
+    pps->redundant_pic_cnt_present = get_bits1(&s->gb);
+
+    pps->transform_8x8_mode= 0;
+    h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
+    memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
+    memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
+
+    if(get_bits_count(&s->gb) < bit_length){
+        pps->transform_8x8_mode= get_bits1(&s->gb);
+        decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
+        get_se_golomb(&s->gb);  //second_chroma_qp_index_offset
+    }
+
+    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
+        av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
+               pps_id, pps->sps_id,
+               pps->cabac ? "CABAC" : "CAVLC",
+               pps->slice_group_count,
+               pps->ref_count[0], pps->ref_count[1],
+               pps->weighted_pred ? "weighted" : "",
+               pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
+               pps->deblocking_filter_parameters_present ? "LPAR" : "",
+               pps->constrained_intra_pred ? "CONSTR" : "",
+               pps->redundant_pic_cnt_present ? "REDU" : "",
+               pps->transform_8x8_mode ? "8x8DCT" : ""
+               );
+    }
+
+    return 0;
+}
+
+/**
+ * finds the end of the current frame in the bitstream.
+ * @return the position of the first byte of the next frame, or -1
+ */
+static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
+    int i;
+    uint32_t state;
+    ParseContext *pc = &(h->s.parse_context);
+//printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
+//    mb_addr= pc->mb_addr - 1;
+    state= pc->state;
+    for(i=0; i<=buf_size; i++){
+        if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
+            tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
+            if(pc->frame_start_found){
+                // If there isn't one more byte in the buffer
+                // the test on first_mb_in_slice cannot be done yet
+                // do it at next call.
+                if (i >= buf_size) break;
+                if (buf[i] & 0x80) {
+                    // first_mb_in_slice is 0, probably the first nal of a new
+                    // slice
+                    tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
+                    pc->state=-1;
+                    pc->frame_start_found= 0;
+                    return i-4;
+                }
+            }
+            pc->frame_start_found = 1;
+        }
+        if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
+           if(pc->frame_start_found){
+                pc->state=-1;
+                pc->frame_start_found= 0;
+                return i-4;
+           }
+        }
+        if (i<buf_size)
+            state= (state<<8) | buf[i];
+    }
+
+    pc->state= state;
+    return END_NOT_FOUND;
+}
+
+#ifdef CONFIG_H264_PARSER
+static int h264_parse(AVCodecParserContext *s,
+                      AVCodecContext *avctx,
+                      uint8_t **poutbuf, int *poutbuf_size,
+                      const uint8_t *buf, int buf_size)
+{
+    H264Context *h = s->priv_data;
+    ParseContext *pc = &h->s.parse_context;
+    int next;
+
+    next= find_frame_end(h, buf, buf_size);
+
+    if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
+        *poutbuf = NULL;
+        *poutbuf_size = 0;
+        return buf_size;
+    }
+
+    *poutbuf = (uint8_t *)buf;
+    *poutbuf_size = buf_size;
+    return next;
+}
+
+static int h264_split(AVCodecContext *avctx,
+                      const uint8_t *buf, int buf_size)
+{
+    int i;
+    uint32_t state = -1;
+    int has_sps= 0;
+
+    for(i=0; i<=buf_size; i++){
+        if((state&0xFFFFFF1F) == 0x107)
+            has_sps=1;
+/*        if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
+        }*/
+        if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
+            if(has_sps){
+                while(i>4 && buf[i-5]==0) i--;
+                return i-4;
+            }
+        }
+        if (i<buf_size)
+            state= (state<<8) | buf[i];
+    }
+    return 0;
+}
+#endif /* CONFIG_H264_PARSER */
+
+static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
+    MpegEncContext * const s = &h->s;
+    AVCodecContext * const avctx= s->avctx;
+    int buf_index=0;
+#if 0
+    int i;
+    for(i=0; i<50; i++){
+        av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
+    }
+#endif
+    h->slice_num = 0;
+    s->current_picture_ptr= NULL;
+    for(;;){
+        int consumed;
+        int dst_length;
+        int bit_length;
+        uint8_t *ptr;
+        int i, nalsize = 0;
+
+      if(h->is_avc) {
+        if(buf_index >= buf_size) break;
+        nalsize = 0;
+        for(i = 0; i < h->nal_length_size; i++)
+            nalsize = (nalsize << 8) | buf[buf_index++];
+        if(nalsize <= 1){
+            if(nalsize == 1){
+                buf_index++;
+                continue;
+            }else{
+                av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
+                break;
+            }
+        }
+      } else {
+        // start code prefix search
+        for(; buf_index + 3 < buf_size; buf_index++){
+            // this should allways succeed in the first iteration
+            if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
+                break;
+        }
+
+        if(buf_index+3 >= buf_size) break;
+
+        buf_index+=3;
+      }
+
+        ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
+        while(ptr[dst_length - 1] == 0 && dst_length > 1)
+            dst_length--;
+        bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
+
+        if(s->avctx->debug&FF_DEBUG_STARTCODE){
+            av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
+        }
+
+        if (h->is_avc && (nalsize != consumed))
+            av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
+
+        buf_index += consumed;
+
+        if(  (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME dont discard SEI id
+           ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
+            continue;
+
+        switch(h->nal_unit_type){
+        case NAL_IDR_SLICE:
+            idr(h); //FIXME ensure we don't loose some frames if there is reordering
+        case NAL_SLICE:
+            init_get_bits(&s->gb, ptr, bit_length);
+            h->intra_gb_ptr=
+            h->inter_gb_ptr= &s->gb;
+            s->data_partitioning = 0;
+
+            if(decode_slice_header(h) < 0){
+                av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
+                break;
+            }
+            s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
+            if(h->redundant_pic_count==0 && s->hurry_up < 5
+               && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
+               && (avctx->skip_frame < AVDISCARD_BIDIR  || h->slice_type!=B_TYPE)
+               && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
+               && avctx->skip_frame < AVDISCARD_ALL)
+                decode_slice(h);
+            break;
+        case NAL_DPA:
+            init_get_bits(&s->gb, ptr, bit_length);
+            h->intra_gb_ptr=
+            h->inter_gb_ptr= NULL;
+            s->data_partitioning = 1;
+
+            if(decode_slice_header(h) < 0){
+                av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
+            }
+            break;
+        case NAL_DPB:
+            init_get_bits(&h->intra_gb, ptr, bit_length);
+            h->intra_gb_ptr= &h->intra_gb;
+            break;
+        case NAL_DPC:
+            init_get_bits(&h->inter_gb, ptr, bit_length);
+            h->inter_gb_ptr= &h->inter_gb;
+
+            if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
+               && s->hurry_up < 5
+               && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
+               && (avctx->skip_frame < AVDISCARD_BIDIR  || h->slice_type!=B_TYPE)
+               && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
+               && avctx->skip_frame < AVDISCARD_ALL)
+                decode_slice(h);
+            break;
+        case NAL_SEI:
+            init_get_bits(&s->gb, ptr, bit_length);
+            decode_sei(h);
+            break;
+        case NAL_SPS:
+            init_get_bits(&s->gb, ptr, bit_length);
+            decode_seq_parameter_set(h);
+
+            if(s->flags& CODEC_FLAG_LOW_DELAY)
+                s->low_delay=1;
+
+            if(avctx->has_b_frames < 2)
+                avctx->has_b_frames= !s->low_delay;
+            break;
+        case NAL_PPS:
+            init_get_bits(&s->gb, ptr, bit_length);
+
+            decode_picture_parameter_set(h, bit_length);
+
+            break;
+        case NAL_AUD:
+        case NAL_END_SEQUENCE:
+        case NAL_END_STREAM:
+        case NAL_FILLER_DATA:
+        case NAL_SPS_EXT:
+        case NAL_AUXILIARY_SLICE:
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
+        }
+    }
+
+    if(!s->current_picture_ptr) return buf_index; //no frame
+
+    s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
+    s->current_picture_ptr->pict_type= s->pict_type;
+
+    h->prev_frame_num_offset= h->frame_num_offset;
+    h->prev_frame_num= h->frame_num;
+    if(s->current_picture_ptr->reference){
+        h->prev_poc_msb= h->poc_msb;
+        h->prev_poc_lsb= h->poc_lsb;
+    }
+    if(s->current_picture_ptr->reference)
+        execute_ref_pic_marking(h, h->mmco, h->mmco_index);
+
+    ff_er_frame_end(s);
+
+    MPV_frame_end(s);
+
+    return buf_index;
+}
+
+/**
+ * returns the number of bytes consumed for building the current frame
+ */
+static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
+    if(s->flags&CODEC_FLAG_TRUNCATED){
+        pos -= s->parse_context.last_index;
+        if(pos<0) pos=0; // FIXME remove (unneeded?)
+
+        return pos;
+    }else{
+        if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
+        if(pos+10>buf_size) pos=buf_size; // oops ;)
+
+        return pos;
+    }
+}
+
+static int decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             uint8_t *buf, int buf_size)
+{
+    H264Context *h = avctx->priv_data;
+    MpegEncContext *s = &h->s;
+    AVFrame *pict = data;
+    int buf_index;
+
+    s->flags= avctx->flags;
+    s->flags2= avctx->flags2;
+
+   /* no supplementary picture */
+    if (buf_size == 0) {
+        return 0;
+    }
+
+    if(s->flags&CODEC_FLAG_TRUNCATED){
+        int next= find_frame_end(h, buf, buf_size);
+
+        if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
+            return buf_size;
+//printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
+    }
+
+    if(h->is_avc && !h->got_avcC) {
+        int i, cnt, nalsize;
+        unsigned char *p = avctx->extradata;
+        if(avctx->extradata_size < 7) {
+            av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
+            return -1;
+        }
+        if(*p != 1) {
+            av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
+            return -1;
+        }
+        /* sps and pps in the avcC always have length coded with 2 bytes,
+           so put a fake nal_length_size = 2 while parsing them */
+        h->nal_length_size = 2;
+        // Decode sps from avcC
+        cnt = *(p+5) & 0x1f; // Number of sps
+        p += 6;
+        for (i = 0; i < cnt; i++) {
+            nalsize = BE_16(p) + 2;
+            if(decode_nal_units(h, p, nalsize) < 0) {
+                av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
+                return -1;
+            }
+            p += nalsize;
+        }
+        // Decode pps from avcC
+        cnt = *(p++); // Number of pps
+        for (i = 0; i < cnt; i++) {
+            nalsize = BE_16(p) + 2;
+            if(decode_nal_units(h, p, nalsize)  != nalsize) {
+                av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
+                return -1;
+            }
+            p += nalsize;
+        }
+        // Now store right nal length size, that will be use to parse all other nals
+        h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
+        // Do not reparse avcC
+        h->got_avcC = 1;
+    }
+
+    if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
+        if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
+            return -1;
+    }
+
+    buf_index=decode_nal_units(h, buf, buf_size);
+    if(buf_index < 0)
+        return -1;
+
+    //FIXME do something with unavailable reference frames
+
+//    if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
+    if(!s->current_picture_ptr){
+        av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
+        return -1;
+    }
+
+    {
+        Picture *out = s->current_picture_ptr;
+#if 0 //decode order
+        *data_size = sizeof(AVFrame);
+#else
+        /* Sort B-frames into display order */
+        Picture *cur = s->current_picture_ptr;
+        Picture *prev = h->delayed_output_pic;
+        int i, pics, cross_idr, out_of_order, out_idx;
+
+        if(h->sps.bitstream_restriction_flag
+           && s->avctx->has_b_frames < h->sps.num_reorder_frames){
+            s->avctx->has_b_frames = h->sps.num_reorder_frames;
+            s->low_delay = 0;
+        }
+
+        pics = 0;
+        while(h->delayed_pic[pics]) pics++;
+        h->delayed_pic[pics++] = cur;
+        if(cur->reference == 0)
+            cur->reference = 1;
+
+        cross_idr = 0;
+        for(i=0; h->delayed_pic[i]; i++)
+            if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
+                cross_idr = 1;
+
+        out = h->delayed_pic[0];
+        out_idx = 0;
+        for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
+            if(h->delayed_pic[i]->poc < out->poc){
+                out = h->delayed_pic[i];
+                out_idx = i;
+            }
+
+        out_of_order = !cross_idr && prev && out->poc < prev->poc;
+        if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
+            { }
+        else if(prev && pics <= s->avctx->has_b_frames)
+            out = prev;
+        else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
+           || (s->low_delay &&
+            ((!cross_idr && prev && out->poc > prev->poc + 2)
+             || cur->pict_type == B_TYPE)))
+        {
+            s->low_delay = 0;
+            s->avctx->has_b_frames++;
+            out = prev;
+        }
+        else if(out_of_order)
+            out = prev;
+
+        if(out_of_order || pics > s->avctx->has_b_frames){
+            for(i=out_idx; h->delayed_pic[i]; i++)
+                h->delayed_pic[i] = h->delayed_pic[i+1];
+        }
+
+        if(prev == out)
+            *data_size = 0;
+        else
+            *data_size = sizeof(AVFrame);
+        if(prev && prev != out && prev->reference == 1)
+            prev->reference = 0;
+        h->delayed_output_pic = out;
+#endif
+
+        if(out)
+            *pict= *(AVFrame*)out;
+        else
+            av_log(avctx, AV_LOG_DEBUG, "no picture\n");
+    }
+
+    assert(pict->data[0] || !*data_size);
+    ff_print_debug_info(s, pict);
+//printf("out %d\n", (int)pict->data[0]);
+#if 0 //?
+
+    /* Return the Picture timestamp as the frame number */
+    /* we substract 1 because it is added on utils.c    */
+    avctx->frame_number = s->picture_number - 1;
+#endif
+    return get_consumed_bytes(s, buf_index, buf_size);
+}
+#if 0
+static inline void fill_mb_avail(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
+
+    if(s->mb_y){
+        h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
+        h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
+        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
+    }else{
+        h->mb_avail[0]=
+        h->mb_avail[1]=
+        h->mb_avail[2]= 0;
+    }
+    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
+    h->mb_avail[4]= 1; //FIXME move out
+    h->mb_avail[5]= 0; //FIXME move out
+}
+#endif
+
+#if 0 //selftest
+#define COUNT 8000
+#define SIZE (COUNT*40)
+int main(){
+    int i;
+    uint8_t temp[SIZE];
+    PutBitContext pb;
+    GetBitContext gb;
+//    int int_temp[10000];
+    DSPContext dsp;
+    AVCodecContext avctx;
+
+    dsputil_init(&dsp, &avctx);
+
+    init_put_bits(&pb, temp, SIZE);
+    printf("testing unsigned exp golomb\n");
+    for(i=0; i<COUNT; i++){
+        START_TIMER
+        set_ue_golomb(&pb, i);
+        STOP_TIMER("set_ue_golomb");
+    }
+    flush_put_bits(&pb);
+
+    init_get_bits(&gb, temp, 8*SIZE);
+    for(i=0; i<COUNT; i++){
+        int j, s;
+
+        s= show_bits(&gb, 24);
+
+        START_TIMER
+        j= get_ue_golomb(&gb);
+        if(j != i){
+            printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
+//            return -1;
+        }
+        STOP_TIMER("get_ue_golomb");
+    }
+
+
+    init_put_bits(&pb, temp, SIZE);
+    printf("testing signed exp golomb\n");
+    for(i=0; i<COUNT; i++){
+        START_TIMER
+        set_se_golomb(&pb, i - COUNT/2);
+        STOP_TIMER("set_se_golomb");
+    }
+    flush_put_bits(&pb);
+
+    init_get_bits(&gb, temp, 8*SIZE);
+    for(i=0; i<COUNT; i++){
+        int j, s;
+
+        s= show_bits(&gb, 24);
+
+        START_TIMER
+        j= get_se_golomb(&gb);
+        if(j != i - COUNT/2){
+            printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
+//            return -1;
+        }
+        STOP_TIMER("get_se_golomb");
+    }
+
+    printf("testing 4x4 (I)DCT\n");
+
+    DCTELEM block[16];
+    uint8_t src[16], ref[16];
+    uint64_t error= 0, max_error=0;
+
+    for(i=0; i<COUNT; i++){
+        int j;
+//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
+        for(j=0; j<16; j++){
+            ref[j]= random()%255;
+            src[j]= random()%255;
+        }
+
+        h264_diff_dct_c(block, src, ref, 4);
+
+        //normalize
+        for(j=0; j<16; j++){
+//            printf("%d ", block[j]);
+            block[j]= block[j]*4;
+            if(j&1) block[j]= (block[j]*4 + 2)/5;
+            if(j&4) block[j]= (block[j]*4 + 2)/5;
+        }
+//        printf("\n");
+
+        s->dsp.h264_idct_add(ref, block, 4);
+/*        for(j=0; j<16; j++){
+            printf("%d ", ref[j]);
+        }
+        printf("\n");*/
+
+        for(j=0; j<16; j++){
+            int diff= FFABS(src[j] - ref[j]);
+
+            error+= diff*diff;
+            max_error= FFMAX(max_error, diff);
+        }
+    }
+    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
+#if 0
+    printf("testing quantizer\n");
+    for(qp=0; qp<52; qp++){
+        for(i=0; i<16; i++)
+            src1_block[i]= src2_block[i]= random()%255;
+
+    }
+#endif
+    printf("Testing NAL layer\n");
+
+    uint8_t bitstream[COUNT];
+    uint8_t nal[COUNT*2];
+    H264Context h;
+    memset(&h, 0, sizeof(H264Context));
+
+    for(i=0; i<COUNT; i++){
+        int zeros= i;
+        int nal_length;
+        int consumed;
+        int out_length;
+        uint8_t *out;
+        int j;
+
+        for(j=0; j<COUNT; j++){
+            bitstream[j]= (random() % 255) + 1;
+        }
+
+        for(j=0; j<zeros; j++){
+            int pos= random() % COUNT;
+            while(bitstream[pos] == 0){
+                pos++;
+                pos %= COUNT;
+            }
+            bitstream[pos]=0;
+        }
+
+        START_TIMER
+
+        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
+        if(nal_length<0){
+            printf("encoding failed\n");
+            return -1;
+        }
+
+        out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
+
+        STOP_TIMER("NAL")
+
+        if(out_length != COUNT){
+            printf("incorrect length %d %d\n", out_length, COUNT);
+            return -1;
+        }
+
+        if(consumed != nal_length){
+            printf("incorrect consumed length %d %d\n", nal_length, consumed);
+            return -1;
+        }
+
+        if(memcmp(bitstream, out, COUNT)){
+            printf("missmatch\n");
+            return -1;
+        }
+    }
+
+    printf("Testing RBSP\n");
+
+
+    return 0;
+}
+#endif
+
+
+static int decode_end(AVCodecContext *avctx)
+{
+    H264Context *h = avctx->priv_data;
+    MpegEncContext *s = &h->s;
+
+    av_freep(&h->rbsp_buffer);
+    free_tables(h); //FIXME cleanup init stuff perhaps
+    MPV_common_end(s);
+
+//    memset(h, 0, sizeof(H264Context));
+
+    return 0;
+}
+
+
+AVCodec h264_decoder = {
+    "h264",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_H264,
+    sizeof(H264Context),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame,
+    /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
+    .flush= flush_dpb,
+};
+
+#ifdef CONFIG_H264_PARSER
+AVCodecParser h264_parser = {
+    { CODEC_ID_H264 },
+    sizeof(H264Context),
+    NULL,
+    h264_parse,
+    ff_parse_close,
+    h264_split,
+};
+#endif
+
+#include "svq3.c"
diff --git a/contrib/ffmpeg/libavcodec/h264data.h b/contrib/ffmpeg/libavcodec/h264data.h
new file mode 100644
index 000000000..2dea3580f
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/h264data.h
@@ -0,0 +1,1314 @@
+/*
+ * H26L/H264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file h264data.h
+ * @brief
+ *     H264 / AVC / MPEG4 part10 codec data table
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#define VERT_PRED             0
+#define HOR_PRED              1
+#define DC_PRED               2
+#define DIAG_DOWN_LEFT_PRED   3
+#define DIAG_DOWN_RIGHT_PRED  4
+#define VERT_RIGHT_PRED       5
+#define HOR_DOWN_PRED         6
+#define VERT_LEFT_PRED        7
+#define HOR_UP_PRED           8
+
+#define LEFT_DC_PRED          9
+#define TOP_DC_PRED           10
+#define DC_128_PRED           11
+
+
+#define DC_PRED8x8            0
+#define HOR_PRED8x8           1
+#define VERT_PRED8x8          2
+#define PLANE_PRED8x8         3
+
+#define LEFT_DC_PRED8x8       4
+#define TOP_DC_PRED8x8        5
+#define DC_128_PRED8x8        6
+
+#define EXTENDED_SAR          255
+
+static const AVRational pixel_aspect[14]={
+ {0, 1},
+ {1, 1},
+ {12, 11},
+ {10, 11},
+ {16, 11},
+ {40, 33},
+ {24, 11},
+ {20, 11},
+ {32, 11},
+ {80, 33},
+ {18, 11},
+ {15, 11},
+ {64, 33},
+ {160,99},
+};
+
+static const uint8_t golomb_to_pict_type[5]=
+{P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
+
+static const uint8_t pict_type_to_golomb[7]=
+{-1, 2, 0, 1, -1, 4, 3};
+
+static const uint8_t chroma_qp[52]={
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,
+   12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,
+   28,29,29,30,31,32,32,33,34,34,35,35,36,36,37,37,
+   37,38,38,38,39,39,39,39
+
+};
+
+static const uint8_t golomb_to_intra4x4_cbp[48]={
+ 47, 31, 15,  0, 23, 27, 29, 30,  7, 11, 13, 14, 39, 43, 45, 46,
+ 16,  3,  5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44,  1,  2,  4,
+  8, 17, 18, 20, 24,  6,  9, 22, 25, 32, 33, 34, 36, 40, 38, 41
+};
+
+static const uint8_t golomb_to_inter_cbp[48]={
+  0, 16,  1,  2,  4,  8, 32,  3,  5, 10, 12, 15, 47,  7, 11, 13,
+ 14,  6,  9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46,
+ 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41
+};
+
+static const uint8_t intra4x4_cbp_to_golomb[48]={
+  3, 29, 30, 17, 31, 18, 37,  8, 32, 38, 19,  9, 20, 10, 11,  2,
+ 16, 33, 34, 21, 35, 22, 39,  4, 36, 40, 23,  5, 24,  6,  7,  1,
+ 41, 42, 43, 25, 44, 26, 46, 12, 45, 47, 27, 13, 28, 14, 15,  0
+};
+
+static const uint8_t inter_cbp_to_golomb[48]={
+  0,  2,  3,  7,  4,  8, 17, 13,  5, 18,  9, 14, 10, 15, 16, 11,
+  1, 32, 33, 36, 34, 37, 44, 40, 35, 45, 38, 41, 39, 42, 43, 19,
+  6, 24, 25, 20, 26, 21, 46, 28, 27, 47, 22, 29, 23, 30, 31, 12
+};
+
+static const uint8_t chroma_dc_coeff_token_len[4*5]={
+ 2, 0, 0, 0,
+ 6, 1, 0, 0,
+ 6, 6, 3, 0,
+ 6, 7, 7, 6,
+ 6, 8, 8, 7,
+};
+
+static const uint8_t chroma_dc_coeff_token_bits[4*5]={
+ 1, 0, 0, 0,
+ 7, 1, 0, 0,
+ 4, 6, 1, 0,
+ 3, 3, 2, 5,
+ 2, 3, 2, 0,
+};
+
+static const uint8_t coeff_token_len[4][4*17]={
+{
+     1, 0, 0, 0,
+     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
+    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
+    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
+    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
+},
+{
+     2, 0, 0, 0,
+     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
+     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
+    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
+    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
+},
+{
+     4, 0, 0, 0,
+     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
+     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
+     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
+    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
+},
+{
+     6, 0, 0, 0,
+     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
+     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
+     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
+     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
+}
+};
+
+static const uint8_t coeff_token_bits[4][4*17]={
+{
+     1, 0, 0, 0,
+     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
+     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
+    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
+    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
+},
+{
+     3, 0, 0, 0,
+    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
+     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
+    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
+    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
+},
+{
+    15, 0, 0, 0,
+    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
+    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
+    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
+    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
+},
+{
+     3, 0, 0, 0,
+     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
+    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
+    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
+    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
+}
+};
+
+static const uint8_t total_zeros_len[16][16]= {
+    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
+    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
+    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
+    {5,3,4,4,3,3,3,4,3,4,5,5,5},
+    {4,4,4,3,3,3,3,3,4,5,4,5},
+    {6,5,3,3,3,3,3,3,4,3,6},
+    {6,5,3,3,3,2,3,4,3,6},
+    {6,4,5,3,2,2,3,3,6},
+    {6,6,4,2,2,3,2,5},
+    {5,5,3,2,2,2,4},
+    {4,4,3,3,1,3},
+    {4,4,2,1,3},
+    {3,3,1,2},
+    {2,2,1},
+    {1,1},
+};
+
+static const uint8_t total_zeros_bits[16][16]= {
+    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
+    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
+    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
+    {3,7,5,4,6,5,4,3,3,2,2,1,0},
+    {5,4,3,7,6,5,4,3,2,1,1,0},
+    {1,1,7,6,5,4,3,2,1,1,0},
+    {1,1,5,4,3,3,2,1,1,0},
+    {1,1,1,3,3,2,2,1,0},
+    {1,0,1,3,2,1,1,1},
+    {1,0,1,3,2,1,1},
+    {0,1,1,2,1,3},
+    {0,1,1,1,1},
+    {0,1,1,1},
+    {0,1,1},
+    {0,1},
+};
+
+static const uint8_t chroma_dc_total_zeros_len[3][4]= {
+    { 1, 2, 3, 3,},
+    { 1, 2, 2, 0,},
+    { 1, 1, 0, 0,},
+};
+
+static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
+    { 1, 1, 1, 0,},
+    { 1, 1, 0, 0,},
+    { 1, 0, 0, 0,},
+};
+
+static const uint8_t run_len[7][16]={
+    {1,1},
+    {1,2,2},
+    {2,2,2,2},
+    {2,2,2,3,3},
+    {2,2,3,3,3,3},
+    {2,3,3,3,3,3,3},
+    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
+};
+
+static const uint8_t run_bits[7][16]={
+    {1,0},
+    {1,1,0},
+    {3,2,1,0},
+    {3,2,1,1,0},
+    {3,2,3,2,1,0},
+    {3,0,1,3,2,5,4},
+    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
+};
+
+/*
+o-o o-o
+ / / /
+o-o o-o
+ ,---'
+o-o o-o
+ / / /
+o-o o-o
+*/
+
+static const uint8_t scan8[16 + 2*4]={
+ 4+1*8, 5+1*8, 4+2*8, 5+2*8,
+ 6+1*8, 7+1*8, 6+2*8, 7+2*8,
+ 4+3*8, 5+3*8, 4+4*8, 5+4*8,
+ 6+3*8, 7+3*8, 6+4*8, 7+4*8,
+ 1+1*8, 2+1*8,
+ 1+2*8, 2+2*8,
+ 1+4*8, 2+4*8,
+ 1+5*8, 2+5*8,
+};
+
+static const uint8_t zigzag_scan[16]={
+ 0+0*4, 1+0*4, 0+1*4, 0+2*4,
+ 1+1*4, 2+0*4, 3+0*4, 2+1*4,
+ 1+2*4, 0+3*4, 1+3*4, 2+2*4,
+ 3+1*4, 3+2*4, 2+3*4, 3+3*4,
+};
+
+static const uint8_t field_scan[16]={
+ 0+0*4, 0+1*4, 1+0*4, 0+2*4,
+ 0+3*4, 1+1*4, 1+2*4, 1+3*4,
+ 2+0*4, 2+1*4, 2+2*4, 2+3*4,
+ 3+0*4, 3+1*4, 3+2*4, 3+3*4,
+};
+
+static const uint8_t luma_dc_zigzag_scan[16]={
+ 0*16 + 0*64, 1*16 + 0*64, 2*16 + 0*64, 0*16 + 2*64,
+ 3*16 + 0*64, 0*16 + 1*64, 1*16 + 1*64, 2*16 + 1*64,
+ 1*16 + 2*64, 2*16 + 2*64, 3*16 + 2*64, 0*16 + 3*64,
+ 3*16 + 1*64, 1*16 + 3*64, 2*16 + 3*64, 3*16 + 3*64,
+};
+
+static const uint8_t luma_dc_field_scan[16]={
+ 0*16 + 0*64, 2*16 + 0*64, 1*16 + 0*64, 0*16 + 2*64,
+ 2*16 + 2*64, 3*16 + 0*64, 1*16 + 2*64, 3*16 + 2*64,
+ 0*16 + 1*64, 2*16 + 1*64, 0*16 + 3*64, 2*16 + 3*64,
+ 1*16 + 1*64, 3*16 + 1*64, 1*16 + 3*64, 3*16 + 3*64,
+};
+
+static const uint8_t chroma_dc_scan[4]={
+ (0+0*2)*16, (1+0*2)*16,
+ (0+1*2)*16, (1+1*2)*16,  //FIXME
+};
+
+static const uint8_t zigzag_scan8x8[64]={
+ 0+0*8, 1+0*8, 0+1*8, 0+2*8,
+ 1+1*8, 2+0*8, 3+0*8, 2+1*8,
+ 1+2*8, 0+3*8, 0+4*8, 1+3*8,
+ 2+2*8, 3+1*8, 4+0*8, 5+0*8,
+ 4+1*8, 3+2*8, 2+3*8, 1+4*8,
+ 0+5*8, 0+6*8, 1+5*8, 2+4*8,
+ 3+3*8, 4+2*8, 5+1*8, 6+0*8,
+ 7+0*8, 6+1*8, 5+2*8, 4+3*8,
+ 3+4*8, 2+5*8, 1+6*8, 0+7*8,
+ 1+7*8, 2+6*8, 3+5*8, 4+4*8,
+ 5+3*8, 6+2*8, 7+1*8, 7+2*8,
+ 6+3*8, 5+4*8, 4+5*8, 3+6*8,
+ 2+7*8, 3+7*8, 4+6*8, 5+5*8,
+ 6+4*8, 7+3*8, 7+4*8, 6+5*8,
+ 5+6*8, 4+7*8, 5+7*8, 6+6*8,
+ 7+5*8, 7+6*8, 6+7*8, 7+7*8,
+};
+
+// zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)]
+static const uint8_t zigzag_scan8x8_cavlc[64]={
+ 0+0*8, 1+1*8, 1+2*8, 2+2*8,
+ 4+1*8, 0+5*8, 3+3*8, 7+0*8,
+ 3+4*8, 1+7*8, 5+3*8, 6+3*8,
+ 2+7*8, 6+4*8, 5+6*8, 7+5*8,
+ 1+0*8, 2+0*8, 0+3*8, 3+1*8,
+ 3+2*8, 0+6*8, 4+2*8, 6+1*8,
+ 2+5*8, 2+6*8, 6+2*8, 5+4*8,
+ 3+7*8, 7+3*8, 4+7*8, 7+6*8,
+ 0+1*8, 3+0*8, 0+4*8, 4+0*8,
+ 2+3*8, 1+5*8, 5+1*8, 5+2*8,
+ 1+6*8, 3+5*8, 7+1*8, 4+5*8,
+ 4+6*8, 7+4*8, 5+7*8, 6+7*8,
+ 0+2*8, 2+1*8, 1+3*8, 5+0*8,
+ 1+4*8, 2+4*8, 6+0*8, 4+3*8,
+ 0+7*8, 4+4*8, 7+2*8, 3+6*8,
+ 5+5*8, 6+5*8, 6+6*8, 7+7*8,
+};
+
+static const uint8_t field_scan8x8[64]={
+ 0+0*8, 0+1*8, 0+2*8, 1+0*8,
+ 1+1*8, 0+3*8, 0+4*8, 1+2*8,
+ 2+0*8, 1+3*8, 0+5*8, 0+6*8,
+ 0+7*8, 1+4*8, 2+1*8, 3+0*8,
+ 2+2*8, 1+5*8, 1+6*8, 1+7*8,
+ 2+3*8, 3+1*8, 4+0*8, 3+2*8,
+ 2+4*8, 2+5*8, 2+6*8, 2+7*8,
+ 3+3*8, 4+1*8, 5+0*8, 4+2*8,
+ 3+4*8, 3+5*8, 3+6*8, 3+7*8,
+ 4+3*8, 5+1*8, 6+0*8, 5+2*8,
+ 4+4*8, 4+5*8, 4+6*8, 4+7*8,
+ 5+3*8, 6+1*8, 6+2*8, 5+4*8,
+ 5+5*8, 5+6*8, 5+7*8, 6+3*8,
+ 7+0*8, 7+1*8, 6+4*8, 6+5*8,
+ 6+6*8, 6+7*8, 7+2*8, 7+3*8,
+ 7+4*8, 7+5*8, 7+6*8, 7+7*8,
+};
+
+static const uint8_t field_scan8x8_cavlc[64]={
+ 0+0*8, 1+1*8, 2+0*8, 0+7*8,
+ 2+2*8, 2+3*8, 2+4*8, 3+3*8,
+ 3+4*8, 4+3*8, 4+4*8, 5+3*8,
+ 5+5*8, 7+0*8, 6+6*8, 7+4*8,
+ 0+1*8, 0+3*8, 1+3*8, 1+4*8,
+ 1+5*8, 3+1*8, 2+5*8, 4+1*8,
+ 3+5*8, 5+1*8, 4+5*8, 6+1*8,
+ 5+6*8, 7+1*8, 6+7*8, 7+5*8,
+ 0+2*8, 0+4*8, 0+5*8, 2+1*8,
+ 1+6*8, 4+0*8, 2+6*8, 5+0*8,
+ 3+6*8, 6+0*8, 4+6*8, 6+2*8,
+ 5+7*8, 6+4*8, 7+2*8, 7+6*8,
+ 1+0*8, 1+2*8, 0+6*8, 3+0*8,
+ 1+7*8, 3+2*8, 2+7*8, 4+2*8,
+ 3+7*8, 5+2*8, 4+7*8, 5+4*8,
+ 6+3*8, 6+5*8, 7+3*8, 7+7*8,
+};
+
+#define MB_TYPE_REF0       MB_TYPE_ACPRED //dirty but it fits in 16bit
+#define MB_TYPE_8x8DCT     0x01000000
+#define IS_REF0(a)       ((a)&MB_TYPE_REF0)
+#define IS_8x8DCT(a)     ((a)&MB_TYPE_8x8DCT)
+
+
+typedef struct IMbInfo{
+    uint16_t type;
+    uint8_t pred_mode;
+    uint8_t cbp;
+} IMbInfo;
+
+static const IMbInfo i_mb_type_info[26]={
+{MB_TYPE_INTRA4x4  , -1, -1},
+{MB_TYPE_INTRA16x16,  2,  0},
+{MB_TYPE_INTRA16x16,  1,  0},
+{MB_TYPE_INTRA16x16,  0,  0},
+{MB_TYPE_INTRA16x16,  3,  0},
+{MB_TYPE_INTRA16x16,  2,  16},
+{MB_TYPE_INTRA16x16,  1,  16},
+{MB_TYPE_INTRA16x16,  0,  16},
+{MB_TYPE_INTRA16x16,  3,  16},
+{MB_TYPE_INTRA16x16,  2,  32},
+{MB_TYPE_INTRA16x16,  1,  32},
+{MB_TYPE_INTRA16x16,  0,  32},
+{MB_TYPE_INTRA16x16,  3,  32},
+{MB_TYPE_INTRA16x16,  2,  15+0},
+{MB_TYPE_INTRA16x16,  1,  15+0},
+{MB_TYPE_INTRA16x16,  0,  15+0},
+{MB_TYPE_INTRA16x16,  3,  15+0},
+{MB_TYPE_INTRA16x16,  2,  15+16},
+{MB_TYPE_INTRA16x16,  1,  15+16},
+{MB_TYPE_INTRA16x16,  0,  15+16},
+{MB_TYPE_INTRA16x16,  3,  15+16},
+{MB_TYPE_INTRA16x16,  2,  15+32},
+{MB_TYPE_INTRA16x16,  1,  15+32},
+{MB_TYPE_INTRA16x16,  0,  15+32},
+{MB_TYPE_INTRA16x16,  3,  15+32},
+{MB_TYPE_INTRA_PCM , -1, -1},
+};
+
+typedef struct PMbInfo{
+    uint16_t type;
+    uint8_t partition_count;
+} PMbInfo;
+
+static const PMbInfo p_mb_type_info[5]={
+{MB_TYPE_16x16|MB_TYPE_P0L0             , 1},
+{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2},
+{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2},
+{MB_TYPE_8x8  |MB_TYPE_P0L0|MB_TYPE_P1L0, 4},
+{MB_TYPE_8x8  |MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_REF0, 4},
+};
+
+static const PMbInfo p_sub_mb_type_info[4]={
+{MB_TYPE_16x16|MB_TYPE_P0L0             , 1},
+{MB_TYPE_16x8 |MB_TYPE_P0L0             , 2},
+{MB_TYPE_8x16 |MB_TYPE_P0L0             , 2},
+{MB_TYPE_8x8  |MB_TYPE_P0L0             , 4},
+};
+
+static const PMbInfo b_mb_type_info[23]={
+{MB_TYPE_DIRECT2                                                   , 1, },
+{MB_TYPE_16x16|MB_TYPE_P0L0                                       , 1, },
+{MB_TYPE_16x16             |MB_TYPE_P0L1                          , 1, },
+{MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1                          , 1, },
+{MB_TYPE_16x8 |MB_TYPE_P0L0             |MB_TYPE_P1L0             , 2, },
+{MB_TYPE_8x16 |MB_TYPE_P0L0             |MB_TYPE_P1L0             , 2, },
+{MB_TYPE_16x8              |MB_TYPE_P0L1             |MB_TYPE_P1L1, 2, },
+{MB_TYPE_8x16              |MB_TYPE_P0L1             |MB_TYPE_P1L1, 2, },
+{MB_TYPE_16x8 |MB_TYPE_P0L0                          |MB_TYPE_P1L1, 2, },
+{MB_TYPE_8x16 |MB_TYPE_P0L0                          |MB_TYPE_P1L1, 2, },
+{MB_TYPE_16x8              |MB_TYPE_P0L1|MB_TYPE_P1L0             , 2, },
+{MB_TYPE_8x16              |MB_TYPE_P0L1|MB_TYPE_P1L0             , 2, },
+{MB_TYPE_16x8 |MB_TYPE_P0L0             |MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
+{MB_TYPE_8x16 |MB_TYPE_P0L0             |MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
+{MB_TYPE_16x8              |MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
+{MB_TYPE_8x16              |MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
+{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0             , 2, },
+{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0             , 2, },
+{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1             |MB_TYPE_P1L1, 2, },
+{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1             |MB_TYPE_P1L1, 2, },
+{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
+{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
+{MB_TYPE_8x8  |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, },
+};
+
+static const PMbInfo b_sub_mb_type_info[13]={
+{MB_TYPE_DIRECT2                                                   , 1, },
+{MB_TYPE_16x16|MB_TYPE_P0L0                                       , 1, },
+{MB_TYPE_16x16             |MB_TYPE_P0L1                          , 1, },
+{MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1                          , 1, },
+{MB_TYPE_16x8 |MB_TYPE_P0L0             |MB_TYPE_P1L0             , 2, },
+{MB_TYPE_8x16 |MB_TYPE_P0L0             |MB_TYPE_P1L0             , 2, },
+{MB_TYPE_16x8              |MB_TYPE_P0L1             |MB_TYPE_P1L1, 2, },
+{MB_TYPE_8x16              |MB_TYPE_P0L1             |MB_TYPE_P1L1, 2, },
+{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
+{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
+{MB_TYPE_8x8  |MB_TYPE_P0L0             |MB_TYPE_P1L0             , 4, },
+{MB_TYPE_8x8               |MB_TYPE_P0L1             |MB_TYPE_P1L1, 4, },
+{MB_TYPE_8x8  |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, },
+};
+
+
+static const uint8_t rem6[52]={
+0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
+};
+
+static const uint8_t div6[52]={
+0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
+};
+
+static const uint8_t default_scaling4[2][16]={
+{   6,13,20,28,
+   13,20,28,32,
+   20,28,32,37,
+   28,32,37,42
+},{
+   10,14,20,24,
+   14,20,24,27,
+   20,24,27,30,
+   24,27,30,34
+}};
+
+static const uint8_t default_scaling8[2][64]={
+{   6,10,13,16,18,23,25,27,
+   10,11,16,18,23,25,27,29,
+   13,16,18,23,25,27,29,31,
+   16,18,23,25,27,29,31,33,
+   18,23,25,27,29,31,33,36,
+   23,25,27,29,31,33,36,38,
+   25,27,29,31,33,36,38,40,
+   27,29,31,33,36,38,40,42
+},{
+    9,13,15,17,19,21,22,24,
+   13,13,17,19,21,22,24,25,
+   15,17,19,21,22,24,25,27,
+   17,19,21,22,24,25,27,28,
+   19,21,22,24,25,27,28,30,
+   21,22,24,25,27,28,30,32,
+   22,24,25,27,28,30,32,33,
+   24,25,27,28,30,32,33,35
+}};
+
+static const int dequant4_coeff_init[6][3]={
+  {10,13,16},
+  {11,14,18},
+  {13,16,20},
+  {14,18,23},
+  {16,20,25},
+  {18,23,29},
+};
+
+static const int dequant8_coeff_init_scan[16] = {
+  0,3,4,3, 3,1,5,1, 4,5,2,5, 3,1,5,1
+};
+static const int dequant8_coeff_init[6][6]={
+  {20,18,32,19,25,24},
+  {22,19,35,21,28,26},
+  {26,23,42,24,33,31},
+  {28,25,45,26,35,33},
+  {32,28,51,30,40,38},
+  {36,32,58,34,46,43},
+};
+
+#define QUANT_SHIFT 22
+
+static const int quant_coeff[52][16]={
+    { 419430,258111,419430,258111,258111,167772,258111,167772,419430,258111,419430,258111,258111,167772,258111,167772,},
+    { 381300,239675,381300,239675,239675,149131,239675,149131,381300,239675,381300,239675,239675,149131,239675,149131,},
+    { 322639,209715,322639,209715,209715,134218,209715,134218,322639,209715,322639,209715,209715,134218,209715,134218,},
+    { 299593,186414,299593,186414,186414,116711,186414,116711,299593,186414,299593,186414,186414,116711,186414,116711,},
+    { 262144,167772,262144,167772,167772,107374,167772,107374,262144,167772,262144,167772,167772,107374,167772,107374,},
+    { 233017,145889,233017,145889,145889, 92564,145889, 92564,233017,145889,233017,145889,145889, 92564,145889, 92564,},
+    { 209715,129056,209715,129056,129056, 83886,129056, 83886,209715,129056,209715,129056,129056, 83886,129056, 83886,},
+    { 190650,119837,190650,119837,119837, 74565,119837, 74565,190650,119837,190650,119837,119837, 74565,119837, 74565,},
+    { 161319,104858,161319,104858,104858, 67109,104858, 67109,161319,104858,161319,104858,104858, 67109,104858, 67109,},
+    { 149797, 93207,149797, 93207, 93207, 58356, 93207, 58356,149797, 93207,149797, 93207, 93207, 58356, 93207, 58356,},
+    { 131072, 83886,131072, 83886, 83886, 53687, 83886, 53687,131072, 83886,131072, 83886, 83886, 53687, 83886, 53687,},
+    { 116508, 72944,116508, 72944, 72944, 46282, 72944, 46282,116508, 72944,116508, 72944, 72944, 46282, 72944, 46282,},
+    { 104858, 64528,104858, 64528, 64528, 41943, 64528, 41943,104858, 64528,104858, 64528, 64528, 41943, 64528, 41943,},
+    {  95325, 59919, 95325, 59919, 59919, 37283, 59919, 37283, 95325, 59919, 95325, 59919, 59919, 37283, 59919, 37283,},
+    {  80660, 52429, 80660, 52429, 52429, 33554, 52429, 33554, 80660, 52429, 80660, 52429, 52429, 33554, 52429, 33554,},
+    {  74898, 46603, 74898, 46603, 46603, 29178, 46603, 29178, 74898, 46603, 74898, 46603, 46603, 29178, 46603, 29178,},
+    {  65536, 41943, 65536, 41943, 41943, 26844, 41943, 26844, 65536, 41943, 65536, 41943, 41943, 26844, 41943, 26844,},
+    {  58254, 36472, 58254, 36472, 36472, 23141, 36472, 23141, 58254, 36472, 58254, 36472, 36472, 23141, 36472, 23141,},
+    {  52429, 32264, 52429, 32264, 32264, 20972, 32264, 20972, 52429, 32264, 52429, 32264, 32264, 20972, 32264, 20972,},
+    {  47663, 29959, 47663, 29959, 29959, 18641, 29959, 18641, 47663, 29959, 47663, 29959, 29959, 18641, 29959, 18641,},
+    {  40330, 26214, 40330, 26214, 26214, 16777, 26214, 16777, 40330, 26214, 40330, 26214, 26214, 16777, 26214, 16777,},
+    {  37449, 23302, 37449, 23302, 23302, 14589, 23302, 14589, 37449, 23302, 37449, 23302, 23302, 14589, 23302, 14589,},
+    {  32768, 20972, 32768, 20972, 20972, 13422, 20972, 13422, 32768, 20972, 32768, 20972, 20972, 13422, 20972, 13422,},
+    {  29127, 18236, 29127, 18236, 18236, 11570, 18236, 11570, 29127, 18236, 29127, 18236, 18236, 11570, 18236, 11570,},
+    {  26214, 16132, 26214, 16132, 16132, 10486, 16132, 10486, 26214, 16132, 26214, 16132, 16132, 10486, 16132, 10486,},
+    {  23831, 14980, 23831, 14980, 14980,  9321, 14980,  9321, 23831, 14980, 23831, 14980, 14980,  9321, 14980,  9321,},
+    {  20165, 13107, 20165, 13107, 13107,  8389, 13107,  8389, 20165, 13107, 20165, 13107, 13107,  8389, 13107,  8389,},
+    {  18725, 11651, 18725, 11651, 11651,  7294, 11651,  7294, 18725, 11651, 18725, 11651, 11651,  7294, 11651,  7294,},
+    {  16384, 10486, 16384, 10486, 10486,  6711, 10486,  6711, 16384, 10486, 16384, 10486, 10486,  6711, 10486,  6711,},
+    {  14564,  9118, 14564,  9118,  9118,  5785,  9118,  5785, 14564,  9118, 14564,  9118,  9118,  5785,  9118,  5785,},
+    {  13107,  8066, 13107,  8066,  8066,  5243,  8066,  5243, 13107,  8066, 13107,  8066,  8066,  5243,  8066,  5243,},
+    {  11916,  7490, 11916,  7490,  7490,  4660,  7490,  4660, 11916,  7490, 11916,  7490,  7490,  4660,  7490,  4660,},
+    {  10082,  6554, 10082,  6554,  6554,  4194,  6554,  4194, 10082,  6554, 10082,  6554,  6554,  4194,  6554,  4194,},
+    {   9362,  5825,  9362,  5825,  5825,  3647,  5825,  3647,  9362,  5825,  9362,  5825,  5825,  3647,  5825,  3647,},
+    {   8192,  5243,  8192,  5243,  5243,  3355,  5243,  3355,  8192,  5243,  8192,  5243,  5243,  3355,  5243,  3355,},
+    {   7282,  4559,  7282,  4559,  4559,  2893,  4559,  2893,  7282,  4559,  7282,  4559,  4559,  2893,  4559,  2893,},
+    {   6554,  4033,  6554,  4033,  4033,  2621,  4033,  2621,  6554,  4033,  6554,  4033,  4033,  2621,  4033,  2621,},
+    {   5958,  3745,  5958,  3745,  3745,  2330,  3745,  2330,  5958,  3745,  5958,  3745,  3745,  2330,  3745,  2330,},
+    {   5041,  3277,  5041,  3277,  3277,  2097,  3277,  2097,  5041,  3277,  5041,  3277,  3277,  2097,  3277,  2097,},
+    {   4681,  2913,  4681,  2913,  2913,  1824,  2913,  1824,  4681,  2913,  4681,  2913,  2913,  1824,  2913,  1824,},
+    {   4096,  2621,  4096,  2621,  2621,  1678,  2621,  1678,  4096,  2621,  4096,  2621,  2621,  1678,  2621,  1678,},
+    {   3641,  2280,  3641,  2280,  2280,  1446,  2280,  1446,  3641,  2280,  3641,  2280,  2280,  1446,  2280,  1446,},
+    {   3277,  2016,  3277,  2016,  2016,  1311,  2016,  1311,  3277,  2016,  3277,  2016,  2016,  1311,  2016,  1311,},
+    {   2979,  1872,  2979,  1872,  1872,  1165,  1872,  1165,  2979,  1872,  2979,  1872,  1872,  1165,  1872,  1165,},
+    {   2521,  1638,  2521,  1638,  1638,  1049,  1638,  1049,  2521,  1638,  2521,  1638,  1638,  1049,  1638,  1049,},
+    {   2341,  1456,  2341,  1456,  1456,   912,  1456,   912,  2341,  1456,  2341,  1456,  1456,   912,  1456,   912,},
+    {   2048,  1311,  2048,  1311,  1311,   839,  1311,   839,  2048,  1311,  2048,  1311,  1311,   839,  1311,   839,},
+    {   1820,  1140,  1820,  1140,  1140,   723,  1140,   723,  1820,  1140,  1820,  1140,  1140,   723,  1140,   723,},
+    {   1638,  1008,  1638,  1008,  1008,   655,  1008,   655,  1638,  1008,  1638,  1008,  1008,   655,  1008,   655,},
+    {   1489,   936,  1489,   936,   936,   583,   936,   583,  1489,   936,  1489,   936,   936,   583,   936,   583,},
+    {   1260,   819,  1260,   819,   819,   524,   819,   524,  1260,   819,  1260,   819,   819,   524,   819,   524,},
+    {   1170,   728,  1170,   728,   728,   456,   728,   456,  1170,   728,  1170,   728,   728,   456,   728,   456,},
+};
+
+
+/* Deblocking filter (p153) */
+static const int alpha_table[52*3] = {
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
+     7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
+    25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
+    80, 90,101,113,127,144,162,182,203,226,
+   255,255,
+   255,255,255,255,255,255,255,255,255,255,255,255,255,
+   255,255,255,255,255,255,255,255,255,255,255,255,255,
+   255,255,255,255,255,255,255,255,255,255,255,255,255,
+   255,255,255,255,255,255,255,255,255,255,255,255,255,
+};
+static const int beta_table[52*3] = {
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
+     3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
+     8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
+    13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
+    18, 18,
+    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+};
+static const int tc0_table[52*3][3] = {
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
+    { 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 },
+    { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 },
+    { 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
+    { 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
+    { 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
+    { 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+};
+
+/* Cabac pre state table */
+
+static const int cabac_context_init_I[460][2] =
+{
+    /* 0 - 10 */
+    { 20, -15 }, {  2, 54 },  {  3,  74 }, { 20, -15 },
+    {  2,  54 }, {  3, 74 },  { -28,127 }, { -23, 104 },
+    { -6,  53 }, { -1, 54 },  {  7,  51 },
+
+    /* 11 - 23 unsused for I */
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },
+
+    /* 24- 39 */
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+
+    /* 40 - 53 */
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },
+
+    /* 54 - 59 */
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },
+
+    /* 60 - 69 */
+    { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
+    { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
+    { 13, 41 },  { 3, 62 },
+
+    /* 70 -> 87 */
+    { 0, 11 },   { 1, 55 },   { 0, 69 },     { -17, 127 },
+    { -13, 102 },{ 0, 82 },   { -7, 74 },    { -21, 107 },
+    { -27, 127 },{ -31, 127 },{ -24, 127 },  { -18, 95 },
+    { -27, 127 },{ -21, 114 },{ -30, 127 },  { -17, 123 },
+    { -12, 115 },{ -16, 122 },
+
+    /* 88 -> 104 */
+    { -11, 115 },{ -12, 63 }, { -2, 68 },    { -15, 84 },
+    { -13, 104 },{ -3, 70 },  { -8, 93 },    { -10, 90 },
+    { -30, 127 },{ -1, 74 },  { -6, 97 },    { -7, 91 },
+    { -20, 127 },{ -4, 56 },  { -5, 82 },    { -7, 76 },
+    { -22, 125 },
+
+    /* 105 -> 135 */
+    { -7, 93 },  { -11, 87 }, { -3, 77 },    { -5, 71 },
+    { -4, 63 },  { -4, 68 },  { -12, 84 },   { -7, 62 },
+    { -7, 65 },  { 8, 61 },   { 5, 56 },     { -2, 66 },
+    { 1, 64 },   { 0, 61 },   { -2, 78 },    { 1, 50 },
+    { 7, 52 },   { 10, 35 },  { 0, 44 },     { 11, 38 },
+    { 1, 45 },   { 0, 46 },   { 5, 44 },     { 31, 17 },
+    { 1, 51 },   { 7, 50 },   { 28, 19 },    { 16, 33 },
+    { 14, 62 },  { -13, 108 },{ -15, 100 },
+
+    /* 136 -> 165 */
+    { -13, 101 },{ -13, 91 }, { -12, 94 },   { -10, 88 },
+    { -16, 84 }, { -10, 86 }, { -7, 83 },    { -13, 87 },
+    { -19, 94 }, { 1, 70 },   { 0, 72 },     { -5, 74 },
+    { 18, 59 },  { -8, 102 }, { -15, 100 },  { 0, 95 },
+    { -4, 75 },  { 2, 72 },   { -11, 75 },   { -3, 71 },
+    { 15, 46 },  { -13, 69 }, { 0, 62 },     { 0, 65 },
+    { 21, 37 },  { -15, 72 }, { 9, 57 },     { 16, 54 },
+    { 0, 62 },   { 12, 72 },
+
+    /* 166 -> 196 */
+    { 24, 0 },   { 15, 9 },   { 8, 25 },     { 13, 18 },
+    { 15, 9 },   { 13, 19 },  { 10, 37 },    { 12, 18 },
+    { 6, 29 },   { 20, 33 },  { 15, 30 },    { 4, 45 },
+    { 1, 58 },   { 0, 62 },   { 7, 61 },     { 12, 38 },
+    { 11, 45 },  { 15, 39 },  { 11, 42 },    { 13, 44 },
+    { 16, 45 },  { 12, 41 },  { 10, 49 },    { 30, 34 },
+    { 18, 42 },  { 10, 55 },  { 17, 51 },    { 17, 46 },
+    { 0, 89 },   { 26, -19 }, { 22, -17 },
+
+    /* 197 -> 226 */
+    { 26, -17 }, { 30, -25 }, { 28, -20 },   { 33, -23 },
+    { 37, -27 }, { 33, -23 }, { 40, -28 },   { 38, -17 },
+    { 33, -11 }, { 40, -15 }, { 41, -6 },    { 38, 1 },
+    { 41, 17 },  { 30, -6 },  { 27, 3 },     { 26, 22 },
+    { 37, -16 }, { 35, -4 },  { 38, -8 },    { 38, -3 },
+    { 37, 3 },   { 38, 5 },   { 42, 0 },     { 35, 16 },
+    { 39, 22 },  { 14, 48 },  { 27, 37 },    { 21, 60 },
+    { 12, 68 },  { 2, 97 },
+
+    /* 227 -> 251 */
+    { -3, 71 },  { -6, 42 },  { -5, 50 },    { -3, 54 },
+    { -2, 62 },  { 0, 58 },   { 1, 63 },     { -2, 72 },
+    { -1, 74 },  { -9, 91 },  { -5, 67 },    { -5, 27 },
+    { -3, 39 },  { -2, 44 },  { 0, 46 },     { -16, 64 },
+    { -8, 68 },  { -10, 78 }, { -6, 77 },    { -10, 86 },
+    { -12, 92 }, { -15, 55 }, { -10, 60 },   { -6, 62 },
+    { -4, 65 },
+
+    /* 252 -> 275 */
+    { -12, 73 }, { -8, 76 },  { -7, 80 },    { -9, 88 },
+    { -17, 110 },{ -11, 97 }, { -20, 84 },   { -11, 79 },
+    { -6, 73 },  { -4, 74 },  { -13, 86 },   { -13, 96 },
+    { -11, 97 }, { -19, 117 },{ -8, 78 },    { -5, 33 },
+    { -4, 48 },  { -2, 53 },  { -3, 62 },    { -13, 71 },
+    { -10, 79 }, { -12, 86 }, { -13, 90 },   { -14, 97 },
+
+    /* 276 a bit special (not used, bypass is used instead) */
+    { 0, 0 },
+
+    /* 277 -> 307 */
+    { -6, 93 },  { -6, 84 },  { -8, 79 },    { 0, 66 },
+    { -1, 71 },  { 0, 62 },   { -2, 60 },    { -2, 59 },
+    { -5, 75 },  { -3, 62 },  { -4, 58 },    { -9, 66 },
+    { -1, 79 },  { 0, 71 },   { 3, 68 },     { 10, 44 },
+    { -7, 62 },  { 15, 36 },  { 14, 40 },    { 16, 27 },
+    { 12, 29 },  { 1, 44 },   { 20, 36 },    { 18, 32 },
+    { 5, 42 },   { 1, 48 },   { 10, 62 },    { 17, 46 },
+    { 9, 64 },   { -12, 104 },{ -11, 97 },
+
+    /* 308 -> 337 */
+    { -16, 96 }, { -7, 88 },  { -8, 85 },    { -7, 85 },
+    { -9, 85 },  { -13, 88 }, { 4, 66 },     { -3, 77 },
+    { -3, 76 },  { -6, 76 },  { 10, 58 },    { -1, 76 },
+    { -1, 83 },  { -7, 99 },  { -14, 95 },   { 2, 95 },
+    { 0, 76 },   { -5, 74 },  { 0, 70 },     { -11, 75 },
+    { 1, 68 },   { 0, 65 },   { -14, 73 },   { 3, 62 },
+    { 4, 62 },   { -1, 68 },  { -13, 75 },   { 11, 55 },
+    { 5, 64 },   { 12, 70 },
+
+    /* 338 -> 368 */
+    { 15, 6 },   { 6, 19 },   { 7, 16 },     { 12, 14 },
+    { 18, 13 },  { 13, 11 },  { 13, 15 },    { 15, 16 },
+    { 12, 23 },  { 13, 23 },  { 15, 20 },    { 14, 26 },
+    { 14, 44 },  { 17, 40 },  { 17, 47 },    { 24, 17 },
+    { 21, 21 },  { 25, 22 },  { 31, 27 },    { 22, 29 },
+    { 19, 35 },  { 14, 50 },  { 10, 57 },    { 7, 63 },
+    { -2, 77 },  { -4, 82 },  { -3, 94 },    { 9, 69 },
+    { -12, 109 },{ 36, -35 }, { 36, -34 },
+
+    /* 369 -> 398 */
+    { 32, -26 }, { 37, -30 }, { 44, -32 },   { 34, -18 },
+    { 34, -15 }, { 40, -15 }, { 33, -7 },    { 35, -5 },
+    { 33, 0 },   { 38, 2 },   { 33, 13 },    { 23, 35 },
+    { 13, 58 },  { 29, -3 },  { 26, 0 },     { 22, 30 },
+    { 31, -7 },  { 35, -15 }, { 34, -3 },    { 34, 3 },
+    { 36, -1 },  { 34, 5 },   { 32, 11 },    { 35, 5 },
+    { 34, 12 },  { 39, 11 },  { 30, 29 },    { 34, 26 },
+    { 29, 39 },  { 19, 66 },
+
+    /* 399 -> 435 */
+    {  31,  21 }, {  31,  31 }, {  25,  50 },
+    { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11,  85 },
+    { -15,  92 }, { -14,  89 }, { -26,  71 }, { -15,  81 },
+    { -14,  80 }, {   0,  68 }, { -14,  70 }, { -24,  56 },
+    { -23,  68 }, { -24,  50 }, { -11,  74 }, {  23, -13 },
+    {  26, -13 }, {  40, -15 }, {  49, -14 }, {  44,   3 },
+    {  45,   6 }, {  44,  34 }, {  33,  54 }, {  19,  82 },
+    {  -3,  75 }, {  -1,  23 }, {   1,  34 }, {   1,  43 },
+    {   0,  54 }, {  -2,  55 }, {   0,  61 }, {   1,  64 },
+    {   0,  68 }, {  -9,  92 },
+
+    /* 436 -> 459 */
+    { -14, 106 }, { -13,  97 }, { -15,  90 }, { -12,  90 },
+    { -18,  88 }, { -10,  73 }, {  -9,  79 }, { -14,  86 },
+    { -10,  73 }, { -10,  70 }, { -10,  69 }, {  -5,  66 },
+    {  -9,  64 }, {  -5,  58 }, {   2,  59 }, {  21, -10 },
+    {  24, -11 }, {  28,  -8 }, {  28,  -1 }, {  29,   3 },
+    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 }
+};
+
+static const int cabac_context_init_PB[3][460][2] =
+{
+    /* i_cabac_init_idc == 0 */
+    {
+        /* 0 - 10 */
+        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
+        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
+        {  -6,  53 }, {  -1,  54 }, {   7,  51 },
+
+        /* 11 - 23 */
+        {  23,  33 }, {  23,   2 }, {  21,   0 }, {   1,   9 },
+        {   0,  49 }, { -37, 118 }, {   5,  57 }, { -13,  78 },
+        { -11,  65 }, {   1,  62 }, {  12,  49 }, {  -4,  73 },
+        {  17,  50 },
+
+        /* 24 - 39 */
+        {  18,  64 }, {   9,  43 }, {  29,   0 }, {  26,  67 },
+        {  16,  90 }, {   9, 104 }, { -46, 127 }, { -20, 104 },
+        {   1,  67 }, { -13,  78 }, { -11,  65 }, {   1,  62 },
+        {  -6,  86 }, { -17,  95 }, {  -6,  61 }, {   9,  45 },
+
+        /* 40 - 53 */
+        {  -3,  69 }, {  -6,  81 }, { -11,  96 }, {   6,  55 },
+        {   7,  67 }, {  -5,  86 }, {   2,  88 }, {   0,  58 },
+        {  -3,  76 }, { -10,  94 }, {   5,  54 }, {   4,  69 },
+        {  -3,  81 }, {   0,  88 },
+
+        /* 54 - 59 */
+        {  -7,  67 }, {  -5,  74 }, {  -4,  74 }, {  -5,  80 },
+        {  -7,  72 }, {   1,  58 },
+
+        /* 60 - 69 */
+        {   0,  41 }, {   0,  63 }, {   0,  63 }, { 0, 63 },
+        {  -9,  83 }, {   4,  86 }, {   0,  97 }, { -7, 72 },
+        {  13,  41 }, {   3,  62 },
+
+        /* 70 - 87 */
+        {   0,  45 }, {  -4,  78 }, {  -3,  96 }, { -27,  126 },
+        { -28,  98 }, { -25, 101 }, { -23,  67 }, { -28,  82 },
+        { -20,  94 }, { -16,  83 }, { -22, 110 }, { -21,  91 },
+        { -18, 102 }, { -13,  93 }, { -29, 127 }, {  -7,  92 },
+        {  -5,  89 }, {  -7,  96 }, { -13, 108 }, {  -3,  46 },
+        {  -1,  65 }, {  -1,  57 }, {  -9,  93 }, {  -3,  74 },
+        {  -9,  92 }, {  -8,  87 }, { -23, 126 }, {   5,  54 },
+        {   6,  60 }, {   6,  59 }, {   6,  69 }, {  -1,  48 },
+        {   0,  68 }, {  -4,  69 }, {  -8,  88 },
+
+        /* 105 -> 165 */
+        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
+        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
+        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
+        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
+        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
+        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
+        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
+        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
+        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
+        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
+        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
+        {   3,  64 }, {   1,  61 }, {   9,  63 }, {   7,  50 },
+        {  16,  39 }, {   5,  44 }, {   4,  52 }, {  11,  48 },
+        {  -5,  60 }, {  -1,  59 }, {   0,  59 }, {  22,  33 },
+        {   5,  44 }, {  14,  43 }, {  -1,  78 }, {   0,  60 },
+        {   9,  69 },
+
+        /* 166 - 226 */
+        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
+        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
+        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
+        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
+        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
+        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
+        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
+        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
+        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
+        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
+        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
+        {   1,  67 }, {   5,  59 }, {   9,  67 }, {  16,  30 },
+        {  18,  32 }, {  18,  35 }, {  22,  29 }, {  24,  31 },
+        {  23,  38 }, {  18,  43 }, {  20,  41 }, {  11,  63 },
+        {   9,  59 }, {   9,  64 }, {  -1,  94 }, {  -2,  89 },
+        {  -9, 108 },
+
+        /* 227 - 275 */
+        {  -6,  76 }, {  -2,  44 }, {   0,  45 }, {   0,  52 },
+        {  -3,  64 }, {  -2,  59 }, {  -4,  70 }, {  -4,  75 },
+        {  -8,  82 }, { -17, 102 }, {  -9,  77 }, {   3,  24 },
+        {   0,  42 }, {   0,  48 }, {   0,  55 }, {  -6,  59 },
+        {  -7,  71 }, { -12,  83 }, { -11,  87 }, { -30, 119 },
+        {   1,  58 }, {  -3,  29 }, {  -1,  36 }, {   1,  38 },
+        {   2,  43 }, {  -6,  55 }, {   0,  58 }, {   0,  64 },
+        {  -3,  74 }, { -10,  90 }, {   0,  70 }, {  -4,  29 },
+        {   5,  31 }, {   7,  42 }, {   1,  59 }, {  -2,  58 },
+        {  -3,  72 }, {  -3,  81 }, { -11,  97 }, {   0,  58 },
+        {   8,   5 }, {  10,  14 }, {  14,  18 }, {  13,  27 },
+        {   2,  40 }, {   0,  58 }, {  -3,  70 }, {  -6,  79 },
+        {  -8,  85 },
+
+        /* 276 a bit special (not used, bypass is used instead) */
+        { 0, 0 },
+
+        /* 277 - 337 */
+        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
+        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
+        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
+        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
+        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
+        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
+        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
+        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
+        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
+        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
+        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
+        {  -2,  69 }, {  -2,  59 }, {   6,  70 }, {  10,  44 },
+        {   9,  31 }, {  12,  43 }, {   3,  53 }, {  14,  34 },
+        {  10,  38 }, {  -3,  52 }, {  13,  40 }, {  17,  32 },
+        {   7,  44 }, {   7,  38 }, {  13,  50 }, {  10,  57 },
+        {  26,  43 },
+
+        /* 338 - 398 */
+        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
+        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
+        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
+        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
+        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
+        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
+        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
+        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
+        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
+        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
+        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
+        {   8,  60 }, {   6,  63 }, {  17,  65 }, {  21,  24 },
+        {  23,  20 }, {  26,  23 }, {  27,  32 }, {  28,  23 },
+        {  28,  24 }, {  23,  40 }, {  24,  32 }, {  28,  29 },
+        {  23,  42 }, {  19,  57 }, {  22,  53 }, {  22,  61 },
+        {  11,  86 },
+
+        /* 399 - 435 */
+        {  12,  40 }, {  11,  51 }, {  14,  59 },
+        {  -4,  79 }, {  -7,  71 }, {  -5,  69 }, {  -9,  70 },
+        {  -8,  66 }, { -10,  68 }, { -19,  73 }, { -12,  69 },
+        { -16,  70 }, { -15,  67 }, { -20,  62 }, { -19,  70 },
+        { -16,  66 }, { -22,  65 }, { -20,  63 }, {   9,  -2 },
+        {  26,  -9 }, {  33,  -9 }, {  39,  -7 }, {  41,  -2 },
+        {  45,   3 }, {  49,   9 }, {  45,  27 }, {  36,  59 },
+        {  -6,  66 }, {  -7,  35 }, {  -7,  42 }, {  -8,  45 },
+        {  -5,  48 }, { -12,  56 }, {  -6,  60 }, {  -5,  62 },
+        {  -8,  66 }, {  -8,  76 },
+
+        /* 436 - 459 */
+        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
+        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
+        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
+        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  21, -13 },
+        {  33, -14 }, {  39,  -7 }, {  46,  -2 }, {  51,   2 },
+        {  60,   6 }, {  61,  17 }, {  55,  34 }, {  42,  62 },
+    },
+
+    /* i_cabac_init_idc == 1 */
+    {
+        /* 0 - 10 */
+        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
+        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
+        {  -6,  53 }, {  -1,  54 }, {   7,  51 },
+
+        /* 11 - 23 */
+        {  22,  25 }, {  34,   0 }, {  16,   0 }, {  -2,   9 },
+        {   4,  41 }, { -29, 118 }, {   2,  65 }, {  -6,  71 },
+        { -13,  79 }, {   5,  52 }, {   9,  50 }, {  -3,  70 },
+        {  10,  54 },
+
+        /* 24 - 39 */
+        {  26,  34 }, {  19,  22 }, {  40,   0 }, {  57,   2 },
+        {  41,  36 }, {  26,  69 }, { -45, 127 }, { -15, 101 },
+        {  -4,  76 }, {  -6,  71 }, { -13,  79 }, {   5,  52 },
+        {   6,  69 }, { -13,  90 }, {   0,  52 }, {   8,  43 },
+
+        /* 40 - 53 */
+        {  -2,  69 },{  -5,  82 },{ -10,  96 },{   2,  59 },
+        {   2,  75 },{  -3,  87 },{  -3,  100 },{   1,  56 },
+        {  -3,  74 },{  -6,  85 },{   0,  59 },{  -3,  81 },
+        {  -7,  86 },{  -5,  95 },
+
+        /* 54 - 59 */
+        {  -1,  66 },{  -1,  77 },{   1,  70 },{  -2,  86 },
+        {  -5,  72 },{   0,  61 },
+
+        /* 60 - 69 */
+        { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
+        { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
+        { 13, 41 },  { 3, 62 },
+
+        /* 70 - 104 */
+        {  13,  15 }, {   7,  51 }, {   2,  80 }, { -39, 127 },
+        { -18,  91 }, { -17,  96 }, { -26,  81 }, { -35,  98 },
+        { -24, 102 }, { -23,  97 }, { -27, 119 }, { -24,  99 },
+        { -21, 110 }, { -18, 102 }, { -36, 127 }, {   0,  80 },
+        {  -5,  89 }, {  -7,  94 }, {  -4,  92 }, {   0,  39 },
+        {   0,  65 }, { -15,  84 }, { -35, 127 }, {  -2,  73 },
+        { -12, 104 }, {  -9,  91 }, { -31, 127 }, {   3,  55 },
+        {   7,  56 }, {   7,  55 }, {   8,  61 }, {  -3,  53 },
+        {   0,  68 }, {  -7,  74 }, {  -9,  88 },
+
+        /* 105 -> 165 */
+        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
+        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
+        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
+        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
+        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
+        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
+        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
+        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
+        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
+        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
+        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
+        {  -4,  71 }, {   0,  58 }, {   7,  61 }, {   9,  41 },
+        {  18,  25 }, {   9,  32 }, {   5,  43 }, {   9,  47 },
+        {   0,  44 }, {   0,  51 }, {   2,  46 }, {  19,  38 },
+        {  -4,  66 }, {  15,  38 }, {  12,  42 }, {   9,  34 },
+        {   0,  89 },
+
+        /* 166 - 226 */
+        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
+        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
+        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
+        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
+        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
+        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
+        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
+        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
+        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
+        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
+        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
+        {   0,  75 }, {   2,  72 }, {   8,  77 }, {  14,  35 },
+        {  18,  31 }, {  17,  35 }, {  21,  30 }, {  17,  45 },
+        {  20,  42 }, {  18,  45 }, {  27,  26 }, {  16,  54 },
+        {   7,  66 }, {  16,  56 }, {  11,  73 }, {  10,  67 },
+        { -10, 116 },
+
+        /* 227 - 275 */
+        { -23, 112 }, { -15,  71 }, {  -7,  61 }, {   0,  53 },
+        {  -5,  66 }, { -11,  77 }, {  -9,  80 }, {  -9,  84 },
+        { -10,  87 }, { -34, 127 }, { -21, 101 }, {  -3,  39 },
+        {  -5,  53 }, {  -7,  61 }, { -11,  75 }, { -15,  77 },
+        { -17,  91 }, { -25, 107 }, { -25, 111 }, { -28, 122 },
+        { -11,  76 }, { -10,  44 }, { -10,  52 }, { -10,  57 },
+        {  -9,  58 }, { -16,  72 }, {  -7,  69 }, {  -4,  69 },
+        {  -5,  74 }, {  -9,  86 }, {   2,  66 }, {  -9,  34 },
+        {   1,  32 }, {  11,  31 }, {   5,  52 }, {  -2,  55 },
+        {  -2,  67 }, {   0,  73 }, {  -8,  89 }, {   3,  52 },
+        {   7,   4 }, {  10,   8 }, {  17,   8 }, {  16,  19 },
+        {   3,  37 }, {  -1,  61 }, {  -5,  73 }, {  -1,  70 },
+        {  -4,  78 },
+
+        /* 276 a bit special (not used, bypass is used instead) */
+        { 0, 0 },
+
+        /* 277 - 337 */
+        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
+        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
+        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
+        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
+        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
+        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
+        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
+        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
+        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
+        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
+        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
+        {  -1,  70 }, {  -9,  72 }, {  14,  60 }, {  16,  37 },
+        {   0,  47 }, {  18,  35 }, {  11,  37 }, {  12,  41 },
+        {  10,  41 }, {   2,  48 }, {  12,  41 }, {  13,  41 },
+        {   0,  59 }, {   3,  50 }, {  19,  40 }, {   3,  66 },
+        {  18,  50 },
+
+        /* 338 - 398 */
+        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
+        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
+        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
+        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
+        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
+        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
+        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
+        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
+        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
+        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
+        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
+        {  12,  48 }, {  11,  49 }, {  26,  45 }, {  22,  22 },
+        {  23,  22 }, {  27,  21 }, {  33,  20 }, {  26,  28 },
+        {  30,  24 }, {  27,  34 }, {  18,  42 }, {  25,  39 },
+        {  18,  50 }, {  12,  70 }, {  21,  54 }, {  14,  71 },
+        {  11,  83 },
+
+        /* 399 - 435 */
+        {  25,  32 }, {  21,  49 }, {  21,  54 },
+        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
+        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
+        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
+        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  17, -10 },
+        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
+        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
+        {  -5,  71 }, {   0,  24 }, {  -1,  36 }, {  -2,  42 },
+        {  -2,  52 }, {  -9,  57 }, {  -6,  63 }, {  -4,  65 },
+        {  -4,  67 }, {  -7,  82 },
+
+        /* 436 - 459 */
+        {  -3,  81 }, {  -3,  76 }, {  -7,  72 }, {  -6,  78 },
+        { -12,  72 }, { -14,  68 }, {  -3,  70 }, {  -6,  76 },
+        {  -5,  66 }, {  -5,  62 }, {   0,  57 }, {  -4,  61 },
+        {  -9,  60 }, {   1,  54 }, {   2,  58 }, {  17, -10 },
+        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
+        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
+    },
+
+    /* i_cabac_init_idc == 2 */
+    {
+        /* 0 - 10 */
+        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
+        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
+        {  -6,  53 }, {  -1,  54 }, {   7,  51 },
+
+        /* 11 - 23 */
+        {  29,  16 }, {  25,   0 }, {  14,   0 }, { -10,  51 },
+        {  -3,  62 }, { -27,  99 }, {  26,  16 }, {  -4,  85 },
+        { -24, 102 }, {   5,  57 }, {   6,  57 }, { -17,  73 },
+        {  14,  57 },
+
+        /* 24 - 39 */
+        {  20,  40 }, {  20,  10 }, {  29,   0 }, {  54,   0 },
+        {  37,  42 }, {  12,  97 }, { -32, 127 }, { -22, 117 },
+        {  -2,  74 }, {  -4,  85 }, { -24, 102 }, {   5,  57 },
+        {  -6,  93 }, { -14,  88 }, {  -6,  44 }, {   4,  55 },
+
+        /* 40 - 53 */
+        { -11,  89 },{ -15,  103 },{ -21,  116 },{  19,  57 },
+        {  20,  58 },{   4,  84 },{   6,  96 },{   1,  63 },
+        {  -5,  85 },{ -13,  106 },{   5,  63 },{   6,  75 },
+        {  -3,  90 },{  -1,  101 },
+
+        /* 54 - 59 */
+        {   3,  55 },{  -4,  79 },{  -2,  75 },{ -12,  97 },
+        {  -7,  50 },{   1,  60 },
+
+        /* 60 - 69 */
+        { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
+        { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
+        { 13, 41 },  { 3, 62 },
+
+        /* 70 - 104 */
+        {   7,  34 }, {  -9,  88 }, { -20, 127 }, { -36, 127 },
+        { -17,  91 }, { -14,  95 }, { -25,  84 }, { -25,  86 },
+        { -12,  89 }, { -17,  91 }, { -31, 127 }, { -14,  76 },
+        { -18, 103 }, { -13,  90 }, { -37, 127 }, {  11,  80 },
+        {   5,  76 }, {   2,  84 }, {   5,  78 }, {  -6,  55 },
+        {   4,  61 }, { -14,  83 }, { -37, 127 }, {  -5,  79 },
+        { -11, 104 }, { -11,  91 }, { -30, 127 }, {   0,  65 },
+        {  -2,  79 }, {   0,  72 }, {  -4,  92 }, {  -6,  56 },
+        {   3,  68 }, {  -8,  71 }, { -13,  98 },
+
+        /* 105 -> 165 */
+        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
+        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
+        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
+        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
+        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
+        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
+        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
+        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
+        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
+        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
+        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
+        {   3,  65 }, {  -7,  69 }, {   8,  77 }, { -10,  66 },
+        {   3,  62 }, {  -3,  68 }, { -20,  81 }, {   0,  30 },
+        {   1,   7 }, {  -3,  23 }, { -21,  74 }, {  16,  66 },
+        { -23, 124 }, {  17,  37 }, {  44, -18 }, {  50, -34 },
+        { -22, 127 },
+
+        /* 166 - 226 */
+        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
+        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
+        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
+        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
+        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
+        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
+        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
+        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
+        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
+        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
+        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
+        {  20,  34 }, {  19,  31 }, {  27,  44 }, {  19,  16 },
+        {  15,  36 }, {  15,  36 }, {  21,  28 }, {  25,  21 },
+        {  30,  20 }, {  31,  12 }, {  27,  16 }, {  24,  42 },
+        {   0,  93 }, {  14,  56 }, {  15,  57 }, {  26,  38 },
+        { -24, 127 },
+
+        /* 227 - 275 */
+        { -24, 115 }, { -22,  82 }, {  -9,  62 }, {   0,  53 },
+        {   0,  59 }, { -14,  85 }, { -13,  89 }, { -13,  94 },
+        { -11,  92 }, { -29, 127 }, { -21, 100 }, { -14,  57 },
+        { -12,  67 }, { -11,  71 }, { -10,  77 }, { -21,  85 },
+        { -16,  88 }, { -23, 104 }, { -15,  98 }, { -37, 127 },
+        { -10,  82 }, {  -8,  48 }, {  -8,  61 }, {  -8,  66 },
+        {  -7,  70 }, { -14,  75 }, { -10,  79 }, {  -9,  83 },
+        { -12,  92 }, { -18, 108 }, {  -4,  79 }, { -22,  69 },
+        { -16,  75 }, {  -2,  58 }, {   1,  58 }, { -13,  78 },
+        {  -9,  83 }, {  -4,  81 }, { -13,  99 }, { -13,  81 },
+        {  -6,  38 }, { -13,  62 }, {  -6,  58 }, {  -2,  59 },
+        { -16,  73 }, { -10,  76 }, { -13,  86 }, {  -9,  83 },
+        { -10,  87 },
+
+        /* 276 a bit special (not used, bypass is used instead) */
+        { 0, 0 },
+
+        /* 277 - 337 */
+        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
+        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
+        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
+        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
+        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
+        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
+        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
+        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
+        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
+        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
+        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
+        {  -2,  76 }, { -18,  86 }, {  12,  70 }, {   5,  64 },
+        { -12,  70 }, {  11,  55 }, {   5,  56 }, {   0,  69 },
+        {   2,  65 }, {  -6,  74 }, {   5,  54 }, {   7,  54 },
+        {  -6,  76 }, { -11,  82 }, {  -2,  77 }, {  -2,  77 },
+        {  25,  42 },
+
+        /* 338 - 398 */
+        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
+        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
+        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
+        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
+        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
+        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
+        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
+        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
+        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
+        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
+        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
+        {  18,  31 }, {  19,  26 }, {  36,  24 }, {  24,  23 },
+        {  27,  16 }, {  24,  30 }, {  31,  29 }, {  22,  41 },
+        {  22,  42 }, {  16,  60 }, {  15,  52 }, {  14,  60 },
+        {   3,  78 }, { -16, 123 }, {  21,  53 }, {  22,  56 },
+        {  25,  61 },
+
+        /* 399 - 435 */
+        {  21,  33 }, {  19,  50 }, {  17,  61 },
+        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
+        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
+        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
+        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {   9,  -2 },
+        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
+        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
+        {  -9,  71 }, {  -7,  37 }, {  -8,  44 }, { -11,  49 },
+        { -10,  56 }, { -12,  59 }, {  -8,  63 }, {  -9,  67 },
+        {  -6,  68 }, { -10,  79 },
+
+        /* 436 - 459 */
+        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
+        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
+        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
+        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {   9,  -2 },
+        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
+        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
+    }
+};
diff --git a/contrib/ffmpeg/libavcodec/h264idct.c b/contrib/ffmpeg/libavcodec/h264idct.c
new file mode 100644
index 000000000..3506418ad
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/h264idct.c
@@ -0,0 +1,168 @@
+/*
+ * H.264 IDCT
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file h264-idct.c
+ * H.264 IDCT.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "dsputil.h"
+
+static always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    block[0] += 1<<(shift-1);
+
+    for(i=0; i<4; i++){
+        const int z0=  block[0 + block_stride*i]     +  block[2 + block_stride*i];
+        const int z1=  block[0 + block_stride*i]     -  block[2 + block_stride*i];
+        const int z2= (block[1 + block_stride*i]>>1) -  block[3 + block_stride*i];
+        const int z3=  block[1 + block_stride*i]     + (block[3 + block_stride*i]>>1);
+
+        block[0 + block_stride*i]= z0 + z3;
+        block[1 + block_stride*i]= z1 + z2;
+        block[2 + block_stride*i]= z1 - z2;
+        block[3 + block_stride*i]= z0 - z3;
+    }
+
+    for(i=0; i<4; i++){
+        const int z0=  block[i + block_stride*0]     +  block[i + block_stride*2];
+        const int z1=  block[i + block_stride*0]     -  block[i + block_stride*2];
+        const int z2= (block[i + block_stride*1]>>1) -  block[i + block_stride*3];
+        const int z3=  block[i + block_stride*1]     + (block[i + block_stride*3]>>1);
+
+        dst[i + 0*stride]= cm[ add*dst[i + 0*stride] + ((z0 + z3) >> shift) ];
+        dst[i + 1*stride]= cm[ add*dst[i + 1*stride] + ((z1 + z2) >> shift) ];
+        dst[i + 2*stride]= cm[ add*dst[i + 2*stride] + ((z1 - z2) >> shift) ];
+        dst[i + 3*stride]= cm[ add*dst[i + 3*stride] + ((z0 - z3) >> shift) ];
+    }
+}
+
+void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    idct_internal(dst, block, stride, 4, 6, 1);
+}
+
+void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){
+    idct_internal(dst, block, stride, 8, 3, 1);
+}
+
+void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){
+    idct_internal(dst, block, stride, 8, 3, 0);
+}
+
+void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    int i;
+    DCTELEM (*src)[8] = (DCTELEM(*)[8])block;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    block[0] += 32;
+
+    for( i = 0; i < 8; i++ )
+    {
+        const int a0 =  src[i][0] + src[i][4];
+        const int a2 =  src[i][0] - src[i][4];
+        const int a4 = (src[i][2]>>1) - src[i][6];
+        const int a6 = (src[i][6]>>1) + src[i][2];
+
+        const int b0 = a0 + a6;
+        const int b2 = a2 + a4;
+        const int b4 = a2 - a4;
+        const int b6 = a0 - a6;
+
+        const int a1 = -src[i][3] + src[i][5] - src[i][7] - (src[i][7]>>1);
+        const int a3 =  src[i][1] + src[i][7] - src[i][3] - (src[i][3]>>1);
+        const int a5 = -src[i][1] + src[i][7] + src[i][5] + (src[i][5]>>1);
+        const int a7 =  src[i][3] + src[i][5] + src[i][1] + (src[i][1]>>1);
+
+        const int b1 = (a7>>2) + a1;
+        const int b3 =  a3 + (a5>>2);
+        const int b5 = (a3>>2) - a5;
+        const int b7 =  a7 - (a1>>2);
+
+        src[i][0] = b0 + b7;
+        src[i][7] = b0 - b7;
+        src[i][1] = b2 + b5;
+        src[i][6] = b2 - b5;
+        src[i][2] = b4 + b3;
+        src[i][5] = b4 - b3;
+        src[i][3] = b6 + b1;
+        src[i][4] = b6 - b1;
+    }
+    for( i = 0; i < 8; i++ )
+    {
+        const int a0 =  src[0][i] + src[4][i];
+        const int a2 =  src[0][i] - src[4][i];
+        const int a4 = (src[2][i]>>1) - src[6][i];
+        const int a6 = (src[6][i]>>1) + src[2][i];
+
+        const int b0 = a0 + a6;
+        const int b2 = a2 + a4;
+        const int b4 = a2 - a4;
+        const int b6 = a0 - a6;
+
+        const int a1 = -src[3][i] + src[5][i] - src[7][i] - (src[7][i]>>1);
+        const int a3 =  src[1][i] + src[7][i] - src[3][i] - (src[3][i]>>1);
+        const int a5 = -src[1][i] + src[7][i] + src[5][i] + (src[5][i]>>1);
+        const int a7 =  src[3][i] + src[5][i] + src[1][i] + (src[1][i]>>1);
+
+        const int b1 = (a7>>2) + a1;
+        const int b3 =  a3 + (a5>>2);
+        const int b5 = (a3>>2) - a5;
+        const int b7 =  a7 - (a1>>2);
+
+        dst[i + 0*stride] = cm[ dst[i + 0*stride] + ((b0 + b7) >> 6) ];
+        dst[i + 1*stride] = cm[ dst[i + 1*stride] + ((b2 + b5) >> 6) ];
+        dst[i + 2*stride] = cm[ dst[i + 2*stride] + ((b4 + b3) >> 6) ];
+        dst[i + 3*stride] = cm[ dst[i + 3*stride] + ((b6 + b1) >> 6) ];
+        dst[i + 4*stride] = cm[ dst[i + 4*stride] + ((b6 - b1) >> 6) ];
+        dst[i + 5*stride] = cm[ dst[i + 5*stride] + ((b4 - b3) >> 6) ];
+        dst[i + 6*stride] = cm[ dst[i + 6*stride] + ((b2 - b5) >> 6) ];
+        dst[i + 7*stride] = cm[ dst[i + 7*stride] + ((b0 - b7) >> 6) ];
+    }
+}
+
+// assumes all AC coefs are 0
+void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    int i, j;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    int dc = (block[0] + 32) >> 6;
+    for( j = 0; j < 4; j++ )
+    {
+        for( i = 0; i < 4; i++ )
+            dst[i] = cm[ dst[i] + dc ];
+        dst += stride;
+    }
+}
+
+void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    int i, j;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    int dc = (block[0] + 32) >> 6;
+    for( j = 0; j < 8; j++ )
+    {
+        for( i = 0; i < 8; i++ )
+            dst[i] = cm[ dst[i] + dc ];
+        dst += stride;
+    }
+}
diff --git a/contrib/ffmpeg/libavcodec/huffyuv.c b/contrib/ffmpeg/libavcodec/huffyuv.c
new file mode 100644
index 000000000..0aefd6d72
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/huffyuv.c
@@ -0,0 +1,1290 @@
+/*
+ * huffyuv codec for libavcodec
+ *
+ * Copyright (c) 2002-2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * see http://www.pcisys.net/~melanson/codecs/huffyuv.txt for a description of
+ * the algorithm used
+ */
+
+/**
+ * @file huffyuv.c
+ * huffyuv codec for libavcodec.
+ */
+
+#include "common.h"
+#include "bitstream.h"
+#include "avcodec.h"
+#include "dsputil.h"
+
+#define VLC_BITS 11
+
+#ifdef WORDS_BIGENDIAN
+#define B 3
+#define G 2
+#define R 1
+#else
+#define B 0
+#define G 1
+#define R 2
+#endif
+
+typedef enum Predictor{
+    LEFT= 0,
+    PLANE,
+    MEDIAN,
+} Predictor;
+
+typedef struct HYuvContext{
+    AVCodecContext *avctx;
+    Predictor predictor;
+    GetBitContext gb;
+    PutBitContext pb;
+    int interlaced;
+    int decorrelate;
+    int bitstream_bpp;
+    int version;
+    int yuy2;                               //use yuy2 instead of 422P
+    int bgr32;                              //use bgr32 instead of bgr24
+    int width, height;
+    int flags;
+    int context;
+    int picture_number;
+    int last_slice_end;
+    uint8_t *temp[3];
+    uint64_t stats[3][256];
+    uint8_t len[3][256];
+    uint32_t bits[3][256];
+    VLC vlc[3];
+    AVFrame picture;
+    uint8_t *bitstream_buffer;
+    unsigned int bitstream_buffer_size;
+    DSPContext dsp;
+}HYuvContext;
+
+static const unsigned char classic_shift_luma[] = {
+  34,36,35,69,135,232,9,16,10,24,11,23,12,16,13,10,14,8,15,8,
+  16,8,17,20,16,10,207,206,205,236,11,8,10,21,9,23,8,8,199,70,
+  69,68, 0
+};
+
+static const unsigned char classic_shift_chroma[] = {
+  66,36,37,38,39,40,41,75,76,77,110,239,144,81,82,83,84,85,118,183,
+  56,57,88,89,56,89,154,57,58,57,26,141,57,56,58,57,58,57,184,119,
+  214,245,116,83,82,49,80,79,78,77,44,75,41,40,39,38,37,36,34, 0
+};
+
+static const unsigned char classic_add_luma[256] = {
+    3,  9,  5, 12, 10, 35, 32, 29, 27, 50, 48, 45, 44, 41, 39, 37,
+   73, 70, 68, 65, 64, 61, 58, 56, 53, 50, 49, 46, 44, 41, 38, 36,
+   68, 65, 63, 61, 58, 55, 53, 51, 48, 46, 45, 43, 41, 39, 38, 36,
+   35, 33, 32, 30, 29, 27, 26, 25, 48, 47, 46, 44, 43, 41, 40, 39,
+   37, 36, 35, 34, 32, 31, 30, 28, 27, 26, 24, 23, 22, 20, 19, 37,
+   35, 34, 33, 31, 30, 29, 27, 26, 24, 23, 21, 20, 18, 17, 15, 29,
+   27, 26, 24, 22, 21, 19, 17, 16, 14, 26, 25, 23, 21, 19, 18, 16,
+   15, 27, 25, 23, 21, 19, 17, 16, 14, 26, 25, 23, 21, 18, 17, 14,
+   12, 17, 19, 13,  4,  9,  2, 11,  1,  7,  8,  0, 16,  3, 14,  6,
+   12, 10,  5, 15, 18, 11, 10, 13, 15, 16, 19, 20, 22, 24, 27, 15,
+   18, 20, 22, 24, 26, 14, 17, 20, 22, 24, 27, 15, 18, 20, 23, 25,
+   28, 16, 19, 22, 25, 28, 32, 36, 21, 25, 29, 33, 38, 42, 45, 49,
+   28, 31, 34, 37, 40, 42, 44, 47, 49, 50, 52, 54, 56, 57, 59, 60,
+   62, 64, 66, 67, 69, 35, 37, 39, 40, 42, 43, 45, 47, 48, 51, 52,
+   54, 55, 57, 59, 60, 62, 63, 66, 67, 69, 71, 72, 38, 40, 42, 43,
+   46, 47, 49, 51, 26, 28, 30, 31, 33, 34, 18, 19, 11, 13,  7,  8,
+};
+
+static const unsigned char classic_add_chroma[256] = {
+    3,  1,  2,  2,  2,  2,  3,  3,  7,  5,  7,  5,  8,  6, 11,  9,
+    7, 13, 11, 10,  9,  8,  7,  5,  9,  7,  6,  4,  7,  5,  8,  7,
+   11,  8, 13, 11, 19, 15, 22, 23, 20, 33, 32, 28, 27, 29, 51, 77,
+   43, 45, 76, 81, 46, 82, 75, 55, 56,144, 58, 80, 60, 74,147, 63,
+  143, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 27, 30, 21, 22,
+   17, 14,  5,  6,100, 54, 47, 50, 51, 53,106,107,108,109,110,111,
+  112,113,114,115,  4,117,118, 92, 94,121,122,  3,124,103,  2,  1,
+    0,129,130,131,120,119,126,125,136,137,138,139,140,141,142,134,
+  135,132,133,104, 64,101, 62, 57,102, 95, 93, 59, 61, 28, 97, 96,
+   52, 49, 48, 29, 32, 25, 24, 46, 23, 98, 45, 44, 43, 20, 42, 41,
+   19, 18, 99, 40, 15, 39, 38, 16, 13, 12, 11, 37, 10,  9,  8, 36,
+    7,128,127,105,123,116, 35, 34, 33,145, 31, 79, 42,146, 78, 26,
+   83, 48, 49, 50, 44, 47, 26, 31, 30, 18, 17, 19, 21, 24, 25, 13,
+   14, 16, 17, 18, 20, 21, 12, 14, 15,  9, 10,  6,  9,  6,  5,  8,
+    6, 12,  8, 10,  7,  9,  6,  4,  6,  2,  2,  3,  3,  3,  3,  2,
+};
+
+static inline int add_left_prediction(uint8_t *dst, uint8_t *src, int w, int acc){
+    int i;
+
+    for(i=0; i<w-1; i++){
+        acc+= src[i];
+        dst[i]= acc;
+        i++;
+        acc+= src[i];
+        dst[i]= acc;
+    }
+
+    for(; i<w; i++){
+        acc+= src[i];
+        dst[i]= acc;
+    }
+
+    return acc;
+}
+
+static inline void add_median_prediction(uint8_t *dst, uint8_t *src1, uint8_t *diff, int w, int *left, int *left_top){
+    int i;
+    uint8_t l, lt;
+
+    l= *left;
+    lt= *left_top;
+
+    for(i=0; i<w; i++){
+        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
+        lt= src1[i];
+        dst[i]= l;
+    }
+
+    *left= l;
+    *left_top= lt;
+}
+
+static inline void add_left_prediction_bgr32(uint8_t *dst, uint8_t *src, int w, int *red, int *green, int *blue){
+    int i;
+    int r,g,b;
+    r= *red;
+    g= *green;
+    b= *blue;
+
+    for(i=0; i<w; i++){
+        b+= src[4*i+B];
+        g+= src[4*i+G];
+        r+= src[4*i+R];
+
+        dst[4*i+B]= b;
+        dst[4*i+G]= g;
+        dst[4*i+R]= r;
+    }
+
+    *red= r;
+    *green= g;
+    *blue= b;
+}
+
+static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst, uint8_t *src, int w, int left){
+    int i;
+    if(w<32){
+        for(i=0; i<w; i++){
+            const int temp= src[i];
+            dst[i]= temp - left;
+            left= temp;
+        }
+        return left;
+    }else{
+        for(i=0; i<16; i++){
+            const int temp= src[i];
+            dst[i]= temp - left;
+            left= temp;
+        }
+        s->dsp.diff_bytes(dst+16, src+16, src+15, w-16);
+        return src[w-1];
+    }
+}
+
+static void read_len_table(uint8_t *dst, GetBitContext *gb){
+    int i, val, repeat;
+
+    for(i=0; i<256;){
+        repeat= get_bits(gb, 3);
+        val   = get_bits(gb, 5);
+        if(repeat==0)
+            repeat= get_bits(gb, 8);
+//printf("%d %d\n", val, repeat);
+        while (repeat--)
+            dst[i++] = val;
+    }
+}
+
+static int generate_bits_table(uint32_t *dst, uint8_t *len_table){
+    int len, index;
+    uint32_t bits=0;
+
+    for(len=32; len>0; len--){
+        for(index=0; index<256; index++){
+            if(len_table[index]==len)
+                dst[index]= bits++;
+        }
+        if(bits & 1){
+            av_log(NULL, AV_LOG_ERROR, "Error generating huffman table\n");
+            return -1;
+        }
+        bits >>= 1;
+    }
+    return 0;
+}
+
+#ifdef CONFIG_ENCODERS
+static void generate_len_table(uint8_t *dst, uint64_t *stats, int size){
+    uint64_t counts[2*size];
+    int up[2*size];
+    int offset, i, next;
+
+    for(offset=1; ; offset<<=1){
+        for(i=0; i<size; i++){
+            counts[i]= stats[i] + offset - 1;
+        }
+
+        for(next=size; next<size*2; next++){
+            uint64_t min1, min2;
+            int min1_i, min2_i;
+
+            min1=min2= INT64_MAX;
+            min1_i= min2_i=-1;
+
+            for(i=0; i<next; i++){
+                if(min2 > counts[i]){
+                    if(min1 > counts[i]){
+                        min2= min1;
+                        min2_i= min1_i;
+                        min1= counts[i];
+                        min1_i= i;
+                    }else{
+                        min2= counts[i];
+                        min2_i= i;
+                    }
+                }
+            }
+
+            if(min2==INT64_MAX) break;
+
+            counts[next]= min1 + min2;
+            counts[min1_i]=
+            counts[min2_i]= INT64_MAX;
+            up[min1_i]=
+            up[min2_i]= next;
+            up[next]= -1;
+        }
+
+        for(i=0; i<size; i++){
+            int len;
+            int index=i;
+
+            for(len=0; up[index] != -1; len++)
+                index= up[index];
+
+            if(len >= 32) break;
+
+            dst[i]= len;
+        }
+        if(i==size) break;
+    }
+}
+#endif /* CONFIG_ENCODERS */
+
+static int read_huffman_tables(HYuvContext *s, uint8_t *src, int length){
+    GetBitContext gb;
+    int i;
+
+    init_get_bits(&gb, src, length*8);
+
+    for(i=0; i<3; i++){
+        read_len_table(s->len[i], &gb);
+
+        if(generate_bits_table(s->bits[i], s->len[i])<0){
+            return -1;
+        }
+#if 0
+for(j=0; j<256; j++){
+printf("%6X, %2d,  %3d\n", s->bits[i][j], s->len[i][j], j);
+}
+#endif
+        free_vlc(&s->vlc[i]);
+        init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0);
+    }
+
+    return (get_bits_count(&gb)+7)/8;
+}
+
+static int read_old_huffman_tables(HYuvContext *s){
+#if 1
+    GetBitContext gb;
+    int i;
+
+    init_get_bits(&gb, classic_shift_luma, sizeof(classic_shift_luma)*8);
+    read_len_table(s->len[0], &gb);
+    init_get_bits(&gb, classic_shift_chroma, sizeof(classic_shift_chroma)*8);
+    read_len_table(s->len[1], &gb);
+
+    for(i=0; i<256; i++) s->bits[0][i] = classic_add_luma  [i];
+    for(i=0; i<256; i++) s->bits[1][i] = classic_add_chroma[i];
+
+    if(s->bitstream_bpp >= 24){
+        memcpy(s->bits[1], s->bits[0], 256*sizeof(uint32_t));
+        memcpy(s->len[1] , s->len [0], 256*sizeof(uint8_t));
+    }
+    memcpy(s->bits[2], s->bits[1], 256*sizeof(uint32_t));
+    memcpy(s->len[2] , s->len [1], 256*sizeof(uint8_t));
+
+    for(i=0; i<3; i++){
+        free_vlc(&s->vlc[i]);
+        init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0);
+    }
+
+    return 0;
+#else
+    av_log(s->avctx, AV_LOG_DEBUG, "v1 huffyuv is not supported \n");
+    return -1;
+#endif
+}
+
+static void alloc_temp(HYuvContext *s){
+    int i;
+
+    if(s->bitstream_bpp<24){
+        for(i=0; i<3; i++){
+            s->temp[i]= av_malloc(s->width + 16);
+        }
+    }else{
+        s->temp[0]= av_malloc(4*s->width + 16);
+    }
+}
+
+static int common_init(AVCodecContext *avctx){
+    HYuvContext *s = avctx->priv_data;
+
+    s->avctx= avctx;
+    s->flags= avctx->flags;
+
+    dsputil_init(&s->dsp, avctx);
+
+    s->width= avctx->width;
+    s->height= avctx->height;
+    assert(s->width>0 && s->height>0);
+
+    return 0;
+}
+
+#ifdef CONFIG_DECODERS
+static int decode_init(AVCodecContext *avctx)
+{
+    HYuvContext *s = avctx->priv_data;
+
+    common_init(avctx);
+    memset(s->vlc, 0, 3*sizeof(VLC));
+
+    avctx->coded_frame= &s->picture;
+    s->interlaced= s->height > 288;
+
+s->bgr32=1;
+//if(avctx->extradata)
+//  printf("extradata:%X, extradata_size:%d\n", *(uint32_t*)avctx->extradata, avctx->extradata_size);
+    if(avctx->extradata_size){
+        if((avctx->bits_per_sample&7) && avctx->bits_per_sample != 12)
+            s->version=1; // do such files exist at all?
+        else
+            s->version=2;
+    }else
+        s->version=0;
+
+    if(s->version==2){
+        int method, interlace;
+
+        method= ((uint8_t*)avctx->extradata)[0];
+        s->decorrelate= method&64 ? 1 : 0;
+        s->predictor= method&63;
+        s->bitstream_bpp= ((uint8_t*)avctx->extradata)[1];
+        if(s->bitstream_bpp==0)
+            s->bitstream_bpp= avctx->bits_per_sample&~7;
+        interlace= (((uint8_t*)avctx->extradata)[2] & 0x30) >> 4;
+        s->interlaced= (interlace==1) ? 1 : (interlace==2) ? 0 : s->interlaced;
+        s->context= ((uint8_t*)avctx->extradata)[2] & 0x40 ? 1 : 0;
+
+        if(read_huffman_tables(s, ((uint8_t*)avctx->extradata)+4, avctx->extradata_size) < 0)
+            return -1;
+    }else{
+        switch(avctx->bits_per_sample&7){
+        case 1:
+            s->predictor= LEFT;
+            s->decorrelate= 0;
+            break;
+        case 2:
+            s->predictor= LEFT;
+            s->decorrelate= 1;
+            break;
+        case 3:
+            s->predictor= PLANE;
+            s->decorrelate= avctx->bits_per_sample >= 24;
+            break;
+        case 4:
+            s->predictor= MEDIAN;
+            s->decorrelate= 0;
+            break;
+        default:
+            s->predictor= LEFT; //OLD
+            s->decorrelate= 0;
+            break;
+        }
+        s->bitstream_bpp= avctx->bits_per_sample & ~7;
+        s->context= 0;
+
+        if(read_old_huffman_tables(s) < 0)
+            return -1;
+    }
+
+    switch(s->bitstream_bpp){
+    case 12:
+        avctx->pix_fmt = PIX_FMT_YUV420P;
+        break;
+    case 16:
+        if(s->yuy2){
+            avctx->pix_fmt = PIX_FMT_YUV422;
+        }else{
+            avctx->pix_fmt = PIX_FMT_YUV422P;
+        }
+        break;
+    case 24:
+    case 32:
+        if(s->bgr32){
+            avctx->pix_fmt = PIX_FMT_RGBA32;
+        }else{
+            avctx->pix_fmt = PIX_FMT_BGR24;
+        }
+        break;
+    default:
+        assert(0);
+    }
+
+    alloc_temp(s);
+
+//    av_log(NULL, AV_LOG_DEBUG, "pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced);
+
+    return 0;
+}
+#endif
+
+#ifdef CONFIG_ENCODERS
+static int store_table(HYuvContext *s, uint8_t *len, uint8_t *buf){
+    int i;
+    int index= 0;
+
+    for(i=0; i<256;){
+        int val= len[i];
+        int repeat=0;
+
+        for(; i<256 && len[i]==val && repeat<255; i++)
+            repeat++;
+
+        assert(val < 32 && val >0 && repeat<256 && repeat>0);
+        if(repeat>7){
+            buf[index++]= val;
+            buf[index++]= repeat;
+        }else{
+            buf[index++]= val | (repeat<<5);
+        }
+    }
+
+    return index;
+}
+
+static int encode_init(AVCodecContext *avctx)
+{
+    HYuvContext *s = avctx->priv_data;
+    int i, j;
+
+    common_init(avctx);
+
+    avctx->extradata= av_mallocz(1024*30); // 256*3+4 == 772
+    avctx->stats_out= av_mallocz(1024*30); // 21*256*3(%llu ) + 3(\n) + 1(0) = 16132
+    s->version=2;
+
+    avctx->coded_frame= &s->picture;
+
+    switch(avctx->pix_fmt){
+    case PIX_FMT_YUV420P:
+        s->bitstream_bpp= 12;
+        break;
+    case PIX_FMT_YUV422P:
+        s->bitstream_bpp= 16;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
+        return -1;
+    }
+    avctx->bits_per_sample= s->bitstream_bpp;
+    s->decorrelate= s->bitstream_bpp >= 24;
+    s->predictor= avctx->prediction_method;
+    s->interlaced= avctx->flags&CODEC_FLAG_INTERLACED_ME ? 1 : 0;
+    if(avctx->context_model==1){
+        s->context= avctx->context_model;
+        if(s->flags & (CODEC_FLAG_PASS1|CODEC_FLAG_PASS2)){
+            av_log(avctx, AV_LOG_ERROR, "context=1 is not compatible with 2 pass huffyuv encoding\n");
+            return -1;
+        }
+    }else s->context= 0;
+
+    if(avctx->codec->id==CODEC_ID_HUFFYUV){
+        if(avctx->pix_fmt==PIX_FMT_YUV420P){
+            av_log(avctx, AV_LOG_ERROR, "Error: YV12 is not supported by huffyuv; use vcodec=ffvhuff or format=422p\n");
+            return -1;
+        }
+        if(avctx->context_model){
+            av_log(avctx, AV_LOG_ERROR, "Error: per-frame huffman tables are not supported by huffyuv; use vcodec=ffvhuff\n");
+            return -1;
+        }
+        if(s->interlaced != ( s->height > 288 ))
+            av_log(avctx, AV_LOG_INFO, "using huffyuv 2.2.0 or newer interlacing flag\n");
+    }
+
+    ((uint8_t*)avctx->extradata)[0]= s->predictor;
+    ((uint8_t*)avctx->extradata)[1]= s->bitstream_bpp;
+    ((uint8_t*)avctx->extradata)[2]= s->interlaced ? 0x10 : 0x20;
+    if(s->context)
+        ((uint8_t*)avctx->extradata)[2]|= 0x40;
+    ((uint8_t*)avctx->extradata)[3]= 0;
+    s->avctx->extradata_size= 4;
+
+    if(avctx->stats_in){
+        char *p= avctx->stats_in;
+
+        for(i=0; i<3; i++)
+            for(j=0; j<256; j++)
+                s->stats[i][j]= 1;
+
+        for(;;){
+            for(i=0; i<3; i++){
+                char *next;
+
+                for(j=0; j<256; j++){
+                    s->stats[i][j]+= strtol(p, &next, 0);
+                    if(next==p) return -1;
+                    p=next;
+                }
+            }
+            if(p[0]==0 || p[1]==0 || p[2]==0) break;
+        }
+    }else{
+        for(i=0; i<3; i++)
+            for(j=0; j<256; j++){
+                int d= FFMIN(j, 256-j);
+
+                s->stats[i][j]= 100000000/(d+1);
+            }
+    }
+
+    for(i=0; i<3; i++){
+        generate_len_table(s->len[i], s->stats[i], 256);
+
+        if(generate_bits_table(s->bits[i], s->len[i])<0){
+            return -1;
+        }
+
+        s->avctx->extradata_size+=
+        store_table(s, s->len[i], &((uint8_t*)s->avctx->extradata)[s->avctx->extradata_size]);
+    }
+
+    if(s->context){
+        for(i=0; i<3; i++){
+            int pels = s->width*s->height / (i?40:10);
+            for(j=0; j<256; j++){
+                int d= FFMIN(j, 256-j);
+                s->stats[i][j]= pels/(d+1);
+            }
+        }
+    }else{
+        for(i=0; i<3; i++)
+            for(j=0; j<256; j++)
+                s->stats[i][j]= 0;
+    }
+
+//    printf("pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced);
+
+    alloc_temp(s);
+
+    s->picture_number=0;
+
+    return 0;
+}
+#endif /* CONFIG_ENCODERS */
+
+static void decode_422_bitstream(HYuvContext *s, int count){
+    int i;
+
+    count/=2;
+
+    for(i=0; i<count; i++){
+        s->temp[0][2*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
+        s->temp[1][  i  ]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
+        s->temp[0][2*i+1]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
+        s->temp[2][  i  ]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3);
+    }
+}
+
+static void decode_gray_bitstream(HYuvContext *s, int count){
+    int i;
+
+    count/=2;
+
+    for(i=0; i<count; i++){
+        s->temp[0][2*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
+        s->temp[0][2*i+1]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+static int encode_422_bitstream(HYuvContext *s, int count){
+    int i;
+
+    if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 2*4*count){
+        av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+        return -1;
+    }
+
+    count/=2;
+    if(s->flags&CODEC_FLAG_PASS1){
+        for(i=0; i<count; i++){
+            s->stats[0][ s->temp[0][2*i  ] ]++;
+            s->stats[1][ s->temp[1][  i  ] ]++;
+            s->stats[0][ s->temp[0][2*i+1] ]++;
+            s->stats[2][ s->temp[2][  i  ] ]++;
+        }
+    }
+    if(s->avctx->flags2&CODEC_FLAG2_NO_OUTPUT)
+        return 0;
+    if(s->context){
+        for(i=0; i<count; i++){
+            s->stats[0][ s->temp[0][2*i  ] ]++;
+            put_bits(&s->pb, s->len[0][ s->temp[0][2*i  ] ], s->bits[0][ s->temp[0][2*i  ] ]);
+            s->stats[1][ s->temp[1][  i  ] ]++;
+            put_bits(&s->pb, s->len[1][ s->temp[1][  i  ] ], s->bits[1][ s->temp[1][  i  ] ]);
+            s->stats[0][ s->temp[0][2*i+1] ]++;
+            put_bits(&s->pb, s->len[0][ s->temp[0][2*i+1] ], s->bits[0][ s->temp[0][2*i+1] ]);
+            s->stats[2][ s->temp[2][  i  ] ]++;
+            put_bits(&s->pb, s->len[2][ s->temp[2][  i  ] ], s->bits[2][ s->temp[2][  i  ] ]);
+        }
+    }else{
+        for(i=0; i<count; i++){
+            put_bits(&s->pb, s->len[0][ s->temp[0][2*i  ] ], s->bits[0][ s->temp[0][2*i  ] ]);
+            put_bits(&s->pb, s->len[1][ s->temp[1][  i  ] ], s->bits[1][ s->temp[1][  i  ] ]);
+            put_bits(&s->pb, s->len[0][ s->temp[0][2*i+1] ], s->bits[0][ s->temp[0][2*i+1] ]);
+            put_bits(&s->pb, s->len[2][ s->temp[2][  i  ] ], s->bits[2][ s->temp[2][  i  ] ]);
+        }
+    }
+    return 0;
+}
+
+static int encode_gray_bitstream(HYuvContext *s, int count){
+    int i;
+
+    if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 4*count){
+        av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+        return -1;
+    }
+
+    count/=2;
+    if(s->flags&CODEC_FLAG_PASS1){
+        for(i=0; i<count; i++){
+            s->stats[0][ s->temp[0][2*i  ] ]++;
+            s->stats[0][ s->temp[0][2*i+1] ]++;
+        }
+    }
+    if(s->avctx->flags2&CODEC_FLAG2_NO_OUTPUT)
+        return 0;
+
+    if(s->context){
+        for(i=0; i<count; i++){
+            s->stats[0][ s->temp[0][2*i  ] ]++;
+            put_bits(&s->pb, s->len[0][ s->temp[0][2*i  ] ], s->bits[0][ s->temp[0][2*i  ] ]);
+            s->stats[0][ s->temp[0][2*i+1] ]++;
+            put_bits(&s->pb, s->len[0][ s->temp[0][2*i+1] ], s->bits[0][ s->temp[0][2*i+1] ]);
+        }
+    }else{
+        for(i=0; i<count; i++){
+            put_bits(&s->pb, s->len[0][ s->temp[0][2*i  ] ], s->bits[0][ s->temp[0][2*i  ] ]);
+            put_bits(&s->pb, s->len[0][ s->temp[0][2*i+1] ], s->bits[0][ s->temp[0][2*i+1] ]);
+        }
+    }
+    return 0;
+}
+#endif /* CONFIG_ENCODERS */
+
+static void decode_bgr_bitstream(HYuvContext *s, int count){
+    int i;
+
+    if(s->decorrelate){
+        if(s->bitstream_bpp==24){
+            for(i=0; i<count; i++){
+                s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
+                s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+G];
+                s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+G];
+            }
+        }else{
+            for(i=0; i<count; i++){
+                s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
+                s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+G];
+                s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+G];
+                                   get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); //?!
+            }
+        }
+    }else{
+        if(s->bitstream_bpp==24){
+            for(i=0; i<count; i++){
+                s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
+                s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
+                s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3);
+            }
+        }else{
+            for(i=0; i<count; i++){
+                s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
+                s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
+                s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3);
+                                   get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); //?!
+            }
+        }
+    }
+}
+
+#ifdef CONFIG_DECODERS
+static void draw_slice(HYuvContext *s, int y){
+    int h, cy;
+    int offset[4];
+
+    if(s->avctx->draw_horiz_band==NULL)
+        return;
+
+    h= y - s->last_slice_end;
+    y -= h;
+
+    if(s->bitstream_bpp==12){
+        cy= y>>1;
+    }else{
+        cy= y;
+    }
+
+    offset[0] = s->picture.linesize[0]*y;
+    offset[1] = s->picture.linesize[1]*cy;
+    offset[2] = s->picture.linesize[2]*cy;
+    offset[3] = 0;
+    emms_c();
+
+    s->avctx->draw_horiz_band(s->avctx, &s->picture, offset, y, 3, h);
+
+    s->last_slice_end= y + h;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
+    HYuvContext *s = avctx->priv_data;
+    const int width= s->width;
+    const int width2= s->width>>1;
+    const int height= s->height;
+    int fake_ystride, fake_ustride, fake_vstride;
+    AVFrame * const p= &s->picture;
+    int table_size= 0;
+
+    AVFrame *picture = data;
+
+    s->bitstream_buffer= av_fast_realloc(s->bitstream_buffer, &s->bitstream_buffer_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+
+    s->dsp.bswap_buf((uint32_t*)s->bitstream_buffer, (uint32_t*)buf, buf_size/4);
+
+    if(p->data[0])
+        avctx->release_buffer(avctx, p);
+
+    p->reference= 0;
+    if(avctx->get_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    if(s->context){
+        table_size = read_huffman_tables(s, s->bitstream_buffer, buf_size);
+        if(table_size < 0)
+            return -1;
+    }
+
+    if((unsigned)(buf_size-table_size) >= INT_MAX/8)
+        return -1;
+
+    init_get_bits(&s->gb, s->bitstream_buffer+table_size, (buf_size-table_size)*8);
+
+    fake_ystride= s->interlaced ? p->linesize[0]*2  : p->linesize[0];
+    fake_ustride= s->interlaced ? p->linesize[1]*2  : p->linesize[1];
+    fake_vstride= s->interlaced ? p->linesize[2]*2  : p->linesize[2];
+
+    s->last_slice_end= 0;
+
+    if(s->bitstream_bpp<24){
+        int y, cy;
+        int lefty, leftu, leftv;
+        int lefttopy, lefttopu, lefttopv;
+
+        if(s->yuy2){
+            p->data[0][3]= get_bits(&s->gb, 8);
+            p->data[0][2]= get_bits(&s->gb, 8);
+            p->data[0][1]= get_bits(&s->gb, 8);
+            p->data[0][0]= get_bits(&s->gb, 8);
+
+            av_log(avctx, AV_LOG_ERROR, "YUY2 output is not implemented yet\n");
+            return -1;
+        }else{
+
+            leftv= p->data[2][0]= get_bits(&s->gb, 8);
+            lefty= p->data[0][1]= get_bits(&s->gb, 8);
+            leftu= p->data[1][0]= get_bits(&s->gb, 8);
+                   p->data[0][0]= get_bits(&s->gb, 8);
+
+            switch(s->predictor){
+            case LEFT:
+            case PLANE:
+                decode_422_bitstream(s, width-2);
+                lefty= add_left_prediction(p->data[0] + 2, s->temp[0], width-2, lefty);
+                if(!(s->flags&CODEC_FLAG_GRAY)){
+                    leftu= add_left_prediction(p->data[1] + 1, s->temp[1], width2-1, leftu);
+                    leftv= add_left_prediction(p->data[2] + 1, s->temp[2], width2-1, leftv);
+                }
+
+                for(cy=y=1; y<s->height; y++,cy++){
+                    uint8_t *ydst, *udst, *vdst;
+
+                    if(s->bitstream_bpp==12){
+                        decode_gray_bitstream(s, width);
+
+                        ydst= p->data[0] + p->linesize[0]*y;
+
+                        lefty= add_left_prediction(ydst, s->temp[0], width, lefty);
+                        if(s->predictor == PLANE){
+                            if(y>s->interlaced)
+                                s->dsp.add_bytes(ydst, ydst - fake_ystride, width);
+                        }
+                        y++;
+                        if(y>=s->height) break;
+                    }
+
+                    draw_slice(s, y);
+
+                    ydst= p->data[0] + p->linesize[0]*y;
+                    udst= p->data[1] + p->linesize[1]*cy;
+                    vdst= p->data[2] + p->linesize[2]*cy;
+
+                    decode_422_bitstream(s, width);
+                    lefty= add_left_prediction(ydst, s->temp[0], width, lefty);
+                    if(!(s->flags&CODEC_FLAG_GRAY)){
+                        leftu= add_left_prediction(udst, s->temp[1], width2, leftu);
+                        leftv= add_left_prediction(vdst, s->temp[2], width2, leftv);
+                    }
+                    if(s->predictor == PLANE){
+                        if(cy>s->interlaced){
+                            s->dsp.add_bytes(ydst, ydst - fake_ystride, width);
+                            if(!(s->flags&CODEC_FLAG_GRAY)){
+                                s->dsp.add_bytes(udst, udst - fake_ustride, width2);
+                                s->dsp.add_bytes(vdst, vdst - fake_vstride, width2);
+                            }
+                        }
+                    }
+                }
+                draw_slice(s, height);
+
+                break;
+            case MEDIAN:
+                /* first line except first 2 pixels is left predicted */
+                decode_422_bitstream(s, width-2);
+                lefty= add_left_prediction(p->data[0] + 2, s->temp[0], width-2, lefty);
+                if(!(s->flags&CODEC_FLAG_GRAY)){
+                    leftu= add_left_prediction(p->data[1] + 1, s->temp[1], width2-1, leftu);
+                    leftv= add_left_prediction(p->data[2] + 1, s->temp[2], width2-1, leftv);
+                }
+
+                cy=y=1;
+
+                /* second line is left predicted for interlaced case */
+                if(s->interlaced){
+                    decode_422_bitstream(s, width);
+                    lefty= add_left_prediction(p->data[0] + p->linesize[0], s->temp[0], width, lefty);
+                    if(!(s->flags&CODEC_FLAG_GRAY)){
+                        leftu= add_left_prediction(p->data[1] + p->linesize[2], s->temp[1], width2, leftu);
+                        leftv= add_left_prediction(p->data[2] + p->linesize[1], s->temp[2], width2, leftv);
+                    }
+                    y++; cy++;
+                }
+
+                /* next 4 pixels are left predicted too */
+                decode_422_bitstream(s, 4);
+                lefty= add_left_prediction(p->data[0] + fake_ystride, s->temp[0], 4, lefty);
+                if(!(s->flags&CODEC_FLAG_GRAY)){
+                    leftu= add_left_prediction(p->data[1] + fake_ustride, s->temp[1], 2, leftu);
+                    leftv= add_left_prediction(p->data[2] + fake_vstride, s->temp[2], 2, leftv);
+                }
+
+                /* next line except the first 4 pixels is median predicted */
+                lefttopy= p->data[0][3];
+                decode_422_bitstream(s, width-4);
+                add_median_prediction(p->data[0] + fake_ystride+4, p->data[0]+4, s->temp[0], width-4, &lefty, &lefttopy);
+                if(!(s->flags&CODEC_FLAG_GRAY)){
+                    lefttopu= p->data[1][1];
+                    lefttopv= p->data[2][1];
+                    add_median_prediction(p->data[1] + fake_ustride+2, p->data[1]+2, s->temp[1], width2-2, &leftu, &lefttopu);
+                    add_median_prediction(p->data[2] + fake_vstride+2, p->data[2]+2, s->temp[2], width2-2, &leftv, &lefttopv);
+                }
+                y++; cy++;
+
+                for(; y<height; y++,cy++){
+                    uint8_t *ydst, *udst, *vdst;
+
+                    if(s->bitstream_bpp==12){
+                        while(2*cy > y){
+                            decode_gray_bitstream(s, width);
+                            ydst= p->data[0] + p->linesize[0]*y;
+                            add_median_prediction(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy);
+                            y++;
+                        }
+                        if(y>=height) break;
+                    }
+                    draw_slice(s, y);
+
+                    decode_422_bitstream(s, width);
+
+                    ydst= p->data[0] + p->linesize[0]*y;
+                    udst= p->data[1] + p->linesize[1]*cy;
+                    vdst= p->data[2] + p->linesize[2]*cy;
+
+                    add_median_prediction(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy);
+                    if(!(s->flags&CODEC_FLAG_GRAY)){
+                        add_median_prediction(udst, udst - fake_ustride, s->temp[1], width2, &leftu, &lefttopu);
+                        add_median_prediction(vdst, vdst - fake_vstride, s->temp[2], width2, &leftv, &lefttopv);
+                    }
+                }
+
+                draw_slice(s, height);
+                break;
+            }
+        }
+    }else{
+        int y;
+        int leftr, leftg, leftb;
+        const int last_line= (height-1)*p->linesize[0];
+
+        if(s->bitstream_bpp==32){
+            skip_bits(&s->gb, 8);
+            leftr= p->data[0][last_line+R]= get_bits(&s->gb, 8);
+            leftg= p->data[0][last_line+G]= get_bits(&s->gb, 8);
+            leftb= p->data[0][last_line+B]= get_bits(&s->gb, 8);
+        }else{
+            leftr= p->data[0][last_line+R]= get_bits(&s->gb, 8);
+            leftg= p->data[0][last_line+G]= get_bits(&s->gb, 8);
+            leftb= p->data[0][last_line+B]= get_bits(&s->gb, 8);
+            skip_bits(&s->gb, 8);
+        }
+
+        if(s->bgr32){
+            switch(s->predictor){
+            case LEFT:
+            case PLANE:
+                decode_bgr_bitstream(s, width-1);
+                add_left_prediction_bgr32(p->data[0] + last_line+4, s->temp[0], width-1, &leftr, &leftg, &leftb);
+
+                for(y=s->height-2; y>=0; y--){ //yes its stored upside down
+                    decode_bgr_bitstream(s, width);
+
+                    add_left_prediction_bgr32(p->data[0] + p->linesize[0]*y, s->temp[0], width, &leftr, &leftg, &leftb);
+                    if(s->predictor == PLANE){
+                        if((y&s->interlaced)==0 && y<s->height-1-s->interlaced){
+                            s->dsp.add_bytes(p->data[0] + p->linesize[0]*y,
+                                             p->data[0] + p->linesize[0]*y + fake_ystride, fake_ystride);
+                        }
+                    }
+                }
+                draw_slice(s, height); // just 1 large slice as this is not possible in reverse order
+                break;
+            default:
+                av_log(avctx, AV_LOG_ERROR, "prediction type not supported!\n");
+            }
+        }else{
+
+            av_log(avctx, AV_LOG_ERROR, "BGR24 output is not implemented yet\n");
+            return -1;
+        }
+    }
+    emms_c();
+
+    *picture= *p;
+    *data_size = sizeof(AVFrame);
+
+    return (get_bits_count(&s->gb)+31)/32*4 + table_size;
+}
+#endif
+
+static int common_end(HYuvContext *s){
+    int i;
+
+    for(i=0; i<3; i++){
+        av_freep(&s->temp[i]);
+    }
+    return 0;
+}
+
+#ifdef CONFIG_DECODERS
+static int decode_end(AVCodecContext *avctx)
+{
+    HYuvContext *s = avctx->priv_data;
+    int i;
+
+    common_end(s);
+    av_freep(&s->bitstream_buffer);
+
+    for(i=0; i<3; i++){
+        free_vlc(&s->vlc[i]);
+    }
+
+    return 0;
+}
+#endif
+
+#ifdef CONFIG_ENCODERS
+static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
+    HYuvContext *s = avctx->priv_data;
+    AVFrame *pict = data;
+    const int width= s->width;
+    const int width2= s->width>>1;
+    const int height= s->height;
+    const int fake_ystride= s->interlaced ? pict->linesize[0]*2  : pict->linesize[0];
+    const int fake_ustride= s->interlaced ? pict->linesize[1]*2  : pict->linesize[1];
+    const int fake_vstride= s->interlaced ? pict->linesize[2]*2  : pict->linesize[2];
+    AVFrame * const p= &s->picture;
+    int i, j, size=0;
+
+    *p = *pict;
+    p->pict_type= FF_I_TYPE;
+    p->key_frame= 1;
+
+    if(s->context){
+        for(i=0; i<3; i++){
+            generate_len_table(s->len[i], s->stats[i], 256);
+            if(generate_bits_table(s->bits[i], s->len[i])<0)
+                return -1;
+            size+= store_table(s, s->len[i], &buf[size]);
+        }
+
+        for(i=0; i<3; i++)
+            for(j=0; j<256; j++)
+                s->stats[i][j] >>= 1;
+    }
+
+    init_put_bits(&s->pb, buf+size, buf_size-size);
+
+    if(avctx->pix_fmt == PIX_FMT_YUV422P || avctx->pix_fmt == PIX_FMT_YUV420P){
+        int lefty, leftu, leftv, y, cy;
+
+        put_bits(&s->pb, 8, leftv= p->data[2][0]);
+        put_bits(&s->pb, 8, lefty= p->data[0][1]);
+        put_bits(&s->pb, 8, leftu= p->data[1][0]);
+        put_bits(&s->pb, 8,        p->data[0][0]);
+
+        lefty= sub_left_prediction(s, s->temp[0], p->data[0]+2, width-2 , lefty);
+        leftu= sub_left_prediction(s, s->temp[1], p->data[1]+1, width2-1, leftu);
+        leftv= sub_left_prediction(s, s->temp[2], p->data[2]+1, width2-1, leftv);
+
+        encode_422_bitstream(s, width-2);
+
+        if(s->predictor==MEDIAN){
+            int lefttopy, lefttopu, lefttopv;
+            cy=y=1;
+            if(s->interlaced){
+                lefty= sub_left_prediction(s, s->temp[0], p->data[0]+p->linesize[0], width , lefty);
+                leftu= sub_left_prediction(s, s->temp[1], p->data[1]+p->linesize[1], width2, leftu);
+                leftv= sub_left_prediction(s, s->temp[2], p->data[2]+p->linesize[2], width2, leftv);
+
+                encode_422_bitstream(s, width);
+                y++; cy++;
+            }
+
+            lefty= sub_left_prediction(s, s->temp[0], p->data[0]+fake_ystride, 4, lefty);
+            leftu= sub_left_prediction(s, s->temp[1], p->data[1]+fake_ustride, 2, leftu);
+            leftv= sub_left_prediction(s, s->temp[2], p->data[2]+fake_vstride, 2, leftv);
+
+            encode_422_bitstream(s, 4);
+
+            lefttopy= p->data[0][3];
+            lefttopu= p->data[1][1];
+            lefttopv= p->data[2][1];
+            s->dsp.sub_hfyu_median_prediction(s->temp[0], p->data[0]+4, p->data[0] + fake_ystride+4, width-4 , &lefty, &lefttopy);
+            s->dsp.sub_hfyu_median_prediction(s->temp[1], p->data[1]+2, p->data[1] + fake_ustride+2, width2-2, &leftu, &lefttopu);
+            s->dsp.sub_hfyu_median_prediction(s->temp[2], p->data[2]+2, p->data[2] + fake_vstride+2, width2-2, &leftv, &lefttopv);
+            encode_422_bitstream(s, width-4);
+            y++; cy++;
+
+            for(; y<height; y++,cy++){
+                uint8_t *ydst, *udst, *vdst;
+
+                if(s->bitstream_bpp==12){
+                    while(2*cy > y){
+                        ydst= p->data[0] + p->linesize[0]*y;
+                        s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy);
+                        encode_gray_bitstream(s, width);
+                        y++;
+                    }
+                    if(y>=height) break;
+                }
+                ydst= p->data[0] + p->linesize[0]*y;
+                udst= p->data[1] + p->linesize[1]*cy;
+                vdst= p->data[2] + p->linesize[2]*cy;
+
+                s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy);
+                s->dsp.sub_hfyu_median_prediction(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu);
+                s->dsp.sub_hfyu_median_prediction(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv);
+
+                encode_422_bitstream(s, width);
+            }
+        }else{
+            for(cy=y=1; y<height; y++,cy++){
+                uint8_t *ydst, *udst, *vdst;
+
+                /* encode a luma only line & y++ */
+                if(s->bitstream_bpp==12){
+                    ydst= p->data[0] + p->linesize[0]*y;
+
+                    if(s->predictor == PLANE && s->interlaced < y){
+                        s->dsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width);
+
+                        lefty= sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty);
+                    }else{
+                        lefty= sub_left_prediction(s, s->temp[0], ydst, width , lefty);
+                    }
+                    encode_gray_bitstream(s, width);
+                    y++;
+                    if(y>=height) break;
+                }
+
+                ydst= p->data[0] + p->linesize[0]*y;
+                udst= p->data[1] + p->linesize[1]*cy;
+                vdst= p->data[2] + p->linesize[2]*cy;
+
+                if(s->predictor == PLANE && s->interlaced < cy){
+                    s->dsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width);
+                    s->dsp.diff_bytes(s->temp[2], udst, udst - fake_ustride, width2);
+                    s->dsp.diff_bytes(s->temp[2] + width2, vdst, vdst - fake_vstride, width2);
+
+                    lefty= sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty);
+                    leftu= sub_left_prediction(s, s->temp[1], s->temp[2], width2, leftu);
+                    leftv= sub_left_prediction(s, s->temp[2], s->temp[2] + width2, width2, leftv);
+                }else{
+                    lefty= sub_left_prediction(s, s->temp[0], ydst, width , lefty);
+                    leftu= sub_left_prediction(s, s->temp[1], udst, width2, leftu);
+                    leftv= sub_left_prediction(s, s->temp[2], vdst, width2, leftv);
+                }
+
+                encode_422_bitstream(s, width);
+            }
+        }
+    }else{
+        av_log(avctx, AV_LOG_ERROR, "Format not supported!\n");
+    }
+    emms_c();
+
+    size+= (put_bits_count(&s->pb)+31)/8;
+    size/= 4;
+
+    if((s->flags&CODEC_FLAG_PASS1) && (s->picture_number&31)==0){
+        int j;
+        char *p= avctx->stats_out;
+        char *end= p + 1024*30;
+        for(i=0; i<3; i++){
+            for(j=0; j<256; j++){
+                snprintf(p, end-p, "%"PRIu64" ", s->stats[i][j]);
+                p+= strlen(p);
+                s->stats[i][j]= 0;
+            }
+            snprintf(p, end-p, "\n");
+            p++;
+        }
+    }
+    if(!(s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)){
+        flush_put_bits(&s->pb);
+        s->dsp.bswap_buf((uint32_t*)buf, (uint32_t*)buf, size);
+        avctx->stats_out[0] = '\0';
+    }
+
+    s->picture_number++;
+
+    return size*4;
+}
+
+static int encode_end(AVCodecContext *avctx)
+{
+    HYuvContext *s = avctx->priv_data;
+
+    common_end(s);
+
+    av_freep(&avctx->extradata);
+    av_freep(&avctx->stats_out);
+
+    return 0;
+}
+#endif /* CONFIG_ENCODERS */
+
+#ifdef CONFIG_DECODERS
+AVCodec huffyuv_decoder = {
+    "huffyuv",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_HUFFYUV,
+    sizeof(HYuvContext),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame,
+    CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND,
+    NULL
+};
+
+AVCodec ffvhuff_decoder = {
+    "ffvhuff",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_FFVHUFF,
+    sizeof(HYuvContext),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame,
+    CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND,
+    NULL
+};
+#endif
+
+#ifdef CONFIG_ENCODERS
+
+AVCodec huffyuv_encoder = {
+    "huffyuv",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_HUFFYUV,
+    sizeof(HYuvContext),
+    encode_init,
+    encode_frame,
+    encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV422P, -1},
+};
+
+AVCodec ffvhuff_encoder = {
+    "ffvhuff",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_FFVHUFF,
+    sizeof(HYuvContext),
+    encode_init,
+    encode_frame,
+    encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_YUV422P, -1},
+};
+
+#endif //CONFIG_ENCODERS
diff --git a/contrib/ffmpeg/libavcodec/i386/cavsdsp_mmx.c b/contrib/ffmpeg/libavcodec/i386/cavsdsp_mmx.c
new file mode 100644
index 000000000..51d519a5c
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/cavsdsp_mmx.c
@@ -0,0 +1,518 @@
+/*
+ * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
+ * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
+ *
+ * MMX optimised DSP functions, based on H.264 optimisations by
+ * Michael Niedermayer and Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "../dsputil.h"
+#include "common.h"
+
+DECLARE_ALIGNED_8(static const uint64_t,ff_pw_4 ) = 0x0004000400040004ULL;
+DECLARE_ALIGNED_8(static const uint64_t,ff_pw_5 ) = 0x0005000500050005ULL;
+DECLARE_ALIGNED_8(static const uint64_t,ff_pw_7 ) = 0x0007000700070007ULL;
+DECLARE_ALIGNED_8(static const uint64_t,ff_pw_42) = 0x002A002A002A002AULL;
+DECLARE_ALIGNED_8(static const uint64_t,ff_pw_64) = 0x0040004000400040ULL;
+DECLARE_ALIGNED_8(static const uint64_t,ff_pw_96) = 0x0060006000600060ULL;
+
+/*****************************************************************************
+ *
+ * inverse transform
+ *
+ ****************************************************************************/
+
+#define SUMSUB_BA( a, b ) \
+    "paddw "#b", "#a" \n\t"\
+    "paddw "#b", "#b" \n\t"\
+    "psubw "#a", "#b" \n\t"
+
+#define SBUTTERFLY(a,b,t,n)\
+    "movq " #a ", " #t "              \n\t" /* abcd */\
+    "punpckl" #n " " #b ", " #a "     \n\t" /* aebf */\
+    "punpckh" #n " " #b ", " #t "     \n\t" /* cgdh */
+
+#define TRANSPOSE4(a,b,c,d,t)\
+    SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
+    SBUTTERFLY(c,d,b,wd) /* c=imjn b=kolp */\
+    SBUTTERFLY(a,c,d,dq) /* a=aeim d=bfjn */\
+    SBUTTERFLY(t,b,c,dq) /* t=cgko c=dhlp */
+
+static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
+{
+    asm volatile(
+        "movq 112(%0), %%mm4  \n\t" /* mm4 = src7 */
+        "movq  16(%0), %%mm5  \n\t" /* mm5 = src1 */
+        "movq  80(%0), %%mm2  \n\t" /* mm2 = src5 */
+        "movq  48(%0), %%mm7  \n\t" /* mm7 = src3 */
+        "movq   %%mm4, %%mm0  \n\t"
+        "movq   %%mm5, %%mm3  \n\t"
+        "movq   %%mm2, %%mm6  \n\t"
+        "movq   %%mm7, %%mm1  \n\t"
+
+        "paddw  %%mm4, %%mm4  \n\t" /* mm4 = 2*src7 */
+        "paddw  %%mm3, %%mm3  \n\t" /* mm3 = 2*src1 */
+        "paddw  %%mm6, %%mm6  \n\t" /* mm6 = 2*src5 */
+        "paddw  %%mm1, %%mm1  \n\t" /* mm1 = 2*src3 */
+        "paddw  %%mm4, %%mm0  \n\t" /* mm0 = 3*src7 */
+        "paddw  %%mm3, %%mm5  \n\t" /* mm5 = 3*src1 */
+        "paddw  %%mm6, %%mm2  \n\t" /* mm2 = 3*src5 */
+        "paddw  %%mm1, %%mm7  \n\t" /* mm7 = 3*src3 */
+        "psubw  %%mm4, %%mm5  \n\t" /* mm5 = 3*src1 - 2*src7 = a0 */
+        "paddw  %%mm6, %%mm7  \n\t" /* mm7 = 3*src3 + 2*src5 = a1 */
+        "psubw  %%mm2, %%mm1  \n\t" /* mm1 = 2*src3 - 3*src5 = a2 */
+        "paddw  %%mm0, %%mm3  \n\t" /* mm3 = 2*src1 + 3*src7 = a3 */
+
+        "movq   %%mm5, %%mm4  \n\t"
+        "movq   %%mm7, %%mm6  \n\t"
+        "movq   %%mm3, %%mm0  \n\t"
+        "movq   %%mm1, %%mm2  \n\t"
+        SUMSUB_BA( %%mm7, %%mm5 )   /* mm7 = a0 + a1  mm5 = a0 - a1 */
+        "paddw  %%mm3, %%mm7  \n\t" /* mm7 = a0 + a1 + a3 */
+        "paddw  %%mm1, %%mm5  \n\t" /* mm5 = a0 - a1 + a2 */
+        "paddw  %%mm7, %%mm7  \n\t"
+        "paddw  %%mm5, %%mm5  \n\t"
+        "paddw  %%mm6, %%mm7  \n\t" /* mm7 = b4 */
+        "paddw  %%mm4, %%mm5  \n\t" /* mm5 = b5 */
+
+        SUMSUB_BA( %%mm1, %%mm3 )   /* mm1 = a3 + a2  mm3 = a3 - a2 */
+        "psubw  %%mm1, %%mm4  \n\t" /* mm4 = a0 - a2 - a3 */
+        "movq   %%mm4, %%mm1  \n\t" /* mm1 = a0 - a2 - a3 */
+        "psubw  %%mm6, %%mm3  \n\t" /* mm3 = a3 - a2 - a1 */
+        "paddw  %%mm1, %%mm1  \n\t"
+        "paddw  %%mm3, %%mm3  \n\t"
+        "psubw  %%mm2, %%mm1  \n\t" /* mm1 = b7 */
+        "paddw  %%mm0, %%mm3  \n\t" /* mm3 = b6 */
+
+        "movq  32(%0), %%mm2  \n\t" /* mm2 = src2 */
+        "movq  96(%0), %%mm6  \n\t" /* mm6 = src6 */
+        "movq   %%mm2, %%mm4  \n\t"
+        "movq   %%mm6, %%mm0  \n\t"
+        "psllw  $2,    %%mm4  \n\t" /* mm4 = 4*src2 */
+        "psllw  $2,    %%mm6  \n\t" /* mm6 = 4*src6 */
+        "paddw  %%mm4, %%mm2  \n\t" /* mm2 = 5*src2 */
+        "paddw  %%mm6, %%mm0  \n\t" /* mm0 = 5*src6 */
+        "paddw  %%mm2, %%mm2  \n\t"
+        "paddw  %%mm0, %%mm0  \n\t"
+        "psubw  %%mm0, %%mm4  \n\t" /* mm4 = 4*src2 - 10*src6 = a7 */
+        "paddw  %%mm2, %%mm6  \n\t" /* mm6 = 4*src6 + 10*src2 = a6 */
+
+        "movq    (%0), %%mm2  \n\t" /* mm2 = src0 */
+        "movq  64(%0), %%mm0  \n\t" /* mm0 = src4 */
+        SUMSUB_BA( %%mm0, %%mm2 )   /* mm0 = src0+src4  mm2 = src0-src4 */
+        "psllw  $3,    %%mm0  \n\t"
+        "psllw  $3,    %%mm2  \n\t"
+        "paddw  %1,    %%mm0  \n\t" /* add rounding bias */
+        "paddw  %1,    %%mm2  \n\t" /* add rounding bias */
+
+        SUMSUB_BA( %%mm6, %%mm0 )   /* mm6 = a4 + a6  mm0 = a4 - a6 */
+        SUMSUB_BA( %%mm4, %%mm2 )   /* mm4 = a5 + a7  mm2 = a5 - a7 */
+        SUMSUB_BA( %%mm7, %%mm6 )   /* mm7 = dst0  mm6 = dst7 */
+        SUMSUB_BA( %%mm5, %%mm4 )   /* mm5 = dst1  mm4 = dst6 */
+        SUMSUB_BA( %%mm3, %%mm2 )   /* mm3 = dst2  mm2 = dst5 */
+        SUMSUB_BA( %%mm1, %%mm0 )   /* mm1 = dst3  mm0 = dst4 */
+        :: "r"(block), "m"(bias)
+    );
+}
+
+static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
+{
+    int i;
+    DECLARE_ALIGNED_8(int16_t, b2[64]);
+
+    for(i=0; i<2; i++){
+        DECLARE_ALIGNED_8(uint64_t, tmp);
+
+        cavs_idct8_1d(block+4*i, ff_pw_4);
+
+        asm volatile(
+            "psraw     $3, %%mm7  \n\t"
+            "psraw     $3, %%mm6  \n\t"
+            "psraw     $3, %%mm5  \n\t"
+            "psraw     $3, %%mm4  \n\t"
+            "psraw     $3, %%mm3  \n\t"
+            "psraw     $3, %%mm2  \n\t"
+            "psraw     $3, %%mm1  \n\t"
+            "psraw     $3, %%mm0  \n\t"
+            "movq   %%mm7,    %0   \n\t"
+            TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
+            "movq   %%mm0,  8(%1)  \n\t"
+            "movq   %%mm6, 24(%1)  \n\t"
+            "movq   %%mm7, 40(%1)  \n\t"
+            "movq   %%mm4, 56(%1)  \n\t"
+            "movq    %0,    %%mm7  \n\t"
+            TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
+            "movq   %%mm7,   (%1)  \n\t"
+            "movq   %%mm1, 16(%1)  \n\t"
+            "movq   %%mm0, 32(%1)  \n\t"
+            "movq   %%mm3, 48(%1)  \n\t"
+            : "=m"(tmp)
+            : "r"(b2+32*i)
+            : "memory"
+        );
+    }
+
+    for(i=0; i<2; i++){
+        cavs_idct8_1d(b2+4*i, ff_pw_64);
+
+        asm volatile(
+            "psraw     $7, %%mm7  \n\t"
+            "psraw     $7, %%mm6  \n\t"
+            "psraw     $7, %%mm5  \n\t"
+            "psraw     $7, %%mm4  \n\t"
+            "psraw     $7, %%mm3  \n\t"
+            "psraw     $7, %%mm2  \n\t"
+            "psraw     $7, %%mm1  \n\t"
+            "psraw     $7, %%mm0  \n\t"
+            "movq   %%mm7,    (%0)  \n\t"
+            "movq   %%mm5,  16(%0)  \n\t"
+            "movq   %%mm3,  32(%0)  \n\t"
+            "movq   %%mm1,  48(%0)  \n\t"
+            "movq   %%mm0,  64(%0)  \n\t"
+            "movq   %%mm2,  80(%0)  \n\t"
+            "movq   %%mm4,  96(%0)  \n\t"
+            "movq   %%mm6, 112(%0)  \n\t"
+            :: "r"(b2+4*i)
+            : "memory"
+        );
+    }
+
+    add_pixels_clamped_mmx(b2, dst, stride);
+
+    /* clear block */
+    asm volatile(
+            "pxor %%mm7, %%mm7   \n\t"
+            "movq %%mm7, (%0)    \n\t"
+            "movq %%mm7, 8(%0)   \n\t"
+            "movq %%mm7, 16(%0)  \n\t"
+            "movq %%mm7, 24(%0)  \n\t"
+            "movq %%mm7, 32(%0)  \n\t"
+            "movq %%mm7, 40(%0)  \n\t"
+            "movq %%mm7, 48(%0)  \n\t"
+            "movq %%mm7, 56(%0)  \n\t"
+            "movq %%mm7, 64(%0)  \n\t"
+            "movq %%mm7, 72(%0)  \n\t"
+            "movq %%mm7, 80(%0)  \n\t"
+            "movq %%mm7, 88(%0)  \n\t"
+            "movq %%mm7, 96(%0)  \n\t"
+            "movq %%mm7, 104(%0) \n\t"
+            "movq %%mm7, 112(%0) \n\t"
+            "movq %%mm7, 120(%0) \n\t"
+            :: "r" (block)
+    );
+}
+
+/*****************************************************************************
+ *
+ * motion compensation
+ *
+ ****************************************************************************/
+
+/* vertical filter [-1 -2 96 42 -7  0]  */
+#define QPEL_CAVSV1(A,B,C,D,E,F,OP)      \
+        "movd (%0), "#F"            \n\t"\
+        "movq "#C", %%mm6           \n\t"\
+        "pmullw %5, %%mm6           \n\t"\
+        "movq "#D", %%mm7           \n\t"\
+        "pmullw %6, %%mm7           \n\t"\
+        "psllw $3, "#E"             \n\t"\
+        "psubw "#E", %%mm6          \n\t"\
+        "psraw $3, "#E"             \n\t"\
+        "paddw %%mm7, %%mm6         \n\t"\
+        "paddw "#E", %%mm6          \n\t"\
+        "paddw "#B", "#B"           \n\t"\
+        "pxor %%mm7, %%mm7          \n\t"\
+        "add %2, %0                 \n\t"\
+        "punpcklbw %%mm7, "#F"      \n\t"\
+        "psubw "#B", %%mm6          \n\t"\
+        "psraw $1, "#B"             \n\t"\
+        "psubw "#A", %%mm6          \n\t"\
+        "paddw %4, %%mm6            \n\t"\
+        "psraw $7, %%mm6            \n\t"\
+        "packuswb %%mm6, %%mm6      \n\t"\
+        OP(%%mm6, (%1), A, d)            \
+        "add %3, %1                 \n\t"
+
+/* vertical filter [ 0 -1  5  5 -1  0]  */
+#define QPEL_CAVSV2(A,B,C,D,E,F,OP)      \
+        "movd (%0), "#F"            \n\t"\
+        "movq "#C", %%mm6           \n\t"\
+        "paddw "#D", %%mm6          \n\t"\
+        "pmullw %5, %%mm6           \n\t"\
+        "add %2, %0                 \n\t"\
+        "punpcklbw %%mm7, "#F"      \n\t"\
+        "psubw "#B", %%mm6          \n\t"\
+        "psubw "#E", %%mm6          \n\t"\
+        "paddw %4, %%mm6            \n\t"\
+        "psraw $3, %%mm6            \n\t"\
+        "packuswb %%mm6, %%mm6      \n\t"\
+        OP(%%mm6, (%1), A, d)            \
+        "add %3, %1                 \n\t"
+
+/* vertical filter [ 0 -7 42 96 -2 -1]  */
+#define QPEL_CAVSV3(A,B,C,D,E,F,OP)      \
+        "movd (%0), "#F"            \n\t"\
+        "movq "#C", %%mm6           \n\t"\
+        "pmullw %6, %%mm6           \n\t"\
+        "movq "#D", %%mm7           \n\t"\
+        "pmullw %5, %%mm7           \n\t"\
+        "psllw $3, "#B"             \n\t"\
+        "psubw "#B", %%mm6          \n\t"\
+        "psraw $3, "#B"             \n\t"\
+        "paddw %%mm7, %%mm6         \n\t"\
+        "paddw "#B", %%mm6          \n\t"\
+        "paddw "#E", "#E"           \n\t"\
+        "pxor %%mm7, %%mm7          \n\t"\
+        "add %2, %0                 \n\t"\
+        "punpcklbw %%mm7, "#F"      \n\t"\
+        "psubw "#E", %%mm6          \n\t"\
+        "psraw $1, "#E"             \n\t"\
+        "psubw "#F", %%mm6          \n\t"\
+        "paddw %4, %%mm6            \n\t"\
+        "psraw $7, %%mm6            \n\t"\
+        "packuswb %%mm6, %%mm6      \n\t"\
+        OP(%%mm6, (%1), A, d)            \
+        "add %3, %1                 \n\t"
+
+
+#define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
+    int w= 2;\
+    src -= 2*srcStride;\
+    \
+    while(w--){\
+      asm volatile(\
+        "pxor %%mm7, %%mm7          \n\t"\
+        "movd (%0), %%mm0           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm1           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm2           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm3           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm4           \n\t"\
+        "add %2, %0                 \n\t"\
+        "punpcklbw %%mm7, %%mm0     \n\t"\
+        "punpcklbw %%mm7, %%mm1     \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpcklbw %%mm7, %%mm3     \n\t"\
+        "punpcklbw %%mm7, %%mm4     \n\t"\
+        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+        VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+        VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+        VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
+        VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
+        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+        \
+        : "+a"(src), "+c"(dst)\
+        : "S"((long)srcStride), "D"((long)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
+        : "memory"\
+     );\
+     if(h==16){\
+        asm volatile(\
+            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+            VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
+            VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
+            VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+            VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+            \
+           : "+a"(src), "+c"(dst)\
+           : "S"((long)srcStride), "D"((long)dstStride), "m"(ADD),  "m"(MUL1), "m"(MUL2)\
+           : "memory"\
+        );\
+     }\
+     src += 4-(h+5)*srcStride;\
+     dst += 4-h*dstStride;\
+   }
+
+#define QPEL_CAVS(OPNAME, OP, MMX)\
+static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    int h=8;\
+    asm volatile(\
+        "pxor %%mm7, %%mm7          \n\t"\
+        "movq %5, %%mm6             \n\t"\
+        "1:                         \n\t"\
+        "movq    (%0), %%mm0        \n\t"\
+        "movq   1(%0), %%mm2        \n\t"\
+        "movq %%mm0, %%mm1          \n\t"\
+        "movq %%mm2, %%mm3          \n\t"\
+        "punpcklbw %%mm7, %%mm0     \n\t"\
+        "punpckhbw %%mm7, %%mm1     \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpckhbw %%mm7, %%mm3     \n\t"\
+        "paddw %%mm2, %%mm0         \n\t"\
+        "paddw %%mm3, %%mm1         \n\t"\
+        "pmullw %%mm6, %%mm0        \n\t"\
+        "pmullw %%mm6, %%mm1        \n\t"\
+        "movq   -1(%0), %%mm2       \n\t"\
+        "movq    2(%0), %%mm4       \n\t"\
+        "movq %%mm2, %%mm3          \n\t"\
+        "movq %%mm4, %%mm5          \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpckhbw %%mm7, %%mm3     \n\t"\
+        "punpcklbw %%mm7, %%mm4     \n\t"\
+        "punpckhbw %%mm7, %%mm5     \n\t"\
+        "paddw %%mm4, %%mm2         \n\t"\
+        "paddw %%mm3, %%mm5         \n\t"\
+        "psubw %%mm2, %%mm0         \n\t"\
+        "psubw %%mm5, %%mm1         \n\t"\
+        "movq %6, %%mm5             \n\t"\
+        "paddw %%mm5, %%mm0         \n\t"\
+        "paddw %%mm5, %%mm1         \n\t"\
+        "psraw $3, %%mm0            \n\t"\
+        "psraw $3, %%mm1            \n\t"\
+        "packuswb %%mm1, %%mm0      \n\t"\
+        OP(%%mm0, (%1),%%mm5, q)         \
+        "add %3, %0                 \n\t"\
+        "add %4, %1                 \n\t"\
+        "decl %2                    \n\t"\
+        " jnz 1b                    \n\t"\
+        : "+a"(src), "+c"(dst), "+m"(h)\
+        : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
+        : "memory"\
+    );\
+}\
+\
+static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+  QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42)      \
+}\
+\
+static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+  QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_5)         \
+}\
+\
+static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+  QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42)      \
+}\
+\
+static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
+}\
+static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
+    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
+}\
+\
+static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
+}\
+static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
+    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
+}\
+\
+static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
+}\
+static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
+    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
+}\
+\
+static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## cavs_qpel8_h_ ## MMX(dst  , src  , dstStride, srcStride);\
+    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## cavs_qpel8_h_ ## MMX(dst  , src  , dstStride, srcStride);\
+    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
+}\
+
+#define CAVS_MC(OPNAME, SIZE, MMX) \
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
+}\
+
+#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b "    \n\t"
+#define AVG_3DNOW_OP(a,b,temp, size) \
+"mov" #size " " #b ", " #temp "   \n\t"\
+"pavgusb " #temp ", " #a "        \n\t"\
+"mov" #size " " #a ", " #b "      \n\t"
+#define AVG_MMX2_OP(a,b,temp, size) \
+"mov" #size " " #b ", " #temp "   \n\t"\
+"pavgb " #temp ", " #a "          \n\t"\
+"mov" #size " " #a ", " #b "      \n\t"
+
+QPEL_CAVS(put_,       PUT_OP, 3dnow)
+QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
+QPEL_CAVS(put_,       PUT_OP, mmx2)
+QPEL_CAVS(avg_,  AVG_MMX2_OP, mmx2)
+
+CAVS_MC(put_, 8, 3dnow)
+CAVS_MC(put_, 16,3dnow)
+CAVS_MC(avg_, 8, 3dnow)
+CAVS_MC(avg_, 16,3dnow)
+CAVS_MC(put_, 8, mmx2)
+CAVS_MC(put_, 16,mmx2)
+CAVS_MC(avg_, 8, mmx2)
+CAVS_MC(avg_, 16,mmx2)
+
+void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+
+void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx) {
+#define dspfunc(PFX, IDX, NUM) \
+    c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
+    c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \
+    c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \
+    c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \
+    c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \
+
+    dspfunc(put_cavs_qpel, 0, 16);
+    dspfunc(put_cavs_qpel, 1, 8);
+    dspfunc(avg_cavs_qpel, 0, 16);
+    dspfunc(avg_cavs_qpel, 1, 8);
+#undef dspfunc
+    c->cavs_idct8_add = cavs_idct8_add_mmx;
+}
+
+void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx) {
+#define dspfunc(PFX, IDX, NUM) \
+    c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
+    c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
+    c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
+    c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
+    c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_3dnow; \
+
+    dspfunc(put_cavs_qpel, 0, 16);
+    dspfunc(put_cavs_qpel, 1, 8);
+    dspfunc(avg_cavs_qpel, 0, 16);
+    dspfunc(avg_cavs_qpel, 1, 8);
+#undef dspfunc
+    c->cavs_idct8_add = cavs_idct8_add_mmx;
+}
diff --git a/contrib/ffmpeg/libavcodec/i386/cputest.c b/contrib/ffmpeg/libavcodec/i386/cputest.c
new file mode 100644
index 000000000..262786b71
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/cputest.c
@@ -0,0 +1,126 @@
+/*
+ * CPU detection code, extracted from mmx.h
+ * (c)1997-99 by H. Dietz and R. Fisher
+ * Converted to C and improved by Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include "../dsputil.h"
+
+#undef printf
+
+#ifdef ARCH_X86_64
+#  define REG_b "rbx"
+#  define REG_S "rsi"
+#else
+#  define REG_b "ebx"
+#  define REG_S "esi"
+#endif
+
+/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
+#define cpuid(index,eax,ebx,ecx,edx)\
+    __asm __volatile\
+        ("mov %%"REG_b", %%"REG_S"\n\t"\
+         "cpuid\n\t"\
+         "xchg %%"REG_b", %%"REG_S\
+         : "=a" (eax), "=S" (ebx),\
+           "=c" (ecx), "=d" (edx)\
+         : "0" (index));
+
+/* Function to test if multimedia instructions are supported...  */
+int mm_support(void)
+{
+    int rval = 0;
+    int eax, ebx, ecx, edx;
+    int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
+    long a, c;
+
+    __asm__ __volatile__ (
+                          /* See if CPUID instruction is supported ... */
+                          /* ... Get copies of EFLAGS into eax and ecx */
+                          "pushf\n\t"
+                          "pop %0\n\t"
+                          "mov %0, %1\n\t"
+
+                          /* ... Toggle the ID bit in one copy and store */
+                          /*     to the EFLAGS reg */
+                          "xor $0x200000, %0\n\t"
+                          "push %0\n\t"
+                          "popf\n\t"
+
+                          /* ... Get the (hopefully modified) EFLAGS */
+                          "pushf\n\t"
+                          "pop %0\n\t"
+                          : "=a" (a), "=c" (c)
+                          :
+                          : "cc"
+                          );
+
+    if (a == c)
+        return 0; /* CPUID not supported */
+
+    cpuid(0, max_std_level, ebx, ecx, edx);
+
+    if(max_std_level >= 1){
+        cpuid(1, eax, ebx, ecx, std_caps);
+        if (std_caps & (1<<23))
+            rval |= MM_MMX;
+        if (std_caps & (1<<25))
+            rval |= MM_MMXEXT | MM_SSE;
+        if (std_caps & (1<<26))
+            rval |= MM_SSE2;
+        if (ecx & 1)
+            rval |= MM_SSE3;
+    }
+
+    cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
+
+    if(max_ext_level >= 0x80000001){
+        cpuid(0x80000001, eax, ebx, ecx, ext_caps);
+        if (ext_caps & (1<<31))
+            rval |= MM_3DNOW;
+        if (ext_caps & (1<<30))
+            rval |= MM_3DNOWEXT;
+        if (ext_caps & (1<<23))
+            rval |= MM_MMX;
+        if (ext_caps & (1<<22))
+            rval |= MM_MMXEXT;
+    }
+
+#if 0
+    av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n",
+        (rval&MM_MMX) ? "MMX ":"",
+        (rval&MM_MMXEXT) ? "MMX2 ":"",
+        (rval&MM_SSE) ? "SSE ":"",
+        (rval&MM_SSE2) ? "SSE2 ":"",
+        (rval&MM_3DNOW) ? "3DNow ":"",
+        (rval&MM_3DNOWEXT) ? "3DNowExt ":"");
+#endif
+    return rval;
+}
+
+#ifdef __TEST__
+int main ( void )
+{
+  int mm_flags;
+  mm_flags = mm_support();
+  printf("mm_support = 0x%08X\n",mm_flags);
+  return 0;
+}
+#endif
diff --git a/contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c b/contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c
new file mode 100644
index 000000000..e09a1007e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>,
+ *                    Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * MMX optimized version of (put|avg)_h264_chroma_mc8.
+ * H264_CHROMA_MC8_TMPL must be defined to the desired function name
+ * H264_CHROMA_OP must be defined to empty for put and pavgb/pavgusb for avg
+ * H264_CHROMA_MC8_MV0 must be defined to a (put|avg)_pixels8 function
+ */
+static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+    DECLARE_ALIGNED_8(uint64_t, AA);
+    DECLARE_ALIGNED_8(uint64_t, DD);
+    int i;
+
+    if(y==0 && x==0) {
+        /* no filter needed */
+        H264_CHROMA_MC8_MV0(dst, src, stride, h);
+        return;
+    }
+
+    assert(x<8 && y<8 && x>=0 && y>=0);
+
+    if(y==0 || x==0)
+    {
+        /* 1 dimensional filter only */
+        const int dxy = x ? 1 : stride;
+
+        asm volatile(
+            "movd %0, %%mm5\n\t"
+            "movq %1, %%mm4\n\t"
+            "punpcklwd %%mm5, %%mm5\n\t"
+            "punpckldq %%mm5, %%mm5\n\t" /* mm5 = B = x */
+            "movq %%mm4, %%mm6\n\t"
+            "pxor %%mm7, %%mm7\n\t"
+            "psubw %%mm5, %%mm4\n\t"     /* mm4 = A = 8-x */
+            "psrlw $1, %%mm6\n\t"        /* mm6 = 4 */
+            :: "rm"(x+y), "m"(ff_pw_8));
+
+        for(i=0; i<h; i++) {
+            asm volatile(
+                /* mm0 = src[0..7], mm1 = src[1..8] */
+                "movq %0, %%mm0\n\t"
+                "movq %1, %%mm2\n\t"
+                :: "m"(src[0]), "m"(src[dxy]));
+
+            asm volatile(
+                /* [mm0,mm1] = A * src[0..7] */
+                /* [mm2,mm3] = B * src[1..8] */
+                "movq %%mm0, %%mm1\n\t"
+                "movq %%mm2, %%mm3\n\t"
+                "punpcklbw %%mm7, %%mm0\n\t"
+                "punpckhbw %%mm7, %%mm1\n\t"
+                "punpcklbw %%mm7, %%mm2\n\t"
+                "punpckhbw %%mm7, %%mm3\n\t"
+                "pmullw %%mm4, %%mm0\n\t"
+                "pmullw %%mm4, %%mm1\n\t"
+                "pmullw %%mm5, %%mm2\n\t"
+                "pmullw %%mm5, %%mm3\n\t"
+
+                /* dst[0..7] = (A * src[0..7] + B * src[1..8] + 4) >> 3 */
+                "paddw %%mm6, %%mm0\n\t"
+                "paddw %%mm6, %%mm1\n\t"
+                "paddw %%mm2, %%mm0\n\t"
+                "paddw %%mm3, %%mm1\n\t"
+                "psrlw $3, %%mm0\n\t"
+                "psrlw $3, %%mm1\n\t"
+                "packuswb %%mm1, %%mm0\n\t"
+                H264_CHROMA_OP(%0, %%mm0)
+                "movq %%mm0, %0\n\t"
+                : "=m" (dst[0]));
+
+            src += stride;
+            dst += stride;
+        }
+        return;
+    }
+
+    /* general case, bilinear */
+    asm volatile("movd %2, %%mm4\n\t"
+                 "movd %3, %%mm6\n\t"
+                 "punpcklwd %%mm4, %%mm4\n\t"
+                 "punpcklwd %%mm6, %%mm6\n\t"
+                 "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x words */
+                 "punpckldq %%mm6, %%mm6\n\t" /* mm6 = y words */
+                 "movq %%mm4, %%mm5\n\t"
+                 "pmullw %%mm6, %%mm4\n\t"    /* mm4 = x * y */
+                 "psllw $3, %%mm5\n\t"
+                 "psllw $3, %%mm6\n\t"
+                 "movq %%mm5, %%mm7\n\t"
+                 "paddw %%mm6, %%mm7\n\t"
+                 "movq %%mm4, %1\n\t"         /* DD = x * y */
+                 "psubw %%mm4, %%mm5\n\t"     /* mm5 = B = 8x - xy */
+                 "psubw %%mm4, %%mm6\n\t"     /* mm6 = C = 8y - xy */
+                 "paddw %4, %%mm4\n\t"
+                 "psubw %%mm7, %%mm4\n\t"     /* mm4 = A = xy - (8x+8y) + 64 */
+                 "pxor %%mm7, %%mm7\n\t"
+                 "movq %%mm4, %0\n\t"
+                 : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64));
+
+    asm volatile(
+        /* mm0 = src[0..7], mm1 = src[1..8] */
+        "movq %0, %%mm0\n\t"
+        "movq %1, %%mm1\n\t"
+        : : "m" (src[0]), "m" (src[1]));
+
+    for(i=0; i<h; i++) {
+        src += stride;
+
+        asm volatile(
+            /* mm2 = A * src[0..3] + B * src[1..4] */
+            /* mm3 = A * src[4..7] + B * src[5..8] */
+            "movq %%mm0, %%mm2\n\t"
+            "movq %%mm1, %%mm3\n\t"
+            "punpckhbw %%mm7, %%mm0\n\t"
+            "punpcklbw %%mm7, %%mm1\n\t"
+            "punpcklbw %%mm7, %%mm2\n\t"
+            "punpckhbw %%mm7, %%mm3\n\t"
+            "pmullw %0, %%mm0\n\t"
+            "pmullw %0, %%mm2\n\t"
+            "pmullw %%mm5, %%mm1\n\t"
+            "pmullw %%mm5, %%mm3\n\t"
+            "paddw %%mm1, %%mm2\n\t"
+            "paddw %%mm0, %%mm3\n\t"
+            : : "m" (AA));
+
+        asm volatile(
+            /* [mm2,mm3] += C * src[0..7] */
+            "movq %0, %%mm0\n\t"
+            "movq %%mm0, %%mm1\n\t"
+            "punpcklbw %%mm7, %%mm0\n\t"
+            "punpckhbw %%mm7, %%mm1\n\t"
+            "pmullw %%mm6, %%mm0\n\t"
+            "pmullw %%mm6, %%mm1\n\t"
+            "paddw %%mm0, %%mm2\n\t"
+            "paddw %%mm1, %%mm3\n\t"
+            : : "m" (src[0]));
+
+        asm volatile(
+            /* [mm2,mm3] += D * src[1..8] */
+            "movq %1, %%mm1\n\t"
+            "movq %%mm1, %%mm0\n\t"
+            "movq %%mm1, %%mm4\n\t"
+            "punpcklbw %%mm7, %%mm0\n\t"
+            "punpckhbw %%mm7, %%mm4\n\t"
+            "pmullw %2, %%mm0\n\t"
+            "pmullw %2, %%mm4\n\t"
+            "paddw %%mm0, %%mm2\n\t"
+            "paddw %%mm4, %%mm3\n\t"
+            "movq %0, %%mm0\n\t"
+            : : "m" (src[0]), "m" (src[1]), "m" (DD));
+
+        asm volatile(
+            /* dst[0..7] = ([mm2,mm3] + 32) >> 6 */
+            "paddw %1, %%mm2\n\t"
+            "paddw %1, %%mm3\n\t"
+            "psrlw $6, %%mm2\n\t"
+            "psrlw $6, %%mm3\n\t"
+            "packuswb %%mm3, %%mm2\n\t"
+            H264_CHROMA_OP(%0, %%mm2)
+            "movq %%mm2, %0\n\t"
+            : "=m" (dst[0]) : "m" (ff_pw_32));
+        dst+= stride;
+    }
+}
+
+static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+    DECLARE_ALIGNED_8(uint64_t, AA);
+    DECLARE_ALIGNED_8(uint64_t, DD);
+    int i;
+
+    /* no special case for mv=(0,0) in 4x*, since it's much less common than in 8x*.
+     * could still save a few cycles, but maybe not worth the complexity. */
+
+    assert(x<8 && y<8 && x>=0 && y>=0);
+
+    asm volatile("movd %2, %%mm4\n\t"
+                 "movd %3, %%mm6\n\t"
+                 "punpcklwd %%mm4, %%mm4\n\t"
+                 "punpcklwd %%mm6, %%mm6\n\t"
+                 "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x words */
+                 "punpckldq %%mm6, %%mm6\n\t" /* mm6 = y words */
+                 "movq %%mm4, %%mm5\n\t"
+                 "pmullw %%mm6, %%mm4\n\t"    /* mm4 = x * y */
+                 "psllw $3, %%mm5\n\t"
+                 "psllw $3, %%mm6\n\t"
+                 "movq %%mm5, %%mm7\n\t"
+                 "paddw %%mm6, %%mm7\n\t"
+                 "movq %%mm4, %1\n\t"         /* DD = x * y */
+                 "psubw %%mm4, %%mm5\n\t"     /* mm5 = B = 8x - xy */
+                 "psubw %%mm4, %%mm6\n\t"     /* mm6 = C = 8y - xy */
+                 "paddw %4, %%mm4\n\t"
+                 "psubw %%mm7, %%mm4\n\t"     /* mm4 = A = xy - (8x+8y) + 64 */
+                 "pxor %%mm7, %%mm7\n\t"
+                 "movq %%mm4, %0\n\t"
+                 : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64));
+
+    asm volatile(
+        /* mm0 = src[0..3], mm1 = src[1..4] */
+        "movd %0, %%mm0\n\t"
+        "movd %1, %%mm1\n\t"
+        "punpcklbw %%mm7, %%mm0\n\t"
+        "punpcklbw %%mm7, %%mm1\n\t"
+        : : "m" (src[0]), "m" (src[1]));
+
+    for(i=0; i<h; i++) {
+        asm volatile(
+            /* mm2 = A * src[0..3] + B * src[1..4] */
+            "movq %%mm0, %%mm2\n\t"
+            "pmullw %0, %%mm2\n\t"
+            "pmullw %%mm5, %%mm1\n\t"
+            "paddw %%mm1, %%mm2\n\t"
+            : : "m" (AA));
+
+        src += stride;
+        asm volatile(
+            /* mm0 = src[0..3], mm1 = src[1..4] */
+            "movd %0, %%mm0\n\t"
+            "movd %1, %%mm1\n\t"
+            "punpcklbw %%mm7, %%mm0\n\t"
+            "punpcklbw %%mm7, %%mm1\n\t"
+            : : "m" (src[0]), "m" (src[1]));
+
+        asm volatile(
+            /* mm2 += C * src[0..3] + D * src[1..4] */
+            "movq %%mm0, %%mm3\n\t"
+            "movq %%mm1, %%mm4\n\t"
+            "pmullw %%mm6, %%mm3\n\t"
+            "pmullw %0, %%mm4\n\t"
+            "paddw %%mm3, %%mm2\n\t"
+            "paddw %%mm4, %%mm2\n\t"
+            : : "m" (DD));
+
+        asm volatile(
+            /* dst[0..3] = pack((mm2 + 32) >> 6) */
+            "paddw %1, %%mm2\n\t"
+            "psrlw $6, %%mm2\n\t"
+            "packuswb %%mm7, %%mm2\n\t"
+            H264_CHROMA_OP4(%0, %%mm2, %%mm3)
+            "movd %%mm2, %0\n\t"
+            : "=m" (dst[0]) : "m" (ff_pw_32));
+        dst += stride;
+    }
+}
+
+#ifdef H264_CHROMA_MC2_TMPL
+static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+    int CD=((1<<16)-1)*x*y + 8*y;
+    int AB=((8<<16)-8)*x + 64 - CD;
+    int i;
+
+    asm volatile(
+        /* mm5 = {A,B,A,B} */
+        /* mm6 = {C,D,C,D} */
+        "movd %0, %%mm5\n\t"
+        "movd %1, %%mm6\n\t"
+        "punpckldq %%mm5, %%mm5\n\t"
+        "punpckldq %%mm6, %%mm6\n\t"
+        "pxor %%mm7, %%mm7\n\t"
+        :: "r"(AB), "r"(CD));
+
+    asm volatile(
+        /* mm0 = src[0,1,1,2] */
+        "movd %0, %%mm0\n\t"
+        "punpcklbw %%mm7, %%mm0\n\t"
+        "pshufw $0x94, %%mm0, %%mm0\n\t"
+        :: "m"(src[0]));
+
+    for(i=0; i<h; i++) {
+        asm volatile(
+            /* mm1 = A * src[0,1] + B * src[1,2] */
+            "movq    %%mm0, %%mm1\n\t"
+            "pmaddwd %%mm5, %%mm1\n\t"
+            ::);
+
+        src += stride;
+        asm volatile(
+            /* mm0 = src[0,1,1,2] */
+            "movd %0, %%mm0\n\t"
+            "punpcklbw %%mm7, %%mm0\n\t"
+            "pshufw $0x94, %%mm0, %%mm0\n\t"
+            :: "m"(src[0]));
+
+        asm volatile(
+            /* mm1 += C * src[0,1] + D * src[1,2] */
+            "movq    %%mm0, %%mm2\n\t"
+            "pmaddwd %%mm6, %%mm2\n\t"
+            "paddw   %%mm2, %%mm1\n\t"
+            ::);
+
+        asm volatile(
+            /* dst[0,1] = pack((mm1 + 32) >> 6) */
+            "paddw %1, %%mm1\n\t"
+            "psrlw $6, %%mm1\n\t"
+            "packssdw %%mm7, %%mm1\n\t"
+            "packuswb %%mm7, %%mm1\n\t"
+            /* writes garbage to the right of dst.
+             * ok because partitions are processed from left to right. */
+            H264_CHROMA_OP4(%0, %%mm1, %%mm3)
+            "movd %%mm1, %0\n\t"
+            : "=m" (dst[0]) : "m" (ff_pw_32));
+        dst += stride;
+    }
+}
+#endif
+
diff --git a/contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c b/contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c
new file mode 100644
index 000000000..5675828a4
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/dsputil_mmx.c
@@ -0,0 +1,3534 @@
+/*
+ * MMX optimized DSP utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
+ */
+
+#include "../dsputil.h"
+#include "../simple_idct.h"
+#include "../mpegvideo.h"
+#include "x86_cpu.h"
+#include "mmx.h"
+
+//#undef NDEBUG
+//#include <assert.h>
+
+extern void ff_idct_xvid_mmx(short *block);
+extern void ff_idct_xvid_mmx2(short *block);
+
+int mm_flags; /* multimedia extension flags */
+
+/* pixel operations */
+static const uint64_t mm_bone attribute_used __attribute__ ((aligned(8))) = 0x0101010101010101ULL;
+static const uint64_t mm_wone attribute_used __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
+static const uint64_t mm_wtwo attribute_used __attribute__ ((aligned(8))) = 0x0002000200020002ULL;
+
+static const uint64_t ff_pdw_80000000[2] attribute_used __attribute__ ((aligned(16))) =
+{0x8000000080000000ULL, 0x8000000080000000ULL};
+
+static const uint64_t ff_pw_20 attribute_used __attribute__ ((aligned(8))) = 0x0014001400140014ULL;
+static const uint64_t ff_pw_3  attribute_used __attribute__ ((aligned(8))) = 0x0003000300030003ULL;
+static const uint64_t ff_pw_4  attribute_used __attribute__ ((aligned(8))) = 0x0004000400040004ULL;
+static const uint64_t ff_pw_5  attribute_used __attribute__ ((aligned(8))) = 0x0005000500050005ULL;
+static const uint64_t ff_pw_8  attribute_used __attribute__ ((aligned(8))) = 0x0008000800080008ULL;
+static const uint64_t ff_pw_16 attribute_used __attribute__ ((aligned(8))) = 0x0010001000100010ULL;
+static const uint64_t ff_pw_32 attribute_used __attribute__ ((aligned(8))) = 0x0020002000200020ULL;
+static const uint64_t ff_pw_64 attribute_used __attribute__ ((aligned(8))) = 0x0040004000400040ULL;
+static const uint64_t ff_pw_15 attribute_used __attribute__ ((aligned(8))) = 0x000F000F000F000FULL;
+
+static const uint64_t ff_pb_1  attribute_used __attribute__ ((aligned(8))) = 0x0101010101010101ULL;
+static const uint64_t ff_pb_3  attribute_used __attribute__ ((aligned(8))) = 0x0303030303030303ULL;
+static const uint64_t ff_pb_7  attribute_used __attribute__ ((aligned(8))) = 0x0707070707070707ULL;
+static const uint64_t ff_pb_3F attribute_used __attribute__ ((aligned(8))) = 0x3F3F3F3F3F3F3F3FULL;
+static const uint64_t ff_pb_A1 attribute_used __attribute__ ((aligned(8))) = 0xA1A1A1A1A1A1A1A1ULL;
+static const uint64_t ff_pb_5F attribute_used __attribute__ ((aligned(8))) = 0x5F5F5F5F5F5F5F5FULL;
+static const uint64_t ff_pb_FC attribute_used __attribute__ ((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL;
+
+#define JUMPALIGN() __asm __volatile (ASMALIGN(3)::)
+#define MOVQ_ZERO(regd)  __asm __volatile ("pxor %%" #regd ", %%" #regd ::)
+
+#define MOVQ_WONE(regd) \
+    __asm __volatile ( \
+    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
+    "psrlw $15, %%" #regd ::)
+
+#define MOVQ_BFE(regd) \
+    __asm __volatile ( \
+    "pcmpeqd %%" #regd ", %%" #regd " \n\t"\
+    "paddb %%" #regd ", %%" #regd " \n\t" ::)
+
+#ifndef PIC
+#define MOVQ_BONE(regd)  __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_bone))
+#define MOVQ_WTWO(regd)  __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wtwo))
+#else
+// for shared library it's better to use this way for accessing constants
+// pcmpeqd -> -1
+#define MOVQ_BONE(regd) \
+    __asm __volatile ( \
+    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
+    "psrlw $15, %%" #regd " \n\t" \
+    "packuswb %%" #regd ", %%" #regd " \n\t" ::)
+
+#define MOVQ_WTWO(regd) \
+    __asm __volatile ( \
+    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
+    "psrlw $15, %%" #regd " \n\t" \
+    "psllw $1, %%" #regd " \n\t"::)
+
+#endif
+
+// using regr as temporary and for the output result
+// first argument is unmodifed and second is trashed
+// regfe is supposed to contain 0xfefefefefefefefe
+#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
+    "movq " #rega ", " #regr "  \n\t"\
+    "pand " #regb ", " #regr "  \n\t"\
+    "pxor " #rega ", " #regb "  \n\t"\
+    "pand " #regfe "," #regb "  \n\t"\
+    "psrlq $1, " #regb "        \n\t"\
+    "paddb " #regb ", " #regr " \n\t"
+
+#define PAVGB_MMX(rega, regb, regr, regfe) \
+    "movq " #rega ", " #regr "  \n\t"\
+    "por  " #regb ", " #regr "  \n\t"\
+    "pxor " #rega ", " #regb "  \n\t"\
+    "pand " #regfe "," #regb "  \n\t"\
+    "psrlq $1, " #regb "        \n\t"\
+    "psubb " #regb ", " #regr " \n\t"
+
+// mm6 is supposed to contain 0xfefefefefefefefe
+#define PAVGBP_MMX_NO_RND(rega, regb, regr,  regc, regd, regp) \
+    "movq " #rega ", " #regr "  \n\t"\
+    "movq " #regc ", " #regp "  \n\t"\
+    "pand " #regb ", " #regr "  \n\t"\
+    "pand " #regd ", " #regp "  \n\t"\
+    "pxor " #rega ", " #regb "  \n\t"\
+    "pxor " #regc ", " #regd "  \n\t"\
+    "pand %%mm6, " #regb "      \n\t"\
+    "pand %%mm6, " #regd "      \n\t"\
+    "psrlq $1, " #regb "        \n\t"\
+    "psrlq $1, " #regd "        \n\t"\
+    "paddb " #regb ", " #regr " \n\t"\
+    "paddb " #regd ", " #regp " \n\t"
+
+#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
+    "movq " #rega ", " #regr "  \n\t"\
+    "movq " #regc ", " #regp "  \n\t"\
+    "por  " #regb ", " #regr "  \n\t"\
+    "por  " #regd ", " #regp "  \n\t"\
+    "pxor " #rega ", " #regb "  \n\t"\
+    "pxor " #regc ", " #regd "  \n\t"\
+    "pand %%mm6, " #regb "      \n\t"\
+    "pand %%mm6, " #regd "      \n\t"\
+    "psrlq $1, " #regd "        \n\t"\
+    "psrlq $1, " #regb "        \n\t"\
+    "psubb " #regb ", " #regr " \n\t"\
+    "psubb " #regd ", " #regp " \n\t"
+
+/***********************************/
+/* MMX no rounding */
+#define DEF(x, y) x ## _no_rnd_ ## y ##_mmx
+#define SET_RND  MOVQ_WONE
+#define PAVGBP(a, b, c, d, e, f)        PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
+#define PAVGB(a, b, c, e)               PAVGB_MMX_NO_RND(a, b, c, e)
+
+#include "dsputil_mmx_rnd.h"
+
+#undef DEF
+#undef SET_RND
+#undef PAVGBP
+#undef PAVGB
+/***********************************/
+/* MMX rounding */
+
+#define DEF(x, y) x ## _ ## y ##_mmx
+#define SET_RND  MOVQ_WTWO
+#define PAVGBP(a, b, c, d, e, f)        PAVGBP_MMX(a, b, c, d, e, f)
+#define PAVGB(a, b, c, e)               PAVGB_MMX(a, b, c, e)
+
+#include "dsputil_mmx_rnd.h"
+
+#undef DEF
+#undef SET_RND
+#undef PAVGBP
+#undef PAVGB
+
+/***********************************/
+/* 3Dnow specific */
+
+#define DEF(x) x ## _3dnow
+/* for Athlons PAVGUSB is prefered */
+#define PAVGB "pavgusb"
+
+#include "dsputil_mmx_avg.h"
+
+#undef DEF
+#undef PAVGB
+
+/***********************************/
+/* MMX2 specific */
+
+#define DEF(x) x ## _mmx2
+
+/* Introduced only in MMX2 set */
+#define PAVGB "pavgb"
+
+#include "dsputil_mmx_avg.h"
+
+#undef DEF
+#undef PAVGB
+
+#define SBUTTERFLY(a,b,t,n)\
+    "movq " #a ", " #t "              \n\t" /* abcd */\
+    "punpckl" #n " " #b ", " #a "     \n\t" /* aebf */\
+    "punpckh" #n " " #b ", " #t "     \n\t" /* cgdh */\
+
+/***********************************/
+/* standard MMX */
+
+#ifdef CONFIG_ENCODERS
+static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
+{
+    asm volatile(
+        "mov $-128, %%"REG_a"           \n\t"
+        "pxor %%mm7, %%mm7              \n\t"
+        ASMALIGN(4)
+        "1:                             \n\t"
+        "movq (%0), %%mm0               \n\t"
+        "movq (%0, %2), %%mm2           \n\t"
+        "movq %%mm0, %%mm1              \n\t"
+        "movq %%mm2, %%mm3              \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "punpcklbw %%mm7, %%mm2         \n\t"
+        "punpckhbw %%mm7, %%mm3         \n\t"
+        "movq %%mm0, (%1, %%"REG_a")    \n\t"
+        "movq %%mm1, 8(%1, %%"REG_a")   \n\t"
+        "movq %%mm2, 16(%1, %%"REG_a")  \n\t"
+        "movq %%mm3, 24(%1, %%"REG_a")  \n\t"
+        "add %3, %0                     \n\t"
+        "add $32, %%"REG_a"             \n\t"
+        "js 1b                          \n\t"
+        : "+r" (pixels)
+        : "r" (block+64), "r" ((long)line_size), "r" ((long)line_size*2)
+        : "%"REG_a
+    );
+}
+
+static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride)
+{
+    asm volatile(
+        "pxor %%mm7, %%mm7              \n\t"
+        "mov $-128, %%"REG_a"           \n\t"
+        ASMALIGN(4)
+        "1:                             \n\t"
+        "movq (%0), %%mm0               \n\t"
+        "movq (%1), %%mm2               \n\t"
+        "movq %%mm0, %%mm1              \n\t"
+        "movq %%mm2, %%mm3              \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "punpcklbw %%mm7, %%mm2         \n\t"
+        "punpckhbw %%mm7, %%mm3         \n\t"
+        "psubw %%mm2, %%mm0             \n\t"
+        "psubw %%mm3, %%mm1             \n\t"
+        "movq %%mm0, (%2, %%"REG_a")    \n\t"
+        "movq %%mm1, 8(%2, %%"REG_a")   \n\t"
+        "add %3, %0                     \n\t"
+        "add %3, %1                     \n\t"
+        "add $16, %%"REG_a"             \n\t"
+        "jnz 1b                         \n\t"
+        : "+r" (s1), "+r" (s2)
+        : "r" (block+64), "r" ((long)stride)
+        : "%"REG_a
+    );
+}
+#endif //CONFIG_ENCODERS
+
+void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
+{
+    const DCTELEM *p;
+    uint8_t *pix;
+
+    /* read the pixels */
+    p = block;
+    pix = pixels;
+    /* unrolled loop */
+        __asm __volatile(
+                "movq   %3, %%mm0               \n\t"
+                "movq   8%3, %%mm1              \n\t"
+                "movq   16%3, %%mm2             \n\t"
+                "movq   24%3, %%mm3             \n\t"
+                "movq   32%3, %%mm4             \n\t"
+                "movq   40%3, %%mm5             \n\t"
+                "movq   48%3, %%mm6             \n\t"
+                "movq   56%3, %%mm7             \n\t"
+                "packuswb %%mm1, %%mm0          \n\t"
+                "packuswb %%mm3, %%mm2          \n\t"
+                "packuswb %%mm5, %%mm4          \n\t"
+                "packuswb %%mm7, %%mm6          \n\t"
+                "movq   %%mm0, (%0)             \n\t"
+                "movq   %%mm2, (%0, %1)         \n\t"
+                "movq   %%mm4, (%0, %1, 2)      \n\t"
+                "movq   %%mm6, (%0, %2)         \n\t"
+                ::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "m"(*p)
+                :"memory");
+        pix += line_size*4;
+        p += 32;
+
+    // if here would be an exact copy of the code above
+    // compiler would generate some very strange code
+    // thus using "r"
+    __asm __volatile(
+            "movq       (%3), %%mm0             \n\t"
+            "movq       8(%3), %%mm1            \n\t"
+            "movq       16(%3), %%mm2           \n\t"
+            "movq       24(%3), %%mm3           \n\t"
+            "movq       32(%3), %%mm4           \n\t"
+            "movq       40(%3), %%mm5           \n\t"
+            "movq       48(%3), %%mm6           \n\t"
+            "movq       56(%3), %%mm7           \n\t"
+            "packuswb %%mm1, %%mm0              \n\t"
+            "packuswb %%mm3, %%mm2              \n\t"
+            "packuswb %%mm5, %%mm4              \n\t"
+            "packuswb %%mm7, %%mm6              \n\t"
+            "movq       %%mm0, (%0)             \n\t"
+            "movq       %%mm2, (%0, %1)         \n\t"
+            "movq       %%mm4, (%0, %1, 2)      \n\t"
+            "movq       %%mm6, (%0, %2)         \n\t"
+            ::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "r"(p)
+            :"memory");
+}
+
+static DECLARE_ALIGNED_8(const unsigned char, vector128[8]) =
+  { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
+
+void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
+{
+    int i;
+
+    movq_m2r(*vector128, mm1);
+    for (i = 0; i < 8; i++) {
+        movq_m2r(*(block), mm0);
+        packsswb_m2r(*(block + 4), mm0);
+        block += 8;
+        paddb_r2r(mm1, mm0);
+        movq_r2m(mm0, *pixels);
+        pixels += line_size;
+    }
+}
+
+void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
+{
+    const DCTELEM *p;
+    uint8_t *pix;
+    int i;
+
+    /* read the pixels */
+    p = block;
+    pix = pixels;
+    MOVQ_ZERO(mm7);
+    i = 4;
+    do {
+        __asm __volatile(
+                "movq   (%2), %%mm0     \n\t"
+                "movq   8(%2), %%mm1    \n\t"
+                "movq   16(%2), %%mm2   \n\t"
+                "movq   24(%2), %%mm3   \n\t"
+                "movq   %0, %%mm4       \n\t"
+                "movq   %1, %%mm6       \n\t"
+                "movq   %%mm4, %%mm5    \n\t"
+                "punpcklbw %%mm7, %%mm4 \n\t"
+                "punpckhbw %%mm7, %%mm5 \n\t"
+                "paddsw %%mm4, %%mm0    \n\t"
+                "paddsw %%mm5, %%mm1    \n\t"
+                "movq   %%mm6, %%mm5    \n\t"
+                "punpcklbw %%mm7, %%mm6 \n\t"
+                "punpckhbw %%mm7, %%mm5 \n\t"
+                "paddsw %%mm6, %%mm2    \n\t"
+                "paddsw %%mm5, %%mm3    \n\t"
+                "packuswb %%mm1, %%mm0  \n\t"
+                "packuswb %%mm3, %%mm2  \n\t"
+                "movq   %%mm0, %0       \n\t"
+                "movq   %%mm2, %1       \n\t"
+                :"+m"(*pix), "+m"(*(pix+line_size))
+                :"r"(p)
+                :"memory");
+        pix += line_size*2;
+        p += 16;
+    } while (--i);
+}
+
+static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    __asm __volatile(
+         "lea (%3, %3), %%"REG_a"       \n\t"
+         ASMALIGN(3)
+         "1:                            \n\t"
+         "movd (%1), %%mm0              \n\t"
+         "movd (%1, %3), %%mm1          \n\t"
+         "movd %%mm0, (%2)              \n\t"
+         "movd %%mm1, (%2, %3)          \n\t"
+         "add %%"REG_a", %1             \n\t"
+         "add %%"REG_a", %2             \n\t"
+         "movd (%1), %%mm0              \n\t"
+         "movd (%1, %3), %%mm1          \n\t"
+         "movd %%mm0, (%2)              \n\t"
+         "movd %%mm1, (%2, %3)          \n\t"
+         "add %%"REG_a", %1             \n\t"
+         "add %%"REG_a", %2             \n\t"
+         "subl $4, %0                   \n\t"
+         "jnz 1b                        \n\t"
+         : "+g"(h), "+r" (pixels),  "+r" (block)
+         : "r"((long)line_size)
+         : "%"REG_a, "memory"
+        );
+}
+
+static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    __asm __volatile(
+         "lea (%3, %3), %%"REG_a"       \n\t"
+         ASMALIGN(3)
+         "1:                            \n\t"
+         "movq (%1), %%mm0              \n\t"
+         "movq (%1, %3), %%mm1          \n\t"
+         "movq %%mm0, (%2)              \n\t"
+         "movq %%mm1, (%2, %3)          \n\t"
+         "add %%"REG_a", %1             \n\t"
+         "add %%"REG_a", %2             \n\t"
+         "movq (%1), %%mm0              \n\t"
+         "movq (%1, %3), %%mm1          \n\t"
+         "movq %%mm0, (%2)              \n\t"
+         "movq %%mm1, (%2, %3)          \n\t"
+         "add %%"REG_a", %1             \n\t"
+         "add %%"REG_a", %2             \n\t"
+         "subl $4, %0                   \n\t"
+         "jnz 1b                        \n\t"
+         : "+g"(h), "+r" (pixels),  "+r" (block)
+         : "r"((long)line_size)
+         : "%"REG_a, "memory"
+        );
+}
+
+static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    __asm __volatile(
+         "lea (%3, %3), %%"REG_a"       \n\t"
+         ASMALIGN(3)
+         "1:                            \n\t"
+         "movq (%1), %%mm0              \n\t"
+         "movq 8(%1), %%mm4             \n\t"
+         "movq (%1, %3), %%mm1          \n\t"
+         "movq 8(%1, %3), %%mm5         \n\t"
+         "movq %%mm0, (%2)              \n\t"
+         "movq %%mm4, 8(%2)             \n\t"
+         "movq %%mm1, (%2, %3)          \n\t"
+         "movq %%mm5, 8(%2, %3)         \n\t"
+         "add %%"REG_a", %1             \n\t"
+         "add %%"REG_a", %2             \n\t"
+         "movq (%1), %%mm0              \n\t"
+         "movq 8(%1), %%mm4             \n\t"
+         "movq (%1, %3), %%mm1          \n\t"
+         "movq 8(%1, %3), %%mm5         \n\t"
+         "movq %%mm0, (%2)              \n\t"
+         "movq %%mm4, 8(%2)             \n\t"
+         "movq %%mm1, (%2, %3)          \n\t"
+         "movq %%mm5, 8(%2, %3)         \n\t"
+         "add %%"REG_a", %1             \n\t"
+         "add %%"REG_a", %2             \n\t"
+         "subl $4, %0                   \n\t"
+         "jnz 1b                        \n\t"
+         : "+g"(h), "+r" (pixels),  "+r" (block)
+         : "r"((long)line_size)
+         : "%"REG_a, "memory"
+        );
+}
+
+static void clear_blocks_mmx(DCTELEM *blocks)
+{
+    __asm __volatile(
+                "pxor %%mm7, %%mm7              \n\t"
+                "mov $-128*6, %%"REG_a"         \n\t"
+                "1:                             \n\t"
+                "movq %%mm7, (%0, %%"REG_a")    \n\t"
+                "movq %%mm7, 8(%0, %%"REG_a")   \n\t"
+                "movq %%mm7, 16(%0, %%"REG_a")  \n\t"
+                "movq %%mm7, 24(%0, %%"REG_a")  \n\t"
+                "add $32, %%"REG_a"             \n\t"
+                " js 1b                         \n\t"
+                : : "r" (((uint8_t *)blocks)+128*6)
+                : "%"REG_a
+        );
+}
+
+#ifdef CONFIG_ENCODERS
+static int pix_sum16_mmx(uint8_t * pix, int line_size){
+    const int h=16;
+    int sum;
+    long index= -line_size*h;
+
+    __asm __volatile(
+                "pxor %%mm7, %%mm7              \n\t"
+                "pxor %%mm6, %%mm6              \n\t"
+                "1:                             \n\t"
+                "movq (%2, %1), %%mm0           \n\t"
+                "movq (%2, %1), %%mm1           \n\t"
+                "movq 8(%2, %1), %%mm2          \n\t"
+                "movq 8(%2, %1), %%mm3          \n\t"
+                "punpcklbw %%mm7, %%mm0         \n\t"
+                "punpckhbw %%mm7, %%mm1         \n\t"
+                "punpcklbw %%mm7, %%mm2         \n\t"
+                "punpckhbw %%mm7, %%mm3         \n\t"
+                "paddw %%mm0, %%mm1             \n\t"
+                "paddw %%mm2, %%mm3             \n\t"
+                "paddw %%mm1, %%mm3             \n\t"
+                "paddw %%mm3, %%mm6             \n\t"
+                "add %3, %1                     \n\t"
+                " js 1b                         \n\t"
+                "movq %%mm6, %%mm5              \n\t"
+                "psrlq $32, %%mm6               \n\t"
+                "paddw %%mm5, %%mm6             \n\t"
+                "movq %%mm6, %%mm5              \n\t"
+                "psrlq $16, %%mm6               \n\t"
+                "paddw %%mm5, %%mm6             \n\t"
+                "movd %%mm6, %0                 \n\t"
+                "andl $0xFFFF, %0               \n\t"
+                : "=&r" (sum), "+r" (index)
+                : "r" (pix - index), "r" ((long)line_size)
+        );
+
+        return sum;
+}
+#endif //CONFIG_ENCODERS
+
+static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
+    long i=0;
+    asm volatile(
+        "1:                             \n\t"
+        "movq  (%1, %0), %%mm0          \n\t"
+        "movq  (%2, %0), %%mm1          \n\t"
+        "paddb %%mm0, %%mm1             \n\t"
+        "movq %%mm1, (%2, %0)           \n\t"
+        "movq 8(%1, %0), %%mm0          \n\t"
+        "movq 8(%2, %0), %%mm1          \n\t"
+        "paddb %%mm0, %%mm1             \n\t"
+        "movq %%mm1, 8(%2, %0)          \n\t"
+        "add $16, %0                    \n\t"
+        "cmp %3, %0                     \n\t"
+        " jb 1b                         \n\t"
+        : "+r" (i)
+        : "r"(src), "r"(dst), "r"((long)w-15)
+    );
+    for(; i<w; i++)
+        dst[i+0] += src[i+0];
+}
+
+#define H263_LOOP_FILTER \
+        "pxor %%mm7, %%mm7              \n\t"\
+        "movq  %0, %%mm0                \n\t"\
+        "movq  %0, %%mm1                \n\t"\
+        "movq  %3, %%mm2                \n\t"\
+        "movq  %3, %%mm3                \n\t"\
+        "punpcklbw %%mm7, %%mm0         \n\t"\
+        "punpckhbw %%mm7, %%mm1         \n\t"\
+        "punpcklbw %%mm7, %%mm2         \n\t"\
+        "punpckhbw %%mm7, %%mm3         \n\t"\
+        "psubw %%mm2, %%mm0             \n\t"\
+        "psubw %%mm3, %%mm1             \n\t"\
+        "movq  %1, %%mm2                \n\t"\
+        "movq  %1, %%mm3                \n\t"\
+        "movq  %2, %%mm4                \n\t"\
+        "movq  %2, %%mm5                \n\t"\
+        "punpcklbw %%mm7, %%mm2         \n\t"\
+        "punpckhbw %%mm7, %%mm3         \n\t"\
+        "punpcklbw %%mm7, %%mm4         \n\t"\
+        "punpckhbw %%mm7, %%mm5         \n\t"\
+        "psubw %%mm2, %%mm4             \n\t"\
+        "psubw %%mm3, %%mm5             \n\t"\
+        "psllw $2, %%mm4                \n\t"\
+        "psllw $2, %%mm5                \n\t"\
+        "paddw %%mm0, %%mm4             \n\t"\
+        "paddw %%mm1, %%mm5             \n\t"\
+        "pxor %%mm6, %%mm6              \n\t"\
+        "pcmpgtw %%mm4, %%mm6           \n\t"\
+        "pcmpgtw %%mm5, %%mm7           \n\t"\
+        "pxor %%mm6, %%mm4              \n\t"\
+        "pxor %%mm7, %%mm5              \n\t"\
+        "psubw %%mm6, %%mm4             \n\t"\
+        "psubw %%mm7, %%mm5             \n\t"\
+        "psrlw $3, %%mm4                \n\t"\
+        "psrlw $3, %%mm5                \n\t"\
+        "packuswb %%mm5, %%mm4          \n\t"\
+        "packsswb %%mm7, %%mm6          \n\t"\
+        "pxor %%mm7, %%mm7              \n\t"\
+        "movd %4, %%mm2                 \n\t"\
+        "punpcklbw %%mm2, %%mm2         \n\t"\
+        "punpcklbw %%mm2, %%mm2         \n\t"\
+        "punpcklbw %%mm2, %%mm2         \n\t"\
+        "psubusb %%mm4, %%mm2           \n\t"\
+        "movq %%mm2, %%mm3              \n\t"\
+        "psubusb %%mm4, %%mm3           \n\t"\
+        "psubb %%mm3, %%mm2             \n\t"\
+        "movq %1, %%mm3                 \n\t"\
+        "movq %2, %%mm4                 \n\t"\
+        "pxor %%mm6, %%mm3              \n\t"\
+        "pxor %%mm6, %%mm4              \n\t"\
+        "paddusb %%mm2, %%mm3           \n\t"\
+        "psubusb %%mm2, %%mm4           \n\t"\
+        "pxor %%mm6, %%mm3              \n\t"\
+        "pxor %%mm6, %%mm4              \n\t"\
+        "paddusb %%mm2, %%mm2           \n\t"\
+        "packsswb %%mm1, %%mm0          \n\t"\
+        "pcmpgtb %%mm0, %%mm7           \n\t"\
+        "pxor %%mm7, %%mm0              \n\t"\
+        "psubb %%mm7, %%mm0             \n\t"\
+        "movq %%mm0, %%mm1              \n\t"\
+        "psubusb %%mm2, %%mm0           \n\t"\
+        "psubb %%mm0, %%mm1             \n\t"\
+        "pand %5, %%mm1                 \n\t"\
+        "psrlw $2, %%mm1                \n\t"\
+        "pxor %%mm7, %%mm1              \n\t"\
+        "psubb %%mm7, %%mm1             \n\t"\
+        "movq %0, %%mm5                 \n\t"\
+        "movq %3, %%mm6                 \n\t"\
+        "psubb %%mm1, %%mm5             \n\t"\
+        "paddb %%mm1, %%mm6             \n\t"
+
+static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){
+    const int strength= ff_h263_loop_filter_strength[qscale];
+
+    asm volatile(
+
+        H263_LOOP_FILTER
+
+        "movq %%mm3, %1                 \n\t"
+        "movq %%mm4, %2                 \n\t"
+        "movq %%mm5, %0                 \n\t"
+        "movq %%mm6, %3                 \n\t"
+        : "+m" (*(uint64_t*)(src - 2*stride)),
+          "+m" (*(uint64_t*)(src - 1*stride)),
+          "+m" (*(uint64_t*)(src + 0*stride)),
+          "+m" (*(uint64_t*)(src + 1*stride))
+        : "g" (2*strength), "m"(ff_pb_FC)
+    );
+}
+
+static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
+    asm volatile( //FIXME could save 1 instruction if done as 8x4 ...
+        "movd  %4, %%mm0                \n\t"
+        "movd  %5, %%mm1                \n\t"
+        "movd  %6, %%mm2                \n\t"
+        "movd  %7, %%mm3                \n\t"
+        "punpcklbw %%mm1, %%mm0         \n\t"
+        "punpcklbw %%mm3, %%mm2         \n\t"
+        "movq %%mm0, %%mm1              \n\t"
+        "punpcklwd %%mm2, %%mm0         \n\t"
+        "punpckhwd %%mm2, %%mm1         \n\t"
+        "movd  %%mm0, %0                \n\t"
+        "punpckhdq %%mm0, %%mm0         \n\t"
+        "movd  %%mm0, %1                \n\t"
+        "movd  %%mm1, %2                \n\t"
+        "punpckhdq %%mm1, %%mm1         \n\t"
+        "movd  %%mm1, %3                \n\t"
+
+        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
+          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
+          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
+          "=m" (*(uint32_t*)(dst + 3*dst_stride))
+        :  "m" (*(uint32_t*)(src + 0*src_stride)),
+           "m" (*(uint32_t*)(src + 1*src_stride)),
+           "m" (*(uint32_t*)(src + 2*src_stride)),
+           "m" (*(uint32_t*)(src + 3*src_stride))
+    );
+}
+
+static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
+    const int strength= ff_h263_loop_filter_strength[qscale];
+    uint64_t temp[4] __attribute__ ((aligned(8)));
+    uint8_t *btemp= (uint8_t*)temp;
+
+    src -= 2;
+
+    transpose4x4(btemp  , src           , 8, stride);
+    transpose4x4(btemp+4, src + 4*stride, 8, stride);
+    asm volatile(
+        H263_LOOP_FILTER // 5 3 4 6
+
+        : "+m" (temp[0]),
+          "+m" (temp[1]),
+          "+m" (temp[2]),
+          "+m" (temp[3])
+        : "g" (2*strength), "m"(ff_pb_FC)
+    );
+
+    asm volatile(
+        "movq %%mm5, %%mm1              \n\t"
+        "movq %%mm4, %%mm0              \n\t"
+        "punpcklbw %%mm3, %%mm5         \n\t"
+        "punpcklbw %%mm6, %%mm4         \n\t"
+        "punpckhbw %%mm3, %%mm1         \n\t"
+        "punpckhbw %%mm6, %%mm0         \n\t"
+        "movq %%mm5, %%mm3              \n\t"
+        "movq %%mm1, %%mm6              \n\t"
+        "punpcklwd %%mm4, %%mm5         \n\t"
+        "punpcklwd %%mm0, %%mm1         \n\t"
+        "punpckhwd %%mm4, %%mm3         \n\t"
+        "punpckhwd %%mm0, %%mm6         \n\t"
+        "movd %%mm5, (%0)               \n\t"
+        "punpckhdq %%mm5, %%mm5         \n\t"
+        "movd %%mm5, (%0,%2)            \n\t"
+        "movd %%mm3, (%0,%2,2)          \n\t"
+        "punpckhdq %%mm3, %%mm3         \n\t"
+        "movd %%mm3, (%0,%3)            \n\t"
+        "movd %%mm1, (%1)               \n\t"
+        "punpckhdq %%mm1, %%mm1         \n\t"
+        "movd %%mm1, (%1,%2)            \n\t"
+        "movd %%mm6, (%1,%2,2)          \n\t"
+        "punpckhdq %%mm6, %%mm6         \n\t"
+        "movd %%mm6, (%1,%3)            \n\t"
+        :: "r" (src),
+           "r" (src + 4*stride),
+           "r" ((long)   stride ),
+           "r" ((long)(3*stride))
+    );
+}
+
+#ifdef CONFIG_ENCODERS
+static int pix_norm1_mmx(uint8_t *pix, int line_size) {
+    int tmp;
+  asm volatile (
+      "movl $16,%%ecx\n"
+      "pxor %%mm0,%%mm0\n"
+      "pxor %%mm7,%%mm7\n"
+      "1:\n"
+      "movq (%0),%%mm2\n"       /* mm2 = pix[0-7] */
+      "movq 8(%0),%%mm3\n"      /* mm3 = pix[8-15] */
+
+      "movq %%mm2,%%mm1\n"      /* mm1 = mm2 = pix[0-7] */
+
+      "punpckhbw %%mm0,%%mm1\n" /* mm1 = [pix4-7] */
+      "punpcklbw %%mm0,%%mm2\n" /* mm2 = [pix0-3] */
+
+      "movq %%mm3,%%mm4\n"      /* mm4 = mm3 = pix[8-15] */
+      "punpckhbw %%mm0,%%mm3\n" /* mm3 = [pix12-15] */
+      "punpcklbw %%mm0,%%mm4\n" /* mm4 = [pix8-11] */
+
+      "pmaddwd %%mm1,%%mm1\n"   /* mm1 = (pix0^2+pix1^2,pix2^2+pix3^2) */
+      "pmaddwd %%mm2,%%mm2\n"   /* mm2 = (pix4^2+pix5^2,pix6^2+pix7^2) */
+
+      "pmaddwd %%mm3,%%mm3\n"
+      "pmaddwd %%mm4,%%mm4\n"
+
+      "paddd %%mm1,%%mm2\n"     /* mm2 = (pix0^2+pix1^2+pix4^2+pix5^2,
+                                          pix2^2+pix3^2+pix6^2+pix7^2) */
+      "paddd %%mm3,%%mm4\n"
+      "paddd %%mm2,%%mm7\n"
+
+      "add %2, %0\n"
+      "paddd %%mm4,%%mm7\n"
+      "dec %%ecx\n"
+      "jnz 1b\n"
+
+      "movq %%mm7,%%mm1\n"
+      "psrlq $32, %%mm7\n"      /* shift hi dword to lo */
+      "paddd %%mm7,%%mm1\n"
+      "movd %%mm1,%1\n"
+      : "+r" (pix), "=r"(tmp) : "r" ((long)line_size) : "%ecx" );
+    return tmp;
+}
+
+static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
+    int tmp;
+  asm volatile (
+      "movl %4,%%ecx\n"
+      "shr $1,%%ecx\n"
+      "pxor %%mm0,%%mm0\n"      /* mm0 = 0 */
+      "pxor %%mm7,%%mm7\n"      /* mm7 holds the sum */
+      "1:\n"
+      "movq (%0),%%mm1\n"       /* mm1 = pix1[0][0-7] */
+      "movq (%1),%%mm2\n"       /* mm2 = pix2[0][0-7] */
+      "movq (%0,%3),%%mm3\n"    /* mm3 = pix1[1][0-7] */
+      "movq (%1,%3),%%mm4\n"    /* mm4 = pix2[1][0-7] */
+
+      /* todo: mm1-mm2, mm3-mm4 */
+      /* algo: substract mm1 from mm2 with saturation and vice versa */
+      /*       OR the results to get absolute difference */
+      "movq %%mm1,%%mm5\n"
+      "movq %%mm3,%%mm6\n"
+      "psubusb %%mm2,%%mm1\n"
+      "psubusb %%mm4,%%mm3\n"
+      "psubusb %%mm5,%%mm2\n"
+      "psubusb %%mm6,%%mm4\n"
+
+      "por %%mm1,%%mm2\n"
+      "por %%mm3,%%mm4\n"
+
+      /* now convert to 16-bit vectors so we can square them */
+      "movq %%mm2,%%mm1\n"
+      "movq %%mm4,%%mm3\n"
+
+      "punpckhbw %%mm0,%%mm2\n"
+      "punpckhbw %%mm0,%%mm4\n"
+      "punpcklbw %%mm0,%%mm1\n" /* mm1 now spread over (mm1,mm2) */
+      "punpcklbw %%mm0,%%mm3\n" /* mm4 now spread over (mm3,mm4) */
+
+      "pmaddwd %%mm2,%%mm2\n"
+      "pmaddwd %%mm4,%%mm4\n"
+      "pmaddwd %%mm1,%%mm1\n"
+      "pmaddwd %%mm3,%%mm3\n"
+
+      "lea (%0,%3,2), %0\n"     /* pix1 += 2*line_size */
+      "lea (%1,%3,2), %1\n"     /* pix2 += 2*line_size */
+
+      "paddd %%mm2,%%mm1\n"
+      "paddd %%mm4,%%mm3\n"
+      "paddd %%mm1,%%mm7\n"
+      "paddd %%mm3,%%mm7\n"
+
+      "decl %%ecx\n"
+      "jnz 1b\n"
+
+      "movq %%mm7,%%mm1\n"
+      "psrlq $32, %%mm7\n"      /* shift hi dword to lo */
+      "paddd %%mm7,%%mm1\n"
+      "movd %%mm1,%2\n"
+      : "+r" (pix1), "+r" (pix2), "=r"(tmp)
+      : "r" ((long)line_size) , "m" (h)
+      : "%ecx");
+    return tmp;
+}
+
+static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
+    int tmp;
+  asm volatile (
+      "movl %4,%%ecx\n"
+      "pxor %%mm0,%%mm0\n"      /* mm0 = 0 */
+      "pxor %%mm7,%%mm7\n"      /* mm7 holds the sum */
+      "1:\n"
+      "movq (%0),%%mm1\n"       /* mm1 = pix1[0-7] */
+      "movq (%1),%%mm2\n"       /* mm2 = pix2[0-7] */
+      "movq 8(%0),%%mm3\n"      /* mm3 = pix1[8-15] */
+      "movq 8(%1),%%mm4\n"      /* mm4 = pix2[8-15] */
+
+      /* todo: mm1-mm2, mm3-mm4 */
+      /* algo: substract mm1 from mm2 with saturation and vice versa */
+      /*       OR the results to get absolute difference */
+      "movq %%mm1,%%mm5\n"
+      "movq %%mm3,%%mm6\n"
+      "psubusb %%mm2,%%mm1\n"
+      "psubusb %%mm4,%%mm3\n"
+      "psubusb %%mm5,%%mm2\n"
+      "psubusb %%mm6,%%mm4\n"
+
+      "por %%mm1,%%mm2\n"
+      "por %%mm3,%%mm4\n"
+
+      /* now convert to 16-bit vectors so we can square them */
+      "movq %%mm2,%%mm1\n"
+      "movq %%mm4,%%mm3\n"
+
+      "punpckhbw %%mm0,%%mm2\n"
+      "punpckhbw %%mm0,%%mm4\n"
+      "punpcklbw %%mm0,%%mm1\n" /* mm1 now spread over (mm1,mm2) */
+      "punpcklbw %%mm0,%%mm3\n" /* mm4 now spread over (mm3,mm4) */
+
+      "pmaddwd %%mm2,%%mm2\n"
+      "pmaddwd %%mm4,%%mm4\n"
+      "pmaddwd %%mm1,%%mm1\n"
+      "pmaddwd %%mm3,%%mm3\n"
+
+      "add %3,%0\n"
+      "add %3,%1\n"
+
+      "paddd %%mm2,%%mm1\n"
+      "paddd %%mm4,%%mm3\n"
+      "paddd %%mm1,%%mm7\n"
+      "paddd %%mm3,%%mm7\n"
+
+      "decl %%ecx\n"
+      "jnz 1b\n"
+
+      "movq %%mm7,%%mm1\n"
+      "psrlq $32, %%mm7\n"      /* shift hi dword to lo */
+      "paddd %%mm7,%%mm1\n"
+      "movd %%mm1,%2\n"
+      : "+r" (pix1), "+r" (pix2), "=r"(tmp)
+      : "r" ((long)line_size) , "m" (h)
+      : "%ecx");
+    return tmp;
+}
+
+static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
+    int tmp;
+  asm volatile (
+      "shr $1,%2\n"
+      "pxor %%xmm0,%%xmm0\n"    /* mm0 = 0 */
+      "pxor %%xmm7,%%xmm7\n"    /* mm7 holds the sum */
+      "1:\n"
+      "movdqu (%0),%%xmm1\n"    /* mm1 = pix1[0][0-15] */
+      "movdqu (%1),%%xmm2\n"    /* mm2 = pix2[0][0-15] */
+      "movdqu (%0,%4),%%xmm3\n" /* mm3 = pix1[1][0-15] */
+      "movdqu (%1,%4),%%xmm4\n" /* mm4 = pix2[1][0-15] */
+
+      /* todo: mm1-mm2, mm3-mm4 */
+      /* algo: substract mm1 from mm2 with saturation and vice versa */
+      /*       OR the results to get absolute difference */
+      "movdqa %%xmm1,%%xmm5\n"
+      "movdqa %%xmm3,%%xmm6\n"
+      "psubusb %%xmm2,%%xmm1\n"
+      "psubusb %%xmm4,%%xmm3\n"
+      "psubusb %%xmm5,%%xmm2\n"
+      "psubusb %%xmm6,%%xmm4\n"
+
+      "por %%xmm1,%%xmm2\n"
+      "por %%xmm3,%%xmm4\n"
+
+      /* now convert to 16-bit vectors so we can square them */
+      "movdqa %%xmm2,%%xmm1\n"
+      "movdqa %%xmm4,%%xmm3\n"
+
+      "punpckhbw %%xmm0,%%xmm2\n"
+      "punpckhbw %%xmm0,%%xmm4\n"
+      "punpcklbw %%xmm0,%%xmm1\n"  /* mm1 now spread over (mm1,mm2) */
+      "punpcklbw %%xmm0,%%xmm3\n"  /* mm4 now spread over (mm3,mm4) */
+
+      "pmaddwd %%xmm2,%%xmm2\n"
+      "pmaddwd %%xmm4,%%xmm4\n"
+      "pmaddwd %%xmm1,%%xmm1\n"
+      "pmaddwd %%xmm3,%%xmm3\n"
+
+      "lea (%0,%4,2), %0\n"        /* pix1 += 2*line_size */
+      "lea (%1,%4,2), %1\n"        /* pix2 += 2*line_size */
+
+      "paddd %%xmm2,%%xmm1\n"
+      "paddd %%xmm4,%%xmm3\n"
+      "paddd %%xmm1,%%xmm7\n"
+      "paddd %%xmm3,%%xmm7\n"
+
+      "decl %2\n"
+      "jnz 1b\n"
+
+      "movdqa %%xmm7,%%xmm1\n"
+      "psrldq $8, %%xmm7\n"        /* shift hi qword to lo */
+      "paddd %%xmm1,%%xmm7\n"
+      "movdqa %%xmm7,%%xmm1\n"
+      "psrldq $4, %%xmm7\n"        /* shift hi dword to lo */
+      "paddd %%xmm1,%%xmm7\n"
+      "movd %%xmm7,%3\n"
+      : "+r" (pix1), "+r" (pix2), "+r"(h), "=r"(tmp)
+      : "r" ((long)line_size));
+    return tmp;
+}
+
+static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
+    int tmp;
+  asm volatile (
+      "movl %3,%%ecx\n"
+      "pxor %%mm7,%%mm7\n"
+      "pxor %%mm6,%%mm6\n"
+
+      "movq (%0),%%mm0\n"
+      "movq %%mm0, %%mm1\n"
+      "psllq $8, %%mm0\n"
+      "psrlq $8, %%mm1\n"
+      "psrlq $8, %%mm0\n"
+      "movq %%mm0, %%mm2\n"
+      "movq %%mm1, %%mm3\n"
+      "punpcklbw %%mm7,%%mm0\n"
+      "punpcklbw %%mm7,%%mm1\n"
+      "punpckhbw %%mm7,%%mm2\n"
+      "punpckhbw %%mm7,%%mm3\n"
+      "psubw %%mm1, %%mm0\n"
+      "psubw %%mm3, %%mm2\n"
+
+      "add %2,%0\n"
+
+      "movq (%0),%%mm4\n"
+      "movq %%mm4, %%mm1\n"
+      "psllq $8, %%mm4\n"
+      "psrlq $8, %%mm1\n"
+      "psrlq $8, %%mm4\n"
+      "movq %%mm4, %%mm5\n"
+      "movq %%mm1, %%mm3\n"
+      "punpcklbw %%mm7,%%mm4\n"
+      "punpcklbw %%mm7,%%mm1\n"
+      "punpckhbw %%mm7,%%mm5\n"
+      "punpckhbw %%mm7,%%mm3\n"
+      "psubw %%mm1, %%mm4\n"
+      "psubw %%mm3, %%mm5\n"
+      "psubw %%mm4, %%mm0\n"
+      "psubw %%mm5, %%mm2\n"
+      "pxor %%mm3, %%mm3\n"
+      "pxor %%mm1, %%mm1\n"
+      "pcmpgtw %%mm0, %%mm3\n\t"
+      "pcmpgtw %%mm2, %%mm1\n\t"
+      "pxor %%mm3, %%mm0\n"
+      "pxor %%mm1, %%mm2\n"
+      "psubw %%mm3, %%mm0\n"
+      "psubw %%mm1, %%mm2\n"
+      "paddw %%mm0, %%mm2\n"
+      "paddw %%mm2, %%mm6\n"
+
+      "add %2,%0\n"
+      "1:\n"
+
+      "movq (%0),%%mm0\n"
+      "movq %%mm0, %%mm1\n"
+      "psllq $8, %%mm0\n"
+      "psrlq $8, %%mm1\n"
+      "psrlq $8, %%mm0\n"
+      "movq %%mm0, %%mm2\n"
+      "movq %%mm1, %%mm3\n"
+      "punpcklbw %%mm7,%%mm0\n"
+      "punpcklbw %%mm7,%%mm1\n"
+      "punpckhbw %%mm7,%%mm2\n"
+      "punpckhbw %%mm7,%%mm3\n"
+      "psubw %%mm1, %%mm0\n"
+      "psubw %%mm3, %%mm2\n"
+      "psubw %%mm0, %%mm4\n"
+      "psubw %%mm2, %%mm5\n"
+      "pxor %%mm3, %%mm3\n"
+      "pxor %%mm1, %%mm1\n"
+      "pcmpgtw %%mm4, %%mm3\n\t"
+      "pcmpgtw %%mm5, %%mm1\n\t"
+      "pxor %%mm3, %%mm4\n"
+      "pxor %%mm1, %%mm5\n"
+      "psubw %%mm3, %%mm4\n"
+      "psubw %%mm1, %%mm5\n"
+      "paddw %%mm4, %%mm5\n"
+      "paddw %%mm5, %%mm6\n"
+
+      "add %2,%0\n"
+
+      "movq (%0),%%mm4\n"
+      "movq %%mm4, %%mm1\n"
+      "psllq $8, %%mm4\n"
+      "psrlq $8, %%mm1\n"
+      "psrlq $8, %%mm4\n"
+      "movq %%mm4, %%mm5\n"
+      "movq %%mm1, %%mm3\n"
+      "punpcklbw %%mm7,%%mm4\n"
+      "punpcklbw %%mm7,%%mm1\n"
+      "punpckhbw %%mm7,%%mm5\n"
+      "punpckhbw %%mm7,%%mm3\n"
+      "psubw %%mm1, %%mm4\n"
+      "psubw %%mm3, %%mm5\n"
+      "psubw %%mm4, %%mm0\n"
+      "psubw %%mm5, %%mm2\n"
+      "pxor %%mm3, %%mm3\n"
+      "pxor %%mm1, %%mm1\n"
+      "pcmpgtw %%mm0, %%mm3\n\t"
+      "pcmpgtw %%mm2, %%mm1\n\t"
+      "pxor %%mm3, %%mm0\n"
+      "pxor %%mm1, %%mm2\n"
+      "psubw %%mm3, %%mm0\n"
+      "psubw %%mm1, %%mm2\n"
+      "paddw %%mm0, %%mm2\n"
+      "paddw %%mm2, %%mm6\n"
+
+      "add %2,%0\n"
+      "subl $2, %%ecx\n"
+      " jnz 1b\n"
+
+      "movq %%mm6, %%mm0\n"
+      "punpcklwd %%mm7,%%mm0\n"
+      "punpckhwd %%mm7,%%mm6\n"
+      "paddd %%mm0, %%mm6\n"
+
+      "movq %%mm6,%%mm0\n"
+      "psrlq $32, %%mm6\n"
+      "paddd %%mm6,%%mm0\n"
+      "movd %%mm0,%1\n"
+      : "+r" (pix1), "=r"(tmp)
+      : "r" ((long)line_size) , "g" (h-2)
+      : "%ecx");
+      return tmp;
+}
+
+static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
+    int tmp;
+    uint8_t * pix= pix1;
+  asm volatile (
+      "movl %3,%%ecx\n"
+      "pxor %%mm7,%%mm7\n"
+      "pxor %%mm6,%%mm6\n"
+
+      "movq (%0),%%mm0\n"
+      "movq 1(%0),%%mm1\n"
+      "movq %%mm0, %%mm2\n"
+      "movq %%mm1, %%mm3\n"
+      "punpcklbw %%mm7,%%mm0\n"
+      "punpcklbw %%mm7,%%mm1\n"
+      "punpckhbw %%mm7,%%mm2\n"
+      "punpckhbw %%mm7,%%mm3\n"
+      "psubw %%mm1, %%mm0\n"
+      "psubw %%mm3, %%mm2\n"
+
+      "add %2,%0\n"
+
+      "movq (%0),%%mm4\n"
+      "movq 1(%0),%%mm1\n"
+      "movq %%mm4, %%mm5\n"
+      "movq %%mm1, %%mm3\n"
+      "punpcklbw %%mm7,%%mm4\n"
+      "punpcklbw %%mm7,%%mm1\n"
+      "punpckhbw %%mm7,%%mm5\n"
+      "punpckhbw %%mm7,%%mm3\n"
+      "psubw %%mm1, %%mm4\n"
+      "psubw %%mm3, %%mm5\n"
+      "psubw %%mm4, %%mm0\n"
+      "psubw %%mm5, %%mm2\n"
+      "pxor %%mm3, %%mm3\n"
+      "pxor %%mm1, %%mm1\n"
+      "pcmpgtw %%mm0, %%mm3\n\t"
+      "pcmpgtw %%mm2, %%mm1\n\t"
+      "pxor %%mm3, %%mm0\n"
+      "pxor %%mm1, %%mm2\n"
+      "psubw %%mm3, %%mm0\n"
+      "psubw %%mm1, %%mm2\n"
+      "paddw %%mm0, %%mm2\n"
+      "paddw %%mm2, %%mm6\n"
+
+      "add %2,%0\n"
+      "1:\n"
+
+      "movq (%0),%%mm0\n"
+      "movq 1(%0),%%mm1\n"
+      "movq %%mm0, %%mm2\n"
+      "movq %%mm1, %%mm3\n"
+      "punpcklbw %%mm7,%%mm0\n"
+      "punpcklbw %%mm7,%%mm1\n"
+      "punpckhbw %%mm7,%%mm2\n"
+      "punpckhbw %%mm7,%%mm3\n"
+      "psubw %%mm1, %%mm0\n"
+      "psubw %%mm3, %%mm2\n"
+      "psubw %%mm0, %%mm4\n"
+      "psubw %%mm2, %%mm5\n"
+      "pxor %%mm3, %%mm3\n"
+      "pxor %%mm1, %%mm1\n"
+      "pcmpgtw %%mm4, %%mm3\n\t"
+      "pcmpgtw %%mm5, %%mm1\n\t"
+      "pxor %%mm3, %%mm4\n"
+      "pxor %%mm1, %%mm5\n"
+      "psubw %%mm3, %%mm4\n"
+      "psubw %%mm1, %%mm5\n"
+      "paddw %%mm4, %%mm5\n"
+      "paddw %%mm5, %%mm6\n"
+
+      "add %2,%0\n"
+
+      "movq (%0),%%mm4\n"
+      "movq 1(%0),%%mm1\n"
+      "movq %%mm4, %%mm5\n"
+      "movq %%mm1, %%mm3\n"
+      "punpcklbw %%mm7,%%mm4\n"
+      "punpcklbw %%mm7,%%mm1\n"
+      "punpckhbw %%mm7,%%mm5\n"
+      "punpckhbw %%mm7,%%mm3\n"
+      "psubw %%mm1, %%mm4\n"
+      "psubw %%mm3, %%mm5\n"
+      "psubw %%mm4, %%mm0\n"
+      "psubw %%mm5, %%mm2\n"
+      "pxor %%mm3, %%mm3\n"
+      "pxor %%mm1, %%mm1\n"
+      "pcmpgtw %%mm0, %%mm3\n\t"
+      "pcmpgtw %%mm2, %%mm1\n\t"
+      "pxor %%mm3, %%mm0\n"
+      "pxor %%mm1, %%mm2\n"
+      "psubw %%mm3, %%mm0\n"
+      "psubw %%mm1, %%mm2\n"
+      "paddw %%mm0, %%mm2\n"
+      "paddw %%mm2, %%mm6\n"
+
+      "add %2,%0\n"
+      "subl $2, %%ecx\n"
+      " jnz 1b\n"
+
+      "movq %%mm6, %%mm0\n"
+      "punpcklwd %%mm7,%%mm0\n"
+      "punpckhwd %%mm7,%%mm6\n"
+      "paddd %%mm0, %%mm6\n"
+
+      "movq %%mm6,%%mm0\n"
+      "psrlq $32, %%mm6\n"
+      "paddd %%mm6,%%mm0\n"
+      "movd %%mm0,%1\n"
+      : "+r" (pix1), "=r"(tmp)
+      : "r" ((long)line_size) , "g" (h-2)
+      : "%ecx");
+      return tmp + hf_noise8_mmx(pix+8, line_size, h);
+}
+
+static int nsse16_mmx(void *p, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
+    MpegEncContext *c = p;
+    int score1, score2;
+
+    if(c) score1 = c->dsp.sse[0](c, pix1, pix2, line_size, h);
+    else  score1 = sse16_mmx(c, pix1, pix2, line_size, h);
+    score2= hf_noise16_mmx(pix1, line_size, h) - hf_noise16_mmx(pix2, line_size, h);
+
+    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
+    else  return score1 + FFABS(score2)*8;
+}
+
+static int nsse8_mmx(void *p, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
+    MpegEncContext *c = p;
+    int score1= sse8_mmx(c, pix1, pix2, line_size, h);
+    int score2= hf_noise8_mmx(pix1, line_size, h) - hf_noise8_mmx(pix2, line_size, h);
+
+    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
+    else  return score1 + FFABS(score2)*8;
+}
+
+static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
+    int tmp;
+
+    assert( (((int)pix) & 7) == 0);
+    assert((line_size &7) ==0);
+
+#define SUM(in0, in1, out0, out1) \
+      "movq (%0), %%mm2\n"\
+      "movq 8(%0), %%mm3\n"\
+      "add %2,%0\n"\
+      "movq %%mm2, " #out0 "\n"\
+      "movq %%mm3, " #out1 "\n"\
+      "psubusb " #in0 ", %%mm2\n"\
+      "psubusb " #in1 ", %%mm3\n"\
+      "psubusb " #out0 ", " #in0 "\n"\
+      "psubusb " #out1 ", " #in1 "\n"\
+      "por %%mm2, " #in0 "\n"\
+      "por %%mm3, " #in1 "\n"\
+      "movq " #in0 ", %%mm2\n"\
+      "movq " #in1 ", %%mm3\n"\
+      "punpcklbw %%mm7, " #in0 "\n"\
+      "punpcklbw %%mm7, " #in1 "\n"\
+      "punpckhbw %%mm7, %%mm2\n"\
+      "punpckhbw %%mm7, %%mm3\n"\
+      "paddw " #in1 ", " #in0 "\n"\
+      "paddw %%mm3, %%mm2\n"\
+      "paddw %%mm2, " #in0 "\n"\
+      "paddw " #in0 ", %%mm6\n"
+
+
+  asm volatile (
+      "movl %3,%%ecx\n"
+      "pxor %%mm6,%%mm6\n"
+      "pxor %%mm7,%%mm7\n"
+      "movq (%0),%%mm0\n"
+      "movq 8(%0),%%mm1\n"
+      "add %2,%0\n"
+      "subl $2, %%ecx\n"
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      "1:\n"
+
+      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
+
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+
+      "subl $2, %%ecx\n"
+      "jnz 1b\n"
+
+      "movq %%mm6,%%mm0\n"
+      "psrlq $32, %%mm6\n"
+      "paddw %%mm6,%%mm0\n"
+      "movq %%mm0,%%mm6\n"
+      "psrlq $16, %%mm0\n"
+      "paddw %%mm6,%%mm0\n"
+      "movd %%mm0,%1\n"
+      : "+r" (pix), "=r"(tmp)
+      : "r" ((long)line_size) , "m" (h)
+      : "%ecx");
+    return tmp & 0xFFFF;
+}
+#undef SUM
+
+static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
+    int tmp;
+
+    assert( (((int)pix) & 7) == 0);
+    assert((line_size &7) ==0);
+
+#define SUM(in0, in1, out0, out1) \
+      "movq (%0), " #out0 "\n"\
+      "movq 8(%0), " #out1 "\n"\
+      "add %2,%0\n"\
+      "psadbw " #out0 ", " #in0 "\n"\
+      "psadbw " #out1 ", " #in1 "\n"\
+      "paddw " #in1 ", " #in0 "\n"\
+      "paddw " #in0 ", %%mm6\n"
+
+  asm volatile (
+      "movl %3,%%ecx\n"
+      "pxor %%mm6,%%mm6\n"
+      "pxor %%mm7,%%mm7\n"
+      "movq (%0),%%mm0\n"
+      "movq 8(%0),%%mm1\n"
+      "add %2,%0\n"
+      "subl $2, %%ecx\n"
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      "1:\n"
+
+      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
+
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+
+      "subl $2, %%ecx\n"
+      "jnz 1b\n"
+
+      "movd %%mm6,%1\n"
+      : "+r" (pix), "=r"(tmp)
+      : "r" ((long)line_size) , "m" (h)
+      : "%ecx");
+    return tmp;
+}
+#undef SUM
+
+static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
+    int tmp;
+
+    assert( (((int)pix1) & 7) == 0);
+    assert( (((int)pix2) & 7) == 0);
+    assert((line_size &7) ==0);
+
+#define SUM(in0, in1, out0, out1) \
+      "movq (%0),%%mm2\n"\
+      "movq (%1)," #out0 "\n"\
+      "movq 8(%0),%%mm3\n"\
+      "movq 8(%1)," #out1 "\n"\
+      "add %3,%0\n"\
+      "add %3,%1\n"\
+      "psubb " #out0 ", %%mm2\n"\
+      "psubb " #out1 ", %%mm3\n"\
+      "pxor %%mm7, %%mm2\n"\
+      "pxor %%mm7, %%mm3\n"\
+      "movq %%mm2, " #out0 "\n"\
+      "movq %%mm3, " #out1 "\n"\
+      "psubusb " #in0 ", %%mm2\n"\
+      "psubusb " #in1 ", %%mm3\n"\
+      "psubusb " #out0 ", " #in0 "\n"\
+      "psubusb " #out1 ", " #in1 "\n"\
+      "por %%mm2, " #in0 "\n"\
+      "por %%mm3, " #in1 "\n"\
+      "movq " #in0 ", %%mm2\n"\
+      "movq " #in1 ", %%mm3\n"\
+      "punpcklbw %%mm7, " #in0 "\n"\
+      "punpcklbw %%mm7, " #in1 "\n"\
+      "punpckhbw %%mm7, %%mm2\n"\
+      "punpckhbw %%mm7, %%mm3\n"\
+      "paddw " #in1 ", " #in0 "\n"\
+      "paddw %%mm3, %%mm2\n"\
+      "paddw %%mm2, " #in0 "\n"\
+      "paddw " #in0 ", %%mm6\n"
+
+
+  asm volatile (
+      "movl %4,%%ecx\n"
+      "pxor %%mm6,%%mm6\n"
+      "pcmpeqw %%mm7,%%mm7\n"
+      "psllw $15, %%mm7\n"
+      "packsswb %%mm7, %%mm7\n"
+      "movq (%0),%%mm0\n"
+      "movq (%1),%%mm2\n"
+      "movq 8(%0),%%mm1\n"
+      "movq 8(%1),%%mm3\n"
+      "add %3,%0\n"
+      "add %3,%1\n"
+      "subl $2, %%ecx\n"
+      "psubb %%mm2, %%mm0\n"
+      "psubb %%mm3, %%mm1\n"
+      "pxor %%mm7, %%mm0\n"
+      "pxor %%mm7, %%mm1\n"
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      "1:\n"
+
+      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
+
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+
+      "subl $2, %%ecx\n"
+      "jnz 1b\n"
+
+      "movq %%mm6,%%mm0\n"
+      "psrlq $32, %%mm6\n"
+      "paddw %%mm6,%%mm0\n"
+      "movq %%mm0,%%mm6\n"
+      "psrlq $16, %%mm0\n"
+      "paddw %%mm6,%%mm0\n"
+      "movd %%mm0,%2\n"
+      : "+r" (pix1), "+r" (pix2), "=r"(tmp)
+      : "r" ((long)line_size) , "m" (h)
+      : "%ecx");
+    return tmp & 0x7FFF;
+}
+#undef SUM
+
+static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
+    int tmp;
+
+    assert( (((int)pix1) & 7) == 0);
+    assert( (((int)pix2) & 7) == 0);
+    assert((line_size &7) ==0);
+
+#define SUM(in0, in1, out0, out1) \
+      "movq (%0)," #out0 "\n"\
+      "movq (%1),%%mm2\n"\
+      "movq 8(%0)," #out1 "\n"\
+      "movq 8(%1),%%mm3\n"\
+      "add %3,%0\n"\
+      "add %3,%1\n"\
+      "psubb %%mm2, " #out0 "\n"\
+      "psubb %%mm3, " #out1 "\n"\
+      "pxor %%mm7, " #out0 "\n"\
+      "pxor %%mm7, " #out1 "\n"\
+      "psadbw " #out0 ", " #in0 "\n"\
+      "psadbw " #out1 ", " #in1 "\n"\
+      "paddw " #in1 ", " #in0 "\n"\
+      "paddw " #in0 ", %%mm6\n"
+
+  asm volatile (
+      "movl %4,%%ecx\n"
+      "pxor %%mm6,%%mm6\n"
+      "pcmpeqw %%mm7,%%mm7\n"
+      "psllw $15, %%mm7\n"
+      "packsswb %%mm7, %%mm7\n"
+      "movq (%0),%%mm0\n"
+      "movq (%1),%%mm2\n"
+      "movq 8(%0),%%mm1\n"
+      "movq 8(%1),%%mm3\n"
+      "add %3,%0\n"
+      "add %3,%1\n"
+      "subl $2, %%ecx\n"
+      "psubb %%mm2, %%mm0\n"
+      "psubb %%mm3, %%mm1\n"
+      "pxor %%mm7, %%mm0\n"
+      "pxor %%mm7, %%mm1\n"
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      "1:\n"
+
+      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
+
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+
+      "subl $2, %%ecx\n"
+      "jnz 1b\n"
+
+      "movd %%mm6,%2\n"
+      : "+r" (pix1), "+r" (pix2), "=r"(tmp)
+      : "r" ((long)line_size) , "m" (h)
+      : "%ecx");
+    return tmp;
+}
+#undef SUM
+
+static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
+    long i=0;
+    asm volatile(
+        "1:                             \n\t"
+        "movq  (%2, %0), %%mm0          \n\t"
+        "movq  (%1, %0), %%mm1          \n\t"
+        "psubb %%mm0, %%mm1             \n\t"
+        "movq %%mm1, (%3, %0)           \n\t"
+        "movq 8(%2, %0), %%mm0          \n\t"
+        "movq 8(%1, %0), %%mm1          \n\t"
+        "psubb %%mm0, %%mm1             \n\t"
+        "movq %%mm1, 8(%3, %0)          \n\t"
+        "add $16, %0                    \n\t"
+        "cmp %4, %0                     \n\t"
+        " jb 1b                         \n\t"
+        : "+r" (i)
+        : "r"(src1), "r"(src2), "r"(dst), "r"((long)w-15)
+    );
+    for(; i<w; i++)
+        dst[i+0] = src1[i+0]-src2[i+0];
+}
+
+static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
+    long i=0;
+    uint8_t l, lt;
+
+    asm volatile(
+        "1:                             \n\t"
+        "movq  -1(%1, %0), %%mm0        \n\t" // LT
+        "movq  (%1, %0), %%mm1          \n\t" // T
+        "movq  -1(%2, %0), %%mm2        \n\t" // L
+        "movq  (%2, %0), %%mm3          \n\t" // X
+        "movq %%mm2, %%mm4              \n\t" // L
+        "psubb %%mm0, %%mm2             \n\t"
+        "paddb %%mm1, %%mm2             \n\t" // L + T - LT
+        "movq %%mm4, %%mm5              \n\t" // L
+        "pmaxub %%mm1, %%mm4            \n\t" // max(T, L)
+        "pminub %%mm5, %%mm1            \n\t" // min(T, L)
+        "pminub %%mm2, %%mm4            \n\t"
+        "pmaxub %%mm1, %%mm4            \n\t"
+        "psubb %%mm4, %%mm3             \n\t" // dst - pred
+        "movq %%mm3, (%3, %0)           \n\t"
+        "add $8, %0                     \n\t"
+        "cmp %4, %0                     \n\t"
+        " jb 1b                         \n\t"
+        : "+r" (i)
+        : "r"(src1), "r"(src2), "r"(dst), "r"((long)w)
+    );
+
+    l= *left;
+    lt= *left_top;
+
+    dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&0xFF);
+
+    *left_top= src1[w-1];
+    *left    = src2[w-1];
+}
+
+#define LBUTTERFLY2(a1,b1,a2,b2)\
+    "paddw " #b1 ", " #a1 "           \n\t"\
+    "paddw " #b2 ", " #a2 "           \n\t"\
+    "paddw " #b1 ", " #b1 "           \n\t"\
+    "paddw " #b2 ", " #b2 "           \n\t"\
+    "psubw " #a1 ", " #b1 "           \n\t"\
+    "psubw " #a2 ", " #b2 "           \n\t"
+
+#define HADAMARD48\
+        LBUTTERFLY2(%%mm0, %%mm1, %%mm2, %%mm3)\
+        LBUTTERFLY2(%%mm4, %%mm5, %%mm6, %%mm7)\
+        LBUTTERFLY2(%%mm0, %%mm2, %%mm1, %%mm3)\
+        LBUTTERFLY2(%%mm4, %%mm6, %%mm5, %%mm7)\
+        LBUTTERFLY2(%%mm0, %%mm4, %%mm1, %%mm5)\
+        LBUTTERFLY2(%%mm2, %%mm6, %%mm3, %%mm7)\
+
+#define MMABS(a,z)\
+    "pxor " #z ", " #z "              \n\t"\
+    "pcmpgtw " #a ", " #z "           \n\t"\
+    "pxor " #z ", " #a "              \n\t"\
+    "psubw " #z ", " #a "             \n\t"
+
+#define MMABS_SUM(a,z, sum)\
+    "pxor " #z ", " #z "              \n\t"\
+    "pcmpgtw " #a ", " #z "           \n\t"\
+    "pxor " #z ", " #a "              \n\t"\
+    "psubw " #z ", " #a "             \n\t"\
+    "paddusw " #a ", " #sum "         \n\t"
+
+#define MMABS_MMX2(a,z)\
+    "pxor " #z ", " #z "              \n\t"\
+    "psubw " #a ", " #z "             \n\t"\
+    "pmaxsw " #z ", " #a "            \n\t"
+
+#define MMABS_SUM_MMX2(a,z, sum)\
+    "pxor " #z ", " #z "              \n\t"\
+    "psubw " #a ", " #z "             \n\t"\
+    "pmaxsw " #z ", " #a "            \n\t"\
+    "paddusw " #a ", " #sum "         \n\t"
+
+#define TRANSPOSE4(a,b,c,d,t)\
+    SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
+    SBUTTERFLY(c,d,b,wd) /* c=imjn b=kolp */\
+    SBUTTERFLY(a,c,d,dq) /* a=aeim d=bfjn */\
+    SBUTTERFLY(t,b,c,dq) /* t=cgko c=dhlp */
+
+#define LOAD4(o, a, b, c, d)\
+        "movq "#o"(%1), " #a "        \n\t"\
+        "movq "#o"+16(%1), " #b "     \n\t"\
+        "movq "#o"+32(%1), " #c "     \n\t"\
+        "movq "#o"+48(%1), " #d "     \n\t"
+
+#define STORE4(o, a, b, c, d)\
+        "movq "#a", "#o"(%1)          \n\t"\
+        "movq "#b", "#o"+16(%1)       \n\t"\
+        "movq "#c", "#o"+32(%1)       \n\t"\
+        "movq "#d", "#o"+48(%1)       \n\t"\
+
+static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){
+    DECLARE_ALIGNED_8(uint64_t, temp[16]);
+    int sum=0;
+
+    assert(h==8);
+
+    diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);
+
+    asm volatile(
+        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
+        LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7)
+
+        HADAMARD48
+
+        "movq %%mm7, 112(%1)            \n\t"
+
+        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
+        STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)
+
+        "movq 112(%1), %%mm7            \n\t"
+        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
+        STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)
+
+        LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3)
+        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
+
+        HADAMARD48
+
+        "movq %%mm7, 120(%1)            \n\t"
+
+        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
+        STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2)
+
+        "movq 120(%1), %%mm7            \n\t"
+        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
+        "movq %%mm7, %%mm5              \n\t"//FIXME remove
+        "movq %%mm6, %%mm7              \n\t"
+        "movq %%mm0, %%mm6              \n\t"
+//        STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove
+
+        LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)
+//        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
+
+        HADAMARD48
+        "movq %%mm7, 64(%1)             \n\t"
+        MMABS(%%mm0, %%mm7)
+        MMABS_SUM(%%mm1, %%mm7, %%mm0)
+        MMABS_SUM(%%mm2, %%mm7, %%mm0)
+        MMABS_SUM(%%mm3, %%mm7, %%mm0)
+        MMABS_SUM(%%mm4, %%mm7, %%mm0)
+        MMABS_SUM(%%mm5, %%mm7, %%mm0)
+        MMABS_SUM(%%mm6, %%mm7, %%mm0)
+        "movq 64(%1), %%mm1             \n\t"
+        MMABS_SUM(%%mm1, %%mm7, %%mm0)
+        "movq %%mm0, 64(%1)             \n\t"
+
+        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
+        LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7)
+
+        HADAMARD48
+        "movq %%mm7, (%1)               \n\t"
+        MMABS(%%mm0, %%mm7)
+        MMABS_SUM(%%mm1, %%mm7, %%mm0)
+        MMABS_SUM(%%mm2, %%mm7, %%mm0)
+        MMABS_SUM(%%mm3, %%mm7, %%mm0)
+        MMABS_SUM(%%mm4, %%mm7, %%mm0)
+        MMABS_SUM(%%mm5, %%mm7, %%mm0)
+        MMABS_SUM(%%mm6, %%mm7, %%mm0)
+        "movq (%1), %%mm1               \n\t"
+        MMABS_SUM(%%mm1, %%mm7, %%mm0)
+        "movq 64(%1), %%mm1             \n\t"
+        MMABS_SUM(%%mm1, %%mm7, %%mm0)
+
+        "movq %%mm0, %%mm1              \n\t"
+        "psrlq $32, %%mm0               \n\t"
+        "paddusw %%mm1, %%mm0           \n\t"
+        "movq %%mm0, %%mm1              \n\t"
+        "psrlq $16, %%mm0               \n\t"
+        "paddusw %%mm1, %%mm0           \n\t"
+        "movd %%mm0, %0                 \n\t"
+
+        : "=r" (sum)
+        : "r"(temp)
+    );
+    return sum&0xFFFF;
+}
+
+static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){
+    DECLARE_ALIGNED_8(uint64_t, temp[16]);
+    int sum=0;
+
+    assert(h==8);
+
+    diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);
+
+    asm volatile(
+        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
+        LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7)
+
+        HADAMARD48
+
+        "movq %%mm7, 112(%1)            \n\t"
+
+        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
+        STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)
+
+        "movq 112(%1), %%mm7            \n\t"
+        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
+        STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)
+
+        LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3)
+        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
+
+        HADAMARD48
+
+        "movq %%mm7, 120(%1)            \n\t"
+
+        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
+        STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2)
+
+        "movq 120(%1), %%mm7            \n\t"
+        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
+        "movq %%mm7, %%mm5              \n\t"//FIXME remove
+        "movq %%mm6, %%mm7              \n\t"
+        "movq %%mm0, %%mm6              \n\t"
+//        STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove
+
+        LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)
+//        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
+
+        HADAMARD48
+        "movq %%mm7, 64(%1)             \n\t"
+        MMABS_MMX2(%%mm0, %%mm7)
+        MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
+        MMABS_SUM_MMX2(%%mm2, %%mm7, %%mm0)
+        MMABS_SUM_MMX2(%%mm3, %%mm7, %%mm0)
+        MMABS_SUM_MMX2(%%mm4, %%mm7, %%mm0)
+        MMABS_SUM_MMX2(%%mm5, %%mm7, %%mm0)
+        MMABS_SUM_MMX2(%%mm6, %%mm7, %%mm0)
+        "movq 64(%1), %%mm1             \n\t"
+        MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
+        "movq %%mm0, 64(%1)             \n\t"
+
+        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
+        LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7)
+
+        HADAMARD48
+        "movq %%mm7, (%1)               \n\t"
+        MMABS_MMX2(%%mm0, %%mm7)
+        MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
+        MMABS_SUM_MMX2(%%mm2, %%mm7, %%mm0)
+        MMABS_SUM_MMX2(%%mm3, %%mm7, %%mm0)
+        MMABS_SUM_MMX2(%%mm4, %%mm7, %%mm0)
+        MMABS_SUM_MMX2(%%mm5, %%mm7, %%mm0)
+        MMABS_SUM_MMX2(%%mm6, %%mm7, %%mm0)
+        "movq (%1), %%mm1               \n\t"
+        MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
+        "movq 64(%1), %%mm1             \n\t"
+        MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
+
+        "pshufw $0x0E, %%mm0, %%mm1     \n\t"
+        "paddusw %%mm1, %%mm0           \n\t"
+        "pshufw $0x01, %%mm0, %%mm1     \n\t"
+        "paddusw %%mm1, %%mm0           \n\t"
+        "movd %%mm0, %0                 \n\t"
+
+        : "=r" (sum)
+        : "r"(temp)
+    );
+    return sum&0xFFFF;
+}
+
+
+WARPER8_16_SQ(hadamard8_diff_mmx, hadamard8_diff16_mmx)
+WARPER8_16_SQ(hadamard8_diff_mmx2, hadamard8_diff16_mmx2)
+#endif //CONFIG_ENCODERS
+
+#define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d)
+#define put_no_rnd_pixels16_mmx(a,b,c,d) put_pixels16_mmx(a,b,c,d)
+
+#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
+        "paddw " #m4 ", " #m3 "           \n\t" /* x1 */\
+        "movq "MANGLE(ff_pw_20)", %%mm4   \n\t" /* 20 */\
+        "pmullw " #m3 ", %%mm4            \n\t" /* 20x1 */\
+        "movq "#in7", " #m3 "             \n\t" /* d */\
+        "movq "#in0", %%mm5               \n\t" /* D */\
+        "paddw " #m3 ", %%mm5             \n\t" /* x4 */\
+        "psubw %%mm5, %%mm4               \n\t" /* 20x1 - x4 */\
+        "movq "#in1", %%mm5               \n\t" /* C */\
+        "movq "#in2", %%mm6               \n\t" /* B */\
+        "paddw " #m6 ", %%mm5             \n\t" /* x3 */\
+        "paddw " #m5 ", %%mm6             \n\t" /* x2 */\
+        "paddw %%mm6, %%mm6               \n\t" /* 2x2 */\
+        "psubw %%mm6, %%mm5               \n\t" /* -2x2 + x3 */\
+        "pmullw "MANGLE(ff_pw_3)", %%mm5  \n\t" /* -6x2 + 3x3 */\
+        "paddw " #rnd ", %%mm4            \n\t" /* x2 */\
+        "paddw %%mm4, %%mm5               \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
+        "psraw $5, %%mm5                  \n\t"\
+        "packuswb %%mm5, %%mm5            \n\t"\
+        OP(%%mm5, out, %%mm7, d)
+
+#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)\
+static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    uint64_t temp;\
+\
+    asm volatile(\
+        "pxor %%mm7, %%mm7                \n\t"\
+        "1:                               \n\t"\
+        "movq  (%0), %%mm0                \n\t" /* ABCDEFGH */\
+        "movq %%mm0, %%mm1                \n\t" /* ABCDEFGH */\
+        "movq %%mm0, %%mm2                \n\t" /* ABCDEFGH */\
+        "punpcklbw %%mm7, %%mm0           \n\t" /* 0A0B0C0D */\
+        "punpckhbw %%mm7, %%mm1           \n\t" /* 0E0F0G0H */\
+        "pshufw $0x90, %%mm0, %%mm5       \n\t" /* 0A0A0B0C */\
+        "pshufw $0x41, %%mm0, %%mm6       \n\t" /* 0B0A0A0B */\
+        "movq %%mm2, %%mm3                \n\t" /* ABCDEFGH */\
+        "movq %%mm2, %%mm4                \n\t" /* ABCDEFGH */\
+        "psllq $8, %%mm2                  \n\t" /* 0ABCDEFG */\
+        "psllq $16, %%mm3                 \n\t" /* 00ABCDEF */\
+        "psllq $24, %%mm4                 \n\t" /* 000ABCDE */\
+        "punpckhbw %%mm7, %%mm2           \n\t" /* 0D0E0F0G */\
+        "punpckhbw %%mm7, %%mm3           \n\t" /* 0C0D0E0F */\
+        "punpckhbw %%mm7, %%mm4           \n\t" /* 0B0C0D0E */\
+        "paddw %%mm3, %%mm5               \n\t" /* b */\
+        "paddw %%mm2, %%mm6               \n\t" /* c */\
+        "paddw %%mm5, %%mm5               \n\t" /* 2b */\
+        "psubw %%mm5, %%mm6               \n\t" /* c - 2b */\
+        "pshufw $0x06, %%mm0, %%mm5       \n\t" /* 0C0B0A0A */\
+        "pmullw "MANGLE(ff_pw_3)", %%mm6  \n\t" /* 3c - 6b */\
+        "paddw %%mm4, %%mm0               \n\t" /* a */\
+        "paddw %%mm1, %%mm5               \n\t" /* d */\
+        "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
+        "psubw %%mm5, %%mm0               \n\t" /* 20a - d */\
+        "paddw %6, %%mm6                  \n\t"\
+        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
+        "psraw $5, %%mm0                  \n\t"\
+        "movq %%mm0, %5                   \n\t"\
+        /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
+        \
+        "movq 5(%0), %%mm0                \n\t" /* FGHIJKLM */\
+        "movq %%mm0, %%mm5                \n\t" /* FGHIJKLM */\
+        "movq %%mm0, %%mm6                \n\t" /* FGHIJKLM */\
+        "psrlq $8, %%mm0                  \n\t" /* GHIJKLM0 */\
+        "psrlq $16, %%mm5                 \n\t" /* HIJKLM00 */\
+        "punpcklbw %%mm7, %%mm0           \n\t" /* 0G0H0I0J */\
+        "punpcklbw %%mm7, %%mm5           \n\t" /* 0H0I0J0K */\
+        "paddw %%mm0, %%mm2               \n\t" /* b */\
+        "paddw %%mm5, %%mm3               \n\t" /* c */\
+        "paddw %%mm2, %%mm2               \n\t" /* 2b */\
+        "psubw %%mm2, %%mm3               \n\t" /* c - 2b */\
+        "movq %%mm6, %%mm2                \n\t" /* FGHIJKLM */\
+        "psrlq $24, %%mm6                 \n\t" /* IJKLM000 */\
+        "punpcklbw %%mm7, %%mm2           \n\t" /* 0F0G0H0I */\
+        "punpcklbw %%mm7, %%mm6           \n\t" /* 0I0J0K0L */\
+        "pmullw "MANGLE(ff_pw_3)", %%mm3  \n\t" /* 3c - 6b */\
+        "paddw %%mm2, %%mm1               \n\t" /* a */\
+        "paddw %%mm6, %%mm4               \n\t" /* d */\
+        "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
+        "psubw %%mm4, %%mm3               \n\t" /* - 6b +3c - d */\
+        "paddw %6, %%mm1                  \n\t"\
+        "paddw %%mm1, %%mm3               \n\t" /* 20a - 6b +3c - d */\
+        "psraw $5, %%mm3                  \n\t"\
+        "movq %5, %%mm1                   \n\t"\
+        "packuswb %%mm3, %%mm1            \n\t"\
+        OP_MMX2(%%mm1, (%1),%%mm4, q)\
+        /* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */\
+        \
+        "movq 9(%0), %%mm1                \n\t" /* JKLMNOPQ */\
+        "movq %%mm1, %%mm4                \n\t" /* JKLMNOPQ */\
+        "movq %%mm1, %%mm3                \n\t" /* JKLMNOPQ */\
+        "psrlq $8, %%mm1                  \n\t" /* KLMNOPQ0 */\
+        "psrlq $16, %%mm4                 \n\t" /* LMNOPQ00 */\
+        "punpcklbw %%mm7, %%mm1           \n\t" /* 0K0L0M0N */\
+        "punpcklbw %%mm7, %%mm4           \n\t" /* 0L0M0N0O */\
+        "paddw %%mm1, %%mm5               \n\t" /* b */\
+        "paddw %%mm4, %%mm0               \n\t" /* c */\
+        "paddw %%mm5, %%mm5               \n\t" /* 2b */\
+        "psubw %%mm5, %%mm0               \n\t" /* c - 2b */\
+        "movq %%mm3, %%mm5                \n\t" /* JKLMNOPQ */\
+        "psrlq $24, %%mm3                 \n\t" /* MNOPQ000 */\
+        "pmullw "MANGLE(ff_pw_3)", %%mm0  \n\t" /* 3c - 6b */\
+        "punpcklbw %%mm7, %%mm3           \n\t" /* 0M0N0O0P */\
+        "paddw %%mm3, %%mm2               \n\t" /* d */\
+        "psubw %%mm2, %%mm0               \n\t" /* -6b + 3c - d */\
+        "movq %%mm5, %%mm2                \n\t" /* JKLMNOPQ */\
+        "punpcklbw %%mm7, %%mm2           \n\t" /* 0J0K0L0M */\
+        "punpckhbw %%mm7, %%mm5           \n\t" /* 0N0O0P0Q */\
+        "paddw %%mm2, %%mm6               \n\t" /* a */\
+        "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\
+        "paddw %6, %%mm0                  \n\t"\
+        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
+        "psraw $5, %%mm0                  \n\t"\
+        /* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\
+        \
+        "paddw %%mm5, %%mm3               \n\t" /* a */\
+        "pshufw $0xF9, %%mm5, %%mm6       \n\t" /* 0O0P0Q0Q */\
+        "paddw %%mm4, %%mm6               \n\t" /* b */\
+        "pshufw $0xBE, %%mm5, %%mm4       \n\t" /* 0P0Q0Q0P */\
+        "pshufw $0x6F, %%mm5, %%mm5       \n\t" /* 0Q0Q0P0O */\
+        "paddw %%mm1, %%mm4               \n\t" /* c */\
+        "paddw %%mm2, %%mm5               \n\t" /* d */\
+        "paddw %%mm6, %%mm6               \n\t" /* 2b */\
+        "psubw %%mm6, %%mm4               \n\t" /* c - 2b */\
+        "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\
+        "pmullw "MANGLE(ff_pw_3)", %%mm4  \n\t" /* 3c - 6b */\
+        "psubw %%mm5, %%mm3               \n\t" /* -6b + 3c - d */\
+        "paddw %6, %%mm4                  \n\t"\
+        "paddw %%mm3, %%mm4               \n\t" /* 20a - 6b + 3c - d */\
+        "psraw $5, %%mm4                  \n\t"\
+        "packuswb %%mm4, %%mm0            \n\t"\
+        OP_MMX2(%%mm0, 8(%1), %%mm4, q)\
+        \
+        "add %3, %0                       \n\t"\
+        "add %4, %1                       \n\t"\
+        "decl %2                          \n\t"\
+        " jnz 1b                          \n\t"\
+        : "+a"(src), "+c"(dst), "+m"(h)\
+        : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
+        : "memory"\
+    );\
+}\
+\
+static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    int i;\
+    int16_t temp[16];\
+    /* quick HACK, XXX FIXME MUST be optimized */\
+    for(i=0; i<h; i++)\
+    {\
+        temp[ 0]= (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]);\
+        temp[ 1]= (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]);\
+        temp[ 2]= (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]);\
+        temp[ 3]= (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]);\
+        temp[ 4]= (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]);\
+        temp[ 5]= (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]);\
+        temp[ 6]= (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]);\
+        temp[ 7]= (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]);\
+        temp[ 8]= (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]);\
+        temp[ 9]= (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]);\
+        temp[10]= (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]);\
+        temp[11]= (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]);\
+        temp[12]= (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]);\
+        temp[13]= (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]);\
+        temp[14]= (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]);\
+        temp[15]= (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]);\
+        asm volatile(\
+            "movq (%0), %%mm0               \n\t"\
+            "movq 8(%0), %%mm1              \n\t"\
+            "paddw %2, %%mm0                \n\t"\
+            "paddw %2, %%mm1                \n\t"\
+            "psraw $5, %%mm0                \n\t"\
+            "psraw $5, %%mm1                \n\t"\
+            "packuswb %%mm1, %%mm0          \n\t"\
+            OP_3DNOW(%%mm0, (%1), %%mm1, q)\
+            "movq 16(%0), %%mm0             \n\t"\
+            "movq 24(%0), %%mm1             \n\t"\
+            "paddw %2, %%mm0                \n\t"\
+            "paddw %2, %%mm1                \n\t"\
+            "psraw $5, %%mm0                \n\t"\
+            "psraw $5, %%mm1                \n\t"\
+            "packuswb %%mm1, %%mm0          \n\t"\
+            OP_3DNOW(%%mm0, 8(%1), %%mm1, q)\
+            :: "r"(temp), "r"(dst), "m"(ROUNDER)\
+            : "memory"\
+        );\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    uint64_t temp;\
+\
+    asm volatile(\
+        "pxor %%mm7, %%mm7                \n\t"\
+        "1:                               \n\t"\
+        "movq  (%0), %%mm0                \n\t" /* ABCDEFGH */\
+        "movq %%mm0, %%mm1                \n\t" /* ABCDEFGH */\
+        "movq %%mm0, %%mm2                \n\t" /* ABCDEFGH */\
+        "punpcklbw %%mm7, %%mm0           \n\t" /* 0A0B0C0D */\
+        "punpckhbw %%mm7, %%mm1           \n\t" /* 0E0F0G0H */\
+        "pshufw $0x90, %%mm0, %%mm5       \n\t" /* 0A0A0B0C */\
+        "pshufw $0x41, %%mm0, %%mm6       \n\t" /* 0B0A0A0B */\
+        "movq %%mm2, %%mm3                \n\t" /* ABCDEFGH */\
+        "movq %%mm2, %%mm4                \n\t" /* ABCDEFGH */\
+        "psllq $8, %%mm2                  \n\t" /* 0ABCDEFG */\
+        "psllq $16, %%mm3                 \n\t" /* 00ABCDEF */\
+        "psllq $24, %%mm4                 \n\t" /* 000ABCDE */\
+        "punpckhbw %%mm7, %%mm2           \n\t" /* 0D0E0F0G */\
+        "punpckhbw %%mm7, %%mm3           \n\t" /* 0C0D0E0F */\
+        "punpckhbw %%mm7, %%mm4           \n\t" /* 0B0C0D0E */\
+        "paddw %%mm3, %%mm5               \n\t" /* b */\
+        "paddw %%mm2, %%mm6               \n\t" /* c */\
+        "paddw %%mm5, %%mm5               \n\t" /* 2b */\
+        "psubw %%mm5, %%mm6               \n\t" /* c - 2b */\
+        "pshufw $0x06, %%mm0, %%mm5       \n\t" /* 0C0B0A0A */\
+        "pmullw "MANGLE(ff_pw_3)", %%mm6  \n\t" /* 3c - 6b */\
+        "paddw %%mm4, %%mm0               \n\t" /* a */\
+        "paddw %%mm1, %%mm5               \n\t" /* d */\
+        "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
+        "psubw %%mm5, %%mm0               \n\t" /* 20a - d */\
+        "paddw %6, %%mm6                  \n\t"\
+        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
+        "psraw $5, %%mm0                  \n\t"\
+        /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
+        \
+        "movd 5(%0), %%mm5                \n\t" /* FGHI */\
+        "punpcklbw %%mm7, %%mm5           \n\t" /* 0F0G0H0I */\
+        "pshufw $0xF9, %%mm5, %%mm6       \n\t" /* 0G0H0I0I */\
+        "paddw %%mm5, %%mm1               \n\t" /* a */\
+        "paddw %%mm6, %%mm2               \n\t" /* b */\
+        "pshufw $0xBE, %%mm5, %%mm6       \n\t" /* 0H0I0I0H */\
+        "pshufw $0x6F, %%mm5, %%mm5       \n\t" /* 0I0I0H0G */\
+        "paddw %%mm6, %%mm3               \n\t" /* c */\
+        "paddw %%mm5, %%mm4               \n\t" /* d */\
+        "paddw %%mm2, %%mm2               \n\t" /* 2b */\
+        "psubw %%mm2, %%mm3               \n\t" /* c - 2b */\
+        "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
+        "pmullw "MANGLE(ff_pw_3)", %%mm3  \n\t" /* 3c - 6b */\
+        "psubw %%mm4, %%mm3               \n\t" /* -6b + 3c - d */\
+        "paddw %6, %%mm1                  \n\t"\
+        "paddw %%mm1, %%mm3               \n\t" /* 20a - 6b + 3c - d */\
+        "psraw $5, %%mm3                  \n\t"\
+        "packuswb %%mm3, %%mm0            \n\t"\
+        OP_MMX2(%%mm0, (%1), %%mm4, q)\
+        \
+        "add %3, %0                       \n\t"\
+        "add %4, %1                       \n\t"\
+        "decl %2                          \n\t"\
+        " jnz 1b                          \n\t"\
+        : "+a"(src), "+c"(dst), "+m"(h)\
+        : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
+        : "memory"\
+    );\
+}\
+\
+static void OPNAME ## mpeg4_qpel8_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    int i;\
+    int16_t temp[8];\
+    /* quick HACK, XXX FIXME MUST be optimized */\
+    for(i=0; i<h; i++)\
+    {\
+        temp[ 0]= (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]);\
+        temp[ 1]= (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]);\
+        temp[ 2]= (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]);\
+        temp[ 3]= (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]);\
+        temp[ 4]= (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]);\
+        temp[ 5]= (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 8]);\
+        temp[ 6]= (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 8])*3 - (src[ 3]+src[ 7]);\
+        temp[ 7]= (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 8])*6 + (src[ 5]+src[ 7])*3 - (src[ 4]+src[ 6]);\
+        asm volatile(\
+            "movq (%0), %%mm0           \n\t"\
+            "movq 8(%0), %%mm1          \n\t"\
+            "paddw %2, %%mm0            \n\t"\
+            "paddw %2, %%mm1            \n\t"\
+            "psraw $5, %%mm0            \n\t"\
+            "psraw $5, %%mm1            \n\t"\
+            "packuswb %%mm1, %%mm0      \n\t"\
+            OP_3DNOW(%%mm0, (%1), %%mm1, q)\
+            :: "r"(temp), "r"(dst), "m"(ROUNDER)\
+            :"memory"\
+        );\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}
+
+#define QPEL_OP(OPNAME, ROUNDER, RND, OP, MMX)\
+\
+static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    uint64_t temp[17*4];\
+    uint64_t *temp_ptr= temp;\
+    int count= 17;\
+\
+    /*FIXME unroll */\
+    asm volatile(\
+        "pxor %%mm7, %%mm7              \n\t"\
+        "1:                             \n\t"\
+        "movq (%0), %%mm0               \n\t"\
+        "movq (%0), %%mm1               \n\t"\
+        "movq 8(%0), %%mm2              \n\t"\
+        "movq 8(%0), %%mm3              \n\t"\
+        "punpcklbw %%mm7, %%mm0         \n\t"\
+        "punpckhbw %%mm7, %%mm1         \n\t"\
+        "punpcklbw %%mm7, %%mm2         \n\t"\
+        "punpckhbw %%mm7, %%mm3         \n\t"\
+        "movq %%mm0, (%1)               \n\t"\
+        "movq %%mm1, 17*8(%1)           \n\t"\
+        "movq %%mm2, 2*17*8(%1)         \n\t"\
+        "movq %%mm3, 3*17*8(%1)         \n\t"\
+        "add $8, %1                     \n\t"\
+        "add %3, %0                     \n\t"\
+        "decl %2                        \n\t"\
+        " jnz 1b                        \n\t"\
+        : "+r" (src), "+r" (temp_ptr), "+r"(count)\
+        : "r" ((long)srcStride)\
+        : "memory"\
+    );\
+    \
+    temp_ptr= temp;\
+    count=4;\
+    \
+/*FIXME reorder for speed */\
+    asm volatile(\
+        /*"pxor %%mm7, %%mm7              \n\t"*/\
+        "1:                             \n\t"\
+        "movq (%0), %%mm0               \n\t"\
+        "movq 8(%0), %%mm1              \n\t"\
+        "movq 16(%0), %%mm2             \n\t"\
+        "movq 24(%0), %%mm3             \n\t"\
+        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
+        "add %4, %1                     \n\t"\
+        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
+        \
+        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
+        "add %4, %1                     \n\t"\
+        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
+        "add %4, %1                     \n\t"\
+        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
+        "add %4, %1                     \n\t"\
+        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
+        "add %4, %1                     \n\t"\
+        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
+        "add %4, %1                     \n\t"\
+        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
+        \
+        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
+        "add %4, %1                     \n\t"  \
+        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
+        \
+        "add $136, %0                   \n\t"\
+        "add %6, %1                     \n\t"\
+        "decl %2                        \n\t"\
+        " jnz 1b                        \n\t"\
+        \
+        : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
+        : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\
+        :"memory"\
+    );\
+}\
+\
+static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    uint64_t temp[9*2];\
+    uint64_t *temp_ptr= temp;\
+    int count= 9;\
+\
+    /*FIXME unroll */\
+    asm volatile(\
+        "pxor %%mm7, %%mm7              \n\t"\
+        "1:                             \n\t"\
+        "movq (%0), %%mm0               \n\t"\
+        "movq (%0), %%mm1               \n\t"\
+        "punpcklbw %%mm7, %%mm0         \n\t"\
+        "punpckhbw %%mm7, %%mm1         \n\t"\
+        "movq %%mm0, (%1)               \n\t"\
+        "movq %%mm1, 9*8(%1)            \n\t"\
+        "add $8, %1                     \n\t"\
+        "add %3, %0                     \n\t"\
+        "decl %2                        \n\t"\
+        " jnz 1b                        \n\t"\
+        : "+r" (src), "+r" (temp_ptr), "+r"(count)\
+        : "r" ((long)srcStride)\
+        : "memory"\
+    );\
+    \
+    temp_ptr= temp;\
+    count=2;\
+    \
+/*FIXME reorder for speed */\
+    asm volatile(\
+        /*"pxor %%mm7, %%mm7              \n\t"*/\
+        "1:                             \n\t"\
+        "movq (%0), %%mm0               \n\t"\
+        "movq 8(%0), %%mm1              \n\t"\
+        "movq 16(%0), %%mm2             \n\t"\
+        "movq 24(%0), %%mm3             \n\t"\
+        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
+        "add %4, %1                     \n\t"\
+        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
+        \
+        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
+        "add %4, %1                     \n\t"\
+        QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
+        \
+        QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
+        "add %4, %1                     \n\t"\
+        QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
+        QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
+                \
+        "add $72, %0                    \n\t"\
+        "add %6, %1                     \n\t"\
+        "decl %2                        \n\t"\
+        " jnz 1b                        \n\t"\
+         \
+        : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
+        : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\
+        : "memory"\
+   );\
+}\
+\
+static void OPNAME ## qpel8_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels8_mmx(dst, src, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[8];\
+    uint8_t * const half= (uint8_t*)temp;\
+    put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
+    OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[8];\
+    uint8_t * const half= (uint8_t*)temp;\
+    put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
+    OPNAME ## pixels8_l2_ ## MMX(dst, src+1, half, stride, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[8];\
+    uint8_t * const half= (uint8_t*)temp;\
+    put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
+    OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[8];\
+    uint8_t * const half= (uint8_t*)temp;\
+    put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
+    OPNAME ## pixels8_l2_ ## MMX(dst, src+stride, half, stride, stride, 8);\
+}\
+static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[8 + 9];\
+    uint8_t * const halfH= ((uint8_t*)half) + 64;\
+    uint8_t * const halfHV= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+    put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[8 + 9];\
+    uint8_t * const halfH= ((uint8_t*)half) + 64;\
+    uint8_t * const halfHV= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+    put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[8 + 9];\
+    uint8_t * const halfH= ((uint8_t*)half) + 64;\
+    uint8_t * const halfHV= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+    put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[8 + 9];\
+    uint8_t * const halfH= ((uint8_t*)half) + 64;\
+    uint8_t * const halfHV= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+    put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[8 + 9];\
+    uint8_t * const halfH= ((uint8_t*)half) + 64;\
+    uint8_t * const halfHV= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[8 + 9];\
+    uint8_t * const halfH= ((uint8_t*)half) + 64;\
+    uint8_t * const halfHV= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[8 + 9];\
+    uint8_t * const halfH= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+    put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
+}\
+static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[8 + 9];\
+    uint8_t * const halfH= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+    put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
+}\
+static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[9];\
+    uint8_t * const halfH= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
+}\
+static void OPNAME ## qpel16_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels16_mmx(dst, src, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[32];\
+    uint8_t * const half= (uint8_t*)temp;\
+    put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\
+    OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[32];\
+    uint8_t * const half= (uint8_t*)temp;\
+    put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\
+    OPNAME ## pixels16_l2_ ## MMX(dst, src+1, half, stride, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[32];\
+    uint8_t * const half= (uint8_t*)temp;\
+    put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\
+    OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[32];\
+    uint8_t * const half= (uint8_t*)temp;\
+    put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\
+    OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, stride, stride, 16);\
+}\
+static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[16*2 + 17*2];\
+    uint8_t * const halfH= ((uint8_t*)half) + 256;\
+    uint8_t * const halfHV= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+    put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[16*2 + 17*2];\
+    uint8_t * const halfH= ((uint8_t*)half) + 256;\
+    uint8_t * const halfHV= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+    put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[16*2 + 17*2];\
+    uint8_t * const halfH= ((uint8_t*)half) + 256;\
+    uint8_t * const halfHV= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+    put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[16*2 + 17*2];\
+    uint8_t * const halfH= ((uint8_t*)half) + 256;\
+    uint8_t * const halfHV= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+    put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[16*2 + 17*2];\
+    uint8_t * const halfH= ((uint8_t*)half) + 256;\
+    uint8_t * const halfHV= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[16*2 + 17*2];\
+    uint8_t * const halfH= ((uint8_t*)half) + 256;\
+    uint8_t * const halfHV= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[17*2];\
+    uint8_t * const halfH= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+    put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
+}\
+static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[17*2];\
+    uint8_t * const halfH= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+    put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
+}\
+static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t half[17*2];\
+    uint8_t * const halfH= ((uint8_t*)half);\
+    put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
+}
+
+#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b "        \n\t"
+#define AVG_3DNOW_OP(a,b,temp, size) \
+"mov" #size " " #b ", " #temp "   \n\t"\
+"pavgusb " #temp ", " #a "        \n\t"\
+"mov" #size " " #a ", " #b "      \n\t"
+#define AVG_MMX2_OP(a,b,temp, size) \
+"mov" #size " " #b ", " #temp "   \n\t"\
+"pavgb " #temp ", " #a "          \n\t"\
+"mov" #size " " #a ", " #b "      \n\t"
+
+QPEL_BASE(put_       , ff_pw_16, _       , PUT_OP, PUT_OP)
+QPEL_BASE(avg_       , ff_pw_16, _       , AVG_MMX2_OP, AVG_3DNOW_OP)
+QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP)
+QPEL_OP(put_       , ff_pw_16, _       , PUT_OP, 3dnow)
+QPEL_OP(avg_       , ff_pw_16, _       , AVG_3DNOW_OP, 3dnow)
+QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow)
+QPEL_OP(put_       , ff_pw_16, _       , PUT_OP, mmx2)
+QPEL_OP(avg_       , ff_pw_16, _       , AVG_MMX2_OP, mmx2)
+QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2)
+
+/***********************************/
+/* bilinear qpel: not compliant to any spec, only for -lavdopts fast */
+
+#define QPEL_2TAP_XY(OPNAME, SIZE, MMX, XY, HPEL)\
+static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels ## SIZE ## HPEL(dst, src, stride, SIZE);\
+}
+#define QPEL_2TAP_L3(OPNAME, SIZE, MMX, XY, S0, S1, S2)\
+static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## 2tap_qpel ## SIZE ## _l3_ ## MMX(dst, src+S0, stride, SIZE, S1, S2);\
+}
+
+#define QPEL_2TAP(OPNAME, SIZE, MMX)\
+QPEL_2TAP_XY(OPNAME, SIZE, MMX, 20, _x2_ ## MMX)\
+QPEL_2TAP_XY(OPNAME, SIZE, MMX, 02, _y2_ ## MMX)\
+QPEL_2TAP_XY(OPNAME, SIZE, MMX, 22, _xy2_mmx)\
+static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc00_ ## MMX =\
+                          OPNAME ## qpel ## SIZE ## _mc00_ ## MMX;\
+static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc21_ ## MMX =\
+                          OPNAME ## 2tap_qpel ## SIZE ## _mc20_ ## MMX;\
+static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc12_ ## MMX =\
+                          OPNAME ## 2tap_qpel ## SIZE ## _mc02_ ## MMX;\
+static void OPNAME ## 2tap_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels ## SIZE ## _y2_ ## MMX(dst, src+1, stride, SIZE);\
+}\
+static void OPNAME ## 2tap_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels ## SIZE ## _x2_ ## MMX(dst, src+stride, stride, SIZE);\
+}\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 10, 0,         1,       0)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 30, 1,        -1,       0)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 01, 0,         stride,  0)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 03, stride,   -stride,  0)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 11, 0,         stride,  1)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 31, 1,         stride, -1)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 13, stride,   -stride,  1)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 33, stride+1, -stride, -1)\
+
+QPEL_2TAP(put_, 16, mmx2)
+QPEL_2TAP(avg_, 16, mmx2)
+QPEL_2TAP(put_,  8, mmx2)
+QPEL_2TAP(avg_,  8, mmx2)
+QPEL_2TAP(put_, 16, 3dnow)
+QPEL_2TAP(avg_, 16, 3dnow)
+QPEL_2TAP(put_,  8, 3dnow)
+QPEL_2TAP(avg_,  8, 3dnow)
+
+
+#if 0
+static void just_return() { return; }
+#endif
+
+#define SET_QPEL_FUNC(postfix1, postfix2) \
+    c->put_ ## postfix1 = put_ ## postfix2;\
+    c->put_no_rnd_ ## postfix1 = put_no_rnd_ ## postfix2;\
+    c->avg_ ## postfix1 = avg_ ## postfix2;
+
+static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
+                    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height){
+    const int w = 8;
+    const int ix = ox>>(16+shift);
+    const int iy = oy>>(16+shift);
+    const int oxs = ox>>4;
+    const int oys = oy>>4;
+    const int dxxs = dxx>>4;
+    const int dxys = dxy>>4;
+    const int dyxs = dyx>>4;
+    const int dyys = dyy>>4;
+    const uint16_t r4[4] = {r,r,r,r};
+    const uint16_t dxy4[4] = {dxys,dxys,dxys,dxys};
+    const uint16_t dyy4[4] = {dyys,dyys,dyys,dyys};
+    const uint64_t shift2 = 2*shift;
+    uint8_t edge_buf[(h+1)*stride];
+    int x, y;
+
+    const int dxw = (dxx-(1<<(16+shift)))*(w-1);
+    const int dyh = (dyy-(1<<(16+shift)))*(h-1);
+    const int dxh = dxy*(h-1);
+    const int dyw = dyx*(w-1);
+    if( // non-constant fullpel offset (3% of blocks)
+        (ox^(ox+dxw) | ox^(ox+dxh) | ox^(ox+dxw+dxh) |
+         oy^(oy+dyw) | oy^(oy+dyh) | oy^(oy+dyw+dyh)) >> (16+shift)
+        // uses more than 16 bits of subpel mv (only at huge resolution)
+        || (dxx|dxy|dyx|dyy)&15 )
+    {
+        //FIXME could still use mmx for some of the rows
+        ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r, width, height);
+        return;
+    }
+
+    src += ix + iy*stride;
+    if( (unsigned)ix >= width-w ||
+        (unsigned)iy >= height-h )
+    {
+        ff_emulated_edge_mc(edge_buf, src, stride, w+1, h+1, ix, iy, width, height);
+        src = edge_buf;
+    }
+
+    asm volatile(
+        "movd         %0, %%mm6 \n\t"
+        "pxor      %%mm7, %%mm7 \n\t"
+        "punpcklwd %%mm6, %%mm6 \n\t"
+        "punpcklwd %%mm6, %%mm6 \n\t"
+        :: "r"(1<<shift)
+    );
+
+    for(x=0; x<w; x+=4){
+        uint16_t dx4[4] = { oxs - dxys + dxxs*(x+0),
+                            oxs - dxys + dxxs*(x+1),
+                            oxs - dxys + dxxs*(x+2),
+                            oxs - dxys + dxxs*(x+3) };
+        uint16_t dy4[4] = { oys - dyys + dyxs*(x+0),
+                            oys - dyys + dyxs*(x+1),
+                            oys - dyys + dyxs*(x+2),
+                            oys - dyys + dyxs*(x+3) };
+
+        for(y=0; y<h; y++){
+            asm volatile(
+                "movq   %0,  %%mm4 \n\t"
+                "movq   %1,  %%mm5 \n\t"
+                "paddw  %2,  %%mm4 \n\t"
+                "paddw  %3,  %%mm5 \n\t"
+                "movq   %%mm4, %0  \n\t"
+                "movq   %%mm5, %1  \n\t"
+                "psrlw  $12, %%mm4 \n\t"
+                "psrlw  $12, %%mm5 \n\t"
+                : "+m"(*dx4), "+m"(*dy4)
+                : "m"(*dxy4), "m"(*dyy4)
+            );
+
+            asm volatile(
+                "movq   %%mm6, %%mm2 \n\t"
+                "movq   %%mm6, %%mm1 \n\t"
+                "psubw  %%mm4, %%mm2 \n\t"
+                "psubw  %%mm5, %%mm1 \n\t"
+                "movq   %%mm2, %%mm0 \n\t"
+                "movq   %%mm4, %%mm3 \n\t"
+                "pmullw %%mm1, %%mm0 \n\t" // (s-dx)*(s-dy)
+                "pmullw %%mm5, %%mm3 \n\t" // dx*dy
+                "pmullw %%mm5, %%mm2 \n\t" // (s-dx)*dy
+                "pmullw %%mm4, %%mm1 \n\t" // dx*(s-dy)
+
+                "movd   %4,    %%mm5 \n\t"
+                "movd   %3,    %%mm4 \n\t"
+                "punpcklbw %%mm7, %%mm5 \n\t"
+                "punpcklbw %%mm7, %%mm4 \n\t"
+                "pmullw %%mm5, %%mm3 \n\t" // src[1,1] * dx*dy
+                "pmullw %%mm4, %%mm2 \n\t" // src[0,1] * (s-dx)*dy
+
+                "movd   %2,    %%mm5 \n\t"
+                "movd   %1,    %%mm4 \n\t"
+                "punpcklbw %%mm7, %%mm5 \n\t"
+                "punpcklbw %%mm7, %%mm4 \n\t"
+                "pmullw %%mm5, %%mm1 \n\t" // src[1,0] * dx*(s-dy)
+                "pmullw %%mm4, %%mm0 \n\t" // src[0,0] * (s-dx)*(s-dy)
+                "paddw  %5,    %%mm1 \n\t"
+                "paddw  %%mm3, %%mm2 \n\t"
+                "paddw  %%mm1, %%mm0 \n\t"
+                "paddw  %%mm2, %%mm0 \n\t"
+
+                "psrlw    %6,    %%mm0 \n\t"
+                "packuswb %%mm0, %%mm0 \n\t"
+                "movd     %%mm0, %0    \n\t"
+
+                : "=m"(dst[x+y*stride])
+                : "m"(src[0]), "m"(src[1]),
+                  "m"(src[stride]), "m"(src[stride+1]),
+                  "m"(*r4), "m"(shift2)
+            );
+            src += stride;
+        }
+        src += 4-h*stride;
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
+    long i=0;
+
+    assert(FFABS(scale) < 256);
+    scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT;
+
+    asm volatile(
+        "pcmpeqw %%mm6, %%mm6           \n\t" // -1w
+        "psrlw $15, %%mm6               \n\t" //  1w
+        "pxor %%mm7, %%mm7              \n\t"
+        "movd  %4, %%mm5                \n\t"
+        "punpcklwd %%mm5, %%mm5         \n\t"
+        "punpcklwd %%mm5, %%mm5         \n\t"
+        "1:                             \n\t"
+        "movq  (%1, %0), %%mm0          \n\t"
+        "movq  8(%1, %0), %%mm1         \n\t"
+        "pmulhw %%mm5, %%mm0            \n\t"
+        "pmulhw %%mm5, %%mm1            \n\t"
+        "paddw %%mm6, %%mm0             \n\t"
+        "paddw %%mm6, %%mm1             \n\t"
+        "psraw $1, %%mm0                \n\t"
+        "psraw $1, %%mm1                \n\t"
+        "paddw (%2, %0), %%mm0          \n\t"
+        "paddw 8(%2, %0), %%mm1         \n\t"
+        "psraw $6, %%mm0                \n\t"
+        "psraw $6, %%mm1                \n\t"
+        "pmullw (%3, %0), %%mm0         \n\t"
+        "pmullw 8(%3, %0), %%mm1        \n\t"
+        "pmaddwd %%mm0, %%mm0           \n\t"
+        "pmaddwd %%mm1, %%mm1           \n\t"
+        "paddd %%mm1, %%mm0             \n\t"
+        "psrld $4, %%mm0                \n\t"
+        "paddd %%mm0, %%mm7             \n\t"
+        "add $16, %0                    \n\t"
+        "cmp $128, %0                   \n\t" //FIXME optimize & bench
+        " jb 1b                         \n\t"
+        "movq %%mm7, %%mm6              \n\t"
+        "psrlq $32, %%mm7               \n\t"
+        "paddd %%mm6, %%mm7             \n\t"
+        "psrld $2, %%mm7                \n\t"
+        "movd %%mm7, %0                 \n\t"
+
+        : "+r" (i)
+        : "r"(basis), "r"(rem), "r"(weight), "g"(scale)
+    );
+    return i;
+}
+
+static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){
+    long i=0;
+
+    if(FFABS(scale) < 256){
+        scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT;
+        asm volatile(
+                "pcmpeqw %%mm6, %%mm6   \n\t" // -1w
+                "psrlw $15, %%mm6       \n\t" //  1w
+                "movd  %3, %%mm5        \n\t"
+                "punpcklwd %%mm5, %%mm5 \n\t"
+                "punpcklwd %%mm5, %%mm5 \n\t"
+                "1:                     \n\t"
+                "movq  (%1, %0), %%mm0  \n\t"
+                "movq  8(%1, %0), %%mm1 \n\t"
+                "pmulhw %%mm5, %%mm0    \n\t"
+                "pmulhw %%mm5, %%mm1    \n\t"
+                "paddw %%mm6, %%mm0     \n\t"
+                "paddw %%mm6, %%mm1     \n\t"
+                "psraw $1, %%mm0        \n\t"
+                "psraw $1, %%mm1        \n\t"
+                "paddw (%2, %0), %%mm0  \n\t"
+                "paddw 8(%2, %0), %%mm1 \n\t"
+                "movq %%mm0, (%2, %0)   \n\t"
+                "movq %%mm1, 8(%2, %0)  \n\t"
+                "add $16, %0            \n\t"
+                "cmp $128, %0           \n\t" //FIXME optimize & bench
+                " jb 1b                 \n\t"
+
+                : "+r" (i)
+                : "r"(basis), "r"(rem), "g"(scale)
+        );
+    }else{
+        for(i=0; i<8*8; i++){
+            rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
+        }
+    }
+}
+#endif /* CONFIG_ENCODERS */
+
+#define PREFETCH(name, op) \
+static void name(void *mem, int stride, int h){\
+    const uint8_t *p= mem;\
+    do{\
+        asm volatile(#op" %0" :: "m"(*p));\
+        p+= stride;\
+    }while(--h);\
+}
+PREFETCH(prefetch_mmx2,  prefetcht0)
+PREFETCH(prefetch_3dnow, prefetch)
+#undef PREFETCH
+
+#include "h264dsp_mmx.c"
+
+/* AVS specific */
+void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx);
+
+void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
+    put_pixels8_mmx(dst, src, stride, 8);
+}
+void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
+    avg_pixels8_mmx(dst, src, stride, 8);
+}
+void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
+    put_pixels16_mmx(dst, src, stride, 16);
+}
+void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
+    avg_pixels16_mmx(dst, src, stride, 16);
+}
+
+/* external functions, from idct_mmx.c */
+void ff_mmx_idct(DCTELEM *block);
+void ff_mmxext_idct(DCTELEM *block);
+
+void ff_vp3_idct_sse2(int16_t *input_data);
+void ff_vp3_idct_mmx(int16_t *data);
+void ff_vp3_dsp_init_mmx(void);
+
+/* XXX: those functions should be suppressed ASAP when all IDCTs are
+   converted */
+#ifdef CONFIG_GPL
+static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_mmx_idct (block);
+    put_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_mmx_idct (block);
+    add_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_mmxext_idct (block);
+    put_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_mmxext_idct (block);
+    add_pixels_clamped_mmx(block, dest, line_size);
+}
+#endif
+static void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_vp3_idct_sse2(block);
+    put_signed_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_vp3_idct_sse2(block);
+    add_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_vp3_idct_mmx(block);
+    put_signed_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_vp3_idct_mmx(block);
+    add_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_idct_xvid_mmx (block);
+    put_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_idct_xvid_mmx (block);
+    add_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_idct_xvid_mmx2 (block);
+    put_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_idct_xvid_mmx2 (block);
+    add_pixels_clamped_mmx(block, dest, line_size);
+}
+
+static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
+{
+    int i;
+    asm volatile("pxor %%mm7, %%mm7":);
+    for(i=0; i<blocksize; i+=2) {
+        asm volatile(
+            "movq    %0,    %%mm0 \n\t"
+            "movq    %1,    %%mm1 \n\t"
+            "movq    %%mm0, %%mm2 \n\t"
+            "movq    %%mm1, %%mm3 \n\t"
+            "pfcmpge %%mm7, %%mm2 \n\t" // m <= 0.0
+            "pfcmpge %%mm7, %%mm3 \n\t" // a <= 0.0
+            "pslld   $31,   %%mm2 \n\t" // keep only the sign bit
+            "pxor    %%mm2, %%mm1 \n\t"
+            "movq    %%mm3, %%mm4 \n\t"
+            "pand    %%mm1, %%mm3 \n\t"
+            "pandn   %%mm1, %%mm4 \n\t"
+            "pfadd   %%mm0, %%mm3 \n\t" // a = m + ((a<0) & (a ^ sign(m)))
+            "pfsub   %%mm4, %%mm0 \n\t" // m = m + ((a>0) & (a ^ sign(m)))
+            "movq    %%mm3, %1    \n\t"
+            "movq    %%mm0, %0    \n\t"
+            :"+m"(mag[i]), "+m"(ang[i])
+            ::"memory"
+        );
+    }
+    asm volatile("femms");
+}
+static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
+{
+    int i;
+
+    asm volatile(
+            "movaps  %0,     %%xmm5 \n\t"
+        ::"m"(ff_pdw_80000000[0])
+    );
+    for(i=0; i<blocksize; i+=4) {
+        asm volatile(
+            "movaps  %0,     %%xmm0 \n\t"
+            "movaps  %1,     %%xmm1 \n\t"
+            "xorps   %%xmm2, %%xmm2 \n\t"
+            "xorps   %%xmm3, %%xmm3 \n\t"
+            "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
+            "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
+            "andps   %%xmm5, %%xmm2 \n\t" // keep only the sign bit
+            "xorps   %%xmm2, %%xmm1 \n\t"
+            "movaps  %%xmm3, %%xmm4 \n\t"
+            "andps   %%xmm1, %%xmm3 \n\t"
+            "andnps  %%xmm1, %%xmm4 \n\t"
+            "addps   %%xmm0, %%xmm3 \n\t" // a = m + ((a<0) & (a ^ sign(m)))
+            "subps   %%xmm4, %%xmm0 \n\t" // m = m + ((a>0) & (a ^ sign(m)))
+            "movaps  %%xmm3, %1     \n\t"
+            "movaps  %%xmm0, %0     \n\t"
+            :"+m"(mag[i]), "+m"(ang[i])
+            ::"memory"
+        );
+    }
+}
+
+static void vector_fmul_3dnow(float *dst, const float *src, int len){
+    long i = (len-4)*4;
+    asm volatile(
+        "1: \n\t"
+        "movq    (%1,%0), %%mm0 \n\t"
+        "movq   8(%1,%0), %%mm1 \n\t"
+        "pfmul   (%2,%0), %%mm0 \n\t"
+        "pfmul  8(%2,%0), %%mm1 \n\t"
+        "movq   %%mm0,  (%1,%0) \n\t"
+        "movq   %%mm1, 8(%1,%0) \n\t"
+        "sub  $16, %0 \n\t"
+        "jge 1b \n\t"
+        "femms  \n\t"
+        :"+r"(i)
+        :"r"(dst), "r"(src)
+        :"memory"
+    );
+}
+static void vector_fmul_sse(float *dst, const float *src, int len){
+    long i = (len-8)*4;
+    asm volatile(
+        "1: \n\t"
+        "movaps    (%1,%0), %%xmm0 \n\t"
+        "movaps  16(%1,%0), %%xmm1 \n\t"
+        "mulps     (%2,%0), %%xmm0 \n\t"
+        "mulps   16(%2,%0), %%xmm1 \n\t"
+        "movaps  %%xmm0,   (%1,%0) \n\t"
+        "movaps  %%xmm1, 16(%1,%0) \n\t"
+        "sub  $32, %0 \n\t"
+        "jge 1b \n\t"
+        :"+r"(i)
+        :"r"(dst), "r"(src)
+        :"memory"
+    );
+}
+
+static void vector_fmul_reverse_3dnow2(float *dst, const float *src0, const float *src1, int len){
+    long i = len*4-16;
+    asm volatile(
+        "1: \n\t"
+        "pswapd   8(%1), %%mm0 \n\t"
+        "pswapd    (%1), %%mm1 \n\t"
+        "pfmul  (%3,%0), %%mm0 \n\t"
+        "pfmul 8(%3,%0), %%mm1 \n\t"
+        "movq  %%mm0,  (%2,%0) \n\t"
+        "movq  %%mm1, 8(%2,%0) \n\t"
+        "add   $16, %1 \n\t"
+        "sub   $16, %0 \n\t"
+        "jge   1b \n\t"
+        :"+r"(i), "+r"(src1)
+        :"r"(dst), "r"(src0)
+    );
+    asm volatile("femms");
+}
+static void vector_fmul_reverse_sse(float *dst, const float *src0, const float *src1, int len){
+    long i = len*4-32;
+    asm volatile(
+        "1: \n\t"
+        "movaps        16(%1), %%xmm0 \n\t"
+        "movaps          (%1), %%xmm1 \n\t"
+        "shufps $0x1b, %%xmm0, %%xmm0 \n\t"
+        "shufps $0x1b, %%xmm1, %%xmm1 \n\t"
+        "mulps        (%3,%0), %%xmm0 \n\t"
+        "mulps      16(%3,%0), %%xmm1 \n\t"
+        "movaps     %%xmm0,   (%2,%0) \n\t"
+        "movaps     %%xmm1, 16(%2,%0) \n\t"
+        "add    $32, %1 \n\t"
+        "sub    $32, %0 \n\t"
+        "jge    1b \n\t"
+        :"+r"(i), "+r"(src1)
+        :"r"(dst), "r"(src0)
+    );
+}
+
+static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float *src1,
+                                      const float *src2, int src3, int len, int step){
+    long i = (len-4)*4;
+    if(step == 2 && src3 == 0){
+        dst += (len-4)*2;
+        asm volatile(
+            "1: \n\t"
+            "movq   (%2,%0),  %%mm0 \n\t"
+            "movq  8(%2,%0),  %%mm1 \n\t"
+            "pfmul  (%3,%0),  %%mm0 \n\t"
+            "pfmul 8(%3,%0),  %%mm1 \n\t"
+            "pfadd  (%4,%0),  %%mm0 \n\t"
+            "pfadd 8(%4,%0),  %%mm1 \n\t"
+            "movd     %%mm0,   (%1) \n\t"
+            "movd     %%mm1, 16(%1) \n\t"
+            "psrlq      $32,  %%mm0 \n\t"
+            "psrlq      $32,  %%mm1 \n\t"
+            "movd     %%mm0,  8(%1) \n\t"
+            "movd     %%mm1, 24(%1) \n\t"
+            "sub  $32, %1 \n\t"
+            "sub  $16, %0 \n\t"
+            "jge  1b \n\t"
+            :"+r"(i), "+r"(dst)
+            :"r"(src0), "r"(src1), "r"(src2)
+            :"memory"
+        );
+    }
+    else if(step == 1 && src3 == 0){
+        asm volatile(
+            "1: \n\t"
+            "movq    (%2,%0), %%mm0 \n\t"
+            "movq   8(%2,%0), %%mm1 \n\t"
+            "pfmul   (%3,%0), %%mm0 \n\t"
+            "pfmul  8(%3,%0), %%mm1 \n\t"
+            "pfadd   (%4,%0), %%mm0 \n\t"
+            "pfadd  8(%4,%0), %%mm1 \n\t"
+            "movq  %%mm0,   (%1,%0) \n\t"
+            "movq  %%mm1,  8(%1,%0) \n\t"
+            "sub  $16, %0 \n\t"
+            "jge  1b \n\t"
+            :"+r"(i)
+            :"r"(dst), "r"(src0), "r"(src1), "r"(src2)
+            :"memory"
+        );
+    }
+    else
+        ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
+    asm volatile("femms");
+}
+static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *src1,
+                                    const float *src2, int src3, int len, int step){
+    long i = (len-8)*4;
+    if(step == 2 && src3 == 0){
+        dst += (len-8)*2;
+        asm volatile(
+            "1: \n\t"
+            "movaps   (%2,%0), %%xmm0 \n\t"
+            "movaps 16(%2,%0), %%xmm1 \n\t"
+            "mulps    (%3,%0), %%xmm0 \n\t"
+            "mulps  16(%3,%0), %%xmm1 \n\t"
+            "addps    (%4,%0), %%xmm0 \n\t"
+            "addps  16(%4,%0), %%xmm1 \n\t"
+            "movss     %%xmm0,   (%1) \n\t"
+            "movss     %%xmm1, 32(%1) \n\t"
+            "movhlps   %%xmm0, %%xmm2 \n\t"
+            "movhlps   %%xmm1, %%xmm3 \n\t"
+            "movss     %%xmm2, 16(%1) \n\t"
+            "movss     %%xmm3, 48(%1) \n\t"
+            "shufps $0xb1, %%xmm0, %%xmm0 \n\t"
+            "shufps $0xb1, %%xmm1, %%xmm1 \n\t"
+            "movss     %%xmm0,  8(%1) \n\t"
+            "movss     %%xmm1, 40(%1) \n\t"
+            "movhlps   %%xmm0, %%xmm2 \n\t"
+            "movhlps   %%xmm1, %%xmm3 \n\t"
+            "movss     %%xmm2, 24(%1) \n\t"
+            "movss     %%xmm3, 56(%1) \n\t"
+            "sub  $64, %1 \n\t"
+            "sub  $32, %0 \n\t"
+            "jge  1b \n\t"
+            :"+r"(i), "+r"(dst)
+            :"r"(src0), "r"(src1), "r"(src2)
+            :"memory"
+        );
+    }
+    else if(step == 1 && src3 == 0){
+        asm volatile(
+            "1: \n\t"
+            "movaps   (%2,%0), %%xmm0 \n\t"
+            "movaps 16(%2,%0), %%xmm1 \n\t"
+            "mulps    (%3,%0), %%xmm0 \n\t"
+            "mulps  16(%3,%0), %%xmm1 \n\t"
+            "addps    (%4,%0), %%xmm0 \n\t"
+            "addps  16(%4,%0), %%xmm1 \n\t"
+            "movaps %%xmm0,   (%1,%0) \n\t"
+            "movaps %%xmm1, 16(%1,%0) \n\t"
+            "sub  $32, %0 \n\t"
+            "jge  1b \n\t"
+            :"+r"(i)
+            :"r"(dst), "r"(src0), "r"(src1), "r"(src2)
+            :"memory"
+        );
+    }
+    else
+        ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
+}
+
+static void float_to_int16_3dnow(int16_t *dst, const float *src, int len){
+    // not bit-exact: pf2id uses different rounding than C and SSE
+    int i;
+    for(i=0; i<len; i+=4) {
+        asm volatile(
+            "pf2id       %1, %%mm0 \n\t"
+            "pf2id       %2, %%mm1 \n\t"
+            "packssdw %%mm1, %%mm0 \n\t"
+            "movq     %%mm0, %0    \n\t"
+            :"=m"(dst[i])
+            :"m"(src[i]), "m"(src[i+2])
+        );
+    }
+    asm volatile("femms");
+}
+static void float_to_int16_sse(int16_t *dst, const float *src, int len){
+    int i;
+    for(i=0; i<len; i+=4) {
+        asm volatile(
+            "cvtps2pi    %1, %%mm0 \n\t"
+            "cvtps2pi    %2, %%mm1 \n\t"
+            "packssdw %%mm1, %%mm0 \n\t"
+            "movq     %%mm0, %0    \n\t"
+            :"=m"(dst[i])
+            :"m"(src[i]), "m"(src[i+2])
+        );
+    }
+    asm volatile("emms");
+}
+
+#ifdef CONFIG_SNOW_ENCODER
+extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width);
+extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width);
+extern void ff_snow_vertical_compose97i_sse2(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
+extern void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
+extern void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+                           int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
+extern void ff_snow_inner_add_yblock_mmx(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+                          int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
+#endif
+
+void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
+{
+    mm_flags = mm_support();
+
+    if (avctx->dsp_mask) {
+        if (avctx->dsp_mask & FF_MM_FORCE)
+            mm_flags |= (avctx->dsp_mask & 0xffff);
+        else
+            mm_flags &= ~(avctx->dsp_mask & 0xffff);
+    }
+
+#if 0
+    av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:");
+    if (mm_flags & MM_MMX)
+        av_log(avctx, AV_LOG_INFO, " mmx");
+    if (mm_flags & MM_MMXEXT)
+        av_log(avctx, AV_LOG_INFO, " mmxext");
+    if (mm_flags & MM_3DNOW)
+        av_log(avctx, AV_LOG_INFO, " 3dnow");
+    if (mm_flags & MM_SSE)
+        av_log(avctx, AV_LOG_INFO, " sse");
+    if (mm_flags & MM_SSE2)
+        av_log(avctx, AV_LOG_INFO, " sse2");
+    av_log(avctx, AV_LOG_INFO, "\n");
+#endif
+
+    if (mm_flags & MM_MMX) {
+        const int idct_algo= avctx->idct_algo;
+
+#ifdef CONFIG_ENCODERS
+        const int dct_algo = avctx->dct_algo;
+        if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
+            if(mm_flags & MM_SSE2){
+                c->fdct = ff_fdct_sse2;
+            }else if(mm_flags & MM_MMXEXT){
+                c->fdct = ff_fdct_mmx2;
+            }else{
+                c->fdct = ff_fdct_mmx;
+            }
+        }
+#endif //CONFIG_ENCODERS
+        if(avctx->lowres==0){
+            if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){
+                c->idct_put= ff_simple_idct_put_mmx;
+                c->idct_add= ff_simple_idct_add_mmx;
+                c->idct    = ff_simple_idct_mmx;
+                c->idct_permutation_type= FF_SIMPLE_IDCT_PERM;
+#ifdef CONFIG_GPL
+            }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
+                if(mm_flags & MM_MMXEXT){
+                    c->idct_put= ff_libmpeg2mmx2_idct_put;
+                    c->idct_add= ff_libmpeg2mmx2_idct_add;
+                    c->idct    = ff_mmxext_idct;
+                }else{
+                    c->idct_put= ff_libmpeg2mmx_idct_put;
+                    c->idct_add= ff_libmpeg2mmx_idct_add;
+                    c->idct    = ff_mmx_idct;
+                }
+                c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
+#endif
+            }else if(idct_algo==FF_IDCT_VP3 &&
+                     avctx->codec->id!=CODEC_ID_THEORA &&
+                     !(avctx->flags & CODEC_FLAG_BITEXACT)){
+                if(mm_flags & MM_SSE2){
+                    c->idct_put= ff_vp3_idct_put_sse2;
+                    c->idct_add= ff_vp3_idct_add_sse2;
+                    c->idct    = ff_vp3_idct_sse2;
+                    c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
+                }else{
+                    ff_vp3_dsp_init_mmx();
+                    c->idct_put= ff_vp3_idct_put_mmx;
+                    c->idct_add= ff_vp3_idct_add_mmx;
+                    c->idct    = ff_vp3_idct_mmx;
+                    c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM;
+                }
+            }else if(idct_algo==FF_IDCT_CAVS){
+                    c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
+            }else if(idct_algo==FF_IDCT_XVIDMMX){
+                if(mm_flags & MM_MMXEXT){
+                    c->idct_put= ff_idct_xvid_mmx2_put;
+                    c->idct_add= ff_idct_xvid_mmx2_add;
+                    c->idct    = ff_idct_xvid_mmx2;
+                }else{
+                    c->idct_put= ff_idct_xvid_mmx_put;
+                    c->idct_add= ff_idct_xvid_mmx_add;
+                    c->idct    = ff_idct_xvid_mmx;
+                }
+            }
+        }
+
+#ifdef CONFIG_ENCODERS
+        c->get_pixels = get_pixels_mmx;
+        c->diff_pixels = diff_pixels_mmx;
+#endif //CONFIG_ENCODERS
+        c->put_pixels_clamped = put_pixels_clamped_mmx;
+        c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx;
+        c->add_pixels_clamped = add_pixels_clamped_mmx;
+        c->clear_blocks = clear_blocks_mmx;
+#ifdef CONFIG_ENCODERS
+        c->pix_sum = pix_sum16_mmx;
+#endif //CONFIG_ENCODERS
+
+        c->put_pixels_tab[0][0] = put_pixels16_mmx;
+        c->put_pixels_tab[0][1] = put_pixels16_x2_mmx;
+        c->put_pixels_tab[0][2] = put_pixels16_y2_mmx;
+        c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx;
+
+        c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx;
+        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
+        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
+        c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx;
+
+        c->avg_pixels_tab[0][0] = avg_pixels16_mmx;
+        c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx;
+        c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx;
+        c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
+
+        c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx;
+        c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx;
+        c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx;
+        c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx;
+
+        c->put_pixels_tab[1][0] = put_pixels8_mmx;
+        c->put_pixels_tab[1][1] = put_pixels8_x2_mmx;
+        c->put_pixels_tab[1][2] = put_pixels8_y2_mmx;
+        c->put_pixels_tab[1][3] = put_pixels8_xy2_mmx;
+
+        c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx;
+        c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
+        c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
+        c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx;
+
+        c->avg_pixels_tab[1][0] = avg_pixels8_mmx;
+        c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx;
+        c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
+        c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
+
+        c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx;
+        c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
+        c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
+        c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
+
+        c->gmc= gmc_mmx;
+
+        c->add_bytes= add_bytes_mmx;
+#ifdef CONFIG_ENCODERS
+        c->diff_bytes= diff_bytes_mmx;
+
+        c->hadamard8_diff[0]= hadamard8_diff16_mmx;
+        c->hadamard8_diff[1]= hadamard8_diff_mmx;
+
+        c->pix_norm1 = pix_norm1_mmx;
+        c->sse[0] = (mm_flags & MM_SSE2) ? sse16_sse2 : sse16_mmx;
+          c->sse[1] = sse8_mmx;
+        c->vsad[4]= vsad_intra16_mmx;
+
+        c->nsse[0] = nsse16_mmx;
+        c->nsse[1] = nsse8_mmx;
+        if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+            c->vsad[0] = vsad16_mmx;
+        }
+
+        if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+            c->try_8x8basis= try_8x8basis_mmx;
+        }
+        c->add_8x8basis= add_8x8basis_mmx;
+
+#endif //CONFIG_ENCODERS
+
+        c->h263_v_loop_filter= h263_v_loop_filter_mmx;
+        c->h263_h_loop_filter= h263_h_loop_filter_mmx;
+        c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx;
+        c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx;
+
+        c->h264_idct_dc_add=
+        c->h264_idct_add= ff_h264_idct_add_mmx;
+        c->h264_idct8_dc_add=
+        c->h264_idct8_add= ff_h264_idct8_add_mmx;
+
+        if (mm_flags & MM_MMXEXT) {
+            c->prefetch = prefetch_mmx2;
+
+            c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
+            c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
+
+            c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
+            c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
+            c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
+
+            c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
+            c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
+
+            c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
+            c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
+            c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
+
+#ifdef CONFIG_ENCODERS
+            c->hadamard8_diff[0]= hadamard8_diff16_mmx2;
+            c->hadamard8_diff[1]= hadamard8_diff_mmx2;
+            c->vsad[4]= vsad_intra16_mmx2;
+#endif //CONFIG_ENCODERS
+
+            c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
+            c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
+
+            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+                c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
+                c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
+                c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
+                c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
+                c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
+                c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
+#ifdef CONFIG_ENCODERS
+                c->vsad[0] = vsad16_mmx2;
+#endif //CONFIG_ENCODERS
+            }
+
+#if 1
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 3], qpel16_mc30_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 4], qpel16_mc01_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 5], qpel16_mc11_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 6], qpel16_mc21_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 7], qpel16_mc31_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 8], qpel16_mc02_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 9], qpel16_mc12_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][10], qpel16_mc22_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][11], qpel16_mc32_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][12], qpel16_mc03_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][13], qpel16_mc13_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][14], qpel16_mc23_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][15], qpel16_mc33_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 0], qpel8_mc00_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 1], qpel8_mc10_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 2], qpel8_mc20_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 3], qpel8_mc30_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 4], qpel8_mc01_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 5], qpel8_mc11_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 6], qpel8_mc21_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 7], qpel8_mc31_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 8], qpel8_mc02_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 9], qpel8_mc12_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][10], qpel8_mc22_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][11], qpel8_mc32_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][12], qpel8_mc03_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_mmx2)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_mmx2)
+#endif
+
+//FIXME 3dnow too
+#define dspfunc(PFX, IDX, NUM) \
+    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_mmx2; \
+    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_mmx2; \
+    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_mmx2; \
+    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_mmx2; \
+    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_mmx2; \
+    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_mmx2; \
+    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_mmx2; \
+    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_mmx2; \
+    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_mmx2; \
+    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_mmx2; \
+    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_mmx2; \
+    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_mmx2; \
+    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_mmx2; \
+    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_mmx2; \
+    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_mmx2; \
+    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_mmx2
+
+            dspfunc(put_h264_qpel, 0, 16);
+            dspfunc(put_h264_qpel, 1, 8);
+            dspfunc(put_h264_qpel, 2, 4);
+            dspfunc(avg_h264_qpel, 0, 16);
+            dspfunc(avg_h264_qpel, 1, 8);
+            dspfunc(avg_h264_qpel, 2, 4);
+
+            dspfunc(put_2tap_qpel, 0, 16);
+            dspfunc(put_2tap_qpel, 1, 8);
+            dspfunc(avg_2tap_qpel, 0, 16);
+            dspfunc(avg_2tap_qpel, 1, 8);
+#undef dspfunc
+
+            c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2;
+            c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2;
+            c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2;
+            c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2;
+            c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;
+            c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;
+            c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;
+            c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2;
+            c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2;
+            c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2;
+            c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
+
+            c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
+            c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
+            c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
+            c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
+            c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
+            c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
+            c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
+            c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
+
+            c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
+            c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
+            c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
+            c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
+            c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
+            c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
+            c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
+            c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
+
+#ifdef CONFIG_CAVS_DECODER
+            ff_cavsdsp_init_mmx2(c, avctx);
+#endif
+
+#ifdef CONFIG_ENCODERS
+            c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2;
+#endif //CONFIG_ENCODERS
+        } else if (mm_flags & MM_3DNOW) {
+            c->prefetch = prefetch_3dnow;
+
+            c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
+            c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
+
+            c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
+            c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
+            c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
+
+            c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
+            c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
+
+            c->avg_pixels_tab[1][0] = avg_pixels8_3dnow;
+            c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
+            c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
+
+            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+                c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
+                c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
+                c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
+                c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
+                c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
+                c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
+            }
+
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 3], qpel16_mc30_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 4], qpel16_mc01_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 5], qpel16_mc11_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 6], qpel16_mc21_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 7], qpel16_mc31_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 8], qpel16_mc02_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][ 9], qpel16_mc12_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][10], qpel16_mc22_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][11], qpel16_mc32_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][12], qpel16_mc03_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][13], qpel16_mc13_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][14], qpel16_mc23_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[0][15], qpel16_mc33_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 0], qpel8_mc00_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 1], qpel8_mc10_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 2], qpel8_mc20_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 3], qpel8_mc30_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 4], qpel8_mc01_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 5], qpel8_mc11_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 6], qpel8_mc21_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 7], qpel8_mc31_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 8], qpel8_mc02_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][ 9], qpel8_mc12_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][10], qpel8_mc22_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][11], qpel8_mc32_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][12], qpel8_mc03_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_3dnow)
+            SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_3dnow)
+
+#define dspfunc(PFX, IDX, NUM) \
+    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_3dnow; \
+    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_3dnow; \
+    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_3dnow; \
+    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_3dnow; \
+    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_3dnow; \
+    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_3dnow; \
+    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_3dnow; \
+    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_3dnow; \
+    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_3dnow; \
+    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_3dnow; \
+    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_3dnow; \
+    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_3dnow; \
+    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_3dnow; \
+    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_3dnow; \
+    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_3dnow; \
+    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_3dnow
+
+            dspfunc(put_h264_qpel, 0, 16);
+            dspfunc(put_h264_qpel, 1, 8);
+            dspfunc(put_h264_qpel, 2, 4);
+            dspfunc(avg_h264_qpel, 0, 16);
+            dspfunc(avg_h264_qpel, 1, 8);
+            dspfunc(avg_h264_qpel, 2, 4);
+
+            dspfunc(put_2tap_qpel, 0, 16);
+            dspfunc(put_2tap_qpel, 1, 8);
+            dspfunc(avg_2tap_qpel, 0, 16);
+            dspfunc(avg_2tap_qpel, 1, 8);
+
+            c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow;
+            c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
+        }
+
+#ifdef CONFIG_SNOW_ENCODER
+        if(mm_flags & MM_SSE2){
+            c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
+            c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
+            c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
+        }
+        else{
+            c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
+            c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
+            c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
+        }
+#endif
+
+        if(mm_flags & MM_3DNOW){
+            c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
+            c->vector_fmul = vector_fmul_3dnow;
+            if(!(avctx->flags & CODEC_FLAG_BITEXACT))
+                c->float_to_int16 = float_to_int16_3dnow;
+        }
+        if(mm_flags & MM_3DNOWEXT)
+            c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
+        if(mm_flags & MM_SSE){
+            c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
+            c->vector_fmul = vector_fmul_sse;
+            c->float_to_int16 = float_to_int16_sse;
+            c->vector_fmul_reverse = vector_fmul_reverse_sse;
+            c->vector_fmul_add_add = vector_fmul_add_add_sse;
+        }
+        if(mm_flags & MM_3DNOW)
+            c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse
+    }
+
+#ifdef CONFIG_ENCODERS
+    dsputil_init_pix_mmx(c, avctx);
+#endif //CONFIG_ENCODERS
+#if 0
+    // for speed testing
+    get_pixels = just_return;
+    put_pixels_clamped = just_return;
+    add_pixels_clamped = just_return;
+
+    pix_abs16x16 = just_return;
+    pix_abs16x16_x2 = just_return;
+    pix_abs16x16_y2 = just_return;
+    pix_abs16x16_xy2 = just_return;
+
+    put_pixels_tab[0] = just_return;
+    put_pixels_tab[1] = just_return;
+    put_pixels_tab[2] = just_return;
+    put_pixels_tab[3] = just_return;
+
+    put_no_rnd_pixels_tab[0] = just_return;
+    put_no_rnd_pixels_tab[1] = just_return;
+    put_no_rnd_pixels_tab[2] = just_return;
+    put_no_rnd_pixels_tab[3] = just_return;
+
+    avg_pixels_tab[0] = just_return;
+    avg_pixels_tab[1] = just_return;
+    avg_pixels_tab[2] = just_return;
+    avg_pixels_tab[3] = just_return;
+
+    avg_no_rnd_pixels_tab[0] = just_return;
+    avg_no_rnd_pixels_tab[1] = just_return;
+    avg_no_rnd_pixels_tab[2] = just_return;
+    avg_no_rnd_pixels_tab[3] = just_return;
+
+    //av_fdct = just_return;
+    //ff_idct = just_return;
+#endif
+}
diff --git a/contrib/ffmpeg/libavcodec/i386/dsputil_mmx_avg.h b/contrib/ffmpeg/libavcodec/i386/dsputil_mmx_avg.h
new file mode 100644
index 000000000..b365cea57
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/dsputil_mmx_avg.h
@@ -0,0 +1,870 @@
+/*
+ * DSP utils : average functions are compiled twice for 3dnow/mmx2
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
+ * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
+ * and improved by Zdenek Kabelac <kabi@users.sf.net>
+ */
+
+/* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm
+   clobber bug - now it will work with 2.95.2 and also with -fPIC
+ */
+static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    __asm __volatile(
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "1:                             \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        PAVGB" 1(%1), %%mm0             \n\t"
+        PAVGB" 1(%1, %3), %%mm1         \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        PAVGB" 1(%1), %%mm0             \n\t"
+        PAVGB" 1(%1, %3), %%mm1         \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
+}
+
+static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+    __asm __volatile(
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movd   (%1), %%mm0             \n\t"
+        "movd   (%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $4, %2                  \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        "movd   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movd   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movd   (%1), %%mm1             \n\t"
+        "movd   (%2), %%mm2             \n\t"
+        "movd   4(%2), %%mm3            \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" %%mm2, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm1             \n\t"
+        "movd   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movd   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movd   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movd   (%1), %%mm1             \n\t"
+        "movd   8(%2), %%mm2            \n\t"
+        "movd   12(%2), %%mm3           \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" %%mm2, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm1             \n\t"
+        "movd   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movd   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $16, %2                 \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
+}
+
+
+static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+    __asm __volatile(
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   (%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $8, %2                  \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" 8(%2), %%mm1             \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" 16(%2), %%mm0            \n\t"
+        PAVGB" 24(%2), %%mm1            \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $32, %2                 \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
+//the following should be used, though better not with gcc ...
+/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+        :"r"(src1Stride), "r"(dstStride)
+        :"memory");*/
+}
+
+static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+    __asm __volatile(
+        "pcmpeqb %%mm6, %%mm6           \n\t"
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   (%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $8, %2                  \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%2), %%mm2             \n\t"
+        "movq   8(%2), %%mm3            \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        "pxor %%mm6, %%mm3              \n\t"
+        PAVGB" %%mm2, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm1             \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   16(%2), %%mm2           \n\t"
+        "movq   24(%2), %%mm3           \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        "pxor %%mm6, %%mm3              \n\t"
+        PAVGB" %%mm2, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm1             \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $32, %2                 \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
+//the following should be used, though better not with gcc ...
+/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+        :"r"(src1Stride), "r"(dstStride)
+        :"memory");*/
+}
+
+static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+    __asm __volatile(
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movd   (%1), %%mm0             \n\t"
+        "movd   (%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $4, %2                  \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        "movd   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movd   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movd   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" 4(%2), %%mm1             \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        "movd   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        PAVGB" (%3), %%mm1              \n\t"
+        "movd   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movd   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movd   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" 8(%2), %%mm0             \n\t"
+        PAVGB" 12(%2), %%mm1            \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        "movd   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        PAVGB" (%3), %%mm1              \n\t"
+        "movd   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $16, %2                 \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
+}
+
+
+static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+    __asm __volatile(
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   (%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $8, %2                  \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" 8(%2), %%mm1             \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        PAVGB" (%3), %%mm1              \n\t"
+        "movq   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" 16(%2), %%mm0            \n\t"
+        PAVGB" 24(%2), %%mm1            \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        PAVGB" (%3), %%mm1              \n\t"
+        "movq   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $32, %2                 \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
+//the following should be used, though better not with gcc ...
+/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+        :"r"(src1Stride), "r"(dstStride)
+        :"memory");*/
+}
+
+static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    __asm __volatile(
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "1:                             \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq 8(%1), %%mm2              \n\t"
+        "movq 8(%1, %3), %%mm3          \n\t"
+        PAVGB" 1(%1), %%mm0             \n\t"
+        PAVGB" 1(%1, %3), %%mm1         \n\t"
+        PAVGB" 9(%1), %%mm2             \n\t"
+        PAVGB" 9(%1, %3), %%mm3         \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "movq %%mm2, 8(%2)              \n\t"
+        "movq %%mm3, 8(%2, %3)          \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq 8(%1), %%mm2              \n\t"
+        "movq 8(%1, %3), %%mm3          \n\t"
+        PAVGB" 1(%1), %%mm0             \n\t"
+        PAVGB" 1(%1, %3), %%mm1         \n\t"
+        PAVGB" 9(%1), %%mm2             \n\t"
+        PAVGB" 9(%1, %3), %%mm3         \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "movq %%mm2, 8(%2)              \n\t"
+        "movq %%mm3, 8(%2, %3)          \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
+}
+
+static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+    __asm __volatile(
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" 8(%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $16, %2                 \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" 8(%2), %%mm1             \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" 16(%2), %%mm0            \n\t"
+        PAVGB" 24(%2), %%mm1            \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $32, %2                 \n\t"
+        "subl   $2, %0                  \n\t"
+        "jnz    1b                      \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
+//the following should be used, though better not with gcc ...
+/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+        :"r"(src1Stride), "r"(dstStride)
+        :"memory");*/
+}
+
+static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+    __asm __volatile(
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" 8(%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $16, %2                 \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        PAVGB" 8(%3), %%mm1             \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" 8(%2), %%mm1             \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        PAVGB" 8(%3), %%mm1             \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" 16(%2), %%mm0            \n\t"
+        PAVGB" 24(%2), %%mm1            \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        PAVGB" 8(%3), %%mm1             \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $32, %2                 \n\t"
+        "subl   $2, %0                  \n\t"
+        "jnz    1b                      \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
+//the following should be used, though better not with gcc ...
+/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+        :"r"(src1Stride), "r"(dstStride)
+        :"memory");*/
+}
+
+static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+    __asm __volatile(
+        "pcmpeqb %%mm6, %%mm6           \n\t"
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        "movq   (%2), %%mm2             \n\t"
+        "movq   8(%2), %%mm3            \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        "pxor %%mm6, %%mm3              \n\t"
+        PAVGB" %%mm2, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm1             \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $16, %2                 \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%2), %%mm2             \n\t"
+        "movq   8(%2), %%mm3            \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        "pxor %%mm6, %%mm3              \n\t"
+        PAVGB" %%mm2, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm1             \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   16(%2), %%mm2           \n\t"
+        "movq   24(%2), %%mm3           \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        "pxor %%mm6, %%mm3              \n\t"
+        PAVGB" %%mm2, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm1             \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $32, %2                 \n\t"
+        "subl   $2, %0                  \n\t"
+        "jnz    1b                      \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
+//the following should be used, though better not with gcc ...
+/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+        :"r"(src1Stride), "r"(dstStride)
+        :"memory");*/
+}
+
+/* GL: this function does incorrect rounding if overflow */
+static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    MOVQ_BONE(mm6);
+    __asm __volatile(
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "1:                             \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq (%1, %3), %%mm2           \n\t"
+        "movq 1(%1), %%mm1              \n\t"
+        "movq 1(%1, %3), %%mm3          \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "psubusb %%mm6, %%mm0           \n\t"
+        "psubusb %%mm6, %%mm2           \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm2             \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm2, (%2, %3)           \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq 1(%1), %%mm1              \n\t"
+        "movq (%1, %3), %%mm2           \n\t"
+        "movq 1(%1, %3), %%mm3          \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "psubusb %%mm6, %%mm0           \n\t"
+        "psubusb %%mm6, %%mm2           \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm2             \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm2, (%2, %3)           \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
+}
+
+static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    __asm __volatile(
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "sub %3, %2                     \n\t"
+        "1:                             \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq (%1, %%"REG_a"), %%mm2    \n\t"
+        "add %%"REG_a", %1              \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm2, %%mm1             \n\t"
+        "movq %%mm0, (%2, %3)           \n\t"
+        "movq %%mm1, (%2, %%"REG_a")    \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "add %%"REG_a", %1              \n\t"
+        PAVGB" %%mm1, %%mm2             \n\t"
+        PAVGB" %%mm0, %%mm1             \n\t"
+        "movq %%mm2, (%2, %3)           \n\t"
+        "movq %%mm1, (%2, %%"REG_a")    \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D" (block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
+}
+
+/* GL: this function does incorrect rounding if overflow */
+static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    MOVQ_BONE(mm6);
+    __asm __volatile(
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "sub %3, %2                     \n\t"
+        "1:                             \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq (%1, %%"REG_a"), %%mm2    \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "psubusb %%mm6, %%mm1           \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm2, %%mm1             \n\t"
+        "movq %%mm0, (%2, %3)           \n\t"
+        "movq %%mm1, (%2, %%"REG_a")    \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "psubusb %%mm6, %%mm1           \n\t"
+        PAVGB" %%mm1, %%mm2             \n\t"
+        PAVGB" %%mm0, %%mm1             \n\t"
+        "movq %%mm2, (%2, %3)           \n\t"
+        "movq %%mm1, (%2, %%"REG_a")    \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D" (block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
+}
+
+static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    __asm __volatile(
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "1:                             \n\t"
+        "movq (%2), %%mm0               \n\t"
+        "movq (%2, %3), %%mm1           \n\t"
+        PAVGB" (%1), %%mm0              \n\t"
+        PAVGB" (%1, %3), %%mm1          \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "movq (%2), %%mm0               \n\t"
+        "movq (%2, %3), %%mm1           \n\t"
+        PAVGB" (%1), %%mm0              \n\t"
+        PAVGB" (%1, %3), %%mm1          \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
+}
+
+static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    __asm __volatile(
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "1:                             \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq (%1, %3), %%mm2           \n\t"
+        PAVGB" 1(%1), %%mm0             \n\t"
+        PAVGB" 1(%1, %3), %%mm2         \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" (%2, %3), %%mm2          \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm2, (%2, %3)           \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq (%1, %3), %%mm2           \n\t"
+        PAVGB" 1(%1), %%mm0             \n\t"
+        PAVGB" 1(%1, %3), %%mm2         \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "add %%"REG_a", %1              \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" (%2, %3), %%mm2          \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm2, (%2, %3)           \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
+}
+
+static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    __asm __volatile(
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "sub %3, %2                     \n\t"
+        "1:                             \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq (%1, %%"REG_a"), %%mm2    \n\t"
+        "add %%"REG_a", %1              \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm2, %%mm1             \n\t"
+        "movq (%2, %3), %%mm3           \n\t"
+        "movq (%2, %%"REG_a"), %%mm4    \n\t"
+        PAVGB" %%mm3, %%mm0             \n\t"
+        PAVGB" %%mm4, %%mm1             \n\t"
+        "movq %%mm0, (%2, %3)           \n\t"
+        "movq %%mm1, (%2, %%"REG_a")    \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        PAVGB" %%mm1, %%mm2             \n\t"
+        PAVGB" %%mm0, %%mm1             \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "movq (%2, %3), %%mm3           \n\t"
+        "movq (%2, %%"REG_a"), %%mm4    \n\t"
+        PAVGB" %%mm3, %%mm2             \n\t"
+        PAVGB" %%mm4, %%mm1             \n\t"
+        "movq %%mm2, (%2, %3)           \n\t"
+        "movq %%mm1, (%2, %%"REG_a")    \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
+}
+
+// Note this is not correctly rounded, but this function is only used for b frames so it doesnt matter
+static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    MOVQ_BONE(mm6);
+    __asm __volatile(
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "movq (%1), %%mm0               \n\t"
+        PAVGB" 1(%1), %%mm0             \n\t"
+         ASMALIGN(3)
+        "1:                             \n\t"
+        "movq (%1, %%"REG_a"), %%mm2    \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "psubusb %%mm6, %%mm2           \n\t"
+        PAVGB" 1(%1, %3), %%mm1         \n\t"
+        PAVGB" 1(%1, %%"REG_a"), %%mm2  \n\t"
+        "add %%"REG_a", %1              \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm2, %%mm1             \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" (%2, %3), %%mm1          \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        PAVGB" 1(%1, %3), %%mm1         \n\t"
+        PAVGB" 1(%1, %%"REG_a"), %%mm0  \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "add %%"REG_a", %1              \n\t"
+        PAVGB" %%mm1, %%mm2             \n\t"
+        PAVGB" %%mm0, %%mm1             \n\t"
+        PAVGB" (%2), %%mm2              \n\t"
+        PAVGB" (%2, %3), %%mm1          \n\t"
+        "movq %%mm2, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r" ((long)line_size)
+        :"%"REG_a,  "memory");
+}
+
+//FIXME the following could be optimized too ...
+static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+    DEF(put_no_rnd_pixels8_x2)(block  , pixels  , line_size, h);
+    DEF(put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h);
+}
+static void DEF(put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+    DEF(put_pixels8_y2)(block  , pixels  , line_size, h);
+    DEF(put_pixels8_y2)(block+8, pixels+8, line_size, h);
+}
+static void DEF(put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+    DEF(put_no_rnd_pixels8_y2)(block  , pixels  , line_size, h);
+    DEF(put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h);
+}
+static void DEF(avg_pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+    DEF(avg_pixels8)(block  , pixels  , line_size, h);
+    DEF(avg_pixels8)(block+8, pixels+8, line_size, h);
+}
+static void DEF(avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+    DEF(avg_pixels8_x2)(block  , pixels  , line_size, h);
+    DEF(avg_pixels8_x2)(block+8, pixels+8, line_size, h);
+}
+static void DEF(avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+    DEF(avg_pixels8_y2)(block  , pixels  , line_size, h);
+    DEF(avg_pixels8_y2)(block+8, pixels+8, line_size, h);
+}
+static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+    DEF(avg_pixels8_xy2)(block  , pixels  , line_size, h);
+    DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h);
+}
+
+#define QPEL_2TAP_L3(OPNAME) \
+static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
+    asm volatile(\
+        "1:                    \n\t"\
+        "movq   (%1,%2), %%mm0 \n\t"\
+        "movq  8(%1,%2), %%mm1 \n\t"\
+        PAVGB"  (%1,%3), %%mm0 \n\t"\
+        PAVGB" 8(%1,%3), %%mm1 \n\t"\
+        PAVGB"  (%1),    %%mm0 \n\t"\
+        PAVGB" 8(%1),    %%mm1 \n\t"\
+        STORE_OP( (%1,%4),%%mm0)\
+        STORE_OP(8(%1,%4),%%mm1)\
+        "movq  %%mm0,  (%1,%4) \n\t"\
+        "movq  %%mm1, 8(%1,%4) \n\t"\
+        "add   %5, %1          \n\t"\
+        "decl  %0              \n\t"\
+        "jnz   1b              \n\t"\
+        :"+g"(h), "+r"(src)\
+        :"r"((long)off1), "r"((long)off2),\
+         "r"((long)(dst-src)), "r"((long)stride)\
+        :"memory"\
+    );\
+}\
+static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
+    asm volatile(\
+        "1:                    \n\t"\
+        "movq   (%1,%2), %%mm0 \n\t"\
+        PAVGB"  (%1,%3), %%mm0 \n\t"\
+        PAVGB"  (%1),    %%mm0 \n\t"\
+        STORE_OP((%1,%4),%%mm0)\
+        "movq  %%mm0,  (%1,%4) \n\t"\
+        "add   %5, %1          \n\t"\
+        "decl  %0              \n\t"\
+        "jnz   1b              \n\t"\
+        :"+g"(h), "+r"(src)\
+        :"r"((long)off1), "r"((long)off2),\
+         "r"((long)(dst-src)), "r"((long)stride)\
+        :"memory"\
+    );\
+}
+
+#define STORE_OP(a,b) PAVGB" "#a","#b" \n\t"
+QPEL_2TAP_L3(avg_)
+#undef STORE_OP
+#define STORE_OP(a,b)
+QPEL_2TAP_L3(put_)
+#undef STORE_OP
+#undef QPEL_2TAP_L3
diff --git a/contrib/ffmpeg/libavcodec/i386/dsputil_mmx_rnd.h b/contrib/ffmpeg/libavcodec/i386/dsputil_mmx_rnd.h
new file mode 100644
index 000000000..f53b34662
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/dsputil_mmx_rnd.h
@@ -0,0 +1,592 @@
+/*
+ * DSP utils mmx functions are compiled twice for rnd/no_rnd
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ * Copyright (c) 2003-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
+ * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
+ * and improved by Zdenek Kabelac <kabi@users.sf.net>
+ */
+
+// put_pixels
+static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    MOVQ_BFE(mm6);
+    __asm __volatile(
+        "lea    (%3, %3), %%"REG_a"     \n\t"
+        ASMALIGN(3)
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   1(%1), %%mm1            \n\t"
+        "movq   (%1, %3), %%mm2         \n\t"
+        "movq   1(%1, %3), %%mm3        \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%2)             \n\t"
+        "movq   %%mm5, (%2, %3)         \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   1(%1), %%mm1            \n\t"
+        "movq   (%1, %3), %%mm2         \n\t"
+        "movq   1(%1, %3), %%mm3        \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%2)             \n\t"
+        "movq   %%mm5, (%2, %3)         \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r"((long)line_size)
+        :REG_a, "memory");
+}
+
+static void attribute_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+    MOVQ_BFE(mm6);
+    __asm __volatile(
+        "testl $1, %0                   \n\t"
+        " jz 1f                         \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   (%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $8, %2                  \n\t"
+        PAVGB(%%mm0, %%mm1, %%mm4, %%mm6)
+        "movq   %%mm4, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        ASMALIGN(3)
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   (%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm2             \n\t"
+        "movq   8(%2), %%mm3            \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   %%mm5, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   16(%2), %%mm1           \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm2             \n\t"
+        "movq   24(%2), %%mm3           \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $32, %2                 \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   %%mm5, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
+}
+
+static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    MOVQ_BFE(mm6);
+    __asm __volatile(
+        "lea        (%3, %3), %%"REG_a" \n\t"
+        ASMALIGN(3)
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   1(%1), %%mm1            \n\t"
+        "movq   (%1, %3), %%mm2         \n\t"
+        "movq   1(%1, %3), %%mm3        \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%2)             \n\t"
+        "movq   %%mm5, (%2, %3)         \n\t"
+        "movq   8(%1), %%mm0            \n\t"
+        "movq   9(%1), %%mm1            \n\t"
+        "movq   8(%1, %3), %%mm2        \n\t"
+        "movq   9(%1, %3), %%mm3        \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, 8(%2)            \n\t"
+        "movq   %%mm5, 8(%2, %3)        \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   1(%1), %%mm1            \n\t"
+        "movq   (%1, %3), %%mm2         \n\t"
+        "movq   1(%1, %3), %%mm3        \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%2)             \n\t"
+        "movq   %%mm5, (%2, %3)         \n\t"
+        "movq   8(%1), %%mm0            \n\t"
+        "movq   9(%1), %%mm1            \n\t"
+        "movq   8(%1, %3), %%mm2        \n\t"
+        "movq   9(%1, %3), %%mm3        \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, 8(%2)            \n\t"
+        "movq   %%mm5, 8(%2, %3)        \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r"((long)line_size)
+        :REG_a, "memory");
+}
+
+static void attribute_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+    MOVQ_BFE(mm6);
+    __asm __volatile(
+        "testl $1, %0                   \n\t"
+        " jz 1f                         \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   (%2), %%mm1             \n\t"
+        "movq   8(%1), %%mm2            \n\t"
+        "movq   8(%2), %%mm3            \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $16, %2                 \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%3)             \n\t"
+        "movq   %%mm5, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        ASMALIGN(3)
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   (%2), %%mm1             \n\t"
+        "movq   8(%1), %%mm2            \n\t"
+        "movq   8(%2), %%mm3            \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%3)             \n\t"
+        "movq   %%mm5, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   16(%2), %%mm1           \n\t"
+        "movq   8(%1), %%mm2            \n\t"
+        "movq   24(%2), %%mm3           \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%3)             \n\t"
+        "movq   %%mm5, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $32, %2                 \n\t"
+        "subl   $2, %0                  \n\t"
+        "jnz    1b                      \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
+}
+
+static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    MOVQ_BFE(mm6);
+    __asm __volatile(
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "movq (%1), %%mm0               \n\t"
+        ASMALIGN(3)
+        "1:                             \n\t"
+        "movq   (%1, %3), %%mm1         \n\t"
+        "movq   (%1, %%"REG_a"),%%mm2   \n\t"
+        PAVGBP(%%mm1, %%mm0, %%mm4,   %%mm2, %%mm1, %%mm5)
+        "movq   %%mm4, (%2)             \n\t"
+        "movq   %%mm5, (%2, %3)         \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
+        "movq   (%1, %3), %%mm1         \n\t"
+        "movq   (%1, %%"REG_a"),%%mm0   \n\t"
+        PAVGBP(%%mm1, %%mm2, %%mm4,   %%mm0, %%mm1, %%mm5)
+        "movq   %%mm4, (%2)             \n\t"
+        "movq   %%mm5, (%2, %3)         \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r"((long)line_size)
+        :REG_a, "memory");
+}
+
+static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    MOVQ_ZERO(mm7);
+    SET_RND(mm6); // =2 for rnd  and  =1 for no_rnd version
+    __asm __volatile(
+        "movq   (%1), %%mm0             \n\t"
+        "movq   1(%1), %%mm4            \n\t"
+        "movq   %%mm0, %%mm1            \n\t"
+        "movq   %%mm4, %%mm5            \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpcklbw %%mm7, %%mm4         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "punpckhbw %%mm7, %%mm5         \n\t"
+        "paddusw %%mm0, %%mm4           \n\t"
+        "paddusw %%mm1, %%mm5           \n\t"
+        "xor    %%"REG_a", %%"REG_a"    \n\t"
+        "add    %3, %1                  \n\t"
+        ASMALIGN(3)
+        "1:                             \n\t"
+        "movq   (%1, %%"REG_a"), %%mm0  \n\t"
+        "movq   1(%1, %%"REG_a"), %%mm2 \n\t"
+        "movq   %%mm0, %%mm1            \n\t"
+        "movq   %%mm2, %%mm3            \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpcklbw %%mm7, %%mm2         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "punpckhbw %%mm7, %%mm3         \n\t"
+        "paddusw %%mm2, %%mm0           \n\t"
+        "paddusw %%mm3, %%mm1           \n\t"
+        "paddusw %%mm6, %%mm4           \n\t"
+        "paddusw %%mm6, %%mm5           \n\t"
+        "paddusw %%mm0, %%mm4           \n\t"
+        "paddusw %%mm1, %%mm5           \n\t"
+        "psrlw  $2, %%mm4               \n\t"
+        "psrlw  $2, %%mm5               \n\t"
+        "packuswb  %%mm5, %%mm4         \n\t"
+        "movq   %%mm4, (%2, %%"REG_a")  \n\t"
+        "add    %3, %%"REG_a"           \n\t"
+
+        "movq   (%1, %%"REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3
+        "movq   1(%1, %%"REG_a"), %%mm4 \n\t"
+        "movq   %%mm2, %%mm3            \n\t"
+        "movq   %%mm4, %%mm5            \n\t"
+        "punpcklbw %%mm7, %%mm2         \n\t"
+        "punpcklbw %%mm7, %%mm4         \n\t"
+        "punpckhbw %%mm7, %%mm3         \n\t"
+        "punpckhbw %%mm7, %%mm5         \n\t"
+        "paddusw %%mm2, %%mm4           \n\t"
+        "paddusw %%mm3, %%mm5           \n\t"
+        "paddusw %%mm6, %%mm0           \n\t"
+        "paddusw %%mm6, %%mm1           \n\t"
+        "paddusw %%mm4, %%mm0           \n\t"
+        "paddusw %%mm5, %%mm1           \n\t"
+        "psrlw  $2, %%mm0               \n\t"
+        "psrlw  $2, %%mm1               \n\t"
+        "packuswb  %%mm1, %%mm0         \n\t"
+        "movq   %%mm0, (%2, %%"REG_a")  \n\t"
+        "add    %3, %%"REG_a"           \n\t"
+
+        "subl   $2, %0                  \n\t"
+        "jnz    1b                      \n\t"
+        :"+g"(h), "+S"(pixels)
+        :"D"(block), "r"((long)line_size)
+        :REG_a, "memory");
+}
+
+// avg_pixels
+static void attribute_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    MOVQ_BFE(mm6);
+    JUMPALIGN();
+    do {
+        __asm __volatile(
+             "movd  %0, %%mm0           \n\t"
+             "movd  %1, %%mm1           \n\t"
+             PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+             "movd  %%mm2, %0           \n\t"
+             :"+m"(*block)
+             :"m"(*pixels)
+             :"memory");
+        pixels += line_size;
+        block += line_size;
+    }
+    while (--h);
+}
+
+// in case more speed is needed - unroling would certainly help
+static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    MOVQ_BFE(mm6);
+    JUMPALIGN();
+    do {
+        __asm __volatile(
+             "movq  %0, %%mm0           \n\t"
+             "movq  %1, %%mm1           \n\t"
+             PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+             "movq  %%mm2, %0           \n\t"
+             :"+m"(*block)
+             :"m"(*pixels)
+             :"memory");
+        pixels += line_size;
+        block += line_size;
+    }
+    while (--h);
+}
+
+static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    MOVQ_BFE(mm6);
+    JUMPALIGN();
+    do {
+        __asm __volatile(
+             "movq  %0, %%mm0           \n\t"
+             "movq  %1, %%mm1           \n\t"
+             PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+             "movq  %%mm2, %0           \n\t"
+             "movq  8%0, %%mm0          \n\t"
+             "movq  8%1, %%mm1          \n\t"
+             PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+             "movq  %%mm2, 8%0          \n\t"
+             :"+m"(*block)
+             :"m"(*pixels)
+             :"memory");
+        pixels += line_size;
+        block += line_size;
+    }
+    while (--h);
+}
+
+static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    MOVQ_BFE(mm6);
+    JUMPALIGN();
+    do {
+        __asm __volatile(
+            "movq  %1, %%mm0            \n\t"
+            "movq  1%1, %%mm1           \n\t"
+            "movq  %0, %%mm3            \n\t"
+            PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+            PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+            "movq  %%mm0, %0            \n\t"
+            :"+m"(*block)
+            :"m"(*pixels)
+            :"memory");
+        pixels += line_size;
+        block += line_size;
+    } while (--h);
+}
+
+static __attribute__((unused)) void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+    MOVQ_BFE(mm6);
+    JUMPALIGN();
+    do {
+        __asm __volatile(
+            "movq  %1, %%mm0            \n\t"
+            "movq  %2, %%mm1            \n\t"
+            "movq  %0, %%mm3            \n\t"
+            PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+            PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+            "movq  %%mm0, %0            \n\t"
+            :"+m"(*dst)
+            :"m"(*src1), "m"(*src2)
+            :"memory");
+        dst += dstStride;
+        src1 += src1Stride;
+        src2 += 8;
+    } while (--h);
+}
+
+static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    MOVQ_BFE(mm6);
+    JUMPALIGN();
+    do {
+        __asm __volatile(
+            "movq  %1, %%mm0            \n\t"
+            "movq  1%1, %%mm1           \n\t"
+            "movq  %0, %%mm3            \n\t"
+            PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+            PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+            "movq  %%mm0, %0            \n\t"
+            "movq  8%1, %%mm0           \n\t"
+            "movq  9%1, %%mm1           \n\t"
+            "movq  8%0, %%mm3           \n\t"
+            PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+            PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+            "movq  %%mm0, 8%0           \n\t"
+            :"+m"(*block)
+            :"m"(*pixels)
+            :"memory");
+        pixels += line_size;
+        block += line_size;
+    } while (--h);
+}
+
+static __attribute__((unused)) void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+    MOVQ_BFE(mm6);
+    JUMPALIGN();
+    do {
+        __asm __volatile(
+            "movq  %1, %%mm0            \n\t"
+            "movq  %2, %%mm1            \n\t"
+            "movq  %0, %%mm3            \n\t"
+            PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+            PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+            "movq  %%mm0, %0            \n\t"
+            "movq  8%1, %%mm0           \n\t"
+            "movq  8%2, %%mm1           \n\t"
+            "movq  8%0, %%mm3           \n\t"
+            PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+            PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+            "movq  %%mm0, 8%0           \n\t"
+            :"+m"(*dst)
+            :"m"(*src1), "m"(*src2)
+            :"memory");
+        dst += dstStride;
+        src1 += src1Stride;
+        src2 += 16;
+    } while (--h);
+}
+
+static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    MOVQ_BFE(mm6);
+    __asm __volatile(
+        "lea    (%3, %3), %%"REG_a"     \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        ASMALIGN(3)
+        "1:                             \n\t"
+        "movq   (%1, %3), %%mm1         \n\t"
+        "movq   (%1, %%"REG_a"), %%mm2  \n\t"
+        PAVGBP(%%mm1, %%mm0, %%mm4,   %%mm2, %%mm1, %%mm5)
+        "movq   (%2), %%mm3             \n\t"
+        PAVGB(%%mm3, %%mm4, %%mm0, %%mm6)
+        "movq   (%2, %3), %%mm3         \n\t"
+        PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
+        "movq   %%mm0, (%2)             \n\t"
+        "movq   %%mm1, (%2, %3)         \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
+
+        "movq   (%1, %3), %%mm1         \n\t"
+        "movq   (%1, %%"REG_a"), %%mm0  \n\t"
+        PAVGBP(%%mm1, %%mm2, %%mm4,   %%mm0, %%mm1, %%mm5)
+        "movq   (%2), %%mm3             \n\t"
+        PAVGB(%%mm3, %%mm4, %%mm2, %%mm6)
+        "movq   (%2, %3), %%mm3         \n\t"
+        PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
+        "movq   %%mm2, (%2)             \n\t"
+        "movq   %%mm1, (%2, %3)         \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
+
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r"((long)line_size)
+        :REG_a, "memory");
+}
+
+// this routine is 'slightly' suboptimal but mostly unused
+static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    MOVQ_ZERO(mm7);
+    SET_RND(mm6); // =2 for rnd  and  =1 for no_rnd version
+    __asm __volatile(
+        "movq   (%1), %%mm0             \n\t"
+        "movq   1(%1), %%mm4            \n\t"
+        "movq   %%mm0, %%mm1            \n\t"
+        "movq   %%mm4, %%mm5            \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpcklbw %%mm7, %%mm4         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "punpckhbw %%mm7, %%mm5         \n\t"
+        "paddusw %%mm0, %%mm4           \n\t"
+        "paddusw %%mm1, %%mm5           \n\t"
+        "xor    %%"REG_a", %%"REG_a"    \n\t"
+        "add    %3, %1                  \n\t"
+        ASMALIGN(3)
+        "1:                             \n\t"
+        "movq   (%1, %%"REG_a"), %%mm0  \n\t"
+        "movq   1(%1, %%"REG_a"), %%mm2 \n\t"
+        "movq   %%mm0, %%mm1            \n\t"
+        "movq   %%mm2, %%mm3            \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpcklbw %%mm7, %%mm2         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "punpckhbw %%mm7, %%mm3         \n\t"
+        "paddusw %%mm2, %%mm0           \n\t"
+        "paddusw %%mm3, %%mm1           \n\t"
+        "paddusw %%mm6, %%mm4           \n\t"
+        "paddusw %%mm6, %%mm5           \n\t"
+        "paddusw %%mm0, %%mm4           \n\t"
+        "paddusw %%mm1, %%mm5           \n\t"
+        "psrlw  $2, %%mm4               \n\t"
+        "psrlw  $2, %%mm5               \n\t"
+                "movq   (%2, %%"REG_a"), %%mm3  \n\t"
+        "packuswb  %%mm5, %%mm4         \n\t"
+                "pcmpeqd %%mm2, %%mm2   \n\t"
+                "paddb %%mm2, %%mm2     \n\t"
+                PAVGB(%%mm3, %%mm4, %%mm5, %%mm2)
+                "movq   %%mm5, (%2, %%"REG_a")  \n\t"
+        "add    %3, %%"REG_a"                \n\t"
+
+        "movq   (%1, %%"REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3
+        "movq   1(%1, %%"REG_a"), %%mm4 \n\t"
+        "movq   %%mm2, %%mm3            \n\t"
+        "movq   %%mm4, %%mm5            \n\t"
+        "punpcklbw %%mm7, %%mm2         \n\t"
+        "punpcklbw %%mm7, %%mm4         \n\t"
+        "punpckhbw %%mm7, %%mm3         \n\t"
+        "punpckhbw %%mm7, %%mm5         \n\t"
+        "paddusw %%mm2, %%mm4           \n\t"
+        "paddusw %%mm3, %%mm5           \n\t"
+        "paddusw %%mm6, %%mm0           \n\t"
+        "paddusw %%mm6, %%mm1           \n\t"
+        "paddusw %%mm4, %%mm0           \n\t"
+        "paddusw %%mm5, %%mm1           \n\t"
+        "psrlw  $2, %%mm0               \n\t"
+        "psrlw  $2, %%mm1               \n\t"
+                "movq   (%2, %%"REG_a"), %%mm3  \n\t"
+        "packuswb  %%mm1, %%mm0         \n\t"
+                "pcmpeqd %%mm2, %%mm2   \n\t"
+                "paddb %%mm2, %%mm2     \n\t"
+                PAVGB(%%mm3, %%mm0, %%mm1, %%mm2)
+                "movq   %%mm1, (%2, %%"REG_a")  \n\t"
+        "add    %3, %%"REG_a"           \n\t"
+
+        "subl   $2, %0                  \n\t"
+        "jnz    1b                      \n\t"
+        :"+g"(h), "+S"(pixels)
+        :"D"(block), "r"((long)line_size)
+        :REG_a, "memory");
+}
+
+//FIXME optimize
+static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+    DEF(put, pixels8_y2)(block  , pixels  , line_size, h);
+    DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h);
+}
+
+static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+    DEF(put, pixels8_xy2)(block  , pixels  , line_size, h);
+    DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h);
+}
+
+static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+    DEF(avg, pixels8_y2)(block  , pixels  , line_size, h);
+    DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h);
+}
+
+static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+    DEF(avg, pixels8_xy2)(block  , pixels  , line_size, h);
+    DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h);
+}
+
+
diff --git a/contrib/ffmpeg/libavcodec/i386/fdct_mmx.c b/contrib/ffmpeg/libavcodec/i386/fdct_mmx.c
new file mode 100644
index 000000000..2ffbfecf6
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/fdct_mmx.c
@@ -0,0 +1,566 @@
+/*
+ * MMX optimized forward DCT
+ * The gcc porting is Copyright (c) 2001 Fabrice Bellard.
+ * cleanup/optimizations are Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * SSE2 optimization is Copyright (c) 2004 Denes Balatoni.
+ *
+ * from  fdctam32.c - AP922 MMX(3D-Now) forward-DCT
+ *
+ *  Intel Application Note AP-922 - fast, precise implementation of DCT
+ *        http://developer.intel.com/vtune/cbts/appnotes.htm
+ *
+ * Also of inspiration:
+ * a page about fdct at http://www.geocities.com/ssavekar/dct.htm
+ * Skal's fdct at http://skal.planet-d.net/coding/dct.html
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "common.h"
+#include "../dsputil.h"
+#include "mmx.h"
+
+#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))
+
+//////////////////////////////////////////////////////////////////////
+//
+// constants for the forward DCT
+// -----------------------------
+//
+// Be sure to check that your compiler is aligning all constants to QWORD
+// (8-byte) memory boundaries!  Otherwise the unaligned memory access will
+// severely stall MMX execution.
+//
+//////////////////////////////////////////////////////////////////////
+
+#define BITS_FRW_ACC   3 //; 2 or 3 for accuracy
+#define SHIFT_FRW_COL  BITS_FRW_ACC
+#define SHIFT_FRW_ROW  (BITS_FRW_ACC + 17 - 3)
+#define RND_FRW_ROW    (1 << (SHIFT_FRW_ROW-1))
+//#define RND_FRW_COL    (1 << (SHIFT_FRW_COL-1))
+
+//concatenated table, for forward DCT transformation
+static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = {
+    13036,  13036,  13036,  13036,        // tg * (2<<16) + 0.5
+    27146,  27146,  27146,  27146,        // tg * (2<<16) + 0.5
+   -21746, -21746, -21746, -21746,        // tg * (2<<16) + 0.5
+};
+
+static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = {
+    23170, 23170, 23170, 23170,           //cos * (2<<15) + 0.5
+};
+
+static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL;
+
+static const int32_t fdct_r_row[2] ATTR_ALIGN(8) = {RND_FRW_ROW, RND_FRW_ROW };
+
+static struct
+{
+ const int32_t fdct_r_row_sse2[4] ATTR_ALIGN(16);
+} fdct_r_row_sse2 ATTR_ALIGN(16)=
+{{
+ RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW
+}};
+//static const long fdct_r_row_sse2[4] ATTR_ALIGN(16) = {RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW};
+
+static const int16_t tab_frw_01234567[] ATTR_ALIGN(8) = {  // forward_dct coeff table
+  16384,   16384,   22725,   19266,
+  16384,   16384,   12873,    4520,
+  21407,    8867,   19266,   -4520,
+  -8867,  -21407,  -22725,  -12873,
+  16384,  -16384,   12873,  -22725,
+ -16384,   16384,    4520,   19266,
+   8867,  -21407,    4520,  -12873,
+  21407,   -8867,   19266,  -22725,
+
+  22725,   22725,   31521,   26722,
+  22725,   22725,   17855,    6270,
+  29692,   12299,   26722,   -6270,
+ -12299,  -29692,  -31521,  -17855,
+  22725,  -22725,   17855,  -31521,
+ -22725,   22725,    6270,   26722,
+  12299,  -29692,    6270,  -17855,
+  29692,  -12299,   26722,  -31521,
+
+  21407,   21407,   29692,   25172,
+  21407,   21407,   16819,    5906,
+  27969,   11585,   25172,   -5906,
+ -11585,  -27969,  -29692,  -16819,
+  21407,  -21407,   16819,  -29692,
+ -21407,   21407,    5906,   25172,
+  11585,  -27969,    5906,  -16819,
+  27969,  -11585,   25172,  -29692,
+
+  19266,   19266,   26722,   22654,
+  19266,   19266,   15137,    5315,
+  25172,   10426,   22654,   -5315,
+ -10426,  -25172,  -26722,  -15137,
+  19266,  -19266,   15137,  -26722,
+ -19266,   19266,    5315,   22654,
+  10426,  -25172,    5315,  -15137,
+  25172,  -10426,   22654,  -26722,
+
+  16384,   16384,   22725,   19266,
+  16384,   16384,   12873,    4520,
+  21407,    8867,   19266,   -4520,
+  -8867,  -21407,  -22725,  -12873,
+  16384,  -16384,   12873,  -22725,
+ -16384,   16384,    4520,   19266,
+   8867,  -21407,    4520,  -12873,
+  21407,   -8867,   19266,  -22725,
+
+  19266,   19266,   26722,   22654,
+  19266,   19266,   15137,    5315,
+  25172,   10426,   22654,   -5315,
+ -10426,  -25172,  -26722,  -15137,
+  19266,  -19266,   15137,  -26722,
+ -19266,   19266,    5315,   22654,
+  10426,  -25172,    5315,  -15137,
+  25172,  -10426,   22654,  -26722,
+
+  21407,   21407,   29692,   25172,
+  21407,   21407,   16819,    5906,
+  27969,   11585,   25172,   -5906,
+ -11585,  -27969,  -29692,  -16819,
+  21407,  -21407,   16819,  -29692,
+ -21407,   21407,    5906,   25172,
+  11585,  -27969,    5906,  -16819,
+  27969,  -11585,   25172,  -29692,
+
+  22725,   22725,   31521,   26722,
+  22725,   22725,   17855,    6270,
+  29692,   12299,   26722,   -6270,
+ -12299,  -29692,  -31521,  -17855,
+  22725,  -22725,   17855,  -31521,
+ -22725,   22725,    6270,   26722,
+  12299,  -29692,    6270,  -17855,
+  29692,  -12299,   26722,  -31521,
+};
+
+static struct
+{
+ const int16_t tab_frw_01234567_sse2[256] ATTR_ALIGN(16);
+} tab_frw_01234567_sse2 ATTR_ALIGN(16) =
+{{
+//static const int16_t tab_frw_01234567_sse2[] ATTR_ALIGN(16) = {  // forward_dct coeff table
+#define TABLE_SSE2 C4,  C4,  C1,  C3, -C6, -C2, -C1, -C5, \
+                   C4,  C4,  C5,  C7,  C2,  C6,  C3, -C7, \
+                  -C4,  C4,  C7,  C3,  C6, -C2,  C7, -C5, \
+                   C4, -C4,  C5, -C1,  C2, -C6,  C3, -C1,
+// c1..c7 * cos(pi/4) * 2^15
+#define C1 22725
+#define C2 21407
+#define C3 19266
+#define C4 16384
+#define C5 12873
+#define C6 8867
+#define C7 4520
+TABLE_SSE2
+
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#define C1 31521
+#define C2 29692
+#define C3 26722
+#define C4 22725
+#define C5 17855
+#define C6 12299
+#define C7 6270
+TABLE_SSE2
+
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#define C1 29692
+#define C2 27969
+#define C3 25172
+#define C4 21407
+#define C5 16819
+#define C6 11585
+#define C7 5906
+TABLE_SSE2
+
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#define C1 26722
+#define C2 25172
+#define C3 22654
+#define C4 19266
+#define C5 15137
+#define C6 10426
+#define C7 5315
+TABLE_SSE2
+
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#define C1 22725
+#define C2 21407
+#define C3 19266
+#define C4 16384
+#define C5 12873
+#define C6 8867
+#define C7 4520
+TABLE_SSE2
+
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#define C1 26722
+#define C2 25172
+#define C3 22654
+#define C4 19266
+#define C5 15137
+#define C6 10426
+#define C7 5315
+TABLE_SSE2
+
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#define C1 29692
+#define C2 27969
+#define C3 25172
+#define C4 21407
+#define C5 16819
+#define C6 11585
+#define C7 5906
+TABLE_SSE2
+
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#define C1 31521
+#define C2 29692
+#define C3 26722
+#define C4 22725
+#define C5 17855
+#define C6 12299
+#define C7 6270
+TABLE_SSE2
+}};
+
+
+static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
+{
+    movq_m2r(*(in + offset + 1 * 8), mm0);
+    movq_m2r(*(in + offset + 6 * 8), mm1);
+    movq_r2r(mm0, mm2);
+    movq_m2r(*(in + offset + 2 * 8), mm3);
+    paddsw_r2r(mm1, mm0);
+    movq_m2r(*(in + offset + 5 * 8), mm4);
+    psllw_i2r(SHIFT_FRW_COL, mm0);
+    movq_m2r(*(in + offset + 0 * 8), mm5);
+    paddsw_r2r(mm3, mm4);
+    paddsw_m2r(*(in + offset + 7 * 8), mm5);
+    psllw_i2r(SHIFT_FRW_COL, mm4);
+    movq_r2r(mm0, mm6);
+    psubsw_r2r(mm1, mm2);
+    movq_m2r(*(fdct_tg_all_16 + 4), mm1);
+    psubsw_r2r(mm4, mm0);
+    movq_m2r(*(in + offset + 3 * 8), mm7);
+    pmulhw_r2r(mm0, mm1);
+    paddsw_m2r(*(in + offset + 4 * 8), mm7);
+    psllw_i2r(SHIFT_FRW_COL, mm5);
+    paddsw_r2r(mm4, mm6);
+    psllw_i2r(SHIFT_FRW_COL, mm7);
+    movq_r2r(mm5, mm4);
+    psubsw_r2r(mm7, mm5);
+    paddsw_r2r(mm5, mm1);
+    paddsw_r2r(mm7, mm4);
+    por_m2r(fdct_one_corr, mm1);
+    psllw_i2r(SHIFT_FRW_COL + 1, mm2);
+    pmulhw_m2r(*(fdct_tg_all_16 + 4), mm5);
+    movq_r2r(mm4, mm7);
+    psubsw_m2r(*(in + offset + 5 * 8), mm3);
+    psubsw_r2r(mm6, mm4);
+    movq_r2m(mm1, *(out + offset + 2 * 8));
+    paddsw_r2r(mm6, mm7);
+    movq_m2r(*(in + offset + 3 * 8), mm1);
+    psllw_i2r(SHIFT_FRW_COL + 1, mm3);
+    psubsw_m2r(*(in + offset + 4 * 8), mm1);
+    movq_r2r(mm2, mm6);
+    movq_r2m(mm4, *(out + offset + 4 * 8));
+    paddsw_r2r(mm3, mm2);
+    pmulhw_m2r(*ocos_4_16, mm2);
+    psubsw_r2r(mm3, mm6);
+    pmulhw_m2r(*ocos_4_16, mm6);
+    psubsw_r2r(mm0, mm5);
+    por_m2r(fdct_one_corr, mm5);
+    psllw_i2r(SHIFT_FRW_COL, mm1);
+    por_m2r(fdct_one_corr, mm2);
+    movq_r2r(mm1, mm4);
+    movq_m2r(*(in + offset + 0 * 8), mm3);
+    paddsw_r2r(mm6, mm1);
+    psubsw_m2r(*(in + offset + 7 * 8), mm3);
+    psubsw_r2r(mm6, mm4);
+    movq_m2r(*(fdct_tg_all_16 + 0), mm0);
+    psllw_i2r(SHIFT_FRW_COL, mm3);
+    movq_m2r(*(fdct_tg_all_16 + 8), mm6);
+    pmulhw_r2r(mm1, mm0);
+    movq_r2m(mm7, *(out + offset + 0 * 8));
+    pmulhw_r2r(mm4, mm6);
+    movq_r2m(mm5, *(out + offset + 6 * 8));
+    movq_r2r(mm3, mm7);
+    movq_m2r(*(fdct_tg_all_16 + 8), mm5);
+    psubsw_r2r(mm2, mm7);
+    paddsw_r2r(mm2, mm3);
+    pmulhw_r2r(mm7, mm5);
+    paddsw_r2r(mm3, mm0);
+    paddsw_r2r(mm4, mm6);
+    pmulhw_m2r(*(fdct_tg_all_16 + 0), mm3);
+    por_m2r(fdct_one_corr, mm0);
+    paddsw_r2r(mm7, mm5);
+    psubsw_r2r(mm6, mm7);
+    movq_r2m(mm0, *(out + offset + 1 * 8));
+    paddsw_r2r(mm4, mm5);
+    movq_r2m(mm7, *(out + offset + 3 * 8));
+    psubsw_r2r(mm1, mm3);
+    movq_r2m(mm5, *(out + offset + 5 * 8));
+    movq_r2m(mm3, *(out + offset + 7 * 8));
+}
+
+
+static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
+{
+    asm volatile(
+#define FDCT_ROW_SSE2_H1(i,t)                    \
+        "movq      " #i "(%0), %%xmm2      \n\t" \
+        "movq      " #i "+8(%0), %%xmm0    \n\t" \
+        "movdqa    " #t "+32(%1), %%xmm3   \n\t" \
+        "movdqa    " #t "+48(%1), %%xmm7   \n\t" \
+        "movdqa    " #t "(%1), %%xmm4      \n\t" \
+        "movdqa    " #t "+16(%1), %%xmm5   \n\t"
+
+#define FDCT_ROW_SSE2_H2(i,t)                    \
+        "movq      " #i "(%0), %%xmm2      \n\t" \
+        "movq      " #i "+8(%0), %%xmm0    \n\t" \
+        "movdqa    " #t "+32(%1), %%xmm3   \n\t" \
+        "movdqa    " #t "+48(%1), %%xmm7   \n\t"
+
+#define FDCT_ROW_SSE2(i)                      \
+        "movq      %%xmm2, %%xmm1       \n\t" \
+        "pshuflw   $27, %%xmm0, %%xmm0  \n\t" \
+        "paddsw    %%xmm0, %%xmm1       \n\t" \
+        "psubsw    %%xmm0, %%xmm2       \n\t" \
+        "punpckldq %%xmm2, %%xmm1       \n\t" \
+        "pshufd    $78, %%xmm1, %%xmm2  \n\t" \
+        "pmaddwd   %%xmm2, %%xmm3       \n\t" \
+        "pmaddwd   %%xmm1, %%xmm7       \n\t" \
+        "pmaddwd   %%xmm5, %%xmm2       \n\t" \
+        "pmaddwd   %%xmm4, %%xmm1       \n\t" \
+        "paddd     %%xmm7, %%xmm3       \n\t" \
+        "paddd     %%xmm2, %%xmm1       \n\t" \
+        "paddd     %%xmm6, %%xmm3       \n\t" \
+        "paddd     %%xmm6, %%xmm1       \n\t" \
+        "psrad     %3, %%xmm3           \n\t" \
+        "psrad     %3, %%xmm1           \n\t" \
+        "packssdw  %%xmm3, %%xmm1       \n\t" \
+        "movdqa    %%xmm1, " #i "(%4)   \n\t"
+
+        "movdqa    (%2), %%xmm6         \n\t"
+        FDCT_ROW_SSE2_H1(0,0)
+        FDCT_ROW_SSE2(0)
+        FDCT_ROW_SSE2_H2(64,0)
+        FDCT_ROW_SSE2(64)
+
+        FDCT_ROW_SSE2_H1(16,64)
+        FDCT_ROW_SSE2(16)
+        FDCT_ROW_SSE2_H2(112,64)
+        FDCT_ROW_SSE2(112)
+
+        FDCT_ROW_SSE2_H1(32,128)
+        FDCT_ROW_SSE2(32)
+        FDCT_ROW_SSE2_H2(96,128)
+        FDCT_ROW_SSE2(96)
+
+        FDCT_ROW_SSE2_H1(48,192)
+        FDCT_ROW_SSE2(48)
+        FDCT_ROW_SSE2_H2(80,192)
+        FDCT_ROW_SSE2(80)
+        :
+        : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
+    );
+}
+
+static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
+{
+    pshufw_m2r(*(in + 4), mm5, 0x1B);
+    movq_m2r(*(in + 0), mm0);
+    movq_r2r(mm0, mm1);
+    paddsw_r2r(mm5, mm0);
+    psubsw_r2r(mm5, mm1);
+    movq_r2r(mm0, mm2);
+    punpckldq_r2r(mm1, mm0);
+    punpckhdq_r2r(mm1, mm2);
+    movq_m2r(*(table + 0), mm1);
+    movq_m2r(*(table + 4), mm3);
+    movq_m2r(*(table + 8), mm4);
+    movq_m2r(*(table + 12), mm5);
+    movq_m2r(*(table + 16), mm6);
+    movq_m2r(*(table + 20), mm7);
+    pmaddwd_r2r(mm0, mm1);
+    pmaddwd_r2r(mm2, mm3);
+    pmaddwd_r2r(mm0, mm4);
+    pmaddwd_r2r(mm2, mm5);
+    pmaddwd_r2r(mm0, mm6);
+    pmaddwd_r2r(mm2, mm7);
+    pmaddwd_m2r(*(table + 24), mm0);
+    pmaddwd_m2r(*(table + 28), mm2);
+    paddd_r2r(mm1, mm3);
+    paddd_r2r(mm4, mm5);
+    paddd_r2r(mm6, mm7);
+    paddd_r2r(mm0, mm2);
+    movq_m2r(*fdct_r_row, mm0);
+    paddd_r2r(mm0, mm3);
+    paddd_r2r(mm0, mm5);
+    paddd_r2r(mm0, mm7);
+    paddd_r2r(mm0, mm2);
+    psrad_i2r(SHIFT_FRW_ROW, mm3);
+    psrad_i2r(SHIFT_FRW_ROW, mm5);
+    psrad_i2r(SHIFT_FRW_ROW, mm7);
+    psrad_i2r(SHIFT_FRW_ROW, mm2);
+    packssdw_r2r(mm5, mm3);
+    packssdw_r2r(mm2, mm7);
+    movq_r2m(mm3, *(out + 0));
+    movq_r2m(mm7, *(out + 4));
+}
+
+static always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
+{
+//FIXME reorder (i dont have a old mmx only cpu here to benchmark ...)
+    movd_m2r(*(in + 6), mm1);
+    punpcklwd_m2r(*(in + 4), mm1);
+    movq_r2r(mm1, mm2);
+    psrlq_i2r(0x20, mm1);
+    movq_m2r(*(in + 0), mm0);
+    punpcklwd_r2r(mm2, mm1);
+    movq_r2r(mm0, mm5);
+    paddsw_r2r(mm1, mm0);
+    psubsw_r2r(mm1, mm5);
+    movq_r2r(mm0, mm2);
+    punpckldq_r2r(mm5, mm0);
+    punpckhdq_r2r(mm5, mm2);
+    movq_m2r(*(table + 0), mm1);
+    movq_m2r(*(table + 4), mm3);
+    movq_m2r(*(table + 8), mm4);
+    movq_m2r(*(table + 12), mm5);
+    movq_m2r(*(table + 16), mm6);
+    movq_m2r(*(table + 20), mm7);
+    pmaddwd_r2r(mm0, mm1);
+    pmaddwd_r2r(mm2, mm3);
+    pmaddwd_r2r(mm0, mm4);
+    pmaddwd_r2r(mm2, mm5);
+    pmaddwd_r2r(mm0, mm6);
+    pmaddwd_r2r(mm2, mm7);
+    pmaddwd_m2r(*(table + 24), mm0);
+    pmaddwd_m2r(*(table + 28), mm2);
+    paddd_r2r(mm1, mm3);
+    paddd_r2r(mm4, mm5);
+    paddd_r2r(mm6, mm7);
+    paddd_r2r(mm0, mm2);
+    movq_m2r(*fdct_r_row, mm0);
+    paddd_r2r(mm0, mm3);
+    paddd_r2r(mm0, mm5);
+    paddd_r2r(mm0, mm7);
+    paddd_r2r(mm0, mm2);
+    psrad_i2r(SHIFT_FRW_ROW, mm3);
+    psrad_i2r(SHIFT_FRW_ROW, mm5);
+    psrad_i2r(SHIFT_FRW_ROW, mm7);
+    psrad_i2r(SHIFT_FRW_ROW, mm2);
+    packssdw_r2r(mm5, mm3);
+    packssdw_r2r(mm2, mm7);
+    movq_r2m(mm3, *(out + 0));
+    movq_r2m(mm7, *(out + 4));
+}
+
+void ff_fdct_mmx(int16_t *block)
+{
+    int64_t align_tmp[16] ATTR_ALIGN(8);
+    int16_t * block1= (int16_t*)align_tmp;
+    const int16_t *table= tab_frw_01234567;
+    int i;
+
+    fdct_col(block, block1, 0);
+    fdct_col(block, block1, 4);
+
+    for(i=8;i>0;i--) {
+        fdct_row_mmx(block1, block, table);
+        block1 += 8;
+        table += 32;
+        block += 8;
+    }
+}
+
+void ff_fdct_mmx2(int16_t *block)
+{
+    int64_t align_tmp[16] ATTR_ALIGN(8);
+    int16_t *block1= (int16_t*)align_tmp;
+    const int16_t *table= tab_frw_01234567;
+    int i;
+
+    fdct_col(block, block1, 0);
+    fdct_col(block, block1, 4);
+
+    for(i=8;i>0;i--) {
+        fdct_row_mmx2(block1, block, table);
+        block1 += 8;
+        table += 32;
+        block += 8;
+    }
+}
+
+void ff_fdct_sse2(int16_t *block)
+{
+    int64_t align_tmp[16] ATTR_ALIGN(16);
+    int16_t * const block1= (int16_t*)align_tmp;
+
+    fdct_col(block, block1, 0);
+    fdct_col(block, block1, 4);
+
+    fdct_row_sse2(block1, block);
+}
+
diff --git a/contrib/ffmpeg/libavcodec/i386/fft_3dn.c b/contrib/ffmpeg/libavcodec/i386/fft_3dn.c
new file mode 100644
index 000000000..8087f1932
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/fft_3dn.c
@@ -0,0 +1,125 @@
+/*
+ * FFT/MDCT transform with 3DNow! optimizations
+ * Copyright (c) 2006 Zuxy MENG Jie, Loren Merritt
+ * Based on fft_sse.c copyright (c) 2002 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "../dsputil.h"
+
+static const int p1m1[2] __attribute__((aligned(8))) =
+    { 0, 1 << 31 };
+
+static const int m1p1[2] __attribute__((aligned(8))) =
+    { 1 << 31, 0 };
+
+void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z)
+{
+    int ln = s->nbits;
+    long i, j;
+    long nblocks, nloops;
+    FFTComplex *p, *cptr;
+
+    asm volatile(
+        /* FEMMS is not a must here but recommended by AMD */
+        "femms \n\t"
+        "movq %0, %%mm7 \n\t"
+        ::"m"(*(s->inverse ? m1p1 : p1m1))
+    );
+
+    i = 8 << ln;
+    asm volatile(
+        "1: \n\t"
+        "sub $32, %0 \n\t"
+        "movq    (%0,%1), %%mm0 \n\t"
+        "movq  16(%0,%1), %%mm1 \n\t"
+        "movq   8(%0,%1), %%mm2 \n\t"
+        "movq  24(%0,%1), %%mm3 \n\t"
+        "movq      %%mm0, %%mm4 \n\t"
+        "movq      %%mm1, %%mm5 \n\t"
+        "pfadd     %%mm2, %%mm0 \n\t"
+        "pfadd     %%mm3, %%mm1 \n\t"
+        "pfsub     %%mm2, %%mm4 \n\t"
+        "pfsub     %%mm3, %%mm5 \n\t"
+        "movq      %%mm0, %%mm2 \n\t"
+        "punpckldq %%mm5, %%mm6 \n\t"
+        "punpckhdq %%mm6, %%mm5 \n\t"
+        "movq      %%mm4, %%mm3 \n\t"
+        "pxor      %%mm7, %%mm5 \n\t"
+        "pfadd     %%mm1, %%mm0 \n\t"
+        "pfadd     %%mm5, %%mm4 \n\t"
+        "pfsub     %%mm1, %%mm2 \n\t"
+        "pfsub     %%mm5, %%mm3 \n\t"
+        "movq      %%mm0,   (%0,%1) \n\t"
+        "movq      %%mm4,  8(%0,%1) \n\t"
+        "movq      %%mm2, 16(%0,%1) \n\t"
+        "movq      %%mm3, 24(%0,%1) \n\t"
+        "jg 1b \n\t"
+        :"+r"(i)
+        :"r"(z)
+    );
+    /* pass 2 .. ln-1 */
+
+    nblocks = 1 << (ln-3);
+    nloops = 1 << 2;
+    cptr = s->exptab1;
+    do {
+        p = z;
+        j = nblocks;
+        do {
+            i = nloops*8;
+            asm volatile(
+                "1: \n\t"
+                "sub $16, %0 \n\t"
+                "movq    (%1,%0), %%mm0 \n\t"
+                "movq   8(%1,%0), %%mm1 \n\t"
+                "movq    (%2,%0), %%mm2 \n\t"
+                "movq   8(%2,%0), %%mm3 \n\t"
+                "movq      %%mm2, %%mm4 \n\t"
+                "movq      %%mm3, %%mm5 \n\t"
+                "punpckldq %%mm2, %%mm2 \n\t"
+                "punpckldq %%mm3, %%mm3 \n\t"
+                "punpckhdq %%mm4, %%mm4 \n\t"
+                "punpckhdq %%mm5, %%mm5 \n\t"
+                "pfmul   (%3,%0,2), %%mm2 \n\t" //  cre*re cim*re
+                "pfmul  8(%3,%0,2), %%mm3 \n\t"
+                "pfmul 16(%3,%0,2), %%mm4 \n\t" // -cim*im cre*im
+                "pfmul 24(%3,%0,2), %%mm5 \n\t"
+                "pfadd     %%mm2, %%mm4 \n\t" // cre*re-cim*im cim*re+cre*im
+                "pfadd     %%mm3, %%mm5 \n\t"
+                "movq      %%mm0, %%mm2 \n\t"
+                "movq      %%mm1, %%mm3 \n\t"
+                "pfadd     %%mm4, %%mm0 \n\t"
+                "pfadd     %%mm5, %%mm1 \n\t"
+                "pfsub     %%mm4, %%mm2 \n\t"
+                "pfsub     %%mm5, %%mm3 \n\t"
+                "movq      %%mm0,  (%1,%0) \n\t"
+                "movq      %%mm1, 8(%1,%0) \n\t"
+                "movq      %%mm2,  (%2,%0) \n\t"
+                "movq      %%mm3, 8(%2,%0) \n\t"
+                "jg 1b \n\t"
+                :"+r"(i)
+                :"r"(p), "r"(p + nloops), "r"(cptr)
+            );
+            p += nloops*2;
+        } while (--j);
+        cptr += nloops*2;
+        nblocks >>= 1;
+        nloops <<= 1;
+    } while (nblocks != 0);
+    asm volatile("femms");
+}
diff --git a/contrib/ffmpeg/libavcodec/i386/fft_3dn2.c b/contrib/ffmpeg/libavcodec/i386/fft_3dn2.c
new file mode 100644
index 000000000..a4fe5f0b6
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/fft_3dn2.c
@@ -0,0 +1,210 @@
+/*
+ * FFT/MDCT transform with Extended 3DNow! optimizations
+ * Copyright (c) 2006 Zuxy MENG Jie, Loren Merritt
+ * Based on fft_sse.c copyright (c) 2002 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "../dsputil.h"
+
+static const int p1m1[2] __attribute__((aligned(8))) =
+    { 0, 1 << 31 };
+
+static const int m1p1[2] __attribute__((aligned(8))) =
+    { 1 << 31, 0 };
+
+void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z)
+{
+    int ln = s->nbits;
+    long i, j;
+    long nblocks, nloops;
+    FFTComplex *p, *cptr;
+
+    asm volatile(
+        /* FEMMS is not a must here but recommended by AMD */
+        "femms \n\t"
+        "movq %0, %%mm7 \n\t"
+        ::"m"(*(s->inverse ? m1p1 : p1m1))
+    );
+
+    i = 8 << ln;
+    asm volatile(
+        "1: \n\t"
+        "sub $32, %0 \n\t"
+        "movq    (%0,%1), %%mm0 \n\t"
+        "movq  16(%0,%1), %%mm1 \n\t"
+        "movq   8(%0,%1), %%mm2 \n\t"
+        "movq  24(%0,%1), %%mm3 \n\t"
+        "movq      %%mm0, %%mm4 \n\t"
+        "movq      %%mm1, %%mm5 \n\t"
+        "pfadd     %%mm2, %%mm0 \n\t"
+        "pfadd     %%mm3, %%mm1 \n\t"
+        "pfsub     %%mm2, %%mm4 \n\t"
+        "pfsub     %%mm3, %%mm5 \n\t"
+        "movq      %%mm0, %%mm2 \n\t"
+        "pswapd    %%mm5, %%mm5 \n\t"
+        "movq      %%mm4, %%mm3 \n\t"
+        "pxor      %%mm7, %%mm5 \n\t"
+        "pfadd     %%mm1, %%mm0 \n\t"
+        "pfadd     %%mm5, %%mm4 \n\t"
+        "pfsub     %%mm1, %%mm2 \n\t"
+        "pfsub     %%mm5, %%mm3 \n\t"
+        "movq      %%mm0,   (%0,%1) \n\t"
+        "movq      %%mm4,  8(%0,%1) \n\t"
+        "movq      %%mm2, 16(%0,%1) \n\t"
+        "movq      %%mm3, 24(%0,%1) \n\t"
+        "jg 1b \n\t"
+        :"+r"(i)
+        :"r"(z)
+    );
+    /* pass 2 .. ln-1 */
+
+    nblocks = 1 << (ln-3);
+    nloops = 1 << 2;
+    cptr = s->exptab1;
+    do {
+        p = z;
+        j = nblocks;
+        do {
+            i = nloops*8;
+            asm volatile(
+                "1: \n\t"
+                "sub $16, %0 \n\t"
+                "movq    (%1,%0), %%mm0 \n\t"
+                "movq   8(%1,%0), %%mm1 \n\t"
+                "movq    (%2,%0), %%mm2 \n\t"
+                "movq   8(%2,%0), %%mm3 \n\t"
+                "movq  (%3,%0,2), %%mm4 \n\t"
+                "movq 8(%3,%0,2), %%mm5 \n\t"
+                "pswapd    %%mm4, %%mm6 \n\t" // no need for cptr[2] & cptr[3]
+                "pswapd    %%mm5, %%mm7 \n\t"
+                "pfmul     %%mm2, %%mm4 \n\t" // cre*re cim*im
+                "pfmul     %%mm3, %%mm5 \n\t"
+                "pfmul     %%mm2, %%mm6 \n\t" // cim*re cre*im
+                "pfmul     %%mm3, %%mm7 \n\t"
+                "pfpnacc   %%mm6, %%mm4 \n\t" // cre*re-cim*im cim*re+cre*im
+                "pfpnacc   %%mm7, %%mm5 \n\t"
+                "movq      %%mm0, %%mm2 \n\t"
+                "movq      %%mm1, %%mm3 \n\t"
+                "pfadd     %%mm4, %%mm0 \n\t"
+                "pfadd     %%mm5, %%mm1 \n\t"
+                "pfsub     %%mm4, %%mm2 \n\t"
+                "pfsub     %%mm5, %%mm3 \n\t"
+                "movq      %%mm0,  (%1,%0) \n\t"
+                "movq      %%mm1, 8(%1,%0) \n\t"
+                "movq      %%mm2,  (%2,%0) \n\t"
+                "movq      %%mm3, 8(%2,%0) \n\t"
+                "jg 1b \n\t"
+                :"+r"(i)
+                :"r"(p), "r"(p + nloops), "r"(cptr)
+            );
+            p += nloops*2;
+        } while (--j);
+        cptr += nloops*2;
+        nblocks >>= 1;
+        nloops <<= 1;
+    } while (nblocks != 0);
+    asm volatile("femms");
+}
+
+void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output,
+                        const FFTSample *input, FFTSample *tmp)
+{
+    long k, n8, n4, n2, n;
+    const uint16_t *revtab = s->fft.revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    const FFTSample *in1, *in2;
+    FFTComplex *z = (FFTComplex *)tmp;
+
+    n = 1 << s->nbits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+
+    /* pre rotation */
+    in1 = input;
+    in2 = input + n2 - 1;
+    for(k = 0; k < n4; k++) {
+        // FIXME a single block is faster, but gcc 2.95 and 3.4.x on 32bit can't compile it
+        asm volatile(
+            "movd       %0, %%mm0 \n\t"
+            "movd       %2, %%mm1 \n\t"
+            "punpckldq  %1, %%mm0 \n\t"
+            "punpckldq  %3, %%mm1 \n\t"
+            "movq    %%mm0, %%mm2 \n\t"
+            "pfmul   %%mm1, %%mm0 \n\t"
+            "pswapd  %%mm1, %%mm1 \n\t"
+            "pfmul   %%mm1, %%mm2 \n\t"
+            "pfpnacc %%mm2, %%mm0 \n\t"
+            ::"m"(in2[-2*k]), "m"(in1[2*k]),
+              "m"(tcos[k]), "m"(tsin[k])
+        );
+        asm volatile(
+            "movq    %%mm0, %0    \n\t"
+            :"=m"(z[revtab[k]])
+        );
+    }
+
+    ff_fft_calc(&s->fft, z);
+
+    /* post rotation + reordering */
+    for(k = 0; k < n4; k++) {
+        asm volatile(
+            "movq       %0, %%mm0 \n\t"
+            "movd       %1, %%mm1 \n\t"
+            "punpckldq  %2, %%mm1 \n\t"
+            "movq    %%mm0, %%mm2 \n\t"
+            "pfmul   %%mm1, %%mm0 \n\t"
+            "pswapd  %%mm1, %%mm1 \n\t"
+            "pfmul   %%mm1, %%mm2 \n\t"
+            "pfpnacc %%mm2, %%mm0 \n\t"
+            "movq    %%mm0, %0    \n\t"
+            :"+m"(z[k])
+            :"m"(tcos[k]), "m"(tsin[k])
+        );
+    }
+
+    k = n-8;
+    asm volatile("movd %0, %%mm7" ::"r"(1<<31));
+    asm volatile(
+        "1: \n\t"
+        "movq    (%4,%0), %%mm0 \n\t" // z[n8+k]
+        "neg %0 \n\t"
+        "pswapd -8(%4,%0), %%mm1 \n\t" // z[n8-1-k]
+        "movq      %%mm0, %%mm2 \n\t"
+        "pxor      %%mm7, %%mm2 \n\t"
+        "punpckldq %%mm1, %%mm2 \n\t"
+        "pswapd    %%mm2, %%mm3 \n\t"
+        "punpckhdq %%mm1, %%mm0 \n\t"
+        "pswapd    %%mm0, %%mm4 \n\t"
+        "pxor      %%mm7, %%mm0 \n\t"
+        "pxor      %%mm7, %%mm4 \n\t"
+        "movq      %%mm3, -8(%3,%0) \n\t" // output[n-2-2*k] = { z[n8-1-k].im, -z[n8+k].re }
+        "movq      %%mm4, -8(%2,%0) \n\t" // output[n2-2-2*k]= { -z[n8-1-k].re, z[n8+k].im }
+        "neg %0 \n\t"
+        "movq      %%mm0, (%1,%0) \n\t"   // output[2*k]     = { -z[n8+k].im, z[n8-1-k].re }
+        "movq      %%mm2, (%2,%0) \n\t"   // output[n2+2*k]  = { -z[n8+k].re, z[n8-1-k].im }
+        "sub $8, %0 \n\t"
+        "jge 1b \n\t"
+        :"+r"(k)
+        :"r"(output), "r"(output+n2), "r"(output+n), "r"(z+n8)
+        :"memory"
+    );
+    asm volatile("femms");
+}
+
diff --git a/contrib/ffmpeg/libavcodec/i386/fft_sse.c b/contrib/ffmpeg/libavcodec/i386/fft_sse.c
new file mode 100644
index 000000000..0dc0c61c1
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/fft_sse.c
@@ -0,0 +1,247 @@
+/*
+ * FFT/MDCT transform with SSE optimizations
+ * Copyright (c) 2002 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "../dsputil.h"
+
+static const int p1p1p1m1[4] __attribute__((aligned(16))) =
+    { 0, 0, 0, 1 << 31 };
+
+static const int p1p1m1p1[4] __attribute__((aligned(16))) =
+    { 0, 0, 1 << 31, 0 };
+
+static const int p1p1m1m1[4] __attribute__((aligned(16))) =
+    { 0, 0, 1 << 31, 1 << 31 };
+
+static const int p1m1p1m1[4] __attribute__((aligned(16))) =
+    { 0, 1 << 31, 0, 1 << 31 };
+
+static const int m1m1m1m1[4] __attribute__((aligned(16))) =
+    { 1 << 31, 1 << 31, 1 << 31, 1 << 31 };
+
+#if 0
+static void print_v4sf(const char *str, __m128 a)
+{
+    float *p = (float *)&a;
+    printf("%s: %f %f %f %f\n",
+           str, p[0], p[1], p[2], p[3]);
+}
+#endif
+
+/* XXX: handle reverse case */
+void ff_fft_calc_sse(FFTContext *s, FFTComplex *z)
+{
+    int ln = s->nbits;
+    long i, j;
+    long nblocks, nloops;
+    FFTComplex *p, *cptr;
+
+    asm volatile(
+        "movaps %0, %%xmm4 \n\t"
+        "movaps %1, %%xmm5 \n\t"
+        ::"m"(*p1p1m1m1),
+          "m"(*(s->inverse ? p1p1m1p1 : p1p1p1m1))
+    );
+
+    i = 8 << ln;
+    asm volatile(
+        "1: \n\t"
+        "sub $32, %0 \n\t"
+        /* do the pass 0 butterfly */
+        "movaps   (%0,%1), %%xmm0 \n\t"
+        "movaps    %%xmm0, %%xmm1 \n\t"
+        "shufps     $0x4E, %%xmm0, %%xmm0 \n\t"
+        "xorps     %%xmm4, %%xmm1 \n\t"
+        "addps     %%xmm1, %%xmm0 \n\t"
+        "movaps 16(%0,%1), %%xmm2 \n\t"
+        "movaps    %%xmm2, %%xmm3 \n\t"
+        "shufps     $0x4E, %%xmm2, %%xmm2 \n\t"
+        "xorps     %%xmm4, %%xmm3 \n\t"
+        "addps     %%xmm3, %%xmm2 \n\t"
+        /* multiply third by -i */
+        /* by toggling the sign bit */
+        "shufps     $0xB4, %%xmm2, %%xmm2 \n\t"
+        "xorps     %%xmm5, %%xmm2 \n\t"
+        /* do the pass 1 butterfly */
+        "movaps    %%xmm0, %%xmm1 \n\t"
+        "addps     %%xmm2, %%xmm0 \n\t"
+        "subps     %%xmm2, %%xmm1 \n\t"
+        "movaps    %%xmm0,   (%0,%1) \n\t"
+        "movaps    %%xmm1, 16(%0,%1) \n\t"
+        "jg 1b \n\t"
+        :"+r"(i)
+        :"r"(z)
+    );
+    /* pass 2 .. ln-1 */
+
+    nblocks = 1 << (ln-3);
+    nloops = 1 << 2;
+    cptr = s->exptab1;
+    do {
+        p = z;
+        j = nblocks;
+        do {
+            i = nloops*8;
+            asm volatile(
+                "1: \n\t"
+                "sub $16, %0 \n\t"
+                "movaps    (%2,%0), %%xmm1 \n\t"
+                "movaps    (%1,%0), %%xmm0 \n\t"
+                "movaps     %%xmm1, %%xmm2 \n\t"
+                "shufps      $0xA0, %%xmm1, %%xmm1 \n\t"
+                "shufps      $0xF5, %%xmm2, %%xmm2 \n\t"
+                "mulps   (%3,%0,2), %%xmm1 \n\t" //  cre*re cim*re
+                "mulps 16(%3,%0,2), %%xmm2 \n\t" // -cim*im cre*im
+                "addps      %%xmm2, %%xmm1 \n\t"
+                "movaps     %%xmm0, %%xmm3 \n\t"
+                "addps      %%xmm1, %%xmm0 \n\t"
+                "subps      %%xmm1, %%xmm3 \n\t"
+                "movaps     %%xmm0, (%1,%0) \n\t"
+                "movaps     %%xmm3, (%2,%0) \n\t"
+                "jg 1b \n\t"
+                :"+r"(i)
+                :"r"(p), "r"(p + nloops), "r"(cptr)
+            );
+            p += nloops*2;
+        } while (--j);
+        cptr += nloops*2;
+        nblocks >>= 1;
+        nloops <<= 1;
+    } while (nblocks != 0);
+}
+
+void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output,
+                       const FFTSample *input, FFTSample *tmp)
+{
+    long k, n8, n4, n2, n;
+    const uint16_t *revtab = s->fft.revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    const FFTSample *in1, *in2;
+    FFTComplex *z = (FFTComplex *)tmp;
+
+    n = 1 << s->nbits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+
+    asm volatile ("movaps %0, %%xmm7\n\t"::"m"(*p1m1p1m1));
+
+    /* pre rotation */
+    in1 = input;
+    in2 = input + n2 - 4;
+
+    /* Complex multiplication
+       Two complex products per iteration, we could have 4 with 8 xmm
+       registers, 8 with 16 xmm registers.
+       Maybe we should unroll more.
+    */
+    for (k = 0; k < n4; k += 2) {
+        asm volatile (
+            "movaps          %0, %%xmm0 \n\t"   // xmm0 = r0 X  r1 X : in2
+            "movaps          %1, %%xmm3 \n\t"   // xmm3 = X  i1 X  i0: in1
+            "movlps          %2, %%xmm1 \n\t"   // xmm1 = X  X  R1 R0: tcos
+            "movlps          %3, %%xmm2 \n\t"   // xmm2 = X  X  I1 I0: tsin
+            "shufps $95, %%xmm0, %%xmm0 \n\t"   // xmm0 = r1 r1 r0 r0
+            "shufps $160,%%xmm3, %%xmm3 \n\t"   // xmm3 = i1 i1 i0 i0
+            "unpcklps    %%xmm2, %%xmm1 \n\t"   // xmm1 = I1 R1 I0 R0
+            "movaps      %%xmm1, %%xmm2 \n\t"   // xmm2 = I1 R1 I0 R0
+            "xorps       %%xmm7, %%xmm2 \n\t"   // xmm2 = -I1 R1 -I0 R0
+            "mulps       %%xmm1, %%xmm0 \n\t"   // xmm0 = rI rR rI rR
+            "shufps $177,%%xmm2, %%xmm2 \n\t"   // xmm2 = R1 -I1 R0 -I0
+            "mulps       %%xmm2, %%xmm3 \n\t"   // xmm3 = Ri -Ii Ri -Ii
+            "addps       %%xmm3, %%xmm0 \n\t"   // xmm0 = result
+            ::"m"(in2[-2*k]), "m"(in1[2*k]),
+              "m"(tcos[k]), "m"(tsin[k])
+        );
+        /* Should be in the same block, hack for gcc2.95 & gcc3 */
+        asm (
+            "movlps      %%xmm0, %0     \n\t"
+            "movhps      %%xmm0, %1     \n\t"
+            :"=m"(z[revtab[k]]), "=m"(z[revtab[k + 1]])
+        );
+    }
+
+    ff_fft_calc_sse(&s->fft, z);
+
+    /* Not currently needed, added for safety */
+    asm volatile ("movaps %0, %%xmm7\n\t"::"m"(*p1m1p1m1));
+
+    /* post rotation + reordering */
+    for (k = 0; k < n4; k += 2) {
+        asm (
+            "movaps          %0, %%xmm0 \n\t"   // xmm0 = i1 r1 i0 r0: z
+            "movlps          %1, %%xmm1 \n\t"   // xmm1 = X  X  R1 R0: tcos
+            "movaps      %%xmm0, %%xmm3 \n\t"   // xmm3 = i1 r1 i0 r0
+            "movlps          %2, %%xmm2 \n\t"   // xmm2 = X  X  I1 I0: tsin
+            "shufps $160,%%xmm0, %%xmm0 \n\t"   // xmm0 = r1 r1 r0 r0
+            "shufps $245,%%xmm3, %%xmm3 \n\t"   // xmm3 = i1 i1 i0 i0
+            "unpcklps    %%xmm2, %%xmm1 \n\t"   // xmm1 = I1 R1 I0 R0
+            "movaps      %%xmm1, %%xmm2 \n\t"   // xmm2 = I1 R1 I0 R0
+            "xorps       %%xmm7, %%xmm2 \n\t"   // xmm2 = -I1 R1 -I0 R0
+            "mulps       %%xmm1, %%xmm0 \n\t"   // xmm0 = rI rR rI rR
+            "shufps $177,%%xmm2, %%xmm2 \n\t"   // xmm2 = R1 -I1 R0 -I0
+            "mulps       %%xmm2, %%xmm3 \n\t"   // xmm3 = Ri -Ii Ri -Ii
+            "addps       %%xmm3, %%xmm0 \n\t"   // xmm0 = result
+            "movaps      %%xmm0, %0     \n\t"
+            :"+m"(z[k])
+            :"m"(tcos[k]), "m"(tsin[k])
+        );
+    }
+
+    /*
+       Mnemonics:
+       0 = z[k].re
+       1 = z[k].im
+       2 = z[k + 1].re
+       3 = z[k + 1].im
+       4 = z[-k - 2].re
+       5 = z[-k - 2].im
+       6 = z[-k - 1].re
+       7 = z[-k - 1].im
+    */
+    k = 16-n;
+    asm volatile("movaps %0, %%xmm7 \n\t"::"m"(*m1m1m1m1));
+    asm volatile(
+        "1: \n\t"
+        "movaps  -16(%4,%0), %%xmm1 \n\t"   // xmm1 = 4 5 6 7 = z[-2-k]
+        "neg %0 \n\t"
+        "movaps     (%4,%0), %%xmm0 \n\t"   // xmm0 = 0 1 2 3 = z[k]
+        "xorps       %%xmm7, %%xmm0 \n\t"   // xmm0 = -0 -1 -2 -3
+        "movaps      %%xmm0, %%xmm2 \n\t"   // xmm2 = -0 -1 -2 -3
+        "shufps $141,%%xmm1, %%xmm0 \n\t"   // xmm0 = -1 -3 4 6
+        "shufps $216,%%xmm1, %%xmm2 \n\t"   // xmm2 = -0 -2 5 7
+        "shufps $156,%%xmm0, %%xmm0 \n\t"   // xmm0 = -1 6 -3 4 !
+        "shufps $156,%%xmm2, %%xmm2 \n\t"   // xmm2 = -0 7 -2 5 !
+        "movaps      %%xmm0, (%1,%0) \n\t"  // output[2*k]
+        "movaps      %%xmm2, (%2,%0) \n\t"  // output[n2+2*k]
+        "neg %0 \n\t"
+        "shufps $27, %%xmm0, %%xmm0 \n\t"   // xmm0 = 4 -3 6 -1
+        "xorps       %%xmm7, %%xmm0 \n\t"   // xmm0 = -4 3 -6 1 !
+        "shufps $27, %%xmm2, %%xmm2 \n\t"   // xmm2 = 5 -2 7 -0 !
+        "movaps      %%xmm0, -16(%2,%0) \n\t" // output[n2-4-2*k]
+        "movaps      %%xmm2, -16(%3,%0) \n\t" // output[n-4-2*k]
+        "add $16, %0 \n\t"
+        "jle 1b \n\t"
+        :"+r"(k)
+        :"r"(output), "r"(output+n2), "r"(output+n), "r"(z+n8)
+        :"memory"
+    );
+}
+
diff --git a/contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c b/contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c
new file mode 100644
index 000000000..40baf199b
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/h264dsp_mmx.c
@@ -0,0 +1,1513 @@
+/*
+ * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+/***********************************/
+/* IDCT */
+
+/* in/out: mma=mma+mmb, mmb=mmb-mma */
+#define SUMSUB_BA( a, b ) \
+    "paddw "#b", "#a" \n\t"\
+    "paddw "#b", "#b" \n\t"\
+    "psubw "#a", "#b" \n\t"
+
+#define SUMSUB_BADC( a, b, c, d ) \
+    "paddw "#b", "#a" \n\t"\
+    "paddw "#d", "#c" \n\t"\
+    "paddw "#b", "#b" \n\t"\
+    "paddw "#d", "#d" \n\t"\
+    "psubw "#a", "#b" \n\t"\
+    "psubw "#c", "#d" \n\t"
+
+#define SUMSUBD2_AB( a, b, t ) \
+    "movq  "#b", "#t" \n\t"\
+    "psraw  $1 , "#b" \n\t"\
+    "paddw "#a", "#b" \n\t"\
+    "psraw  $1 , "#a" \n\t"\
+    "psubw "#t", "#a" \n\t"
+
+#define IDCT4_1D( s02, s13, d02, d13, t ) \
+    SUMSUB_BA  ( s02, d02 )\
+    SUMSUBD2_AB( s13, d13, t )\
+    SUMSUB_BADC( d13, s02, s13, d02 )
+
+#define TRANSPOSE4(a,b,c,d,t)\
+    SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
+    SBUTTERFLY(c,d,b,wd) /* c=imjn b=kolp */\
+    SBUTTERFLY(a,c,d,dq) /* a=aeim d=bfjn */\
+    SBUTTERFLY(t,b,c,dq) /* t=cgko c=dhlp */
+
+#define STORE_DIFF_4P( p, t, z ) \
+    "psraw      $6,     "#p" \n\t"\
+    "movd       (%0),   "#t" \n\t"\
+    "punpcklbw "#z",    "#t" \n\t"\
+    "paddsw    "#t",    "#p" \n\t"\
+    "packuswb  "#z",    "#p" \n\t"\
+    "movd      "#p",    (%0) \n\t"
+
+static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
+{
+    /* Load dct coeffs */
+    asm volatile(
+        "movq   (%0), %%mm0 \n\t"
+        "movq  8(%0), %%mm1 \n\t"
+        "movq 16(%0), %%mm2 \n\t"
+        "movq 24(%0), %%mm3 \n\t"
+    :: "r"(block) );
+
+    asm volatile(
+        /* mm1=s02+s13  mm2=s02-s13  mm4=d02+d13  mm0=d02-d13 */
+        IDCT4_1D( %%mm2, %%mm1, %%mm0, %%mm3, %%mm4 )
+
+        "movq      %0,    %%mm6 \n\t"
+        /* in: 1,4,0,2  out: 1,2,3,0 */
+        TRANSPOSE4( %%mm3, %%mm1, %%mm0, %%mm2, %%mm4 )
+
+        "paddw     %%mm6, %%mm3 \n\t"
+
+        /* mm2=s02+s13  mm3=s02-s13  mm4=d02+d13  mm1=d02-d13 */
+        IDCT4_1D( %%mm4, %%mm2, %%mm3, %%mm0, %%mm1 )
+
+        "pxor %%mm7, %%mm7    \n\t"
+    :: "m"(ff_pw_32));
+
+    asm volatile(
+    STORE_DIFF_4P( %%mm0, %%mm1, %%mm7)
+        "add %1, %0             \n\t"
+    STORE_DIFF_4P( %%mm2, %%mm1, %%mm7)
+        "add %1, %0             \n\t"
+    STORE_DIFF_4P( %%mm3, %%mm1, %%mm7)
+        "add %1, %0             \n\t"
+    STORE_DIFF_4P( %%mm4, %%mm1, %%mm7)
+        : "+r"(dst)
+        : "r" ((long)stride)
+    );
+}
+
+static inline void h264_idct8_1d(int16_t *block)
+{
+    asm volatile(
+        "movq 112(%0), %%mm7  \n\t"
+        "movq  80(%0), %%mm5  \n\t"
+        "movq  48(%0), %%mm3  \n\t"
+        "movq  16(%0), %%mm1  \n\t"
+
+        "movq   %%mm7, %%mm4  \n\t"
+        "movq   %%mm3, %%mm6  \n\t"
+        "movq   %%mm5, %%mm0  \n\t"
+        "movq   %%mm7, %%mm2  \n\t"
+        "psraw  $1,    %%mm4  \n\t"
+        "psraw  $1,    %%mm6  \n\t"
+        "psubw  %%mm7, %%mm0  \n\t"
+        "psubw  %%mm6, %%mm2  \n\t"
+        "psubw  %%mm4, %%mm0  \n\t"
+        "psubw  %%mm3, %%mm2  \n\t"
+        "psubw  %%mm3, %%mm0  \n\t"
+        "paddw  %%mm1, %%mm2  \n\t"
+
+        "movq   %%mm5, %%mm4  \n\t"
+        "movq   %%mm1, %%mm6  \n\t"
+        "psraw  $1,    %%mm4  \n\t"
+        "psraw  $1,    %%mm6  \n\t"
+        "paddw  %%mm5, %%mm4  \n\t"
+        "paddw  %%mm1, %%mm6  \n\t"
+        "paddw  %%mm7, %%mm4  \n\t"
+        "paddw  %%mm5, %%mm6  \n\t"
+        "psubw  %%mm1, %%mm4  \n\t"
+        "paddw  %%mm3, %%mm6  \n\t"
+
+        "movq   %%mm0, %%mm1  \n\t"
+        "movq   %%mm4, %%mm3  \n\t"
+        "movq   %%mm2, %%mm5  \n\t"
+        "movq   %%mm6, %%mm7  \n\t"
+        "psraw  $2,    %%mm6  \n\t"
+        "psraw  $2,    %%mm3  \n\t"
+        "psraw  $2,    %%mm5  \n\t"
+        "psraw  $2,    %%mm0  \n\t"
+        "paddw  %%mm6, %%mm1  \n\t"
+        "paddw  %%mm2, %%mm3  \n\t"
+        "psubw  %%mm4, %%mm5  \n\t"
+        "psubw  %%mm0, %%mm7  \n\t"
+
+        "movq  32(%0), %%mm2  \n\t"
+        "movq  96(%0), %%mm6  \n\t"
+        "movq   %%mm2, %%mm4  \n\t"
+        "movq   %%mm6, %%mm0  \n\t"
+        "psraw  $1,    %%mm4  \n\t"
+        "psraw  $1,    %%mm6  \n\t"
+        "psubw  %%mm0, %%mm4  \n\t"
+        "paddw  %%mm2, %%mm6  \n\t"
+
+        "movq    (%0), %%mm2  \n\t"
+        "movq  64(%0), %%mm0  \n\t"
+        SUMSUB_BA( %%mm0, %%mm2 )
+        SUMSUB_BA( %%mm6, %%mm0 )
+        SUMSUB_BA( %%mm4, %%mm2 )
+        SUMSUB_BA( %%mm7, %%mm6 )
+        SUMSUB_BA( %%mm5, %%mm4 )
+        SUMSUB_BA( %%mm3, %%mm2 )
+        SUMSUB_BA( %%mm1, %%mm0 )
+        :: "r"(block)
+    );
+}
+
+static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
+{
+    int i;
+    int16_t __attribute__ ((aligned(8))) b2[64];
+
+    block[0] += 32;
+
+    for(i=0; i<2; i++){
+        DECLARE_ALIGNED_8(uint64_t, tmp);
+
+        h264_idct8_1d(block+4*i);
+
+        asm volatile(
+            "movq   %%mm7,    %0   \n\t"
+            TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
+            "movq   %%mm0,  8(%1)  \n\t"
+            "movq   %%mm6, 24(%1)  \n\t"
+            "movq   %%mm7, 40(%1)  \n\t"
+            "movq   %%mm4, 56(%1)  \n\t"
+            "movq    %0,    %%mm7  \n\t"
+            TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
+            "movq   %%mm7,   (%1)  \n\t"
+            "movq   %%mm1, 16(%1)  \n\t"
+            "movq   %%mm0, 32(%1)  \n\t"
+            "movq   %%mm3, 48(%1)  \n\t"
+            : "=m"(tmp)
+            : "r"(b2+32*i)
+            : "memory"
+        );
+    }
+
+    for(i=0; i<2; i++){
+        h264_idct8_1d(b2+4*i);
+
+        asm volatile(
+            "psraw     $6, %%mm7  \n\t"
+            "psraw     $6, %%mm6  \n\t"
+            "psraw     $6, %%mm5  \n\t"
+            "psraw     $6, %%mm4  \n\t"
+            "psraw     $6, %%mm3  \n\t"
+            "psraw     $6, %%mm2  \n\t"
+            "psraw     $6, %%mm1  \n\t"
+            "psraw     $6, %%mm0  \n\t"
+
+            "movq   %%mm7,    (%0)  \n\t"
+            "movq   %%mm5,  16(%0)  \n\t"
+            "movq   %%mm3,  32(%0)  \n\t"
+            "movq   %%mm1,  48(%0)  \n\t"
+            "movq   %%mm0,  64(%0)  \n\t"
+            "movq   %%mm2,  80(%0)  \n\t"
+            "movq   %%mm4,  96(%0)  \n\t"
+            "movq   %%mm6, 112(%0)  \n\t"
+            :: "r"(b2+4*i)
+            : "memory"
+        );
+    }
+
+    add_pixels_clamped_mmx(b2, dst, stride);
+}
+
+static void ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
+{
+    int dc = (block[0] + 32) >> 6;
+    asm volatile(
+        "movd          %0, %%mm0 \n\t"
+        "pshufw $0, %%mm0, %%mm0 \n\t"
+        "pxor       %%mm1, %%mm1 \n\t"
+        "psubw      %%mm0, %%mm1 \n\t"
+        "packuswb   %%mm0, %%mm0 \n\t"
+        "packuswb   %%mm1, %%mm1 \n\t"
+        ::"r"(dc)
+    );
+    asm volatile(
+        "movd          %0, %%mm2 \n\t"
+        "movd          %1, %%mm3 \n\t"
+        "movd          %2, %%mm4 \n\t"
+        "movd          %3, %%mm5 \n\t"
+        "paddusb    %%mm0, %%mm2 \n\t"
+        "paddusb    %%mm0, %%mm3 \n\t"
+        "paddusb    %%mm0, %%mm4 \n\t"
+        "paddusb    %%mm0, %%mm5 \n\t"
+        "psubusb    %%mm1, %%mm2 \n\t"
+        "psubusb    %%mm1, %%mm3 \n\t"
+        "psubusb    %%mm1, %%mm4 \n\t"
+        "psubusb    %%mm1, %%mm5 \n\t"
+        "movd       %%mm2, %0    \n\t"
+        "movd       %%mm3, %1    \n\t"
+        "movd       %%mm4, %2    \n\t"
+        "movd       %%mm5, %3    \n\t"
+        :"+m"(*(uint32_t*)(dst+0*stride)),
+         "+m"(*(uint32_t*)(dst+1*stride)),
+         "+m"(*(uint32_t*)(dst+2*stride)),
+         "+m"(*(uint32_t*)(dst+3*stride))
+    );
+}
+
+static void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
+{
+    int dc = (block[0] + 32) >> 6;
+    int y;
+    asm volatile(
+        "movd          %0, %%mm0 \n\t"
+        "pshufw $0, %%mm0, %%mm0 \n\t"
+        "pxor       %%mm1, %%mm1 \n\t"
+        "psubw      %%mm0, %%mm1 \n\t"
+        "packuswb   %%mm0, %%mm0 \n\t"
+        "packuswb   %%mm1, %%mm1 \n\t"
+        ::"r"(dc)
+    );
+    for(y=2; y--; dst += 4*stride){
+    asm volatile(
+        "movq          %0, %%mm2 \n\t"
+        "movq          %1, %%mm3 \n\t"
+        "movq          %2, %%mm4 \n\t"
+        "movq          %3, %%mm5 \n\t"
+        "paddusb    %%mm0, %%mm2 \n\t"
+        "paddusb    %%mm0, %%mm3 \n\t"
+        "paddusb    %%mm0, %%mm4 \n\t"
+        "paddusb    %%mm0, %%mm5 \n\t"
+        "psubusb    %%mm1, %%mm2 \n\t"
+        "psubusb    %%mm1, %%mm3 \n\t"
+        "psubusb    %%mm1, %%mm4 \n\t"
+        "psubusb    %%mm1, %%mm5 \n\t"
+        "movq       %%mm2, %0    \n\t"
+        "movq       %%mm3, %1    \n\t"
+        "movq       %%mm4, %2    \n\t"
+        "movq       %%mm5, %3    \n\t"
+        :"+m"(*(uint64_t*)(dst+0*stride)),
+         "+m"(*(uint64_t*)(dst+1*stride)),
+         "+m"(*(uint64_t*)(dst+2*stride)),
+         "+m"(*(uint64_t*)(dst+3*stride))
+    );
+    }
+}
+
+
+/***********************************/
+/* deblocking */
+
+// out: o = |x-y|>a
+// clobbers: t
+#define DIFF_GT_MMX(x,y,a,o,t)\
+    "movq     "#y", "#t"  \n\t"\
+    "movq     "#x", "#o"  \n\t"\
+    "psubusb  "#x", "#t"  \n\t"\
+    "psubusb  "#y", "#o"  \n\t"\
+    "por      "#t", "#o"  \n\t"\
+    "psubusb  "#a", "#o"  \n\t"
+
+// out: o = |x-y|>a
+// clobbers: t
+#define DIFF_GT2_MMX(x,y,a,o,t)\
+    "movq     "#y", "#t"  \n\t"\
+    "movq     "#x", "#o"  \n\t"\
+    "psubusb  "#x", "#t"  \n\t"\
+    "psubusb  "#y", "#o"  \n\t"\
+    "psubusb  "#a", "#t"  \n\t"\
+    "psubusb  "#a", "#o"  \n\t"\
+    "pcmpeqb  "#t", "#o"  \n\t"\
+
+// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1
+// out: mm5=beta-1, mm7=mask
+// clobbers: mm4,mm6
+#define H264_DEBLOCK_MASK(alpha1, beta1) \
+    "pshufw $0, "#alpha1", %%mm4 \n\t"\
+    "pshufw $0, "#beta1 ", %%mm5 \n\t"\
+    "packuswb  %%mm4, %%mm4      \n\t"\
+    "packuswb  %%mm5, %%mm5      \n\t"\
+    DIFF_GT_MMX(%%mm1, %%mm2, %%mm4, %%mm7, %%mm6) /* |p0-q0| > alpha-1 */\
+    DIFF_GT_MMX(%%mm0, %%mm1, %%mm5, %%mm4, %%mm6) /* |p1-p0| > beta-1 */\
+    "por       %%mm4, %%mm7      \n\t"\
+    DIFF_GT_MMX(%%mm3, %%mm2, %%mm5, %%mm4, %%mm6) /* |q1-q0| > beta-1 */\
+    "por       %%mm4, %%mm7      \n\t"\
+    "pxor      %%mm6, %%mm6      \n\t"\
+    "pcmpeqb   %%mm6, %%mm7      \n\t"
+
+// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask)
+// out: mm1=p0' mm2=q0'
+// clobbers: mm0,3-6
+#define H264_DEBLOCK_P0_Q0(pb_01, pb_3f)\
+        "movq    %%mm1              , %%mm5 \n\t"\
+        "pxor    %%mm2              , %%mm5 \n\t" /* p0^q0*/\
+        "pand    "#pb_01"           , %%mm5 \n\t" /* (p0^q0)&1*/\
+        "pcmpeqb %%mm4              , %%mm4 \n\t"\
+        "pxor    %%mm4              , %%mm3 \n\t"\
+        "pavgb   %%mm0              , %%mm3 \n\t" /* (p1 - q1 + 256)>>1*/\
+        "pavgb   "MANGLE(ff_pb_3)"  , %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\
+        "pxor    %%mm1              , %%mm4 \n\t"\
+        "pavgb   %%mm2              , %%mm4 \n\t" /* (q0 - p0 + 256)>>1*/\
+        "pavgb   %%mm5              , %%mm3 \n\t"\
+        "paddusb %%mm4              , %%mm3 \n\t" /* d+128+33*/\
+        "movq    "MANGLE(ff_pb_A1)" , %%mm6 \n\t"\
+        "psubusb %%mm3              , %%mm6 \n\t"\
+        "psubusb "MANGLE(ff_pb_A1)" , %%mm3 \n\t"\
+        "pminub  %%mm7              , %%mm6 \n\t"\
+        "pminub  %%mm7              , %%mm3 \n\t"\
+        "psubusb %%mm6              , %%mm1 \n\t"\
+        "psubusb %%mm3              , %%mm2 \n\t"\
+        "paddusb %%mm3              , %%mm1 \n\t"\
+        "paddusb %%mm6              , %%mm2 \n\t"
+
+// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask) %8=mm_bone
+// out: (q1addr) = clip( (q2+((p0+q0+1)>>1))>>1, q1-tc0, q1+tc0 )
+// clobbers: q2, tmp, tc0
+#define H264_DEBLOCK_Q1(p1, q2, q2addr, q1addr, tc0, tmp)\
+        "movq     %%mm1,  "#tmp"   \n\t"\
+        "pavgb    %%mm2,  "#tmp"   \n\t"\
+        "pavgb    "#tmp", "#q2"    \n\t" /* avg(p2,avg(p0,q0)) */\
+        "pxor   "q2addr", "#tmp"   \n\t"\
+        "pand     %8,     "#tmp"   \n\t" /* (p2^avg(p0,q0))&1 */\
+        "psubusb  "#tmp", "#q2"    \n\t" /* (p2+((p0+q0+1)>>1))>>1 */\
+        "movq     "#p1",  "#tmp"   \n\t"\
+        "psubusb  "#tc0", "#tmp"   \n\t"\
+        "paddusb  "#p1",  "#tc0"   \n\t"\
+        "pmaxub   "#tmp", "#q2"    \n\t"\
+        "pminub   "#tc0", "#q2"    \n\t"\
+        "movq     "#q2",  "q1addr" \n\t"
+
+static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
+{
+    DECLARE_ALIGNED_8(uint64_t, tmp0[2]);
+
+    asm volatile(
+        "movq    (%1,%3), %%mm0    \n\t" //p1
+        "movq    (%1,%3,2), %%mm1  \n\t" //p0
+        "movq    (%2),    %%mm2    \n\t" //q0
+        "movq    (%2,%3), %%mm3    \n\t" //q1
+        H264_DEBLOCK_MASK(%6, %7)
+
+        "movd      %5,    %%mm4    \n\t"
+        "punpcklbw %%mm4, %%mm4    \n\t"
+        "punpcklwd %%mm4, %%mm4    \n\t"
+        "pcmpeqb   %%mm3, %%mm3    \n\t"
+        "movq      %%mm4, %%mm6    \n\t"
+        "pcmpgtb   %%mm3, %%mm4    \n\t"
+        "movq      %%mm6, 8+%0     \n\t"
+        "pand      %%mm4, %%mm7    \n\t"
+        "movq      %%mm7, %0       \n\t"
+
+        /* filter p1 */
+        "movq     (%1),   %%mm3    \n\t" //p2
+        DIFF_GT2_MMX(%%mm1, %%mm3, %%mm5, %%mm6, %%mm4) // |p2-p0|>beta-1
+        "pand     %%mm7,  %%mm6    \n\t" // mask & |p2-p0|<beta
+        "pand     8+%0,   %%mm7    \n\t" // mask & tc0
+        "movq     %%mm7,  %%mm4    \n\t"
+        "psubb    %%mm6,  %%mm7    \n\t"
+        "pand     %%mm4,  %%mm6    \n\t" // mask & |p2-p0|<beta & tc0
+        H264_DEBLOCK_Q1(%%mm0, %%mm3, "(%1)", "(%1,%3)", %%mm6, %%mm4)
+
+        /* filter q1 */
+        "movq    (%2,%3,2), %%mm4  \n\t" //q2
+        DIFF_GT2_MMX(%%mm2, %%mm4, %%mm5, %%mm6, %%mm3) // |q2-q0|>beta-1
+        "pand     %0,     %%mm6    \n\t"
+        "movq     8+%0,   %%mm5    \n\t" // can be merged with the and below but is slower then
+        "pand     %%mm6,  %%mm5    \n\t"
+        "psubb    %%mm6,  %%mm7    \n\t"
+        "movq    (%2,%3), %%mm3    \n\t"
+        H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%2,%3,2)", "(%2,%3)", %%mm5, %%mm6)
+
+        /* filter p0, q0 */
+        H264_DEBLOCK_P0_Q0(%8, unused)
+        "movq      %%mm1, (%1,%3,2) \n\t"
+        "movq      %%mm2, (%2)      \n\t"
+
+        : "=m"(*tmp0)
+        : "r"(pix-3*stride), "r"(pix), "r"((long)stride),
+          "m"(*tmp0/*unused*/), "m"(*(uint32_t*)tc0), "m"(alpha1), "m"(beta1),
+          "m"(mm_bone)
+    );
+}
+
+static void h264_v_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    if((tc0[0] & tc0[1]) >= 0)
+        h264_loop_filter_luma_mmx2(pix, stride, alpha-1, beta-1, tc0);
+    if((tc0[2] & tc0[3]) >= 0)
+        h264_loop_filter_luma_mmx2(pix+8, stride, alpha-1, beta-1, tc0+2);
+}
+static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    //FIXME: could cut some load/stores by merging transpose with filter
+    // also, it only needs to transpose 6x8
+    DECLARE_ALIGNED_8(uint8_t, trans[8*8]);
+    int i;
+    for(i=0; i<2; i++, pix+=8*stride, tc0+=2) {
+        if((tc0[0] & tc0[1]) < 0)
+            continue;
+        transpose4x4(trans,       pix-4,          8, stride);
+        transpose4x4(trans  +4*8, pix,            8, stride);
+        transpose4x4(trans+4,     pix-4+4*stride, 8, stride);
+        transpose4x4(trans+4+4*8, pix  +4*stride, 8, stride);
+        h264_loop_filter_luma_mmx2(trans+4*8, 8, alpha-1, beta-1, tc0);
+        transpose4x4(pix-2,          trans  +2*8, stride, 8);
+        transpose4x4(pix-2+4*stride, trans+4+2*8, stride, 8);
+    }
+}
+
+static inline void h264_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
+{
+    asm volatile(
+        "movq    (%0),    %%mm0     \n\t" //p1
+        "movq    (%0,%2), %%mm1     \n\t" //p0
+        "movq    (%1),    %%mm2     \n\t" //q0
+        "movq    (%1,%2), %%mm3     \n\t" //q1
+        H264_DEBLOCK_MASK(%4, %5)
+        "movd      %3,    %%mm6     \n\t"
+        "punpcklbw %%mm6, %%mm6     \n\t"
+        "pand      %%mm6, %%mm7     \n\t" // mm7 = tc&mask
+        H264_DEBLOCK_P0_Q0(%6, %7)
+        "movq      %%mm1, (%0,%2)   \n\t"
+        "movq      %%mm2, (%1)      \n\t"
+
+        :: "r"(pix-2*stride), "r"(pix), "r"((long)stride),
+           "r"(*(uint32_t*)tc0),
+           "m"(alpha1), "m"(beta1), "m"(mm_bone), "m"(ff_pb_3F)
+    );
+}
+
+static void h264_v_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    h264_loop_filter_chroma_mmx2(pix, stride, alpha-1, beta-1, tc0);
+}
+
+static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+    //FIXME: could cut some load/stores by merging transpose with filter
+    DECLARE_ALIGNED_8(uint8_t, trans[8*4]);
+    transpose4x4(trans, pix-2, 8, stride);
+    transpose4x4(trans+4, pix-2+4*stride, 8, stride);
+    h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0);
+    transpose4x4(pix-2, trans, stride, 8);
+    transpose4x4(pix-2+4*stride, trans+4, stride, 8);
+}
+
+// p0 = (p0 + q1 + 2*p1 + 2) >> 2
+#define H264_FILTER_CHROMA4(p0, p1, q1, one) \
+    "movq    "#p0", %%mm4  \n\t"\
+    "pxor    "#q1", %%mm4  \n\t"\
+    "pand   "#one", %%mm4  \n\t" /* mm4 = (p0^q1)&1 */\
+    "pavgb   "#q1", "#p0"  \n\t"\
+    "psubusb %%mm4, "#p0"  \n\t"\
+    "pavgb   "#p1", "#p0"  \n\t" /* dst = avg(p1, avg(p0,q1) - ((p0^q1)&1)) */\
+
+static inline void h264_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha1, int beta1)
+{
+    asm volatile(
+        "movq    (%0),    %%mm0     \n\t"
+        "movq    (%0,%2), %%mm1     \n\t"
+        "movq    (%1),    %%mm2     \n\t"
+        "movq    (%1,%2), %%mm3     \n\t"
+        H264_DEBLOCK_MASK(%3, %4)
+        "movq    %%mm1,   %%mm5     \n\t"
+        "movq    %%mm2,   %%mm6     \n\t"
+        H264_FILTER_CHROMA4(%%mm1, %%mm0, %%mm3, %5) //p0'
+        H264_FILTER_CHROMA4(%%mm2, %%mm3, %%mm0, %5) //q0'
+        "psubb   %%mm5,   %%mm1     \n\t"
+        "psubb   %%mm6,   %%mm2     \n\t"
+        "pand    %%mm7,   %%mm1     \n\t"
+        "pand    %%mm7,   %%mm2     \n\t"
+        "paddb   %%mm5,   %%mm1     \n\t"
+        "paddb   %%mm6,   %%mm2     \n\t"
+        "movq    %%mm1,   (%0,%2)   \n\t"
+        "movq    %%mm2,   (%1)      \n\t"
+        :: "r"(pix-2*stride), "r"(pix), "r"((long)stride),
+           "m"(alpha1), "m"(beta1), "m"(mm_bone)
+    );
+}
+
+static void h264_v_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
+{
+    h264_loop_filter_chroma_intra_mmx2(pix, stride, alpha-1, beta-1);
+}
+
+static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
+{
+    //FIXME: could cut some load/stores by merging transpose with filter
+    DECLARE_ALIGNED_8(uint8_t, trans[8*4]);
+    transpose4x4(trans, pix-2, 8, stride);
+    transpose4x4(trans+4, pix-2+4*stride, 8, stride);
+    h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1);
+    transpose4x4(pix-2, trans, stride, 8);
+    transpose4x4(pix-2+4*stride, trans+4, stride, 8);
+}
+
+static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
+                                            int bidir, int edges, int step, int mask_mv0, int mask_mv1 ) {
+    int dir;
+    asm volatile(
+        "pxor %%mm7, %%mm7 \n\t"
+        "movq %0, %%mm6 \n\t"
+        "movq %1, %%mm5 \n\t"
+        "movq %2, %%mm4 \n\t"
+        ::"m"(ff_pb_1), "m"(ff_pb_3), "m"(ff_pb_7)
+    );
+    // could do a special case for dir==0 && edges==1, but it only reduces the
+    // average filter time by 1.2%
+    for( dir=1; dir>=0; dir-- ) {
+        const int d_idx = dir ? -8 : -1;
+        const int mask_mv = dir ? mask_mv1 : mask_mv0;
+        DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
+        int b_idx, edge, l;
+        for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) {
+            asm volatile(
+                "pand %0, %%mm0 \n\t"
+                ::"m"(mask_dir)
+            );
+            if(!(mask_mv & edge)) {
+                asm volatile("pxor %%mm0, %%mm0 \n\t":);
+                for( l = bidir; l >= 0; l-- ) {
+                    asm volatile(
+                        "movd %0, %%mm1 \n\t"
+                        "punpckldq %1, %%mm1 \n\t"
+                        "movq %%mm1, %%mm2 \n\t"
+                        "psrlw $7, %%mm2 \n\t"
+                        "pand %%mm6, %%mm2 \n\t"
+                        "por %%mm2, %%mm1 \n\t" // ref_cache with -2 mapped to -1
+                        "punpckldq %%mm1, %%mm2 \n\t"
+                        "pcmpeqb %%mm2, %%mm1 \n\t"
+                        "paddb %%mm6, %%mm1 \n\t"
+                        "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn]
+                        "por %%mm1, %%mm0 \n\t"
+
+                        "movq %2, %%mm1 \n\t"
+                        "movq %3, %%mm2 \n\t"
+                        "psubw %4, %%mm1 \n\t"
+                        "psubw %5, %%mm2 \n\t"
+                        "packsswb %%mm2, %%mm1 \n\t"
+                        "paddb %%mm5, %%mm1 \n\t"
+                        "pminub %%mm4, %%mm1 \n\t"
+                        "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit
+                        "por %%mm1, %%mm0 \n\t"
+                        ::"m"(ref[l][b_idx]),
+                          "m"(ref[l][b_idx+d_idx]),
+                          "m"(mv[l][b_idx][0]),
+                          "m"(mv[l][b_idx+2][0]),
+                          "m"(mv[l][b_idx+d_idx][0]),
+                          "m"(mv[l][b_idx+d_idx+2][0])
+                    );
+                }
+            }
+            asm volatile(
+                "movd %0, %%mm1 \n\t"
+                "por  %1, %%mm1 \n\t"
+                "punpcklbw %%mm7, %%mm1 \n\t"
+                "pcmpgtw %%mm7, %%mm1 \n\t" // nnz[b] || nnz[bn]
+                ::"m"(nnz[b_idx]),
+                  "m"(nnz[b_idx+d_idx])
+            );
+            asm volatile(
+                "pcmpeqw %%mm7, %%mm0 \n\t"
+                "pcmpeqw %%mm7, %%mm0 \n\t"
+                "psrlw $15, %%mm0 \n\t" // nonzero -> 1
+                "psrlw $14, %%mm1 \n\t"
+                "movq %%mm0, %%mm2 \n\t"
+                "por %%mm1, %%mm2 \n\t"
+                "psrlw $1, %%mm1 \n\t"
+                "pandn %%mm2, %%mm1 \n\t"
+                "movq %%mm1, %0 \n\t"
+                :"=m"(*bS[dir][edge])
+                ::"memory"
+            );
+        }
+        edges = 4;
+        step = 1;
+    }
+    asm volatile(
+        "movq   (%0), %%mm0 \n\t"
+        "movq  8(%0), %%mm1 \n\t"
+        "movq 16(%0), %%mm2 \n\t"
+        "movq 24(%0), %%mm3 \n\t"
+        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4)
+        "movq %%mm0,   (%0) \n\t"
+        "movq %%mm3,  8(%0) \n\t"
+        "movq %%mm4, 16(%0) \n\t"
+        "movq %%mm2, 24(%0) \n\t"
+        ::"r"(bS[0])
+        :"memory"
+    );
+}
+
+/***********************************/
+/* motion compensation */
+
+#define QPEL_H264V(A,B,C,D,E,F,OP)\
+        "movd (%0), "#F"            \n\t"\
+        "movq "#C", %%mm6           \n\t"\
+        "paddw "#D", %%mm6          \n\t"\
+        "psllw $2, %%mm6            \n\t"\
+        "psubw "#B", %%mm6          \n\t"\
+        "psubw "#E", %%mm6          \n\t"\
+        "pmullw %4, %%mm6           \n\t"\
+        "add %2, %0                 \n\t"\
+        "punpcklbw %%mm7, "#F"      \n\t"\
+        "paddw %5, "#A"             \n\t"\
+        "paddw "#F", "#A"           \n\t"\
+        "paddw "#A", %%mm6          \n\t"\
+        "psraw $5, %%mm6            \n\t"\
+        "packuswb %%mm6, %%mm6      \n\t"\
+        OP(%%mm6, (%1), A, d)\
+        "add %3, %1                 \n\t"
+
+#define QPEL_H264HV(A,B,C,D,E,F,OF)\
+        "movd (%0), "#F"            \n\t"\
+        "movq "#C", %%mm6           \n\t"\
+        "paddw "#D", %%mm6          \n\t"\
+        "psllw $2, %%mm6            \n\t"\
+        "psubw "#B", %%mm6          \n\t"\
+        "psubw "#E", %%mm6          \n\t"\
+        "pmullw %3, %%mm6           \n\t"\
+        "add %2, %0                 \n\t"\
+        "punpcklbw %%mm7, "#F"      \n\t"\
+        "paddw "#F", "#A"           \n\t"\
+        "paddw "#A", %%mm6          \n\t"\
+        "movq %%mm6, "#OF"(%1)      \n\t"
+
+#define QPEL_H264(OPNAME, OP, MMX)\
+static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    int h=4;\
+\
+    asm volatile(\
+        "pxor %%mm7, %%mm7          \n\t"\
+        "movq %5, %%mm4             \n\t"\
+        "movq %6, %%mm5             \n\t"\
+        "1:                         \n\t"\
+        "movd  -1(%0), %%mm1        \n\t"\
+        "movd    (%0), %%mm2        \n\t"\
+        "movd   1(%0), %%mm3        \n\t"\
+        "movd   2(%0), %%mm0        \n\t"\
+        "punpcklbw %%mm7, %%mm1     \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpcklbw %%mm7, %%mm3     \n\t"\
+        "punpcklbw %%mm7, %%mm0     \n\t"\
+        "paddw %%mm0, %%mm1         \n\t"\
+        "paddw %%mm3, %%mm2         \n\t"\
+        "movd  -2(%0), %%mm0        \n\t"\
+        "movd   3(%0), %%mm3        \n\t"\
+        "punpcklbw %%mm7, %%mm0     \n\t"\
+        "punpcklbw %%mm7, %%mm3     \n\t"\
+        "paddw %%mm3, %%mm0         \n\t"\
+        "psllw $2, %%mm2            \n\t"\
+        "psubw %%mm1, %%mm2         \n\t"\
+        "pmullw %%mm4, %%mm2        \n\t"\
+        "paddw %%mm5, %%mm0         \n\t"\
+        "paddw %%mm2, %%mm0         \n\t"\
+        "psraw $5, %%mm0            \n\t"\
+        "packuswb %%mm0, %%mm0      \n\t"\
+        OP(%%mm0, (%1),%%mm6, d)\
+        "add %3, %0                 \n\t"\
+        "add %4, %1                 \n\t"\
+        "decl %2                    \n\t"\
+        " jnz 1b                    \n\t"\
+        : "+a"(src), "+c"(dst), "+m"(h)\
+        : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+        : "memory"\
+    );\
+}\
+static void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+    int h=4;\
+    asm volatile(\
+        "pxor %%mm7, %%mm7          \n\t"\
+        "movq %0, %%mm4             \n\t"\
+        "movq %1, %%mm5             \n\t"\
+        :: "m"(ff_pw_5), "m"(ff_pw_16)\
+    );\
+    do{\
+    asm volatile(\
+        "movd  -1(%0), %%mm1        \n\t"\
+        "movd    (%0), %%mm2        \n\t"\
+        "movd   1(%0), %%mm3        \n\t"\
+        "movd   2(%0), %%mm0        \n\t"\
+        "punpcklbw %%mm7, %%mm1     \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpcklbw %%mm7, %%mm3     \n\t"\
+        "punpcklbw %%mm7, %%mm0     \n\t"\
+        "paddw %%mm0, %%mm1         \n\t"\
+        "paddw %%mm3, %%mm2         \n\t"\
+        "movd  -2(%0), %%mm0        \n\t"\
+        "movd   3(%0), %%mm3        \n\t"\
+        "punpcklbw %%mm7, %%mm0     \n\t"\
+        "punpcklbw %%mm7, %%mm3     \n\t"\
+        "paddw %%mm3, %%mm0         \n\t"\
+        "psllw $2, %%mm2            \n\t"\
+        "psubw %%mm1, %%mm2         \n\t"\
+        "pmullw %%mm4, %%mm2        \n\t"\
+        "paddw %%mm5, %%mm0         \n\t"\
+        "paddw %%mm2, %%mm0         \n\t"\
+        "movd   (%2), %%mm3         \n\t"\
+        "psraw $5, %%mm0            \n\t"\
+        "packuswb %%mm0, %%mm0      \n\t"\
+        PAVGB" %%mm3, %%mm0         \n\t"\
+        OP(%%mm0, (%1),%%mm6, d)\
+        "add %4, %0                 \n\t"\
+        "add %4, %1                 \n\t"\
+        "add %3, %2                 \n\t"\
+        : "+a"(src), "+c"(dst), "+d"(src2)\
+        : "D"((long)src2Stride), "S"((long)dstStride)\
+        : "memory"\
+    );\
+    }while(--h);\
+}\
+static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    src -= 2*srcStride;\
+    asm volatile(\
+        "pxor %%mm7, %%mm7          \n\t"\
+        "movd (%0), %%mm0           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm1           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm2           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm3           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm4           \n\t"\
+        "add %2, %0                 \n\t"\
+        "punpcklbw %%mm7, %%mm0     \n\t"\
+        "punpcklbw %%mm7, %%mm1     \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpcklbw %%mm7, %%mm3     \n\t"\
+        "punpcklbw %%mm7, %%mm4     \n\t"\
+        QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+        QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+        QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+        QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+         \
+        : "+a"(src), "+c"(dst)\
+        : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+        : "memory"\
+    );\
+}\
+static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    int h=4;\
+    int w=3;\
+    src -= 2*srcStride+2;\
+    while(w--){\
+        asm volatile(\
+            "pxor %%mm7, %%mm7      \n\t"\
+            "movd (%0), %%mm0       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm1       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm2       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm3       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm4       \n\t"\
+            "add %2, %0             \n\t"\
+            "punpcklbw %%mm7, %%mm0 \n\t"\
+            "punpcklbw %%mm7, %%mm1 \n\t"\
+            "punpcklbw %%mm7, %%mm2 \n\t"\
+            "punpcklbw %%mm7, %%mm3 \n\t"\
+            "punpcklbw %%mm7, %%mm4 \n\t"\
+            QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\
+            QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\
+            QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\
+            QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\
+             \
+            : "+a"(src)\
+            : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\
+            : "memory"\
+        );\
+        tmp += 4;\
+        src += 4 - 9*srcStride;\
+    }\
+    tmp -= 3*4;\
+    asm volatile(\
+        "movq %4, %%mm6             \n\t"\
+        "1:                         \n\t"\
+        "movq     (%0), %%mm0       \n\t"\
+        "paddw  10(%0), %%mm0       \n\t"\
+        "movq    2(%0), %%mm1       \n\t"\
+        "paddw   8(%0), %%mm1       \n\t"\
+        "movq    4(%0), %%mm2       \n\t"\
+        "paddw   6(%0), %%mm2       \n\t"\
+        "psubw %%mm1, %%mm0         \n\t"/*a-b   (abccba)*/\
+        "psraw $2, %%mm0            \n\t"/*(a-b)/4 */\
+        "psubw %%mm1, %%mm0         \n\t"/*(a-b)/4-b */\
+        "paddsw %%mm2, %%mm0        \n\t"\
+        "psraw $2, %%mm0            \n\t"/*((a-b)/4-b+c)/4 */\
+        "paddw %%mm6, %%mm2         \n\t"\
+        "paddw %%mm2, %%mm0         \n\t"/*(a-5*b+20*c)/16 +32 */\
+        "psraw $6, %%mm0            \n\t"\
+        "packuswb %%mm0, %%mm0      \n\t"\
+        OP(%%mm0, (%1),%%mm7, d)\
+        "add $24, %0                \n\t"\
+        "add %3, %1                 \n\t"\
+        "decl %2                    \n\t"\
+        " jnz 1b                    \n\t"\
+        : "+a"(tmp), "+c"(dst), "+m"(h)\
+        : "S"((long)dstStride), "m"(ff_pw_32)\
+        : "memory"\
+    );\
+}\
+\
+static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    int h=8;\
+    asm volatile(\
+        "pxor %%mm7, %%mm7          \n\t"\
+        "movq %5, %%mm6             \n\t"\
+        "1:                         \n\t"\
+        "movq    (%0), %%mm0        \n\t"\
+        "movq   1(%0), %%mm2        \n\t"\
+        "movq %%mm0, %%mm1          \n\t"\
+        "movq %%mm2, %%mm3          \n\t"\
+        "punpcklbw %%mm7, %%mm0     \n\t"\
+        "punpckhbw %%mm7, %%mm1     \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpckhbw %%mm7, %%mm3     \n\t"\
+        "paddw %%mm2, %%mm0         \n\t"\
+        "paddw %%mm3, %%mm1         \n\t"\
+        "psllw $2, %%mm0            \n\t"\
+        "psllw $2, %%mm1            \n\t"\
+        "movq   -1(%0), %%mm2       \n\t"\
+        "movq    2(%0), %%mm4       \n\t"\
+        "movq %%mm2, %%mm3          \n\t"\
+        "movq %%mm4, %%mm5          \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpckhbw %%mm7, %%mm3     \n\t"\
+        "punpcklbw %%mm7, %%mm4     \n\t"\
+        "punpckhbw %%mm7, %%mm5     \n\t"\
+        "paddw %%mm4, %%mm2         \n\t"\
+        "paddw %%mm3, %%mm5         \n\t"\
+        "psubw %%mm2, %%mm0         \n\t"\
+        "psubw %%mm5, %%mm1         \n\t"\
+        "pmullw %%mm6, %%mm0        \n\t"\
+        "pmullw %%mm6, %%mm1        \n\t"\
+        "movd   -2(%0), %%mm2       \n\t"\
+        "movd    7(%0), %%mm5       \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpcklbw %%mm7, %%mm5     \n\t"\
+        "paddw %%mm3, %%mm2         \n\t"\
+        "paddw %%mm5, %%mm4         \n\t"\
+        "movq %6, %%mm5             \n\t"\
+        "paddw %%mm5, %%mm2         \n\t"\
+        "paddw %%mm5, %%mm4         \n\t"\
+        "paddw %%mm2, %%mm0         \n\t"\
+        "paddw %%mm4, %%mm1         \n\t"\
+        "psraw $5, %%mm0            \n\t"\
+        "psraw $5, %%mm1            \n\t"\
+        "packuswb %%mm1, %%mm0      \n\t"\
+        OP(%%mm0, (%1),%%mm5, q)\
+        "add %3, %0                 \n\t"\
+        "add %4, %1                 \n\t"\
+        "decl %2                    \n\t"\
+        " jnz 1b                    \n\t"\
+        : "+a"(src), "+c"(dst), "+m"(h)\
+        : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+        : "memory"\
+    );\
+}\
+\
+static void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+    int h=8;\
+    asm volatile(\
+        "pxor %%mm7, %%mm7          \n\t"\
+        "movq %0, %%mm6             \n\t"\
+        :: "m"(ff_pw_5)\
+    );\
+    do{\
+    asm volatile(\
+        "movq    (%0), %%mm0        \n\t"\
+        "movq   1(%0), %%mm2        \n\t"\
+        "movq %%mm0, %%mm1          \n\t"\
+        "movq %%mm2, %%mm3          \n\t"\
+        "punpcklbw %%mm7, %%mm0     \n\t"\
+        "punpckhbw %%mm7, %%mm1     \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpckhbw %%mm7, %%mm3     \n\t"\
+        "paddw %%mm2, %%mm0         \n\t"\
+        "paddw %%mm3, %%mm1         \n\t"\
+        "psllw $2, %%mm0            \n\t"\
+        "psllw $2, %%mm1            \n\t"\
+        "movq   -1(%0), %%mm2       \n\t"\
+        "movq    2(%0), %%mm4       \n\t"\
+        "movq %%mm2, %%mm3          \n\t"\
+        "movq %%mm4, %%mm5          \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpckhbw %%mm7, %%mm3     \n\t"\
+        "punpcklbw %%mm7, %%mm4     \n\t"\
+        "punpckhbw %%mm7, %%mm5     \n\t"\
+        "paddw %%mm4, %%mm2         \n\t"\
+        "paddw %%mm3, %%mm5         \n\t"\
+        "psubw %%mm2, %%mm0         \n\t"\
+        "psubw %%mm5, %%mm1         \n\t"\
+        "pmullw %%mm6, %%mm0        \n\t"\
+        "pmullw %%mm6, %%mm1        \n\t"\
+        "movd   -2(%0), %%mm2       \n\t"\
+        "movd    7(%0), %%mm5       \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpcklbw %%mm7, %%mm5     \n\t"\
+        "paddw %%mm3, %%mm2         \n\t"\
+        "paddw %%mm5, %%mm4         \n\t"\
+        "movq %5, %%mm5             \n\t"\
+        "paddw %%mm5, %%mm2         \n\t"\
+        "paddw %%mm5, %%mm4         \n\t"\
+        "paddw %%mm2, %%mm0         \n\t"\
+        "paddw %%mm4, %%mm1         \n\t"\
+        "psraw $5, %%mm0            \n\t"\
+        "psraw $5, %%mm1            \n\t"\
+        "movq (%2), %%mm4           \n\t"\
+        "packuswb %%mm1, %%mm0      \n\t"\
+        PAVGB" %%mm4, %%mm0         \n\t"\
+        OP(%%mm0, (%1),%%mm5, q)\
+        "add %4, %0                 \n\t"\
+        "add %4, %1                 \n\t"\
+        "add %3, %2                 \n\t"\
+        : "+a"(src), "+c"(dst), "+d"(src2)\
+        : "D"((long)src2Stride), "S"((long)dstStride),\
+          "m"(ff_pw_16)\
+        : "memory"\
+    );\
+    }while(--h);\
+}\
+\
+static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    int w= 2;\
+    src -= 2*srcStride;\
+    \
+    while(w--){\
+      asm volatile(\
+        "pxor %%mm7, %%mm7          \n\t"\
+        "movd (%0), %%mm0           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm1           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm2           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm3           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm4           \n\t"\
+        "add %2, %0                 \n\t"\
+        "punpcklbw %%mm7, %%mm0     \n\t"\
+        "punpcklbw %%mm7, %%mm1     \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpcklbw %%mm7, %%mm3     \n\t"\
+        "punpcklbw %%mm7, %%mm4     \n\t"\
+        QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+        QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+        QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+        QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+        QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
+        QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
+        QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+        QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+         \
+        : "+a"(src), "+c"(dst)\
+        : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+        : "memory"\
+     );\
+     if(h==16){\
+        asm volatile(\
+            QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+            QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+            QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
+            QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
+            QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+            QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+            QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+            QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+            \
+           : "+a"(src), "+c"(dst)\
+           : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+           : "memory"\
+        );\
+     }\
+     src += 4-(h+5)*srcStride;\
+     dst += 4-h*dstStride;\
+   }\
+}\
+static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
+    int h = size;\
+    int w = (size+8)>>2;\
+    src -= 2*srcStride+2;\
+    while(w--){\
+        asm volatile(\
+            "pxor %%mm7, %%mm7      \n\t"\
+            "movd (%0), %%mm0       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm1       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm2       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm3       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm4       \n\t"\
+            "add %2, %0             \n\t"\
+            "punpcklbw %%mm7, %%mm0 \n\t"\
+            "punpcklbw %%mm7, %%mm1 \n\t"\
+            "punpcklbw %%mm7, %%mm2 \n\t"\
+            "punpcklbw %%mm7, %%mm3 \n\t"\
+            "punpcklbw %%mm7, %%mm4 \n\t"\
+            QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*48)\
+            QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*48)\
+            QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*48)\
+            QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*48)\
+            QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 4*48)\
+            QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*48)\
+            QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\
+            QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\
+            : "+a"(src)\
+            : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\
+            : "memory"\
+        );\
+        if(size==16){\
+            asm volatile(\
+                QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1,  8*48)\
+                QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2,  9*48)\
+                QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\
+                QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 11*48)\
+                QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 12*48)\
+                QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 13*48)\
+                QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\
+                QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\
+                : "+a"(src)\
+                : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\
+                : "memory"\
+            );\
+        }\
+        tmp += 4;\
+        src += 4 - (size+5)*srcStride;\
+    }\
+    tmp -= size+8;\
+    w = size>>4;\
+    do{\
+    h = size;\
+    asm volatile(\
+        "movq %4, %%mm6             \n\t"\
+        "1:                         \n\t"\
+        "movq     (%0), %%mm0       \n\t"\
+        "movq    8(%0), %%mm3       \n\t"\
+        "movq    2(%0), %%mm1       \n\t"\
+        "movq   10(%0), %%mm4       \n\t"\
+        "paddw   %%mm4, %%mm0       \n\t"\
+        "paddw   %%mm3, %%mm1       \n\t"\
+        "paddw  18(%0), %%mm3       \n\t"\
+        "paddw  16(%0), %%mm4       \n\t"\
+        "movq    4(%0), %%mm2       \n\t"\
+        "movq   12(%0), %%mm5       \n\t"\
+        "paddw   6(%0), %%mm2       \n\t"\
+        "paddw  14(%0), %%mm5       \n\t"\
+        "psubw %%mm1, %%mm0         \n\t"\
+        "psubw %%mm4, %%mm3         \n\t"\
+        "psraw $2, %%mm0            \n\t"\
+        "psraw $2, %%mm3            \n\t"\
+        "psubw %%mm1, %%mm0         \n\t"\
+        "psubw %%mm4, %%mm3         \n\t"\
+        "paddsw %%mm2, %%mm0        \n\t"\
+        "paddsw %%mm5, %%mm3        \n\t"\
+        "psraw $2, %%mm0            \n\t"\
+        "psraw $2, %%mm3            \n\t"\
+        "paddw %%mm6, %%mm2         \n\t"\
+        "paddw %%mm6, %%mm5         \n\t"\
+        "paddw %%mm2, %%mm0         \n\t"\
+        "paddw %%mm5, %%mm3         \n\t"\
+        "psraw $6, %%mm0            \n\t"\
+        "psraw $6, %%mm3            \n\t"\
+        "packuswb %%mm3, %%mm0      \n\t"\
+        OP(%%mm0, (%1),%%mm7, q)\
+        "add $48, %0                \n\t"\
+        "add %3, %1                 \n\t"\
+        "decl %2                    \n\t"\
+        " jnz 1b                    \n\t"\
+        : "+a"(tmp), "+c"(dst), "+m"(h)\
+        : "S"((long)dstStride), "m"(ff_pw_32)\
+        : "memory"\
+    );\
+    tmp += 8 - size*24;\
+    dst += 8 - size*dstStride;\
+    }while(w--);\
+}\
+\
+static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
+}\
+static void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
+    OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
+}\
+\
+static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+    OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\
+    OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
+    src += 8*dstStride;\
+    dst += 8*dstStride;\
+    src2 += 8*src2Stride;\
+    OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\
+    OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
+}\
+\
+static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst  , tmp  , src  , dstStride, tmpStride, srcStride, 8);\
+}\
+\
+static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst  , tmp  , src  , dstStride, tmpStride, srcStride, 16);\
+}\
+\
+static void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
+{\
+    asm volatile(\
+        "movq       %5,  %%mm6          \n\t"\
+        "movq      (%1), %%mm0          \n\t"\
+        "movq    24(%1), %%mm1          \n\t"\
+        "paddw    %%mm6, %%mm0          \n\t"\
+        "paddw    %%mm6, %%mm1          \n\t"\
+        "psraw      $5,  %%mm0          \n\t"\
+        "psraw      $5,  %%mm1          \n\t"\
+        "packuswb %%mm0, %%mm0          \n\t"\
+        "packuswb %%mm1, %%mm1          \n\t"\
+        PAVGB"     (%0), %%mm0          \n\t"\
+        PAVGB"  (%0,%3), %%mm1          \n\t"\
+        OP(%%mm0, (%2),    %%mm4, d)\
+        OP(%%mm1, (%2,%4), %%mm5, d)\
+        "lea  (%0,%3,2), %0             \n\t"\
+        "lea  (%2,%4,2), %2             \n\t"\
+        "movq    48(%1), %%mm0          \n\t"\
+        "movq    72(%1), %%mm1          \n\t"\
+        "paddw    %%mm6, %%mm0          \n\t"\
+        "paddw    %%mm6, %%mm1          \n\t"\
+        "psraw      $5,  %%mm0          \n\t"\
+        "psraw      $5,  %%mm1          \n\t"\
+        "packuswb %%mm0, %%mm0          \n\t"\
+        "packuswb %%mm1, %%mm1          \n\t"\
+        PAVGB"     (%0), %%mm0          \n\t"\
+        PAVGB"  (%0,%3), %%mm1          \n\t"\
+        OP(%%mm0, (%2),    %%mm4, d)\
+        OP(%%mm1, (%2,%4), %%mm5, d)\
+        :"+a"(src8), "+c"(src16), "+d"(dst)\
+        :"S"((long)src8Stride), "D"((long)dstStride), "m"(ff_pw_16)\
+        :"memory");\
+}\
+static void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
+{\
+    asm volatile(\
+        "movq       %0,  %%mm6          \n\t"\
+        ::"m"(ff_pw_16)\
+        );\
+    while(h--){\
+    asm volatile(\
+        "movq      (%1), %%mm0          \n\t"\
+        "movq     8(%1), %%mm1          \n\t"\
+        "paddw    %%mm6, %%mm0          \n\t"\
+        "paddw    %%mm6, %%mm1          \n\t"\
+        "psraw      $5,  %%mm0          \n\t"\
+        "psraw      $5,  %%mm1          \n\t"\
+        "packuswb %%mm1, %%mm0          \n\t"\
+        PAVGB"     (%0), %%mm0          \n\t"\
+        OP(%%mm0, (%2), %%mm5, q)\
+        ::"a"(src8), "c"(src16), "d"(dst)\
+        :"memory");\
+        src8 += src8Stride;\
+        src16 += 24;\
+        dst += dstStride;\
+    }\
+}\
+static void OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
+{\
+    OPNAME ## pixels8_l2_shift5_ ## MMX(dst  , src16  , src8  , dstStride, src8Stride, h);\
+    OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h);\
+}\
+
+
+#define H264_MC(OPNAME, SIZE, MMX) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels ## SIZE ## _mmx(dst, src, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[SIZE*SIZE/8];\
+    uint8_t * const half= (uint8_t*)temp;\
+    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, half, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[SIZE*SIZE/8];\
+    uint8_t * const half= (uint8_t*)temp;\
+    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, half, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[SIZE*SIZE/8];\
+    uint8_t * const halfV= (uint8_t*)temp;\
+    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src, SIZE, stride);\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfV, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[SIZE*SIZE/8];\
+    uint8_t * const halfV= (uint8_t*)temp;\
+    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src+1, SIZE, stride);\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfV, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[SIZE*SIZE/8];\
+    uint8_t * const halfV= (uint8_t*)temp;\
+    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src, SIZE, stride);\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfV, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[SIZE*SIZE/8];\
+    uint8_t * const halfV= (uint8_t*)temp;\
+    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src+1, SIZE, stride);\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfV, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[SIZE*(SIZE<8?12:24)/4];\
+    int16_t * const tmp= (int16_t*)temp;\
+    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, tmp, src, stride, SIZE, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\
+    uint8_t * const halfHV= (uint8_t*)temp;\
+    int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE/2;\
+    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\
+    uint8_t * const halfHV= (uint8_t*)temp;\
+    int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE/2;\
+    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\
+    int16_t * const halfV= ((int16_t*)temp) + SIZE*SIZE/2;\
+    uint8_t * const halfHV= ((uint8_t*)temp);\
+    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\
+    int16_t * const halfV= ((int16_t*)temp) + SIZE*SIZE/2;\
+    uint8_t * const halfHV= ((uint8_t*)temp);\
+    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
+}\
+
+
+#define AVG_3DNOW_OP(a,b,temp, size) \
+"mov" #size " " #b ", " #temp "   \n\t"\
+"pavgusb " #temp ", " #a "        \n\t"\
+"mov" #size " " #a ", " #b "      \n\t"
+#define AVG_MMX2_OP(a,b,temp, size) \
+"mov" #size " " #b ", " #temp "   \n\t"\
+"pavgb " #temp ", " #a "          \n\t"\
+"mov" #size " " #a ", " #b "      \n\t"
+
+#define PAVGB "pavgusb"
+QPEL_H264(put_,       PUT_OP, 3dnow)
+QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
+#undef PAVGB
+#define PAVGB "pavgb"
+QPEL_H264(put_,       PUT_OP, mmx2)
+QPEL_H264(avg_,  AVG_MMX2_OP, mmx2)
+#undef PAVGB
+
+H264_MC(put_, 4, 3dnow)
+H264_MC(put_, 8, 3dnow)
+H264_MC(put_, 16,3dnow)
+H264_MC(avg_, 4, 3dnow)
+H264_MC(avg_, 8, 3dnow)
+H264_MC(avg_, 16,3dnow)
+H264_MC(put_, 4, mmx2)
+H264_MC(put_, 8, mmx2)
+H264_MC(put_, 16,mmx2)
+H264_MC(avg_, 4, mmx2)
+H264_MC(avg_, 8, mmx2)
+H264_MC(avg_, 16,mmx2)
+
+
+#define H264_CHROMA_OP(S,D)
+#define H264_CHROMA_OP4(S,D,T)
+#define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_mmx
+#define H264_CHROMA_MC4_TMPL put_h264_chroma_mc4_mmx
+#define H264_CHROMA_MC2_TMPL put_h264_chroma_mc2_mmx2
+#define H264_CHROMA_MC8_MV0 put_pixels8_mmx
+#include "dsputil_h264_template_mmx.c"
+#undef H264_CHROMA_OP
+#undef H264_CHROMA_OP4
+#undef H264_CHROMA_MC8_TMPL
+#undef H264_CHROMA_MC4_TMPL
+#undef H264_CHROMA_MC2_TMPL
+#undef H264_CHROMA_MC8_MV0
+
+#define H264_CHROMA_OP(S,D) "pavgb " #S ", " #D " \n\t"
+#define H264_CHROMA_OP4(S,D,T) "movd  " #S ", " #T " \n\t"\
+                               "pavgb " #T ", " #D " \n\t"
+#define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_mmx2
+#define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_mmx2
+#define H264_CHROMA_MC2_TMPL avg_h264_chroma_mc2_mmx2
+#define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2
+#include "dsputil_h264_template_mmx.c"
+#undef H264_CHROMA_OP
+#undef H264_CHROMA_OP4
+#undef H264_CHROMA_MC8_TMPL
+#undef H264_CHROMA_MC4_TMPL
+#undef H264_CHROMA_MC2_TMPL
+#undef H264_CHROMA_MC8_MV0
+
+#define H264_CHROMA_OP(S,D) "pavgusb " #S ", " #D " \n\t"
+#define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\
+                               "pavgusb " #T ", " #D " \n\t"
+#define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_3dnow
+#define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_3dnow
+#define H264_CHROMA_MC8_MV0 avg_pixels8_3dnow
+#include "dsputil_h264_template_mmx.c"
+#undef H264_CHROMA_OP
+#undef H264_CHROMA_OP4
+#undef H264_CHROMA_MC8_TMPL
+#undef H264_CHROMA_MC4_TMPL
+#undef H264_CHROMA_MC8_MV0
+
+/***********************************/
+/* weighted prediction */
+
+static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_denom, int weight, int offset, int w, int h)
+{
+    int x, y;
+    offset <<= log2_denom;
+    offset += (1 << log2_denom) >> 1;
+    asm volatile(
+        "movd    %0, %%mm4        \n\t"
+        "movd    %1, %%mm5        \n\t"
+        "movd    %2, %%mm6        \n\t"
+        "pshufw  $0, %%mm4, %%mm4 \n\t"
+        "pshufw  $0, %%mm5, %%mm5 \n\t"
+        "pxor    %%mm7, %%mm7     \n\t"
+        :: "g"(weight), "g"(offset), "g"(log2_denom)
+    );
+    for(y=0; y<h; y+=2){
+        for(x=0; x<w; x+=4){
+            asm volatile(
+                "movd      %0,    %%mm0 \n\t"
+                "movd      %1,    %%mm1 \n\t"
+                "punpcklbw %%mm7, %%mm0 \n\t"
+                "punpcklbw %%mm7, %%mm1 \n\t"
+                "pmullw    %%mm4, %%mm0 \n\t"
+                "pmullw    %%mm4, %%mm1 \n\t"
+                "paddsw    %%mm5, %%mm0 \n\t"
+                "paddsw    %%mm5, %%mm1 \n\t"
+                "psraw     %%mm6, %%mm0 \n\t"
+                "psraw     %%mm6, %%mm1 \n\t"
+                "packuswb  %%mm7, %%mm0 \n\t"
+                "packuswb  %%mm7, %%mm1 \n\t"
+                "movd      %%mm0, %0    \n\t"
+                "movd      %%mm1, %1    \n\t"
+                : "+m"(*(uint32_t*)(dst+x)),
+                  "+m"(*(uint32_t*)(dst+x+stride))
+            );
+        }
+        dst += 2*stride;
+    }
+}
+
+static inline void ff_h264_biweight_WxH_mmx2(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset, int w, int h)
+{
+    int x, y;
+    offset = ((offset + 1) | 1) << log2_denom;
+    asm volatile(
+        "movd    %0, %%mm3        \n\t"
+        "movd    %1, %%mm4        \n\t"
+        "movd    %2, %%mm5        \n\t"
+        "movd    %3, %%mm6        \n\t"
+        "pshufw  $0, %%mm3, %%mm3 \n\t"
+        "pshufw  $0, %%mm4, %%mm4 \n\t"
+        "pshufw  $0, %%mm5, %%mm5 \n\t"
+        "pxor    %%mm7, %%mm7     \n\t"
+        :: "g"(weightd), "g"(weights), "g"(offset), "g"(log2_denom+1)
+    );
+    for(y=0; y<h; y++){
+        for(x=0; x<w; x+=4){
+            asm volatile(
+                "movd      %0,    %%mm0 \n\t"
+                "movd      %1,    %%mm1 \n\t"
+                "punpcklbw %%mm7, %%mm0 \n\t"
+                "punpcklbw %%mm7, %%mm1 \n\t"
+                "pmullw    %%mm3, %%mm0 \n\t"
+                "pmullw    %%mm4, %%mm1 \n\t"
+                "paddsw    %%mm1, %%mm0 \n\t"
+                "paddsw    %%mm5, %%mm0 \n\t"
+                "psraw     %%mm6, %%mm0 \n\t"
+                "packuswb  %%mm0, %%mm0 \n\t"
+                "movd      %%mm0, %0    \n\t"
+                : "+m"(*(uint32_t*)(dst+x))
+                :  "m"(*(uint32_t*)(src+x))
+            );
+        }
+        src += stride;
+        dst += stride;
+    }
+}
+
+#define H264_WEIGHT(W,H) \
+static void ff_h264_biweight_ ## W ## x ## H ## _mmx2(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
+    ff_h264_biweight_WxH_mmx2(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \
+} \
+static void ff_h264_weight_ ## W ## x ## H ## _mmx2(uint8_t *dst, int stride, int log2_denom, int weight, int offset){ \
+    ff_h264_weight_WxH_mmx2(dst, stride, log2_denom, weight, offset, W, H); \
+}
+
+H264_WEIGHT(16,16)
+H264_WEIGHT(16, 8)
+H264_WEIGHT( 8,16)
+H264_WEIGHT( 8, 8)
+H264_WEIGHT( 8, 4)
+H264_WEIGHT( 4, 8)
+H264_WEIGHT( 4, 4)
+H264_WEIGHT( 4, 2)
+
diff --git a/contrib/ffmpeg/libavcodec/i386/idct_mmx.c b/contrib/ffmpeg/libavcodec/i386/idct_mmx.c
new file mode 100644
index 000000000..ba595845a
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/idct_mmx.c
@@ -0,0 +1,597 @@
+/*
+ * idct_mmx.c
+ * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "common.h"
+#include "../dsputil.h"
+
+#include "mmx.h"
+
+#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))
+
+#define ROW_SHIFT 11
+#define COL_SHIFT 6
+
+#define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))
+#define rounder(bias) {round (bias), round (bias)}
+
+#if 0
+/* C row IDCT - its just here to document the MMXEXT and MMX versions */
+static inline void idct_row (int16_t * row, int offset,
+                             int16_t * table, int32_t * rounder)
+{
+    int C1, C2, C3, C4, C5, C6, C7;
+    int a0, a1, a2, a3, b0, b1, b2, b3;
+
+    row += offset;
+
+    C1 = table[1];
+    C2 = table[2];
+    C3 = table[3];
+    C4 = table[4];
+    C5 = table[5];
+    C6 = table[6];
+    C7 = table[7];
+
+    a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
+    a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
+    a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
+    a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;
+
+    b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
+    b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
+    b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
+    b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
+
+    row[0] = (a0 + b0) >> ROW_SHIFT;
+    row[1] = (a1 + b1) >> ROW_SHIFT;
+    row[2] = (a2 + b2) >> ROW_SHIFT;
+    row[3] = (a3 + b3) >> ROW_SHIFT;
+    row[4] = (a3 - b3) >> ROW_SHIFT;
+    row[5] = (a2 - b2) >> ROW_SHIFT;
+    row[6] = (a1 - b1) >> ROW_SHIFT;
+    row[7] = (a0 - b0) >> ROW_SHIFT;
+}
+#endif
+
+
+/* MMXEXT row IDCT */
+
+#define mmxext_table(c1,c2,c3,c4,c5,c6,c7)      {  c4,  c2, -c4, -c2,   \
+                                                   c4,  c6,  c4,  c6,   \
+                                                   c1,  c3, -c1, -c5,   \
+                                                   c5,  c7,  c3, -c7,   \
+                                                   c4, -c6,  c4, -c6,   \
+                                                  -c4,  c2,  c4, -c2,   \
+                                                   c5, -c1,  c3, -c1,   \
+                                                   c7,  c3,  c7, -c5 }
+
+static inline void mmxext_row_head (int16_t * row, int offset, const int16_t * table)
+{
+    movq_m2r (*(row+offset), mm2);      // mm2 = x6 x4 x2 x0
+
+    movq_m2r (*(row+offset+4), mm5);    // mm5 = x7 x5 x3 x1
+    movq_r2r (mm2, mm0);                // mm0 = x6 x4 x2 x0
+
+    movq_m2r (*table, mm3);             // mm3 = -C2 -C4 C2 C4
+    movq_r2r (mm5, mm6);                // mm6 = x7 x5 x3 x1
+
+    movq_m2r (*(table+4), mm4);         // mm4 = C6 C4 C6 C4
+    pmaddwd_r2r (mm0, mm3);             // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
+
+    pshufw_r2r (mm2, mm2, 0x4e);        // mm2 = x2 x0 x6 x4
+}
+
+static inline void mmxext_row (const int16_t * table, const int32_t * rounder)
+{
+    movq_m2r (*(table+8), mm1);         // mm1 = -C5 -C1 C3 C1
+    pmaddwd_r2r (mm2, mm4);             // mm4 = C4*x0+C6*x2 C4*x4+C6*x6
+
+    pmaddwd_m2r (*(table+16), mm0);     // mm0 = C4*x4-C6*x6 C4*x0-C6*x2
+    pshufw_r2r (mm6, mm6, 0x4e);        // mm6 = x3 x1 x7 x5
+
+    movq_m2r (*(table+12), mm7);        // mm7 = -C7 C3 C7 C5
+    pmaddwd_r2r (mm5, mm1);             // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3
+
+    paddd_m2r (*rounder, mm3);          // mm3 += rounder
+    pmaddwd_r2r (mm6, mm7);             // mm7 = C3*x1-C7*x3 C5*x5+C7*x7
+
+    pmaddwd_m2r (*(table+20), mm2);     // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
+    paddd_r2r (mm4, mm3);               // mm3 = a1 a0 + rounder
+
+    pmaddwd_m2r (*(table+24), mm5);     // mm5 = C3*x5-C1*x7 C5*x1-C1*x3
+    movq_r2r (mm3, mm4);                // mm4 = a1 a0 + rounder
+
+    pmaddwd_m2r (*(table+28), mm6);     // mm6 = C7*x1-C5*x3 C7*x5+C3*x7
+    paddd_r2r (mm7, mm1);               // mm1 = b1 b0
+
+    paddd_m2r (*rounder, mm0);          // mm0 += rounder
+    psubd_r2r (mm1, mm3);               // mm3 = a1-b1 a0-b0 + rounder
+
+    psrad_i2r (ROW_SHIFT, mm3);         // mm3 = y6 y7
+    paddd_r2r (mm4, mm1);               // mm1 = a1+b1 a0+b0 + rounder
+
+    paddd_r2r (mm2, mm0);               // mm0 = a3 a2 + rounder
+    psrad_i2r (ROW_SHIFT, mm1);         // mm1 = y1 y0
+
+    paddd_r2r (mm6, mm5);               // mm5 = b3 b2
+    movq_r2r (mm0, mm4);                // mm4 = a3 a2 + rounder
+
+    paddd_r2r (mm5, mm0);               // mm0 = a3+b3 a2+b2 + rounder
+    psubd_r2r (mm5, mm4);               // mm4 = a3-b3 a2-b2 + rounder
+}
+
+static inline void mmxext_row_tail (int16_t * row, int store)
+{
+    psrad_i2r (ROW_SHIFT, mm0);         // mm0 = y3 y2
+
+    psrad_i2r (ROW_SHIFT, mm4);         // mm4 = y4 y5
+
+    packssdw_r2r (mm0, mm1);            // mm1 = y3 y2 y1 y0
+
+    packssdw_r2r (mm3, mm4);            // mm4 = y6 y7 y4 y5
+
+    movq_r2m (mm1, *(row+store));       // save y3 y2 y1 y0
+    pshufw_r2r (mm4, mm4, 0xb1);        // mm4 = y7 y6 y5 y4
+
+    /* slot */
+
+    movq_r2m (mm4, *(row+store+4));     // save y7 y6 y5 y4
+}
+
+static inline void mmxext_row_mid (int16_t * row, int store,
+                                   int offset, const int16_t * table)
+{
+    movq_m2r (*(row+offset), mm2);      // mm2 = x6 x4 x2 x0
+    psrad_i2r (ROW_SHIFT, mm0);         // mm0 = y3 y2
+
+    movq_m2r (*(row+offset+4), mm5);    // mm5 = x7 x5 x3 x1
+    psrad_i2r (ROW_SHIFT, mm4);         // mm4 = y4 y5
+
+    packssdw_r2r (mm0, mm1);            // mm1 = y3 y2 y1 y0
+    movq_r2r (mm5, mm6);                // mm6 = x7 x5 x3 x1
+
+    packssdw_r2r (mm3, mm4);            // mm4 = y6 y7 y4 y5
+    movq_r2r (mm2, mm0);                // mm0 = x6 x4 x2 x0
+
+    movq_r2m (mm1, *(row+store));       // save y3 y2 y1 y0
+    pshufw_r2r (mm4, mm4, 0xb1);        // mm4 = y7 y6 y5 y4
+
+    movq_m2r (*table, mm3);             // mm3 = -C2 -C4 C2 C4
+    movq_r2m (mm4, *(row+store+4));     // save y7 y6 y5 y4
+
+    pmaddwd_r2r (mm0, mm3);             // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
+
+    movq_m2r (*(table+4), mm4);         // mm4 = C6 C4 C6 C4
+    pshufw_r2r (mm2, mm2, 0x4e);        // mm2 = x2 x0 x6 x4
+}
+
+
+/* MMX row IDCT */
+
+#define mmx_table(c1,c2,c3,c4,c5,c6,c7) {  c4,  c2,  c4,  c6,   \
+                                           c4,  c6, -c4, -c2,   \
+                                           c1,  c3,  c3, -c7,   \
+                                           c5,  c7, -c1, -c5,   \
+                                           c4, -c6,  c4, -c2,   \
+                                          -c4,  c2,  c4, -c6,   \
+                                           c5, -c1,  c7, -c5,   \
+                                           c7,  c3,  c3, -c1 }
+
+static inline void mmx_row_head (int16_t * row, int offset, const int16_t * table)
+{
+    movq_m2r (*(row+offset), mm2);      // mm2 = x6 x4 x2 x0
+
+    movq_m2r (*(row+offset+4), mm5);    // mm5 = x7 x5 x3 x1
+    movq_r2r (mm2, mm0);                // mm0 = x6 x4 x2 x0
+
+    movq_m2r (*table, mm3);             // mm3 = C6 C4 C2 C4
+    movq_r2r (mm5, mm6);                // mm6 = x7 x5 x3 x1
+
+    punpckldq_r2r (mm0, mm0);           // mm0 = x2 x0 x2 x0
+
+    movq_m2r (*(table+4), mm4);         // mm4 = -C2 -C4 C6 C4
+    pmaddwd_r2r (mm0, mm3);             // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
+
+    movq_m2r (*(table+8), mm1);         // mm1 = -C7 C3 C3 C1
+    punpckhdq_r2r (mm2, mm2);           // mm2 = x6 x4 x6 x4
+}
+
+static inline void mmx_row (const int16_t * table, const int32_t * rounder)
+{
+    pmaddwd_r2r (mm2, mm4);             // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
+    punpckldq_r2r (mm5, mm5);           // mm5 = x3 x1 x3 x1
+
+    pmaddwd_m2r (*(table+16), mm0);     // mm0 = C4*x0-C2*x2 C4*x0-C6*x2
+    punpckhdq_r2r (mm6, mm6);           // mm6 = x7 x5 x7 x5
+
+    movq_m2r (*(table+12), mm7);        // mm7 = -C5 -C1 C7 C5
+    pmaddwd_r2r (mm5, mm1);             // mm1 = C3*x1-C7*x3 C1*x1+C3*x3
+
+    paddd_m2r (*rounder, mm3);          // mm3 += rounder
+    pmaddwd_r2r (mm6, mm7);             // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7
+
+    pmaddwd_m2r (*(table+20), mm2);     // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
+    paddd_r2r (mm4, mm3);               // mm3 = a1 a0 + rounder
+
+    pmaddwd_m2r (*(table+24), mm5);     // mm5 = C7*x1-C5*x3 C5*x1-C1*x3
+    movq_r2r (mm3, mm4);                // mm4 = a1 a0 + rounder
+
+    pmaddwd_m2r (*(table+28), mm6);     // mm6 = C3*x5-C1*x7 C7*x5+C3*x7
+    paddd_r2r (mm7, mm1);               // mm1 = b1 b0
+
+    paddd_m2r (*rounder, mm0);          // mm0 += rounder
+    psubd_r2r (mm1, mm3);               // mm3 = a1-b1 a0-b0 + rounder
+
+    psrad_i2r (ROW_SHIFT, mm3);         // mm3 = y6 y7
+    paddd_r2r (mm4, mm1);               // mm1 = a1+b1 a0+b0 + rounder
+
+    paddd_r2r (mm2, mm0);               // mm0 = a3 a2 + rounder
+    psrad_i2r (ROW_SHIFT, mm1);         // mm1 = y1 y0
+
+    paddd_r2r (mm6, mm5);               // mm5 = b3 b2
+    movq_r2r (mm0, mm7);                // mm7 = a3 a2 + rounder
+
+    paddd_r2r (mm5, mm0);               // mm0 = a3+b3 a2+b2 + rounder
+    psubd_r2r (mm5, mm7);               // mm7 = a3-b3 a2-b2 + rounder
+}
+
+static inline void mmx_row_tail (int16_t * row, int store)
+{
+    psrad_i2r (ROW_SHIFT, mm0);         // mm0 = y3 y2
+
+    psrad_i2r (ROW_SHIFT, mm7);         // mm7 = y4 y5
+
+    packssdw_r2r (mm0, mm1);            // mm1 = y3 y2 y1 y0
+
+    packssdw_r2r (mm3, mm7);            // mm7 = y6 y7 y4 y5
+
+    movq_r2m (mm1, *(row+store));       // save y3 y2 y1 y0
+    movq_r2r (mm7, mm4);                // mm4 = y6 y7 y4 y5
+
+    pslld_i2r (16, mm7);                // mm7 = y7 0 y5 0
+
+    psrld_i2r (16, mm4);                // mm4 = 0 y6 0 y4
+
+    por_r2r (mm4, mm7);                 // mm7 = y7 y6 y5 y4
+
+    /* slot */
+
+    movq_r2m (mm7, *(row+store+4));     // save y7 y6 y5 y4
+}
+
+static inline void mmx_row_mid (int16_t * row, int store,
+                                int offset, const int16_t * table)
+{
+    movq_m2r (*(row+offset), mm2);      // mm2 = x6 x4 x2 x0
+    psrad_i2r (ROW_SHIFT, mm0);         // mm0 = y3 y2
+
+    movq_m2r (*(row+offset+4), mm5);    // mm5 = x7 x5 x3 x1
+    psrad_i2r (ROW_SHIFT, mm7);         // mm7 = y4 y5
+
+    packssdw_r2r (mm0, mm1);            // mm1 = y3 y2 y1 y0
+    movq_r2r (mm5, mm6);                // mm6 = x7 x5 x3 x1
+
+    packssdw_r2r (mm3, mm7);            // mm7 = y6 y7 y4 y5
+    movq_r2r (mm2, mm0);                // mm0 = x6 x4 x2 x0
+
+    movq_r2m (mm1, *(row+store));       // save y3 y2 y1 y0
+    movq_r2r (mm7, mm1);                // mm1 = y6 y7 y4 y5
+
+    punpckldq_r2r (mm0, mm0);           // mm0 = x2 x0 x2 x0
+    psrld_i2r (16, mm7);                // mm7 = 0 y6 0 y4
+
+    movq_m2r (*table, mm3);             // mm3 = C6 C4 C2 C4
+    pslld_i2r (16, mm1);                // mm1 = y7 0 y5 0
+
+    movq_m2r (*(table+4), mm4);         // mm4 = -C2 -C4 C6 C4
+    por_r2r (mm1, mm7);                 // mm7 = y7 y6 y5 y4
+
+    movq_m2r (*(table+8), mm1);         // mm1 = -C7 C3 C3 C1
+    punpckhdq_r2r (mm2, mm2);           // mm2 = x6 x4 x6 x4
+
+    movq_r2m (mm7, *(row+store+4));     // save y7 y6 y5 y4
+    pmaddwd_r2r (mm0, mm3);             // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
+}
+
+
+#if 0
+// C column IDCT - its just here to document the MMXEXT and MMX versions
+static inline void idct_col (int16_t * col, int offset)
+{
+/* multiplication - as implemented on mmx */
+#define F(c,x) (((c) * (x)) >> 16)
+
+/* saturation - it helps us handle torture test cases */
+#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))
+
+    int16_t x0, x1, x2, x3, x4, x5, x6, x7;
+    int16_t y0, y1, y2, y3, y4, y5, y6, y7;
+    int16_t a0, a1, a2, a3, b0, b1, b2, b3;
+    int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12;
+
+    col += offset;
+
+    x0 = col[0*8];
+    x1 = col[1*8];
+    x2 = col[2*8];
+    x3 = col[3*8];
+    x4 = col[4*8];
+    x5 = col[5*8];
+    x6 = col[6*8];
+    x7 = col[7*8];
+
+    u04 = S (x0 + x4);
+    v04 = S (x0 - x4);
+    u26 = S (F (T2, x6) + x2);
+    v26 = S (F (T2, x2) - x6);
+
+    a0 = S (u04 + u26);
+    a1 = S (v04 + v26);
+    a2 = S (v04 - v26);
+    a3 = S (u04 - u26);
+
+    u17 = S (F (T1, x7) + x1);
+    v17 = S (F (T1, x1) - x7);
+    u35 = S (F (T3, x5) + x3);
+    v35 = S (F (T3, x3) - x5);
+
+    b0 = S (u17 + u35);
+    b3 = S (v17 - v35);
+    u12 = S (u17 - u35);
+    v12 = S (v17 + v35);
+    u12 = S (2 * F (C4, u12));
+    v12 = S (2 * F (C4, v12));
+    b1 = S (u12 + v12);
+    b2 = S (u12 - v12);
+
+    y0 = S (a0 + b0) >> COL_SHIFT;
+    y1 = S (a1 + b1) >> COL_SHIFT;
+    y2 = S (a2 + b2) >> COL_SHIFT;
+    y3 = S (a3 + b3) >> COL_SHIFT;
+
+    y4 = S (a3 - b3) >> COL_SHIFT;
+    y5 = S (a2 - b2) >> COL_SHIFT;
+    y6 = S (a1 - b1) >> COL_SHIFT;
+    y7 = S (a0 - b0) >> COL_SHIFT;
+
+    col[0*8] = y0;
+    col[1*8] = y1;
+    col[2*8] = y2;
+    col[3*8] = y3;
+    col[4*8] = y4;
+    col[5*8] = y5;
+    col[6*8] = y6;
+    col[7*8] = y7;
+}
+#endif
+
+
+// MMX column IDCT
+static inline void idct_col (int16_t * col, int offset)
+{
+#define T1 13036
+#define T2 27146
+#define T3 43790
+#define C4 23170
+
+    static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
+    static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
+    static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
+    static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
+
+    /* column code adapted from peter gubanov */
+    /* http://www.elecard.com/peter/idct.shtml */
+
+    movq_m2r (*_T1, mm0);               // mm0 = T1
+
+    movq_m2r (*(col+offset+1*8), mm1);  // mm1 = x1
+    movq_r2r (mm0, mm2);                // mm2 = T1
+
+    movq_m2r (*(col+offset+7*8), mm4);  // mm4 = x7
+    pmulhw_r2r (mm1, mm0);              // mm0 = T1*x1
+
+    movq_m2r (*_T3, mm5);               // mm5 = T3
+    pmulhw_r2r (mm4, mm2);              // mm2 = T1*x7
+
+    movq_m2r (*(col+offset+5*8), mm6);  // mm6 = x5
+    movq_r2r (mm5, mm7);                // mm7 = T3-1
+
+    movq_m2r (*(col+offset+3*8), mm3);  // mm3 = x3
+    psubsw_r2r (mm4, mm0);              // mm0 = v17
+
+    movq_m2r (*_T2, mm4);               // mm4 = T2
+    pmulhw_r2r (mm3, mm5);              // mm5 = (T3-1)*x3
+
+    paddsw_r2r (mm2, mm1);              // mm1 = u17
+    pmulhw_r2r (mm6, mm7);              // mm7 = (T3-1)*x5
+
+    /* slot */
+
+    movq_r2r (mm4, mm2);                // mm2 = T2
+    paddsw_r2r (mm3, mm5);              // mm5 = T3*x3
+
+    pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2
+    paddsw_r2r (mm6, mm7);              // mm7 = T3*x5
+
+    psubsw_r2r (mm6, mm5);              // mm5 = v35
+    paddsw_r2r (mm3, mm7);              // mm7 = u35
+
+    movq_m2r (*(col+offset+6*8), mm3);  // mm3 = x6
+    movq_r2r (mm0, mm6);                // mm6 = v17
+
+    pmulhw_r2r (mm3, mm2);              // mm2 = T2*x6
+    psubsw_r2r (mm5, mm0);              // mm0 = b3
+
+    psubsw_r2r (mm3, mm4);              // mm4 = v26
+    paddsw_r2r (mm6, mm5);              // mm5 = v12
+
+    movq_r2m (mm0, *(col+offset+3*8));  // save b3 in scratch0
+    movq_r2r (mm1, mm6);                // mm6 = u17
+
+    paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26
+    paddsw_r2r (mm7, mm6);              // mm6 = b0
+
+    psubsw_r2r (mm7, mm1);              // mm1 = u12
+    movq_r2r (mm1, mm7);                // mm7 = u12
+
+    movq_m2r (*(col+offset+0*8), mm3);  // mm3 = x0
+    paddsw_r2r (mm5, mm1);              // mm1 = u12+v12
+
+    movq_m2r (*_C4, mm0);               // mm0 = C4/2
+    psubsw_r2r (mm5, mm7);              // mm7 = u12-v12
+
+    movq_r2m (mm6, *(col+offset+5*8));  // save b0 in scratch1
+    pmulhw_r2r (mm0, mm1);              // mm1 = b1/2
+
+    movq_r2r (mm4, mm6);                // mm6 = v26
+    pmulhw_r2r (mm0, mm7);              // mm7 = b2/2
+
+    movq_m2r (*(col+offset+4*8), mm5);  // mm5 = x4
+    movq_r2r (mm3, mm0);                // mm0 = x0
+
+    psubsw_r2r (mm5, mm3);              // mm3 = v04
+    paddsw_r2r (mm5, mm0);              // mm0 = u04
+
+    paddsw_r2r (mm3, mm4);              // mm4 = a1
+    movq_r2r (mm0, mm5);                // mm5 = u04
+
+    psubsw_r2r (mm6, mm3);              // mm3 = a2
+    paddsw_r2r (mm2, mm5);              // mm5 = a0
+
+    paddsw_r2r (mm1, mm1);              // mm1 = b1
+    psubsw_r2r (mm2, mm0);              // mm0 = a3
+
+    paddsw_r2r (mm7, mm7);              // mm7 = b2
+    movq_r2r (mm3, mm2);                // mm2 = a2
+
+    movq_r2r (mm4, mm6);                // mm6 = a1
+    paddsw_r2r (mm7, mm3);              // mm3 = a2+b2
+
+    psraw_i2r (COL_SHIFT, mm3);         // mm3 = y2
+    paddsw_r2r (mm1, mm4);              // mm4 = a1+b1
+
+    psraw_i2r (COL_SHIFT, mm4);         // mm4 = y1
+    psubsw_r2r (mm1, mm6);              // mm6 = a1-b1
+
+    movq_m2r (*(col+offset+5*8), mm1);  // mm1 = b0
+    psubsw_r2r (mm7, mm2);              // mm2 = a2-b2
+
+    psraw_i2r (COL_SHIFT, mm6);         // mm6 = y6
+    movq_r2r (mm5, mm7);                // mm7 = a0
+
+    movq_r2m (mm4, *(col+offset+1*8));  // save y1
+    psraw_i2r (COL_SHIFT, mm2);         // mm2 = y5
+
+    movq_r2m (mm3, *(col+offset+2*8));  // save y2
+    paddsw_r2r (mm1, mm5);              // mm5 = a0+b0
+
+    movq_m2r (*(col+offset+3*8), mm4);  // mm4 = b3
+    psubsw_r2r (mm1, mm7);              // mm7 = a0-b0
+
+    psraw_i2r (COL_SHIFT, mm5);         // mm5 = y0
+    movq_r2r (mm0, mm3);                // mm3 = a3
+
+    movq_r2m (mm2, *(col+offset+5*8));  // save y5
+    psubsw_r2r (mm4, mm3);              // mm3 = a3-b3
+
+    psraw_i2r (COL_SHIFT, mm7);         // mm7 = y7
+    paddsw_r2r (mm0, mm4);              // mm4 = a3+b3
+
+    movq_r2m (mm5, *(col+offset+0*8));  // save y0
+    psraw_i2r (COL_SHIFT, mm3);         // mm3 = y4
+
+    movq_r2m (mm6, *(col+offset+6*8));  // save y6
+    psraw_i2r (COL_SHIFT, mm4);         // mm4 = y3
+
+    movq_r2m (mm7, *(col+offset+7*8));  // save y7
+
+    movq_r2m (mm3, *(col+offset+4*8));  // save y4
+
+    movq_r2m (mm4, *(col+offset+3*8));  // save y3
+
+#undef T1
+#undef T2
+#undef T3
+#undef C4
+}
+
+static const int32_t rounder0[] ATTR_ALIGN(8) =
+    rounder ((1 << (COL_SHIFT - 1)) - 0.5);
+static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
+static const int32_t rounder1[] ATTR_ALIGN(8) =
+    rounder (1.25683487303);        /* C1*(C1/C4+C1+C7)/2 */
+static const int32_t rounder7[] ATTR_ALIGN(8) =
+    rounder (-0.25);                /* C1*(C7/C4+C7-C1)/2 */
+static const int32_t rounder2[] ATTR_ALIGN(8) =
+    rounder (0.60355339059);        /* C2 * (C6+C2)/2 */
+static const int32_t rounder6[] ATTR_ALIGN(8) =
+    rounder (-0.25);                /* C2 * (C6-C2)/2 */
+static const int32_t rounder3[] ATTR_ALIGN(8) =
+    rounder (0.087788325588);       /* C3*(-C3/C4+C3+C5)/2 */
+static const int32_t rounder5[] ATTR_ALIGN(8) =
+    rounder (-0.441341716183);      /* C3*(-C5/C4+C5-C3)/2 */
+
+#undef COL_SHIFT
+#undef ROW_SHIFT
+
+#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
+void idct (int16_t * block)                                             \
+{                                                                       \
+    static const int16_t table04[] ATTR_ALIGN(16) =                     \
+        table (22725, 21407, 19266, 16384, 12873,  8867, 4520);         \
+    static const int16_t table17[] ATTR_ALIGN(16) =                     \
+        table (31521, 29692, 26722, 22725, 17855, 12299, 6270);         \
+    static const int16_t table26[] ATTR_ALIGN(16) =                     \
+        table (29692, 27969, 25172, 21407, 16819, 11585, 5906);         \
+    static const int16_t table35[] ATTR_ALIGN(16) =                     \
+        table (26722, 25172, 22654, 19266, 15137, 10426, 5315);         \
+                                                                        \
+    idct_row_head (block, 0*8, table04);                                \
+    idct_row (table04, rounder0);                                       \
+    idct_row_mid (block, 0*8, 4*8, table04);                            \
+    idct_row (table04, rounder4);                                       \
+    idct_row_mid (block, 4*8, 1*8, table17);                            \
+    idct_row (table17, rounder1);                                       \
+    idct_row_mid (block, 1*8, 7*8, table17);                            \
+    idct_row (table17, rounder7);                                       \
+    idct_row_mid (block, 7*8, 2*8, table26);                            \
+    idct_row (table26, rounder2);                                       \
+    idct_row_mid (block, 2*8, 6*8, table26);                            \
+    idct_row (table26, rounder6);                                       \
+    idct_row_mid (block, 6*8, 3*8, table35);                            \
+    idct_row (table35, rounder3);                                       \
+    idct_row_mid (block, 3*8, 5*8, table35);                            \
+    idct_row (table35, rounder5);                                       \
+    idct_row_tail (block, 5*8);                                         \
+                                                                        \
+    idct_col (block, 0);                                                \
+    idct_col (block, 4);                                                \
+}
+
+void ff_mmx_idct(DCTELEM *block);
+void ff_mmxext_idct(DCTELEM *block);
+
+declare_idct (ff_mmxext_idct, mmxext_table,
+              mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
+
+declare_idct (ff_mmx_idct, mmx_table,
+              mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
+
diff --git a/contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c b/contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c
new file mode 100644
index 000000000..43eb329cc
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/idct_mmx_xvid.c
@@ -0,0 +1,535 @@
+///****************************************************************************
+// *
+// *  XVID MPEG-4 VIDEO CODEC
+// *  - MMX and XMM forward discrete cosine transform -
+// *
+// *  Copyright(C) 2001 Peter Ross <pross@xvid.org>
+// *
+// * This file is part of FFmpeg.
+// *
+// * FFmpeg is free software; you can redistribute it and/or
+// * modify it under the terms of the GNU Lesser General Public
+// * License as published by the Free Software Foundation; either
+// * version 2.1 of the License, or (at your option) any later version.
+// *
+// * FFmpeg is distributed in the hope that it will be useful,
+// * but WITHOUT ANY WARRANTY; without even the implied warranty of
+// * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// * Lesser General Public License for more details.
+// *
+// * You should have received a copy of the GNU Lesser General Public License
+// * along with FFmpeg; if not, write to the Free Software Foundation,
+// * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+// *
+// * $Id: idct_mmx_xvid.c,v 1.1.2.1 2006/12/02 01:19:55 dgp85 Exp $
+// *
+// ***************************************************************************/
+
+// ****************************************************************************
+//
+// Originally provided by Intel at AP-922
+// http://developer.intel.com/vtune/cbts/strmsimd/922down.htm
+// (See more app notes at http://developer.intel.com/vtune/cbts/strmsimd/appnotes.htm)
+// but in a limited edition.
+// New macro implements a column part for precise iDCT
+// The routine precision now satisfies IEEE standard 1180-1990.
+//
+// Copyright(C) 2000-2001 Peter Gubanov <peter@elecard.net.ru>
+// Rounding trick Copyright(C) 2000 Michel Lespinasse <walken@zoy.org>
+//
+// http://www.elecard.com/peter/idct.html
+// http://www.linuxvideo.org/mpeg2dec/
+//
+// ***************************************************************************/
+//
+// These examples contain code fragments for first stage iDCT 8x8
+// (for rows) and first stage DCT 8x8 (for columns)
+//
+
+// conversion to gcc syntax by michael niedermayer
+
+
+#include <inttypes.h>
+#include "../avcodec.h"
+
+//=============================================================================
+// Macros and other preprocessor constants
+//=============================================================================
+
+#define BITS_INV_ACC    5                              // 4 or 5 for IEEE
+#define SHIFT_INV_ROW   (16 - BITS_INV_ACC) //11
+#define SHIFT_INV_COL   (1 + BITS_INV_ACC) //6
+#define RND_INV_ROW     (1024 * (6 - BITS_INV_ACC))
+#define RND_INV_COL     (16 * (BITS_INV_ACC - 3))
+#define RND_INV_CORR    (RND_INV_COL - 1)
+
+#define BITS_FRW_ACC    3                              // 2 or 3 for accuracy
+#define SHIFT_FRW_COL   BITS_FRW_ACC
+#define SHIFT_FRW_ROW   (BITS_FRW_ACC + 17)
+#define RND_FRW_ROW     (262144*(BITS_FRW_ACC - 1))
+
+
+//-----------------------------------------------------------------------------
+// Various memory constants (trigonometric values or rounding values)
+//-----------------------------------------------------------------------------
+
+
+static const int16_t tg_1_16[4*4] attribute_used __attribute__ ((aligned(8))) = {
+  13036,13036,13036,13036,        // tg * (2<<16) + 0.5
+  27146,27146,27146,27146,        // tg * (2<<16) + 0.5
+  -21746,-21746,-21746,-21746,    // tg * (2<<16) + 0.5
+  23170,23170,23170,23170};       // cos * (2<<15) + 0.5
+
+static const int32_t rounder_0[2*8] attribute_used __attribute__ ((aligned(8))) = {
+  65536,65536,
+  3597,3597,
+  2260,2260,
+  1203,1203,
+  0,0,
+  120,120,
+  512,512,
+  512,512};
+
+//-----------------------------------------------------------------------------
+//
+// The first stage iDCT 8x8 - inverse DCTs of rows
+//
+//-----------------------------------------------------------------------------
+// The 8-point inverse DCT direct algorithm
+//-----------------------------------------------------------------------------
+//
+// static const short w[32] = {
+//       FIX(cos_4_16),  FIX(cos_2_16),  FIX(cos_4_16),  FIX(cos_6_16),
+//       FIX(cos_4_16),  FIX(cos_6_16), -FIX(cos_4_16), -FIX(cos_2_16),
+//       FIX(cos_4_16), -FIX(cos_6_16), -FIX(cos_4_16),  FIX(cos_2_16),
+//       FIX(cos_4_16), -FIX(cos_2_16),  FIX(cos_4_16), -FIX(cos_6_16),
+//       FIX(cos_1_16),  FIX(cos_3_16),  FIX(cos_5_16),  FIX(cos_7_16),
+//       FIX(cos_3_16), -FIX(cos_7_16), -FIX(cos_1_16), -FIX(cos_5_16),
+//       FIX(cos_5_16), -FIX(cos_1_16),  FIX(cos_7_16),  FIX(cos_3_16),
+//       FIX(cos_7_16), -FIX(cos_5_16),  FIX(cos_3_16), -FIX(cos_1_16) };
+//
+// #define DCT_8_INV_ROW(x, y)
+// {
+//       int a0, a1, a2, a3, b0, b1, b2, b3;
+//
+//       a0 =x[0]*w[0]+x[2]*w[1]+x[4]*w[2]+x[6]*w[3];
+//       a1 =x[0]*w[4]+x[2]*w[5]+x[4]*w[6]+x[6]*w[7];
+//       a2 = x[0] * w[ 8] + x[2] * w[ 9] + x[4] * w[10] + x[6] * w[11];
+//       a3 = x[0] * w[12] + x[2] * w[13] + x[4] * w[14] + x[6] * w[15];
+//       b0 = x[1] * w[16] + x[3] * w[17] + x[5] * w[18] + x[7] * w[19];
+//       b1 = x[1] * w[20] + x[3] * w[21] + x[5] * w[22] + x[7] * w[23];
+//       b2 = x[1] * w[24] + x[3] * w[25] + x[5] * w[26] + x[7] * w[27];
+//       b3 = x[1] * w[28] + x[3] * w[29] + x[5] * w[30] + x[7] * w[31];
+//
+//       y[0] = SHIFT_ROUND ( a0 + b0 );
+//       y[1] = SHIFT_ROUND ( a1 + b1 );
+//       y[2] = SHIFT_ROUND ( a2 + b2 );
+//       y[3] = SHIFT_ROUND ( a3 + b3 );
+//       y[4] = SHIFT_ROUND ( a3 - b3 );
+//       y[5] = SHIFT_ROUND ( a2 - b2 );
+//       y[6] = SHIFT_ROUND ( a1 - b1 );
+//       y[7] = SHIFT_ROUND ( a0 - b0 );
+// }
+//
+//-----------------------------------------------------------------------------
+//
+// In this implementation the outputs of the iDCT-1D are multiplied
+//       for rows 0,4 - by cos_4_16,
+//       for rows 1,7 - by cos_1_16,
+//       for rows 2,6 - by cos_2_16,
+//       for rows 3,5 - by cos_3_16
+// and are shifted to the left for better accuracy
+//
+// For the constants used,
+//       FIX(float_const) = (short) (float_const * (1<<15) + 0.5)
+//
+//-----------------------------------------------------------------------------
+
+//-----------------------------------------------------------------------------
+// Tables for mmx processors
+//-----------------------------------------------------------------------------
+
+// Table for rows 0,4 - constants are multiplied by cos_4_16
+static const int16_t tab_i_04_mmx[32*4] attribute_used __attribute__ ((aligned(8))) = {
+  16384,16384,16384,-16384,       // movq-> w06 w04 w02 w00
+  21407,8867,8867,-21407,         // w07 w05 w03 w01
+  16384,-16384,16384,16384,       // w14 w12 w10 w08
+  -8867,21407,-21407,-8867,       // w15 w13 w11 w09
+  22725,12873,19266,-22725,       // w22 w20 w18 w16
+  19266,4520,-4520,-12873,        // w23 w21 w19 w17
+  12873,4520,4520,19266,          // w30 w28 w26 w24
+  -22725,19266,-12873,-22725,     // w31 w29 w27 w25
+// Table for rows 1,7 - constants are multiplied by cos_1_16
+  22725,22725,22725,-22725,       // movq-> w06 w04 w02 w00
+  29692,12299,12299,-29692,       // w07 w05 w03 w01
+  22725,-22725,22725,22725,       // w14 w12 w10 w08
+  -12299,29692,-29692,-12299,     // w15 w13 w11 w09
+  31521,17855,26722,-31521,       // w22 w20 w18 w16
+  26722,6270,-6270,-17855,        // w23 w21 w19 w17
+  17855,6270,6270,26722,          // w30 w28 w26 w24
+  -31521,26722,-17855,-31521,     // w31 w29 w27 w25
+// Table for rows 2,6 - constants are multiplied by cos_2_16
+  21407,21407,21407,-21407,       // movq-> w06 w04 w02 w00
+  27969,11585,11585,-27969,       // w07 w05 w03 w01
+  21407,-21407,21407,21407,       // w14 w12 w10 w08
+  -11585,27969,-27969,-11585,     // w15 w13 w11 w09
+  29692,16819,25172,-29692,       // w22 w20 w18 w16
+  25172,5906,-5906,-16819,        // w23 w21 w19 w17
+  16819,5906,5906,25172,          // w30 w28 w26 w24
+  -29692,25172,-16819,-29692,     // w31 w29 w27 w25
+// Table for rows 3,5 - constants are multiplied by cos_3_16
+  19266,19266,19266,-19266,       // movq-> w06 w04 w02 w00
+  25172,10426,10426,-25172,       // w07 w05 w03 w01
+  19266,-19266,19266,19266,       // w14 w12 w10 w08
+  -10426,25172,-25172,-10426,     // w15 w13 w11 w09
+  26722,15137,22654,-26722,       // w22 w20 w18 w16
+  22654,5315,-5315,-15137,        // w23 w21 w19 w17
+  15137,5315,5315,22654,          // w30 w28 w26 w24
+  -26722,22654,-15137,-26722,     // w31 w29 w27 w25
+};
+//-----------------------------------------------------------------------------
+// Tables for xmm processors
+//-----------------------------------------------------------------------------
+
+// %3 for rows 0,4 - constants are multiplied by cos_4_16
+static const int16_t tab_i_04_xmm[32*4] attribute_used __attribute__ ((aligned(8))) = {
+  16384,21407,16384,8867,      // movq-> w05 w04 w01 w00
+  16384,8867,-16384,-21407,    // w07 w06 w03 w02
+  16384,-8867,16384,-21407,    // w13 w12 w09 w08
+  -16384,21407,16384,-8867,    // w15 w14 w11 w10
+  22725,19266,19266,-4520,     // w21 w20 w17 w16
+  12873,4520,-22725,-12873,    // w23 w22 w19 w18
+  12873,-22725,4520,-12873,    // w29 w28 w25 w24
+  4520,19266,19266,-22725,     // w31 w30 w27 w26
+// %3 for rows 1,7 - constants are multiplied by cos_1_16
+  22725,29692,22725,12299,     // movq-> w05 w04 w01 w00
+  22725,12299,-22725,-29692,   // w07 w06 w03 w02
+  22725,-12299,22725,-29692,   // w13 w12 w09 w08
+  -22725,29692,22725,-12299,   // w15 w14 w11 w10
+  31521,26722,26722,-6270,     // w21 w20 w17 w16
+  17855,6270,-31521,-17855,    // w23 w22 w19 w18
+  17855,-31521,6270,-17855,    // w29 w28 w25 w24
+  6270,26722,26722,-31521,     // w31 w30 w27 w26
+// %3 for rows 2,6 - constants are multiplied by cos_2_16
+  21407,27969,21407,11585,     // movq-> w05 w04 w01 w00
+  21407,11585,-21407,-27969,   // w07 w06 w03 w02
+  21407,-11585,21407,-27969,   // w13 w12 w09 w08
+  -21407,27969,21407,-11585,   // w15 w14 w11 w10
+  29692,25172,25172,-5906,     // w21 w20 w17 w16
+  16819,5906,-29692,-16819,    // w23 w22 w19 w18
+  16819,-29692,5906,-16819,    // w29 w28 w25 w24
+  5906,25172,25172,-29692,     // w31 w30 w27 w26
+// %3 for rows 3,5 - constants are multiplied by cos_3_16
+  19266,25172,19266,10426,     // movq-> w05 w04 w01 w00
+  19266,10426,-19266,-25172,   // w07 w06 w03 w02
+  19266,-10426,19266,-25172,   // w13 w12 w09 w08
+  -19266,25172,19266,-10426,   // w15 w14 w11 w10
+  26722,22654,22654,-5315,     // w21 w20 w17 w16
+  15137,5315,-26722,-15137,    // w23 w22 w19 w18
+  15137,-26722,5315,-15137,    // w29 w28 w25 w24
+  5315,22654,22654,-26722,     // w31 w30 w27 w26
+};
+//=============================================================================
+// Helper macros for the code
+//=============================================================================
+
+//-----------------------------------------------------------------------------
+// DCT_8_INV_ROW_MMX( INP, OUT, TABLE, ROUNDER
+//-----------------------------------------------------------------------------
+
+#define DCT_8_INV_ROW_MMX(A1,A2,A3,A4)\
+  "movq " #A1 ",%%mm0              \n\t"/* 0 ; x3 x2 x1 x0*/\
+  "movq 8+" #A1 ",%%mm1            \n\t"/* 1 ; x7 x6 x5 x4*/\
+  "movq %%mm0,%%mm2            \n\t"/* 2  ; x3 x2 x1 x0*/\
+  "movq " #A3 ",%%mm3              \n\t"/* 3 ; w06 w04 w02 w00*/\
+  "punpcklwd %%mm1,%%mm0       \n\t"/* x5 x1 x4 x0*/\
+  "movq %%mm0,%%mm5            \n\t"/* 5 ; x5 x1 x4 x0*/\
+  "punpckldq %%mm0,%%mm0       \n\t"/* x4 x0 x4 x0*/\
+  "movq 8+" #A3 ",%%mm4            \n\t"/* 4 ; w07 w05 w03 w01*/\
+  "punpckhwd %%mm1,%%mm2       \n\t"/* 1 ; x7 x3 x6 x2*/\
+  "pmaddwd %%mm0,%%mm3         \n\t"/* x4*w06+x0*w04 x4*w02+x0*w00*/\
+  "movq %%mm2,%%mm6            \n\t"/* 6 ; x7 x3 x6 x2*/\
+  "movq 32+" #A3 ",%%mm1           \n\t"/* 1 ; w22 w20 w18 w16*/\
+  "punpckldq %%mm2,%%mm2       \n\t"/* x6 x2 x6 x2*/\
+  "pmaddwd %%mm2,%%mm4         \n\t"/* x6*w07+x2*w05 x6*w03+x2*w01*/\
+  "punpckhdq %%mm5,%%mm5       \n\t"/* x5 x1 x5 x1*/\
+  "pmaddwd 16+" #A3 ",%%mm0        \n\t"/* x4*w14+x0*w12 x4*w10+x0*w08*/\
+  "punpckhdq %%mm6,%%mm6       \n\t"/* x7 x3 x7 x3*/\
+  "movq 40+" #A3 ",%%mm7           \n\t"/* 7 ; w23 w21 w19 w17*/\
+  "pmaddwd %%mm5,%%mm1         \n\t"/* x5*w22+x1*w20 x5*w18+x1*w16*/\
+  "paddd " #A4 ",%%mm3             \n\t"/* +%4*/\
+  "pmaddwd %%mm6,%%mm7         \n\t"/* x7*w23+x3*w21 x7*w19+x3*w17*/\
+  "pmaddwd 24+" #A3 ",%%mm2        \n\t"/* x6*w15+x2*w13 x6*w11+x2*w09*/\
+  "paddd %%mm4,%%mm3           \n\t"/* 4 ; a1=sum(even1) a0=sum(even0)*/\
+  "pmaddwd 48+" #A3 ",%%mm5        \n\t"/* x5*w30+x1*w28 x5*w26+x1*w24*/\
+  "movq %%mm3,%%mm4            \n\t"/* 4 ; a1 a0*/\
+  "pmaddwd 56+" #A3 ",%%mm6        \n\t"/* x7*w31+x3*w29 x7*w27+x3*w25*/\
+  "paddd %%mm7,%%mm1           \n\t"/* 7 ; b1=sum(odd1) b0=sum(odd0)*/\
+  "paddd " #A4 ",%%mm0             \n\t"/* +%4*/\
+  "psubd %%mm1,%%mm3           \n\t"/* a1-b1 a0-b0*/\
+  "psrad $11,%%mm3 \n\t"/* y6=a1-b1 y7=a0-b0*/\
+  "paddd %%mm4,%%mm1           \n\t"/* 4 ; a1+b1 a0+b0*/\
+  "paddd %%mm2,%%mm0           \n\t"/* 2 ; a3=sum(even3) a2=sum(even2)*/\
+  "psrad $11,%%mm1 \n\t"/* y1=a1+b1 y0=a0+b0*/\
+  "paddd %%mm6,%%mm5           \n\t"/* 6 ; b3=sum(odd3) b2=sum(odd2)*/\
+  "movq %%mm0,%%mm4            \n\t"/* 4 ; a3 a2*/\
+  "paddd %%mm5,%%mm0           \n\t"/* a3+b3 a2+b2*/\
+  "psubd %%mm5,%%mm4           \n\t"/* 5 ; a3-b3 a2-b2*/\
+  "psrad $11,%%mm0 \n\t"/* y3=a3+b3 y2=a2+b2*/\
+  "psrad $11,%%mm4 \n\t"/* y4=a3-b3 y5=a2-b2*/\
+  "packssdw %%mm0,%%mm1        \n\t"/* 0 ; y3 y2 y1 y0*/\
+  "packssdw %%mm3,%%mm4        \n\t"/* 3 ; y6 y7 y4 y5*/\
+  "movq %%mm4,%%mm7            \n\t"/* 7 ; y6 y7 y4 y5*/\
+  "psrld $16,%%mm4            \n\t"/* 0 y6 0 y4*/\
+  "pslld $16,%%mm7            \n\t"/* y7 0 y5 0*/\
+  "movq %%mm1," #A2 "              \n\t"/* 1 ; save y3 y2 y1 y0*/\
+  "por %%mm4,%%mm7             \n\t"/* 4 ; y7 y6 y5 y4*/\
+  "movq %%mm7,8            +" #A2 "\n\t"/* 7 ; save y7 y6 y5 y4*/\
+
+
+//-----------------------------------------------------------------------------
+// DCT_8_INV_ROW_XMM( INP, OUT, TABLE, ROUNDER
+//-----------------------------------------------------------------------------
+
+#define DCT_8_INV_ROW_XMM(A1,A2,A3,A4)\
+  "movq " #A1 ",%%mm0                  \n\t"/* 0     ; x3 x2 x1 x0*/\
+  "movq 8+" #A1 ",%%mm1                \n\t"/* 1     ; x7 x6 x5 x4*/\
+  "movq %%mm0,%%mm2                \n\t"/* 2     ; x3 x2 x1 x0*/\
+  "movq " #A3 ",%%mm3                  \n\t"/* 3     ; w05 w04 w01 w00*/\
+  "pshufw $0x88,%%mm0,%%mm0        \n\t"/* x2 x0 x2 x0*/\
+  "movq 8+" #A3 ",%%mm4                \n\t"/* 4     ; w07 w06 w03 w02*/\
+  "movq %%mm1,%%mm5                \n\t"/* 5     ; x7 x6 x5 x4*/\
+  "pmaddwd %%mm0,%%mm3             \n\t"/* x2*w05+x0*w04 x2*w01+x0*w00*/\
+  "movq 32+" #A3 ",%%mm6               \n\t"/* 6     ; w21 w20 w17 w16*/\
+  "pshufw $0x88,%%mm1,%%mm1        \n\t"/* x6 x4 x6 x4*/\
+  "pmaddwd %%mm1,%%mm4             \n\t"/* x6*w07+x4*w06 x6*w03+x4*w02*/\
+  "movq 40+" #A3 ",%%mm7               \n\t"/* 7    ; w23 w22 w19 w18*/\
+  "pshufw $0xdd,%%mm2,%%mm2        \n\t"/* x3 x1 x3 x1*/\
+  "pmaddwd %%mm2,%%mm6             \n\t"/* x3*w21+x1*w20 x3*w17+x1*w16*/\
+  "pshufw $0xdd,%%mm5,%%mm5        \n\t"/* x7 x5 x7 x5*/\
+  "pmaddwd %%mm5,%%mm7             \n\t"/* x7*w23+x5*w22 x7*w19+x5*w18*/\
+  "paddd " #A4 ",%%mm3                 \n\t"/* +%4*/\
+  "pmaddwd 16+" #A3 ",%%mm0            \n\t"/* x2*w13+x0*w12 x2*w09+x0*w08*/\
+  "paddd %%mm4,%%mm3               \n\t"/* 4     ; a1=sum(even1) a0=sum(even0)*/\
+  "pmaddwd 24+" #A3 ",%%mm1            \n\t"/* x6*w15+x4*w14 x6*w11+x4*w10*/\
+  "movq %%mm3,%%mm4                \n\t"/* 4     ; a1 a0*/\
+  "pmaddwd 48+" #A3 ",%%mm2            \n\t"/* x3*w29+x1*w28 x3*w25+x1*w24*/\
+  "paddd %%mm7,%%mm6               \n\t"/* 7     ; b1=sum(odd1) b0=sum(odd0)*/\
+  "pmaddwd 56+" #A3 ",%%mm5            \n\t"/* x7*w31+x5*w30 x7*w27+x5*w26*/\
+  "paddd %%mm6,%%mm3               \n\t"/* a1+b1 a0+b0*/\
+  "paddd " #A4 ",%%mm0                 \n\t"/* +%4*/\
+  "psrad $11,%%mm3     \n\t"/* y1=a1+b1 y0=a0+b0*/\
+  "paddd %%mm1,%%mm0               \n\t"/* 1     ; a3=sum(even3) a2=sum(even2)*/\
+  "psubd %%mm6,%%mm4               \n\t"/* 6     ; a1-b1 a0-b0*/\
+  "movq %%mm0,%%mm7                \n\t"/* 7     ; a3 a2*/\
+  "paddd %%mm5,%%mm2               \n\t"/* 5     ; b3=sum(odd3) b2=sum(odd2)*/\
+  "paddd %%mm2,%%mm0               \n\t"/* a3+b3 a2+b2*/\
+  "psrad $11,%%mm4     \n\t"/* y6=a1-b1 y7=a0-b0*/\
+  "psubd %%mm2,%%mm7               \n\t"/* 2     ; a3-b3 a2-b2*/\
+  "psrad $11,%%mm0     \n\t"/* y3=a3+b3 y2=a2+b2*/\
+  "psrad $11,%%mm7     \n\t"/* y4=a3-b3 y5=a2-b2*/\
+  "packssdw %%mm0,%%mm3            \n\t"/* 0     ; y3 y2 y1 y0*/\
+  "packssdw %%mm4,%%mm7            \n\t"/* 4     ; y6 y7 y4 y5*/\
+  "movq %%mm3, " #A2 "                  \n\t"/* 3     ; save y3 y2 y1 y0*/\
+  "pshufw $0xb1,%%mm7,%%mm7        \n\t"/* y7 y6 y5 y4*/\
+  "movq %%mm7,8                +" #A2 "\n\t"/* 7     ; save y7 y6 y5 y4*/\
+
+
+//-----------------------------------------------------------------------------
+//
+// The first stage DCT 8x8 - forward DCTs of columns
+//
+// The %2puts are multiplied
+// for rows 0,4 - on cos_4_16,
+// for rows 1,7 - on cos_1_16,
+// for rows 2,6 - on cos_2_16,
+// for rows 3,5 - on cos_3_16
+// and are shifted to the left for rise of accuracy
+//
+//-----------------------------------------------------------------------------
+//
+// The 8-point scaled forward DCT algorithm (26a8m)
+//
+//-----------------------------------------------------------------------------
+//
+// #define DCT_8_FRW_COL(x, y)
+//{
+// short t0, t1, t2, t3, t4, t5, t6, t7;
+// short tp03, tm03, tp12, tm12, tp65, tm65;
+// short tp465, tm465, tp765, tm765;
+//
+// t0 = LEFT_SHIFT ( x[0] + x[7] );
+// t1 = LEFT_SHIFT ( x[1] + x[6] );
+// t2 = LEFT_SHIFT ( x[2] + x[5] );
+// t3 = LEFT_SHIFT ( x[3] + x[4] );
+// t4 = LEFT_SHIFT ( x[3] - x[4] );
+// t5 = LEFT_SHIFT ( x[2] - x[5] );
+// t6 = LEFT_SHIFT ( x[1] - x[6] );
+// t7 = LEFT_SHIFT ( x[0] - x[7] );
+//
+// tp03 = t0 + t3;
+// tm03 = t0 - t3;
+// tp12 = t1 + t2;
+// tm12 = t1 - t2;
+//
+// y[0] = tp03 + tp12;
+// y[4] = tp03 - tp12;
+//
+// y[2] = tm03 + tm12 * tg_2_16;
+// y[6] = tm03 * tg_2_16 - tm12;
+//
+// tp65 =(t6 +t5 )*cos_4_16;
+// tm65 =(t6 -t5 )*cos_4_16;
+//
+// tp765 = t7 + tp65;
+// tm765 = t7 - tp65;
+// tp465 = t4 + tm65;
+// tm465 = t4 - tm65;
+//
+// y[1] = tp765 + tp465 * tg_1_16;
+// y[7] = tp765 * tg_1_16 - tp465;
+// y[5] = tm765 * tg_3_16 + tm465;
+// y[3] = tm765 - tm465 * tg_3_16;
+//}
+//
+//-----------------------------------------------------------------------------
+
+//-----------------------------------------------------------------------------
+// DCT_8_INV_COL_4  INP,OUT
+//-----------------------------------------------------------------------------
+
+#define DCT_8_INV_COL(A1,A2)\
+  "movq 2*8(%3),%%mm0\n\t"\
+  "movq 16*3+" #A1 ",%%mm3\n\t"\
+  "movq %%mm0,%%mm1            \n\t"/* tg_3_16*/\
+  "movq 16*5+" #A1 ",%%mm5\n\t"\
+  "pmulhw %%mm3,%%mm0          \n\t"/* x3*(tg_3_16-1)*/\
+  "movq (%3),%%mm4\n\t"\
+  "pmulhw %%mm5,%%mm1          \n\t"/* x5*(tg_3_16-1)*/\
+  "movq 16*7+" #A1 ",%%mm7\n\t"\
+  "movq %%mm4,%%mm2            \n\t"/* tg_1_16*/\
+  "movq 16*1+" #A1 ",%%mm6\n\t"\
+  "pmulhw %%mm7,%%mm4          \n\t"/* x7*tg_1_16*/\
+  "paddsw %%mm3,%%mm0          \n\t"/* x3*tg_3_16*/\
+  "pmulhw %%mm6,%%mm2          \n\t"/* x1*tg_1_16*/\
+  "paddsw %%mm3,%%mm1          \n\t"/* x3+x5*(tg_3_16-1)*/\
+  "psubsw %%mm5,%%mm0          \n\t"/* x3*tg_3_16-x5 = tm35*/\
+  "movq 3*8(%3),%%mm3\n\t"\
+  "paddsw %%mm5,%%mm1          \n\t"/* x3+x5*tg_3_16 = tp35*/\
+  "paddsw %%mm6,%%mm4          \n\t"/* x1+tg_1_16*x7 = tp17*/\
+  "psubsw %%mm7,%%mm2          \n\t"/* x1*tg_1_16-x7 = tm17*/\
+  "movq %%mm4,%%mm5            \n\t"/* tp17*/\
+  "movq %%mm2,%%mm6            \n\t"/* tm17*/\
+  "paddsw %%mm1,%%mm5          \n\t"/* tp17+tp35 = b0*/\
+  "psubsw %%mm0,%%mm6          \n\t"/* tm17-tm35 = b3*/\
+  "psubsw %%mm1,%%mm4          \n\t"/* tp17-tp35 = t1*/\
+  "paddsw %%mm0,%%mm2          \n\t"/* tm17+tm35 = t2*/\
+  "movq 1*8(%3),%%mm7\n\t"\
+  "movq %%mm4,%%mm1            \n\t"/* t1*/\
+  "movq %%mm5,3*16         +" #A2 "\n\t"/* save b0*/\
+  "paddsw %%mm2,%%mm1          \n\t"/* t1+t2*/\
+  "movq %%mm6,5*16         +" #A2 "\n\t"/* save b3*/\
+  "psubsw %%mm2,%%mm4          \n\t"/* t1-t2*/\
+  "movq 2*16+" #A1 ",%%mm5\n\t"\
+  "movq %%mm7,%%mm0            \n\t"/* tg_2_16*/\
+  "movq 6*16+" #A1 ",%%mm6\n\t"\
+  "pmulhw %%mm5,%%mm0          \n\t"/* x2*tg_2_16*/\
+  "pmulhw %%mm6,%%mm7          \n\t"/* x6*tg_2_16*/\
+  "pmulhw %%mm3,%%mm1          \n\t"/* ocos_4_16*(t1+t2) = b1/2*/\
+  "movq 0*16+" #A1 ",%%mm2\n\t"\
+  "pmulhw %%mm3,%%mm4          \n\t"/* ocos_4_16*(t1-t2) = b2/2*/\
+  "psubsw %%mm6,%%mm0          \n\t"/* t2*tg_2_16-x6 = tm26*/\
+  "movq %%mm2,%%mm3            \n\t"/* x0*/\
+  "movq 4*16+" #A1 ",%%mm6\n\t"\
+  "paddsw %%mm5,%%mm7          \n\t"/* x2+x6*tg_2_16 = tp26*/\
+  "paddsw %%mm6,%%mm2          \n\t"/* x0+x4 = tp04*/\
+  "psubsw %%mm6,%%mm3          \n\t"/* x0-x4 = tm04*/\
+  "movq %%mm2,%%mm5            \n\t"/* tp04*/\
+  "movq %%mm3,%%mm6            \n\t"/* tm04*/\
+  "psubsw %%mm7,%%mm2          \n\t"/* tp04-tp26 = a3*/\
+  "paddsw %%mm0,%%mm3          \n\t"/* tm04+tm26 = a1*/\
+  "paddsw %%mm1,%%mm1          \n\t"/* b1*/\
+  "paddsw %%mm4,%%mm4          \n\t"/* b2*/\
+  "paddsw %%mm7,%%mm5          \n\t"/* tp04+tp26 = a0*/\
+  "psubsw %%mm0,%%mm6          \n\t"/* tm04-tm26 = a2*/\
+  "movq %%mm3,%%mm7            \n\t"/* a1*/\
+  "movq %%mm6,%%mm0            \n\t"/* a2*/\
+  "paddsw %%mm1,%%mm3          \n\t"/* a1+b1*/\
+  "paddsw %%mm4,%%mm6          \n\t"/* a2+b2*/\
+  "psraw $6,%%mm3 \n\t"/* dst1*/\
+  "psubsw %%mm1,%%mm7          \n\t"/* a1-b1*/\
+  "psraw $6,%%mm6 \n\t"/* dst2*/\
+  "psubsw %%mm4,%%mm0          \n\t"/* a2-b2*/\
+  "movq 3*16+" #A2 ",%%mm1         \n\t"/* load b0*/\
+  "psraw $6,%%mm7 \n\t"/* dst6*/\
+  "movq %%mm5,%%mm4            \n\t"/* a0*/\
+  "psraw $6,%%mm0 \n\t"/* dst5*/\
+  "movq %%mm3,1*16+" #A2 "\n\t"\
+  "paddsw %%mm1,%%mm5          \n\t"/* a0+b0*/\
+  "movq %%mm6,2*16+" #A2 "\n\t"\
+  "psubsw %%mm1,%%mm4          \n\t"/* a0-b0*/\
+  "movq 5*16+" #A2 ",%%mm3         \n\t"/* load b3*/\
+  "psraw $6,%%mm5 \n\t"/* dst0*/\
+  "movq %%mm2,%%mm6            \n\t"/* a3*/\
+  "psraw $6,%%mm4 \n\t"/* dst7*/\
+  "movq %%mm0,5*16+" #A2 "\n\t"\
+  "paddsw %%mm3,%%mm2          \n\t"/* a3+b3*/\
+  "movq %%mm7,6*16+" #A2 "\n\t"\
+  "psubsw %%mm3,%%mm6          \n\t"/* a3-b3*/\
+  "movq %%mm5,0*16+" #A2 "\n\t"\
+  "psraw $6,%%mm2 \n\t"/* dst3*/\
+  "movq %%mm4,7*16+" #A2 "\n\t"\
+  "psraw $6,%%mm6 \n\t"/* dst4*/\
+  "movq %%mm2,3*16+" #A2 "\n\t"\
+  "movq %%mm6,4*16+" #A2 "\n\t"
+
+//=============================================================================
+// Code
+//=============================================================================
+
+//-----------------------------------------------------------------------------
+// void idct_mmx(uint16_t block[64]);
+//-----------------------------------------------------------------------------
+
+
+void ff_idct_xvid_mmx(short *block){
+asm volatile(
+            //# Process each row
+    DCT_8_INV_ROW_MMX(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1))
+    DCT_8_INV_ROW_MMX(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1))
+    DCT_8_INV_ROW_MMX(2*16(%0), 2*16(%0), 64*2(%2), 8*2(%1))
+    DCT_8_INV_ROW_MMX(3*16(%0), 3*16(%0), 64*3(%2), 8*3(%1))
+    DCT_8_INV_ROW_MMX(4*16(%0), 4*16(%0), 64*0(%2), 8*4(%1))
+    DCT_8_INV_ROW_MMX(5*16(%0), 5*16(%0), 64*3(%2), 8*5(%1))
+    DCT_8_INV_ROW_MMX(6*16(%0), 6*16(%0), 64*2(%2), 8*6(%1))
+    DCT_8_INV_ROW_MMX(7*16(%0), 7*16(%0), 64*1(%2), 8*7(%1))
+
+            //# Process the columns (4 at a time)
+    DCT_8_INV_COL(0(%0), 0(%0))
+    DCT_8_INV_COL(8(%0), 8(%0))
+    :: "r"(block), "r"(rounder_0), "r"(tab_i_04_mmx), "r"(tg_1_16));
+}
+
+//-----------------------------------------------------------------------------
+// void idct_xmm(uint16_t block[64]);
+//-----------------------------------------------------------------------------
+
+
+void ff_idct_xvid_mmx2(short *block){
+asm volatile(
+            //# Process each row
+    DCT_8_INV_ROW_XMM(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1))
+    DCT_8_INV_ROW_XMM(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1))
+    DCT_8_INV_ROW_XMM(2*16(%0), 2*16(%0), 64*2(%2), 8*2(%1))
+    DCT_8_INV_ROW_XMM(3*16(%0), 3*16(%0), 64*3(%2), 8*3(%1))
+    DCT_8_INV_ROW_XMM(4*16(%0), 4*16(%0), 64*0(%2), 8*4(%1))
+    DCT_8_INV_ROW_XMM(5*16(%0), 5*16(%0), 64*3(%2), 8*5(%1))
+    DCT_8_INV_ROW_XMM(6*16(%0), 6*16(%0), 64*2(%2), 8*6(%1))
+    DCT_8_INV_ROW_XMM(7*16(%0), 7*16(%0), 64*1(%2), 8*7(%1))
+
+            //# Process the columns (4 at a time)
+    DCT_8_INV_COL(0(%0), 0(%0))
+    DCT_8_INV_COL(8(%0), 8(%0))
+    :: "r"(block), "r"(rounder_0), "r"(tab_i_04_xmm), "r"(tg_1_16));
+}
+
diff --git a/contrib/ffmpeg/libavcodec/i386/mathops.h b/contrib/ffmpeg/libavcodec/i386/mathops.h
new file mode 100644
index 000000000..3553a4025
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/mathops.h
@@ -0,0 +1,41 @@
+/*
+ * simple math operations
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef FRAC_BITS
+#   define MULL(ra, rb) \
+        ({ int rt, dummy; asm (\
+            "imull %3               \n\t"\
+            "shrdl %4, %%edx, %%eax \n\t"\
+            : "=a"(rt), "=d"(dummy)\
+            : "a" (ra), "rm" (rb), "i"(FRAC_BITS));\
+         rt; })
+#endif
+
+#define MULH(ra, rb) \
+    ({ int rt, dummy;\
+     asm ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" (ra), "rm" (rb));\
+     rt; })
+
+#define MUL64(ra, rb) \
+    ({ int64_t rt;\
+     asm ("imull %2\n\t" : "=A"(rt) : "a" (ra), "g" (rb));\
+     rt; })
+
diff --git a/contrib/ffmpeg/libavcodec/i386/mmx.h b/contrib/ffmpeg/libavcodec/i386/mmx.h
new file mode 100644
index 000000000..41aae6c21
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/mmx.h
@@ -0,0 +1,283 @@
+/*
+ * mmx.h
+ * Copyright (C) 1997-2001 H. Dietz and R. Fisher
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_I386MMX_H
+#define AVCODEC_I386MMX_H
+
+/*
+ * The type of an value that fits in an MMX register (note that long
+ * long constant values MUST be suffixed by LL and unsigned long long
+ * values by ULL, lest they be truncated by the compiler)
+ */
+
+typedef        union {
+        long long               q;      /* Quadword (64-bit) value */
+        unsigned long long      uq;     /* Unsigned Quadword */
+        int                     d[2];   /* 2 Doubleword (32-bit) values */
+        unsigned int            ud[2];  /* 2 Unsigned Doubleword */
+        short                   w[4];   /* 4 Word (16-bit) values */
+        unsigned short          uw[4];  /* 4 Unsigned Word */
+        char                    b[8];   /* 8 Byte (8-bit) values */
+        unsigned char           ub[8];  /* 8 Unsigned Byte */
+        float                   s[2];   /* Single-precision (32-bit) value */
+} mmx_t;        /* On an 8-byte (64-bit) boundary */
+
+
+#define         mmx_i2r(op,imm,reg) \
+        __asm__ __volatile__ (#op " %0, %%" #reg \
+                              : /* nothing */ \
+                              : "i" (imm) )
+
+#define         mmx_m2r(op,mem,reg) \
+        __asm__ __volatile__ (#op " %0, %%" #reg \
+                              : /* nothing */ \
+                              : "m" (mem))
+
+#define         mmx_r2m(op,reg,mem) \
+        __asm__ __volatile__ (#op " %%" #reg ", %0" \
+                              : "=m" (mem) \
+                              : /* nothing */ )
+
+#define         mmx_r2r(op,regs,regd) \
+        __asm__ __volatile__ (#op " %" #regs ", %" #regd)
+
+
+#define         emms() __asm__ __volatile__ ("emms")
+
+#define         movd_m2r(var,reg)           mmx_m2r (movd, var, reg)
+#define         movd_r2m(reg,var)           mmx_r2m (movd, reg, var)
+#define         movd_r2r(regs,regd)         mmx_r2r (movd, regs, regd)
+
+#define         movq_m2r(var,reg)           mmx_m2r (movq, var, reg)
+#define         movq_r2m(reg,var)           mmx_r2m (movq, reg, var)
+#define         movq_r2r(regs,regd)         mmx_r2r (movq, regs, regd)
+
+#define         packssdw_m2r(var,reg)       mmx_m2r (packssdw, var, reg)
+#define         packssdw_r2r(regs,regd)     mmx_r2r (packssdw, regs, regd)
+#define         packsswb_m2r(var,reg)       mmx_m2r (packsswb, var, reg)
+#define         packsswb_r2r(regs,regd)     mmx_r2r (packsswb, regs, regd)
+
+#define         packuswb_m2r(var,reg)       mmx_m2r (packuswb, var, reg)
+#define         packuswb_r2r(regs,regd)     mmx_r2r (packuswb, regs, regd)
+
+#define         paddb_m2r(var,reg)          mmx_m2r (paddb, var, reg)
+#define         paddb_r2r(regs,regd)        mmx_r2r (paddb, regs, regd)
+#define         paddd_m2r(var,reg)          mmx_m2r (paddd, var, reg)
+#define         paddd_r2r(regs,regd)        mmx_r2r (paddd, regs, regd)
+#define         paddw_m2r(var,reg)          mmx_m2r (paddw, var, reg)
+#define         paddw_r2r(regs,regd)        mmx_r2r (paddw, regs, regd)
+
+#define         paddsb_m2r(var,reg)         mmx_m2r (paddsb, var, reg)
+#define         paddsb_r2r(regs,regd)       mmx_r2r (paddsb, regs, regd)
+#define         paddsw_m2r(var,reg)         mmx_m2r (paddsw, var, reg)
+#define         paddsw_r2r(regs,regd)       mmx_r2r (paddsw, regs, regd)
+
+#define         paddusb_m2r(var,reg)        mmx_m2r (paddusb, var, reg)
+#define         paddusb_r2r(regs,regd)      mmx_r2r (paddusb, regs, regd)
+#define         paddusw_m2r(var,reg)        mmx_m2r (paddusw, var, reg)
+#define         paddusw_r2r(regs,regd)      mmx_r2r (paddusw, regs, regd)
+
+#define         pand_m2r(var,reg)           mmx_m2r (pand, var, reg)
+#define         pand_r2r(regs,regd)         mmx_r2r (pand, regs, regd)
+
+#define         pandn_m2r(var,reg)          mmx_m2r (pandn, var, reg)
+#define         pandn_r2r(regs,regd)        mmx_r2r (pandn, regs, regd)
+
+#define         pcmpeqb_m2r(var,reg)        mmx_m2r (pcmpeqb, var, reg)
+#define         pcmpeqb_r2r(regs,regd)      mmx_r2r (pcmpeqb, regs, regd)
+#define         pcmpeqd_m2r(var,reg)        mmx_m2r (pcmpeqd, var, reg)
+#define         pcmpeqd_r2r(regs,regd)      mmx_r2r (pcmpeqd, regs, regd)
+#define         pcmpeqw_m2r(var,reg)        mmx_m2r (pcmpeqw, var, reg)
+#define         pcmpeqw_r2r(regs,regd)      mmx_r2r (pcmpeqw, regs, regd)
+
+#define         pcmpgtb_m2r(var,reg)        mmx_m2r (pcmpgtb, var, reg)
+#define         pcmpgtb_r2r(regs,regd)      mmx_r2r (pcmpgtb, regs, regd)
+#define         pcmpgtd_m2r(var,reg)        mmx_m2r (pcmpgtd, var, reg)
+#define         pcmpgtd_r2r(regs,regd)      mmx_r2r (pcmpgtd, regs, regd)
+#define         pcmpgtw_m2r(var,reg)        mmx_m2r (pcmpgtw, var, reg)
+#define         pcmpgtw_r2r(regs,regd)      mmx_r2r (pcmpgtw, regs, regd)
+
+#define         pmaddwd_m2r(var,reg)        mmx_m2r (pmaddwd, var, reg)
+#define         pmaddwd_r2r(regs,regd)      mmx_r2r (pmaddwd, regs, regd)
+
+#define         pmulhw_m2r(var,reg)         mmx_m2r (pmulhw, var, reg)
+#define         pmulhw_r2r(regs,regd)       mmx_r2r (pmulhw, regs, regd)
+
+#define         pmullw_m2r(var,reg)         mmx_m2r (pmullw, var, reg)
+#define         pmullw_r2r(regs,regd)       mmx_r2r (pmullw, regs, regd)
+
+#define         por_m2r(var,reg)            mmx_m2r (por, var, reg)
+#define         por_r2r(regs,regd)          mmx_r2r (por, regs, regd)
+
+#define         pslld_i2r(imm,reg)          mmx_i2r (pslld, imm, reg)
+#define         pslld_m2r(var,reg)          mmx_m2r (pslld, var, reg)
+#define         pslld_r2r(regs,regd)        mmx_r2r (pslld, regs, regd)
+#define         psllq_i2r(imm,reg)          mmx_i2r (psllq, imm, reg)
+#define         psllq_m2r(var,reg)          mmx_m2r (psllq, var, reg)
+#define         psllq_r2r(regs,regd)        mmx_r2r (psllq, regs, regd)
+#define         psllw_i2r(imm,reg)          mmx_i2r (psllw, imm, reg)
+#define         psllw_m2r(var,reg)          mmx_m2r (psllw, var, reg)
+#define         psllw_r2r(regs,regd)        mmx_r2r (psllw, regs, regd)
+
+#define         psrad_i2r(imm,reg)          mmx_i2r (psrad, imm, reg)
+#define         psrad_m2r(var,reg)          mmx_m2r (psrad, var, reg)
+#define         psrad_r2r(regs,regd)        mmx_r2r (psrad, regs, regd)
+#define         psraw_i2r(imm,reg)          mmx_i2r (psraw, imm, reg)
+#define         psraw_m2r(var,reg)          mmx_m2r (psraw, var, reg)
+#define         psraw_r2r(regs,regd)        mmx_r2r (psraw, regs, regd)
+
+#define         psrld_i2r(imm,reg)          mmx_i2r (psrld, imm, reg)
+#define         psrld_m2r(var,reg)          mmx_m2r (psrld, var, reg)
+#define         psrld_r2r(regs,regd)        mmx_r2r (psrld, regs, regd)
+#define         psrlq_i2r(imm,reg)          mmx_i2r (psrlq, imm, reg)
+#define         psrlq_m2r(var,reg)          mmx_m2r (psrlq, var, reg)
+#define         psrlq_r2r(regs,regd)        mmx_r2r (psrlq, regs, regd)
+#define         psrlw_i2r(imm,reg)          mmx_i2r (psrlw, imm, reg)
+#define         psrlw_m2r(var,reg)          mmx_m2r (psrlw, var, reg)
+#define         psrlw_r2r(regs,regd)        mmx_r2r (psrlw, regs, regd)
+
+#define         psubb_m2r(var,reg)          mmx_m2r (psubb, var, reg)
+#define         psubb_r2r(regs,regd)        mmx_r2r (psubb, regs, regd)
+#define         psubd_m2r(var,reg)          mmx_m2r (psubd, var, reg)
+#define         psubd_r2r(regs,regd)        mmx_r2r (psubd, regs, regd)
+#define         psubw_m2r(var,reg)          mmx_m2r (psubw, var, reg)
+#define         psubw_r2r(regs,regd)        mmx_r2r (psubw, regs, regd)
+
+#define         psubsb_m2r(var,reg)         mmx_m2r (psubsb, var, reg)
+#define         psubsb_r2r(regs,regd)       mmx_r2r (psubsb, regs, regd)
+#define         psubsw_m2r(var,reg)         mmx_m2r (psubsw, var, reg)
+#define         psubsw_r2r(regs,regd)       mmx_r2r (psubsw, regs, regd)
+
+#define         psubusb_m2r(var,reg)        mmx_m2r (psubusb, var, reg)
+#define         psubusb_r2r(regs,regd)      mmx_r2r (psubusb, regs, regd)
+#define         psubusw_m2r(var,reg)        mmx_m2r (psubusw, var, reg)
+#define         psubusw_r2r(regs,regd)      mmx_r2r (psubusw, regs, regd)
+
+#define         punpckhbw_m2r(var,reg)      mmx_m2r (punpckhbw, var, reg)
+#define         punpckhbw_r2r(regs,regd)    mmx_r2r (punpckhbw, regs, regd)
+#define         punpckhdq_m2r(var,reg)      mmx_m2r (punpckhdq, var, reg)
+#define         punpckhdq_r2r(regs,regd)    mmx_r2r (punpckhdq, regs, regd)
+#define         punpckhwd_m2r(var,reg)      mmx_m2r (punpckhwd, var, reg)
+#define         punpckhwd_r2r(regs,regd)    mmx_r2r (punpckhwd, regs, regd)
+
+#define         punpcklbw_m2r(var,reg)      mmx_m2r (punpcklbw, var, reg)
+#define         punpcklbw_r2r(regs,regd)    mmx_r2r (punpcklbw, regs, regd)
+#define         punpckldq_m2r(var,reg)      mmx_m2r (punpckldq, var, reg)
+#define         punpckldq_r2r(regs,regd)    mmx_r2r (punpckldq, regs, regd)
+#define         punpcklwd_m2r(var,reg)      mmx_m2r (punpcklwd, var, reg)
+#define         punpcklwd_r2r(regs,regd)    mmx_r2r (punpcklwd, regs, regd)
+
+#define         pxor_m2r(var,reg)           mmx_m2r (pxor, var, reg)
+#define         pxor_r2r(regs,regd)         mmx_r2r (pxor, regs, regd)
+
+
+/* 3DNOW extensions */
+
+#define         pavgusb_m2r(var,reg)        mmx_m2r (pavgusb, var, reg)
+#define         pavgusb_r2r(regs,regd)      mmx_r2r (pavgusb, regs, regd)
+
+
+/* AMD MMX extensions - also available in intel SSE */
+
+
+#define         mmx_m2ri(op,mem,reg,imm) \
+        __asm__ __volatile__ (#op " %1, %0, %%" #reg \
+                              : /* nothing */ \
+                              : "m" (mem), "i" (imm))
+#define         mmx_r2ri(op,regs,regd,imm) \
+        __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
+                              : /* nothing */ \
+                              : "i" (imm) )
+
+#define         mmx_fetch(mem,hint) \
+        __asm__ __volatile__ ("prefetch" #hint " %0" \
+                              : /* nothing */ \
+                              : "m" (mem))
+
+
+#define         maskmovq(regs,maskreg)      mmx_r2ri (maskmovq, regs, maskreg)
+
+#define         movntq_r2m(mmreg,var)       mmx_r2m (movntq, mmreg, var)
+
+#define         pavgb_m2r(var,reg)          mmx_m2r (pavgb, var, reg)
+#define         pavgb_r2r(regs,regd)        mmx_r2r (pavgb, regs, regd)
+#define         pavgw_m2r(var,reg)          mmx_m2r (pavgw, var, reg)
+#define         pavgw_r2r(regs,regd)        mmx_r2r (pavgw, regs, regd)
+
+#define         pextrw_r2r(mmreg,reg,imm)   mmx_r2ri (pextrw, mmreg, reg, imm)
+
+#define         pinsrw_r2r(reg,mmreg,imm)   mmx_r2ri (pinsrw, reg, mmreg, imm)
+
+#define         pmaxsw_m2r(var,reg)         mmx_m2r (pmaxsw, var, reg)
+#define         pmaxsw_r2r(regs,regd)       mmx_r2r (pmaxsw, regs, regd)
+
+#define         pmaxub_m2r(var,reg)         mmx_m2r (pmaxub, var, reg)
+#define         pmaxub_r2r(regs,regd)       mmx_r2r (pmaxub, regs, regd)
+
+#define         pminsw_m2r(var,reg)         mmx_m2r (pminsw, var, reg)
+#define         pminsw_r2r(regs,regd)       mmx_r2r (pminsw, regs, regd)
+
+#define         pminub_m2r(var,reg)         mmx_m2r (pminub, var, reg)
+#define         pminub_r2r(regs,regd)       mmx_r2r (pminub, regs, regd)
+
+#define         pmovmskb(mmreg,reg) \
+        __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg)
+
+#define         pmulhuw_m2r(var,reg)        mmx_m2r (pmulhuw, var, reg)
+#define         pmulhuw_r2r(regs,regd)      mmx_r2r (pmulhuw, regs, regd)
+
+#define         prefetcht0(mem)             mmx_fetch (mem, t0)
+#define         prefetcht1(mem)             mmx_fetch (mem, t1)
+#define         prefetcht2(mem)             mmx_fetch (mem, t2)
+#define         prefetchnta(mem)            mmx_fetch (mem, nta)
+
+#define         psadbw_m2r(var,reg)         mmx_m2r (psadbw, var, reg)
+#define         psadbw_r2r(regs,regd)       mmx_r2r (psadbw, regs, regd)
+
+#define         pshufw_m2r(var,reg,imm)     mmx_m2ri(pshufw, var, reg, imm)
+#define         pshufw_r2r(regs,regd,imm)   mmx_r2ri(pshufw, regs, regd, imm)
+
+#define         sfence() __asm__ __volatile__ ("sfence\n\t")
+
+/* SSE2 */
+#define         pshufhw_m2r(var,reg,imm)    mmx_m2ri(pshufhw, var, reg, imm)
+#define         pshufhw_r2r(regs,regd,imm)  mmx_r2ri(pshufhw, regs, regd, imm)
+#define         pshuflw_m2r(var,reg,imm)    mmx_m2ri(pshuflw, var, reg, imm)
+#define         pshuflw_r2r(regs,regd,imm)  mmx_r2ri(pshuflw, regs, regd, imm)
+
+#define         pshufd_r2r(regs,regd,imm)   mmx_r2ri(pshufd, regs, regd, imm)
+
+#define         movdqa_m2r(var,reg)         mmx_m2r (movdqa, var, reg)
+#define         movdqa_r2m(reg,var)         mmx_r2m (movdqa, reg, var)
+#define         movdqa_r2r(regs,regd)       mmx_r2r (movdqa, regs, regd)
+#define         movdqu_m2r(var,reg)         mmx_m2r (movdqu, var, reg)
+#define         movdqu_r2m(reg,var)         mmx_r2m (movdqu, reg, var)
+#define         movdqu_r2r(regs,regd)       mmx_r2r (movdqu, regs, regd)
+
+#define         pmullw_r2m(reg,var)         mmx_r2m (pmullw, reg, var)
+
+#define         pslldq_i2r(imm,reg)         mmx_i2r (pslldq, imm, reg)
+#define         psrldq_i2r(imm,reg)         mmx_i2r (psrldq, imm, reg)
+
+#define         punpcklqdq_r2r(regs,regd)   mmx_r2r (punpcklqdq, regs, regd)
+#define         punpckhqdq_r2r(regs,regd)   mmx_r2r (punpckhqdq, regs, regd)
+
+
+#endif /* AVCODEC_I386MMX_H */
diff --git a/contrib/ffmpeg/libavcodec/i386/motion_est_mmx.c b/contrib/ffmpeg/libavcodec/i386/motion_est_mmx.c
new file mode 100644
index 000000000..e33870e0f
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/motion_est_mmx.c
@@ -0,0 +1,408 @@
+/*
+ * MMX optimized motion estimation
+ * Copyright (c) 2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * mostly by Michael Niedermayer <michaelni@gmx.at>
+ */
+#include "../dsputil.h"
+#include "x86_cpu.h"
+
+static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={
+0x0000000000000000ULL,
+0x0001000100010001ULL,
+0x0002000200020002ULL,
+};
+
+static attribute_used __attribute__ ((aligned(8))) uint64_t bone= 0x0101010101010101LL;
+
+static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+{
+    long len= -(stride*h);
+    asm volatile(
+        ASMALIGN(4)
+        "1:                             \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "movq (%2, %%"REG_a"), %%mm2    \n\t"
+        "movq (%2, %%"REG_a"), %%mm4    \n\t"
+        "add %3, %%"REG_a"              \n\t"
+        "psubusb %%mm0, %%mm2           \n\t"
+        "psubusb %%mm4, %%mm0           \n\t"
+        "movq (%1, %%"REG_a"), %%mm1    \n\t"
+        "movq (%2, %%"REG_a"), %%mm3    \n\t"
+        "movq (%2, %%"REG_a"), %%mm5    \n\t"
+        "psubusb %%mm1, %%mm3           \n\t"
+        "psubusb %%mm5, %%mm1           \n\t"
+        "por %%mm2, %%mm0               \n\t"
+        "por %%mm1, %%mm3               \n\t"
+        "movq %%mm0, %%mm1              \n\t"
+        "movq %%mm3, %%mm2              \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "punpcklbw %%mm7, %%mm3         \n\t"
+        "punpckhbw %%mm7, %%mm2         \n\t"
+        "paddw %%mm1, %%mm0             \n\t"
+        "paddw %%mm3, %%mm2             \n\t"
+        "paddw %%mm2, %%mm0             \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "add %3, %%"REG_a"              \n\t"
+        " js 1b                         \n\t"
+        : "+a" (len)
+        : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride)
+    );
+}
+
+static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+{
+    long len= -(stride*h);
+    asm volatile(
+        ASMALIGN(4)
+        "1:                             \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "movq (%2, %%"REG_a"), %%mm2    \n\t"
+        "psadbw %%mm2, %%mm0            \n\t"
+        "add %3, %%"REG_a"              \n\t"
+        "movq (%1, %%"REG_a"), %%mm1    \n\t"
+        "movq (%2, %%"REG_a"), %%mm3    \n\t"
+        "psadbw %%mm1, %%mm3            \n\t"
+        "paddw %%mm3, %%mm0             \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "add %3, %%"REG_a"              \n\t"
+        " js 1b                         \n\t"
+        : "+a" (len)
+        : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride)
+    );
+}
+
+static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
+{
+    long len= -(stride*h);
+    asm volatile(
+        ASMALIGN(4)
+        "1:                             \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "movq (%2, %%"REG_a"), %%mm2    \n\t"
+        "pavgb %%mm2, %%mm0             \n\t"
+        "movq (%3, %%"REG_a"), %%mm2    \n\t"
+        "psadbw %%mm2, %%mm0            \n\t"
+        "add %4, %%"REG_a"              \n\t"
+        "movq (%1, %%"REG_a"), %%mm1    \n\t"
+        "movq (%2, %%"REG_a"), %%mm3    \n\t"
+        "pavgb %%mm1, %%mm3             \n\t"
+        "movq (%3, %%"REG_a"), %%mm1    \n\t"
+        "psadbw %%mm1, %%mm3            \n\t"
+        "paddw %%mm3, %%mm0             \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "add %4, %%"REG_a"              \n\t"
+        " js 1b                         \n\t"
+        : "+a" (len)
+        : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride)
+    );
+}
+
+static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+{ //FIXME reuse src
+    long len= -(stride*h);
+    asm volatile(
+        ASMALIGN(4)
+        "movq "MANGLE(bone)", %%mm5     \n\t"
+        "1:                             \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "movq (%2, %%"REG_a"), %%mm2    \n\t"
+        "movq 1(%1, %%"REG_a"), %%mm1   \n\t"
+        "movq 1(%2, %%"REG_a"), %%mm3   \n\t"
+        "pavgb %%mm2, %%mm0             \n\t"
+        "pavgb %%mm1, %%mm3             \n\t"
+        "psubusb %%mm5, %%mm3           \n\t"
+        "pavgb %%mm3, %%mm0             \n\t"
+        "movq (%3, %%"REG_a"), %%mm2    \n\t"
+        "psadbw %%mm2, %%mm0            \n\t"
+        "add %4, %%"REG_a"              \n\t"
+        "movq (%1, %%"REG_a"), %%mm1    \n\t"
+        "movq (%2, %%"REG_a"), %%mm3    \n\t"
+        "movq 1(%1, %%"REG_a"), %%mm2   \n\t"
+        "movq 1(%2, %%"REG_a"), %%mm4   \n\t"
+        "pavgb %%mm3, %%mm1             \n\t"
+        "pavgb %%mm4, %%mm2             \n\t"
+        "psubusb %%mm5, %%mm2           \n\t"
+        "pavgb %%mm1, %%mm2             \n\t"
+        "movq (%3, %%"REG_a"), %%mm1    \n\t"
+        "psadbw %%mm1, %%mm2            \n\t"
+        "paddw %%mm2, %%mm0             \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "add %4, %%"REG_a"              \n\t"
+        " js 1b                         \n\t"
+        : "+a" (len)
+        : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride)
+    );
+}
+
+static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
+{
+    long len= -(stride*h);
+    asm volatile(
+        ASMALIGN(4)
+        "1:                             \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "movq (%2, %%"REG_a"), %%mm1    \n\t"
+        "movq (%1, %%"REG_a"), %%mm2    \n\t"
+        "movq (%2, %%"REG_a"), %%mm3    \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpcklbw %%mm7, %%mm1         \n\t"
+        "punpckhbw %%mm7, %%mm2         \n\t"
+        "punpckhbw %%mm7, %%mm3         \n\t"
+        "paddw %%mm0, %%mm1             \n\t"
+        "paddw %%mm2, %%mm3             \n\t"
+        "movq (%3, %%"REG_a"), %%mm4    \n\t"
+        "movq (%3, %%"REG_a"), %%mm2    \n\t"
+        "paddw %%mm5, %%mm1             \n\t"
+        "paddw %%mm5, %%mm3             \n\t"
+        "psrlw $1, %%mm1                \n\t"
+        "psrlw $1, %%mm3                \n\t"
+        "packuswb %%mm3, %%mm1          \n\t"
+        "psubusb %%mm1, %%mm4           \n\t"
+        "psubusb %%mm2, %%mm1           \n\t"
+        "por %%mm4, %%mm1               \n\t"
+        "movq %%mm1, %%mm0              \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "paddw %%mm1, %%mm0             \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "add %4, %%"REG_a"              \n\t"
+        " js 1b                         \n\t"
+        : "+a" (len)
+        : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride)
+    );
+}
+
+static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+{
+    long len= -(stride*h);
+    asm volatile(
+        ASMALIGN(4)
+        "1:                             \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "movq (%2, %%"REG_a"), %%mm1    \n\t"
+        "movq %%mm0, %%mm4              \n\t"
+        "movq %%mm1, %%mm2              \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpcklbw %%mm7, %%mm1         \n\t"
+        "punpckhbw %%mm7, %%mm4         \n\t"
+        "punpckhbw %%mm7, %%mm2         \n\t"
+        "paddw %%mm1, %%mm0             \n\t"
+        "paddw %%mm2, %%mm4             \n\t"
+        "movq 1(%1, %%"REG_a"), %%mm2   \n\t"
+        "movq 1(%2, %%"REG_a"), %%mm3   \n\t"
+        "movq %%mm2, %%mm1              \n\t"
+        "punpcklbw %%mm7, %%mm2         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "paddw %%mm0, %%mm2             \n\t"
+        "paddw %%mm4, %%mm1             \n\t"
+        "movq %%mm3, %%mm4              \n\t"
+        "punpcklbw %%mm7, %%mm3         \n\t"
+        "punpckhbw %%mm7, %%mm4         \n\t"
+        "paddw %%mm3, %%mm2             \n\t"
+        "paddw %%mm4, %%mm1             \n\t"
+        "movq (%3, %%"REG_a"), %%mm3    \n\t"
+        "movq (%3, %%"REG_a"), %%mm4    \n\t"
+        "paddw %%mm5, %%mm2             \n\t"
+        "paddw %%mm5, %%mm1             \n\t"
+        "psrlw $2, %%mm2                \n\t"
+        "psrlw $2, %%mm1                \n\t"
+        "packuswb %%mm1, %%mm2          \n\t"
+        "psubusb %%mm2, %%mm3           \n\t"
+        "psubusb %%mm4, %%mm2           \n\t"
+        "por %%mm3, %%mm2               \n\t"
+        "movq %%mm2, %%mm0              \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpckhbw %%mm7, %%mm2         \n\t"
+        "paddw %%mm2, %%mm0             \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "add %4, %%"REG_a"              \n\t"
+        " js 1b                         \n\t"
+        : "+a" (len)
+        : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride)
+    );
+}
+
+static inline int sum_mmx(void)
+{
+    int ret;
+    asm volatile(
+        "movq %%mm6, %%mm0              \n\t"
+        "psrlq $32, %%mm6               \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "movq %%mm6, %%mm0              \n\t"
+        "psrlq $16, %%mm6               \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "movd %%mm6, %0                 \n\t"
+        : "=r" (ret)
+    );
+    return ret&0xFFFF;
+}
+
+static inline int sum_mmx2(void)
+{
+    int ret;
+    asm volatile(
+        "movd %%mm6, %0                 \n\t"
+        : "=r" (ret)
+    );
+    return ret;
+}
+
+
+#define PIX_SAD(suf)\
+static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
+{\
+    assert(h==8);\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t":);\
+\
+    sad8_1_ ## suf(blk1, blk2, stride, 8);\
+\
+    return sum_ ## suf();\
+}\
+static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
+{\
+    assert(h==8);\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t"\
+                 "movq %0, %%mm5        \n\t"\
+                 :: "m"(round_tab[1]) \
+                 );\
+\
+    sad8_2_ ## suf(blk1, blk1+1, blk2, stride, 8);\
+\
+    return sum_ ## suf();\
+}\
+\
+static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
+{\
+    assert(h==8);\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t"\
+                 "movq %0, %%mm5        \n\t"\
+                 :: "m"(round_tab[1]) \
+                 );\
+\
+    sad8_2_ ## suf(blk1, blk1+stride, blk2, stride, 8);\
+\
+    return sum_ ## suf();\
+}\
+\
+static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
+{\
+    assert(h==8);\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t"\
+                 "movq %0, %%mm5        \n\t"\
+                 :: "m"(round_tab[2]) \
+                 );\
+\
+    sad8_4_ ## suf(blk1, blk2, stride, 8);\
+\
+    return sum_ ## suf();\
+}\
+\
+static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
+{\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t":);\
+\
+    sad8_1_ ## suf(blk1  , blk2  , stride, h);\
+    sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\
+\
+    return sum_ ## suf();\
+}\
+static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
+{\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t"\
+                 "movq %0, %%mm5        \n\t"\
+                 :: "m"(round_tab[1]) \
+                 );\
+\
+    sad8_2_ ## suf(blk1  , blk1+1, blk2  , stride, h);\
+    sad8_2_ ## suf(blk1+8, blk1+9, blk2+8, stride, h);\
+\
+    return sum_ ## suf();\
+}\
+static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
+{\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t"\
+                 "movq %0, %%mm5        \n\t"\
+                 :: "m"(round_tab[1]) \
+                 );\
+\
+    sad8_2_ ## suf(blk1  , blk1+stride,  blk2  , stride, h);\
+    sad8_2_ ## suf(blk1+8, blk1+stride+8,blk2+8, stride, h);\
+\
+    return sum_ ## suf();\
+}\
+static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
+{\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t"\
+                 "movq %0, %%mm5        \n\t"\
+                 :: "m"(round_tab[2]) \
+                 );\
+\
+    sad8_4_ ## suf(blk1  , blk2  , stride, h);\
+    sad8_4_ ## suf(blk1+8, blk2+8, stride, h);\
+\
+    return sum_ ## suf();\
+}\
+
+PIX_SAD(mmx)
+PIX_SAD(mmx2)
+
+void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
+{
+    if (mm_flags & MM_MMX) {
+        c->pix_abs[0][0] = sad16_mmx;
+        c->pix_abs[0][1] = sad16_x2_mmx;
+        c->pix_abs[0][2] = sad16_y2_mmx;
+        c->pix_abs[0][3] = sad16_xy2_mmx;
+        c->pix_abs[1][0] = sad8_mmx;
+        c->pix_abs[1][1] = sad8_x2_mmx;
+        c->pix_abs[1][2] = sad8_y2_mmx;
+        c->pix_abs[1][3] = sad8_xy2_mmx;
+
+        c->sad[0]= sad16_mmx;
+        c->sad[1]= sad8_mmx;
+    }
+    if (mm_flags & MM_MMXEXT) {
+        c->pix_abs[0][0] = sad16_mmx2;
+        c->pix_abs[1][0] = sad8_mmx2;
+
+        c->sad[0]= sad16_mmx2;
+        c->sad[1]= sad8_mmx2;
+
+        if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+            c->pix_abs[0][1] = sad16_x2_mmx2;
+            c->pix_abs[0][2] = sad16_y2_mmx2;
+            c->pix_abs[0][3] = sad16_xy2_mmx2;
+            c->pix_abs[1][1] = sad8_x2_mmx2;
+            c->pix_abs[1][2] = sad8_y2_mmx2;
+            c->pix_abs[1][3] = sad8_xy2_mmx2;
+        }
+    }
+}
diff --git a/contrib/ffmpeg/libavcodec/i386/mpegvideo_mmx.c b/contrib/ffmpeg/libavcodec/i386/mpegvideo_mmx.c
new file mode 100644
index 000000000..1b7b1c19f
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/mpegvideo_mmx.c
@@ -0,0 +1,725 @@
+/*
+ * The simplest mpeg encoder (well, it was the simplest!)
+ * Copyright (c) 2000,2001 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Optimized for ia32 cpus by Nick Kurshev <nickols_k@mail.ru>
+ * h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+#include "../avcodec.h"
+#include "x86_cpu.h"
+
+extern uint16_t inv_zigzag_direct16[64];
+
+static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL;
+static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
+
+
+static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
+                                  DCTELEM *block, int n, int qscale)
+{
+    long level, qmul, qadd, nCoeffs;
+
+    qmul = qscale << 1;
+
+    assert(s->block_last_index[n]>=0 || s->h263_aic);
+
+    if (!s->h263_aic) {
+        if (n < 4)
+            level = block[0] * s->y_dc_scale;
+        else
+            level = block[0] * s->c_dc_scale;
+        qadd = (qscale - 1) | 1;
+    }else{
+        qadd = 0;
+        level= block[0];
+    }
+    if(s->ac_pred)
+        nCoeffs=63;
+    else
+        nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+//printf("%d %d  ", qmul, qadd);
+asm volatile(
+                "movd %1, %%mm6                 \n\t" //qmul
+                "packssdw %%mm6, %%mm6          \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "movd %2, %%mm5                 \n\t" //qadd
+                "pxor %%mm7, %%mm7              \n\t"
+                "packssdw %%mm5, %%mm5          \n\t"
+                "packssdw %%mm5, %%mm5          \n\t"
+                "psubw %%mm5, %%mm7             \n\t"
+                "pxor %%mm4, %%mm4              \n\t"
+                ASMALIGN(4)
+                "1:                             \n\t"
+                "movq (%0, %3), %%mm0           \n\t"
+                "movq 8(%0, %3), %%mm1          \n\t"
+
+                "pmullw %%mm6, %%mm0            \n\t"
+                "pmullw %%mm6, %%mm1            \n\t"
+
+                "movq (%0, %3), %%mm2           \n\t"
+                "movq 8(%0, %3), %%mm3          \n\t"
+
+                "pcmpgtw %%mm4, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
+                "pcmpgtw %%mm4, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
+
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+
+                "paddw %%mm7, %%mm0             \n\t"
+                "paddw %%mm7, %%mm1             \n\t"
+
+                "pxor %%mm0, %%mm2              \n\t"
+                "pxor %%mm1, %%mm3              \n\t"
+
+                "pcmpeqw %%mm7, %%mm0           \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw %%mm7, %%mm1           \n\t" // block[i] == 0 ? -1 : 0
+
+                "pandn %%mm2, %%mm0             \n\t"
+                "pandn %%mm3, %%mm1             \n\t"
+
+                "movq %%mm0, (%0, %3)           \n\t"
+                "movq %%mm1, 8(%0, %3)          \n\t"
+
+                "add $16, %3                    \n\t"
+                "jng 1b                         \n\t"
+                ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
+                : "memory"
+        );
+        block[0]= level;
+}
+
+
+static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
+                                  DCTELEM *block, int n, int qscale)
+{
+    long qmul, qadd, nCoeffs;
+
+    qmul = qscale << 1;
+    qadd = (qscale - 1) | 1;
+
+    assert(s->block_last_index[n]>=0 || s->h263_aic);
+
+    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+//printf("%d %d  ", qmul, qadd);
+asm volatile(
+                "movd %1, %%mm6                 \n\t" //qmul
+                "packssdw %%mm6, %%mm6          \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "movd %2, %%mm5                 \n\t" //qadd
+                "pxor %%mm7, %%mm7              \n\t"
+                "packssdw %%mm5, %%mm5          \n\t"
+                "packssdw %%mm5, %%mm5          \n\t"
+                "psubw %%mm5, %%mm7             \n\t"
+                "pxor %%mm4, %%mm4              \n\t"
+                ASMALIGN(4)
+                "1:                             \n\t"
+                "movq (%0, %3), %%mm0           \n\t"
+                "movq 8(%0, %3), %%mm1          \n\t"
+
+                "pmullw %%mm6, %%mm0            \n\t"
+                "pmullw %%mm6, %%mm1            \n\t"
+
+                "movq (%0, %3), %%mm2           \n\t"
+                "movq 8(%0, %3), %%mm3          \n\t"
+
+                "pcmpgtw %%mm4, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
+                "pcmpgtw %%mm4, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
+
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+
+                "paddw %%mm7, %%mm0             \n\t"
+                "paddw %%mm7, %%mm1             \n\t"
+
+                "pxor %%mm0, %%mm2              \n\t"
+                "pxor %%mm1, %%mm3              \n\t"
+
+                "pcmpeqw %%mm7, %%mm0           \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw %%mm7, %%mm1           \n\t" // block[i] == 0 ? -1 : 0
+
+                "pandn %%mm2, %%mm0             \n\t"
+                "pandn %%mm3, %%mm1             \n\t"
+
+                "movq %%mm0, (%0, %3)           \n\t"
+                "movq %%mm1, 8(%0, %3)          \n\t"
+
+                "add $16, %3                    \n\t"
+                "jng 1b                         \n\t"
+                ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
+                : "memory"
+        );
+}
+
+
+/*
+  NK:
+  Note: looking at PARANOID:
+  "enable all paranoid tests for rounding, overflows, etc..."
+
+#ifdef PARANOID
+                if (level < -2048 || level > 2047)
+                    fprintf(stderr, "unquant error %d %d\n", i, level);
+#endif
+  We can suppose that result of two multiplications can't be greate of 0xFFFF
+  i.e. is 16-bit, so we use here only PMULLW instruction and can avoid
+  a complex multiplication.
+=====================================================
+ Full formula for multiplication of 2 integer numbers
+ which are represent as high:low words:
+ input: value1 = high1:low1
+        value2 = high2:low2
+ output: value3 = value1*value2
+ value3=high3:low3 (on overflow: modulus 2^32 wrap-around)
+ this mean that for 0x123456 * 0x123456 correct result is 0x766cb0ce4
+ but this algorithm will compute only 0x66cb0ce4
+ this limited by 16-bit size of operands
+ ---------------------------------
+ tlow1 = high1*low2
+ tlow2 = high2*low1
+ tlow1 = tlow1 + tlow2
+ high3:low3 = low1*low2
+ high3 += tlow1
+*/
+static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
+                                     DCTELEM *block, int n, int qscale)
+{
+    long nCoeffs;
+    const uint16_t *quant_matrix;
+    int block0;
+
+    assert(s->block_last_index[n]>=0);
+
+    nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
+
+    if (n < 4)
+        block0 = block[0] * s->y_dc_scale;
+    else
+        block0 = block[0] * s->c_dc_scale;
+    /* XXX: only mpeg1 */
+    quant_matrix = s->intra_matrix;
+asm volatile(
+                "pcmpeqw %%mm7, %%mm7           \n\t"
+                "psrlw $15, %%mm7               \n\t"
+                "movd %2, %%mm6                 \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "mov %3, %%"REG_a"              \n\t"
+                ASMALIGN(4)
+                "1:                             \n\t"
+                "movq (%0, %%"REG_a"), %%mm0    \n\t"
+                "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
+                "movq (%1, %%"REG_a"), %%mm4    \n\t"
+                "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
+                "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
+                "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
+                "pxor %%mm2, %%mm2              \n\t"
+                "pxor %%mm3, %%mm3              \n\t"
+                "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
+                "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
+                "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
+                "pmullw %%mm4, %%mm0            \n\t" // abs(block[i])*q
+                "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*q
+                "pxor %%mm4, %%mm4              \n\t"
+                "pxor %%mm5, %%mm5              \n\t" // FIXME slow
+                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
+                "psraw $3, %%mm0                \n\t"
+                "psraw $3, %%mm1                \n\t"
+                "psubw %%mm7, %%mm0             \n\t"
+                "psubw %%mm7, %%mm1             \n\t"
+                "por %%mm7, %%mm0               \n\t"
+                "por %%mm7, %%mm1               \n\t"
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t"
+                "psubw %%mm3, %%mm1             \n\t"
+                "pandn %%mm0, %%mm4             \n\t"
+                "pandn %%mm1, %%mm5             \n\t"
+                "movq %%mm4, (%0, %%"REG_a")    \n\t"
+                "movq %%mm5, 8(%0, %%"REG_a")   \n\t"
+
+                "add $16, %%"REG_a"             \n\t"
+                "js 1b                          \n\t"
+                ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
+                : "%"REG_a, "memory"
+        );
+    block[0]= block0;
+}
+
+static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
+                                     DCTELEM *block, int n, int qscale)
+{
+    long nCoeffs;
+    const uint16_t *quant_matrix;
+
+    assert(s->block_last_index[n]>=0);
+
+    nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
+
+        quant_matrix = s->inter_matrix;
+asm volatile(
+                "pcmpeqw %%mm7, %%mm7           \n\t"
+                "psrlw $15, %%mm7               \n\t"
+                "movd %2, %%mm6                 \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "mov %3, %%"REG_a"              \n\t"
+                ASMALIGN(4)
+                "1:                             \n\t"
+                "movq (%0, %%"REG_a"), %%mm0    \n\t"
+                "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
+                "movq (%1, %%"REG_a"), %%mm4    \n\t"
+                "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
+                "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
+                "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
+                "pxor %%mm2, %%mm2              \n\t"
+                "pxor %%mm3, %%mm3              \n\t"
+                "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
+                "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
+                "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
+                "paddw %%mm0, %%mm0             \n\t" // abs(block[i])*2
+                "paddw %%mm1, %%mm1             \n\t" // abs(block[i])*2
+                "paddw %%mm7, %%mm0             \n\t" // abs(block[i])*2 + 1
+                "paddw %%mm7, %%mm1             \n\t" // abs(block[i])*2 + 1
+                "pmullw %%mm4, %%mm0            \n\t" // (abs(block[i])*2 + 1)*q
+                "pmullw %%mm5, %%mm1            \n\t" // (abs(block[i])*2 + 1)*q
+                "pxor %%mm4, %%mm4              \n\t"
+                "pxor %%mm5, %%mm5              \n\t" // FIXME slow
+                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
+                "psraw $4, %%mm0                \n\t"
+                "psraw $4, %%mm1                \n\t"
+                "psubw %%mm7, %%mm0             \n\t"
+                "psubw %%mm7, %%mm1             \n\t"
+                "por %%mm7, %%mm0               \n\t"
+                "por %%mm7, %%mm1               \n\t"
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t"
+                "psubw %%mm3, %%mm1             \n\t"
+                "pandn %%mm0, %%mm4             \n\t"
+                "pandn %%mm1, %%mm5             \n\t"
+                "movq %%mm4, (%0, %%"REG_a")    \n\t"
+                "movq %%mm5, 8(%0, %%"REG_a")   \n\t"
+
+                "add $16, %%"REG_a"             \n\t"
+                "js 1b                          \n\t"
+                ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
+                : "%"REG_a, "memory"
+        );
+}
+
+static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
+                                     DCTELEM *block, int n, int qscale)
+{
+    long nCoeffs;
+    const uint16_t *quant_matrix;
+    int block0;
+
+    assert(s->block_last_index[n]>=0);
+
+    if(s->alternate_scan) nCoeffs= 63; //FIXME
+    else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
+
+    if (n < 4)
+        block0 = block[0] * s->y_dc_scale;
+    else
+        block0 = block[0] * s->c_dc_scale;
+    quant_matrix = s->intra_matrix;
+asm volatile(
+                "pcmpeqw %%mm7, %%mm7           \n\t"
+                "psrlw $15, %%mm7               \n\t"
+                "movd %2, %%mm6                 \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "mov %3, %%"REG_a"              \n\t"
+                ASMALIGN(4)
+                "1:                             \n\t"
+                "movq (%0, %%"REG_a"), %%mm0    \n\t"
+                "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
+                "movq (%1, %%"REG_a"), %%mm4    \n\t"
+                "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
+                "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
+                "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
+                "pxor %%mm2, %%mm2              \n\t"
+                "pxor %%mm3, %%mm3              \n\t"
+                "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
+                "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
+                "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
+                "pmullw %%mm4, %%mm0            \n\t" // abs(block[i])*q
+                "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*q
+                "pxor %%mm4, %%mm4              \n\t"
+                "pxor %%mm5, %%mm5              \n\t" // FIXME slow
+                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
+                "psraw $3, %%mm0                \n\t"
+                "psraw $3, %%mm1                \n\t"
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t"
+                "psubw %%mm3, %%mm1             \n\t"
+                "pandn %%mm0, %%mm4             \n\t"
+                "pandn %%mm1, %%mm5             \n\t"
+                "movq %%mm4, (%0, %%"REG_a")    \n\t"
+                "movq %%mm5, 8(%0, %%"REG_a")   \n\t"
+
+                "add $16, %%"REG_a"             \n\t"
+                "jng 1b                         \n\t"
+                ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
+                : "%"REG_a, "memory"
+        );
+    block[0]= block0;
+        //Note, we dont do mismatch control for intra as errors cannot accumulate
+}
+
+static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
+                                     DCTELEM *block, int n, int qscale)
+{
+    long nCoeffs;
+    const uint16_t *quant_matrix;
+
+    assert(s->block_last_index[n]>=0);
+
+    if(s->alternate_scan) nCoeffs= 63; //FIXME
+    else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
+
+        quant_matrix = s->inter_matrix;
+asm volatile(
+                "pcmpeqw %%mm7, %%mm7           \n\t"
+                "psrlq $48, %%mm7               \n\t"
+                "movd %2, %%mm6                 \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "mov %3, %%"REG_a"              \n\t"
+                ASMALIGN(4)
+                "1:                             \n\t"
+                "movq (%0, %%"REG_a"), %%mm0    \n\t"
+                "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
+                "movq (%1, %%"REG_a"), %%mm4    \n\t"
+                "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
+                "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
+                "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
+                "pxor %%mm2, %%mm2              \n\t"
+                "pxor %%mm3, %%mm3              \n\t"
+                "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
+                "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
+                "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
+                "paddw %%mm0, %%mm0             \n\t" // abs(block[i])*2
+                "paddw %%mm1, %%mm1             \n\t" // abs(block[i])*2
+                "pmullw %%mm4, %%mm0            \n\t" // abs(block[i])*2*q
+                "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*2*q
+                "paddw %%mm4, %%mm0             \n\t" // (abs(block[i])*2 + 1)*q
+                "paddw %%mm5, %%mm1             \n\t" // (abs(block[i])*2 + 1)*q
+                "pxor %%mm4, %%mm4              \n\t"
+                "pxor %%mm5, %%mm5              \n\t" // FIXME slow
+                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
+                "psrlw $4, %%mm0                \n\t"
+                "psrlw $4, %%mm1                \n\t"
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t"
+                "psubw %%mm3, %%mm1             \n\t"
+                "pandn %%mm0, %%mm4             \n\t"
+                "pandn %%mm1, %%mm5             \n\t"
+                "pxor %%mm4, %%mm7              \n\t"
+                "pxor %%mm5, %%mm7              \n\t"
+                "movq %%mm4, (%0, %%"REG_a")    \n\t"
+                "movq %%mm5, 8(%0, %%"REG_a")   \n\t"
+
+                "add $16, %%"REG_a"             \n\t"
+                "jng 1b                         \n\t"
+                "movd 124(%0, %3), %%mm0        \n\t"
+                "movq %%mm7, %%mm6              \n\t"
+                "psrlq $32, %%mm7               \n\t"
+                "pxor %%mm6, %%mm7              \n\t"
+                "movq %%mm7, %%mm6              \n\t"
+                "psrlq $16, %%mm7               \n\t"
+                "pxor %%mm6, %%mm7              \n\t"
+                "pslld $31, %%mm7               \n\t"
+                "psrlq $15, %%mm7               \n\t"
+                "pxor %%mm7, %%mm0              \n\t"
+                "movd %%mm0, 124(%0, %3)        \n\t"
+
+                ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
+                : "%"REG_a, "memory"
+        );
+}
+
+/* draw the edges of width 'w' of an image of size width, height
+   this mmx version can only handle w==8 || w==16 */
+static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
+{
+    uint8_t *ptr, *last_line;
+    int i;
+
+    last_line = buf + (height - 1) * wrap;
+    /* left and right */
+    ptr = buf;
+    if(w==8)
+    {
+        asm volatile(
+                "1:                             \n\t"
+                "movd (%0), %%mm0               \n\t"
+                "punpcklbw %%mm0, %%mm0         \n\t"
+                "punpcklwd %%mm0, %%mm0         \n\t"
+                "punpckldq %%mm0, %%mm0         \n\t"
+                "movq %%mm0, -8(%0)             \n\t"
+                "movq -8(%0, %2), %%mm1         \n\t"
+                "punpckhbw %%mm1, %%mm1         \n\t"
+                "punpckhwd %%mm1, %%mm1         \n\t"
+                "punpckhdq %%mm1, %%mm1         \n\t"
+                "movq %%mm1, (%0, %2)           \n\t"
+                "add %1, %0                     \n\t"
+                "cmp %3, %0                     \n\t"
+                " jb 1b                         \n\t"
+                : "+r" (ptr)
+                : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
+        );
+    }
+    else
+    {
+        asm volatile(
+                "1:                             \n\t"
+                "movd (%0), %%mm0               \n\t"
+                "punpcklbw %%mm0, %%mm0         \n\t"
+                "punpcklwd %%mm0, %%mm0         \n\t"
+                "punpckldq %%mm0, %%mm0         \n\t"
+                "movq %%mm0, -8(%0)             \n\t"
+                "movq %%mm0, -16(%0)            \n\t"
+                "movq -8(%0, %2), %%mm1         \n\t"
+                "punpckhbw %%mm1, %%mm1         \n\t"
+                "punpckhwd %%mm1, %%mm1         \n\t"
+                "punpckhdq %%mm1, %%mm1         \n\t"
+                "movq %%mm1, (%0, %2)           \n\t"
+                "movq %%mm1, 8(%0, %2)          \n\t"
+                "add %1, %0                     \n\t"
+                "cmp %3, %0                     \n\t"
+                " jb 1b                         \n\t"
+                : "+r" (ptr)
+                : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
+        );
+    }
+
+    for(i=0;i<w;i+=4) {
+        /* top and bottom (and hopefully also the corners) */
+        ptr= buf - (i + 1) * wrap - w;
+        asm volatile(
+                "1:                             \n\t"
+                "movq (%1, %0), %%mm0           \n\t"
+                "movq %%mm0, (%0)               \n\t"
+                "movq %%mm0, (%0, %2)           \n\t"
+                "movq %%mm0, (%0, %2, 2)        \n\t"
+                "movq %%mm0, (%0, %3)           \n\t"
+                "add $8, %0                     \n\t"
+                "cmp %4, %0                     \n\t"
+                " jb 1b                         \n\t"
+                : "+r" (ptr)
+                : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w)
+        );
+        ptr= last_line + (i + 1) * wrap - w;
+        asm volatile(
+                "1:                             \n\t"
+                "movq (%1, %0), %%mm0           \n\t"
+                "movq %%mm0, (%0)               \n\t"
+                "movq %%mm0, (%0, %2)           \n\t"
+                "movq %%mm0, (%0, %2, 2)        \n\t"
+                "movq %%mm0, (%0, %3)           \n\t"
+                "add $8, %0                     \n\t"
+                "cmp %4, %0                     \n\t"
+                " jb 1b                         \n\t"
+                : "+r" (ptr)
+                : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w)
+        );
+    }
+}
+
+static void  denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
+    const int intra= s->mb_intra;
+    int *sum= s->dct_error_sum[intra];
+    uint16_t *offset= s->dct_offset[intra];
+
+    s->dct_count[intra]++;
+
+    asm volatile(
+        "pxor %%mm7, %%mm7                      \n\t"
+        "1:                                     \n\t"
+        "pxor %%mm0, %%mm0                      \n\t"
+        "pxor %%mm1, %%mm1                      \n\t"
+        "movq (%0), %%mm2                       \n\t"
+        "movq 8(%0), %%mm3                      \n\t"
+        "pcmpgtw %%mm2, %%mm0                   \n\t"
+        "pcmpgtw %%mm3, %%mm1                   \n\t"
+        "pxor %%mm0, %%mm2                      \n\t"
+        "pxor %%mm1, %%mm3                      \n\t"
+        "psubw %%mm0, %%mm2                     \n\t"
+        "psubw %%mm1, %%mm3                     \n\t"
+        "movq %%mm2, %%mm4                      \n\t"
+        "movq %%mm3, %%mm5                      \n\t"
+        "psubusw (%2), %%mm2                    \n\t"
+        "psubusw 8(%2), %%mm3                   \n\t"
+        "pxor %%mm0, %%mm2                      \n\t"
+        "pxor %%mm1, %%mm3                      \n\t"
+        "psubw %%mm0, %%mm2                     \n\t"
+        "psubw %%mm1, %%mm3                     \n\t"
+        "movq %%mm2, (%0)                       \n\t"
+        "movq %%mm3, 8(%0)                      \n\t"
+        "movq %%mm4, %%mm2                      \n\t"
+        "movq %%mm5, %%mm3                      \n\t"
+        "punpcklwd %%mm7, %%mm4                 \n\t"
+        "punpckhwd %%mm7, %%mm2                 \n\t"
+        "punpcklwd %%mm7, %%mm5                 \n\t"
+        "punpckhwd %%mm7, %%mm3                 \n\t"
+        "paddd (%1), %%mm4                      \n\t"
+        "paddd 8(%1), %%mm2                     \n\t"
+        "paddd 16(%1), %%mm5                    \n\t"
+        "paddd 24(%1), %%mm3                    \n\t"
+        "movq %%mm4, (%1)                       \n\t"
+        "movq %%mm2, 8(%1)                      \n\t"
+        "movq %%mm5, 16(%1)                     \n\t"
+        "movq %%mm3, 24(%1)                     \n\t"
+        "add $16, %0                            \n\t"
+        "add $32, %1                            \n\t"
+        "add $16, %2                            \n\t"
+        "cmp %3, %0                             \n\t"
+            " jb 1b                             \n\t"
+        : "+r" (block), "+r" (sum), "+r" (offset)
+        : "r"(block+64)
+    );
+}
+
+static void  denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
+    const int intra= s->mb_intra;
+    int *sum= s->dct_error_sum[intra];
+    uint16_t *offset= s->dct_offset[intra];
+
+    s->dct_count[intra]++;
+
+    asm volatile(
+        "pxor %%xmm7, %%xmm7                    \n\t"
+        "1:                                     \n\t"
+        "pxor %%xmm0, %%xmm0                    \n\t"
+        "pxor %%xmm1, %%xmm1                    \n\t"
+        "movdqa (%0), %%xmm2                    \n\t"
+        "movdqa 16(%0), %%xmm3                  \n\t"
+        "pcmpgtw %%xmm2, %%xmm0                 \n\t"
+        "pcmpgtw %%xmm3, %%xmm1                 \n\t"
+        "pxor %%xmm0, %%xmm2                    \n\t"
+        "pxor %%xmm1, %%xmm3                    \n\t"
+        "psubw %%xmm0, %%xmm2                   \n\t"
+        "psubw %%xmm1, %%xmm3                   \n\t"
+        "movdqa %%xmm2, %%xmm4                  \n\t"
+        "movdqa %%xmm3, %%xmm5                  \n\t"
+        "psubusw (%2), %%xmm2                   \n\t"
+        "psubusw 16(%2), %%xmm3                 \n\t"
+        "pxor %%xmm0, %%xmm2                    \n\t"
+        "pxor %%xmm1, %%xmm3                    \n\t"
+        "psubw %%xmm0, %%xmm2                   \n\t"
+        "psubw %%xmm1, %%xmm3                   \n\t"
+        "movdqa %%xmm2, (%0)                    \n\t"
+        "movdqa %%xmm3, 16(%0)                  \n\t"
+        "movdqa %%xmm4, %%xmm6                  \n\t"
+        "movdqa %%xmm5, %%xmm0                  \n\t"
+        "punpcklwd %%xmm7, %%xmm4               \n\t"
+        "punpckhwd %%xmm7, %%xmm6               \n\t"
+        "punpcklwd %%xmm7, %%xmm5               \n\t"
+        "punpckhwd %%xmm7, %%xmm0               \n\t"
+        "paddd (%1), %%xmm4                     \n\t"
+        "paddd 16(%1), %%xmm6                   \n\t"
+        "paddd 32(%1), %%xmm5                   \n\t"
+        "paddd 48(%1), %%xmm0                   \n\t"
+        "movdqa %%xmm4, (%1)                    \n\t"
+        "movdqa %%xmm6, 16(%1)                  \n\t"
+        "movdqa %%xmm5, 32(%1)                  \n\t"
+        "movdqa %%xmm0, 48(%1)                  \n\t"
+        "add $32, %0                            \n\t"
+        "add $64, %1                            \n\t"
+        "add $32, %2                            \n\t"
+        "cmp %3, %0                             \n\t"
+            " jb 1b                             \n\t"
+        : "+r" (block), "+r" (sum), "+r" (offset)
+        : "r"(block+64)
+    );
+}
+
+#undef HAVE_MMX2
+#define RENAME(a) a ## _MMX
+#define RENAMEl(a) a ## _mmx
+#include "mpegvideo_mmx_template.c"
+
+#define HAVE_MMX2
+#undef RENAME
+#undef RENAMEl
+#define RENAME(a) a ## _MMX2
+#define RENAMEl(a) a ## _mmx2
+#include "mpegvideo_mmx_template.c"
+
+#undef RENAME
+#undef RENAMEl
+#define RENAME(a) a ## _SSE2
+#define RENAMEl(a) a ## _sse2
+#include "mpegvideo_mmx_template.c"
+
+void MPV_common_init_mmx(MpegEncContext *s)
+{
+    if (mm_flags & MM_MMX) {
+        const int dct_algo = s->avctx->dct_algo;
+
+        s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
+        s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
+        s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
+        s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx;
+        if(!(s->flags & CODEC_FLAG_BITEXACT))
+            s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
+        s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
+
+        draw_edges = draw_edges_mmx;
+
+        if (mm_flags & MM_SSE2) {
+            s->denoise_dct= denoise_dct_sse2;
+        } else {
+                s->denoise_dct= denoise_dct_mmx;
+        }
+
+        if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
+            if(mm_flags & MM_SSE2){
+                s->dct_quantize= dct_quantize_SSE2;
+            } else if(mm_flags & MM_MMXEXT){
+                s->dct_quantize= dct_quantize_MMX2;
+            } else {
+                s->dct_quantize= dct_quantize_MMX;
+            }
+        }
+    }
+}
diff --git a/contrib/ffmpeg/libavcodec/i386/mpegvideo_mmx_template.c b/contrib/ffmpeg/libavcodec/i386/mpegvideo_mmx_template.c
new file mode 100644
index 000000000..d59b6efd9
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/mpegvideo_mmx_template.c
@@ -0,0 +1,348 @@
+/*
+ * MPEG video MMX templates
+ *
+ * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#undef SPREADW
+#undef PMAXW
+#undef PMAX
+#ifdef HAVE_MMX2
+#define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
+#define PMAXW(a,b) "pmaxsw " #a ", " #b "     \n\t"
+#define PMAX(a,b) \
+            "pshufw $0x0E," #a ", " #b "        \n\t"\
+            PMAXW(b, a)\
+            "pshufw $0x01," #a ", " #b "        \n\t"\
+            PMAXW(b, a)
+#else
+#define SPREADW(a) \
+        "punpcklwd " #a ", " #a " \n\t"\
+        "punpcklwd " #a ", " #a " \n\t"
+#define PMAXW(a,b) \
+        "psubusw " #a ", " #b " \n\t"\
+        "paddw " #a ", " #b "   \n\t"
+#define PMAX(a,b)  \
+            "movq " #a ", " #b "                \n\t"\
+            "psrlq $32, " #a "                  \n\t"\
+            PMAXW(b, a)\
+            "movq " #a ", " #b "                \n\t"\
+            "psrlq $16, " #a "                  \n\t"\
+            PMAXW(b, a)
+
+#endif
+
+static int RENAME(dct_quantize)(MpegEncContext *s,
+                            DCTELEM *block, int n,
+                            int qscale, int *overflow)
+{
+    long last_non_zero_p1;
+    int level=0, q; //=0 is cuz gcc says uninitalized ...
+    const uint16_t *qmat, *bias;
+    DECLARE_ALIGNED_8(int16_t, temp_block[64]);
+
+    assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly?
+
+    //s->fdct (block);
+    RENAMEl(ff_fdct) (block); //cant be anything else ...
+
+    if(s->dct_error_sum)
+        s->denoise_dct(s, block);
+
+    if (s->mb_intra) {
+        int dummy;
+        if (n < 4)
+            q = s->y_dc_scale;
+        else
+            q = s->c_dc_scale;
+        /* note: block[0] is assumed to be positive */
+        if (!s->h263_aic) {
+#if 1
+        asm volatile (
+                "mul %%ecx                \n\t"
+                : "=d" (level), "=a"(dummy)
+                : "a" ((block[0]>>2) + q), "c" (ff_inverse[q<<1])
+        );
+#else
+        asm volatile (
+                "xorl %%edx, %%edx        \n\t"
+                "divw %%cx                \n\t"
+                "movzwl %%ax, %%eax       \n\t"
+                : "=a" (level)
+                : "a" ((block[0]>>2) + q), "c" (q<<1)
+                : "%edx"
+        );
+#endif
+        } else
+            /* For AIC we skip quant/dequant of INTRADC */
+            level = (block[0] + 4)>>3;
+
+        block[0]=0; //avoid fake overflow
+//        temp_block[0] = (block[0] + (q >> 1)) / q;
+        last_non_zero_p1 = 1;
+        bias = s->q_intra_matrix16[qscale][1];
+        qmat = s->q_intra_matrix16[qscale][0];
+    } else {
+        last_non_zero_p1 = 0;
+        bias = s->q_inter_matrix16[qscale][1];
+        qmat = s->q_inter_matrix16[qscale][0];
+    }
+
+    if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
+
+        asm volatile(
+            "movd %%"REG_a", %%mm3              \n\t" // last_non_zero_p1
+            SPREADW(%%mm3)
+            "pxor %%mm7, %%mm7                  \n\t" // 0
+            "pxor %%mm4, %%mm4                  \n\t" // 0
+            "movq (%2), %%mm5                   \n\t" // qmat[0]
+            "pxor %%mm6, %%mm6                  \n\t"
+            "psubw (%3), %%mm6                  \n\t" // -bias[0]
+            "mov $-128, %%"REG_a"               \n\t"
+            ASMALIGN(4)
+            "1:                                 \n\t"
+            "pxor %%mm1, %%mm1                  \n\t" // 0
+            "movq (%1, %%"REG_a"), %%mm0        \n\t" // block[i]
+            "pcmpgtw %%mm0, %%mm1               \n\t" // block[i] <= 0 ? 0xFF : 0x00
+            "pxor %%mm1, %%mm0                  \n\t"
+            "psubw %%mm1, %%mm0                 \n\t" // ABS(block[i])
+            "psubusw %%mm6, %%mm0               \n\t" // ABS(block[i]) + bias[0]
+            "pmulhw %%mm5, %%mm0                \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
+            "por %%mm0, %%mm4                   \n\t"
+            "pxor %%mm1, %%mm0                  \n\t"
+            "psubw %%mm1, %%mm0                 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
+            "movq %%mm0, (%5, %%"REG_a")        \n\t"
+            "pcmpeqw %%mm7, %%mm0               \n\t" // out==0 ? 0xFF : 0x00
+            "movq (%4, %%"REG_a"), %%mm1        \n\t"
+            "movq %%mm7, (%1, %%"REG_a")        \n\t" // 0
+            "pandn %%mm1, %%mm0                 \n\t"
+            PMAXW(%%mm0, %%mm3)
+            "add $8, %%"REG_a"                  \n\t"
+            " js 1b                             \n\t"
+            PMAX(%%mm3, %%mm0)
+            "movd %%mm3, %%"REG_a"              \n\t"
+            "movzb %%al, %%"REG_a"              \n\t" // last_non_zero_p1
+            : "+a" (last_non_zero_p1)
+            : "r" (block+64), "r" (qmat), "r" (bias),
+              "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
+        );
+        // note the asm is split cuz gcc doesnt like that many operands ...
+        asm volatile(
+            "movd %1, %%mm1                     \n\t" // max_qcoeff
+            SPREADW(%%mm1)
+            "psubusw %%mm1, %%mm4               \n\t"
+            "packuswb %%mm4, %%mm4              \n\t"
+            "movd %%mm4, %0                     \n\t" // *overflow
+        : "=g" (*overflow)
+        : "g" (s->max_qcoeff)
+        );
+    }else{ // FMT_H263
+        asm volatile(
+            "movd %%"REG_a", %%mm3              \n\t" // last_non_zero_p1
+            SPREADW(%%mm3)
+            "pxor %%mm7, %%mm7                  \n\t" // 0
+            "pxor %%mm4, %%mm4                  \n\t" // 0
+            "mov $-128, %%"REG_a"               \n\t"
+            ASMALIGN(4)
+            "1:                                 \n\t"
+            "pxor %%mm1, %%mm1                  \n\t" // 0
+            "movq (%1, %%"REG_a"), %%mm0        \n\t" // block[i]
+            "pcmpgtw %%mm0, %%mm1               \n\t" // block[i] <= 0 ? 0xFF : 0x00
+            "pxor %%mm1, %%mm0                  \n\t"
+            "psubw %%mm1, %%mm0                 \n\t" // ABS(block[i])
+            "movq (%3, %%"REG_a"), %%mm6        \n\t" // bias[0]
+            "paddusw %%mm6, %%mm0               \n\t" // ABS(block[i]) + bias[0]
+            "movq (%2, %%"REG_a"), %%mm5        \n\t" // qmat[i]
+            "pmulhw %%mm5, %%mm0                \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
+            "por %%mm0, %%mm4                   \n\t"
+            "pxor %%mm1, %%mm0                  \n\t"
+            "psubw %%mm1, %%mm0                 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
+            "movq %%mm0, (%5, %%"REG_a")        \n\t"
+            "pcmpeqw %%mm7, %%mm0               \n\t" // out==0 ? 0xFF : 0x00
+            "movq (%4, %%"REG_a"), %%mm1        \n\t"
+            "movq %%mm7, (%1, %%"REG_a")        \n\t" // 0
+            "pandn %%mm1, %%mm0                 \n\t"
+            PMAXW(%%mm0, %%mm3)
+            "add $8, %%"REG_a"                  \n\t"
+            " js 1b                             \n\t"
+            PMAX(%%mm3, %%mm0)
+            "movd %%mm3, %%"REG_a"              \n\t"
+            "movzb %%al, %%"REG_a"              \n\t" // last_non_zero_p1
+            : "+a" (last_non_zero_p1)
+            : "r" (block+64), "r" (qmat+64), "r" (bias+64),
+              "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
+        );
+        // note the asm is split cuz gcc doesnt like that many operands ...
+        asm volatile(
+            "movd %1, %%mm1                     \n\t" // max_qcoeff
+            SPREADW(%%mm1)
+            "psubusw %%mm1, %%mm4               \n\t"
+            "packuswb %%mm4, %%mm4              \n\t"
+            "movd %%mm4, %0                     \n\t" // *overflow
+        : "=g" (*overflow)
+        : "g" (s->max_qcoeff)
+        );
+    }
+
+    if(s->mb_intra) block[0]= level;
+    else            block[0]= temp_block[0];
+
+    if(s->dsp.idct_permutation_type == FF_SIMPLE_IDCT_PERM){
+        if(last_non_zero_p1 <= 1) goto end;
+        block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08];
+        block[0x20] = temp_block[0x10];
+        if(last_non_zero_p1 <= 4) goto end;
+        block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02];
+        block[0x09] = temp_block[0x03];
+        if(last_non_zero_p1 <= 7) goto end;
+        block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11];
+        block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20];
+        if(last_non_zero_p1 <= 11) goto end;
+        block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12];
+        block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04];
+        block[0x0C] = temp_block[0x05];
+        if(last_non_zero_p1 <= 16) goto end;
+        block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13];
+        block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21];
+        block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30];
+        block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22];
+        if(last_non_zero_p1 <= 24) goto end;
+        block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14];
+        block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06];
+        block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E];
+        block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C];
+        if(last_non_zero_p1 <= 32) goto end;
+        block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A];
+        block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38];
+        block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32];
+        block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24];
+        if(last_non_zero_p1 <= 40) goto end;
+        block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16];
+        block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17];
+        block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25];
+        block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33];
+        if(last_non_zero_p1 <= 48) goto end;
+        block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
+        block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D];
+        block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+        block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E];
+        if(last_non_zero_p1 <= 56) goto end;
+        block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C];
+        block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36];
+        block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37];
+        block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
+    }else if(s->dsp.idct_permutation_type == FF_LIBMPEG2_IDCT_PERM){
+        if(last_non_zero_p1 <= 1) goto end;
+        block[0x04] = temp_block[0x01];
+        block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
+        if(last_non_zero_p1 <= 4) goto end;
+        block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02];
+        block[0x05] = temp_block[0x03];
+        if(last_non_zero_p1 <= 7) goto end;
+        block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11];
+        block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
+        if(last_non_zero_p1 <= 11) goto end;
+        block[0x1C] = temp_block[0x19];
+        block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B];
+        block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05];
+        if(last_non_zero_p1 <= 16) goto end;
+        block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13];
+        block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21];
+        block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
+        block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22];
+        if(last_non_zero_p1 <= 24) goto end;
+        block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14];
+        block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06];
+        block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E];
+        block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C];
+        if(last_non_zero_p1 <= 32) goto end;
+        block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A];
+        block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38];
+        block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32];
+        block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24];
+        if(last_non_zero_p1 <= 40) goto end;
+        block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16];
+        block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
+        block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25];
+        block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33];
+        if(last_non_zero_p1 <= 48) goto end;
+        block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B];
+        block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D];
+            block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+        block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E];
+        if(last_non_zero_p1 <= 56) goto end;
+        block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C];
+        block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36];
+        block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
+        block[0x3B] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
+    }else{
+        if(last_non_zero_p1 <= 1) goto end;
+        block[0x01] = temp_block[0x01];
+        block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
+        if(last_non_zero_p1 <= 4) goto end;
+        block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02];
+        block[0x03] = temp_block[0x03];
+        if(last_non_zero_p1 <= 7) goto end;
+        block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11];
+        block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
+        if(last_non_zero_p1 <= 11) goto end;
+        block[0x19] = temp_block[0x19];
+        block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B];
+        block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05];
+        if(last_non_zero_p1 <= 16) goto end;
+        block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13];
+        block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21];
+        block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
+        block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22];
+        if(last_non_zero_p1 <= 24) goto end;
+        block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14];
+        block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06];
+        block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E];
+        block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C];
+        if(last_non_zero_p1 <= 32) goto end;
+        block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A];
+        block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38];
+        block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32];
+        block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24];
+        if(last_non_zero_p1 <= 40) goto end;
+        block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16];
+        block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
+        block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25];
+        block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33];
+        if(last_non_zero_p1 <= 48) goto end;
+        block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
+        block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D];
+        block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+        block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E];
+        if(last_non_zero_p1 <= 56) goto end;
+        block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C];
+        block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36];
+        block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
+        block[0x3E] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
+    }
+    end:
+/*
+    for(i=0; i<last_non_zero_p1; i++)
+    {
+       int j= zigzag_direct_noperm[i];
+       block[block_permute_op(j)]= temp_block[j];
+    }
+*/
+
+    return last_non_zero_p1 - 1;
+}
diff --git a/contrib/ffmpeg/libavcodec/i386/simple_idct_mmx.c b/contrib/ffmpeg/libavcodec/i386/simple_idct_mmx.c
new file mode 100644
index 000000000..525ef34f7
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/simple_idct_mmx.c
@@ -0,0 +1,1294 @@
+/*
+ * Simple IDCT MMX
+ *
+ * Copyright (c) 2001, 2002 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "../dsputil.h"
+#include "../simple_idct.h"
+
+/*
+23170.475006
+22725.260826
+21406.727617
+19265.545870
+16384.000000
+12872.826198
+8866.956905
+4520.335430
+*/
+#define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#if 0
+#define C4 16384 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#else
+#define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
+#endif
+#define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C6 8867  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C7 4520  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+
+#define ROW_SHIFT 11
+#define COL_SHIFT 20 // 6
+
+static const uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000ULL;
+static const uint64_t attribute_used __attribute__((aligned(8))) d40000= 0x0000000000040000ULL;
+
+static const int16_t __attribute__((aligned(8))) coeffs[]= {
+        1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0,
+//        1<<(COL_SHIFT-1), 0, 1<<(COL_SHIFT-1), 0,
+//        0, 1<<(COL_SHIFT-1-16), 0, 1<<(COL_SHIFT-1-16),
+        1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0,
+        // the 1 = ((1<<(COL_SHIFT-1))/C4)<<ROW_SHIFT :)
+//        0, 0, 0, 0,
+//        0, 0, 0, 0,
+
+ C4,  C4,  C4,  C4,
+ C4, -C4,  C4, -C4,
+
+ C2,  C6,  C2,  C6,
+ C6, -C2,  C6, -C2,
+
+ C1,  C3,  C1,  C3,
+ C5,  C7,  C5,  C7,
+
+ C3, -C7,  C3, -C7,
+-C1, -C5, -C1, -C5,
+
+ C5, -C1,  C5, -C1,
+ C7,  C3,  C7,  C3,
+
+ C7, -C5,  C7, -C5,
+ C3, -C1,  C3, -C1
+};
+
+#if 0
+static void unused_var_killer(){
+        int a= wm1010 + d40000;
+        temp[0]=a;
+}
+
+static void inline idctCol (int16_t * col, int16_t *input)
+{
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+        int a0, a1, a2, a3, b0, b1, b2, b3;
+        const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C6 = 8867;  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C7 = 4520;  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+/*
+        if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) {
+                col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] =
+                        col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3;
+                return;
+        }*/
+
+col[8*0] = input[8*0 + 0];
+col[8*1] = input[8*2 + 0];
+col[8*2] = input[8*0 + 1];
+col[8*3] = input[8*2 + 1];
+col[8*4] = input[8*4 + 0];
+col[8*5] = input[8*6 + 0];
+col[8*6] = input[8*4 + 1];
+col[8*7] = input[8*6 + 1];
+
+        a0 = C4*col[8*0] + C2*col[8*2] + C4*col[8*4] + C6*col[8*6] + (1<<(COL_SHIFT-1));
+        a1 = C4*col[8*0] + C6*col[8*2] - C4*col[8*4] - C2*col[8*6] + (1<<(COL_SHIFT-1));
+        a2 = C4*col[8*0] - C6*col[8*2] - C4*col[8*4] + C2*col[8*6] + (1<<(COL_SHIFT-1));
+        a3 = C4*col[8*0] - C2*col[8*2] + C4*col[8*4] - C6*col[8*6] + (1<<(COL_SHIFT-1));
+
+        b0 = C1*col[8*1] + C3*col[8*3] + C5*col[8*5] + C7*col[8*7];
+        b1 = C3*col[8*1] - C7*col[8*3] - C1*col[8*5] - C5*col[8*7];
+        b2 = C5*col[8*1] - C1*col[8*3] + C7*col[8*5] + C3*col[8*7];
+        b3 = C7*col[8*1] - C5*col[8*3] + C3*col[8*5] - C1*col[8*7];
+
+        col[8*0] = (a0 + b0) >> COL_SHIFT;
+        col[8*1] = (a1 + b1) >> COL_SHIFT;
+        col[8*2] = (a2 + b2) >> COL_SHIFT;
+        col[8*3] = (a3 + b3) >> COL_SHIFT;
+        col[8*4] = (a3 - b3) >> COL_SHIFT;
+        col[8*5] = (a2 - b2) >> COL_SHIFT;
+        col[8*6] = (a1 - b1) >> COL_SHIFT;
+        col[8*7] = (a0 - b0) >> COL_SHIFT;
+}
+
+static void inline idctRow (int16_t * output, int16_t * input)
+{
+        int16_t row[8];
+
+        int a0, a1, a2, a3, b0, b1, b2, b3;
+        const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C6 = 8867;  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C7 = 4520;  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+
+row[0] = input[0];
+row[2] = input[1];
+row[4] = input[4];
+row[6] = input[5];
+row[1] = input[8];
+row[3] = input[9];
+row[5] = input[12];
+row[7] = input[13];
+
+        if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7]) ) {
+                row[0] = row[1] = row[2] = row[3] = row[4] =
+                        row[5] = row[6] = row[7] = row[0]<<3;
+        output[0]  = row[0];
+        output[2]  = row[1];
+        output[4]  = row[2];
+        output[6]  = row[3];
+        output[8]  = row[4];
+        output[10] = row[5];
+        output[12] = row[6];
+        output[14] = row[7];
+                return;
+        }
+
+        a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + (1<<(ROW_SHIFT-1));
+        a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + (1<<(ROW_SHIFT-1));
+        a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + (1<<(ROW_SHIFT-1));
+        a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + (1<<(ROW_SHIFT-1));
+
+        b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
+        b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
+        b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
+        b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
+
+        row[0] = (a0 + b0) >> ROW_SHIFT;
+        row[1] = (a1 + b1) >> ROW_SHIFT;
+        row[2] = (a2 + b2) >> ROW_SHIFT;
+        row[3] = (a3 + b3) >> ROW_SHIFT;
+        row[4] = (a3 - b3) >> ROW_SHIFT;
+        row[5] = (a2 - b2) >> ROW_SHIFT;
+        row[6] = (a1 - b1) >> ROW_SHIFT;
+        row[7] = (a0 - b0) >> ROW_SHIFT;
+
+        output[0]  = row[0];
+        output[2]  = row[1];
+        output[4]  = row[2];
+        output[6]  = row[3];
+        output[8]  = row[4];
+        output[10] = row[5];
+        output[12] = row[6];
+        output[14] = row[7];
+}
+#endif
+
+static inline void idct(int16_t *block)
+{
+        int64_t __attribute__((aligned(8))) align_tmp[16];
+        int16_t * const temp= (int16_t*)align_tmp;
+
+        asm volatile(
+#if 0 //Alternative, simpler variant
+
+#define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq 56(%2), %%mm5             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        #rounder ", %%mm0               \n\t"\
+        "paddd %%mm0, %%mm1             \n\t" /* A1             a1 */\
+        "paddd %%mm0, %%mm0             \n\t" \
+        "psubd %%mm1, %%mm0             \n\t" /* A2             a2 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm5, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm5             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm5             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm1, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm5, %%mm1             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm5, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm1, %%mm7          \n\t" /* A1+B1  a1+b1   A0+B0   a0+b0 */\
+        "packssdw %%mm4, %%mm2          \n\t" /* A0-B0  a0-b0   A1-B1   a1-b1 */\
+        "movq %%mm7, " #dst "           \n\t"\
+        "movq " #src1 ", %%mm1          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "movq %%mm2, 24+" #dst "        \n\t"\
+        "pmaddwd %%mm1, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm1          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm0, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm0             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm1, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "packssdw %%mm6, %%mm2          \n\t" /* A3+B3  a3+b3   A2+B2   a2+b2 */\
+        "movq %%mm2, 8+" #dst "         \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm0, %%mm4          \n\t" /* A2-B2  a2-b2   A3-B3   a3-b3 */\
+        "movq %%mm4, 16+" #dst "        \n\t"\
+
+#define COL_IDCT(src0, src4, src1, src5, dst, shift) \
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1             a1 */\
+        "psubd %%mm1, %%mm5             \n\t" /* A2             a2 */\
+        "movq 56(%2), %%mm1             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm1, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm1             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm1             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm0, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm1, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm7, %%mm7          \n\t" /* A0+B0  a0+b0 */\
+        "movd %%mm7, " #dst "           \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t" /* A1+B1  a1+b1 */\
+        "movd %%mm0, 16+" #dst "        \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A1-B1  a1-b1 */\
+        "movd %%mm2, 96+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A0-B0  a0-b0 */\
+        "movd %%mm4, 112+" #dst "       \n\t"\
+        "movq " #src1 ", %%mm0          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm0          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm5, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm5             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm0, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A2+B2  a2+b2 */\
+        "packssdw %%mm6, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movd %%mm2, 32+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A3-B3  a3-b3 */\
+        "packssdw %%mm5, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movd %%mm6, 48+" #dst "        \n\t"\
+        "movd %%mm4, 64+" #dst "        \n\t"\
+        "movd %%mm5, 80+" #dst "        \n\t"\
+
+
+#define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq "MANGLE(wm1010)", %%mm4   \n\t"\
+        "pand %%mm0, %%mm4              \n\t"\
+        "por %%mm1, %%mm4               \n\t"\
+        "por %%mm2, %%mm4               \n\t"\
+        "por %%mm3, %%mm4               \n\t"\
+        "packssdw %%mm4,%%mm4           \n\t"\
+        "movd %%mm4, %%eax              \n\t"\
+        "orl %%eax, %%eax               \n\t"\
+        "jz 1f                          \n\t"\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq 56(%2), %%mm5             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        #rounder ", %%mm0               \n\t"\
+        "paddd %%mm0, %%mm1             \n\t" /* A1             a1 */\
+        "paddd %%mm0, %%mm0             \n\t" \
+        "psubd %%mm1, %%mm0             \n\t" /* A2             a2 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm5, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm5             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm5             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm1, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm5, %%mm1             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm5, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm1, %%mm7          \n\t" /* A1+B1  a1+b1   A0+B0   a0+b0 */\
+        "packssdw %%mm4, %%mm2          \n\t" /* A0-B0  a0-b0   A1-B1   a1-b1 */\
+        "movq %%mm7, " #dst "           \n\t"\
+        "movq " #src1 ", %%mm1          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "movq %%mm2, 24+" #dst "        \n\t"\
+        "pmaddwd %%mm1, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm1          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm0, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm0             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm1, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "packssdw %%mm6, %%mm2          \n\t" /* A3+B3  a3+b3   A2+B2   a2+b2 */\
+        "movq %%mm2, 8+" #dst "         \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm0, %%mm4          \n\t" /* A2-B2  a2-b2   A3-B3   a3-b3 */\
+        "movq %%mm4, 16+" #dst "        \n\t"\
+        "jmp 2f                         \n\t"\
+        "1:                             \n\t"\
+        "pslld $16, %%mm0               \n\t"\
+        "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
+        "psrad $13, %%mm0               \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t"\
+        "movq %%mm0, " #dst "           \n\t"\
+        "movq %%mm0, 8+" #dst "         \n\t"\
+        "movq %%mm0, 16+" #dst "        \n\t"\
+        "movq %%mm0, 24+" #dst "        \n\t"\
+        "2:                             \n\t"
+
+
+//IDCT(      src0,   src4,   src1,   src5,    dst,    rounder, shift)
+ROW_IDCT(    (%0),  8(%0), 16(%0), 24(%0),  0(%1),paddd 8(%2), 11)
+/*ROW_IDCT(  32(%0), 40(%0), 48(%0), 56(%0), 32(%1), paddd (%2), 11)
+ROW_IDCT(  64(%0), 72(%0), 80(%0), 88(%0), 64(%1), paddd (%2), 11)
+ROW_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1), paddd (%2), 11)*/
+
+DC_COND_ROW_IDCT(  32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11)
+DC_COND_ROW_IDCT(  64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11)
+DC_COND_ROW_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11)
+
+
+//IDCT(      src0,   src4,   src1,    src5,    dst, shift)
+COL_IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0), 20)
+COL_IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0), 20)
+COL_IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0), 20)
+COL_IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+
+#else
+
+#define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq "MANGLE(wm1010)", %%mm4   \n\t"\
+        "pand %%mm0, %%mm4              \n\t"\
+        "por %%mm1, %%mm4               \n\t"\
+        "por %%mm2, %%mm4               \n\t"\
+        "por %%mm3, %%mm4               \n\t"\
+        "packssdw %%mm4,%%mm4           \n\t"\
+        "movd %%mm4, %%eax              \n\t"\
+        "orl %%eax, %%eax               \n\t"\
+        "jz 1f                          \n\t"\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq 56(%2), %%mm5             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        #rounder ", %%mm0               \n\t"\
+        "paddd %%mm0, %%mm1             \n\t" /* A1             a1 */\
+        "paddd %%mm0, %%mm0             \n\t" \
+        "psubd %%mm1, %%mm0             \n\t" /* A2             a2 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm5, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm5             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm5             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm1, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm5, %%mm1             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm5, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm1, %%mm7          \n\t" /* A1+B1  a1+b1   A0+B0   a0+b0 */\
+        "packssdw %%mm4, %%mm2          \n\t" /* A0-B0  a0-b0   A1-B1   a1-b1 */\
+        "movq %%mm7, " #dst "           \n\t"\
+        "movq " #src1 ", %%mm1          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "movq %%mm2, 24+" #dst "        \n\t"\
+        "pmaddwd %%mm1, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm1          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm0, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm0             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm1, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "packssdw %%mm6, %%mm2          \n\t" /* A3+B3  a3+b3   A2+B2   a2+b2 */\
+        "movq %%mm2, 8+" #dst "         \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm0, %%mm4          \n\t" /* A2-B2  a2-b2   A3-B3   a3-b3 */\
+        "movq %%mm4, 16+" #dst "        \n\t"\
+        "jmp 2f                         \n\t"\
+        "1:                             \n\t"\
+        "pslld $16, %%mm0               \n\t"\
+        "paddd "MANGLE(d40000)", %%mm0  \n\t"\
+        "psrad $13, %%mm0               \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t"\
+        "movq %%mm0, " #dst "           \n\t"\
+        "movq %%mm0, 8+" #dst "         \n\t"\
+        "movq %%mm0, 16+" #dst "        \n\t"\
+        "movq %%mm0, 24+" #dst "        \n\t"\
+        "2:                             \n\t"
+
+#define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq %%mm0, %%mm4              \n\t"\
+        "por %%mm1, %%mm4               \n\t"\
+        "por %%mm2, %%mm4               \n\t"\
+        "por %%mm3, %%mm4               \n\t"\
+        "packssdw %%mm4,%%mm4           \n\t"\
+        "movd %%mm4, %%eax              \n\t"\
+        "orl %%eax, %%eax               \n\t"\
+        "jz " #bt "                     \n\t"\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq 56(%2), %%mm5             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        #rounder ", %%mm0               \n\t"\
+        "paddd %%mm0, %%mm1             \n\t" /* A1             a1 */\
+        "paddd %%mm0, %%mm0             \n\t" \
+        "psubd %%mm1, %%mm0             \n\t" /* A2             a2 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm5, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm5             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm5             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm1, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm5, %%mm1             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm5, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm1, %%mm7          \n\t" /* A1+B1  a1+b1   A0+B0   a0+b0 */\
+        "packssdw %%mm4, %%mm2          \n\t" /* A0-B0  a0-b0   A1-B1   a1-b1 */\
+        "movq %%mm7, " #dst "           \n\t"\
+        "movq " #src1 ", %%mm1          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "movq %%mm2, 24+" #dst "        \n\t"\
+        "pmaddwd %%mm1, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm1          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm0, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm0             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm1, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "packssdw %%mm6, %%mm2          \n\t" /* A3+B3  a3+b3   A2+B2   a2+b2 */\
+        "movq %%mm2, 8+" #dst "         \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm0, %%mm4          \n\t" /* A2-B2  a2-b2   A3-B3   a3-b3 */\
+        "movq %%mm4, 16+" #dst "        \n\t"\
+
+#define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq 56(%2), %%mm5             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        #rounder ", %%mm0               \n\t"\
+        "paddd %%mm0, %%mm1             \n\t" /* A1             a1 */\
+        "paddd %%mm0, %%mm0             \n\t" \
+        "psubd %%mm1, %%mm0             \n\t" /* A2             a2 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm5, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm5             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm5             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm1, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm5, %%mm1             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm5, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm1, %%mm7          \n\t" /* A1+B1  a1+b1   A0+B0   a0+b0 */\
+        "packssdw %%mm4, %%mm2          \n\t" /* A0-B0  a0-b0   A1-B1   a1-b1 */\
+        "movq %%mm7, " #dst "           \n\t"\
+        "movq " #src1 ", %%mm1          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "movq %%mm2, 24+" #dst "        \n\t"\
+        "pmaddwd %%mm1, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm1          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm0, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm0             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm1, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "packssdw %%mm6, %%mm2          \n\t" /* A3+B3  a3+b3   A2+B2   a2+b2 */\
+        "movq %%mm2, 8+" #dst "         \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm0, %%mm4          \n\t" /* A2-B2  a2-b2   A3-B3   a3-b3 */\
+        "movq %%mm4, 16+" #dst "        \n\t"\
+
+//IDCT(         src0,   src4,   src1,   src5,    dst,   rounder, shift)
+DC_COND_IDCT(  0(%0),  8(%0), 16(%0), 24(%0),  0(%1),paddd 8(%2), 11)
+Z_COND_IDCT(  32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
+Z_COND_IDCT(  64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
+Z_COND_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
+
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1             a1 */\
+        "psubd %%mm1, %%mm5             \n\t" /* A2             a2 */\
+        "movq 56(%2), %%mm1             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm1, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm1             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm1             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm0, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm1, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm7, %%mm7          \n\t" /* A0+B0  a0+b0 */\
+        "movd %%mm7, " #dst "           \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t" /* A1+B1  a1+b1 */\
+        "movd %%mm0, 16+" #dst "        \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A1-B1  a1-b1 */\
+        "movd %%mm2, 96+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A0-B0  a0-b0 */\
+        "movd %%mm4, 112+" #dst "       \n\t"\
+        "movq " #src1 ", %%mm0          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm0          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm5, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm5             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm0, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A2+B2  a2+b2 */\
+        "packssdw %%mm6, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movd %%mm2, 32+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A3-B3  a3-b3 */\
+        "packssdw %%mm5, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movd %%mm6, 48+" #dst "        \n\t"\
+        "movd %%mm4, 64+" #dst "        \n\t"\
+        "movd %%mm5, 80+" #dst "        \n\t"
+
+
+//IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0), 20)
+IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0), 20)
+IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0), 20)
+IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+        "jmp 9f                         \n\t"
+
+        "#" ASMALIGN(4)                      \
+        "4:                             \n\t"
+Z_COND_IDCT(  64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
+Z_COND_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
+
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1             a1 */\
+        "psubd %%mm1, %%mm5             \n\t" /* A2             a2 */\
+        "movq 56(%2), %%mm1             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        "movq 72(%2), %%mm7             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm1             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm1, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm0, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm7, %%mm0             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm7, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm1, %%mm1          \n\t" /* A0+B0  a0+b0 */\
+        "movd %%mm1, " #dst "           \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t" /* A1+B1  a1+b1 */\
+        "movd %%mm0, 16+" #dst "        \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A1-B1  a1-b1 */\
+        "movd %%mm2, 96+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A0-B0  a0-b0 */\
+        "movd %%mm4, 112+" #dst "       \n\t"\
+        "movq 88(%2), %%mm1             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm5, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm1, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm1, %%mm5             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "movq %%mm6, %%mm1              \n\t" /* A3             a3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm1             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A2+B2  a2+b2 */\
+        "packssdw %%mm6, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movd %%mm2, 32+" #dst "        \n\t"\
+        "packssdw %%mm1, %%mm1          \n\t" /* A3-B3  a3-b3 */\
+        "packssdw %%mm5, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movd %%mm6, 48+" #dst "        \n\t"\
+        "movd %%mm1, 64+" #dst "        \n\t"\
+        "movd %%mm5, 80+" #dst "        \n\t"
+
+//IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0), 20)
+IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0), 20)
+IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0), 20)
+IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+        "jmp 9f                         \n\t"
+
+        "#" ASMALIGN(4)                      \
+        "6:                             \n\t"
+Z_COND_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
+
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 56(%2), %%mm1             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        "movq 72(%2), %%mm7             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm1             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm1, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm0, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm7, %%mm0             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm7, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm1, %%mm1          \n\t" /* A0+B0  a0+b0 */\
+        "movd %%mm1, " #dst "           \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t" /* A1+B1  a1+b1 */\
+        "movd %%mm0, 16+" #dst "        \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A1-B1  a1-b1 */\
+        "movd %%mm2, 96+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A0-B0  a0-b0 */\
+        "movd %%mm4, 112+" #dst "       \n\t"\
+        "movq 88(%2), %%mm1             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm5, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm1, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm1, %%mm5             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "movq %%mm6, %%mm1              \n\t" /* A3             a3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm1             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A2+B2  a2+b2 */\
+        "packssdw %%mm6, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movd %%mm2, 32+" #dst "        \n\t"\
+        "packssdw %%mm1, %%mm1          \n\t" /* A3-B3  a3-b3 */\
+        "packssdw %%mm5, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movd %%mm6, 48+" #dst "        \n\t"\
+        "movd %%mm1, 64+" #dst "        \n\t"\
+        "movd %%mm5, 80+" #dst "        \n\t"
+
+
+//IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0), 20)
+IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0), 20)
+IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0), 20)
+IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+        "jmp 9f                         \n\t"
+
+        "#" ASMALIGN(4)                      \
+        "2:                             \n\t"
+Z_COND_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
+
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 56(%2), %%mm1             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm1, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm1             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm1             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm0, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm1, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm7, %%mm7          \n\t" /* A0+B0  a0+b0 */\
+        "movd %%mm7, " #dst "           \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t" /* A1+B1  a1+b1 */\
+        "movd %%mm0, 16+" #dst "        \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A1-B1  a1-b1 */\
+        "movd %%mm2, 96+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A0-B0  a0-b0 */\
+        "movd %%mm4, 112+" #dst "       \n\t"\
+        "movq " #src1 ", %%mm0          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm0          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm5, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm5             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm0, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A2+B2  a2+b2 */\
+        "packssdw %%mm6, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movd %%mm2, 32+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A3-B3  a3-b3 */\
+        "packssdw %%mm5, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movd %%mm6, 48+" #dst "        \n\t"\
+        "movd %%mm4, 64+" #dst "        \n\t"\
+        "movd %%mm5, 80+" #dst "        \n\t"
+
+//IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0), 20)
+IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0), 20)
+IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0), 20)
+IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+        "jmp 9f                         \n\t"
+
+        "#" ASMALIGN(4)                      \
+        "3:                             \n\t"
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 64(%2), %%mm3             \n\t"\
+        "pmaddwd %%mm2, %%mm3           \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm0, %%mm1              \n\t" /* A1             a1 */\
+        "paddd %%mm3, %%mm0             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm3, %%mm1             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "packssdw %%mm7, %%mm7          \n\t" /* A0+B0  a0+b0 */\
+        "movd %%mm7, " #dst "           \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t" /* A1+B1  a1+b1 */\
+        "movd %%mm0, 16+" #dst "        \n\t"\
+        "packssdw %%mm1, %%mm1          \n\t" /* A1-B1  a1-b1 */\
+        "movd %%mm1, 96+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A0-B0  a0-b0 */\
+        "movd %%mm4, 112+" #dst "       \n\t"\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "pmaddwd %%mm2, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "pmaddwd 96(%2), %%mm2          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "movq %%mm5, %%mm1              \n\t" /* A2             a2 */\
+        "paddd %%mm4, %%mm1             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm5             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm2, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm2, %%mm4             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm1, %%mm1          \n\t" /* A2+B2  a2+b2 */\
+        "packssdw %%mm6, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movd %%mm1, 32+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A3-B3  a3-b3 */\
+        "packssdw %%mm5, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movd %%mm6, 48+" #dst "        \n\t"\
+        "movd %%mm4, 64+" #dst "        \n\t"\
+        "movd %%mm5, 80+" #dst "        \n\t"
+
+
+//IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0), 20)
+IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0), 20)
+IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0), 20)
+IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+        "jmp 9f                         \n\t"
+
+        "#" ASMALIGN(4)                      \
+        "5:                             \n\t"
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1             a1 */\
+        "psubd %%mm1, %%mm5             \n\t" /* A2             a2 */\
+        "movq 8+" #src0 ", %%mm2        \n\t" /* R4     R0      r4      r0 */\
+        "movq 8+" #src4 ", %%mm3        \n\t" /* R6     R2      r6      r2 */\
+        "movq 16(%2), %%mm1             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm2, %%mm1           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm7             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm7, %%mm2           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm7             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "pmaddwd 40(%2), %%mm3          \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "paddd %%mm1, %%mm7             \n\t" /* A0             a0 */\
+        "paddd %%mm1, %%mm1             \n\t" /* 2C0            2c0 */\
+        "psubd %%mm7, %%mm1             \n\t" /* A3             a3 */\
+        "paddd %%mm2, %%mm3             \n\t" /* A1             a1 */\
+        "paddd %%mm2, %%mm2             \n\t" /* 2C1            2c1 */\
+        "psubd %%mm3, %%mm2             \n\t" /* A2             a2 */\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm3       \n\t"\
+        "packssdw %%mm7, %%mm4          \n\t" /* A0     a0 */\
+        "movq %%mm4, " #dst "           \n\t"\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "packssdw %%mm3, %%mm0          \n\t" /* A1     a1 */\
+        "movq %%mm0, 16+" #dst "        \n\t"\
+        "movq %%mm0, 96+" #dst "        \n\t"\
+        "movq %%mm4, 112+" #dst "       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm2, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movq %%mm5, 32+" #dst "        \n\t"\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "packssdw %%mm1, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movq %%mm6, 48+" #dst "        \n\t"\
+        "movq %%mm6, 64+" #dst "        \n\t"\
+        "movq %%mm5, 80+" #dst "        \n\t"
+
+
+//IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+IDCT(    0(%1), 64(%1), 32(%1),  96(%1),  0(%0), 20)
+//IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0), 20)
+IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0), 20)
+//IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+        "jmp 9f                         \n\t"
+
+
+        "#" ASMALIGN(4)                      \
+        "1:                             \n\t"
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1             a1 */\
+        "psubd %%mm1, %%mm5             \n\t" /* A2             a2 */\
+        "movq 64(%2), %%mm1             \n\t"\
+        "pmaddwd %%mm2, %%mm1           \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm0, %%mm3              \n\t" /* A1             a1 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm1, %%mm3             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "psrad $" #shift ", %%mm3       \n\t"\
+        "packssdw %%mm7, %%mm7          \n\t" /* A0+B0  a0+b0 */\
+        "movd %%mm7, " #dst "           \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t" /* A1+B1  a1+b1 */\
+        "movd %%mm0, 16+" #dst "        \n\t"\
+        "packssdw %%mm3, %%mm3          \n\t" /* A1-B1  a1-b1 */\
+        "movd %%mm3, 96+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A0-B0  a0-b0 */\
+        "movd %%mm4, 112+" #dst "       \n\t"\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "pmaddwd %%mm2, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "pmaddwd 96(%2), %%mm2          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "movq %%mm5, %%mm3              \n\t" /* A2             a2 */\
+        "paddd %%mm4, %%mm3             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm5             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm3       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm2, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm2, %%mm4             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "packssdw %%mm3, %%mm3          \n\t" /* A2+B2  a2+b2 */\
+        "movd %%mm3, 32+" #dst "        \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm6, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movd %%mm6, 48+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A3-B3  a3-b3 */\
+        "packssdw %%mm5, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movd %%mm4, 64+" #dst "        \n\t"\
+        "movd %%mm5, 80+" #dst "        \n\t"
+
+
+//IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0), 20)
+IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0), 20)
+IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0), 20)
+IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+        "jmp 9f                         \n\t"
+
+
+        "#" ASMALIGN(4)
+        "7:                             \n\t"
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "movq 8+" #src0 ", %%mm2        \n\t" /* R4     R0      r4      r0 */\
+        "movq 16(%2), %%mm1             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm2, %%mm1           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm7             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm7, %%mm2           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm7             \n\t" /* C6     C2      C6      C2 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "packssdw %%mm1, %%mm4          \n\t" /* A0     a0 */\
+        "movq %%mm4, " #dst "           \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm2, %%mm0          \n\t" /* A1     a1 */\
+        "movq %%mm0, 16+" #dst "        \n\t"\
+        "movq %%mm0, 96+" #dst "        \n\t"\
+        "movq %%mm4, 112+" #dst "       \n\t"\
+        "movq %%mm0, 32+" #dst "        \n\t"\
+        "movq %%mm4, 48+" #dst "        \n\t"\
+        "movq %%mm4, 64+" #dst "        \n\t"\
+        "movq %%mm0, 80+" #dst "        \n\t"
+
+//IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+IDCT(   0(%1), 64(%1), 32(%1),  96(%1),  0(%0), 20)
+//IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0), 20)
+IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0), 20)
+//IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+
+
+#endif
+
+/*
+Input
+ 00 40 04 44 20 60 24 64
+ 10 30 14 34 50 70 54 74
+ 01 41 03 43 21 61 23 63
+ 11 31 13 33 51 71 53 73
+ 02 42 06 46 22 62 26 66
+ 12 32 16 36 52 72 56 76
+ 05 45 07 47 25 65 27 67
+ 15 35 17 37 55 75 57 77
+
+Temp
+ 00 04 10 14 20 24 30 34
+ 40 44 50 54 60 64 70 74
+ 01 03 11 13 21 23 31 33
+ 41 43 51 53 61 63 71 73
+ 02 06 12 16 22 26 32 36
+ 42 46 52 56 62 66 72 76
+ 05 07 15 17 25 27 35 37
+ 45 47 55 57 65 67 75 77
+*/
+
+"9: \n\t"
+                :: "r" (block), "r" (temp), "r" (coeffs)
+                : "%eax"
+        );
+}
+
+void ff_simple_idct_mmx(int16_t *block)
+{
+    idct(block);
+}
+
+//FIXME merge add/put into the idct
+
+void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    idct(block);
+    put_pixels_clamped_mmx(block, dest, line_size);
+}
+void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    idct(block);
+    add_pixels_clamped_mmx(block, dest, line_size);
+}
diff --git a/contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c b/contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c
new file mode 100644
index 000000000..718202632
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/snowdsp_mmx.c
@@ -0,0 +1,921 @@
+/*
+ * MMX and SSE2 optimized snow DSP utils
+ * Copyright (c) 2005-2006 Robert Edele <yartrebo@earthlink.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../avcodec.h"
+#include "../snow.h"
+#include "x86_cpu.h"
+
+void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width){
+    const int w2= (width+1)>>1;
+    // SSE2 code runs faster with pointers aligned on a 32-byte boundary.
+    DWTELEM temp_buf[(width>>1) + 4];
+    DWTELEM * const temp = temp_buf + 4 - (((int)temp_buf & 0xF) >> 2);
+    const int w_l= (width>>1);
+    const int w_r= w2 - 1;
+    int i;
+
+    { // Lift 0
+        DWTELEM * const ref = b + w2 - 1;
+        DWTELEM b_0 = b[0]; //By allowing the first entry in b[0] to be calculated twice
+        // (the first time erroneously), we allow the SSE2 code to run an extra pass.
+        // The savings in code and time are well worth having to store this value and
+        // calculate b[0] correctly afterwards.
+
+        i = 0;
+        asm volatile(
+            "pcmpeqd   %%xmm7, %%xmm7         \n\t"
+            "pslld        $31, %%xmm7         \n\t"
+            "psrld        $29, %%xmm7         \n\t"
+        ::);
+        for(; i<w_l-7; i+=8){
+            asm volatile(
+                "movdqu   (%1), %%xmm1        \n\t"
+                "movdqu 16(%1), %%xmm5        \n\t"
+                "movdqu  4(%1), %%xmm2        \n\t"
+                "movdqu 20(%1), %%xmm6        \n\t"
+                "paddd  %%xmm1, %%xmm2        \n\t"
+                "paddd  %%xmm5, %%xmm6        \n\t"
+                "movdqa %%xmm2, %%xmm0        \n\t"
+                "movdqa %%xmm6, %%xmm4        \n\t"
+                "paddd  %%xmm2, %%xmm2        \n\t"
+                "paddd  %%xmm6, %%xmm6        \n\t"
+                "paddd  %%xmm0, %%xmm2        \n\t"
+                "paddd  %%xmm4, %%xmm6        \n\t"
+                "paddd  %%xmm7, %%xmm2        \n\t"
+                "paddd  %%xmm7, %%xmm6        \n\t"
+                "psrad      $3, %%xmm2        \n\t"
+                "psrad      $3, %%xmm6        \n\t"
+                "movdqa   (%0), %%xmm0        \n\t"
+                "movdqa 16(%0), %%xmm4        \n\t"
+                "psubd  %%xmm2, %%xmm0        \n\t"
+                "psubd  %%xmm6, %%xmm4        \n\t"
+                "movdqa %%xmm0, (%0)          \n\t"
+                "movdqa %%xmm4, 16(%0)        \n\t"
+                :: "r"(&b[i]), "r"(&ref[i])
+                : "memory"
+            );
+        }
+        snow_horizontal_compose_lift_lead_out(i, b, b, ref, width, w_l, 0, W_DM, W_DO, W_DS);
+        b[0] = b_0 - ((W_DM * 2 * ref[1]+W_DO)>>W_DS);
+    }
+
+    { // Lift 1
+        DWTELEM * const dst = b+w2;
+
+        i = 0;
+        for(; (((long)&dst[i]) & 0xF) && i<w_r; i++){
+            dst[i] = dst[i] - (b[i] + b[i + 1]);
+        }
+        for(; i<w_r-7; i+=8){
+            asm volatile(
+                "movdqu   (%1), %%xmm1        \n\t"
+                "movdqu 16(%1), %%xmm5        \n\t"
+                "movdqu  4(%1), %%xmm2        \n\t"
+                "movdqu 20(%1), %%xmm6        \n\t"
+                "paddd  %%xmm1, %%xmm2        \n\t"
+                "paddd  %%xmm5, %%xmm6        \n\t"
+                "movdqa   (%0), %%xmm0        \n\t"
+                "movdqa 16(%0), %%xmm4        \n\t"
+                "psubd  %%xmm2, %%xmm0        \n\t"
+                "psubd  %%xmm6, %%xmm4        \n\t"
+                "movdqa %%xmm0, (%0)          \n\t"
+                "movdqa %%xmm4, 16(%0)        \n\t"
+                :: "r"(&dst[i]), "r"(&b[i])
+                : "memory"
+            );
+        }
+        snow_horizontal_compose_lift_lead_out(i, dst, dst, b, width, w_r, 1, W_CM, W_CO, W_CS);
+    }
+
+    { // Lift 2
+        DWTELEM * const ref = b+w2 - 1;
+        DWTELEM b_0 = b[0];
+
+        i = 0;
+        asm volatile(
+            "pslld          $1, %%xmm7       \n\t" /* xmm7 already holds a '4' from 2 lifts ago. */
+        ::);
+        for(; i<w_l-7; i+=8){
+            asm volatile(
+                "movdqu   (%1), %%xmm1        \n\t"
+                "movdqu 16(%1), %%xmm5        \n\t"
+                "movdqu  4(%1), %%xmm0        \n\t"
+                "movdqu 20(%1), %%xmm4        \n\t"
+                "paddd  %%xmm1, %%xmm0        \n\t"
+                "paddd  %%xmm5, %%xmm4        \n\t"
+                "movdqa %%xmm7, %%xmm1        \n\t"
+                "movdqa %%xmm7, %%xmm5        \n\t"
+                "psubd  %%xmm0, %%xmm1        \n\t"
+                "psubd  %%xmm4, %%xmm5        \n\t"
+                "movdqa   (%0), %%xmm0        \n\t"
+                "movdqa 16(%0), %%xmm4        \n\t"
+                "pslld      $2, %%xmm0        \n\t"
+                "pslld      $2, %%xmm4        \n\t"
+                "psubd  %%xmm0, %%xmm1        \n\t"
+                "psubd  %%xmm4, %%xmm5        \n\t"
+                "psrad      $4, %%xmm1        \n\t"
+                "psrad      $4, %%xmm5        \n\t"
+                "movdqa   (%0), %%xmm0        \n\t"
+                "movdqa 16(%0), %%xmm4        \n\t"
+                "psubd  %%xmm1, %%xmm0        \n\t"
+                "psubd  %%xmm5, %%xmm4        \n\t"
+                "movdqa %%xmm0, (%0)          \n\t"
+                "movdqa %%xmm4, 16(%0)        \n\t"
+                :: "r"(&b[i]), "r"(&ref[i])
+                : "memory"
+            );
+        }
+        snow_horizontal_compose_liftS_lead_out(i, b, b, ref, width, w_l);
+        b[0] = b_0 - (((-2 * ref[1] + W_BO) - 4 * b_0) >> W_BS);
+    }
+
+    { // Lift 3
+        DWTELEM * const src = b+w2;
+
+        i = 0;
+        for(; (((long)&temp[i]) & 0xF) && i<w_r; i++){
+            temp[i] = src[i] - ((-W_AM*(b[i] + b[i+1]))>>W_AS);
+        }
+        for(; i<w_r-7; i+=8){
+            asm volatile(
+                "movdqu  4(%1), %%xmm2        \n\t"
+                "movdqu 20(%1), %%xmm6        \n\t"
+                "paddd    (%1), %%xmm2        \n\t"
+                "paddd  16(%1), %%xmm6        \n\t"
+                "movdqa %%xmm2, %%xmm0        \n\t"
+                "movdqa %%xmm6, %%xmm4        \n\t"
+                "pslld      $2, %%xmm2        \n\t"
+                "pslld      $2, %%xmm6        \n\t"
+                "psubd  %%xmm2, %%xmm0        \n\t"
+                "psubd  %%xmm6, %%xmm4        \n\t"
+                "psrad      $1, %%xmm0        \n\t"
+                "psrad      $1, %%xmm4        \n\t"
+                "movdqu   (%0), %%xmm2        \n\t"
+                "movdqu 16(%0), %%xmm6        \n\t"
+                "psubd  %%xmm0, %%xmm2        \n\t"
+                "psubd  %%xmm4, %%xmm6        \n\t"
+                "movdqa %%xmm2, (%2)          \n\t"
+                "movdqa %%xmm6, 16(%2)        \n\t"
+                :: "r"(&src[i]), "r"(&b[i]), "r"(&temp[i])
+                 : "memory"
+               );
+        }
+        snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO, W_AS);
+    }
+
+    {
+        snow_interleave_line_header(&i, width, b, temp);
+
+        for (; (i & 0x1E) != 0x1E; i-=2){
+            b[i+1] = temp[i>>1];
+            b[i] = b[i>>1];
+        }
+        for (i-=30; i>=0; i-=32){
+            asm volatile(
+                "movdqa      (%1), %%xmm0       \n\t"
+                "movdqa    16(%1), %%xmm2       \n\t"
+                "movdqa    32(%1), %%xmm4       \n\t"
+                "movdqa    48(%1), %%xmm6       \n\t"
+                "movdqa      (%1), %%xmm1       \n\t"
+                "movdqa    16(%1), %%xmm3       \n\t"
+                "movdqa    32(%1), %%xmm5       \n\t"
+                "movdqa    48(%1), %%xmm7       \n\t"
+                "punpckldq   (%2), %%xmm0       \n\t"
+                "punpckldq 16(%2), %%xmm2       \n\t"
+                "punpckldq 32(%2), %%xmm4       \n\t"
+                "punpckldq 48(%2), %%xmm6       \n\t"
+                "movdqa    %%xmm0, (%0)         \n\t"
+                "movdqa    %%xmm2, 32(%0)       \n\t"
+                "movdqa    %%xmm4, 64(%0)       \n\t"
+                "movdqa    %%xmm6, 96(%0)       \n\t"
+                "punpckhdq   (%2), %%xmm1       \n\t"
+                "punpckhdq 16(%2), %%xmm3       \n\t"
+                "punpckhdq 32(%2), %%xmm5       \n\t"
+                "punpckhdq 48(%2), %%xmm7       \n\t"
+                "movdqa    %%xmm1, 16(%0)       \n\t"
+                "movdqa    %%xmm3, 48(%0)       \n\t"
+                "movdqa    %%xmm5, 80(%0)       \n\t"
+                "movdqa    %%xmm7, 112(%0)      \n\t"
+                :: "r"(&(b)[i]), "r"(&(b)[i>>1]), "r"(&(temp)[i>>1])
+                 : "memory"
+               );
+        }
+    }
+}
+
+void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
+    const int w2= (width+1)>>1;
+    DWTELEM temp[width >> 1];
+    const int w_l= (width>>1);
+    const int w_r= w2 - 1;
+    int i;
+
+    { // Lift 0
+        DWTELEM * const ref = b + w2 - 1;
+
+        i = 1;
+        b[0] = b[0] - ((W_DM * 2 * ref[1]+W_DO)>>W_DS);
+        asm volatile(
+            "pcmpeqd    %%mm7, %%mm7         \n\t"
+            "pslld        $31, %%mm7         \n\t"
+            "psrld        $29, %%mm7         \n\t"
+           ::);
+        for(; i<w_l-3; i+=4){
+            asm volatile(
+                "movq     (%1), %%mm2        \n\t"
+                "movq    8(%1), %%mm6        \n\t"
+                "paddd   4(%1), %%mm2        \n\t"
+                "paddd  12(%1), %%mm6        \n\t"
+                "movq    %%mm2, %%mm0        \n\t"
+                "movq    %%mm6, %%mm4        \n\t"
+                "paddd   %%mm2, %%mm2        \n\t"
+                "paddd   %%mm6, %%mm6        \n\t"
+                "paddd   %%mm0, %%mm2        \n\t"
+                "paddd   %%mm4, %%mm6        \n\t"
+                "paddd   %%mm7, %%mm2        \n\t"
+                "paddd   %%mm7, %%mm6        \n\t"
+                "psrad      $3, %%mm2        \n\t"
+                "psrad      $3, %%mm6        \n\t"
+                "movq     (%0), %%mm0        \n\t"
+                "movq    8(%0), %%mm4        \n\t"
+                "psubd   %%mm2, %%mm0        \n\t"
+                "psubd   %%mm6, %%mm4        \n\t"
+                "movq    %%mm0, (%0)         \n\t"
+                "movq    %%mm4, 8(%0)        \n\t"
+                :: "r"(&b[i]), "r"(&ref[i])
+                 : "memory"
+               );
+        }
+        snow_horizontal_compose_lift_lead_out(i, b, b, ref, width, w_l, 0, W_DM, W_DO, W_DS);
+    }
+
+    { // Lift 1
+        DWTELEM * const dst = b+w2;
+
+        i = 0;
+        for(; i<w_r-3; i+=4){
+            asm volatile(
+                "movq     (%1), %%mm2        \n\t"
+                "movq    8(%1), %%mm6        \n\t"
+                "paddd   4(%1), %%mm2        \n\t"
+                "paddd  12(%1), %%mm6        \n\t"
+                "movq     (%0), %%mm0        \n\t"
+                "movq    8(%0), %%mm4        \n\t"
+                "psubd   %%mm2, %%mm0        \n\t"
+                "psubd   %%mm6, %%mm4        \n\t"
+                "movq    %%mm0, (%0)         \n\t"
+                "movq    %%mm4, 8(%0)        \n\t"
+                :: "r"(&dst[i]), "r"(&b[i])
+                 : "memory"
+               );
+        }
+        snow_horizontal_compose_lift_lead_out(i, dst, dst, b, width, w_r, 1, W_CM, W_CO, W_CS);
+    }
+
+    { // Lift 2
+        DWTELEM * const ref = b+w2 - 1;
+
+        i = 1;
+        b[0] = b[0] - (((-2 * ref[1] + W_BO) - 4 * b[0]) >> W_BS);
+        asm volatile(
+            "pslld          $1, %%mm7       \n\t" /* xmm7 already holds a '4' from 2 lifts ago. */
+           ::);
+        for(; i<w_l-3; i+=4){
+            asm volatile(
+                "movq     (%1), %%mm0        \n\t"
+                "movq    8(%1), %%mm4        \n\t"
+                "paddd   4(%1), %%mm0        \n\t"
+                "paddd  12(%1), %%mm4        \n\t"
+                "movq    %%mm7, %%mm1        \n\t"
+                "movq    %%mm7, %%mm5        \n\t"
+                "psubd   %%mm0, %%mm1        \n\t"
+                "psubd   %%mm4, %%mm5        \n\t"
+                "movq     (%0), %%mm0        \n\t"
+                "movq    8(%0), %%mm4        \n\t"
+                "pslld      $2, %%mm0        \n\t"
+                "pslld      $2, %%mm4        \n\t"
+                "psubd   %%mm0, %%mm1        \n\t"
+                "psubd   %%mm4, %%mm5        \n\t"
+                "psrad      $4, %%mm1        \n\t"
+                "psrad      $4, %%mm5        \n\t"
+                "movq     (%0), %%mm0        \n\t"
+                "movq    8(%0), %%mm4        \n\t"
+                "psubd   %%mm1, %%mm0        \n\t"
+                "psubd   %%mm5, %%mm4        \n\t"
+                "movq    %%mm0, (%0)         \n\t"
+                "movq    %%mm4, 8(%0)        \n\t"
+                :: "r"(&b[i]), "r"(&ref[i])
+                 : "memory"
+               );
+        }
+        snow_horizontal_compose_liftS_lead_out(i, b, b, ref, width, w_l);
+    }
+
+    { // Lift 3
+        DWTELEM * const src = b+w2;
+        i = 0;
+
+        for(; i<w_r-3; i+=4){
+            asm volatile(
+                "movq    4(%1), %%mm2        \n\t"
+                "movq   12(%1), %%mm6        \n\t"
+                "paddd    (%1), %%mm2        \n\t"
+                "paddd   8(%1), %%mm6        \n\t"
+                "movq    %%mm2, %%mm0        \n\t"
+                "movq    %%mm6, %%mm4        \n\t"
+                "pslld      $2, %%mm2        \n\t"
+                "pslld      $2, %%mm6        \n\t"
+                "psubd   %%mm2, %%mm0        \n\t"
+                "psubd   %%mm6, %%mm4        \n\t"
+                "psrad      $1, %%mm0        \n\t"
+                "psrad      $1, %%mm4        \n\t"
+                "movq     (%0), %%mm2        \n\t"
+                "movq    8(%0), %%mm6        \n\t"
+                "psubd   %%mm0, %%mm2        \n\t"
+                "psubd   %%mm4, %%mm6        \n\t"
+                "movq    %%mm2, (%2)         \n\t"
+                "movq    %%mm6, 8(%2)        \n\t"
+                :: "r"(&src[i]), "r"(&b[i]), "r"(&temp[i])
+                 : "memory"
+               );
+        }
+        snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO, W_AS);
+    }
+
+    {
+        snow_interleave_line_header(&i, width, b, temp);
+
+        for (; (i & 0xE) != 0xE; i-=2){
+            b[i+1] = temp[i>>1];
+            b[i] = b[i>>1];
+        }
+        for (i-=14; i>=0; i-=16){
+            asm volatile(
+                "movq        (%1), %%mm0       \n\t"
+                "movq       8(%1), %%mm2       \n\t"
+                "movq      16(%1), %%mm4       \n\t"
+                "movq      24(%1), %%mm6       \n\t"
+                "movq        (%1), %%mm1       \n\t"
+                "movq       8(%1), %%mm3       \n\t"
+                "movq      16(%1), %%mm5       \n\t"
+                "movq      24(%1), %%mm7       \n\t"
+                "punpckldq   (%2), %%mm0       \n\t"
+                "punpckldq  8(%2), %%mm2       \n\t"
+                "punpckldq 16(%2), %%mm4       \n\t"
+                "punpckldq 24(%2), %%mm6       \n\t"
+                "movq       %%mm0, (%0)        \n\t"
+                "movq       %%mm2, 16(%0)      \n\t"
+                "movq       %%mm4, 32(%0)      \n\t"
+                "movq       %%mm6, 48(%0)      \n\t"
+                "punpckhdq   (%2), %%mm1       \n\t"
+                "punpckhdq  8(%2), %%mm3       \n\t"
+                "punpckhdq 16(%2), %%mm5       \n\t"
+                "punpckhdq 24(%2), %%mm7       \n\t"
+                "movq       %%mm1, 8(%0)       \n\t"
+                "movq       %%mm3, 24(%0)      \n\t"
+                "movq       %%mm5, 40(%0)      \n\t"
+                "movq       %%mm7, 56(%0)      \n\t"
+                :: "r"(&b[i]), "r"(&b[i>>1]), "r"(&temp[i>>1])
+                 : "memory"
+               );
+        }
+    }
+}
+
+#define snow_vertical_compose_sse2_load_add(op,r,t0,t1,t2,t3)\
+        ""op" (%%"r",%%"REG_d",4), %%"t0"      \n\t"\
+        ""op" 16(%%"r",%%"REG_d",4), %%"t1"    \n\t"\
+        ""op" 32(%%"r",%%"REG_d",4), %%"t2"    \n\t"\
+        ""op" 48(%%"r",%%"REG_d",4), %%"t3"    \n\t"
+
+#define snow_vertical_compose_sse2_load(r,t0,t1,t2,t3)\
+        snow_vertical_compose_sse2_load_add("movdqa",r,t0,t1,t2,t3)
+
+#define snow_vertical_compose_sse2_add(r,t0,t1,t2,t3)\
+        snow_vertical_compose_sse2_load_add("paddd",r,t0,t1,t2,t3)
+
+#define snow_vertical_compose_sse2_sub(s0,s1,s2,s3,t0,t1,t2,t3)\
+        "psubd %%"s0", %%"t0" \n\t"\
+        "psubd %%"s1", %%"t1" \n\t"\
+        "psubd %%"s2", %%"t2" \n\t"\
+        "psubd %%"s3", %%"t3" \n\t"
+
+#define snow_vertical_compose_sse2_store(w,s0,s1,s2,s3)\
+        "movdqa %%"s0", (%%"w",%%"REG_d",4)      \n\t"\
+        "movdqa %%"s1", 16(%%"w",%%"REG_d",4)    \n\t"\
+        "movdqa %%"s2", 32(%%"w",%%"REG_d",4)    \n\t"\
+        "movdqa %%"s3", 48(%%"w",%%"REG_d",4)    \n\t"
+
+#define snow_vertical_compose_sse2_sra(n,t0,t1,t2,t3)\
+        "psrad $"n", %%"t0" \n\t"\
+        "psrad $"n", %%"t1" \n\t"\
+        "psrad $"n", %%"t2" \n\t"\
+        "psrad $"n", %%"t3" \n\t"
+
+#define snow_vertical_compose_sse2_r2r_add(s0,s1,s2,s3,t0,t1,t2,t3)\
+        "paddd %%"s0", %%"t0" \n\t"\
+        "paddd %%"s1", %%"t1" \n\t"\
+        "paddd %%"s2", %%"t2" \n\t"\
+        "paddd %%"s3", %%"t3" \n\t"
+
+#define snow_vertical_compose_sse2_sll(n,t0,t1,t2,t3)\
+        "pslld $"n", %%"t0" \n\t"\
+        "pslld $"n", %%"t1" \n\t"\
+        "pslld $"n", %%"t2" \n\t"\
+        "pslld $"n", %%"t3" \n\t"
+
+#define snow_vertical_compose_sse2_move(s0,s1,s2,s3,t0,t1,t2,t3)\
+        "movdqa %%"s0", %%"t0" \n\t"\
+        "movdqa %%"s1", %%"t1" \n\t"\
+        "movdqa %%"s2", %%"t2" \n\t"\
+        "movdqa %%"s3", %%"t3" \n\t"
+
+void ff_snow_vertical_compose97i_sse2(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
+    long i = width;
+
+    while(i & 0xF)
+    {
+        i--;
+        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
+        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
+        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
+        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
+    }
+
+         asm volatile (
+        "jmp 2f                                      \n\t"
+        "1:                                          \n\t"
+
+        "mov %6, %%"REG_a"                           \n\t"
+        "mov %4, %%"REG_S"                           \n\t"
+
+        snow_vertical_compose_sse2_load(REG_S,"xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_add(REG_a,"xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_move("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7")
+        snow_vertical_compose_sse2_sll("1","xmm0","xmm2","xmm4","xmm6")\
+        snow_vertical_compose_sse2_r2r_add("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6")
+
+        "pcmpeqd %%xmm1, %%xmm1                      \n\t"
+        "pslld $31, %%xmm1                           \n\t"
+        "psrld $29, %%xmm1                           \n\t"
+        "mov %5, %%"REG_a"                           \n\t"
+
+        snow_vertical_compose_sse2_r2r_add("xmm1","xmm1","xmm1","xmm1","xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_sra("3","xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_load(REG_a,"xmm1","xmm3","xmm5","xmm7")
+        snow_vertical_compose_sse2_sub("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7")
+        snow_vertical_compose_sse2_store(REG_a,"xmm1","xmm3","xmm5","xmm7")
+        "mov %3, %%"REG_c"                           \n\t"
+        snow_vertical_compose_sse2_load(REG_S,"xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_add(REG_c,"xmm1","xmm3","xmm5","xmm7")
+        snow_vertical_compose_sse2_sub("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_store(REG_S,"xmm0","xmm2","xmm4","xmm6")
+        "mov %2, %%"REG_a"                           \n\t"
+        snow_vertical_compose_sse2_load(REG_c,"xmm1","xmm3","xmm5","xmm7")
+        snow_vertical_compose_sse2_add(REG_a,"xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_sll("2","xmm1","xmm3","xmm5","xmm7")\
+        snow_vertical_compose_sse2_r2r_add("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6")
+
+        "pcmpeqd %%xmm1, %%xmm1                      \n\t"
+        "pslld $31, %%xmm1                           \n\t"
+        "psrld $28, %%xmm1                           \n\t"
+        "mov %1, %%"REG_S"                           \n\t"
+
+        snow_vertical_compose_sse2_r2r_add("xmm1","xmm1","xmm1","xmm1","xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_sra("4","xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_add(REG_c,"xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_store(REG_c,"xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_add(REG_S,"xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_move("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7")
+        snow_vertical_compose_sse2_sll("1","xmm0","xmm2","xmm4","xmm6")\
+        snow_vertical_compose_sse2_r2r_add("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_sra("1","xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_add(REG_a,"xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_store(REG_a,"xmm0","xmm2","xmm4","xmm6")
+
+        "2:                                          \n\t"
+        "sub $16, %%"REG_d"                          \n\t"
+        "jge 1b                                      \n\t"
+        :"+d"(i)
+        :
+        "m"(b0),"m"(b1),"m"(b2),"m"(b3),"m"(b4),"m"(b5):
+        "%"REG_a"","%"REG_S"","%"REG_c"");
+}
+
+#define snow_vertical_compose_mmx_load_add(op,r,t0,t1,t2,t3)\
+        ""op" (%%"r",%%"REG_d",4), %%"t0"   \n\t"\
+        ""op" 8(%%"r",%%"REG_d",4), %%"t1"  \n\t"\
+        ""op" 16(%%"r",%%"REG_d",4), %%"t2" \n\t"\
+        ""op" 24(%%"r",%%"REG_d",4), %%"t3" \n\t"
+
+#define snow_vertical_compose_mmx_load(r,t0,t1,t2,t3)\
+        snow_vertical_compose_mmx_load_add("movq",r,t0,t1,t2,t3)
+
+#define snow_vertical_compose_mmx_add(r,t0,t1,t2,t3)\
+        snow_vertical_compose_mmx_load_add("paddd",r,t0,t1,t2,t3)
+
+#define snow_vertical_compose_mmx_sub(s0,s1,s2,s3,t0,t1,t2,t3)\
+        snow_vertical_compose_sse2_sub(s0,s1,s2,s3,t0,t1,t2,t3)
+
+#define snow_vertical_compose_mmx_store(w,s0,s1,s2,s3)\
+        "movq %%"s0", (%%"w",%%"REG_d",4)   \n\t"\
+        "movq %%"s1", 8(%%"w",%%"REG_d",4)  \n\t"\
+        "movq %%"s2", 16(%%"w",%%"REG_d",4) \n\t"\
+        "movq %%"s3", 24(%%"w",%%"REG_d",4) \n\t"
+
+#define snow_vertical_compose_mmx_sra(n,t0,t1,t2,t3)\
+        snow_vertical_compose_sse2_sra(n,t0,t1,t2,t3)
+
+#define snow_vertical_compose_mmx_r2r_add(s0,s1,s2,s3,t0,t1,t2,t3)\
+        snow_vertical_compose_sse2_r2r_add(s0,s1,s2,s3,t0,t1,t2,t3)
+
+#define snow_vertical_compose_mmx_sll(n,t0,t1,t2,t3)\
+        snow_vertical_compose_sse2_sll(n,t0,t1,t2,t3)
+
+#define snow_vertical_compose_mmx_move(s0,s1,s2,s3,t0,t1,t2,t3)\
+        "movq %%"s0", %%"t0" \n\t"\
+        "movq %%"s1", %%"t1" \n\t"\
+        "movq %%"s2", %%"t2" \n\t"\
+        "movq %%"s3", %%"t3" \n\t"
+
+void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
+    long i = width;
+    while(i & 0x7)
+    {
+        i--;
+        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
+        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
+        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
+        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
+    }
+
+    asm volatile(
+        "jmp 2f                                      \n\t"
+        "1:                                          \n\t"
+
+        "mov %6, %%"REG_a"                           \n\t"
+        "mov %4, %%"REG_S"                           \n\t"
+
+        snow_vertical_compose_mmx_load(REG_S,"mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_add(REG_a,"mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_move("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7")
+        snow_vertical_compose_mmx_sll("1","mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_r2r_add("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6")
+
+        "pcmpeqd %%mm1, %%mm1                        \n\t"
+        "pslld $31, %%mm1                            \n\t"
+        "psrld $29, %%mm1                            \n\t"
+        "mov %5, %%"REG_a"                           \n\t"
+
+        snow_vertical_compose_mmx_r2r_add("mm1","mm1","mm1","mm1","mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_sra("3","mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_load(REG_a,"mm1","mm3","mm5","mm7")
+        snow_vertical_compose_mmx_sub("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7")
+        snow_vertical_compose_mmx_store(REG_a,"mm1","mm3","mm5","mm7")
+        "mov %3, %%"REG_c"                           \n\t"
+        snow_vertical_compose_mmx_load(REG_S,"mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_add(REG_c,"mm1","mm3","mm5","mm7")
+        snow_vertical_compose_mmx_sub("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_store(REG_S,"mm0","mm2","mm4","mm6")
+        "mov %2, %%"REG_a"                           \n\t"
+        snow_vertical_compose_mmx_load(REG_c,"mm1","mm3","mm5","mm7")
+        snow_vertical_compose_mmx_add(REG_a,"mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_sll("2","mm1","mm3","mm5","mm7")
+        snow_vertical_compose_mmx_r2r_add("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6")
+
+        "pcmpeqd %%mm1, %%mm1                        \n\t"
+        "pslld $31, %%mm1                            \n\t"
+        "psrld $28, %%mm1                            \n\t"
+        "mov %1, %%"REG_S"                           \n\t"
+
+        snow_vertical_compose_mmx_r2r_add("mm1","mm1","mm1","mm1","mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_sra("4","mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_add(REG_c,"mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_store(REG_c,"mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_add(REG_S,"mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_move("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7")
+        snow_vertical_compose_mmx_sll("1","mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_r2r_add("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_sra("1","mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_add(REG_a,"mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_store(REG_a,"mm0","mm2","mm4","mm6")
+
+        "2:                                          \n\t"
+        "sub $8, %%"REG_d"                           \n\t"
+        "jge 1b                                      \n\t"
+        :"+d"(i)
+        :
+        "m"(b0),"m"(b1),"m"(b2),"m"(b3),"m"(b4),"m"(b5):
+        "%"REG_a"","%"REG_S"","%"REG_c"");
+}
+
+#define snow_inner_add_yblock_sse2_header \
+    DWTELEM * * dst_array = sb->line + src_y;\
+    long tmp;\
+    asm volatile(\
+             "mov  %7, %%"REG_c"             \n\t"\
+             "mov  %6, %2                    \n\t"\
+             "mov  %4, %%"REG_S"             \n\t"\
+             "pxor %%xmm7, %%xmm7            \n\t" /* 0 */\
+             "pcmpeqd %%xmm3, %%xmm3         \n\t"\
+             "pslld $31, %%xmm3              \n\t"\
+             "psrld $24, %%xmm3              \n\t" /* FRAC_BITS >> 1 */\
+             "1:                             \n\t"\
+             "mov %1, %%"REG_D"              \n\t"\
+             "mov (%%"REG_D"), %%"REG_D"     \n\t"\
+             "add %3, %%"REG_D"              \n\t"
+
+#define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset)\
+             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
+             "movq (%%"REG_d"), %%"out_reg1" \n\t"\
+             "movq (%%"REG_d", %%"REG_c"), %%"out_reg2" \n\t"\
+             "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
+             "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
+             "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
+             "movq "s_offset"+16(%%"REG_S"), %%xmm4 \n\t"\
+             "punpcklbw %%xmm7, %%xmm0       \n\t"\
+             "punpcklbw %%xmm7, %%xmm4       \n\t"\
+             "pmullw %%xmm0, %%"out_reg1"    \n\t"\
+             "pmullw %%xmm4, %%"out_reg2"    \n\t"
+
+#define snow_inner_add_yblock_sse2_start_16(out_reg1, out_reg2, ptr_offset, s_offset)\
+             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
+             "movq (%%"REG_d"), %%"out_reg1" \n\t"\
+             "movq 8(%%"REG_d"), %%"out_reg2" \n\t"\
+             "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
+             "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
+             "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
+             "movq "s_offset"+8(%%"REG_S"), %%xmm4 \n\t"\
+             "punpcklbw %%xmm7, %%xmm0       \n\t"\
+             "punpcklbw %%xmm7, %%xmm4       \n\t"\
+             "pmullw %%xmm0, %%"out_reg1"    \n\t"\
+             "pmullw %%xmm4, %%"out_reg2"    \n\t"
+
+#define snow_inner_add_yblock_sse2_accum_8(ptr_offset, s_offset) \
+             snow_inner_add_yblock_sse2_start_8("xmm2", "xmm6", ptr_offset, s_offset)\
+             "paddusw %%xmm2, %%xmm1         \n\t"\
+             "paddusw %%xmm6, %%xmm5         \n\t"
+
+#define snow_inner_add_yblock_sse2_accum_16(ptr_offset, s_offset) \
+             snow_inner_add_yblock_sse2_start_16("xmm2", "xmm6", ptr_offset, s_offset)\
+             "paddusw %%xmm2, %%xmm1         \n\t"\
+             "paddusw %%xmm6, %%xmm5         \n\t"
+
+#define snow_inner_add_yblock_sse2_end_common1\
+             "add $32, %%"REG_S"             \n\t"\
+             "add %%"REG_c", %0              \n\t"\
+             "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\
+             "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\
+             "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\
+             "add %%"REG_c", (%%"REG_a")     \n\t"
+
+#define snow_inner_add_yblock_sse2_end_common2\
+             "jnz 1b                         \n\t"\
+             :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
+             :\
+             "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\
+             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
+
+#define snow_inner_add_yblock_sse2_end_8\
+             "sal $1, %%"REG_c"              \n\t"\
+             "add $"PTR_SIZE"*2, %1          \n\t"\
+             snow_inner_add_yblock_sse2_end_common1\
+             "sar $1, %%"REG_c"              \n\t"\
+             "sub $2, %2                     \n\t"\
+             snow_inner_add_yblock_sse2_end_common2
+
+#define snow_inner_add_yblock_sse2_end_16\
+             "add $"PTR_SIZE"*1, %1          \n\t"\
+             snow_inner_add_yblock_sse2_end_common1\
+             "dec %2                         \n\t"\
+             snow_inner_add_yblock_sse2_end_common2
+
+static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
+                      int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+snow_inner_add_yblock_sse2_header
+snow_inner_add_yblock_sse2_start_8("xmm1", "xmm5", "3", "0")
+snow_inner_add_yblock_sse2_accum_8("2", "8")
+snow_inner_add_yblock_sse2_accum_8("1", "128")
+snow_inner_add_yblock_sse2_accum_8("0", "136")
+
+             "mov %0, %%"REG_d"              \n\t"
+             "movdqa (%%"REG_D"), %%xmm0     \n\t"
+             "movdqa %%xmm1, %%xmm2          \n\t"
+
+             "punpckhwd %%xmm7, %%xmm1       \n\t"
+             "punpcklwd %%xmm7, %%xmm2       \n\t"
+             "paddd %%xmm2, %%xmm0           \n\t"
+             "movdqa 16(%%"REG_D"), %%xmm2   \n\t"
+             "paddd %%xmm1, %%xmm2           \n\t"
+             "paddd %%xmm3, %%xmm0           \n\t"
+             "paddd %%xmm3, %%xmm2           \n\t"
+
+             "mov %1, %%"REG_D"              \n\t"
+             "mov "PTR_SIZE"(%%"REG_D"), %%"REG_D";\n\t"
+             "add %3, %%"REG_D"              \n\t"
+
+             "movdqa (%%"REG_D"), %%xmm4     \n\t"
+             "movdqa %%xmm5, %%xmm6          \n\t"
+             "punpckhwd %%xmm7, %%xmm5       \n\t"
+             "punpcklwd %%xmm7, %%xmm6       \n\t"
+             "paddd %%xmm6, %%xmm4           \n\t"
+             "movdqa 16(%%"REG_D"), %%xmm6   \n\t"
+             "paddd %%xmm5, %%xmm6           \n\t"
+             "paddd %%xmm3, %%xmm4           \n\t"
+             "paddd %%xmm3, %%xmm6           \n\t"
+
+             "psrad $8, %%xmm0               \n\t" /* FRAC_BITS. */
+             "psrad $8, %%xmm2               \n\t" /* FRAC_BITS. */
+             "packssdw %%xmm2, %%xmm0        \n\t"
+             "packuswb %%xmm7, %%xmm0        \n\t"
+             "movq %%xmm0, (%%"REG_d")       \n\t"
+
+             "psrad $8, %%xmm4               \n\t" /* FRAC_BITS. */
+             "psrad $8, %%xmm6               \n\t" /* FRAC_BITS. */
+             "packssdw %%xmm6, %%xmm4        \n\t"
+             "packuswb %%xmm7, %%xmm4        \n\t"
+             "movq %%xmm4, (%%"REG_d",%%"REG_c");\n\t"
+snow_inner_add_yblock_sse2_end_8
+}
+
+static void inner_add_yblock_bw_16_obmc_32_sse2(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
+                      int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+snow_inner_add_yblock_sse2_header
+snow_inner_add_yblock_sse2_start_16("xmm1", "xmm5", "3", "0")
+snow_inner_add_yblock_sse2_accum_16("2", "16")
+snow_inner_add_yblock_sse2_accum_16("1", "512")
+snow_inner_add_yblock_sse2_accum_16("0", "528")
+
+             "mov %0, %%"REG_d"              \n\t"
+             "movdqa %%xmm1, %%xmm0          \n\t"
+             "movdqa %%xmm5, %%xmm4          \n\t"
+             "punpcklwd %%xmm7, %%xmm0       \n\t"
+             "paddd (%%"REG_D"), %%xmm0      \n\t"
+             "punpckhwd %%xmm7, %%xmm1       \n\t"
+             "paddd 16(%%"REG_D"), %%xmm1    \n\t"
+             "punpcklwd %%xmm7, %%xmm4       \n\t"
+             "paddd 32(%%"REG_D"), %%xmm4    \n\t"
+             "punpckhwd %%xmm7, %%xmm5       \n\t"
+             "paddd 48(%%"REG_D"), %%xmm5    \n\t"
+             "paddd %%xmm3, %%xmm0           \n\t"
+             "paddd %%xmm3, %%xmm1           \n\t"
+             "paddd %%xmm3, %%xmm4           \n\t"
+             "paddd %%xmm3, %%xmm5           \n\t"
+             "psrad $8, %%xmm0               \n\t" /* FRAC_BITS. */
+             "psrad $8, %%xmm1               \n\t" /* FRAC_BITS. */
+             "psrad $8, %%xmm4               \n\t" /* FRAC_BITS. */
+             "psrad $8, %%xmm5               \n\t" /* FRAC_BITS. */
+
+             "packssdw %%xmm1, %%xmm0        \n\t"
+             "packssdw %%xmm5, %%xmm4        \n\t"
+             "packuswb %%xmm4, %%xmm0        \n\t"
+
+             "movdqu %%xmm0, (%%"REG_d")       \n\t"
+
+snow_inner_add_yblock_sse2_end_16
+}
+
+#define snow_inner_add_yblock_mmx_header \
+    DWTELEM * * dst_array = sb->line + src_y;\
+    long tmp;\
+    asm volatile(\
+             "mov  %7, %%"REG_c"             \n\t"\
+             "mov  %6, %2                    \n\t"\
+             "mov  %4, %%"REG_S"             \n\t"\
+             "pxor %%mm7, %%mm7              \n\t" /* 0 */\
+             "pcmpeqd %%mm3, %%mm3           \n\t"\
+             "pslld $31, %%mm3               \n\t"\
+             "psrld $24, %%mm3               \n\t" /* FRAC_BITS >> 1 */\
+             "1:                             \n\t"\
+             "mov %1, %%"REG_D"              \n\t"\
+             "mov (%%"REG_D"), %%"REG_D"     \n\t"\
+             "add %3, %%"REG_D"              \n\t"
+
+#define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset)\
+             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
+             "movd "d_offset"(%%"REG_d"), %%"out_reg1" \n\t"\
+             "movd "d_offset"+4(%%"REG_d"), %%"out_reg2" \n\t"\
+             "punpcklbw %%mm7, %%"out_reg1" \n\t"\
+             "punpcklbw %%mm7, %%"out_reg2" \n\t"\
+             "movd "s_offset"(%%"REG_S"), %%mm0 \n\t"\
+             "movd "s_offset"+4(%%"REG_S"), %%mm4 \n\t"\
+             "punpcklbw %%mm7, %%mm0       \n\t"\
+             "punpcklbw %%mm7, %%mm4       \n\t"\
+             "pmullw %%mm0, %%"out_reg1"    \n\t"\
+             "pmullw %%mm4, %%"out_reg2"    \n\t"
+
+#define snow_inner_add_yblock_mmx_accum(ptr_offset, s_offset, d_offset) \
+             snow_inner_add_yblock_mmx_start("mm2", "mm6", ptr_offset, s_offset, d_offset)\
+             "paddusw %%mm2, %%mm1         \n\t"\
+             "paddusw %%mm6, %%mm5         \n\t"
+
+#define snow_inner_add_yblock_mmx_mix(read_offset, write_offset)\
+             "mov %0, %%"REG_d"              \n\t"\
+             "movq %%mm1, %%mm0              \n\t"\
+             "movq %%mm5, %%mm4              \n\t"\
+             "punpcklwd %%mm7, %%mm0         \n\t"\
+             "paddd "read_offset"(%%"REG_D"), %%mm0 \n\t"\
+             "punpckhwd %%mm7, %%mm1         \n\t"\
+             "paddd "read_offset"+8(%%"REG_D"), %%mm1 \n\t"\
+             "punpcklwd %%mm7, %%mm4         \n\t"\
+             "paddd "read_offset"+16(%%"REG_D"), %%mm4 \n\t"\
+             "punpckhwd %%mm7, %%mm5         \n\t"\
+             "paddd "read_offset"+24(%%"REG_D"), %%mm5 \n\t"\
+             "paddd %%mm3, %%mm0             \n\t"\
+             "paddd %%mm3, %%mm1             \n\t"\
+             "paddd %%mm3, %%mm4             \n\t"\
+             "paddd %%mm3, %%mm5             \n\t"\
+             "psrad $8, %%mm0                \n\t"\
+             "psrad $8, %%mm1                \n\t"\
+             "psrad $8, %%mm4                \n\t"\
+             "psrad $8, %%mm5                \n\t"\
+\
+             "packssdw %%mm1, %%mm0          \n\t"\
+             "packssdw %%mm5, %%mm4          \n\t"\
+             "packuswb %%mm4, %%mm0          \n\t"\
+             "movq %%mm0, "write_offset"(%%"REG_d") \n\t"
+
+#define snow_inner_add_yblock_mmx_end(s_step)\
+             "add $"s_step", %%"REG_S"             \n\t"\
+             "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\
+             "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\
+             "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\
+             "add %%"REG_c", (%%"REG_a")     \n\t"\
+             "add $"PTR_SIZE"*1, %1          \n\t"\
+             "add %%"REG_c", %0              \n\t"\
+             "dec %2                         \n\t"\
+             "jnz 1b                         \n\t"\
+             :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
+             :\
+             "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\
+             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
+
+static void inner_add_yblock_bw_8_obmc_16_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
+                      int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+snow_inner_add_yblock_mmx_header
+snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0")
+snow_inner_add_yblock_mmx_accum("2", "8", "0")
+snow_inner_add_yblock_mmx_accum("1", "128", "0")
+snow_inner_add_yblock_mmx_accum("0", "136", "0")
+snow_inner_add_yblock_mmx_mix("0", "0")
+snow_inner_add_yblock_mmx_end("16")
+}
+
+static void inner_add_yblock_bw_16_obmc_32_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h,
+                      int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+snow_inner_add_yblock_mmx_header
+snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0")
+snow_inner_add_yblock_mmx_accum("2", "16", "0")
+snow_inner_add_yblock_mmx_accum("1", "512", "0")
+snow_inner_add_yblock_mmx_accum("0", "528", "0")
+snow_inner_add_yblock_mmx_mix("0", "0")
+
+snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "8", "8")
+snow_inner_add_yblock_mmx_accum("2", "24", "8")
+snow_inner_add_yblock_mmx_accum("1", "520", "8")
+snow_inner_add_yblock_mmx_accum("0", "536", "8")
+snow_inner_add_yblock_mmx_mix("32", "8")
+snow_inner_add_yblock_mmx_end("32")
+}
+
+void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+                           int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+
+    if (b_w == 16)
+        inner_add_yblock_bw_16_obmc_32_sse2(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+    else if (b_w == 8 && obmc_stride == 16) {
+        if (!(b_h & 1))
+            inner_add_yblock_bw_8_obmc_16_bh_even_sse2(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+        else
+            inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+    } else
+         ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+}
+
+void ff_snow_inner_add_yblock_mmx(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+                          int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+    if (b_w == 16)
+        inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+    else if (b_w == 8 && obmc_stride == 16)
+        inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+    else
+        ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+}
diff --git a/contrib/ffmpeg/libavcodec/i386/vp3dsp_mmx.c b/contrib/ffmpeg/libavcodec/i386/vp3dsp_mmx.c
new file mode 100644
index 000000000..f715dc803
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/vp3dsp_mmx.c
@@ -0,0 +1,324 @@
+/*
+ * Copyright (C) 2004 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file vp3dsp_mmx.c
+ * MMX-optimized functions cribbed from the original VP3 source code.
+ */
+
+#include "../dsputil.h"
+#include "mmx.h"
+
+#define IdctAdjustBeforeShift 8
+
+/* (12 * 4) 2-byte memory locations ( = 96 bytes total)
+ * idct_constants[0..15] = Mask table (M(I))
+ * idct_constants[16..43] = Cosine table (C(I))
+ * idct_constants[44..47] = 8
+ */
+static uint16_t idct_constants[(4 + 7 + 1) * 4];
+static const uint16_t idct_cosine_table[7] = {
+    64277, 60547, 54491, 46341, 36410, 25080, 12785
+};
+
+#define r0 mm0
+#define r1 mm1
+#define r2 mm2
+#define r3 mm3
+#define r4 mm4
+#define r5 mm5
+#define r6 mm6
+#define r7 mm7
+
+/* from original comments: The Macro does IDct on 4 1-D Dcts */
+#define BeginIDCT() { \
+    movq_m2r(*I(3), r2); \
+    movq_m2r(*C(3), r6); \
+    movq_r2r(r2, r4); \
+    movq_m2r(*J(5), r7); \
+    pmulhw_r2r(r6, r4);       /* r4 = c3*i3 - i3 */ \
+    movq_m2r(*C(5), r1); \
+    pmulhw_r2r(r7, r6);       /* r6 = c3*i5 - i5 */ \
+    movq_r2r(r1, r5); \
+    pmulhw_r2r(r2, r1);       /* r1 = c5*i3 - i3 */ \
+    movq_m2r(*I(1), r3); \
+    pmulhw_r2r(r7, r5);       /* r5 = c5*i5 - i5 */ \
+    movq_m2r(*C(1), r0);      /* (all registers are in use) */ \
+    paddw_r2r(r2, r4);        /* r4 = c3*i3 */ \
+    paddw_r2r(r7, r6);        /* r6 = c3*i5 */ \
+    paddw_r2r(r1, r2);        /* r2 = c5*i3 */ \
+    movq_m2r(*J(7), r1); \
+    paddw_r2r(r5, r7);        /* r7 = c5*i5 */ \
+    movq_r2r(r0, r5);         /* r5 = c1 */ \
+    pmulhw_r2r(r3, r0);       /* r0 = c1*i1 - i1 */ \
+    paddsw_r2r(r7, r4);       /* r4 = C = c3*i3 + c5*i5 */ \
+    pmulhw_r2r(r1, r5);       /* r5 = c1*i7 - i7 */ \
+    movq_m2r(*C(7), r7); \
+    psubsw_r2r(r2, r6);       /* r6 = D = c3*i5 - c5*i3 */ \
+    paddw_r2r(r3, r0);        /* r0 = c1*i1 */ \
+    pmulhw_r2r(r7, r3);       /* r3 = c7*i1 */ \
+    movq_m2r(*I(2), r2); \
+    pmulhw_r2r(r1, r7);       /* r7 = c7*i7 */ \
+    paddw_r2r(r1, r5);        /* r5 = c1*i7 */ \
+    movq_r2r(r2, r1);         /* r1 = i2 */ \
+    pmulhw_m2r(*C(2), r2);    /* r2 = c2*i2 - i2 */ \
+    psubsw_r2r(r5, r3);       /* r3 = B = c7*i1 - c1*i7 */ \
+    movq_m2r(*J(6), r5); \
+    paddsw_r2r(r7, r0);       /* r0 = A = c1*i1 + c7*i7 */ \
+    movq_r2r(r5, r7);         /* r7 = i6 */ \
+    psubsw_r2r(r4, r0);       /* r0 = A - C */ \
+    pmulhw_m2r(*C(2), r5);    /* r5 = c2*i6 - i6 */ \
+    paddw_r2r(r1, r2);        /* r2 = c2*i2 */ \
+    pmulhw_m2r(*C(6), r1);    /* r1 = c6*i2 */ \
+    paddsw_r2r(r4, r4);       /* r4 = C + C */ \
+    paddsw_r2r(r0, r4);       /* r4 = C. = A + C */ \
+    psubsw_r2r(r6, r3);       /* r3 = B - D */ \
+    paddw_r2r(r7, r5);        /* r5 = c2*i6 */ \
+    paddsw_r2r(r6, r6);       /* r6 = D + D */ \
+    pmulhw_m2r(*C(6), r7);    /* r7 = c6*i6 */ \
+    paddsw_r2r(r3, r6);       /* r6 = D. = B + D */ \
+    movq_r2m(r4, *I(1));      /* save C. at I(1) */ \
+    psubsw_r2r(r5, r1);       /* r1 = H = c6*i2 - c2*i6 */ \
+    movq_m2r(*C(4), r4); \
+    movq_r2r(r3, r5);         /* r5 = B - D */ \
+    pmulhw_r2r(r4, r3);       /* r3 = (c4 - 1) * (B - D) */ \
+    paddsw_r2r(r2, r7);       /* r7 = G = c6*i6 + c2*i2 */ \
+    movq_r2m(r6, *I(2));      /* save D. at I(2) */ \
+    movq_r2r(r0, r2);         /* r2 = A - C */ \
+    movq_m2r(*I(0), r6); \
+    pmulhw_r2r(r4, r0);       /* r0 = (c4 - 1) * (A - C) */ \
+    paddw_r2r(r3, r5);        /* r5 = B. = c4 * (B - D) */ \
+    movq_m2r(*J(4), r3); \
+    psubsw_r2r(r1, r5);       /* r5 = B.. = B. - H */ \
+    paddw_r2r(r0, r2);        /* r0 = A. = c4 * (A - C) */ \
+    psubsw_r2r(r3, r6);       /* r6 = i0 - i4 */ \
+    movq_r2r(r6, r0); \
+    pmulhw_r2r(r4, r6);       /* r6 = (c4 - 1) * (i0 - i4) */ \
+    paddsw_r2r(r3, r3);       /* r3 = i4 + i4 */ \
+    paddsw_r2r(r1, r1);       /* r1 = H + H */ \
+    paddsw_r2r(r0, r3);       /* r3 = i0 + i4 */ \
+    paddsw_r2r(r5, r1);       /* r1 = H. = B + H */ \
+    pmulhw_r2r(r3, r4);       /* r4 = (c4 - 1) * (i0 + i4) */ \
+    paddsw_r2r(r0, r6);       /* r6 = F = c4 * (i0 - i4) */ \
+    psubsw_r2r(r2, r6);       /* r6 = F. = F - A. */ \
+    paddsw_r2r(r2, r2);       /* r2 = A. + A. */ \
+    movq_m2r(*I(1), r0);      /* r0 = C. */ \
+    paddsw_r2r(r6, r2);       /* r2 = A.. = F + A. */ \
+    paddw_r2r(r3, r4);        /* r4 = E = c4 * (i0 + i4) */ \
+    psubsw_r2r(r1, r2);       /* r2 = R2 = A.. - H. */ \
+}
+
+/* RowIDCT gets ready to transpose */
+#define RowIDCT() { \
+    \
+    BeginIDCT(); \
+    \
+    movq_m2r(*I(2), r3);   /* r3 = D. */ \
+    psubsw_r2r(r7, r4);    /* r4 = E. = E - G */ \
+    paddsw_r2r(r1, r1);    /* r1 = H. + H. */ \
+    paddsw_r2r(r7, r7);    /* r7 = G + G */ \
+    paddsw_r2r(r2, r1);    /* r1 = R1 = A.. + H. */ \
+    paddsw_r2r(r4, r7);    /* r7 = G. = E + G */ \
+    psubsw_r2r(r3, r4);    /* r4 = R4 = E. - D. */ \
+    paddsw_r2r(r3, r3); \
+    psubsw_r2r(r5, r6);    /* r6 = R6 = F. - B.. */ \
+    paddsw_r2r(r5, r5); \
+    paddsw_r2r(r4, r3);    /* r3 = R3 = E. + D. */ \
+    paddsw_r2r(r6, r5);    /* r5 = R5 = F. + B.. */ \
+    psubsw_r2r(r0, r7);    /* r7 = R7 = G. - C. */ \
+    paddsw_r2r(r0, r0); \
+    movq_r2m(r1, *I(1));   /* save R1 */ \
+    paddsw_r2r(r7, r0);    /* r0 = R0 = G. + C. */ \
+}
+
+/* Column IDCT normalizes and stores final results */
+#define ColumnIDCT() { \
+    \
+    BeginIDCT(); \
+    \
+    paddsw_m2r(*Eight, r2);    /* adjust R2 (and R1) for shift */ \
+    paddsw_r2r(r1, r1);        /* r1 = H. + H. */ \
+    paddsw_r2r(r2, r1);        /* r1 = R1 = A.. + H. */ \
+    psraw_i2r(4, r2);          /* r2 = NR2 */ \
+    psubsw_r2r(r7, r4);        /* r4 = E. = E - G */ \
+    psraw_i2r(4, r1);          /* r1 = NR1 */ \
+    movq_m2r(*I(2), r3);       /* r3 = D. */ \
+    paddsw_r2r(r7, r7);        /* r7 = G + G */ \
+    movq_r2m(r2, *I(2));       /* store NR2 at I2 */ \
+    paddsw_r2r(r4, r7);        /* r7 = G. = E + G */ \
+    movq_r2m(r1, *I(1));       /* store NR1 at I1 */ \
+    psubsw_r2r(r3, r4);        /* r4 = R4 = E. - D. */ \
+    paddsw_m2r(*Eight, r4);    /* adjust R4 (and R3) for shift */ \
+    paddsw_r2r(r3, r3);        /* r3 = D. + D. */ \
+    paddsw_r2r(r4, r3);        /* r3 = R3 = E. + D. */ \
+    psraw_i2r(4, r4);          /* r4 = NR4 */ \
+    psubsw_r2r(r5, r6);        /* r6 = R6 = F. - B.. */ \
+    psraw_i2r(4, r3);          /* r3 = NR3 */ \
+    paddsw_m2r(*Eight, r6);    /* adjust R6 (and R5) for shift */ \
+    paddsw_r2r(r5, r5);        /* r5 = B.. + B.. */ \
+    paddsw_r2r(r6, r5);        /* r5 = R5 = F. + B.. */ \
+    psraw_i2r(4, r6);          /* r6 = NR6 */ \
+    movq_r2m(r4, *J(4));       /* store NR4 at J4 */ \
+    psraw_i2r(4, r5);          /* r5 = NR5 */ \
+    movq_r2m(r3, *I(3));       /* store NR3 at I3 */ \
+    psubsw_r2r(r0, r7);        /* r7 = R7 = G. - C. */ \
+    paddsw_m2r(*Eight, r7);    /* adjust R7 (and R0) for shift */ \
+    paddsw_r2r(r0, r0);        /* r0 = C. + C. */ \
+    paddsw_r2r(r7, r0);        /* r0 = R0 = G. + C. */ \
+    psraw_i2r(4, r7);          /* r7 = NR7 */ \
+    movq_r2m(r6, *J(6));       /* store NR6 at J6 */ \
+    psraw_i2r(4, r0);          /* r0 = NR0 */ \
+    movq_r2m(r5, *J(5));       /* store NR5 at J5 */ \
+    movq_r2m(r7, *J(7));       /* store NR7 at J7 */ \
+    movq_r2m(r0, *I(0));       /* store NR0 at I0 */ \
+}
+
+/* Following macro does two 4x4 transposes in place.
+
+  At entry (we assume):
+
+    r0 = a3 a2 a1 a0
+    I(1) = b3 b2 b1 b0
+    r2 = c3 c2 c1 c0
+    r3 = d3 d2 d1 d0
+
+    r4 = e3 e2 e1 e0
+    r5 = f3 f2 f1 f0
+    r6 = g3 g2 g1 g0
+    r7 = h3 h2 h1 h0
+
+  At exit, we have:
+
+    I(0) = d0 c0 b0 a0
+    I(1) = d1 c1 b1 a1
+    I(2) = d2 c2 b2 a2
+    I(3) = d3 c3 b3 a3
+
+    J(4) = h0 g0 f0 e0
+    J(5) = h1 g1 f1 e1
+    J(6) = h2 g2 f2 e2
+    J(7) = h3 g3 f3 e3
+
+   I(0) I(1) I(2) I(3)  is the transpose of r0 I(1) r2 r3.
+   J(4) J(5) J(6) J(7)  is the transpose of r4 r5 r6 r7.
+
+   Since r1 is free at entry, we calculate the Js first. */
+
+#define Transpose() { \
+    movq_r2r(r4, r1);         /* r1 = e3 e2 e1 e0 */ \
+    punpcklwd_r2r(r5, r4);    /* r4 = f1 e1 f0 e0 */ \
+    movq_r2m(r0, *I(0));      /* save a3 a2 a1 a0 */ \
+    punpckhwd_r2r(r5, r1);    /* r1 = f3 e3 f2 e2 */ \
+    movq_r2r(r6, r0);         /* r0 = g3 g2 g1 g0 */ \
+    punpcklwd_r2r(r7, r6);    /* r6 = h1 g1 h0 g0 */ \
+    movq_r2r(r4, r5);         /* r5 = f1 e1 f0 e0 */ \
+    punpckldq_r2r(r6, r4);    /* r4 = h0 g0 f0 e0 = R4 */ \
+    punpckhdq_r2r(r6, r5);    /* r5 = h1 g1 f1 e1 = R5 */ \
+    movq_r2r(r1, r6);         /* r6 = f3 e3 f2 e2 */ \
+    movq_r2m(r4, *J(4)); \
+    punpckhwd_r2r(r7, r0);    /* r0 = h3 g3 h2 g2 */ \
+    movq_r2m(r5, *J(5)); \
+    punpckhdq_r2r(r0, r6);    /* r6 = h3 g3 f3 e3 = R7 */ \
+    movq_m2r(*I(0), r4);      /* r4 = a3 a2 a1 a0 */ \
+    punpckldq_r2r(r0, r1);    /* r1 = h2 g2 f2 e2 = R6 */ \
+    movq_m2r(*I(1), r5);      /* r5 = b3 b2 b1 b0 */ \
+    movq_r2r(r4, r0);         /* r0 = a3 a2 a1 a0 */ \
+    movq_r2m(r6, *J(7)); \
+    punpcklwd_r2r(r5, r0);    /* r0 = b1 a1 b0 a0 */ \
+    movq_r2m(r1, *J(6)); \
+    punpckhwd_r2r(r5, r4);    /* r4 = b3 a3 b2 a2 */ \
+    movq_r2r(r2, r5);         /* r5 = c3 c2 c1 c0 */ \
+    punpcklwd_r2r(r3, r2);    /* r2 = d1 c1 d0 c0 */ \
+    movq_r2r(r0, r1);         /* r1 = b1 a1 b0 a0 */ \
+    punpckldq_r2r(r2, r0);    /* r0 = d0 c0 b0 a0 = R0 */ \
+    punpckhdq_r2r(r2, r1);    /* r1 = d1 c1 b1 a1 = R1 */ \
+    movq_r2r(r4, r2);         /* r2 = b3 a3 b2 a2 */ \
+    movq_r2m(r0, *I(0)); \
+    punpckhwd_r2r(r3, r5);    /* r5 = d3 c3 d2 c2 */ \
+    movq_r2m(r1, *I(1)); \
+    punpckhdq_r2r(r5, r4);    /* r4 = d3 c3 b3 a3 = R3 */ \
+    punpckldq_r2r(r5, r2);    /* r2 = d2 c2 b2 a2 = R2 */ \
+    movq_r2m(r4, *I(3)); \
+    movq_r2m(r2, *I(2)); \
+}
+
+void ff_vp3_dsp_init_mmx(void)
+{
+    int j = 16;
+    uint16_t *p;
+
+    j = 1;
+    do {
+        p = idct_constants + ((j + 3) << 2);
+        p[0] = p[1] = p[2] = p[3] = idct_cosine_table[j - 1];
+    } while (++j <= 7);
+
+    idct_constants[44] = idct_constants[45] =
+    idct_constants[46] = idct_constants[47] = IdctAdjustBeforeShift;
+}
+
+void ff_vp3_idct_mmx(int16_t *output_data)
+{
+    /* eax = quantized input
+     * ebx = dequantizer matrix
+     * ecx = IDCT constants
+     *  M(I) = ecx + MaskOffset(0) + I * 8
+     *  C(I) = ecx + CosineOffset(32) + (I-1) * 8
+     * edx = output
+     * r0..r7 = mm0..mm7
+     */
+
+#define C(x) (idct_constants + 16 + (x - 1) * 4)
+#define Eight (idct_constants + 44)
+
+    /* at this point, function has completed dequantization + dezigzag +
+     * partial transposition; now do the idct itself */
+#define I(K) (output_data + K * 8)
+#define J(K) (output_data + ((K - 4) * 8) + 4)
+
+    RowIDCT();
+    Transpose();
+
+#undef I
+#undef J
+#define I(K) (output_data + (K * 8) + 32)
+#define J(K) (output_data + ((K - 4) * 8) + 36)
+
+    RowIDCT();
+    Transpose();
+
+#undef I
+#undef J
+#define I(K) (output_data + K * 8)
+#define J(K) (output_data + K * 8)
+
+    ColumnIDCT();
+
+#undef I
+#undef J
+#define I(K) (output_data + (K * 8) + 4)
+#define J(K) (output_data + (K * 8) + 4)
+
+    ColumnIDCT();
+
+#undef I
+#undef J
+
+}
diff --git a/contrib/ffmpeg/libavcodec/i386/vp3dsp_sse2.c b/contrib/ffmpeg/libavcodec/i386/vp3dsp_sse2.c
new file mode 100644
index 000000000..bd2911d59
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/i386/vp3dsp_sse2.c
@@ -0,0 +1,827 @@
+/*
+ * Copyright (C) 2004 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file vp3dsp_sse2.c
+ * SSE2-optimized functions cribbed from the original VP3 source code.
+ */
+
+#include "../dsputil.h"
+#include "mmx.h"
+
+static DECLARE_ALIGNED_16(const unsigned short, SSE2_dequant_const[]) =
+{
+    0,65535,65535,0,0,0,0,0,    // 0x0000 0000 0000 0000 0000 FFFF FFFF 0000
+    0,0,0,0,65535,65535,0,0,    // 0x0000 0000 FFFF FFFF 0000 0000 0000 0000
+    65535,65535,65535,0,0,0,0,0,// 0x0000 0000 0000 0000 0000 FFFF FFFF FFFF
+    0,0,0,65535,0,0,0,0,        // 0x0000 0000 0000 0000 FFFF 0000 0000 0000
+    0,0,0,65535,65535,0,0,0,    // 0x0000 0000 0000 FFFF FFFF 0000 0000 0000
+    65535,0,0,0,0,65535,0,0,    // 0x0000 0000 FFFF 0000 0000 0000 0000 FFFF
+    0,0,65535,65535, 0,0,0,0    // 0x0000 0000 0000 0000 FFFF FFFF 0000 0000
+};
+
+static DECLARE_ALIGNED_16(const unsigned int, eight_data[]) =
+{
+    0x00080008,
+    0x00080008,
+    0x00080008,
+    0x00080008
+};
+
+static DECLARE_ALIGNED_16(const unsigned short, SSE2_idct_data[7 * 8]) =
+{
+    64277,64277,64277,64277,64277,64277,64277,64277,
+    60547,60547,60547,60547,60547,60547,60547,60547,
+    54491,54491,54491,54491,54491,54491,54491,54491,
+    46341,46341,46341,46341,46341,46341,46341,46341,
+    36410,36410,36410,36410,36410,36410,36410,36410,
+    25080,25080,25080,25080,25080,25080,25080,25080,
+    12785,12785,12785,12785,12785,12785,12785,12785
+};
+
+
+#define SSE2_Column_IDCT() {        \
+    \
+    movdqu_m2r(*I(3), xmm2);     /* xmm2 = i3 */ \
+    movdqu_m2r(*C(3), xmm6);     /* xmm6 = c3 */ \
+    \
+    movdqu_r2r(xmm2, xmm4);      /* xmm4 = i3 */ \
+    movdqu_m2r(*I(5), xmm7);     /* xmm7 = i5 */ \
+    \
+    pmulhw_r2r(xmm6, xmm4);      /* xmm4 = c3 * i3 - i3 */ \
+    movdqu_m2r(*C(5), xmm1);     /* xmm1 = c5 */ \
+    \
+    pmulhw_r2r(xmm7, xmm6);      /* xmm6 = c3 * i5 - i5 */ \
+    movdqu_r2r(xmm1, xmm5);      /* xmm5 = c5 */ \
+    \
+    pmulhw_r2r(xmm2, xmm1);      /* xmm1 = c5 * i3 - i3 */ \
+    movdqu_m2r(*I(1), xmm3);     /* xmm3 = i1 */ \
+    \
+    pmulhw_r2r(xmm7, xmm5);      /* xmm5 = c5 * i5 - i5 */ \
+    movdqu_m2r(*C(1), xmm0);     /* xmm0 = c1 */ \
+    \
+    /* all registers are in use */ \
+    \
+    paddw_r2r(xmm2, xmm4);       /* xmm4 = c3 * i3 */ \
+    paddw_r2r(xmm7, xmm6);       /* xmm6 = c3 * i5 */ \
+    \
+    paddw_r2r(xmm1, xmm2);       /* xmm2 = c5 * i3 */ \
+    movdqu_m2r(*I(7), xmm1);     /* xmm1 = i7 */ \
+    \
+    paddw_r2r(xmm5, xmm7);       /* xmm7 = c5 * i5 */ \
+    movdqu_r2r(xmm0, xmm5);      /* xmm5 = c1 */ \
+    \
+    pmulhw_r2r(xmm3, xmm0);      /* xmm0 = c1 * i1 - i1 */ \
+    paddsw_r2r(xmm7, xmm4);      /* xmm4 = c3 * i3 + c5 * i5 = C */ \
+    \
+    pmulhw_r2r(xmm1, xmm5);      /* xmm5 = c1 * i7 - i7 */ \
+    movdqu_m2r(*C(7), xmm7);     /* xmm7 = c7 */ \
+    \
+    psubsw_r2r(xmm2, xmm6);      /* xmm6 = c3 * i5 - c5 * i3 = D */ \
+    paddw_r2r(xmm3, xmm0);       /* xmm0 = c1 * i1 */ \
+    \
+    pmulhw_r2r(xmm7, xmm3);      /* xmm3 = c7 * i1 */ \
+    movdqu_m2r(*I(2), xmm2);     /* xmm2 = i2 */ \
+    \
+    pmulhw_r2r(xmm1, xmm7);      /* xmm7 = c7 * i7 */ \
+    paddw_r2r(xmm1, xmm5);       /* xmm5 = c1 * i7 */ \
+    \
+    movdqu_r2r(xmm2, xmm1);      /* xmm1 = i2 */ \
+    pmulhw_m2r(*C(2), xmm2);     /* xmm2 = i2 * c2 -i2 */ \
+    \
+    psubsw_r2r(xmm5, xmm3);      /* xmm3 = c7 * i1 - c1 * i7 = B */ \
+    movdqu_m2r(*I(6), xmm5);     /* xmm5 = i6 */ \
+    \
+    paddsw_r2r(xmm7, xmm0);      /* xmm0 = c1 * i1 + c7 * i7 = A */ \
+    movdqu_r2r(xmm5, xmm7);      /* xmm7 = i6 */ \
+    \
+    psubsw_r2r(xmm4, xmm0);      /* xmm0 = A - C */ \
+    pmulhw_m2r(*C(2), xmm5);     /* xmm5 = c2 * i6 - i6 */ \
+    \
+    paddw_r2r(xmm1, xmm2);       /* xmm2 = i2 * c2 */ \
+    pmulhw_m2r(*C(6), xmm1);     /* xmm1 = c6 * i2 */ \
+    \
+    paddsw_r2r(xmm4, xmm4);      /* xmm4 = C + C */ \
+    paddsw_r2r(xmm0, xmm4);      /* xmm4 = A + C = C. */ \
+    \
+    psubsw_r2r(xmm6, xmm3);      /* xmm3 = B - D */ \
+    paddw_r2r(xmm7, xmm5);       /* xmm5 = c2 * i6 */ \
+    \
+    paddsw_r2r(xmm6, xmm6);      /* xmm6 = D + D */ \
+    pmulhw_m2r(*C(6), xmm7);     /* xmm7 = c6 * i6 */ \
+    \
+    paddsw_r2r(xmm3, xmm6);      /* xmm6 = B + D = D. */ \
+    movdqu_r2m(xmm4, *I(1));     /* Save C. at I(1) */ \
+    \
+    psubsw_r2r(xmm5, xmm1);      /* xmm1 = c6 * i2 - c2 * i6 = H */ \
+    movdqu_m2r(*C(4), xmm4);     /* xmm4 = c4 */ \
+    \
+    movdqu_r2r(xmm3, xmm5);      /* xmm5 = B - D */ \
+    pmulhw_r2r(xmm4, xmm3);      /* xmm3 = ( c4 -1 ) * ( B - D ) */ \
+    \
+    paddsw_r2r(xmm2, xmm7);      /* xmm7 = c2 * i2 + c6 * i6 = G */ \
+    movdqu_r2m(xmm6, *I(2));     /* Save D. at I(2) */ \
+    \
+    movdqu_r2r(xmm0, xmm2);      /* xmm2 = A - C */ \
+    movdqu_m2r(*I(0), xmm6);     /* xmm6 = i0 */ \
+    \
+    pmulhw_r2r(xmm4, xmm0);      /* xmm0 = ( c4 - 1 ) * ( A - C ) = A. */ \
+    paddw_r2r(xmm3, xmm5);       /* xmm5 = c4 * ( B - D ) = B. */ \
+    \
+    movdqu_m2r(*I(4), xmm3);     /* xmm3 = i4 */ \
+    psubsw_r2r(xmm1, xmm5);      /* xmm5 = B. - H = B.. */ \
+    \
+    paddw_r2r(xmm0, xmm2);       /* xmm2 = c4 * ( A - C) = A. */ \
+    psubsw_r2r(xmm3, xmm6);      /* xmm6 = i0 - i4 */ \
+    \
+    movdqu_r2r(xmm6, xmm0);      /* xmm0 = i0 - i4 */ \
+    pmulhw_r2r(xmm4, xmm6);      /* xmm6 = (c4 - 1) * (i0 - i4) = F */ \
+    \
+    paddsw_r2r(xmm3, xmm3);      /* xmm3 = i4 + i4 */ \
+    paddsw_r2r(xmm1, xmm1);      /* xmm1 = H + H */ \
+    \
+    paddsw_r2r(xmm0, xmm3);      /* xmm3 = i0 + i4 */ \
+    paddsw_r2r(xmm5, xmm1);      /* xmm1 = B. + H = H. */ \
+    \
+    pmulhw_r2r(xmm3, xmm4);      /* xmm4 = ( c4 - 1 ) * ( i0 + i4 )  */ \
+    paddw_r2r(xmm0, xmm6);       /* xmm6 = c4 * ( i0 - i4 ) */ \
+    \
+    psubsw_r2r(xmm2, xmm6);      /* xmm6 = F - A. = F. */ \
+    paddsw_r2r(xmm2, xmm2);      /* xmm2 = A. + A. */ \
+    \
+    movdqu_m2r(*I(1), xmm0);     /* Load        C. from I(1) */ \
+    paddsw_r2r(xmm6, xmm2);      /* xmm2 = F + A. = A.. */ \
+    \
+    paddw_r2r(xmm3, xmm4);       /* xmm4 = c4 * ( i0 + i4 ) = 3 */ \
+    psubsw_r2r(xmm1, xmm2);      /* xmm2 = A.. - H. = R2 */ \
+    \
+    paddsw_m2r(*Eight, xmm2);    /* Adjust R2 and R1 before shifting */ \
+    paddsw_r2r(xmm1, xmm1);      /* xmm1 = H. + H. */ \
+    \
+    paddsw_r2r(xmm2, xmm1);      /* xmm1 = A.. + H. = R1 */ \
+    psraw_i2r(4, xmm2);          /* xmm2 = op2 */ \
+    \
+    psubsw_r2r(xmm7, xmm4);      /* xmm4 = E - G = E. */ \
+    psraw_i2r(4, xmm1);          /* xmm1 = op1 */ \
+    \
+    movdqu_m2r(*I(2), xmm3);     /* Load D. from I(2) */ \
+    paddsw_r2r(xmm7, xmm7);      /* xmm7 = G + G */ \
+    \
+    movdqu_r2m(xmm2, *O(2));     /* Write out op2 */ \
+    paddsw_r2r(xmm4, xmm7);      /* xmm7 = E + G = G. */ \
+    \
+    movdqu_r2m(xmm1, *O(1));     /* Write out op1 */ \
+    psubsw_r2r(xmm3, xmm4);      /* xmm4 = E. - D. = R4 */ \
+    \
+    paddsw_m2r(*Eight, xmm4);    /* Adjust R4 and R3 before shifting */ \
+    paddsw_r2r(xmm3, xmm3);      /* xmm3 = D. + D. */ \
+    \
+    paddsw_r2r(xmm4, xmm3);      /* xmm3 = E. + D. = R3 */ \
+    psraw_i2r(4, xmm4);          /* xmm4 = op4 */ \
+    \
+    psubsw_r2r(xmm5, xmm6);      /* xmm6 = F. - B..= R6 */ \
+    psraw_i2r(4, xmm3);          /* xmm3 = op3 */ \
+    \
+    paddsw_m2r(*Eight, xmm6);    /* Adjust R6 and R5 before shifting */ \
+    paddsw_r2r(xmm5, xmm5);      /* xmm5 = B.. + B.. */ \
+    \
+    paddsw_r2r(xmm6, xmm5);      /* xmm5 = F. + B.. = R5 */ \
+    psraw_i2r(4, xmm6);          /* xmm6 = op6 */ \
+    \
+    movdqu_r2m(xmm4, *O(4));     /* Write out op4 */ \
+    psraw_i2r(4, xmm5);          /* xmm5 = op5 */ \
+    \
+    movdqu_r2m(xmm3, *O(3));     /* Write out op3 */ \
+    psubsw_r2r(xmm0, xmm7);      /* xmm7 = G. - C. = R7 */ \
+    \
+    paddsw_m2r(*Eight, xmm7);    /* Adjust R7 and R0 before shifting */ \
+    paddsw_r2r(xmm0, xmm0);      /* xmm0 = C. + C. */ \
+    \
+    paddsw_r2r(xmm7, xmm0);      /* xmm0 = G. + C. */ \
+    psraw_i2r(4, xmm7);          /* xmm7 = op7 */ \
+    \
+    movdqu_r2m(xmm6, *O(6));     /* Write out op6 */ \
+    psraw_i2r(4, xmm0);          /* xmm0 = op0 */ \
+    \
+    movdqu_r2m(xmm5, *O(5));     /* Write out op5 */ \
+    movdqu_r2m(xmm7, *O(7));     /* Write out op7 */ \
+    \
+    movdqu_r2m(xmm0, *O(0));     /* Write out op0 */ \
+    \
+} /* End of SSE2_Column_IDCT macro */
+
+
+#define SSE2_Row_IDCT() {        \
+    \
+    movdqu_m2r(*I(3), xmm2);     /* xmm2 = i3 */ \
+    movdqu_m2r(*C(3), xmm6);     /* xmm6 = c3 */ \
+    \
+    movdqu_r2r(xmm2, xmm4);      /* xmm4 = i3 */ \
+    movdqu_m2r(*I(5), xmm7);     /* xmm7 = i5 */ \
+    \
+    pmulhw_r2r(xmm6, xmm4);      /* xmm4 = c3 * i3 - i3 */ \
+    movdqu_m2r(*C(5), xmm1);     /* xmm1 = c5 */ \
+    \
+    pmulhw_r2r(xmm7, xmm6);      /* xmm6 = c3 * i5 - i5 */ \
+    movdqu_r2r(xmm1, xmm5);      /* xmm5 = c5 */ \
+    \
+    pmulhw_r2r(xmm2, xmm1);      /* xmm1 = c5 * i3 - i3 */ \
+    movdqu_m2r(*I(1), xmm3);     /* xmm3 = i1 */ \
+    \
+    pmulhw_r2r(xmm7, xmm5);      /* xmm5 = c5 * i5 - i5 */ \
+    movdqu_m2r(*C(1), xmm0);     /* xmm0 = c1 */ \
+    \
+    /* all registers are in use */ \
+    \
+    paddw_r2r(xmm2, xmm4);      /* xmm4 = c3 * i3 */ \
+    paddw_r2r(xmm7, xmm6);      /* xmm6 = c3 * i5 */ \
+    \
+    paddw_r2r(xmm1, xmm2);      /* xmm2 = c5 * i3 */ \
+    movdqu_m2r(*I(7), xmm1);    /* xmm1 = i7 */ \
+    \
+    paddw_r2r(xmm5, xmm7);      /* xmm7 = c5 * i5 */ \
+    movdqu_r2r(xmm0, xmm5);     /* xmm5 = c1 */ \
+    \
+    pmulhw_r2r(xmm3, xmm0);     /* xmm0 = c1 * i1 - i1 */ \
+    paddsw_r2r(xmm7, xmm4);     /* xmm4 = c3 * i3 + c5 * i5 = C */ \
+    \
+    pmulhw_r2r(xmm1, xmm5);     /* xmm5 = c1 * i7 - i7 */ \
+    movdqu_m2r(*C(7), xmm7);    /* xmm7 = c7 */ \
+    \
+    psubsw_r2r(xmm2, xmm6);     /* xmm6 = c3 * i5 - c5 * i3 = D */ \
+    paddw_r2r(xmm3, xmm0);      /* xmm0 = c1 * i1 */ \
+    \
+    pmulhw_r2r(xmm7, xmm3);     /* xmm3 = c7 * i1 */ \
+    movdqu_m2r(*I(2), xmm2);    /* xmm2 = i2 */ \
+    \
+    pmulhw_r2r(xmm1, xmm7);     /* xmm7 = c7 * i7 */ \
+    paddw_r2r(xmm1, xmm5);      /* xmm5 = c1 * i7 */ \
+    \
+    movdqu_r2r(xmm2, xmm1);     /* xmm1 = i2 */ \
+    pmulhw_m2r(*C(2), xmm2);    /* xmm2 = i2 * c2 -i2 */ \
+    \
+    psubsw_r2r(xmm5, xmm3);     /* xmm3 = c7 * i1 - c1 * i7 = B */ \
+    movdqu_m2r(*I(6), xmm5);    /* xmm5 = i6 */ \
+    \
+    paddsw_r2r(xmm7, xmm0);     /* xmm0 = c1 * i1 + c7 * i7        = A */ \
+    movdqu_r2r(xmm5, xmm7);     /* xmm7 = i6 */ \
+    \
+    psubsw_r2r(xmm4, xmm0);     /* xmm0 = A - C */ \
+    pmulhw_m2r(*C(2), xmm5);    /* xmm5 = c2 * i6 - i6 */ \
+    \
+    paddw_r2r(xmm1, xmm2);      /* xmm2 = i2 * c2 */ \
+    pmulhw_m2r(*C(6), xmm1);    /* xmm1 = c6 * i2 */ \
+    \
+    paddsw_r2r(xmm4, xmm4);     /* xmm4 = C + C */ \
+    paddsw_r2r(xmm0, xmm4);     /* xmm4 = A + C = C. */ \
+    \
+    psubsw_r2r(xmm6, xmm3);     /* xmm3 = B - D */ \
+    paddw_r2r(xmm7, xmm5);      /* xmm5 = c2 * i6 */ \
+    \
+    paddsw_r2r(xmm6, xmm6);     /* xmm6 = D + D */ \
+    pmulhw_m2r(*C(6), xmm7);    /* xmm7 = c6 * i6 */ \
+    \
+    paddsw_r2r(xmm3, xmm6);     /* xmm6 = B + D = D. */ \
+    movdqu_r2m(xmm4, *I(1));    /* Save C. at I(1)        */ \
+    \
+    psubsw_r2r(xmm5, xmm1);     /* xmm1 = c6 * i2 - c2 * i6 = H */ \
+    movdqu_m2r(*C(4), xmm4);    /* xmm4 = c4 */ \
+    \
+    movdqu_r2r(xmm3, xmm5);     /* xmm5 = B - D */ \
+    pmulhw_r2r(xmm4, xmm3);     /* xmm3 = ( c4 -1 ) * ( B - D ) */ \
+    \
+    paddsw_r2r(xmm2, xmm7);     /* xmm7 = c2 * i2 + c6 * i6 = G */ \
+    movdqu_r2m(xmm6, *I(2));    /* Save D. at I(2) */ \
+    \
+    movdqu_r2r(xmm0, xmm2);     /* xmm2 = A - C */ \
+    movdqu_m2r(*I(0), xmm6);    /* xmm6 = i0 */ \
+    \
+    pmulhw_r2r(xmm4, xmm0);     /* xmm0 = ( c4 - 1 ) * ( A - C ) = A. */ \
+    paddw_r2r(xmm3, xmm5);      /* xmm5 = c4 * ( B - D ) = B. */ \
+    \
+    movdqu_m2r(*I(4), xmm3);    /* xmm3 = i4 */ \
+    psubsw_r2r(xmm1, xmm5);     /* xmm5 = B. - H = B.. */ \
+    \
+    paddw_r2r(xmm0, xmm2);      /* xmm2 = c4 * ( A - C) = A. */ \
+    psubsw_r2r(xmm3, xmm6);     /* xmm6 = i0 - i4 */ \
+    \
+    movdqu_r2r(xmm6, xmm0);     /* xmm0 = i0 - i4 */ \
+    pmulhw_r2r(xmm4, xmm6);     /* xmm6 = ( c4 - 1 ) * ( i0 - i4 ) = F */ \
+    \
+    paddsw_r2r(xmm3, xmm3);     /* xmm3 = i4 + i4 */ \
+    paddsw_r2r(xmm1, xmm1);     /* xmm1 = H + H */ \
+    \
+    paddsw_r2r(xmm0, xmm3);     /* xmm3 = i0 + i4 */ \
+    paddsw_r2r(xmm5, xmm1);     /* xmm1 = B. + H = H. */ \
+    \
+    pmulhw_r2r(xmm3, xmm4);     /* xmm4 = ( c4 - 1 ) * ( i0 + i4 )  */ \
+    paddw_r2r(xmm0, xmm6);      /* xmm6 = c4 * ( i0 - i4 ) */ \
+    \
+    psubsw_r2r(xmm2, xmm6);     /* xmm6 = F - A. = F. */ \
+    paddsw_r2r(xmm2, xmm2);     /* xmm2 = A. + A. */ \
+    \
+    movdqu_m2r(*I(1), xmm0);    /* Load C. from I(1) */ \
+    paddsw_r2r(xmm6, xmm2);     /* xmm2 = F + A. = A.. */ \
+    \
+    paddw_r2r(xmm3, xmm4);      /* xmm4 = c4 * ( i0 + i4 ) = 3 */ \
+    psubsw_r2r(xmm1, xmm2);     /* xmm2 = A.. - H. = R2 */ \
+    \
+    paddsw_r2r(xmm1, xmm1);     /* xmm1 = H. + H. */ \
+    paddsw_r2r(xmm2, xmm1);     /* xmm1 = A.. + H. = R1 */ \
+    \
+    psubsw_r2r(xmm7, xmm4);     /* xmm4 = E - G = E. */ \
+    \
+    movdqu_m2r(*I(2), xmm3);    /* Load D. from I(2) */ \
+    paddsw_r2r(xmm7, xmm7);     /* xmm7 = G + G */ \
+    \
+    movdqu_r2m(xmm2, *I(2));    /* Write out op2 */ \
+    paddsw_r2r(xmm4, xmm7);     /* xmm7 = E + G = G. */ \
+    \
+    movdqu_r2m(xmm1, *I(1));    /* Write out op1 */ \
+    psubsw_r2r(xmm3, xmm4);     /* xmm4 = E. - D. = R4 */ \
+    \
+    paddsw_r2r(xmm3, xmm3);     /* xmm3 = D. + D. */ \
+    \
+    paddsw_r2r(xmm4, xmm3);     /* xmm3 = E. + D. = R3 */ \
+    \
+    psubsw_r2r(xmm5, xmm6);     /* xmm6 = F. - B..= R6 */ \
+    \
+    paddsw_r2r(xmm5, xmm5);     /* xmm5 = B.. + B.. */ \
+    \
+    paddsw_r2r(xmm6, xmm5);     /* xmm5 = F. + B.. = R5 */ \
+    \
+    movdqu_r2m(xmm4, *I(4));    /* Write out op4 */ \
+    \
+    movdqu_r2m(xmm3, *I(3));    /* Write out op3 */ \
+    psubsw_r2r(xmm0, xmm7);     /* xmm7 = G. - C. = R7 */ \
+    \
+    paddsw_r2r(xmm0, xmm0);     /* xmm0 = C. + C. */ \
+    \
+    paddsw_r2r(xmm7, xmm0);     /* xmm0 = G. + C. */ \
+    \
+    movdqu_r2m(xmm6, *I(6));    /* Write out op6 */ \
+    \
+    movdqu_r2m(xmm5, *I(5));    /* Write out op5 */ \
+    movdqu_r2m(xmm7, *I(7));    /* Write out op7 */ \
+    \
+    movdqu_r2m(xmm0, *I(0));    /* Write out op0 */ \
+    \
+} /* End of SSE2_Row_IDCT macro */
+
+
+#define SSE2_Transpose() {    \
+    \
+    movdqu_m2r(*I(4), xmm4);    /* xmm4=e7e6e5e4e3e2e1e0 */ \
+    movdqu_m2r(*I(5), xmm0);    /* xmm4=f7f6f5f4f3f2f1f0 */ \
+    \
+    movdqu_r2r(xmm4, xmm5);     /* make a copy */ \
+    punpcklwd_r2r(xmm0, xmm4);  /* xmm4=f3e3f2e2f1e1f0e0 */ \
+    \
+    punpckhwd_r2r(xmm0, xmm5);  /* xmm5=f7e7f6e6f5e5f4e4 */ \
+    movdqu_m2r(*I(6), xmm6);    /* xmm6=g7g6g5g4g3g2g1g0 */ \
+    \
+    movdqu_m2r(*I(7), xmm0);    /* xmm0=h7h6h5h4h3h2h1h0 */ \
+    movdqu_r2r(xmm6, xmm7);     /* make a copy */ \
+    \
+    punpcklwd_r2r(xmm0, xmm6);  /* xmm6=h3g3h3g2h1g1h0g0 */ \
+    punpckhwd_r2r(xmm0, xmm7);  /* xmm7=h7g7h6g6h5g5h4g4 */ \
+    \
+    movdqu_r2r(xmm4, xmm3);     /* make a copy */ \
+    punpckldq_r2r(xmm6, xmm4);  /* xmm4=h1g1f1e1h0g0f0e0 */ \
+    \
+    punpckhdq_r2r(xmm6, xmm3);  /* xmm3=h3g3g3e3h2g2f2e2 */ \
+    movdqu_r2m(xmm3, *I(6));    /* save h3g3g3e3h2g2f2e2 */ \
+    /* Free xmm6 */ \
+    movdqu_r2r(xmm5, xmm6);     /* make a copy */ \
+    punpckldq_r2r(xmm7, xmm5);  /* xmm5=h5g5f5e5h4g4f4e4 */ \
+    \
+    punpckhdq_r2r(xmm7, xmm6);  /* xmm6=h7g7f7e7h6g6f6e6 */ \
+    movdqu_m2r(*I(0), xmm0);    /* xmm0=a7a6a5a4a3a2a1a0 */ \
+    /* Free xmm7 */ \
+    movdqu_m2r(*I(1), xmm1);    /* xmm1=b7b6b5b4b3b2b1b0 */ \
+    movdqu_r2r(xmm0, xmm7);     /* make a copy */ \
+    \
+    punpcklwd_r2r(xmm1, xmm0);  /* xmm0=b3a3b2a2b1a1b0a0 */ \
+    punpckhwd_r2r(xmm1, xmm7);  /* xmm7=b7a7b6a6b5a5b4a4 */ \
+    /* Free xmm1 */ \
+    movdqu_m2r(*I(2), xmm2);    /* xmm2=c7c6c5c4c3c2c1c0 */ \
+    movdqu_m2r(*I(3), xmm3);    /* xmm3=d7d6d5d4d3d2d1d0 */ \
+    \
+    movdqu_r2r(xmm2, xmm1);     /* make a copy */ \
+    punpcklwd_r2r(xmm3, xmm2);  /* xmm2=d3c3d2c2d1c1d0c0 */ \
+    \
+    punpckhwd_r2r(xmm3, xmm1);  /* xmm1=d7c7d6c6d5c5d4c4 */ \
+    movdqu_r2r(xmm0, xmm3);     /* make a copy        */ \
+    \
+    punpckldq_r2r(xmm2, xmm0);  /* xmm0=d1c1b1a1d0c0b0a0 */ \
+    punpckhdq_r2r(xmm2, xmm3);  /* xmm3=d3c3b3a3d2c2b2a2 */ \
+    /* Free xmm2 */ \
+    movdqu_r2r(xmm7, xmm2);     /* make a copy */ \
+    punpckldq_r2r(xmm1, xmm2);  /* xmm2=d5c5b5a5d4c4b4a4 */ \
+    \
+    punpckhdq_r2r(xmm1, xmm7);  /* xmm7=d7c7b7a7d6c6b6a6 */ \
+    movdqu_r2r(xmm0, xmm1);     /* make a copy */ \
+    \
+    punpcklqdq_r2r(xmm4, xmm0); /* xmm0=h0g0f0e0d0c0b0a0 */ \
+    punpckhqdq_r2r(xmm4, xmm1); /* xmm1=h1g1g1e1d1c1b1a1 */ \
+    \
+    movdqu_r2m(xmm0, *I(0));    /* save I(0) */ \
+    movdqu_r2m(xmm1, *I(1));    /* save I(1) */ \
+    \
+    movdqu_m2r(*I(6), xmm0);    /* load h3g3g3e3h2g2f2e2 */ \
+    movdqu_r2r(xmm3, xmm1);     /* make a copy */ \
+    \
+    punpcklqdq_r2r(xmm0, xmm1); /* xmm1=h2g2f2e2d2c2b2a2 */ \
+    punpckhqdq_r2r(xmm0, xmm3); /* xmm3=h3g3f3e3d3c3b3a3 */ \
+    \
+    movdqu_r2r(xmm2, xmm4);     /* make a copy */ \
+    punpcklqdq_r2r(xmm5, xmm4); /* xmm4=h4g4f4e4d4c4b4a4 */ \
+    \
+    punpckhqdq_r2r(xmm5, xmm2); /* xmm2=h5g5f5e5d5c5b5a5 */ \
+    movdqu_r2m(xmm1, *I(2));    /* save I(2) */ \
+    \
+    movdqu_r2m(xmm3, *I(3));    /* save I(3) */ \
+    movdqu_r2m(xmm4, *I(4));    /* save I(4) */ \
+    \
+    movdqu_r2m(xmm2, *I(5));    /* save I(5) */ \
+    movdqu_r2r(xmm7, xmm5);     /* make a copy */ \
+    \
+    punpcklqdq_r2r(xmm6, xmm5); /* xmm5=h6g6f6e6d6c6b6a6 */ \
+    punpckhqdq_r2r(xmm6, xmm7); /* xmm7=h7g7f7e7d7c7b7a7 */ \
+    \
+    movdqu_r2m(xmm5, *I(6));    /* save I(6) */ \
+    movdqu_r2m(xmm7, *I(7));    /* save I(7) */ \
+    \
+} /* End of Transpose Macro */
+
+
+#define SSE2_Dequantize() {        \
+    movdqu_m2r(*(eax), xmm0);     \
+    \
+    pmullw_m2r(*(ebx), xmm0);          /* xmm0 = 07 06 05 04 03 02 01 00 */ \
+    movdqu_m2r(*(eax + 16), xmm1);     \
+    \
+    pmullw_m2r(*(ebx + 16), xmm1);     /* xmm1 = 17 16 15 14 13 12 11 10 */ \
+    pshuflw_r2r(xmm0, xmm3, 0x078);    /* xmm3 = 07 06 05 04 01 03 02 00 */ \
+    \
+    movdqu_r2r(xmm1, xmm2);            /* xmm2 = 17 16 15 14 13 12 11 10 */ \
+    movdqu_m2r(*(ecx), xmm7);          /* xmm7 = -- -- -- -- -- FF FF -- */ \
+    \
+    movdqu_m2r(*(eax + 32), xmm4);     \
+    movdqu_m2r(*(eax + 64), xmm5);     \
+    \
+    pmullw_m2r(*(ebx + 32), xmm4);     /* xmm4 = 27 26 25 24 23 22 21 20 */ \
+    pmullw_m2r(*(ebx + 64), xmm5);     /* xmm5 = 47 46 45 44 43 42 41 40 */ \
+    \
+    movdqu_m2r(*(ecx + 16), xmm6);     /* xmm6 = -- -- FF FF -- -- -- -- */ \
+    pand_r2r(xmm2, xmm7);              /* xmm7 = -- -- -- -- -- 12 11 -- */ \
+    \
+    pand_r2r(xmm4, xmm6);              /* xmm6 = -- -- 25 24 -- -- -- -- */ \
+    pxor_r2r(xmm7, xmm2);              /* xmm2 = 17 16 15 14 13 -- -- 10 */ \
+    \
+    pxor_r2r(xmm6, xmm4);              /* xmm4 = 27 26 -- -- 23 22 21 20 */ \
+    pslldq_i2r(4, xmm7);               /* xmm7 = -- -- -- 12 11 -- -- -- */ \
+    \
+    pslldq_i2r(2, xmm6);               /* xmm6 = -- 25 24 -- -- -- -- -- */ \
+    por_r2r(xmm6, xmm7);               /* xmm7 = -- 25 24 12 11 -- -- -- */ \
+    \
+    movdqu_m2r(*(ecx + 32), xmm0);     /* xmm0 = -- -- -- -- -- FF FF FF */ \
+    movdqu_m2r(*(ecx + 48), xmm6);     /* xmm6 = -- -- -- -- FF -- -- -- */ \
+    \
+    pand_r2r(xmm3, xmm0);              /* xmm0 = -- -- -- -- -- 03 02 00 */ \
+    pand_r2r(xmm5, xmm6);              /* xmm6 = -- -- -- -- 43 -- -- -- */ \
+    \
+    pxor_r2r(xmm0, xmm3);              /* xmm3 = 07 06 05 04 01 -- -- -- */ \
+    pxor_r2r(xmm6, xmm5);              /* xmm5 = 47 46 45 44 -- 42 41 40 */ \
+    \
+    por_r2r(xmm7, xmm0);               /* xmm0 = -- 25 24 12 11 03 02 00 */ \
+    pslldq_i2r(8, xmm6);               /* xmm6 = 43 -- -- -- -- -- -- -- */ \
+    \
+    por_r2r(xmm6, xmm0);               /* xmm0 = 43 25 24 12 11 03 02 00 */ \
+    /* 02345 in use */ \
+    \
+    movdqu_m2r(*(ecx + 64 ), xmm1);    /* xmm1 = -- -- -- FF FF -- -- -- */ \
+    pshuflw_r2r(xmm5, xmm5, 0x0B4);    /* xmm5 = 47 46 45 44 42 -- 41 40 */ \
+    \
+    movdqu_r2r(xmm1, xmm7);            /* xmm7 = -- -- -- FF FF -- -- -- */ \
+    movdqu_r2r(xmm1, xmm6);            /* xmm6 = -- -- -- FF FF -- -- -- */ \
+    \
+    movdqu_r2m(xmm0, *(eax));          /* write  43 25 24 12 11 03 02 00 */ \
+    pshufhw_r2r(xmm4, xmm4, 0x0C2);    /* xmm4 = 27 -- -- 26 23 22 21 20 */ \
+    \
+    pand_r2r(xmm4, xmm7);              /* xmm7 = -- -- -- 26 23 -- -- -- */ \
+    pand_r2r(xmm5, xmm1);              /* xmm1 = -- -- -- 44 42 -- -- -- */ \
+    \
+    pxor_r2r(xmm7, xmm4);              /* xmm4 = 27 -- -- -- -- 22 21 20 */ \
+    pxor_r2r(xmm1, xmm5);              /* xmm5 = 47 46 45 -- -- -- 41 40 */ \
+    \
+    pshuflw_r2r(xmm2, xmm2, 0x0C6);    /* xmm2 = 17 16 15 14 13 10 -- -- */ \
+    movdqu_r2r(xmm6, xmm0);            /* xmm0 = -- -- -- FF FF -- -- -- */ \
+    \
+    pslldq_i2r(2, xmm7);               /* xmm7 = -- -- 26 23 -- -- -- -- */ \
+    pslldq_i2r(6, xmm1);               /* xmm1 = 44 42 -- -- -- -- -- -- */ \
+    \
+    psrldq_i2r(2, xmm0);               /* xmm0 = -- -- -- -- FF FF -- -- */ \
+    pand_r2r(xmm3, xmm6);              /* xmm6 = -- -- -- 04 01 -- -- -- */ \
+    \
+    pand_r2r(xmm2, xmm0);              /* xmm0 = -- -- -- -- 13 10 -- -- */ \
+    pxor_r2r(xmm6, xmm3);              /* xmm3 = 07 06 05 -- -- -- -- -- */ \
+    \
+    pxor_r2r(xmm0, xmm2);              /* xmm2 = 17 16 15 14 -- -- -- -- */ \
+    psrldq_i2r(6, xmm6);               /* xmm0 = -- -- -- -- -- -- 04 01 */ \
+    \
+    por_r2r(xmm7, xmm1);               /* xmm1 = 44 42 26 23 -- -- -- -- */ \
+    por_r2r(xmm6, xmm0);               /* xmm1 = -- -- -- -- 13 10 04 01 */ \
+    /* 12345 in use */    \
+    por_r2r(xmm0, xmm1);               /* xmm1 = 44 42 26 23 13 10 04 01 */ \
+    pshuflw_r2r(xmm4, xmm4, 0x093);    /* xmm4 = 27 -- -- -- 22 21 20 -- */ \
+    \
+    pshufhw_r2r(xmm4, xmm4, 0x093);    /* xmm4 = -- -- -- 27 22 21 20 -- */ \
+    movdqu_r2m(xmm1, *(eax + 16));     /* write  44 42 26 23 13 10 04 01 */ \
+    \
+    pshufhw_r2r(xmm3, xmm3, 0x0D2);    /* xmm3 = 07 05 -- 06 -- -- -- -- */ \
+    movdqu_m2r(*(ecx + 64), xmm0);     /* xmm0 = -- -- -- FF FF -- -- -- */ \
+    \
+    pand_r2r(xmm3, xmm0);              /* xmm0 = -- -- -- 06 -- -- -- -- */ \
+    psrldq_i2r(12, xmm3);              /* xmm3 = -- -- -- -- -- -- 07 05 */ \
+    \
+    psrldq_i2r(8, xmm0);               /* xmm0 = -- -- -- -- -- -- -- 06 */ \
+    \
+    movdqu_m2r(*(ecx + 64), xmm6);     /* xmm6 = -- -- -- FF FF -- -- -- */ \
+    movdqu_m2r(*(ecx + 96), xmm7);     /* xmm7 = -- -- -- -- FF FF -- -- */ \
+    \
+    pand_r2r(xmm4, xmm6);              /* xmm6 = -- -- -- 27 22 -- -- -- */ \
+    pxor_r2r(xmm6, xmm4);              /* xmm4 = -- -- -- -- -- 21 20 -- */ \
+    \
+    por_r2r(xmm6, xmm3);               /* xmm3 = -- -- -- 27 22 -- 07 05 */ \
+    pand_r2r(xmm4, xmm7);              /* xmm7 = -- -- -- -- -- 21 -- -- */ \
+    \
+    por_r2r(xmm7, xmm0);               /* xmm0 = -- -- -- -- -- 21 -- 06 */ \
+    pxor_r2r(xmm7, xmm4);              /* xmm4 = -- -- -- -- -- -- 20 -- */ \
+    \
+    movdqu_m2r(*(ecx + 16 ), xmm6);    /* xmm6 = -- -- FF FF -- -- -- -- */ \
+    movdqu_m2r(*(ecx + 64 ), xmm1);    /* xmm1 = -- -- -- FF FF -- -- -- */ \
+    \
+    pand_r2r(xmm2, xmm6);              /* xmm6 = -- -- 15 14 -- -- -- -- */ \
+    pand_r2r(xmm6, xmm1);              /* xmm1 = -- -- -- 14 -- -- -- -- */ \
+    \
+    pxor_r2r(xmm6, xmm2);              /* xmm2 = 17 16 -- -- -- -- -- -- */ \
+    pxor_r2r(xmm1, xmm6);              /* xmm6 = -- -- 15 -- -- -- -- -- */ \
+    \
+    psrldq_i2r(4, xmm1);               /* xmm1 = -- -- -- -- -- 14 -- -- */ \
+    \
+    psrldq_i2r(8, xmm6);               /* xmm6 = -- -- -- -- -- -- 15 -- */ \
+    por_r2r(xmm1, xmm3);               /* xmm3 = -- -- -- 27 22 14 07 05 */ \
+    \
+    por_r2r(xmm6, xmm0);               /* xmm0 = -- -- -- -- -- 21 15 06 */ \
+    pshufhw_r2r(xmm5, xmm5, 0x0E1);    /* xmm5 = 47 46 -- 45 -- -- 41 40 */ \
+    \
+    movdqu_m2r(*(ecx + 64), xmm1);     /* xmm1 = -- -- -- FF FF -- -- -- */ \
+    pshuflw_r2r(xmm5, xmm5, 0x072);    /* xmm5 = 47 46 -- 45 41 -- 40 -- */ \
+    \
+    movdqu_r2r(xmm1, xmm6);            /* xmm6 = -- -- -- FF FF -- -- -- */ \
+    pand_r2r(xmm5, xmm1);              /* xmm1 = -- -- -- 45 41 -- -- -- */ \
+    \
+    pxor_r2r(xmm1, xmm5);              /* xmm5 = 47 46 -- -- -- -- 40 -- */ \
+    pslldq_i2r(4, xmm1);               /* xmm1 = -- 45 41 -- -- -- -- -- */ \
+    \
+    pshufd_r2r(xmm5, xmm5, 0x09C);     /* xmm5 = -- -- -- -- 47 46 40 -- */ \
+    por_r2r(xmm1, xmm3);               /* xmm3 = -- 45 41 27 22 14 07 05 */ \
+    \
+    movdqu_m2r(*(eax + 96), xmm1);     /* xmm1 = 67 66 65 64 63 62 61 60 */ \
+    pmullw_m2r(*(ebx + 96), xmm1);     \
+    \
+    movdqu_m2r(*(ecx), xmm7);          /* xmm7 = -- -- -- -- -- FF FF -- */ \
+    \
+    psrldq_i2r(8, xmm6);               /* xmm6 = -- -- -- -- -- -- -- FF */ \
+    pand_r2r(xmm5, xmm7);              /* xmm7 = -- -- -- -- -- 46 40 -- */ \
+    \
+    pand_r2r(xmm1, xmm6);              /* xmm6 = -- -- -- -- -- -- -- 60 */ \
+    pxor_r2r(xmm7, xmm5);              /* xmm5 = -- -- -- -- 47 -- -- -- */ \
+    \
+    pxor_r2r(xmm6, xmm1);              /* xmm1 = 67 66 65 64 63 62 61 -- */ \
+    pslldq_i2r(2, xmm5);               /* xmm5 = -- -- -- 47 -- -- -- -- */ \
+    \
+    pslldq_i2r(14, xmm6);              /* xmm6 = 60 -- -- -- -- -- -- -- */ \
+    por_r2r(xmm5, xmm4);               /* xmm4 = -- -- -- 47 -- -- 20 -- */ \
+    \
+    por_r2r(xmm6, xmm3);               /* xmm3 = 60 45 41 27 22 14 07 05 */ \
+    pslldq_i2r(6, xmm7);               /* xmm7 = -- -- 46 40 -- -- -- -- */ \
+    \
+    movdqu_r2m(xmm3, *(eax+32));       /* write  60 45 41 27 22 14 07 05 */ \
+    por_r2r(xmm7, xmm0);               /* xmm0 = -- -- 46 40 -- 21 15 06 */ \
+    /* 0, 1, 2, 4 in use */    \
+    movdqu_m2r(*(eax + 48), xmm3);     /* xmm3 = 37 36 35 34 33 32 31 30 */ \
+    movdqu_m2r(*(eax + 80), xmm5);     /* xmm5 = 57 56 55 54 53 52 51 50 */ \
+    \
+    pmullw_m2r(*(ebx + 48), xmm3);     \
+    pmullw_m2r(*(ebx + 80), xmm5);     \
+    \
+    movdqu_m2r(*(ecx + 64), xmm6);     /* xmm6 = -- -- -- FF FF -- -- -- */ \
+    movdqu_m2r(*(ecx + 64), xmm7);     /* xmm7 = -- -- -- FF FF -- -- -- */ \
+    \
+    psrldq_i2r(8, xmm6);               /* xmm6 = -- -- -- -- -- -- -- FF */ \
+    pslldq_i2r(8, xmm7);               /* xmm7 = FF -- -- -- -- -- -- -- */ \
+    \
+    pand_r2r(xmm3, xmm6);              /* xmm6 = -- -- -- -- -- -- -- 30 */ \
+    pand_r2r(xmm5, xmm7);              /* xmm7 = 57 -- -- -- -- -- -- -- */ \
+    \
+    pxor_r2r(xmm6, xmm3);              /* xmm3 = 37 36 35 34 33 32 31 -- */ \
+    pxor_r2r(xmm7, xmm5);              /* xmm5 = __ 56 55 54 53 52 51 50 */ \
+    \
+    pslldq_i2r(6, xmm6);               /* xmm6 = -- -- -- -- 30 -- -- -- */ \
+    psrldq_i2r(2, xmm7);               /* xmm7 = -- 57 -- -- -- -- -- -- */ \
+    \
+    por_r2r(xmm7, xmm6);               /* xmm6 = -- 57 -- -- 30 -- -- -- */ \
+    movdqu_m2r(*(ecx), xmm7);          /* xmm7 = -- -- -- -- -- FF FF -- */ \
+    \
+    por_r2r(xmm6, xmm0);               /* xmm0 = -- 57 46 40 30 21 15 06 */ \
+    psrldq_i2r(2, xmm7);               /* xmm7 = -- -- -- -- -- -- FF FF */ \
+    \
+    movdqu_r2r(xmm2, xmm6);            /* xmm6 = 17 16 -- -- -- -- -- -- */ \
+    pand_r2r(xmm1, xmm7);              /* xmm7 = -- -- -- -- -- -- 61 -- */ \
+    \
+    pslldq_i2r(2, xmm6);               /* xmm6 = 16 -- -- -- -- -- -- -- */ \
+    psrldq_i2r(14, xmm2);              /* xmm2 = -- -- -- -- -- -- -- 17 */ \
+    \
+    pxor_r2r(xmm7, xmm1);              /* xmm1 = 67 66 65 64 63 62 -- -- */ \
+    pslldq_i2r(12, xmm7);              /* xmm7 = 61 -- -- -- -- -- -- -- */ \
+    \
+    psrldq_i2r(14, xmm6);              /* xmm6 = -- -- -- -- -- -- -- 16 */ \
+    por_r2r(xmm6, xmm4);               /* xmm4 = -- -- -- 47 -- -- 20 16 */ \
+    \
+    por_r2r(xmm7, xmm0);               /* xmm0 = 61 57 46 40 30 21 15 06 */ \
+    movdqu_m2r(*(ecx), xmm6);          /* xmm6 = -- -- -- -- -- FF FF -- */ \
+    \
+    psrldq_i2r(2, xmm6);               /* xmm6 = -- -- -- -- -- -- FF FF */ \
+    movdqu_r2m(xmm0, *(eax+48));       /* write  61 57 46 40 30 21 15 06 */ \
+    /* 1, 2, 3, 4, 5 in use */\
+    movdqu_m2r(*(ecx), xmm0);          /* xmm0 = -- -- -- -- -- FF FF -- */ \
+    pand_r2r(xmm3, xmm6);              /* xmm6 = -- -- -- -- -- -- 31 -- */ \
+    \
+    movdqu_r2r(xmm3, xmm7);            /* xmm7 = 37 36 35 34 33 32 31 -- */ \
+    pxor_r2r(xmm6, xmm3);              /* xmm3 = 37 36 35 34 33 32 -- -- */ \
+    \
+    pslldq_i2r(2, xmm3);               /* xmm3 = 36 35 34 33 32 -- -- -- */ \
+    pand_r2r(xmm1, xmm0);              /* xmm0 = -- -- -- -- -- 62 -- -- */ \
+    \
+    psrldq_i2r(14, xmm7);              /* xmm7 = -- -- -- -- -- -- -- 37 */ \
+    pxor_r2r(xmm0, xmm1);              /* xmm1 = 67 66 65 64 63 -- -- -- */ \
+    \
+    por_r2r(xmm7, xmm6);               /* xmm6 = -- -- -- -- -- -- 31 37 */ \
+    movdqu_m2r(*(ecx + 64), xmm7);     /* xmm7 = -- -- -- FF FF -- -- -- */ \
+    \
+    pshuflw_r2r(xmm6, xmm6, 0x01E);    /* xmm6 = -- -- -- -- 37 31 -- -- */ \
+    pslldq_i2r(6, xmm7);               /* xmm7 = FF FF -- -- -- -- -- -- */ \
+    \
+    por_r2r(xmm6, xmm4);               /* xmm4 = -- -- -- 47 37 31 20 16 */ \
+    pand_r2r(xmm5, xmm7);              /* xmm7 = -- 56 -- -- -- -- -- -- */ \
+    \
+    pslldq_i2r(8, xmm0);               /* xmm0 = -- 62 -- -- -- -- -- -- */ \
+    pxor_r2r(xmm7, xmm5);              /* xmm5 = -- -- 55 54 53 52 51 50 */ \
+    \
+    psrldq_i2r(2, xmm7);               /* xmm7 = -- -- 56 -- -- -- -- -- */ \
+    \
+    pshufhw_r2r(xmm3, xmm3, 0x087);    /* xmm3 = 35 33 34 36 32 -- -- -- */ \
+    por_r2r(xmm7, xmm0);               /* xmm0 = -- 62 56 -- -- -- -- -- */ \
+    \
+    movdqu_m2r(*(eax + 112), xmm7);    /* xmm7 = 77 76 75 74 73 72 71 70 */ \
+    pmullw_m2r(*(ebx + 112), xmm7);     \
+    \
+    movdqu_m2r(*(ecx + 64), xmm6);     /* xmm6 = -- -- -- FF FF -- -- -- */ \
+    por_r2r(xmm0, xmm4);               /* xmm4 = -- 62 56 47 37 31 20 16 */ \
+    \
+    pshuflw_r2r(xmm7, xmm7, 0x0E1);    /* xmm7 = 77 76 75 74 73 72 70 71 */ \
+    psrldq_i2r(8, xmm6);               /* xmm6 = -- -- -- -- -- -- -- FF */ \
+    \
+    movdqu_m2r(*(ecx + 64), xmm0);     /* xmm0 = -- -- -- FF FF -- -- -- */ \
+    pand_r2r(xmm7, xmm6);              /* xmm6 = -- -- -- -- -- -- -- 71 */ \
+    \
+    pand_r2r(xmm3, xmm0);              /* xmm0 = -- -- -- 36 32 -- -- -- */ \
+    pxor_r2r(xmm6, xmm7);              /* xmm7 = 77 76 75 74 73 72 70 -- */ \
+    \
+    pxor_r2r(xmm0, xmm3);              /* xmm3 = 35 33 34 -- -- -- -- -- */ \
+    pslldq_i2r(14, xmm6);              /* xmm6 = 71 -- -- -- -- -- -- -- */ \
+    \
+    psrldq_i2r(4, xmm0);               /* xmm0 = -- -- -- -- -- 36 32 -- */ \
+    por_r2r(xmm6, xmm4);               /* xmm4 = 71 62 56 47 37 31 20 16 */ \
+    \
+    por_r2r(xmm0, xmm2);               /* xmm2 = -- -- -- -- -- 36 32 17 */ \
+    movdqu_r2m(xmm4, *(eax + 64));     /* write  71 62 56 47 37 31 20 16 */ \
+    /* 1, 2, 3, 5, 7 in use */ \
+    movdqu_m2r(*(ecx + 80), xmm6);     /* xmm6 = -- -- FF -- -- -- -- FF */ \
+    pshufhw_r2r(xmm7, xmm7, 0x0D2);    /* xmm7 = 77 75 74 76 73 72 70 __ */ \
+    \
+    movdqu_m2r(*(ecx), xmm4);          /* xmm4 = -- -- -- -- -- FF FF -- */ \
+    movdqu_m2r(*(ecx+48), xmm0);       /* xmm0 = -- -- -- -- FF -- -- -- */ \
+    \
+    pand_r2r(xmm5, xmm6);              /* xmm6 = -- -- 55 -- -- -- -- 50 */ \
+    pand_r2r(xmm7, xmm4);              /* xmm4 = -- -- -- -- -- 72 70 -- */ \
+    \
+    pand_r2r(xmm1, xmm0);              /* xmm0 = -- -- -- -- 63 -- -- -- */ \
+    pxor_r2r(xmm6, xmm5);              /* xmm5 = -- -- -- 54 53 52 51 -- */ \
+    \
+    pxor_r2r(xmm4, xmm7);              /* xmm7 = 77 75 74 76 73 -- -- -- */ \
+    pxor_r2r(xmm0, xmm1);              /* xmm1 = 67 66 65 64 -- -- -- -- */ \
+    \
+    pshuflw_r2r(xmm6, xmm6, 0x02B);    /* xmm6 = -- -- 55 -- 50 -- -- -- */ \
+    pslldq_i2r(10, xmm4);              /* xmm4 = 72 20 -- -- -- -- -- -- */ \
+    \
+    pshufhw_r2r(xmm6, xmm6, 0x0B1);    /* xmm6 = -- -- -- 55 50 -- -- -- */ \
+    pslldq_i2r(4, xmm0);               /* xmm0 = -- -- 63 -- -- -- -- -- */ \
+    \
+    por_r2r(xmm4, xmm6);               /* xmm6 = 72 70 -- 55 50 -- -- -- */ \
+    por_r2r(xmm0, xmm2);               /* xmm2 = -- -- 63 -- -- 36 32 17 */ \
+    \
+    por_r2r(xmm6, xmm2);               /* xmm2 = 72 70 64 55 50 36 32 17 */ \
+    pshufhw_r2r(xmm1, xmm1, 0x0C9);    /* xmm1 = 67 64 66 65 -- -- -- -- */ \
+    \
+    movdqu_r2r(xmm3, xmm6);            /* xmm6 = 35 33 34 -- -- -- -- -- */ \
+    movdqu_r2m(xmm2, *(eax+80));       /* write  72 70 64 55 50 36 32 17 */ \
+    \
+    psrldq_i2r(12, xmm6);              /* xmm6 = -- -- -- -- -- -- 35 33 */ \
+    pslldq_i2r(4, xmm3);               /* xmm3 = 34 -- -- -- -- -- -- -- */ \
+    \
+    pshuflw_r2r(xmm5, xmm5, 0x04E);    /* xmm5 = -- -- -- 54 51 -- 53 52 */ \
+    movdqu_r2r(xmm7, xmm4);            /* xmm4 = 77 75 74 76 73 -- -- -- */ \
+    \
+    movdqu_r2r(xmm5, xmm2);            /* xmm2 = -- -- -- 54 51 -- 53 52 */ \
+    psrldq_i2r(10, xmm7);              /* xmm7 = -- -- -- -- -- 77 75 74 */ \
+    \
+    pslldq_i2r(6, xmm4);               /* xmm4 = 76 73 -- -- -- -- -- -- */ \
+    pslldq_i2r(12, xmm2);              /* xmm2 = 53 52 -- -- -- -- -- -- */ \
+    \
+    movdqu_r2r(xmm1, xmm0);            /* xmm0 = 67 64 66 65 -- -- -- -- */ \
+    psrldq_i2r(12, xmm1);              /* xmm1 = -- -- -- -- -- -- 67 64 */ \
+    \
+    psrldq_i2r(6, xmm5);               /* xmm5 = -- -- -- -- -- -- 54 51 */ \
+    psrldq_i2r(14, xmm3);              /* xmm3 = -- -- -- -- -- -- -- 34 */ \
+    \
+    pslldq_i2r(10, xmm7);              /* xmm7 = 77 75 74 -- -- -- -- -- */ \
+    por_r2r(xmm6, xmm4);               /* xmm4 = 76 73 -- -- -- -- 35 33 */ \
+    \
+    psrldq_i2r(10, xmm2);              /* xmm2 = -- -- -- -- -- 53 52 -- */ \
+    pslldq_i2r(4, xmm0);               /* xmm0 = 66 65 -- -- -- -- -- -- */ \
+    \
+    pslldq_i2r(8, xmm1);               /* xmm1 = -- -- 67 64 -- -- -- -- */ \
+    por_r2r(xmm7, xmm3);               /* xmm3 = 77 75 74 -- -- -- -- 34 */ \
+    \
+    psrldq_i2r(6, xmm0);               /* xmm0 = -- -- -- 66 65 -- -- -- */ \
+    pslldq_i2r(4, xmm5);               /* xmm5 = -- -- -- -- 54 51 -- -- */ \
+    \
+    por_r2r(xmm1, xmm4);               /* xmm4 = 76 73 67 64 -- -- 35 33 */ \
+    por_r2r(xmm2, xmm3);               /* xmm3 = 77 75 74 -- -- 53 52 34 */ \
+    \
+    por_r2r(xmm5, xmm4);               /* xmm4 = 76 73 67 64 54 51 35 33 */ \
+    por_r2r(xmm0, xmm3);               /* xmm3 = 77 75 74 66 65 53 52 34 */ \
+    \
+    movdqu_r2m(xmm4, *(eax+96));       /* write  76 73 67 64 54 51 35 33 */ \
+    movdqu_r2m(xmm3, *(eax+112));      /* write  77 75 74 66 65 53 52 34 */ \
+    \
+} /* end of SSE2_Dequantize Macro */
+
+
+void ff_vp3_idct_sse2(int16_t *input_data)
+{
+    unsigned char *input_bytes = (unsigned char *)input_data;
+    unsigned char *output_data_bytes = (unsigned char *)input_data;
+    unsigned char *idct_data_bytes = (unsigned char *)SSE2_idct_data;
+    unsigned char *Eight = (unsigned char *)eight_data;
+
+#define eax input_bytes
+//#define ebx dequant_matrix_bytes
+#define ecx dequant_const_bytes
+#define edx idct_data_bytes
+
+#define I(i) (eax + 16 * i)
+#define O(i) (ebx + 16 * i)
+#define C(i) (edx + 16 * (i-1))
+
+ //   SSE2_Dequantize();
+
+#undef ebx
+#define ebx output_data_bytes
+
+    SSE2_Row_IDCT();
+
+    SSE2_Transpose();
+
+    SSE2_Column_IDCT();
+}
diff --git a/contrib/ffmpeg/libavcodec/idcinvideo.c b/contrib/ffmpeg/libavcodec/idcinvideo.c
new file mode 100644
index 000000000..112da7bc6
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/idcinvideo.c
@@ -0,0 +1,272 @@
+/*
+ * Id Quake II CIN Video Decoder
+ * Copyright (C) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file idcinvideo.c
+ * Id Quake II Cin Video Decoder by Dr. Tim Ferguson
+ * For more information about the Id CIN format, visit:
+ *   http://www.csse.monash.edu.au/~timf/
+ *
+ * This video decoder outputs PAL8 colorspace data. Interacting with this
+ * decoder is a little involved. During initialization, the demuxer must
+ * transmit the 65536-byte Huffman table(s) to the decoder via extradata.
+ * Then, whenever a palette change is encountered while demuxing the file,
+ * the demuxer must use the same extradata space to transmit an
+ * AVPaletteControl structure.
+ *
+ * Id CIN video is purely Huffman-coded, intraframe-only codec. It achieves
+ * a little more compression by exploiting the fact that adjacent pixels
+ * tend to be similar.
+ *
+ * Note that this decoder could use ffmpeg's optimized VLC facilities
+ * rather than naive, tree-based Huffman decoding. However, there are 256
+ * Huffman tables. Plus, the VLC bit coding order is right -> left instead
+ * or left -> right, so all of the bits would have to be reversed. Further,
+ * the original Quake II implementation likely used a similar naive
+ * decoding algorithm and it worked fine on much lower spec machines.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+
+#define HUFFMAN_TABLE_SIZE 64 * 1024
+#define HUF_TOKENS 256
+#define PALETTE_COUNT 256
+
+typedef struct
+{
+  int count;
+  unsigned char used;
+  int children[2];
+} hnode_t;
+
+typedef struct IdcinContext {
+
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    AVFrame frame;
+
+    unsigned char *buf;
+    int size;
+
+    hnode_t huff_nodes[256][HUF_TOKENS*2];
+    int num_huff_nodes[256];
+
+} IdcinContext;
+
+/*
+ * Find the lowest probability node in a Huffman table, and mark it as
+ * being assigned to a higher probability.
+ * Returns the node index of the lowest unused node, or -1 if all nodes
+ * are used.
+ */
+static int huff_smallest_node(hnode_t *hnodes, int num_hnodes) {
+    int i;
+    int best, best_node;
+
+    best = 99999999;
+    best_node = -1;
+    for(i = 0; i < num_hnodes; i++) {
+        if(hnodes[i].used)
+            continue;
+        if(!hnodes[i].count)
+            continue;
+        if(hnodes[i].count < best) {
+            best = hnodes[i].count;
+            best_node = i;
+        }
+    }
+
+    if(best_node == -1)
+        return -1;
+    hnodes[best_node].used = 1;
+    return best_node;
+}
+
+/*
+ * Build the Huffman tree using the generated/loaded probabilities histogram.
+ *
+ * On completion:
+ *  huff_nodes[prev][i < HUF_TOKENS] - are the nodes at the base of the tree.
+ *  huff_nodes[prev][i >= HUF_TOKENS] - are used to construct the tree.
+ *  num_huff_nodes[prev] - contains the index to the root node of the tree.
+ *    That is: huff_nodes[prev][num_huff_nodes[prev]] is the root node.
+ */
+static void huff_build_tree(IdcinContext *s, int prev) {
+    hnode_t *node, *hnodes;
+     int num_hnodes, i;
+
+    num_hnodes = HUF_TOKENS;
+    hnodes = s->huff_nodes[prev];
+    for(i = 0; i < HUF_TOKENS * 2; i++)
+        hnodes[i].used = 0;
+
+    while (1) {
+        node = &hnodes[num_hnodes];             /* next free node */
+
+        /* pick two lowest counts */
+        node->children[0] = huff_smallest_node(hnodes, num_hnodes);
+        if(node->children[0] == -1)
+            break;      /* reached the root node */
+
+        node->children[1] = huff_smallest_node(hnodes, num_hnodes);
+        if(node->children[1] == -1)
+            break;      /* reached the root node */
+
+        /* combine nodes probability for new node */
+        node->count = hnodes[node->children[0]].count +
+        hnodes[node->children[1]].count;
+        num_hnodes++;
+    }
+
+    s->num_huff_nodes[prev] = num_hnodes - 1;
+}
+
+static int idcin_decode_init(AVCodecContext *avctx)
+{
+    IdcinContext *s = (IdcinContext *)avctx->priv_data;
+    int i, j, histogram_index = 0;
+    unsigned char *histograms;
+
+    s->avctx = avctx;
+    avctx->pix_fmt = PIX_FMT_PAL8;
+    avctx->has_b_frames = 0;
+    dsputil_init(&s->dsp, avctx);
+
+    /* make sure the Huffman tables make it */
+    if (s->avctx->extradata_size != HUFFMAN_TABLE_SIZE) {
+        av_log(s->avctx, AV_LOG_ERROR, "  Id CIN video: expected extradata size of %d\n", HUFFMAN_TABLE_SIZE);
+        return -1;
+    }
+
+    /* build the 256 Huffman decode trees */
+    histograms = (unsigned char *)s->avctx->extradata;
+    for (i = 0; i < 256; i++) {
+        for(j = 0; j < HUF_TOKENS; j++)
+            s->huff_nodes[i][j].count = histograms[histogram_index++];
+        huff_build_tree(s, i);
+    }
+
+    s->frame.data[0] = NULL;
+
+    return 0;
+}
+
+static void idcin_decode_vlcs(IdcinContext *s)
+{
+    hnode_t *hnodes;
+    long x, y;
+    int prev;
+    unsigned char v = 0;
+    int bit_pos, node_num, dat_pos;
+
+    prev = bit_pos = dat_pos = 0;
+    for (y = 0; y < (s->frame.linesize[0] * s->avctx->height);
+        y += s->frame.linesize[0]) {
+        for (x = y; x < y + s->avctx->width; x++) {
+            node_num = s->num_huff_nodes[prev];
+            hnodes = s->huff_nodes[prev];
+
+            while(node_num >= HUF_TOKENS) {
+                if(!bit_pos) {
+                    if(dat_pos >= s->size) {
+                        av_log(s->avctx, AV_LOG_ERROR, "Huffman decode error.\n");
+                        return;
+                    }
+                    bit_pos = 8;
+                    v = s->buf[dat_pos++];
+                }
+
+                node_num = hnodes[node_num].children[v & 0x01];
+                v = v >> 1;
+                bit_pos--;
+            }
+
+            s->frame.data[0][x] = node_num;
+            prev = node_num;
+        }
+    }
+}
+
+static int idcin_decode_frame(AVCodecContext *avctx,
+                              void *data, int *data_size,
+                              uint8_t *buf, int buf_size)
+{
+    IdcinContext *s = (IdcinContext *)avctx->priv_data;
+    AVPaletteControl *palette_control = avctx->palctrl;
+
+    s->buf = buf;
+    s->size = buf_size;
+
+    if (s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    if (avctx->get_buffer(avctx, &s->frame)) {
+        av_log(avctx, AV_LOG_ERROR, "  Id CIN Video: get_buffer() failed\n");
+        return -1;
+    }
+
+    idcin_decode_vlcs(s);
+
+    /* make the palette available on the way out */
+    memcpy(s->frame.data[1], palette_control->palette, PALETTE_COUNT * 4);
+    /* If palette changed inform application*/
+    if (palette_control->palette_changed) {
+        palette_control->palette_changed = 0;
+        s->frame.palette_has_changed = 1;
+    }
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->frame;
+
+    /* report that the buffer was completely consumed */
+    return buf_size;
+}
+
+static int idcin_decode_end(AVCodecContext *avctx)
+{
+    IdcinContext *s = (IdcinContext *)avctx->priv_data;
+
+    if (s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    return 0;
+}
+
+AVCodec idcin_decoder = {
+    "idcinvideo",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_IDCIN,
+    sizeof(IdcinContext),
+    idcin_decode_init,
+    NULL,
+    idcin_decode_end,
+    idcin_decode_frame,
+    CODEC_CAP_DR1,
+};
+
diff --git a/contrib/ffmpeg/libavcodec/imc.c b/contrib/ffmpeg/libavcodec/imc.c
new file mode 100644
index 000000000..7360b6409
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/imc.c
@@ -0,0 +1,813 @@
+/*
+ * IMC compatible decoder
+ * Copyright (c) 2002-2004 Maxim Poliakovski
+ * Copyright (c) 2006 Benjamin Larsson
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ *  @file imc.c IMC - Intel Music Coder
+ *  A mdct based codec using a 256 points large transform
+ *  divied into 32 bands with some mix of scale factors.
+ *  Only mono is supported.
+ *
+ */
+
+
+#include <math.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#define ALT_BITSTREAM_READER
+#include "avcodec.h"
+#include "bitstream.h"
+#include "dsputil.h"
+
+#include "imcdata.h"
+
+#define IMC_FRAME_ID 0x21
+#define BANDS 32
+#define COEFFS 256
+
+typedef struct {
+    float old_floor[BANDS];
+    float flcoeffs1[BANDS];
+    float flcoeffs2[BANDS];
+    float flcoeffs3[BANDS];
+    float flcoeffs4[BANDS];
+    float flcoeffs5[BANDS];
+    float flcoeffs6[BANDS];
+    float CWdecoded[COEFFS];
+
+    /** MDCT tables */
+    //@{
+    float mdct_sine_window[COEFFS];
+    float post_cos[COEFFS];
+    float post_sin[COEFFS];
+    float pre_coef1[COEFFS];
+    float pre_coef2[COEFFS];
+    float last_fft_im[COEFFS];
+    //@}
+
+    int bandWidthT[BANDS];     ///< codewords per band
+    int bitsBandT[BANDS];      ///< how many bits per codeword in band
+    int CWlengthT[COEFFS];     ///< how many bits in each codeword
+    int levlCoeffBuf[BANDS];
+    int bandFlagsBuf[BANDS];   ///< flags for each band
+    int sumLenArr[BANDS];      ///< bits for all coeffs in band
+    int skipFlagRaw[BANDS];    ///< skip flags are stored in raw form or not
+    int skipFlagBits[BANDS];   ///< bits used to code skip flags
+    int skipFlagCount[BANDS];  ///< skipped coeffients per band
+    int skipFlags[COEFFS];     ///< skip coefficient decoding or not
+    int codewords[COEFFS];     ///< raw codewords read from bitstream
+    float sqrt_tab[30];
+    GetBitContext gb;
+    VLC huffman_vlc[4][4];
+    int decoder_reset;
+    float one_div_log2;
+
+    DSPContext dsp;
+    FFTContext fft;
+    DECLARE_ALIGNED_16(FFTComplex, samples[COEFFS/2]);
+    DECLARE_ALIGNED_16(float, out_samples[COEFFS]);
+} IMCContext;
+
+
+static int imc_decode_init(AVCodecContext * avctx)
+{
+    int i, j;
+    IMCContext *q = avctx->priv_data;
+    double r1, r2;
+
+    q->decoder_reset = 1;
+
+    for(i = 0; i < BANDS; i++)
+        q->old_floor[i] = 1.0;
+
+    /* Build mdct window, a simple sine window normalized with sqrt(2) */
+    for(i = 0; i < COEFFS; i++)
+        q->mdct_sine_window[i] = sin((i + 0.5) / 512.0 * M_PI) * sqrt(2.0);
+    for(i = 0; i < COEFFS/2; i++){
+        q->post_cos[i] = cos(i / 256.0 * M_PI);
+        q->post_sin[i] = sin(i / 256.0 * M_PI);
+
+        r1 = sin((i * 4.0 + 1.0) / 1024.0 * M_PI);
+        r2 = cos((i * 4.0 + 1.0) / 1024.0 * M_PI);
+
+        if (i & 0x1)
+        {
+            q->pre_coef1[i] =  (r1 + r2) * sqrt(2.0);
+            q->pre_coef2[i] = -(r1 - r2) * sqrt(2.0);
+        }
+        else
+        {
+            q->pre_coef1[i] = -(r1 + r2) * sqrt(2.0);
+            q->pre_coef2[i] =  (r1 - r2) * sqrt(2.0);
+        }
+
+        q->last_fft_im[i] = 0;
+    }
+
+    /* Generate a square root table */
+
+    for(i = 0; i < 30; i++) {
+        q->sqrt_tab[i] = sqrt(i);
+    }
+
+    /* initialize the VLC tables */
+    for(i = 0; i < 4 ; i++) {
+        for(j = 0; j < 4; j++) {
+            init_vlc (&q->huffman_vlc[i][j], 9, imc_huffman_sizes[i],
+                     imc_huffman_lens[i][j], 1, 1,
+                     imc_huffman_bits[i][j], 2, 2, 1);
+        }
+    }
+    q->one_div_log2 = 1/log(2);
+
+    ff_fft_init(&q->fft, 7, 1);
+    dsputil_init(&q->dsp, avctx);
+    return 0;
+}
+
+static void imc_calculate_coeffs(IMCContext* q, float* flcoeffs1, float* flcoeffs2, int* bandWidthT,
+                                float* flcoeffs3, float* flcoeffs5)
+{
+    float   workT1[BANDS];
+    float   workT2[BANDS];
+    float   workT3[BANDS];
+    float   snr_limit = 1.e-30;
+    float   accum = 0.0;
+    int i, cnt2;
+
+    for(i = 0; i < BANDS; i++) {
+        flcoeffs5[i] = workT2[i] = 0.0;
+        if (bandWidthT[i]){
+            workT1[i] = flcoeffs1[i] * flcoeffs1[i];
+            flcoeffs3[i] = 2.0 * flcoeffs2[i];
+        } else {
+            workT1[i] = 0.0;
+            flcoeffs3[i] = -30000.0;
+        }
+        workT3[i] = bandWidthT[i] * workT1[i] * 0.01;
+        if (workT3[i] <= snr_limit)
+            workT3[i] = 0.0;
+    }
+
+    for(i = 0; i < BANDS; i++) {
+        for(cnt2 = i; cnt2 < cyclTab[i]; cnt2++)
+            flcoeffs5[cnt2] = flcoeffs5[cnt2] + workT3[i];
+        workT2[cnt2-1] = workT2[cnt2-1] + workT3[i];
+    }
+
+    for(i = 1; i < BANDS; i++) {
+        accum = (workT2[i-1] + accum) * imc_weights1[i-1];
+        flcoeffs5[i] += accum;
+    }
+
+    for(i = 0; i < BANDS; i++)
+        workT2[i] = 0.0;
+
+    for(i = 0; i < BANDS; i++) {
+        for(cnt2 = i-1; cnt2 > cyclTab2[i]; cnt2--)
+            flcoeffs5[cnt2] += workT3[i];
+        workT2[cnt2+1] += workT3[i];
+    }
+
+    accum = 0.0;
+
+    for(i = BANDS-2; i >= 0; i--) {
+        accum = (workT2[i+1] + accum) * imc_weights2[i];
+        flcoeffs5[i] += accum;
+        //there is missing code here, but it seems to never be triggered
+    }
+}
+
+
+static void imc_read_level_coeffs(IMCContext* q, int stream_format_code, int* levlCoeffs)
+{
+    int i;
+    VLC *hufftab[4];
+    int start = 0;
+    const uint8_t *cb_sel;
+    int s;
+
+    s = stream_format_code >> 1;
+    hufftab[0] = &q->huffman_vlc[s][0];
+    hufftab[1] = &q->huffman_vlc[s][1];
+    hufftab[2] = &q->huffman_vlc[s][2];
+    hufftab[3] = &q->huffman_vlc[s][3];
+    cb_sel = imc_cb_select[s];
+
+    if(stream_format_code & 4)
+        start = 1;
+    if(start)
+        levlCoeffs[0] = get_bits(&q->gb, 7);
+    for(i = start; i < BANDS; i++){
+        levlCoeffs[i] = get_vlc2(&q->gb, hufftab[cb_sel[i]]->table, hufftab[cb_sel[i]]->bits, 2);
+        if(levlCoeffs[i] == 17)
+            levlCoeffs[i] += get_bits(&q->gb, 4);
+    }
+}
+
+static void imc_decode_level_coefficients(IMCContext* q, int* levlCoeffBuf, float* flcoeffs1,
+                                         float* flcoeffs2)
+{
+    int i, level;
+    float tmp, tmp2;
+    //maybe some frequency division thingy
+
+    flcoeffs1[0] = 20000.0 / pow (2, levlCoeffBuf[0] * 0.18945); // 0.18945 = log2(10) * 0.05703125
+    flcoeffs2[0] = log(flcoeffs1[0])/log(2);
+    tmp = flcoeffs1[0];
+    tmp2 = flcoeffs2[0];
+
+    for(i = 1; i < BANDS; i++) {
+        level = levlCoeffBuf[i];
+        if (level == 16) {
+            flcoeffs1[i] = 1.0;
+            flcoeffs2[i] = 0.0;
+        } else {
+            if (level < 17)
+                level -=7;
+            else if (level <= 24)
+                level -=32;
+            else
+                level -=16;
+
+            tmp  *= imc_exp_tab[15 + level];
+            tmp2 += 0.83048 * level;  // 0.83048 = log2(10) * 0.25
+            flcoeffs1[i] = tmp;
+            flcoeffs2[i] = tmp2;
+        }
+    }
+}
+
+
+static void imc_decode_level_coefficients2(IMCContext* q, int* levlCoeffBuf, float* old_floor, float* flcoeffs1,
+                                          float* flcoeffs2) {
+    int i;
+        //FIXME maybe flag_buf = noise coding and flcoeffs1 = new scale factors
+        //      and flcoeffs2 old scale factors
+        //      might be incomplete due to a missing table that is in the binary code
+    for(i = 0; i < BANDS; i++) {
+        flcoeffs1[i] = 0;
+        if(levlCoeffBuf[i] < 16) {
+            flcoeffs1[i] = imc_exp_tab2[levlCoeffBuf[i]] * old_floor[i];
+            flcoeffs2[i] = (levlCoeffBuf[i]-7) * 0.83048 + flcoeffs2[i]; // 0.83048 = log2(10) * 0.25
+        } else {
+            flcoeffs1[i] = old_floor[i];
+        }
+    }
+}
+
+/**
+ * Perform bit allocation depending on bits available
+ */
+static int bit_allocation (IMCContext* q, int stream_format_code, int freebits, int flag) {
+    int i, j;
+    const float limit = -1.e20;
+    float highest = 0.0;
+    int indx;
+    int t1 = 0;
+    int t2 = 1;
+    float summa = 0.0;
+    int iacc = 0;
+    int summer = 0;
+    int rres, cwlen;
+    float lowest = 1.e10;
+    int low_indx = 0;
+    float workT[32];
+    int flg;
+    int found_indx = 0;
+
+    for(i = 0; i < BANDS; i++)
+        highest = FFMAX(highest, q->flcoeffs1[i]);
+
+    for(i = 0; i < BANDS-1; i++) {
+        q->flcoeffs4[i] = q->flcoeffs3[i] - log(q->flcoeffs5[i])/log(2);
+    }
+    q->flcoeffs4[BANDS - 1] = limit;
+
+    highest = highest * 0.25;
+
+    for(i = 0; i < BANDS; i++) {
+        indx = -1;
+        if ((band_tab[i+1] - band_tab[i]) == q->bandWidthT[i])
+            indx = 0;
+
+        if ((band_tab[i+1] - band_tab[i]) > q->bandWidthT[i])
+            indx = 1;
+
+        if (((band_tab[i+1] - band_tab[i])/2) >= q->bandWidthT[i])
+            indx = 2;
+
+        if (indx == -1)
+            return -1;
+
+        q->flcoeffs4[i] = q->flcoeffs4[i] + xTab[(indx*2 + (q->flcoeffs1[i] < highest)) * 2 + flag];
+    }
+
+    if (stream_format_code & 0x2) {
+        q->flcoeffs4[0] = limit;
+        q->flcoeffs4[1] = limit;
+        q->flcoeffs4[2] = limit;
+        q->flcoeffs4[3] = limit;
+    }
+
+    for(i = (stream_format_code & 0x2)?4:0; i < BANDS-1; i++) {
+        iacc += q->bandWidthT[i];
+        summa += q->bandWidthT[i] * q->flcoeffs4[i];
+    }
+    q->bandWidthT[BANDS-1] = 0;
+    summa = (summa * 0.5 - freebits) / iacc;
+
+
+    for(i = 0; i < BANDS/2; i++) {
+        rres = summer - freebits;
+        if((rres >= -8) && (rres <= 8)) break;
+
+        summer = 0;
+        iacc = 0;
+
+        for(j = (stream_format_code & 0x2)?4:0; j < BANDS; j++) {
+            cwlen = clip((int)((q->flcoeffs4[j] * 0.5) - summa + 0.5), 0, 6);
+
+            q->bitsBandT[j] = cwlen;
+            summer += q->bandWidthT[j] * cwlen;
+
+            if (cwlen > 0)
+                iacc += q->bandWidthT[j];
+        }
+
+        flg = t2;
+        t2 = 1;
+        if (freebits < summer)
+            t2 = -1;
+        if (i == 0)
+            flg = t2;
+        if(flg != t2)
+            t1++;
+
+        summa = (float)(summer - freebits) / ((t1 + 1) * iacc) + summa;
+    }
+
+    for(i = (stream_format_code & 0x2)?4:0; i < BANDS; i++) {
+        for(j = band_tab[i]; j < band_tab[i+1]; j++)
+            q->CWlengthT[j] = q->bitsBandT[i];
+    }
+
+    if (freebits > summer) {
+        for(i = 0; i < BANDS; i++) {
+            workT[i] = (q->bitsBandT[i] == 6) ? -1.e20 : (q->bitsBandT[i] * -2 + q->flcoeffs4[i] - 0.415);
+        }
+
+        highest = 0.0;
+
+        do{
+            if (highest <= -1.e20)
+                break;
+
+            found_indx = 0;
+            highest = -1.e20;
+
+            for(i = 0; i < BANDS; i++) {
+                if (workT[i] > highest) {
+                    highest = workT[i];
+                    found_indx = i;
+                }
+            }
+
+            if (highest > -1.e20) {
+                workT[found_indx] -= 2.0;
+                if (++(q->bitsBandT[found_indx]) == 6)
+                    workT[found_indx] = -1.e20;
+
+                for(j = band_tab[found_indx]; j < band_tab[found_indx+1] && (freebits > summer); j++){
+                    q->CWlengthT[j]++;
+                    summer++;
+                }
+            }
+        }while (freebits > summer);
+    }
+    if (freebits < summer) {
+        for(i = 0; i < BANDS; i++) {
+            workT[i] = q->bitsBandT[i] ? (q->bitsBandT[i] * -2 + q->flcoeffs4[i] + 1.585) : 1.e20;
+        }
+        if (stream_format_code & 0x2) {
+            workT[0] = 1.e20;
+            workT[1] = 1.e20;
+            workT[2] = 1.e20;
+            workT[3] = 1.e20;
+        }
+        while (freebits < summer){
+            lowest = 1.e10;
+            low_indx = 0;
+            for(i = 0; i < BANDS; i++) {
+                if (workT[i] < lowest) {
+                    lowest = workT[i];
+                    low_indx = i;
+                }
+            }
+            //if(lowest >= 1.e10) break;
+            workT[low_indx] = lowest + 2.0;
+
+            if (!(--q->bitsBandT[low_indx]))
+                workT[low_indx] = 1.e20;
+
+            for(j = band_tab[low_indx]; j < band_tab[low_indx+1] && (freebits < summer); j++){
+                if(q->CWlengthT[j] > 0){
+                    q->CWlengthT[j]--;
+                    summer--;
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+static void imc_get_skip_coeff(IMCContext* q) {
+    int i, j;
+
+    memset(q->skipFlagBits, 0, sizeof(q->skipFlagBits));
+    memset(q->skipFlagCount, 0, sizeof(q->skipFlagCount));
+    for(i = 0; i < BANDS; i++) {
+        if (!q->bandFlagsBuf[i] || !q->bandWidthT[i])
+            continue;
+
+        if (!q->skipFlagRaw[i]) {
+            q->skipFlagBits[i] = band_tab[i+1] - band_tab[i];
+
+            for(j = band_tab[i]; j < band_tab[i+1]; j++) {
+                if ((q->skipFlags[j] = get_bits(&q->gb,1)))
+                    q->skipFlagCount[i]++;
+            }
+        } else {
+            for(j = band_tab[i]; j < (band_tab[i+1]-1); j += 2) {
+                if(!get_bits1(&q->gb)){//0
+                    q->skipFlagBits[i]++;
+                    q->skipFlags[j]=1;
+                    q->skipFlags[j+1]=1;
+                    q->skipFlagCount[i] += 2;
+                }else{
+                    if(get_bits1(&q->gb)){//11
+                        q->skipFlagBits[i] +=2;
+                        q->skipFlags[j]=0;
+                        q->skipFlags[j+1]=1;
+                        q->skipFlagCount[i]++;
+                    }else{
+                        q->skipFlagBits[i] +=3;
+                        q->skipFlags[j+1]=0;
+                        if(!get_bits1(&q->gb)){//100
+                            q->skipFlags[j]=1;
+                            q->skipFlagCount[i]++;
+                        }else{//101
+                            q->skipFlags[j]=0;
+                        }
+                    }
+                }
+            }
+
+            if (j < band_tab[i+1]) {
+                q->skipFlagBits[i]++;
+                if ((q->skipFlags[j] = get_bits(&q->gb,1)))
+                    q->skipFlagCount[i]++;
+            }
+        }
+    }
+}
+
+/**
+ * Increase highest' band coefficient sizes as some bits won't be used
+ */
+static void imc_adjust_bit_allocation (IMCContext* q, int summer) {
+    float workT[32];
+    int corrected = 0;
+    int i, j;
+    float highest = 0;
+    int found_indx=0;
+
+    for(i = 0; i < BANDS; i++) {
+        workT[i] = (q->bitsBandT[i] == 6) ? -1.e20 : (q->bitsBandT[i] * -2 + q->flcoeffs4[i] - 0.415);
+    }
+
+    while (corrected < summer) {
+        if(highest <= -1.e20)
+            break;
+
+        highest = -1.e20;
+
+        for(i = 0; i < BANDS; i++) {
+            if (workT[i] > highest) {
+                highest = workT[i];
+                found_indx = i;
+            }
+        }
+
+        if (highest > -1.e20) {
+            workT[found_indx] -= 2.0;
+            if (++(q->bitsBandT[found_indx]) == 6)
+                workT[found_indx] = -1.e20;
+
+            for(j = band_tab[found_indx]; j < band_tab[found_indx+1] && (corrected < summer); j++) {
+                if (!q->skipFlags[j] && (q->CWlengthT[j] < 6)) {
+                    q->CWlengthT[j]++;
+                    corrected++;
+                }
+            }
+        }
+    }
+}
+
+static void imc_imdct256(IMCContext *q) {
+    int i;
+    float re, im;
+
+    /* prerotation */
+    for(i=0; i < COEFFS/2; i++){
+        q->samples[i].re = -(q->pre_coef1[i] * q->CWdecoded[COEFFS-1-i*2]) -
+                           (q->pre_coef2[i] * q->CWdecoded[i*2]);
+        q->samples[i].im = (q->pre_coef2[i] * q->CWdecoded[COEFFS-1-i*2]) -
+                           (q->pre_coef1[i] * q->CWdecoded[i*2]);
+    }
+
+    /* FFT */
+    ff_fft_permute(&q->fft, q->samples);
+    ff_fft_calc (&q->fft, q->samples);
+
+    /* postrotation, window and reorder */
+    for(i = 0; i < COEFFS/2; i++){
+        re = (q->samples[i].re * q->post_cos[i]) + (-q->samples[i].im * q->post_sin[i]);
+        im = (-q->samples[i].im * q->post_cos[i]) - (q->samples[i].re * q->post_sin[i]);
+        q->out_samples[i*2] = (q->mdct_sine_window[COEFFS-1-i*2] * q->last_fft_im[i]) + (q->mdct_sine_window[i*2] * re);
+        q->out_samples[COEFFS-1-i*2] = (q->mdct_sine_window[i*2] * q->last_fft_im[i]) - (q->mdct_sine_window[COEFFS-1-i*2] * re);
+        q->last_fft_im[i] = im;
+    }
+}
+
+static int inverse_quant_coeff (IMCContext* q, int stream_format_code) {
+    int i, j;
+    int middle_value, cw_len, max_size;
+    const float* quantizer;
+
+    for(i = 0; i < BANDS; i++) {
+        for(j = band_tab[i]; j < band_tab[i+1]; j++) {
+            q->CWdecoded[j] = 0;
+            cw_len = q->CWlengthT[j];
+
+            if (cw_len <= 0 || q->skipFlags[j])
+                continue;
+
+            max_size = 1 << cw_len;
+            middle_value = max_size >> 1;
+
+            if (q->codewords[j] >= max_size || q->codewords[j] < 0)
+                return -1;
+
+            if (cw_len >= 4){
+                quantizer = imc_quantizer2[(stream_format_code & 2) >> 1];
+                if (q->codewords[j] >= middle_value)
+                    q->CWdecoded[j] = quantizer[q->codewords[j] - 8] * q->flcoeffs6[i];
+                else
+                    q->CWdecoded[j] = -quantizer[max_size - q->codewords[j] - 8 - 1] * q->flcoeffs6[i];
+            }else{
+                quantizer = imc_quantizer1[((stream_format_code & 2) >> 1) | (q->bandFlagsBuf[i] << 1)];
+                if (q->codewords[j] >= middle_value)
+                    q->CWdecoded[j] = quantizer[q->codewords[j] - 1] * q->flcoeffs6[i];
+                else
+                    q->CWdecoded[j] = -quantizer[max_size - 2 - q->codewords[j]] * q->flcoeffs6[i];
+            }
+        }
+    }
+    return 0;
+}
+
+
+static int imc_get_coeffs (IMCContext* q) {
+    int i, j, cw_len, cw;
+
+    for(i = 0; i < BANDS; i++) {
+        if(!q->sumLenArr[i]) continue;
+        if (q->bandFlagsBuf[i] || q->bandWidthT[i]) {
+            for(j = band_tab[i]; j < band_tab[i+1]; j++) {
+                cw_len = q->CWlengthT[j];
+                cw = 0;
+
+                if (get_bits_count(&q->gb) + cw_len > 512){
+//av_log(NULL,0,"Band %i coeff %i cw_len %i\n",i,j,cw_len);
+                    return -1;
+                }
+
+                if(cw_len && (!q->bandFlagsBuf[i] || !q->skipFlags[j]))
+                    cw = get_bits(&q->gb, cw_len);
+
+                q->codewords[j] = cw;
+            }
+        }
+    }
+    return 0;
+}
+
+static int imc_decode_frame(AVCodecContext * avctx,
+                            void *data, int *data_size,
+                            uint8_t * buf, int buf_size)
+{
+
+    IMCContext *q = avctx->priv_data;
+
+    int stream_format_code;
+    int imc_hdr, i, j;
+    int flag;
+    int bits, summer;
+    int counter, bitscount;
+    uint16_t *buf16 = (uint16_t *) buf;
+
+    /* FIXME: input should not be modified */
+    for(i = 0; i < FFMIN(buf_size, avctx->block_align) / 2; i++)
+        buf16[i] = bswap_16(buf16[i]);
+
+    init_get_bits(&q->gb, buf, 512);
+
+    /* Check the frame header */
+    imc_hdr = get_bits(&q->gb, 9);
+    if (imc_hdr != IMC_FRAME_ID) {
+        av_log(avctx, AV_LOG_ERROR, "imc frame header check failed!\n");
+        av_log(avctx, AV_LOG_ERROR, "got %x instead of 0x21.\n", imc_hdr);
+        return -1;
+    }
+    stream_format_code = get_bits(&q->gb, 3);
+
+    if(stream_format_code & 1){
+        av_log(avctx, AV_LOG_ERROR, "Stream code format %X is not supported\n", stream_format_code);
+        return -1;
+    }
+
+//    av_log(avctx, AV_LOG_DEBUG, "stream_format_code = %d\n", stream_format_code);
+
+    if (stream_format_code & 0x04)
+        q->decoder_reset = 1;
+
+    if(q->decoder_reset) {
+        memset(q->out_samples, 0, sizeof(q->out_samples));
+        for(i = 0; i < BANDS; i++)q->old_floor[i] = 1.0;
+        for(i = 0; i < COEFFS; i++)q->CWdecoded[i] = 0;
+        q->decoder_reset = 0;
+    }
+
+    flag = get_bits1(&q->gb);
+    imc_read_level_coeffs(q, stream_format_code, q->levlCoeffBuf);
+
+    if (stream_format_code & 0x4)
+        imc_decode_level_coefficients(q, q->levlCoeffBuf, q->flcoeffs1, q->flcoeffs2);
+    else
+        imc_decode_level_coefficients2(q, q->levlCoeffBuf, q->old_floor, q->flcoeffs1, q->flcoeffs2);
+
+    memcpy(q->old_floor, q->flcoeffs1, 32 * sizeof(float));
+
+    counter = 0;
+    for (i=0 ; i<BANDS ; i++) {
+        if (q->levlCoeffBuf[i] == 16) {
+            q->bandWidthT[i] = 0;
+            counter++;
+        } else
+            q->bandWidthT[i] = band_tab[i+1] - band_tab[i];
+    }
+    memset(q->bandFlagsBuf, 0, BANDS * sizeof(int));
+    for(i = 0; i < BANDS-1; i++) {
+        if (q->bandWidthT[i])
+            q->bandFlagsBuf[i] = get_bits1(&q->gb);
+    }
+
+    imc_calculate_coeffs(q, q->flcoeffs1, q->flcoeffs2, q->bandWidthT, q->flcoeffs3, q->flcoeffs5);
+
+    bitscount = 0;
+    /* first 4 bands will be assigned 5 bits per coefficient */
+    if (stream_format_code & 0x2) {
+        bitscount += 15;
+
+        q->bitsBandT[0] = 5;
+        q->CWlengthT[0] = 5;
+        q->CWlengthT[1] = 5;
+        q->CWlengthT[2] = 5;
+        for(i = 1; i < 4; i++){
+            bits = (q->levlCoeffBuf[i] == 16) ? 0 : 5;
+            q->bitsBandT[i] = bits;
+            for(j = band_tab[i]; j < band_tab[i+1]; j++) {
+                q->CWlengthT[j] = bits;
+                bitscount += bits;
+            }
+        }
+    }
+
+    if(bit_allocation (q, stream_format_code, 512 - bitscount - get_bits_count(&q->gb), flag) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Bit allocations failed\n");
+        q->decoder_reset = 1;
+        return -1;
+    }
+
+    for(i = 0; i < BANDS; i++) {
+        q->sumLenArr[i] = 0;
+        q->skipFlagRaw[i] = 0;
+        for(j = band_tab[i]; j < band_tab[i+1]; j++)
+            q->sumLenArr[i] += q->CWlengthT[j];
+        if (q->bandFlagsBuf[i])
+            if( (((band_tab[i+1] - band_tab[i]) * 1.5) > q->sumLenArr[i]) && (q->sumLenArr[i] > 0))
+                q->skipFlagRaw[i] = 1;
+    }
+
+    imc_get_skip_coeff(q);
+
+    for(i = 0; i < BANDS; i++) {
+        q->flcoeffs6[i] = q->flcoeffs1[i];
+        /* band has flag set and at least one coded coefficient */
+        if (q->bandFlagsBuf[i] && (band_tab[i+1] - band_tab[i]) != q->skipFlagCount[i]){
+                q->flcoeffs6[i] *= q->sqrt_tab[band_tab[i+1] - band_tab[i]] /
+                                   q->sqrt_tab[(band_tab[i+1] - band_tab[i] - q->skipFlagCount[i])];
+        }
+    }
+
+    /* calculate bits left, bits needed and adjust bit allocation */
+    bits = summer = 0;
+
+    for(i = 0; i < BANDS; i++) {
+        if (q->bandFlagsBuf[i]) {
+            for(j = band_tab[i]; j < band_tab[i+1]; j++) {
+                if(q->skipFlags[j]) {
+                    summer += q->CWlengthT[j];
+                    q->CWlengthT[j] = 0;
+                }
+            }
+            bits += q->skipFlagBits[i];
+            summer -= q->skipFlagBits[i];
+        }
+    }
+    imc_adjust_bit_allocation(q, summer);
+
+    for(i = 0; i < BANDS; i++) {
+        q->sumLenArr[i] = 0;
+
+        for(j = band_tab[i]; j < band_tab[i+1]; j++)
+            if (!q->skipFlags[j])
+                q->sumLenArr[i] += q->CWlengthT[j];
+    }
+
+    memset(q->codewords, 0, sizeof(q->codewords));
+
+    if(imc_get_coeffs(q) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Read coefficients failed\n");
+        q->decoder_reset = 1;
+        return 0;
+    }
+
+    if(inverse_quant_coeff(q, stream_format_code) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Inverse quantization of coefficients failed\n");
+        q->decoder_reset = 1;
+        return 0;
+    }
+
+    memset(q->skipFlags, 0, sizeof(q->skipFlags));
+
+    imc_imdct256(q);
+
+    q->dsp.float_to_int16(data, q->out_samples, COEFFS);
+
+    *data_size = COEFFS * sizeof(int16_t);
+
+    return avctx->block_align;
+}
+
+
+static int imc_decode_close(AVCodecContext * avctx)
+{
+    IMCContext *q = avctx->priv_data;
+
+    ff_fft_end(&q->fft);
+    return 0;
+}
+
+
+AVCodec imc_decoder = {
+    .name = "imc",
+    .type = CODEC_TYPE_AUDIO,
+    .id = CODEC_ID_IMC,
+    .priv_data_size = sizeof(IMCContext),
+    .init = imc_decode_init,
+    .close = imc_decode_close,
+    .decode = imc_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/imcdata.h b/contrib/ffmpeg/libavcodec/imcdata.h
new file mode 100644
index 000000000..92ed275f1
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/imcdata.h
@@ -0,0 +1,164 @@
+/*
+ * IMC compatible decoder
+ * Copyright (c) 2002-2004 Maxim Poliakovski
+ * Copyright (c) 2006 Benjamin Larsson
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+static const uint16_t band_tab[33] = {
+      0,   3,   6,   9,  12,  16,  20,  24,  29,  34,  40,
+     46,  53,  60,  68,  76,  84,  93, 102, 111, 121, 131,
+    141, 151, 162, 173, 184, 195, 207, 219, 231, 243, 256,
+};
+
+
+static const int8_t cyclTab[32] = {
+    1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
+   12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23,
+   24, 25, 26, 27, 28, 29, 30, 31, 32, 32,
+};
+
+static const int8_t cyclTab2[32] = {
+   -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16, 17, 17, 18, 19, 20, 21, 22,
+23, 24, 25, 26, 27, 28, 29};
+
+static const float imc_weights1[31] = {
+    0.119595, 0.123124, 0.129192, 9.97377e-2, 8.1923e-2, 9.61153e-2, 8.77885e-2, 8.61174e-2,
+    9.00882e-2, 9.91658e-2, 0.112991, 0.131126, 0.152886, 0.177292, 0.221782, 0.244917, 0.267386,
+    0.306816, 0.323046, 0.33729, 0.366773, 0.392557, 0.398076, 0.403302, 0.42451, 0.444777,
+    0.449188, 0.455445, 0.477853, 0.500669, 0.510395};
+
+static const float imc_weights2[31] = {
+    3.23466e-3, 3.49886e-3, 3.98413e-3, 1.98116e-3, 1.16465e-3, 1.79283e-3, 1.40372e-3, 1.33274e-3,
+    1.50523e-3, 1.95064e-3, 2.77472e-3, 4.14725e-3, 6.2776e-3, 9.36401e-3, 1.71397e-2, 2.24052e-2,
+    2.83971e-2, 4.11689e-2, 4.73165e-2, 5.31631e-2, 6.66614e-2, 8.00824e-2, 8.31588e-2, 8.61397e-2,
+    9.89229e-2, 0.112197, 0.115227, 0.119613, 0.136174, 0.15445, 0.162685};
+
+static const float imc_quantizer1[4][8] = {
+    { 8.4431201e-1, 4.7358301e-1, 1.448354, 2.7073899e-1, 7.4449003e-1, 1.241991,  1.845484,  0.0},
+    { 8.6876702e-1, 4.7659001e-1, 1.478224, 2.5672799e-1, 7.55777e-1,   1.3229851, 2.03438,   0.0},
+    { 7.5891501e-1, 6.2272799e-1, 1.271322, 3.47904e-1,   7.5317699e-1, 1.150767,  1.628476,  0.0},
+    { 7.65257e-1,   6.44647e-1,   1.263824, 3.4548101e-1, 7.6384902e-1, 1.214466,  1.7638789, 0.0},
+};
+
+static const float imc_quantizer2[2][56] = {
+    { 1.39236e-1, 3.50548e-1, 5.9547901e-1, 8.5772401e-1, 1.121545, 1.3882281, 1.695882, 2.1270809,
+      7.2221003e-2, 1.85177e-1, 2.9521701e-1, 4.12568e-1, 5.4068601e-1, 6.7679501e-1, 8.1196898e-1, 9.4765198e-1,
+      1.0779999, 1.203415, 1.337265, 1.481871, 1.639982, 1.814766, 2.0701399, 2.449862,
+      3.7533998e-2, 1.02722e-1, 1.6021401e-1, 2.16043e-1, 2.7231601e-1, 3.3025399e-1, 3.9022601e-1, 4.52849e-1,
+      5.1794899e-1, 5.8529502e-1, 6.53956e-1, 7.2312802e-1, 7.9150802e-1, 8.5891002e-1, 9.28141e-1, 9.9706203e-1,
+      1.062153, 1.12564, 1.189834, 1.256122, 1.324469, 1.3955311, 1.468906, 1.545084,
+      1.6264729, 1.711524, 1.802705, 1.91023, 2.0533991, 2.22333, 2.4830019, 3.253329 },
+    { 1.11654e-1, 3.54469e-1, 6.4232099e-1, 9.6128798e-1, 1.295053, 1.61777, 1.989839, 2.51107,
+      5.7721999e-2, 1.69879e-1, 2.97589e-1, 4.3858799e-1, 5.9039903e-1, 7.4934798e-1, 9.1628098e-1, 1.087297,
+      1.262751, 1.4288321, 1.6040879, 1.79067, 2.000668, 2.2394669, 2.649332, 5.2760072,
+      2.9722e-2, 8.7316997e-2, 1.4445201e-1, 2.04247e-1, 2.6879501e-1, 3.3716801e-1, 4.08811e-1, 4.8306999e-1,
+      5.6049401e-1, 6.3955498e-1, 7.2044599e-1, 8.0427998e-1, 8.8933599e-1, 9.7537601e-1, 1.062461, 1.1510431,
+      1.240236, 1.326715, 1.412513, 1.500502, 1.591749, 1.686413, 1.785239, 1.891233,
+      2.0051291, 2.127681, 2.2709141, 2.475826, 2.7219379, 3.101985, 4.686213, 6.2287788},
+};
+
+
+static const float xTab[14] = {7.6, 3.6, 4.4, 3.7, 6.1, 5.1, 2.3, 1.6, 6.2, 1.5, 1.8, 1.2, 0, 0}; //10014048
+
+/* precomputed table for 10^(i/4), i=-15..16 */
+static const float imc_exp_tab[32] = {
+    1.778280e-4, 3.162278e-4, 5.623413e-4, 1.000000e-3,
+    1.778280e-3, 3.162278e-3, 5.623413e-3, 1.000000e-2,
+    1.778280e-2, 3.162278e-2, 5.623413e-2, 1.000000e-1,
+    1.778280e-1, 3.162278e-1, 5.623413e-1, 1.000000e00,
+    1.778280e00, 3.162278e00, 5.623413e00, 1.000000e01,
+    1.778280e01, 3.162278e01, 5.623413e01, 1.000000e02,
+    1.778280e02, 3.162278e02, 5.623413e02, 1.000000e03,
+    1.778280e03, 3.162278e03, 5.623413e03, 1.000000e04
+};
+static const float *imc_exp_tab2 = imc_exp_tab + 8;
+
+
+static const uint8_t imc_cb_select[4][32] = {
+    { 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2 },
+    { 0, 2, 0, 3, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+    { 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2 },
+    { 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+};
+
+static const uint8_t imc_huffman_sizes[4] = {
+    17,  17,  18,  18
+};
+
+static const uint8_t imc_huffman_lens[4][4][18] = {
+    {
+        { 16, 15, 13, 11,  8,  5,  3,  1,  2,  4,  6,  9, 10, 12, 14, 16,  7,  0 },
+        { 10,  8,  7,  6,  4,  4,  3,  2,  2,  3,  4,  6,  7,  9, 11, 11,  7,  0 },
+        { 15, 15, 14, 11,  8,  6,  4,  2,  1,  4,  5,  7,  9, 10, 12, 13,  4,  0 },
+        { 13, 11, 10,  8,  6,  4,  2,  2,  2,  3,  5,  7,  9, 12, 15, 15, 14,  0 },
+    },
+    {
+        { 14, 12, 10,  8,  7,  4,  2,  2,  2,  3,  5,  7,  9, 11, 13, 14,  7,  0 },
+        { 14, 13, 11,  8,  6,  4,  3,  2,  2,  3,  5,  7,  9, 10, 12, 14,  3,  0 },
+        { 13, 12, 10,  7,  5,  4,  3,  2,  2,  3,  4,  6,  8,  9, 11, 13,  4,  0 },
+        { 13, 12, 10,  7,  5,  4,  3,  2,  2,  3,  4,  6,  8,  9, 11, 13,  4,  0 },
+    },
+    {
+        { 16, 14, 12, 10,  8,  5,  3,  1,  2,  4,  7,  9, 11, 13, 15, 17,  6, 17 },
+        { 15, 13, 11,  8,  6,  4,  2,  2,  2,  3,  5,  7, 10, 12, 14, 16,  9, 16 },
+        { 14, 12, 11,  9,  8,  6,  3,  1,  2,  5,  7, 10, 13, 15, 16, 17,  4, 17 },
+        { 16, 14, 12,  9,  7,  5,  2,  2,  2,  3,  4,  6,  8, 11, 13, 15, 10, 16 },
+    },
+    {
+        { 13, 11, 10,  8,  7,  5,  2,  2,  2,  4,  6,  9, 12, 14, 15, 16,  3, 16 },
+        { 11, 11, 10,  9,  8,  7,  5,  4,  3,  3,  3,  3,  3,  3,  4,  5,  6,  5 },
+        {  9,  9,  7,  6,  5,  4,  3,  3,  2,  3,  4,  5,  4,  5,  5,  6,  8,  6 },
+        { 13, 12, 10,  8,  5,  3,  3,  2,  2,  3,  4,  7,  9, 11, 14, 15,  6, 15 },
+    }
+};
+
+static const uint16_t imc_huffman_bits[4][4][18] = {
+    {
+        { 0xCC32, 0x6618, 0x1987, 0x0660, 0x00CD, 0x0018, 0x0007, 0x0000, 0x0002, 0x000D, 0x0032, 0x0199, 0x0331, 0x0CC2, 0x330D, 0xCC33, 0x0067, 0x0000 },
+        { 0x02FE, 0x00BE, 0x005E, 0x002D, 0x000A, 0x0009, 0x0003, 0x0003, 0x0000, 0x0002, 0x0008, 0x002C, 0x005D, 0x017E, 0x05FE, 0x05FF, 0x005C, 0x0000 },
+        { 0x5169, 0x5168, 0x28B5, 0x0517, 0x00A3, 0x0029, 0x0008, 0x0003, 0x0000, 0x0009, 0x0015, 0x0050, 0x0144, 0x028A, 0x0A2C, 0x145B, 0x000B, 0x0000 },
+        { 0x1231, 0x048D, 0x0247, 0x0090, 0x0025, 0x0008, 0x0001, 0x0003, 0x0000, 0x0005, 0x0013, 0x0049, 0x0122, 0x0919, 0x48C3, 0x48C2, 0x2460, 0x0000 },
+    },
+    {
+        { 0x2D1D, 0x0B46, 0x02D0, 0x00B5, 0x0059, 0x000A, 0x0003, 0x0001, 0x0000, 0x0004, 0x0017, 0x005B, 0x0169, 0x05A2, 0x168F, 0x2D1C, 0x0058, 0x0000 },
+        { 0x1800, 0x0C01, 0x0301, 0x0061, 0x0019, 0x0007, 0x0004, 0x0003, 0x0000, 0x0005, 0x000D, 0x0031, 0x00C1, 0x0181, 0x0601, 0x1801, 0x0002, 0x0000 },
+        { 0x1556, 0x0AAA, 0x02AB, 0x0054, 0x0014, 0x000B, 0x0002, 0x0003, 0x0000, 0x0003, 0x0008, 0x002B, 0x00AB, 0x0154, 0x0554, 0x1557, 0x0009, 0x0000 },
+        { 0x1556, 0x0AAA, 0x02AB, 0x0054, 0x0014, 0x000B, 0x0002, 0x0003, 0x0000, 0x0003, 0x0008, 0x002B, 0x00AB, 0x0154, 0x0554, 0x1557, 0x0009, 0x0000 },
+    },
+    {
+        { 0x2993, 0x0A65, 0x0298, 0x00A7, 0x0028, 0x0004, 0x0000, 0x0001, 0x0001, 0x0003, 0x0015, 0x0052, 0x014D, 0x0533, 0x14C8, 0x5324, 0x000B, 0x5325 },
+        { 0x09B8, 0x026F, 0x009A, 0x0012, 0x0005, 0x0000, 0x0001, 0x0002, 0x0003, 0x0001, 0x0003, 0x0008, 0x004C, 0x0136, 0x04DD, 0x1373, 0x0027, 0x1372 },
+        { 0x0787, 0x01E0, 0x00F1, 0x003D, 0x001F, 0x0006, 0x0001, 0x0001, 0x0001, 0x0002, 0x000E, 0x0079, 0x03C2, 0x0F0D, 0x1E19, 0x3C30, 0x0000, 0x3C31 },
+        { 0x4B06, 0x12C0, 0x04B1, 0x0097, 0x0024, 0x0008, 0x0002, 0x0003, 0x0000, 0x0003, 0x0005, 0x0013, 0x004A, 0x0259, 0x0961, 0x2582, 0x012D, 0x4B07 },
+    },
+    {
+        { 0x0A5A, 0x0297, 0x014A, 0x0053, 0x0028, 0x000B, 0x0003, 0x0000, 0x0002, 0x0004, 0x0015, 0x00A4, 0x052C, 0x14B7, 0x296C, 0x52DB, 0x0003, 0x52DA },
+        { 0x0193, 0x0192, 0x00C8, 0x0065, 0x0033, 0x0018, 0x0007, 0x0004, 0x0000, 0x0004, 0x0005, 0x0007, 0x0006, 0x0003, 0x0005, 0x0005, 0x000D, 0x0004 },
+        { 0x0012, 0x0013, 0x0005, 0x0003, 0x0000, 0x0003, 0x0005, 0x0004, 0x0003, 0x0003, 0x0005, 0x0005, 0x0004, 0x0004, 0x0003, 0x0005, 0x0008, 0x0004 },
+        { 0x0D66, 0x06B2, 0x01AD, 0x006A, 0x000C, 0x0005, 0x0004, 0x0000, 0x0003, 0x0002, 0x0007, 0x0034, 0x00D7, 0x0358, 0x1ACF, 0x359C, 0x001B, 0x359D },
+    }
+};
+
diff --git a/contrib/ffmpeg/libavcodec/imgconvert.c b/contrib/ffmpeg/libavcodec/imgconvert.c
new file mode 100644
index 000000000..d5b4cdca0
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/imgconvert.c
@@ -0,0 +1,2842 @@
+/*
+ * Misc image convertion routines
+ * Copyright (c) 2001, 2002, 2003 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file imgconvert.c
+ * Misc image convertion routines.
+ */
+
+/* TODO:
+ * - write 'ffimg' program to test all the image related stuff
+ * - move all api to slice based system
+ * - integrate deinterlacing, postprocessing and scaling in the conversion process
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+
+#ifdef USE_FASTMEMCPY
+#include "libvo/fastmemcpy.h"
+#endif
+
+#ifdef HAVE_MMX
+#include "i386/mmx.h"
+#endif
+
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+
+#define FF_COLOR_RGB      0 /* RGB color space */
+#define FF_COLOR_GRAY     1 /* gray color space */
+#define FF_COLOR_YUV      2 /* YUV color space. 16 <= Y <= 235, 16 <= U, V <= 240 */
+#define FF_COLOR_YUV_JPEG 3 /* YUV color space. 0 <= Y <= 255, 0 <= U, V <= 255 */
+
+#define FF_PIXEL_PLANAR   0 /* each channel has one component in AVPicture */
+#define FF_PIXEL_PACKED   1 /* only one components containing all the channels */
+#define FF_PIXEL_PALETTE  2  /* one components containing indexes for a palette */
+
+typedef struct PixFmtInfo {
+    const char *name;
+    uint8_t nb_channels;     /* number of channels (including alpha) */
+    uint8_t color_type;      /* color type (see FF_COLOR_xxx constants) */
+    uint8_t pixel_type;      /* pixel storage type (see FF_PIXEL_xxx constants) */
+    uint8_t is_alpha : 1;    /* true if alpha can be specified */
+    uint8_t x_chroma_shift;  /* X chroma subsampling factor is 2 ^ shift */
+    uint8_t y_chroma_shift;  /* Y chroma subsampling factor is 2 ^ shift */
+    uint8_t depth;           /* bit depth of the color components */
+} PixFmtInfo;
+
+/* this table gives more information about formats */
+static const PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
+    /* YUV formats */
+    [PIX_FMT_YUV420P] = {
+        .name = "yuv420p",
+        .nb_channels = 3,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 8,
+        .x_chroma_shift = 1, .y_chroma_shift = 1,
+    },
+    [PIX_FMT_YUV422P] = {
+        .name = "yuv422p",
+        .nb_channels = 3,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 8,
+        .x_chroma_shift = 1, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_YUV444P] = {
+        .name = "yuv444p",
+        .nb_channels = 3,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 8,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_YUV422] = {
+        .name = "yuv422",
+        .nb_channels = 1,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 8,
+        .x_chroma_shift = 1, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_UYVY422] = {
+        .name = "uyvy422",
+        .nb_channels = 1,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 8,
+        .x_chroma_shift = 1, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_YUV410P] = {
+        .name = "yuv410p",
+        .nb_channels = 3,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 8,
+        .x_chroma_shift = 2, .y_chroma_shift = 2,
+    },
+    [PIX_FMT_YUV411P] = {
+        .name = "yuv411p",
+        .nb_channels = 3,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 8,
+        .x_chroma_shift = 2, .y_chroma_shift = 0,
+    },
+
+    /* JPEG YUV */
+    [PIX_FMT_YUVJ420P] = {
+        .name = "yuvj420p",
+        .nb_channels = 3,
+        .color_type = FF_COLOR_YUV_JPEG,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 8,
+        .x_chroma_shift = 1, .y_chroma_shift = 1,
+    },
+    [PIX_FMT_YUVJ422P] = {
+        .name = "yuvj422p",
+        .nb_channels = 3,
+        .color_type = FF_COLOR_YUV_JPEG,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 8,
+        .x_chroma_shift = 1, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_YUVJ444P] = {
+        .name = "yuvj444p",
+        .nb_channels = 3,
+        .color_type = FF_COLOR_YUV_JPEG,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 8,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+
+    /* RGB formats */
+    [PIX_FMT_RGB24] = {
+        .name = "rgb24",
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 8,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_BGR24] = {
+        .name = "bgr24",
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 8,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_RGBA32] = {
+        .name = "rgba32",
+        .nb_channels = 4, .is_alpha = 1,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 8,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_RGB565] = {
+        .name = "rgb565",
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 5,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_RGB555] = {
+        .name = "rgb555",
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 5,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+
+    /* gray / mono formats */
+    [PIX_FMT_GRAY16BE] = {
+        .name = "gray16be",
+        .nb_channels = 1,
+        .color_type = FF_COLOR_GRAY,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 16,
+    },
+    [PIX_FMT_GRAY16LE] = {
+        .name = "gray16le",
+        .nb_channels = 1,
+        .color_type = FF_COLOR_GRAY,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 16,
+    },
+    [PIX_FMT_GRAY8] = {
+        .name = "gray",
+        .nb_channels = 1,
+        .color_type = FF_COLOR_GRAY,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 8,
+    },
+    [PIX_FMT_MONOWHITE] = {
+        .name = "monow",
+        .nb_channels = 1,
+        .color_type = FF_COLOR_GRAY,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 1,
+    },
+    [PIX_FMT_MONOBLACK] = {
+        .name = "monob",
+        .nb_channels = 1,
+        .color_type = FF_COLOR_GRAY,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 1,
+    },
+
+    /* paletted formats */
+    [PIX_FMT_PAL8] = {
+        .name = "pal8",
+        .nb_channels = 4, .is_alpha = 1,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PALETTE,
+        .depth = 8,
+    },
+    [PIX_FMT_XVMC_MPEG2_MC] = {
+        .name = "xvmcmc",
+    },
+    [PIX_FMT_XVMC_MPEG2_IDCT] = {
+        .name = "xvmcidct",
+    },
+    [PIX_FMT_UYVY411] = {
+        .name = "uyvy411",
+        .nb_channels = 1,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 8,
+        .x_chroma_shift = 2, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_BGR32] = {
+        .name = "bgr32",
+        .nb_channels = 4, .is_alpha = 1,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 8,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_BGR565] = {
+        .name = "bgr565",
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 5,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_BGR555] = {
+        .name = "bgr555",
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 5,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_RGB8] = {
+        .name = "rgb8",
+        .nb_channels = 1,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 8,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_RGB4] = {
+        .name = "rgb4",
+        .nb_channels = 1,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 4,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_RGB4_BYTE] = {
+        .name = "rgb4_byte",
+        .nb_channels = 1,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 8,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_BGR8] = {
+        .name = "bgr8",
+        .nb_channels = 1,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 8,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_BGR4] = {
+        .name = "bgr4",
+        .nb_channels = 1,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 4,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_BGR4_BYTE] = {
+        .name = "bgr4_byte",
+        .nb_channels = 1,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 8,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_NV12] = {
+        .name = "nv12",
+        .nb_channels = 2,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 8,
+        .x_chroma_shift = 1, .y_chroma_shift = 1,
+    },
+    [PIX_FMT_NV21] = {
+        .name = "nv12",
+        .nb_channels = 2,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 8,
+        .x_chroma_shift = 1, .y_chroma_shift = 1,
+    },
+
+    [PIX_FMT_BGR32_1] = {
+        .name = "bgr32_1",
+        .nb_channels = 4, .is_alpha = 1,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 8,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_RGB32_1] = {
+        .name = "rgb32_1",
+        .nb_channels = 4, .is_alpha = 1,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 8,
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+};
+
+void avcodec_get_chroma_sub_sample(int pix_fmt, int *h_shift, int *v_shift)
+{
+    *h_shift = pix_fmt_info[pix_fmt].x_chroma_shift;
+    *v_shift = pix_fmt_info[pix_fmt].y_chroma_shift;
+}
+
+const char *avcodec_get_pix_fmt_name(int pix_fmt)
+{
+    if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB)
+        return "???";
+    else
+        return pix_fmt_info[pix_fmt].name;
+}
+
+enum PixelFormat avcodec_get_pix_fmt(const char* name)
+{
+    int i;
+
+    for (i=0; i < PIX_FMT_NB; i++)
+         if (!strcmp(pix_fmt_info[i].name, name))
+             break;
+    return i;
+}
+
+/* Picture field are filled with 'ptr' addresses. Also return size */
+int avpicture_fill(AVPicture *picture, uint8_t *ptr,
+                   int pix_fmt, int width, int height)
+{
+    int size, w2, h2, size2;
+    const PixFmtInfo *pinfo;
+
+    if(avcodec_check_dimensions(NULL, width, height))
+        goto fail;
+
+    pinfo = &pix_fmt_info[pix_fmt];
+    size = width * height;
+    switch(pix_fmt) {
+    case PIX_FMT_YUV420P:
+    case PIX_FMT_YUV422P:
+    case PIX_FMT_YUV444P:
+    case PIX_FMT_YUV410P:
+    case PIX_FMT_YUV411P:
+    case PIX_FMT_YUVJ420P:
+    case PIX_FMT_YUVJ422P:
+    case PIX_FMT_YUVJ444P:
+        w2 = (width + (1 << pinfo->x_chroma_shift) - 1) >> pinfo->x_chroma_shift;
+        h2 = (height + (1 << pinfo->y_chroma_shift) - 1) >> pinfo->y_chroma_shift;
+        size2 = w2 * h2;
+        picture->data[0] = ptr;
+        picture->data[1] = picture->data[0] + size;
+        picture->data[2] = picture->data[1] + size2;
+        picture->linesize[0] = width;
+        picture->linesize[1] = w2;
+        picture->linesize[2] = w2;
+        return size + 2 * size2;
+    case PIX_FMT_NV12:
+    case PIX_FMT_NV21:
+        w2 = (width + (1 << pinfo->x_chroma_shift) - 1) >> pinfo->x_chroma_shift;
+        h2 = (height + (1 << pinfo->y_chroma_shift) - 1) >> pinfo->y_chroma_shift;
+        size2 = w2 * h2 * 2;
+        picture->data[0] = ptr;
+        picture->data[1] = picture->data[0] + size;
+        picture->data[2] = NULL;
+        picture->linesize[0] = width;
+        picture->linesize[1] = w2;
+        picture->linesize[2] = 0;
+        return size + 2 * size2;
+    case PIX_FMT_RGB24:
+    case PIX_FMT_BGR24:
+        picture->data[0] = ptr;
+        picture->data[1] = NULL;
+        picture->data[2] = NULL;
+        picture->linesize[0] = width * 3;
+        return size * 3;
+    case PIX_FMT_RGBA32:
+    case PIX_FMT_BGR32:
+    case PIX_FMT_RGB32_1:
+    case PIX_FMT_BGR32_1:
+        picture->data[0] = ptr;
+        picture->data[1] = NULL;
+        picture->data[2] = NULL;
+        picture->linesize[0] = width * 4;
+        return size * 4;
+    case PIX_FMT_GRAY16BE:
+    case PIX_FMT_GRAY16LE:
+    case PIX_FMT_BGR555:
+    case PIX_FMT_BGR565:
+    case PIX_FMT_RGB555:
+    case PIX_FMT_RGB565:
+    case PIX_FMT_YUV422:
+        picture->data[0] = ptr;
+        picture->data[1] = NULL;
+        picture->data[2] = NULL;
+        picture->linesize[0] = width * 2;
+        return size * 2;
+    case PIX_FMT_UYVY422:
+        picture->data[0] = ptr;
+        picture->data[1] = NULL;
+        picture->data[2] = NULL;
+        picture->linesize[0] = width * 2;
+        return size * 2;
+    case PIX_FMT_UYVY411:
+        picture->data[0] = ptr;
+        picture->data[1] = NULL;
+        picture->data[2] = NULL;
+        picture->linesize[0] = width + width/2;
+        return size + size/2;
+    case PIX_FMT_RGB8:
+    case PIX_FMT_BGR8:
+    case PIX_FMT_RGB4_BYTE:
+    case PIX_FMT_BGR4_BYTE:
+    case PIX_FMT_GRAY8:
+        picture->data[0] = ptr;
+        picture->data[1] = NULL;
+        picture->data[2] = NULL;
+        picture->linesize[0] = width;
+        return size;
+    case PIX_FMT_RGB4:
+    case PIX_FMT_BGR4:
+        picture->data[0] = ptr;
+        picture->data[1] = NULL;
+        picture->data[2] = NULL;
+        picture->linesize[0] = width / 2;
+        return size / 2;
+    case PIX_FMT_MONOWHITE:
+    case PIX_FMT_MONOBLACK:
+        picture->data[0] = ptr;
+        picture->data[1] = NULL;
+        picture->data[2] = NULL;
+        picture->linesize[0] = (width + 7) >> 3;
+        return picture->linesize[0] * height;
+    case PIX_FMT_PAL8:
+        size2 = (size + 3) & ~3;
+        picture->data[0] = ptr;
+        picture->data[1] = ptr + size2; /* palette is stored here as 256 32 bit words */
+        picture->data[2] = NULL;
+        picture->linesize[0] = width;
+        picture->linesize[1] = 4;
+        return size2 + 256 * 4;
+    default:
+fail:
+        picture->data[0] = NULL;
+        picture->data[1] = NULL;
+        picture->data[2] = NULL;
+        picture->data[3] = NULL;
+        return -1;
+    }
+}
+
+int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height,
+                     unsigned char *dest, int dest_size)
+{
+    const PixFmtInfo* pf = &pix_fmt_info[pix_fmt];
+    int i, j, w, h, data_planes;
+    const unsigned char* s;
+    int size = avpicture_get_size(pix_fmt, width, height);
+
+    if (size > dest_size || size < 0)
+        return -1;
+
+    if (pf->pixel_type == FF_PIXEL_PACKED || pf->pixel_type == FF_PIXEL_PALETTE) {
+        if (pix_fmt == PIX_FMT_YUV422 ||
+            pix_fmt == PIX_FMT_UYVY422 ||
+            pix_fmt == PIX_FMT_BGR565 ||
+            pix_fmt == PIX_FMT_BGR555 ||
+            pix_fmt == PIX_FMT_RGB565 ||
+            pix_fmt == PIX_FMT_RGB555)
+            w = width * 2;
+        else if (pix_fmt == PIX_FMT_UYVY411)
+          w = width + width/2;
+        else if (pix_fmt == PIX_FMT_PAL8)
+          w = width;
+        else
+          w = width * (pf->depth * pf->nb_channels / 8);
+
+        data_planes = 1;
+        h = height;
+    } else {
+        data_planes = pf->nb_channels;
+        w = (width*pf->depth + 7)/8;
+        h = height;
+    }
+
+    for (i=0; i<data_planes; i++) {
+         if (i == 1) {
+             w = width >> pf->x_chroma_shift;
+             h = height >> pf->y_chroma_shift;
+         }
+         s = src->data[i];
+         for(j=0; j<h; j++) {
+             memcpy(dest, s, w);
+             dest += w;
+             s += src->linesize[i];
+         }
+    }
+
+    if (pf->pixel_type == FF_PIXEL_PALETTE)
+        memcpy((unsigned char *)(((size_t)dest + 3) & ~3), src->data[1], 256 * 4);
+
+    return size;
+}
+
+int avpicture_get_size(int pix_fmt, int width, int height)
+{
+    AVPicture dummy_pict;
+    return avpicture_fill(&dummy_pict, NULL, pix_fmt, width, height);
+}
+
+/**
+ * compute the loss when converting from a pixel format to another
+ */
+int avcodec_get_pix_fmt_loss(int dst_pix_fmt, int src_pix_fmt,
+                             int has_alpha)
+{
+    const PixFmtInfo *pf, *ps;
+    int loss;
+
+    ps = &pix_fmt_info[src_pix_fmt];
+    pf = &pix_fmt_info[dst_pix_fmt];
+
+    /* compute loss */
+    loss = 0;
+    pf = &pix_fmt_info[dst_pix_fmt];
+    if (pf->depth < ps->depth ||
+        (dst_pix_fmt == PIX_FMT_RGB555 && src_pix_fmt == PIX_FMT_RGB565))
+        loss |= FF_LOSS_DEPTH;
+    if (pf->x_chroma_shift > ps->x_chroma_shift ||
+        pf->y_chroma_shift > ps->y_chroma_shift)
+        loss |= FF_LOSS_RESOLUTION;
+    switch(pf->color_type) {
+    case FF_COLOR_RGB:
+        if (ps->color_type != FF_COLOR_RGB &&
+            ps->color_type != FF_COLOR_GRAY)
+            loss |= FF_LOSS_COLORSPACE;
+        break;
+    case FF_COLOR_GRAY:
+        if (ps->color_type != FF_COLOR_GRAY)
+            loss |= FF_LOSS_COLORSPACE;
+        break;
+    case FF_COLOR_YUV:
+        if (ps->color_type != FF_COLOR_YUV)
+            loss |= FF_LOSS_COLORSPACE;
+        break;
+    case FF_COLOR_YUV_JPEG:
+        if (ps->color_type != FF_COLOR_YUV_JPEG &&
+            ps->color_type != FF_COLOR_YUV &&
+            ps->color_type != FF_COLOR_GRAY)
+            loss |= FF_LOSS_COLORSPACE;
+        break;
+    default:
+        /* fail safe test */
+        if (ps->color_type != pf->color_type)
+            loss |= FF_LOSS_COLORSPACE;
+        break;
+    }
+    if (pf->color_type == FF_COLOR_GRAY &&
+        ps->color_type != FF_COLOR_GRAY)
+        loss |= FF_LOSS_CHROMA;
+    if (!pf->is_alpha && (ps->is_alpha && has_alpha))
+        loss |= FF_LOSS_ALPHA;
+    if (pf->pixel_type == FF_PIXEL_PALETTE &&
+        (ps->pixel_type != FF_PIXEL_PALETTE && ps->color_type != FF_COLOR_GRAY))
+        loss |= FF_LOSS_COLORQUANT;
+    return loss;
+}
+
+static int avg_bits_per_pixel(int pix_fmt)
+{
+    int bits;
+    const PixFmtInfo *pf;
+
+    pf = &pix_fmt_info[pix_fmt];
+    switch(pf->pixel_type) {
+    case FF_PIXEL_PACKED:
+        switch(pix_fmt) {
+        case PIX_FMT_YUV422:
+        case PIX_FMT_UYVY422:
+        case PIX_FMT_RGB565:
+        case PIX_FMT_RGB555:
+        case PIX_FMT_BGR565:
+        case PIX_FMT_BGR555:
+            bits = 16;
+            break;
+        case PIX_FMT_UYVY411:
+            bits = 12;
+            break;
+        default:
+            bits = pf->depth * pf->nb_channels;
+            break;
+        }
+        break;
+    case FF_PIXEL_PLANAR:
+        if (pf->x_chroma_shift == 0 && pf->y_chroma_shift == 0) {
+            bits = pf->depth * pf->nb_channels;
+        } else {
+            bits = pf->depth + ((2 * pf->depth) >>
+                                (pf->x_chroma_shift + pf->y_chroma_shift));
+        }
+        break;
+    case FF_PIXEL_PALETTE:
+        bits = 8;
+        break;
+    default:
+        bits = -1;
+        break;
+    }
+    return bits;
+}
+
+static int avcodec_find_best_pix_fmt1(int pix_fmt_mask,
+                                      int src_pix_fmt,
+                                      int has_alpha,
+                                      int loss_mask)
+{
+    int dist, i, loss, min_dist, dst_pix_fmt;
+
+    /* find exact color match with smallest size */
+    dst_pix_fmt = -1;
+    min_dist = 0x7fffffff;
+    for(i = 0;i < PIX_FMT_NB; i++) {
+        if (pix_fmt_mask & (1 << i)) {
+            loss = avcodec_get_pix_fmt_loss(i, src_pix_fmt, has_alpha) & loss_mask;
+            if (loss == 0) {
+                dist = avg_bits_per_pixel(i);
+                if (dist < min_dist) {
+                    min_dist = dist;
+                    dst_pix_fmt = i;
+                }
+            }
+        }
+    }
+    return dst_pix_fmt;
+}
+
+/**
+ * find best pixel format to convert to. Return -1 if none found
+ */
+int avcodec_find_best_pix_fmt(int pix_fmt_mask, int src_pix_fmt,
+                              int has_alpha, int *loss_ptr)
+{
+    int dst_pix_fmt, loss_mask, i;
+    static const int loss_mask_order[] = {
+        ~0, /* no loss first */
+        ~FF_LOSS_ALPHA,
+        ~FF_LOSS_RESOLUTION,
+        ~(FF_LOSS_COLORSPACE | FF_LOSS_RESOLUTION),
+        ~FF_LOSS_COLORQUANT,
+        ~FF_LOSS_DEPTH,
+        0,
+    };
+
+    /* try with successive loss */
+    i = 0;
+    for(;;) {
+        loss_mask = loss_mask_order[i++];
+        dst_pix_fmt = avcodec_find_best_pix_fmt1(pix_fmt_mask, src_pix_fmt,
+                                                 has_alpha, loss_mask);
+        if (dst_pix_fmt >= 0)
+            goto found;
+        if (loss_mask == 0)
+            break;
+    }
+    return -1;
+ found:
+    if (loss_ptr)
+        *loss_ptr = avcodec_get_pix_fmt_loss(dst_pix_fmt, src_pix_fmt, has_alpha);
+    return dst_pix_fmt;
+}
+
+void ff_img_copy_plane(uint8_t *dst, int dst_wrap,
+                           const uint8_t *src, int src_wrap,
+                           int width, int height)
+{
+    if((!dst) || (!src))
+        return;
+    for(;height > 0; height--) {
+        memcpy(dst, src, width);
+        dst += dst_wrap;
+        src += src_wrap;
+    }
+}
+
+/**
+ * Copy image 'src' to 'dst'.
+ */
+void img_copy(AVPicture *dst, const AVPicture *src,
+              int pix_fmt, int width, int height)
+{
+    int bwidth, bits, i;
+    const PixFmtInfo *pf = &pix_fmt_info[pix_fmt];
+
+    pf = &pix_fmt_info[pix_fmt];
+    switch(pf->pixel_type) {
+    case FF_PIXEL_PACKED:
+        switch(pix_fmt) {
+        case PIX_FMT_YUV422:
+        case PIX_FMT_UYVY422:
+        case PIX_FMT_RGB565:
+        case PIX_FMT_RGB555:
+        case PIX_FMT_BGR565:
+        case PIX_FMT_BGR555:
+            bits = 16;
+            break;
+        case PIX_FMT_UYVY411:
+            bits = 12;
+            break;
+        default:
+            bits = pf->depth * pf->nb_channels;
+            break;
+        }
+        bwidth = (width * bits + 7) >> 3;
+        ff_img_copy_plane(dst->data[0], dst->linesize[0],
+                       src->data[0], src->linesize[0],
+                       bwidth, height);
+        break;
+    case FF_PIXEL_PLANAR:
+        for(i = 0; i < pf->nb_channels; i++) {
+            int w, h;
+            w = width;
+            h = height;
+            if (i == 1 || i == 2) {
+                w >>= pf->x_chroma_shift;
+                h >>= pf->y_chroma_shift;
+            }
+            bwidth = (w * pf->depth + 7) >> 3;
+            ff_img_copy_plane(dst->data[i], dst->linesize[i],
+                           src->data[i], src->linesize[i],
+                           bwidth, h);
+        }
+        break;
+    case FF_PIXEL_PALETTE:
+        ff_img_copy_plane(dst->data[0], dst->linesize[0],
+                       src->data[0], src->linesize[0],
+                       width, height);
+        /* copy the palette */
+        ff_img_copy_plane(dst->data[1], dst->linesize[1],
+                       src->data[1], src->linesize[1],
+                       4, 256);
+        break;
+    }
+}
+
+/* XXX: totally non optimized */
+
+static void yuv422_to_yuv420p(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    const uint8_t *p, *p1;
+    uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1;
+    int w;
+
+    p1 = src->data[0];
+    lum1 = dst->data[0];
+    cb1 = dst->data[1];
+    cr1 = dst->data[2];
+
+    for(;height >= 1; height -= 2) {
+        p = p1;
+        lum = lum1;
+        cb = cb1;
+        cr = cr1;
+        for(w = width; w >= 2; w -= 2) {
+            lum[0] = p[0];
+            cb[0] = p[1];
+            lum[1] = p[2];
+            cr[0] = p[3];
+            p += 4;
+            lum += 2;
+            cb++;
+            cr++;
+        }
+        if (w) {
+            lum[0] = p[0];
+            cb[0] = p[1];
+            cr[0] = p[3];
+            cb++;
+            cr++;
+        }
+        p1 += src->linesize[0];
+        lum1 += dst->linesize[0];
+        if (height>1) {
+            p = p1;
+            lum = lum1;
+            for(w = width; w >= 2; w -= 2) {
+                lum[0] = p[0];
+                lum[1] = p[2];
+                p += 4;
+                lum += 2;
+            }
+            if (w) {
+                lum[0] = p[0];
+            }
+            p1 += src->linesize[0];
+            lum1 += dst->linesize[0];
+        }
+        cb1 += dst->linesize[1];
+        cr1 += dst->linesize[2];
+    }
+}
+
+static void uyvy422_to_yuv420p(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    const uint8_t *p, *p1;
+    uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1;
+    int w;
+
+    p1 = src->data[0];
+
+    lum1 = dst->data[0];
+    cb1 = dst->data[1];
+    cr1 = dst->data[2];
+
+    for(;height >= 1; height -= 2) {
+        p = p1;
+        lum = lum1;
+        cb = cb1;
+        cr = cr1;
+        for(w = width; w >= 2; w -= 2) {
+            lum[0] = p[1];
+            cb[0] = p[0];
+            lum[1] = p[3];
+            cr[0] = p[2];
+            p += 4;
+            lum += 2;
+            cb++;
+            cr++;
+        }
+        if (w) {
+            lum[0] = p[1];
+            cb[0] = p[0];
+            cr[0] = p[2];
+            cb++;
+            cr++;
+        }
+        p1 += src->linesize[0];
+        lum1 += dst->linesize[0];
+        if (height>1) {
+            p = p1;
+            lum = lum1;
+            for(w = width; w >= 2; w -= 2) {
+                lum[0] = p[1];
+                lum[1] = p[3];
+                p += 4;
+                lum += 2;
+            }
+            if (w) {
+                lum[0] = p[1];
+            }
+            p1 += src->linesize[0];
+            lum1 += dst->linesize[0];
+        }
+        cb1 += dst->linesize[1];
+        cr1 += dst->linesize[2];
+    }
+}
+
+
+static void uyvy422_to_yuv422p(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    const uint8_t *p, *p1;
+    uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1;
+    int w;
+
+    p1 = src->data[0];
+    lum1 = dst->data[0];
+    cb1 = dst->data[1];
+    cr1 = dst->data[2];
+    for(;height > 0; height--) {
+        p = p1;
+        lum = lum1;
+        cb = cb1;
+        cr = cr1;
+        for(w = width; w >= 2; w -= 2) {
+            lum[0] = p[1];
+            cb[0] = p[0];
+            lum[1] = p[3];
+            cr[0] = p[2];
+            p += 4;
+            lum += 2;
+            cb++;
+            cr++;
+        }
+        p1 += src->linesize[0];
+        lum1 += dst->linesize[0];
+        cb1 += dst->linesize[1];
+        cr1 += dst->linesize[2];
+    }
+}
+
+
+static void yuv422_to_yuv422p(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    const uint8_t *p, *p1;
+    uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1;
+    int w;
+
+    p1 = src->data[0];
+    lum1 = dst->data[0];
+    cb1 = dst->data[1];
+    cr1 = dst->data[2];
+    for(;height > 0; height--) {
+        p = p1;
+        lum = lum1;
+        cb = cb1;
+        cr = cr1;
+        for(w = width; w >= 2; w -= 2) {
+            lum[0] = p[0];
+            cb[0] = p[1];
+            lum[1] = p[2];
+            cr[0] = p[3];
+            p += 4;
+            lum += 2;
+            cb++;
+            cr++;
+        }
+        p1 += src->linesize[0];
+        lum1 += dst->linesize[0];
+        cb1 += dst->linesize[1];
+        cr1 += dst->linesize[2];
+    }
+}
+
+static void yuv422p_to_yuv422(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    uint8_t *p, *p1;
+    const uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1;
+    int w;
+
+    p1 = dst->data[0];
+    lum1 = src->data[0];
+    cb1 = src->data[1];
+    cr1 = src->data[2];
+    for(;height > 0; height--) {
+        p = p1;
+        lum = lum1;
+        cb = cb1;
+        cr = cr1;
+        for(w = width; w >= 2; w -= 2) {
+            p[0] = lum[0];
+            p[1] = cb[0];
+            p[2] = lum[1];
+            p[3] = cr[0];
+            p += 4;
+            lum += 2;
+            cb++;
+            cr++;
+        }
+        p1 += dst->linesize[0];
+        lum1 += src->linesize[0];
+        cb1 += src->linesize[1];
+        cr1 += src->linesize[2];
+    }
+}
+
+static void yuv422p_to_uyvy422(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    uint8_t *p, *p1;
+    const uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1;
+    int w;
+
+    p1 = dst->data[0];
+    lum1 = src->data[0];
+    cb1 = src->data[1];
+    cr1 = src->data[2];
+    for(;height > 0; height--) {
+        p = p1;
+        lum = lum1;
+        cb = cb1;
+        cr = cr1;
+        for(w = width; w >= 2; w -= 2) {
+            p[1] = lum[0];
+            p[0] = cb[0];
+            p[3] = lum[1];
+            p[2] = cr[0];
+            p += 4;
+            lum += 2;
+            cb++;
+            cr++;
+        }
+        p1 += dst->linesize[0];
+        lum1 += src->linesize[0];
+        cb1 += src->linesize[1];
+        cr1 += src->linesize[2];
+    }
+}
+
+static void uyvy411_to_yuv411p(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    const uint8_t *p, *p1;
+    uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1;
+    int w;
+
+    p1 = src->data[0];
+    lum1 = dst->data[0];
+    cb1 = dst->data[1];
+    cr1 = dst->data[2];
+    for(;height > 0; height--) {
+        p = p1;
+        lum = lum1;
+        cb = cb1;
+        cr = cr1;
+        for(w = width; w >= 4; w -= 4) {
+            cb[0] = p[0];
+            lum[0] = p[1];
+            lum[1] = p[2];
+            cr[0] = p[3];
+            lum[2] = p[4];
+            lum[3] = p[5];
+            p += 6;
+            lum += 4;
+            cb++;
+            cr++;
+        }
+        p1 += src->linesize[0];
+        lum1 += dst->linesize[0];
+        cb1 += dst->linesize[1];
+        cr1 += dst->linesize[2];
+    }
+}
+
+
+static void yuv420p_to_yuv422(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    int w, h;
+    uint8_t *line1, *line2, *linesrc = dst->data[0];
+    uint8_t *lum1, *lum2, *lumsrc = src->data[0];
+    uint8_t *cb1, *cb2 = src->data[1];
+    uint8_t *cr1, *cr2 = src->data[2];
+
+    for(h = height / 2; h--;) {
+        line1 = linesrc;
+        line2 = linesrc + dst->linesize[0];
+
+        lum1 = lumsrc;
+        lum2 = lumsrc + src->linesize[0];
+
+        cb1 = cb2;
+        cr1 = cr2;
+
+        for(w = width / 2; w--;) {
+                *line1++ = *lum1++; *line2++ = *lum2++;
+                *line1++ =          *line2++ = *cb1++;
+                *line1++ = *lum1++; *line2++ = *lum2++;
+                *line1++ =          *line2++ = *cr1++;
+        }
+
+        linesrc += dst->linesize[0] * 2;
+        lumsrc += src->linesize[0] * 2;
+        cb2 += src->linesize[1];
+        cr2 += src->linesize[2];
+    }
+}
+
+static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    int w, h;
+    uint8_t *line1, *line2, *linesrc = dst->data[0];
+    uint8_t *lum1, *lum2, *lumsrc = src->data[0];
+    uint8_t *cb1, *cb2 = src->data[1];
+    uint8_t *cr1, *cr2 = src->data[2];
+
+    for(h = height / 2; h--;) {
+        line1 = linesrc;
+        line2 = linesrc + dst->linesize[0];
+
+        lum1 = lumsrc;
+        lum2 = lumsrc + src->linesize[0];
+
+        cb1 = cb2;
+        cr1 = cr2;
+
+        for(w = width / 2; w--;) {
+                *line1++ =          *line2++ = *cb1++;
+                *line1++ = *lum1++; *line2++ = *lum2++;
+                *line1++ =          *line2++ = *cr1++;
+                *line1++ = *lum1++; *line2++ = *lum2++;
+        }
+
+        linesrc += dst->linesize[0] * 2;
+        lumsrc += src->linesize[0] * 2;
+        cb2 += src->linesize[1];
+        cr2 += src->linesize[2];
+    }
+}
+
+#define SCALEBITS 10
+#define ONE_HALF  (1 << (SCALEBITS - 1))
+#define FIX(x)    ((int) ((x) * (1<<SCALEBITS) + 0.5))
+
+#define YUV_TO_RGB1_CCIR(cb1, cr1)\
+{\
+    cb = (cb1) - 128;\
+    cr = (cr1) - 128;\
+    r_add = FIX(1.40200*255.0/224.0) * cr + ONE_HALF;\
+    g_add = - FIX(0.34414*255.0/224.0) * cb - FIX(0.71414*255.0/224.0) * cr + \
+            ONE_HALF;\
+    b_add = FIX(1.77200*255.0/224.0) * cb + ONE_HALF;\
+}
+
+#define YUV_TO_RGB2_CCIR(r, g, b, y1)\
+{\
+    y = ((y1) - 16) * FIX(255.0/219.0);\
+    r = cm[(y + r_add) >> SCALEBITS];\
+    g = cm[(y + g_add) >> SCALEBITS];\
+    b = cm[(y + b_add) >> SCALEBITS];\
+}
+
+#define YUV_TO_RGB1(cb1, cr1)\
+{\
+    cb = (cb1) - 128;\
+    cr = (cr1) - 128;\
+    r_add = FIX(1.40200) * cr + ONE_HALF;\
+    g_add = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;\
+    b_add = FIX(1.77200) * cb + ONE_HALF;\
+}
+
+#define YUV_TO_RGB2(r, g, b, y1)\
+{\
+    y = (y1) << SCALEBITS;\
+    r = cm[(y + r_add) >> SCALEBITS];\
+    g = cm[(y + g_add) >> SCALEBITS];\
+    b = cm[(y + b_add) >> SCALEBITS];\
+}
+
+#define Y_CCIR_TO_JPEG(y)\
+ cm[((y) * FIX(255.0/219.0) + (ONE_HALF - 16 * FIX(255.0/219.0))) >> SCALEBITS]
+
+#define Y_JPEG_TO_CCIR(y)\
+ (((y) * FIX(219.0/255.0) + (ONE_HALF + (16 << SCALEBITS))) >> SCALEBITS)
+
+#define C_CCIR_TO_JPEG(y)\
+ cm[(((y) - 128) * FIX(127.0/112.0) + (ONE_HALF + (128 << SCALEBITS))) >> SCALEBITS]
+
+/* NOTE: the clamp is really necessary! */
+static inline int C_JPEG_TO_CCIR(int y) {
+    y = (((y - 128) * FIX(112.0/127.0) + (ONE_HALF + (128 << SCALEBITS))) >> SCALEBITS);
+    if (y < 16)
+        y = 16;
+    return y;
+}
+
+
+#define RGB_TO_Y(r, g, b) \
+((FIX(0.29900) * (r) + FIX(0.58700) * (g) + \
+  FIX(0.11400) * (b) + ONE_HALF) >> SCALEBITS)
+
+#define RGB_TO_U(r1, g1, b1, shift)\
+(((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +         \
+     FIX(0.50000) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128)
+
+#define RGB_TO_V(r1, g1, b1, shift)\
+(((FIX(0.50000) * r1 - FIX(0.41869) * g1 -           \
+   FIX(0.08131) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128)
+
+#define RGB_TO_Y_CCIR(r, g, b) \
+((FIX(0.29900*219.0/255.0) * (r) + FIX(0.58700*219.0/255.0) * (g) + \
+  FIX(0.11400*219.0/255.0) * (b) + (ONE_HALF + (16 << SCALEBITS))) >> SCALEBITS)
+
+#define RGB_TO_U_CCIR(r1, g1, b1, shift)\
+(((- FIX(0.16874*224.0/255.0) * r1 - FIX(0.33126*224.0/255.0) * g1 +         \
+     FIX(0.50000*224.0/255.0) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128)
+
+#define RGB_TO_V_CCIR(r1, g1, b1, shift)\
+(((FIX(0.50000*224.0/255.0) * r1 - FIX(0.41869*224.0/255.0) * g1 -           \
+   FIX(0.08131*224.0/255.0) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128)
+
+static uint8_t y_ccir_to_jpeg[256];
+static uint8_t y_jpeg_to_ccir[256];
+static uint8_t c_ccir_to_jpeg[256];
+static uint8_t c_jpeg_to_ccir[256];
+
+/* init various conversion tables */
+static void img_convert_init(void)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    for(i = 0;i < 256; i++) {
+        y_ccir_to_jpeg[i] = Y_CCIR_TO_JPEG(i);
+        y_jpeg_to_ccir[i] = Y_JPEG_TO_CCIR(i);
+        c_ccir_to_jpeg[i] = C_CCIR_TO_JPEG(i);
+        c_jpeg_to_ccir[i] = C_JPEG_TO_CCIR(i);
+    }
+}
+
+/* apply to each pixel the given table */
+static void img_apply_table(uint8_t *dst, int dst_wrap,
+                            const uint8_t *src, int src_wrap,
+                            int width, int height, const uint8_t *table1)
+{
+    int n;
+    const uint8_t *s;
+    uint8_t *d;
+    const uint8_t *table;
+
+    table = table1;
+    for(;height > 0; height--) {
+        s = src;
+        d = dst;
+        n = width;
+        while (n >= 4) {
+            d[0] = table[s[0]];
+            d[1] = table[s[1]];
+            d[2] = table[s[2]];
+            d[3] = table[s[3]];
+            d += 4;
+            s += 4;
+            n -= 4;
+        }
+        while (n > 0) {
+            d[0] = table[s[0]];
+            d++;
+            s++;
+            n--;
+        }
+        dst += dst_wrap;
+        src += src_wrap;
+    }
+}
+
+/* XXX: use generic filter ? */
+/* XXX: in most cases, the sampling position is incorrect */
+
+/* 4x1 -> 1x1 */
+static void shrink41(uint8_t *dst, int dst_wrap,
+                     const uint8_t *src, int src_wrap,
+                     int width, int height)
+{
+    int w;
+    const uint8_t *s;
+    uint8_t *d;
+
+    for(;height > 0; height--) {
+        s = src;
+        d = dst;
+        for(w = width;w > 0; w--) {
+            d[0] = (s[0] + s[1] + s[2] + s[3] + 2) >> 2;
+            s += 4;
+            d++;
+        }
+        src += src_wrap;
+        dst += dst_wrap;
+    }
+}
+
+/* 2x1 -> 1x1 */
+static void shrink21(uint8_t *dst, int dst_wrap,
+                     const uint8_t *src, int src_wrap,
+                     int width, int height)
+{
+    int w;
+    const uint8_t *s;
+    uint8_t *d;
+
+    for(;height > 0; height--) {
+        s = src;
+        d = dst;
+        for(w = width;w > 0; w--) {
+            d[0] = (s[0] + s[1]) >> 1;
+            s += 2;
+            d++;
+        }
+        src += src_wrap;
+        dst += dst_wrap;
+    }
+}
+
+/* 1x2 -> 1x1 */
+static void shrink12(uint8_t *dst, int dst_wrap,
+                     const uint8_t *src, int src_wrap,
+                     int width, int height)
+{
+    int w;
+    uint8_t *d;
+    const uint8_t *s1, *s2;
+
+    for(;height > 0; height--) {
+        s1 = src;
+        s2 = s1 + src_wrap;
+        d = dst;
+        for(w = width;w >= 4; w-=4) {
+            d[0] = (s1[0] + s2[0]) >> 1;
+            d[1] = (s1[1] + s2[1]) >> 1;
+            d[2] = (s1[2] + s2[2]) >> 1;
+            d[3] = (s1[3] + s2[3]) >> 1;
+            s1 += 4;
+            s2 += 4;
+            d += 4;
+        }
+        for(;w > 0; w--) {
+            d[0] = (s1[0] + s2[0]) >> 1;
+            s1++;
+            s2++;
+            d++;
+        }
+        src += 2 * src_wrap;
+        dst += dst_wrap;
+    }
+}
+
+/* 2x2 -> 1x1 */
+void ff_shrink22(uint8_t *dst, int dst_wrap,
+                     const uint8_t *src, int src_wrap,
+                     int width, int height)
+{
+    int w;
+    const uint8_t *s1, *s2;
+    uint8_t *d;
+
+    for(;height > 0; height--) {
+        s1 = src;
+        s2 = s1 + src_wrap;
+        d = dst;
+        for(w = width;w >= 4; w-=4) {
+            d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 2;
+            d[1] = (s1[2] + s1[3] + s2[2] + s2[3] + 2) >> 2;
+            d[2] = (s1[4] + s1[5] + s2[4] + s2[5] + 2) >> 2;
+            d[3] = (s1[6] + s1[7] + s2[6] + s2[7] + 2) >> 2;
+            s1 += 8;
+            s2 += 8;
+            d += 4;
+        }
+        for(;w > 0; w--) {
+            d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 2;
+            s1 += 2;
+            s2 += 2;
+            d++;
+        }
+        src += 2 * src_wrap;
+        dst += dst_wrap;
+    }
+}
+
+/* 4x4 -> 1x1 */
+void ff_shrink44(uint8_t *dst, int dst_wrap,
+                     const uint8_t *src, int src_wrap,
+                     int width, int height)
+{
+    int w;
+    const uint8_t *s1, *s2, *s3, *s4;
+    uint8_t *d;
+
+    for(;height > 0; height--) {
+        s1 = src;
+        s2 = s1 + src_wrap;
+        s3 = s2 + src_wrap;
+        s4 = s3 + src_wrap;
+        d = dst;
+        for(w = width;w > 0; w--) {
+            d[0] = (s1[0] + s1[1] + s1[2] + s1[3] +
+                    s2[0] + s2[1] + s2[2] + s2[3] +
+                    s3[0] + s3[1] + s3[2] + s3[3] +
+                    s4[0] + s4[1] + s4[2] + s4[3] + 8) >> 4;
+            s1 += 4;
+            s2 += 4;
+            s3 += 4;
+            s4 += 4;
+            d++;
+        }
+        src += 4 * src_wrap;
+        dst += dst_wrap;
+    }
+}
+
+/* 8x8 -> 1x1 */
+void ff_shrink88(uint8_t *dst, int dst_wrap,
+                     const uint8_t *src, int src_wrap,
+                     int width, int height)
+{
+    int w, i;
+
+    for(;height > 0; height--) {
+        for(w = width;w > 0; w--) {
+            int tmp=0;
+            for(i=0; i<8; i++){
+                tmp += src[0] + src[1] + src[2] + src[3] + src[4] + src[5] + src[6] + src[7];
+                src += src_wrap;
+            }
+            *(dst++) = (tmp + 32)>>6;
+            src += 8 - 8*src_wrap;
+        }
+        src += 8*src_wrap - 8*width;
+        dst += dst_wrap - width;
+    }
+}
+
+static void grow21_line(uint8_t *dst, const uint8_t *src,
+                        int width)
+{
+    int w;
+    const uint8_t *s1;
+    uint8_t *d;
+
+    s1 = src;
+    d = dst;
+    for(w = width;w >= 4; w-=4) {
+        d[1] = d[0] = s1[0];
+        d[3] = d[2] = s1[1];
+        s1 += 2;
+        d += 4;
+    }
+    for(;w >= 2; w -= 2) {
+        d[1] = d[0] = s1[0];
+        s1 ++;
+        d += 2;
+    }
+    /* only needed if width is not a multiple of two */
+    /* XXX: veryfy that */
+    if (w) {
+        d[0] = s1[0];
+    }
+}
+
+static void grow41_line(uint8_t *dst, const uint8_t *src,
+                        int width)
+{
+    int w, v;
+    const uint8_t *s1;
+    uint8_t *d;
+
+    s1 = src;
+    d = dst;
+    for(w = width;w >= 4; w-=4) {
+        v = s1[0];
+        d[0] = v;
+        d[1] = v;
+        d[2] = v;
+        d[3] = v;
+        s1 ++;
+        d += 4;
+    }
+}
+
+/* 1x1 -> 2x1 */
+static void grow21(uint8_t *dst, int dst_wrap,
+                   const uint8_t *src, int src_wrap,
+                   int width, int height)
+{
+    for(;height > 0; height--) {
+        grow21_line(dst, src, width);
+        src += src_wrap;
+        dst += dst_wrap;
+    }
+}
+
+/* 1x1 -> 2x2 */
+static void grow22(uint8_t *dst, int dst_wrap,
+                   const uint8_t *src, int src_wrap,
+                   int width, int height)
+{
+    for(;height > 0; height--) {
+        grow21_line(dst, src, width);
+        if (height%2)
+            src += src_wrap;
+        dst += dst_wrap;
+    }
+}
+
+/* 1x1 -> 4x1 */
+static void grow41(uint8_t *dst, int dst_wrap,
+                   const uint8_t *src, int src_wrap,
+                   int width, int height)
+{
+    for(;height > 0; height--) {
+        grow41_line(dst, src, width);
+        src += src_wrap;
+        dst += dst_wrap;
+    }
+}
+
+/* 1x1 -> 4x4 */
+static void grow44(uint8_t *dst, int dst_wrap,
+                   const uint8_t *src, int src_wrap,
+                   int width, int height)
+{
+    for(;height > 0; height--) {
+        grow41_line(dst, src, width);
+        if ((height & 3) == 1)
+            src += src_wrap;
+        dst += dst_wrap;
+    }
+}
+
+/* 1x2 -> 2x1 */
+static void conv411(uint8_t *dst, int dst_wrap,
+                    const uint8_t *src, int src_wrap,
+                    int width, int height)
+{
+    int w, c;
+    const uint8_t *s1, *s2;
+    uint8_t *d;
+
+    width>>=1;
+
+    for(;height > 0; height--) {
+        s1 = src;
+        s2 = src + src_wrap;
+        d = dst;
+        for(w = width;w > 0; w--) {
+            c = (s1[0] + s2[0]) >> 1;
+            d[0] = c;
+            d[1] = c;
+            s1++;
+            s2++;
+            d += 2;
+        }
+        src += src_wrap * 2;
+        dst += dst_wrap;
+    }
+}
+
+/* XXX: add jpeg quantize code */
+
+#define TRANSP_INDEX (6*6*6)
+
+/* this is maybe slow, but allows for extensions */
+static inline unsigned char gif_clut_index(uint8_t r, uint8_t g, uint8_t b)
+{
+    return ((((r)/47)%6)*6*6+(((g)/47)%6)*6+(((b)/47)%6));
+}
+
+static void build_rgb_palette(uint8_t *palette, int has_alpha)
+{
+    uint32_t *pal;
+    static const uint8_t pal_value[6] = { 0x00, 0x33, 0x66, 0x99, 0xcc, 0xff };
+    int i, r, g, b;
+
+    pal = (uint32_t *)palette;
+    i = 0;
+    for(r = 0; r < 6; r++) {
+        for(g = 0; g < 6; g++) {
+            for(b = 0; b < 6; b++) {
+                pal[i++] = (0xff << 24) | (pal_value[r] << 16) |
+                    (pal_value[g] << 8) | pal_value[b];
+            }
+        }
+    }
+    if (has_alpha)
+        pal[i++] = 0;
+    while (i < 256)
+        pal[i++] = 0xff000000;
+}
+
+/* copy bit n to bits 0 ... n - 1 */
+static inline unsigned int bitcopy_n(unsigned int a, int n)
+{
+    int mask;
+    mask = (1 << n) - 1;
+    return (a & (0xff & ~mask)) | ((-((a >> n) & 1)) & mask);
+}
+
+/* rgb555 handling */
+
+#define RGB_NAME rgb555
+
+#define RGB_IN(r, g, b, s)\
+{\
+    unsigned int v = ((const uint16_t *)(s))[0];\
+    r = bitcopy_n(v >> (10 - 3), 3);\
+    g = bitcopy_n(v >> (5 - 3), 3);\
+    b = bitcopy_n(v << 3, 3);\
+}
+
+
+#define RGB_OUT(d, r, g, b)\
+{\
+    ((uint16_t *)(d))[0] = ((r >> 3) << 10) | ((g >> 3) << 5) | (b >> 3);\
+}
+
+#define BPP 2
+
+#include "imgconvert_template.h"
+
+/* rgb565 handling */
+
+#define RGB_NAME rgb565
+
+#define RGB_IN(r, g, b, s)\
+{\
+    unsigned int v = ((const uint16_t *)(s))[0];\
+    r = bitcopy_n(v >> (11 - 3), 3);\
+    g = bitcopy_n(v >> (5 - 2), 2);\
+    b = bitcopy_n(v << 3, 3);\
+}
+
+#define RGB_OUT(d, r, g, b)\
+{\
+    ((uint16_t *)(d))[0] = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);\
+}
+
+#define BPP 2
+
+#include "imgconvert_template.h"
+
+/* bgr24 handling */
+
+#define RGB_NAME bgr24
+
+#define RGB_IN(r, g, b, s)\
+{\
+    b = (s)[0];\
+    g = (s)[1];\
+    r = (s)[2];\
+}
+
+#define RGB_OUT(d, r, g, b)\
+{\
+    (d)[0] = b;\
+    (d)[1] = g;\
+    (d)[2] = r;\
+}
+
+#define BPP 3
+
+#include "imgconvert_template.h"
+
+#undef RGB_IN
+#undef RGB_OUT
+#undef BPP
+
+/* rgb24 handling */
+
+#define RGB_NAME rgb24
+#define FMT_RGB24
+
+#define RGB_IN(r, g, b, s)\
+{\
+    r = (s)[0];\
+    g = (s)[1];\
+    b = (s)[2];\
+}
+
+#define RGB_OUT(d, r, g, b)\
+{\
+    (d)[0] = r;\
+    (d)[1] = g;\
+    (d)[2] = b;\
+}
+
+#define BPP 3
+
+#include "imgconvert_template.h"
+
+/* rgba32 handling */
+
+#define RGB_NAME rgba32
+#define FMT_RGBA32
+
+#define RGB_IN(r, g, b, s)\
+{\
+    unsigned int v = ((const uint32_t *)(s))[0];\
+    r = (v >> 16) & 0xff;\
+    g = (v >> 8) & 0xff;\
+    b = v & 0xff;\
+}
+
+#define RGBA_IN(r, g, b, a, s)\
+{\
+    unsigned int v = ((const uint32_t *)(s))[0];\
+    a = (v >> 24) & 0xff;\
+    r = (v >> 16) & 0xff;\
+    g = (v >> 8) & 0xff;\
+    b = v & 0xff;\
+}
+
+#define RGBA_OUT(d, r, g, b, a)\
+{\
+    ((uint32_t *)(d))[0] = (a << 24) | (r << 16) | (g << 8) | b;\
+}
+
+#define BPP 4
+
+#include "imgconvert_template.h"
+
+static void mono_to_gray(AVPicture *dst, const AVPicture *src,
+                         int width, int height, int xor_mask)
+{
+    const unsigned char *p;
+    unsigned char *q;
+    int v, dst_wrap, src_wrap;
+    int y, w;
+
+    p = src->data[0];
+    src_wrap = src->linesize[0] - ((width + 7) >> 3);
+
+    q = dst->data[0];
+    dst_wrap = dst->linesize[0] - width;
+    for(y=0;y<height;y++) {
+        w = width;
+        while (w >= 8) {
+            v = *p++ ^ xor_mask;
+            q[0] = -(v >> 7);
+            q[1] = -((v >> 6) & 1);
+            q[2] = -((v >> 5) & 1);
+            q[3] = -((v >> 4) & 1);
+            q[4] = -((v >> 3) & 1);
+            q[5] = -((v >> 2) & 1);
+            q[6] = -((v >> 1) & 1);
+            q[7] = -((v >> 0) & 1);
+            w -= 8;
+            q += 8;
+        }
+        if (w > 0) {
+            v = *p++ ^ xor_mask;
+            do {
+                q[0] = -((v >> 7) & 1);
+                q++;
+                v <<= 1;
+            } while (--w);
+        }
+        p += src_wrap;
+        q += dst_wrap;
+    }
+}
+
+static void monowhite_to_gray(AVPicture *dst, const AVPicture *src,
+                               int width, int height)
+{
+    mono_to_gray(dst, src, width, height, 0xff);
+}
+
+static void monoblack_to_gray(AVPicture *dst, const AVPicture *src,
+                               int width, int height)
+{
+    mono_to_gray(dst, src, width, height, 0x00);
+}
+
+static void gray_to_mono(AVPicture *dst, const AVPicture *src,
+                         int width, int height, int xor_mask)
+{
+    int n;
+    const uint8_t *s;
+    uint8_t *d;
+    int j, b, v, n1, src_wrap, dst_wrap, y;
+
+    s = src->data[0];
+    src_wrap = src->linesize[0] - width;
+
+    d = dst->data[0];
+    dst_wrap = dst->linesize[0] - ((width + 7) >> 3);
+
+    for(y=0;y<height;y++) {
+        n = width;
+        while (n >= 8) {
+            v = 0;
+            for(j=0;j<8;j++) {
+                b = s[0];
+                s++;
+                v = (v << 1) | (b >> 7);
+            }
+            d[0] = v ^ xor_mask;
+            d++;
+            n -= 8;
+        }
+        if (n > 0) {
+            n1 = n;
+            v = 0;
+            while (n > 0) {
+                b = s[0];
+                s++;
+                v = (v << 1) | (b >> 7);
+                n--;
+            }
+            d[0] = (v << (8 - (n1 & 7))) ^ xor_mask;
+            d++;
+        }
+        s += src_wrap;
+        d += dst_wrap;
+    }
+}
+
+static void gray_to_monowhite(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    gray_to_mono(dst, src, width, height, 0xff);
+}
+
+static void gray_to_monoblack(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    gray_to_mono(dst, src, width, height, 0x00);
+}
+
+static void gray_to_gray16(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    int x, y, src_wrap, dst_wrap;
+    uint8_t *s, *d;
+    s = src->data[0];
+    src_wrap = src->linesize[0] - width;
+    d = dst->data[0];
+    dst_wrap = dst->linesize[0] - width * 2;
+    for(y=0; y<height; y++){
+        for(x=0; x<width; x++){
+            *d++ = *s;
+            *d++ = *s++;
+        }
+        s += src_wrap;
+        d += dst_wrap;
+    }
+}
+
+static void gray16_to_gray(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    int x, y, src_wrap, dst_wrap;
+    uint8_t *s, *d;
+    s = src->data[0];
+    src_wrap = src->linesize[0] - width * 2;
+    d = dst->data[0];
+    dst_wrap = dst->linesize[0] - width;
+    for(y=0; y<height; y++){
+        for(x=0; x<width; x++){
+            *d++ = *s;
+            s += 2;
+        }
+        s += src_wrap;
+        d += dst_wrap;
+    }
+}
+
+static void gray16be_to_gray(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    gray16_to_gray(dst, src, width, height);
+}
+
+static void gray16le_to_gray(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    gray16_to_gray(dst, src + 1, width, height);
+}
+
+static void gray16_to_gray16(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    int x, y, src_wrap, dst_wrap;
+    uint16_t *s, *d;
+    s = src->data[0];
+    src_wrap = (src->linesize[0] - width * 2)/2;
+    d = dst->data[0];
+    dst_wrap = (dst->linesize[0] - width * 2)/2;
+    for(y=0; y<height; y++){
+        for(x=0; x<width; x++){
+            *d++ = bswap_16(*s++);
+        }
+        s += src_wrap;
+        d += dst_wrap;
+    }
+}
+
+
+typedef struct ConvertEntry {
+    void (*convert)(AVPicture *dst,
+                    const AVPicture *src, int width, int height);
+} ConvertEntry;
+
+/* Add each new convertion function in this table. In order to be able
+   to convert from any format to any format, the following constraints
+   must be satisfied:
+
+   - all FF_COLOR_RGB formats must convert to and from PIX_FMT_RGB24
+
+   - all FF_COLOR_GRAY formats must convert to and from PIX_FMT_GRAY8
+
+   - all FF_COLOR_RGB formats with alpha must convert to and from PIX_FMT_RGBA32
+
+   - PIX_FMT_YUV444P and PIX_FMT_YUVJ444P must convert to and from
+     PIX_FMT_RGB24.
+
+   - PIX_FMT_422 must convert to and from PIX_FMT_422P.
+
+   The other conversion functions are just optimisations for common cases.
+*/
+static const ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = {
+    [PIX_FMT_YUV420P] = {
+        [PIX_FMT_YUV422] = {
+            .convert = yuv420p_to_yuv422,
+        },
+        [PIX_FMT_RGB555] = {
+            .convert = yuv420p_to_rgb555
+        },
+        [PIX_FMT_RGB565] = {
+            .convert = yuv420p_to_rgb565
+        },
+        [PIX_FMT_BGR24] = {
+            .convert = yuv420p_to_bgr24
+        },
+        [PIX_FMT_RGB24] = {
+            .convert = yuv420p_to_rgb24
+        },
+        [PIX_FMT_RGBA32] = {
+            .convert = yuv420p_to_rgba32
+        },
+        [PIX_FMT_UYVY422] = {
+            .convert = yuv420p_to_uyvy422,
+        },
+    },
+    [PIX_FMT_YUV422P] = {
+        [PIX_FMT_YUV422] = {
+            .convert = yuv422p_to_yuv422,
+        },
+        [PIX_FMT_UYVY422] = {
+            .convert = yuv422p_to_uyvy422,
+        },
+    },
+    [PIX_FMT_YUV444P] = {
+        [PIX_FMT_RGB24] = {
+            .convert = yuv444p_to_rgb24
+        },
+    },
+    [PIX_FMT_YUVJ420P] = {
+        [PIX_FMT_RGB555] = {
+            .convert = yuvj420p_to_rgb555
+        },
+        [PIX_FMT_RGB565] = {
+            .convert = yuvj420p_to_rgb565
+        },
+        [PIX_FMT_BGR24] = {
+            .convert = yuvj420p_to_bgr24
+        },
+        [PIX_FMT_RGB24] = {
+            .convert = yuvj420p_to_rgb24
+        },
+        [PIX_FMT_RGBA32] = {
+            .convert = yuvj420p_to_rgba32
+        },
+    },
+    [PIX_FMT_YUVJ444P] = {
+        [PIX_FMT_RGB24] = {
+            .convert = yuvj444p_to_rgb24
+        },
+    },
+    [PIX_FMT_YUV422] = {
+        [PIX_FMT_YUV420P] = {
+            .convert = yuv422_to_yuv420p,
+        },
+        [PIX_FMT_YUV422P] = {
+            .convert = yuv422_to_yuv422p,
+        },
+    },
+    [PIX_FMT_UYVY422] = {
+        [PIX_FMT_YUV420P] = {
+            .convert = uyvy422_to_yuv420p,
+        },
+        [PIX_FMT_YUV422P] = {
+            .convert = uyvy422_to_yuv422p,
+        },
+    },
+    [PIX_FMT_RGB24] = {
+        [PIX_FMT_YUV420P] = {
+            .convert = rgb24_to_yuv420p
+        },
+        [PIX_FMT_RGB565] = {
+            .convert = rgb24_to_rgb565
+        },
+        [PIX_FMT_RGB555] = {
+            .convert = rgb24_to_rgb555
+        },
+        [PIX_FMT_RGBA32] = {
+            .convert = rgb24_to_rgba32
+        },
+        [PIX_FMT_BGR24] = {
+            .convert = rgb24_to_bgr24
+        },
+        [PIX_FMT_GRAY8] = {
+            .convert = rgb24_to_gray
+        },
+        [PIX_FMT_PAL8] = {
+            .convert = rgb24_to_pal8
+        },
+        [PIX_FMT_YUV444P] = {
+            .convert = rgb24_to_yuv444p
+        },
+        [PIX_FMT_YUVJ420P] = {
+            .convert = rgb24_to_yuvj420p
+        },
+        [PIX_FMT_YUVJ444P] = {
+            .convert = rgb24_to_yuvj444p
+        },
+    },
+    [PIX_FMT_RGBA32] = {
+        [PIX_FMT_RGB24] = {
+            .convert = rgba32_to_rgb24
+        },
+        [PIX_FMT_BGR24] = {
+            .convert = rgba32_to_bgr24
+        },
+        [PIX_FMT_RGB565] = {
+            .convert = rgba32_to_rgb565
+        },
+        [PIX_FMT_RGB555] = {
+            .convert = rgba32_to_rgb555
+        },
+        [PIX_FMT_PAL8] = {
+            .convert = rgba32_to_pal8
+        },
+        [PIX_FMT_YUV420P] = {
+            .convert = rgba32_to_yuv420p
+        },
+        [PIX_FMT_GRAY8] = {
+            .convert = rgba32_to_gray
+        },
+    },
+    [PIX_FMT_BGR24] = {
+        [PIX_FMT_RGBA32] = {
+            .convert = bgr24_to_rgba32
+        },
+        [PIX_FMT_RGB24] = {
+            .convert = bgr24_to_rgb24
+        },
+        [PIX_FMT_YUV420P] = {
+            .convert = bgr24_to_yuv420p
+        },
+        [PIX_FMT_GRAY8] = {
+            .convert = bgr24_to_gray
+        },
+    },
+    [PIX_FMT_RGB555] = {
+        [PIX_FMT_RGB24] = {
+            .convert = rgb555_to_rgb24
+        },
+        [PIX_FMT_RGBA32] = {
+            .convert = rgb555_to_rgba32
+        },
+        [PIX_FMT_YUV420P] = {
+            .convert = rgb555_to_yuv420p
+        },
+        [PIX_FMT_GRAY8] = {
+            .convert = rgb555_to_gray
+        },
+    },
+    [PIX_FMT_RGB565] = {
+        [PIX_FMT_RGBA32] = {
+            .convert = rgb565_to_rgba32
+        },
+        [PIX_FMT_RGB24] = {
+            .convert = rgb565_to_rgb24
+        },
+        [PIX_FMT_YUV420P] = {
+            .convert = rgb565_to_yuv420p
+        },
+        [PIX_FMT_GRAY8] = {
+            .convert = rgb565_to_gray
+        },
+    },
+    [PIX_FMT_GRAY16BE] = {
+        [PIX_FMT_GRAY8] = {
+            .convert = gray16be_to_gray
+        },
+        [PIX_FMT_GRAY16LE] = {
+            .convert = gray16_to_gray16
+        },
+    },
+    [PIX_FMT_GRAY16LE] = {
+        [PIX_FMT_GRAY8] = {
+            .convert = gray16le_to_gray
+        },
+        [PIX_FMT_GRAY16BE] = {
+            .convert = gray16_to_gray16
+        },
+    },
+    [PIX_FMT_GRAY8] = {
+        [PIX_FMT_RGB555] = {
+            .convert = gray_to_rgb555
+        },
+        [PIX_FMT_RGB565] = {
+            .convert = gray_to_rgb565
+        },
+        [PIX_FMT_RGB24] = {
+            .convert = gray_to_rgb24
+        },
+        [PIX_FMT_BGR24] = {
+            .convert = gray_to_bgr24
+        },
+        [PIX_FMT_RGBA32] = {
+            .convert = gray_to_rgba32
+        },
+        [PIX_FMT_MONOWHITE] = {
+            .convert = gray_to_monowhite
+        },
+        [PIX_FMT_MONOBLACK] = {
+            .convert = gray_to_monoblack
+        },
+        [PIX_FMT_GRAY16LE] = {
+            .convert = gray_to_gray16
+        },
+        [PIX_FMT_GRAY16BE] = {
+            .convert = gray_to_gray16
+        },
+    },
+    [PIX_FMT_MONOWHITE] = {
+        [PIX_FMT_GRAY8] = {
+            .convert = monowhite_to_gray
+        },
+    },
+    [PIX_FMT_MONOBLACK] = {
+        [PIX_FMT_GRAY8] = {
+            .convert = monoblack_to_gray
+        },
+    },
+    [PIX_FMT_PAL8] = {
+        [PIX_FMT_RGB555] = {
+            .convert = pal8_to_rgb555
+        },
+        [PIX_FMT_RGB565] = {
+            .convert = pal8_to_rgb565
+        },
+        [PIX_FMT_BGR24] = {
+            .convert = pal8_to_bgr24
+        },
+        [PIX_FMT_RGB24] = {
+            .convert = pal8_to_rgb24
+        },
+        [PIX_FMT_RGBA32] = {
+            .convert = pal8_to_rgba32
+        },
+    },
+    [PIX_FMT_UYVY411] = {
+        [PIX_FMT_YUV411P] = {
+            .convert = uyvy411_to_yuv411p,
+        },
+    },
+
+};
+
+int avpicture_alloc(AVPicture *picture,
+                           int pix_fmt, int width, int height)
+{
+    int size;
+    void *ptr;
+
+    size = avpicture_get_size(pix_fmt, width, height);
+    if(size<0)
+        goto fail;
+    ptr = av_malloc(size);
+    if (!ptr)
+        goto fail;
+    avpicture_fill(picture, ptr, pix_fmt, width, height);
+    return 0;
+ fail:
+    memset(picture, 0, sizeof(AVPicture));
+    return -1;
+}
+
+void avpicture_free(AVPicture *picture)
+{
+    av_free(picture->data[0]);
+}
+
+/* return true if yuv planar */
+static inline int is_yuv_planar(const PixFmtInfo *ps)
+{
+    return (ps->color_type == FF_COLOR_YUV ||
+            ps->color_type == FF_COLOR_YUV_JPEG) &&
+        ps->pixel_type == FF_PIXEL_PLANAR;
+}
+
+/**
+ * Crop image top and left side
+ */
+int img_crop(AVPicture *dst, const AVPicture *src,
+              int pix_fmt, int top_band, int left_band)
+{
+    int y_shift;
+    int x_shift;
+
+    if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB || !is_yuv_planar(&pix_fmt_info[pix_fmt]))
+        return -1;
+
+    y_shift = pix_fmt_info[pix_fmt].y_chroma_shift;
+    x_shift = pix_fmt_info[pix_fmt].x_chroma_shift;
+
+    dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + left_band;
+    dst->data[1] = src->data[1] + ((top_band >> y_shift) * src->linesize[1]) + (left_band >> x_shift);
+    dst->data[2] = src->data[2] + ((top_band >> y_shift) * src->linesize[2]) + (left_band >> x_shift);
+
+    dst->linesize[0] = src->linesize[0];
+    dst->linesize[1] = src->linesize[1];
+    dst->linesize[2] = src->linesize[2];
+    return 0;
+}
+
+/**
+ * Pad image
+ */
+int img_pad(AVPicture *dst, const AVPicture *src, int height, int width, int pix_fmt,
+            int padtop, int padbottom, int padleft, int padright, int *color)
+{
+    uint8_t *optr, *iptr;
+    int y_shift;
+    int x_shift;
+    int yheight;
+    int i, y;
+
+    if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB || !is_yuv_planar(&pix_fmt_info[pix_fmt]))
+        return -1;
+
+    for (i = 0; i < 3; i++) {
+        x_shift = i ? pix_fmt_info[pix_fmt].x_chroma_shift : 0;
+        y_shift = i ? pix_fmt_info[pix_fmt].y_chroma_shift : 0;
+
+        if (padtop || padleft) {
+            memset(dst->data[i], color[i], dst->linesize[i] * (padtop >> y_shift) + (padleft >> x_shift));
+        }
+
+        if (padleft || padright || src) {
+            if (src) { /* first line */
+                iptr = src->data[i];
+                optr = dst->data[i] + dst->linesize[i] * (padtop >> y_shift) + (padleft >> x_shift);
+                memcpy(optr, iptr, src->linesize[i]);
+                iptr += src->linesize[i];
+            }
+            optr = dst->data[i] + dst->linesize[i] * (padtop >> y_shift) + (dst->linesize[i] - (padright >> x_shift));
+            yheight = (height - 1 - (padtop + padbottom)) >> y_shift;
+            for (y = 0; y < yheight; y++) {
+                memset(optr, color[i], (padleft + padright) >> x_shift);
+                if (src) {
+                    memcpy(optr + ((padleft + padright) >> x_shift), iptr, src->linesize[i]);
+                    iptr += src->linesize[i];
+                }
+                optr += dst->linesize[i];
+            }
+        }
+
+        if (padbottom || padright) {
+            optr = dst->data[i] + dst->linesize[i] * ((height - padbottom) >> y_shift) - (padright >> x_shift);
+            memset(optr, color[i], dst->linesize[i] * (padbottom >> y_shift) + (padright >> x_shift));
+        }
+    }
+    return 0;
+}
+
+#ifndef CONFIG_SWSCALER
+/* XXX: always use linesize. Return -1 if not supported */
+int img_convert(AVPicture *dst, int dst_pix_fmt,
+                const AVPicture *src, int src_pix_fmt,
+                int src_width, int src_height)
+{
+    static int inited;
+    int i, ret, dst_width, dst_height, int_pix_fmt;
+    const PixFmtInfo *src_pix, *dst_pix;
+    const ConvertEntry *ce;
+    AVPicture tmp1, *tmp = &tmp1;
+
+    if (src_pix_fmt < 0 || src_pix_fmt >= PIX_FMT_NB ||
+        dst_pix_fmt < 0 || dst_pix_fmt >= PIX_FMT_NB)
+        return -1;
+    if (src_width <= 0 || src_height <= 0)
+        return 0;
+
+    if (!inited) {
+        inited = 1;
+        img_convert_init();
+    }
+
+    dst_width = src_width;
+    dst_height = src_height;
+
+    dst_pix = &pix_fmt_info[dst_pix_fmt];
+    src_pix = &pix_fmt_info[src_pix_fmt];
+    if (src_pix_fmt == dst_pix_fmt) {
+        /* no conversion needed: just copy */
+        img_copy(dst, src, dst_pix_fmt, dst_width, dst_height);
+        return 0;
+    }
+
+    ce = &convert_table[src_pix_fmt][dst_pix_fmt];
+    if (ce->convert) {
+        /* specific conversion routine */
+        ce->convert(dst, src, dst_width, dst_height);
+        return 0;
+    }
+
+    /* gray to YUV */
+    if (is_yuv_planar(dst_pix) &&
+        src_pix_fmt == PIX_FMT_GRAY8) {
+        int w, h, y;
+        uint8_t *d;
+
+        if (dst_pix->color_type == FF_COLOR_YUV_JPEG) {
+            ff_img_copy_plane(dst->data[0], dst->linesize[0],
+                     src->data[0], src->linesize[0],
+                     dst_width, dst_height);
+        } else {
+            img_apply_table(dst->data[0], dst->linesize[0],
+                            src->data[0], src->linesize[0],
+                            dst_width, dst_height,
+                            y_jpeg_to_ccir);
+        }
+        /* fill U and V with 128 */
+        w = dst_width;
+        h = dst_height;
+        w >>= dst_pix->x_chroma_shift;
+        h >>= dst_pix->y_chroma_shift;
+        for(i = 1; i <= 2; i++) {
+            d = dst->data[i];
+            for(y = 0; y< h; y++) {
+                memset(d, 128, w);
+                d += dst->linesize[i];
+            }
+        }
+        return 0;
+    }
+
+    /* YUV to gray */
+    if (is_yuv_planar(src_pix) &&
+        dst_pix_fmt == PIX_FMT_GRAY8) {
+        if (src_pix->color_type == FF_COLOR_YUV_JPEG) {
+            ff_img_copy_plane(dst->data[0], dst->linesize[0],
+                     src->data[0], src->linesize[0],
+                     dst_width, dst_height);
+        } else {
+            img_apply_table(dst->data[0], dst->linesize[0],
+                            src->data[0], src->linesize[0],
+                            dst_width, dst_height,
+                            y_ccir_to_jpeg);
+        }
+        return 0;
+    }
+
+    /* YUV to YUV planar */
+    if (is_yuv_planar(dst_pix) && is_yuv_planar(src_pix)) {
+        int x_shift, y_shift, w, h, xy_shift;
+        void (*resize_func)(uint8_t *dst, int dst_wrap,
+                            const uint8_t *src, int src_wrap,
+                            int width, int height);
+
+        /* compute chroma size of the smallest dimensions */
+        w = dst_width;
+        h = dst_height;
+        if (dst_pix->x_chroma_shift >= src_pix->x_chroma_shift)
+            w >>= dst_pix->x_chroma_shift;
+        else
+            w >>= src_pix->x_chroma_shift;
+        if (dst_pix->y_chroma_shift >= src_pix->y_chroma_shift)
+            h >>= dst_pix->y_chroma_shift;
+        else
+            h >>= src_pix->y_chroma_shift;
+
+        x_shift = (dst_pix->x_chroma_shift - src_pix->x_chroma_shift);
+        y_shift = (dst_pix->y_chroma_shift - src_pix->y_chroma_shift);
+        xy_shift = ((x_shift & 0xf) << 4) | (y_shift & 0xf);
+        /* there must be filters for conversion at least from and to
+           YUV444 format */
+        switch(xy_shift) {
+        case 0x00:
+            resize_func = ff_img_copy_plane;
+            break;
+        case 0x10:
+            resize_func = shrink21;
+            break;
+        case 0x20:
+            resize_func = shrink41;
+            break;
+        case 0x01:
+            resize_func = shrink12;
+            break;
+        case 0x11:
+            resize_func = ff_shrink22;
+            break;
+        case 0x22:
+            resize_func = ff_shrink44;
+            break;
+        case 0xf0:
+            resize_func = grow21;
+            break;
+        case 0xe0:
+            resize_func = grow41;
+            break;
+        case 0xff:
+            resize_func = grow22;
+            break;
+        case 0xee:
+            resize_func = grow44;
+            break;
+        case 0xf1:
+            resize_func = conv411;
+            break;
+        default:
+            /* currently not handled */
+            goto no_chroma_filter;
+        }
+
+        ff_img_copy_plane(dst->data[0], dst->linesize[0],
+                       src->data[0], src->linesize[0],
+                       dst_width, dst_height);
+
+        for(i = 1;i <= 2; i++)
+            resize_func(dst->data[i], dst->linesize[i],
+                        src->data[i], src->linesize[i],
+                        dst_width>>dst_pix->x_chroma_shift, dst_height>>dst_pix->y_chroma_shift);
+        /* if yuv color space conversion is needed, we do it here on
+           the destination image */
+        if (dst_pix->color_type != src_pix->color_type) {
+            const uint8_t *y_table, *c_table;
+            if (dst_pix->color_type == FF_COLOR_YUV) {
+                y_table = y_jpeg_to_ccir;
+                c_table = c_jpeg_to_ccir;
+            } else {
+                y_table = y_ccir_to_jpeg;
+                c_table = c_ccir_to_jpeg;
+            }
+            img_apply_table(dst->data[0], dst->linesize[0],
+                            dst->data[0], dst->linesize[0],
+                            dst_width, dst_height,
+                            y_table);
+
+            for(i = 1;i <= 2; i++)
+                img_apply_table(dst->data[i], dst->linesize[i],
+                                dst->data[i], dst->linesize[i],
+                                dst_width>>dst_pix->x_chroma_shift,
+                                dst_height>>dst_pix->y_chroma_shift,
+                                c_table);
+        }
+        return 0;
+    }
+ no_chroma_filter:
+
+    /* try to use an intermediate format */
+    if (src_pix_fmt == PIX_FMT_YUV422 ||
+        dst_pix_fmt == PIX_FMT_YUV422) {
+        /* specific case: convert to YUV422P first */
+        int_pix_fmt = PIX_FMT_YUV422P;
+    } else if (src_pix_fmt == PIX_FMT_UYVY422 ||
+        dst_pix_fmt == PIX_FMT_UYVY422) {
+        /* specific case: convert to YUV422P first */
+        int_pix_fmt = PIX_FMT_YUV422P;
+    } else if (src_pix_fmt == PIX_FMT_UYVY411 ||
+        dst_pix_fmt == PIX_FMT_UYVY411) {
+        /* specific case: convert to YUV411P first */
+        int_pix_fmt = PIX_FMT_YUV411P;
+    } else if ((src_pix->color_type == FF_COLOR_GRAY &&
+                src_pix_fmt != PIX_FMT_GRAY8) ||
+               (dst_pix->color_type == FF_COLOR_GRAY &&
+                dst_pix_fmt != PIX_FMT_GRAY8)) {
+        /* gray8 is the normalized format */
+        int_pix_fmt = PIX_FMT_GRAY8;
+    } else if ((is_yuv_planar(src_pix) &&
+                src_pix_fmt != PIX_FMT_YUV444P &&
+                src_pix_fmt != PIX_FMT_YUVJ444P)) {
+        /* yuv444 is the normalized format */
+        if (src_pix->color_type == FF_COLOR_YUV_JPEG)
+            int_pix_fmt = PIX_FMT_YUVJ444P;
+        else
+            int_pix_fmt = PIX_FMT_YUV444P;
+    } else if ((is_yuv_planar(dst_pix) &&
+                dst_pix_fmt != PIX_FMT_YUV444P &&
+                dst_pix_fmt != PIX_FMT_YUVJ444P)) {
+        /* yuv444 is the normalized format */
+        if (dst_pix->color_type == FF_COLOR_YUV_JPEG)
+            int_pix_fmt = PIX_FMT_YUVJ444P;
+        else
+            int_pix_fmt = PIX_FMT_YUV444P;
+    } else {
+        /* the two formats are rgb or gray8 or yuv[j]444p */
+        if (src_pix->is_alpha && dst_pix->is_alpha)
+            int_pix_fmt = PIX_FMT_RGBA32;
+        else
+            int_pix_fmt = PIX_FMT_RGB24;
+    }
+    if (avpicture_alloc(tmp, int_pix_fmt, dst_width, dst_height) < 0)
+        return -1;
+    ret = -1;
+    if (img_convert(tmp, int_pix_fmt,
+                    src, src_pix_fmt, src_width, src_height) < 0)
+        goto fail1;
+    if (img_convert(dst, dst_pix_fmt,
+                    tmp, int_pix_fmt, dst_width, dst_height) < 0)
+        goto fail1;
+    ret = 0;
+ fail1:
+    avpicture_free(tmp);
+    return ret;
+}
+#endif
+
+/* NOTE: we scan all the pixels to have an exact information */
+static int get_alpha_info_pal8(const AVPicture *src, int width, int height)
+{
+    const unsigned char *p;
+    int src_wrap, ret, x, y;
+    unsigned int a;
+    uint32_t *palette = (uint32_t *)src->data[1];
+
+    p = src->data[0];
+    src_wrap = src->linesize[0] - width;
+    ret = 0;
+    for(y=0;y<height;y++) {
+        for(x=0;x<width;x++) {
+            a = palette[p[0]] >> 24;
+            if (a == 0x00) {
+                ret |= FF_ALPHA_TRANSP;
+            } else if (a != 0xff) {
+                ret |= FF_ALPHA_SEMI_TRANSP;
+            }
+            p++;
+        }
+        p += src_wrap;
+    }
+    return ret;
+}
+
+/**
+ * Tell if an image really has transparent alpha values.
+ * @return ored mask of FF_ALPHA_xxx constants
+ */
+int img_get_alpha_info(const AVPicture *src,
+                       int pix_fmt, int width, int height)
+{
+    const PixFmtInfo *pf = &pix_fmt_info[pix_fmt];
+    int ret;
+
+    pf = &pix_fmt_info[pix_fmt];
+    /* no alpha can be represented in format */
+    if (!pf->is_alpha)
+        return 0;
+    switch(pix_fmt) {
+    case PIX_FMT_RGBA32:
+        ret = get_alpha_info_rgba32(src, width, height);
+        break;
+    case PIX_FMT_PAL8:
+        ret = get_alpha_info_pal8(src, width, height);
+        break;
+    default:
+        /* we do not know, so everything is indicated */
+        ret = FF_ALPHA_TRANSP | FF_ALPHA_SEMI_TRANSP;
+        break;
+    }
+    return ret;
+}
+
+#ifdef HAVE_MMX
+#define DEINT_INPLACE_LINE_LUM \
+                    movd_m2r(lum_m4[0],mm0);\
+                    movd_m2r(lum_m3[0],mm1);\
+                    movd_m2r(lum_m2[0],mm2);\
+                    movd_m2r(lum_m1[0],mm3);\
+                    movd_m2r(lum[0],mm4);\
+                    punpcklbw_r2r(mm7,mm0);\
+                    movd_r2m(mm2,lum_m4[0]);\
+                    punpcklbw_r2r(mm7,mm1);\
+                    punpcklbw_r2r(mm7,mm2);\
+                    punpcklbw_r2r(mm7,mm3);\
+                    punpcklbw_r2r(mm7,mm4);\
+                    paddw_r2r(mm3,mm1);\
+                    psllw_i2r(1,mm2);\
+                    paddw_r2r(mm4,mm0);\
+                    psllw_i2r(2,mm1);\
+                    paddw_r2r(mm6,mm2);\
+                    paddw_r2r(mm2,mm1);\
+                    psubusw_r2r(mm0,mm1);\
+                    psrlw_i2r(3,mm1);\
+                    packuswb_r2r(mm7,mm1);\
+                    movd_r2m(mm1,lum_m2[0]);
+
+#define DEINT_LINE_LUM \
+                    movd_m2r(lum_m4[0],mm0);\
+                    movd_m2r(lum_m3[0],mm1);\
+                    movd_m2r(lum_m2[0],mm2);\
+                    movd_m2r(lum_m1[0],mm3);\
+                    movd_m2r(lum[0],mm4);\
+                    punpcklbw_r2r(mm7,mm0);\
+                    punpcklbw_r2r(mm7,mm1);\
+                    punpcklbw_r2r(mm7,mm2);\
+                    punpcklbw_r2r(mm7,mm3);\
+                    punpcklbw_r2r(mm7,mm4);\
+                    paddw_r2r(mm3,mm1);\
+                    psllw_i2r(1,mm2);\
+                    paddw_r2r(mm4,mm0);\
+                    psllw_i2r(2,mm1);\
+                    paddw_r2r(mm6,mm2);\
+                    paddw_r2r(mm2,mm1);\
+                    psubusw_r2r(mm0,mm1);\
+                    psrlw_i2r(3,mm1);\
+                    packuswb_r2r(mm7,mm1);\
+                    movd_r2m(mm1,dst[0]);
+#endif
+
+/* filter parameters: [-1 4 2 4 -1] // 8 */
+static void deinterlace_line(uint8_t *dst,
+                             const uint8_t *lum_m4, const uint8_t *lum_m3,
+                             const uint8_t *lum_m2, const uint8_t *lum_m1,
+                             const uint8_t *lum,
+                             int size)
+{
+#ifndef HAVE_MMX
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    int sum;
+
+    for(;size > 0;size--) {
+        sum = -lum_m4[0];
+        sum += lum_m3[0] << 2;
+        sum += lum_m2[0] << 1;
+        sum += lum_m1[0] << 2;
+        sum += -lum[0];
+        dst[0] = cm[(sum + 4) >> 3];
+        lum_m4++;
+        lum_m3++;
+        lum_m2++;
+        lum_m1++;
+        lum++;
+        dst++;
+    }
+#else
+
+    {
+        mmx_t rounder;
+        rounder.uw[0]=4;
+        rounder.uw[1]=4;
+        rounder.uw[2]=4;
+        rounder.uw[3]=4;
+        pxor_r2r(mm7,mm7);
+        movq_m2r(rounder,mm6);
+    }
+    for (;size > 3; size-=4) {
+        DEINT_LINE_LUM
+        lum_m4+=4;
+        lum_m3+=4;
+        lum_m2+=4;
+        lum_m1+=4;
+        lum+=4;
+        dst+=4;
+    }
+#endif
+}
+static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *lum_m2, uint8_t *lum_m1, uint8_t *lum,
+                             int size)
+{
+#ifndef HAVE_MMX
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    int sum;
+
+    for(;size > 0;size--) {
+        sum = -lum_m4[0];
+        sum += lum_m3[0] << 2;
+        sum += lum_m2[0] << 1;
+        lum_m4[0]=lum_m2[0];
+        sum += lum_m1[0] << 2;
+        sum += -lum[0];
+        lum_m2[0] = cm[(sum + 4) >> 3];
+        lum_m4++;
+        lum_m3++;
+        lum_m2++;
+        lum_m1++;
+        lum++;
+    }
+#else
+
+    {
+        mmx_t rounder;
+        rounder.uw[0]=4;
+        rounder.uw[1]=4;
+        rounder.uw[2]=4;
+        rounder.uw[3]=4;
+        pxor_r2r(mm7,mm7);
+        movq_m2r(rounder,mm6);
+    }
+    for (;size > 3; size-=4) {
+        DEINT_INPLACE_LINE_LUM
+        lum_m4+=4;
+        lum_m3+=4;
+        lum_m2+=4;
+        lum_m1+=4;
+        lum+=4;
+    }
+#endif
+}
+
+/* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
+   top field is copied as is, but the bottom field is deinterlaced
+   against the top field. */
+static void deinterlace_bottom_field(uint8_t *dst, int dst_wrap,
+                                    const uint8_t *src1, int src_wrap,
+                                    int width, int height)
+{
+    const uint8_t *src_m2, *src_m1, *src_0, *src_p1, *src_p2;
+    int y;
+
+    src_m2 = src1;
+    src_m1 = src1;
+    src_0=&src_m1[src_wrap];
+    src_p1=&src_0[src_wrap];
+    src_p2=&src_p1[src_wrap];
+    for(y=0;y<(height-2);y+=2) {
+        memcpy(dst,src_m1,width);
+        dst += dst_wrap;
+        deinterlace_line(dst,src_m2,src_m1,src_0,src_p1,src_p2,width);
+        src_m2 = src_0;
+        src_m1 = src_p1;
+        src_0 = src_p2;
+        src_p1 += 2*src_wrap;
+        src_p2 += 2*src_wrap;
+        dst += dst_wrap;
+    }
+    memcpy(dst,src_m1,width);
+    dst += dst_wrap;
+    /* do last line */
+    deinterlace_line(dst,src_m2,src_m1,src_0,src_0,src_0,width);
+}
+
+static void deinterlace_bottom_field_inplace(uint8_t *src1, int src_wrap,
+                                             int width, int height)
+{
+    uint8_t *src_m1, *src_0, *src_p1, *src_p2;
+    int y;
+    uint8_t *buf;
+    buf = (uint8_t*)av_malloc(width);
+
+    src_m1 = src1;
+    memcpy(buf,src_m1,width);
+    src_0=&src_m1[src_wrap];
+    src_p1=&src_0[src_wrap];
+    src_p2=&src_p1[src_wrap];
+    for(y=0;y<(height-2);y+=2) {
+        deinterlace_line_inplace(buf,src_m1,src_0,src_p1,src_p2,width);
+        src_m1 = src_p1;
+        src_0 = src_p2;
+        src_p1 += 2*src_wrap;
+        src_p2 += 2*src_wrap;
+    }
+    /* do last line */
+    deinterlace_line_inplace(buf,src_m1,src_0,src_0,src_0,width);
+    av_free(buf);
+}
+
+
+/* deinterlace - if not supported return -1 */
+int avpicture_deinterlace(AVPicture *dst, const AVPicture *src,
+                          int pix_fmt, int width, int height)
+{
+    int i;
+
+    if (pix_fmt != PIX_FMT_YUV420P &&
+        pix_fmt != PIX_FMT_YUV422P &&
+        pix_fmt != PIX_FMT_YUV444P &&
+        pix_fmt != PIX_FMT_YUV411P)
+        return -1;
+    if ((width & 3) != 0 || (height & 3) != 0)
+        return -1;
+
+    for(i=0;i<3;i++) {
+        if (i == 1) {
+            switch(pix_fmt) {
+            case PIX_FMT_YUV420P:
+                width >>= 1;
+                height >>= 1;
+                break;
+            case PIX_FMT_YUV422P:
+                width >>= 1;
+                break;
+            case PIX_FMT_YUV411P:
+                width >>= 2;
+                break;
+            default:
+                break;
+            }
+        }
+        if (src == dst) {
+            deinterlace_bottom_field_inplace(dst->data[i], dst->linesize[i],
+                                 width, height);
+        } else {
+            deinterlace_bottom_field(dst->data[i],dst->linesize[i],
+                                        src->data[i], src->linesize[i],
+                                        width, height);
+        }
+    }
+#ifdef HAVE_MMX
+    emms();
+#endif
+    return 0;
+}
+
+#undef FIX
diff --git a/contrib/ffmpeg/libavcodec/imgconvert_template.h b/contrib/ffmpeg/libavcodec/imgconvert_template.h
new file mode 100644
index 000000000..4cc898bab
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/imgconvert_template.h
@@ -0,0 +1,875 @@
+/*
+ * Templates for image convertion routines
+ * Copyright (c) 2001, 2002, 2003 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef RGB_OUT
+#define RGB_OUT(d, r, g, b) RGBA_OUT(d, r, g, b, 0xff)
+#endif
+
+static void glue(yuv420p_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src,
+                                        int width, int height)
+{
+    const uint8_t *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr;
+    uint8_t *d, *d1, *d2;
+    int w, y, cb, cr, r_add, g_add, b_add, width2;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    unsigned int r, g, b;
+
+    d = dst->data[0];
+    y1_ptr = src->data[0];
+    cb_ptr = src->data[1];
+    cr_ptr = src->data[2];
+    width2 = (width + 1) >> 1;
+    for(;height >= 2; height -= 2) {
+        d1 = d;
+        d2 = d + dst->linesize[0];
+        y2_ptr = y1_ptr + src->linesize[0];
+        for(w = width; w >= 2; w -= 2) {
+            YUV_TO_RGB1_CCIR(cb_ptr[0], cr_ptr[0]);
+            /* output 4 pixels */
+            YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+
+            YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[1]);
+            RGB_OUT(d1 + BPP, r, g, b);
+
+            YUV_TO_RGB2_CCIR(r, g, b, y2_ptr[0]);
+            RGB_OUT(d2, r, g, b);
+
+            YUV_TO_RGB2_CCIR(r, g, b, y2_ptr[1]);
+            RGB_OUT(d2 + BPP, r, g, b);
+
+            d1 += 2 * BPP;
+            d2 += 2 * BPP;
+
+            y1_ptr += 2;
+            y2_ptr += 2;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        /* handle odd width */
+        if (w) {
+            YUV_TO_RGB1_CCIR(cb_ptr[0], cr_ptr[0]);
+            YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+
+            YUV_TO_RGB2_CCIR(r, g, b, y2_ptr[0]);
+            RGB_OUT(d2, r, g, b);
+            d1 += BPP;
+            d2 += BPP;
+            y1_ptr++;
+            y2_ptr++;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        d += 2 * dst->linesize[0];
+        y1_ptr += 2 * src->linesize[0] - width;
+        cb_ptr += src->linesize[1] - width2;
+        cr_ptr += src->linesize[2] - width2;
+    }
+    /* handle odd height */
+    if (height) {
+        d1 = d;
+        for(w = width; w >= 2; w -= 2) {
+            YUV_TO_RGB1_CCIR(cb_ptr[0], cr_ptr[0]);
+            /* output 2 pixels */
+            YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+
+            YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[1]);
+            RGB_OUT(d1 + BPP, r, g, b);
+
+            d1 += 2 * BPP;
+
+            y1_ptr += 2;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        /* handle width */
+        if (w) {
+            YUV_TO_RGB1_CCIR(cb_ptr[0], cr_ptr[0]);
+            /* output 2 pixels */
+            YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+            d1 += BPP;
+
+            y1_ptr++;
+            cb_ptr++;
+            cr_ptr++;
+        }
+    }
+}
+
+static void glue(yuvj420p_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src,
+                                         int width, int height)
+{
+    const uint8_t *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr;
+    uint8_t *d, *d1, *d2;
+    int w, y, cb, cr, r_add, g_add, b_add, width2;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    unsigned int r, g, b;
+
+    d = dst->data[0];
+    y1_ptr = src->data[0];
+    cb_ptr = src->data[1];
+    cr_ptr = src->data[2];
+    width2 = (width + 1) >> 1;
+    for(;height >= 2; height -= 2) {
+        d1 = d;
+        d2 = d + dst->linesize[0];
+        y2_ptr = y1_ptr + src->linesize[0];
+        for(w = width; w >= 2; w -= 2) {
+            YUV_TO_RGB1(cb_ptr[0], cr_ptr[0]);
+            /* output 4 pixels */
+            YUV_TO_RGB2(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+
+            YUV_TO_RGB2(r, g, b, y1_ptr[1]);
+            RGB_OUT(d1 + BPP, r, g, b);
+
+            YUV_TO_RGB2(r, g, b, y2_ptr[0]);
+            RGB_OUT(d2, r, g, b);
+
+            YUV_TO_RGB2(r, g, b, y2_ptr[1]);
+            RGB_OUT(d2 + BPP, r, g, b);
+
+            d1 += 2 * BPP;
+            d2 += 2 * BPP;
+
+            y1_ptr += 2;
+            y2_ptr += 2;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        /* handle odd width */
+        if (w) {
+            YUV_TO_RGB1(cb_ptr[0], cr_ptr[0]);
+            YUV_TO_RGB2(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+
+            YUV_TO_RGB2(r, g, b, y2_ptr[0]);
+            RGB_OUT(d2, r, g, b);
+            d1 += BPP;
+            d2 += BPP;
+            y1_ptr++;
+            y2_ptr++;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        d += 2 * dst->linesize[0];
+        y1_ptr += 2 * src->linesize[0] - width;
+        cb_ptr += src->linesize[1] - width2;
+        cr_ptr += src->linesize[2] - width2;
+    }
+    /* handle odd height */
+    if (height) {
+        d1 = d;
+        for(w = width; w >= 2; w -= 2) {
+            YUV_TO_RGB1(cb_ptr[0], cr_ptr[0]);
+            /* output 2 pixels */
+            YUV_TO_RGB2(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+
+            YUV_TO_RGB2(r, g, b, y1_ptr[1]);
+            RGB_OUT(d1 + BPP, r, g, b);
+
+            d1 += 2 * BPP;
+
+            y1_ptr += 2;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        /* handle width */
+        if (w) {
+            YUV_TO_RGB1(cb_ptr[0], cr_ptr[0]);
+            /* output 2 pixels */
+            YUV_TO_RGB2(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+            d1 += BPP;
+
+            y1_ptr++;
+            cb_ptr++;
+            cr_ptr++;
+        }
+    }
+}
+
+static void glue(RGB_NAME, _to_yuv420p)(AVPicture *dst, const AVPicture *src,
+                                        int width, int height)
+{
+    int wrap, wrap3, width2;
+    int r, g, b, r1, g1, b1, w;
+    uint8_t *lum, *cb, *cr;
+    const uint8_t *p;
+
+    lum = dst->data[0];
+    cb = dst->data[1];
+    cr = dst->data[2];
+
+    width2 = (width + 1) >> 1;
+    wrap = dst->linesize[0];
+    wrap3 = src->linesize[0];
+    p = src->data[0];
+    for(;height>=2;height -= 2) {
+        for(w = width; w >= 2; w -= 2) {
+            RGB_IN(r, g, b, p);
+            r1 = r;
+            g1 = g;
+            b1 = b;
+            lum[0] = RGB_TO_Y_CCIR(r, g, b);
+
+            RGB_IN(r, g, b, p + BPP);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[1] = RGB_TO_Y_CCIR(r, g, b);
+            p += wrap3;
+            lum += wrap;
+
+            RGB_IN(r, g, b, p);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[0] = RGB_TO_Y_CCIR(r, g, b);
+
+            RGB_IN(r, g, b, p + BPP);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[1] = RGB_TO_Y_CCIR(r, g, b);
+
+            cb[0] = RGB_TO_U_CCIR(r1, g1, b1, 2);
+            cr[0] = RGB_TO_V_CCIR(r1, g1, b1, 2);
+
+            cb++;
+            cr++;
+            p += -wrap3 + 2 * BPP;
+            lum += -wrap + 2;
+        }
+        if (w) {
+            RGB_IN(r, g, b, p);
+            r1 = r;
+            g1 = g;
+            b1 = b;
+            lum[0] = RGB_TO_Y_CCIR(r, g, b);
+            p += wrap3;
+            lum += wrap;
+            RGB_IN(r, g, b, p);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[0] = RGB_TO_Y_CCIR(r, g, b);
+            cb[0] = RGB_TO_U_CCIR(r1, g1, b1, 1);
+            cr[0] = RGB_TO_V_CCIR(r1, g1, b1, 1);
+            cb++;
+            cr++;
+            p += -wrap3 + BPP;
+            lum += -wrap + 1;
+        }
+        p += wrap3 + (wrap3 - width * BPP);
+        lum += wrap + (wrap - width);
+        cb += dst->linesize[1] - width2;
+        cr += dst->linesize[2] - width2;
+    }
+    /* handle odd height */
+    if (height) {
+        for(w = width; w >= 2; w -= 2) {
+            RGB_IN(r, g, b, p);
+            r1 = r;
+            g1 = g;
+            b1 = b;
+            lum[0] = RGB_TO_Y_CCIR(r, g, b);
+
+            RGB_IN(r, g, b, p + BPP);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[1] = RGB_TO_Y_CCIR(r, g, b);
+            cb[0] = RGB_TO_U_CCIR(r1, g1, b1, 1);
+            cr[0] = RGB_TO_V_CCIR(r1, g1, b1, 1);
+            cb++;
+            cr++;
+            p += 2 * BPP;
+           lum += 2;
+        }
+        if (w) {
+            RGB_IN(r, g, b, p);
+            lum[0] = RGB_TO_Y_CCIR(r, g, b);
+            cb[0] = RGB_TO_U_CCIR(r, g, b, 0);
+            cr[0] = RGB_TO_V_CCIR(r, g, b, 0);
+        }
+    }
+}
+
+static void glue(RGB_NAME, _to_gray)(AVPicture *dst, const AVPicture *src,
+                                     int width, int height)
+{
+    const unsigned char *p;
+    unsigned char *q;
+    int r, g, b, dst_wrap, src_wrap;
+    int x, y;
+
+    p = src->data[0];
+    src_wrap = src->linesize[0] - BPP * width;
+
+    q = dst->data[0];
+    dst_wrap = dst->linesize[0] - width;
+
+    for(y=0;y<height;y++) {
+        for(x=0;x<width;x++) {
+            RGB_IN(r, g, b, p);
+            q[0] = RGB_TO_Y(r, g, b);
+            q++;
+            p += BPP;
+        }
+        p += src_wrap;
+        q += dst_wrap;
+    }
+}
+
+static void glue(gray_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src,
+                                     int width, int height)
+{
+    const unsigned char *p;
+    unsigned char *q;
+    int r, dst_wrap, src_wrap;
+    int x, y;
+
+    p = src->data[0];
+    src_wrap = src->linesize[0] - width;
+
+    q = dst->data[0];
+    dst_wrap = dst->linesize[0] - BPP * width;
+
+    for(y=0;y<height;y++) {
+        for(x=0;x<width;x++) {
+            r = p[0];
+            RGB_OUT(q, r, r, r);
+            q += BPP;
+            p ++;
+        }
+        p += src_wrap;
+        q += dst_wrap;
+    }
+}
+
+static void glue(pal8_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src,
+                                     int width, int height)
+{
+    const unsigned char *p;
+    unsigned char *q;
+    int r, g, b, dst_wrap, src_wrap;
+    int x, y;
+    uint32_t v;
+    const uint32_t *palette;
+
+    p = src->data[0];
+    src_wrap = src->linesize[0] - width;
+    palette = (uint32_t *)src->data[1];
+
+    q = dst->data[0];
+    dst_wrap = dst->linesize[0] - BPP * width;
+
+    for(y=0;y<height;y++) {
+        for(x=0;x<width;x++) {
+            v = palette[p[0]];
+            r = (v >> 16) & 0xff;
+            g = (v >> 8) & 0xff;
+            b = (v) & 0xff;
+#ifdef RGBA_OUT
+            {
+                int a;
+                a = (v >> 24) & 0xff;
+                RGBA_OUT(q, r, g, b, a);
+            }
+#else
+            RGB_OUT(q, r, g, b);
+#endif
+            q += BPP;
+            p ++;
+        }
+        p += src_wrap;
+        q += dst_wrap;
+    }
+}
+
+// RGB24 has optimised routines
+#if !defined(FMT_RGBA32) && !defined(FMT_RGB24)
+/* alpha support */
+
+static void glue(rgba32_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src,
+                                      int width, int height)
+{
+    const uint8_t *s;
+    uint8_t *d;
+    int src_wrap, dst_wrap, j, y;
+    unsigned int v, r, g, b;
+#ifdef RGBA_OUT
+    unsigned int a;
+#endif
+
+    s = src->data[0];
+    src_wrap = src->linesize[0] - width * 4;
+
+    d = dst->data[0];
+    dst_wrap = dst->linesize[0] - width * BPP;
+
+    for(y=0;y<height;y++) {
+        for(j = 0;j < width; j++) {
+            v = ((const uint32_t *)(s))[0];
+            r = (v >> 16) & 0xff;
+            g = (v >> 8) & 0xff;
+            b = v & 0xff;
+#ifdef RGBA_OUT
+            a = (v >> 24) & 0xff;
+            RGBA_OUT(d, r, g, b, a);
+#else
+            RGB_OUT(d, r, g, b);
+#endif
+            s += 4;
+            d += BPP;
+        }
+        s += src_wrap;
+        d += dst_wrap;
+    }
+}
+
+static void glue(RGB_NAME, _to_rgba32)(AVPicture *dst, const AVPicture *src,
+                                       int width, int height)
+{
+    const uint8_t *s;
+    uint8_t *d;
+    int src_wrap, dst_wrap, j, y;
+    unsigned int r, g, b;
+#ifdef RGBA_IN
+    unsigned int a;
+#endif
+
+    s = src->data[0];
+    src_wrap = src->linesize[0] - width * BPP;
+
+    d = dst->data[0];
+    dst_wrap = dst->linesize[0] - width * 4;
+
+    for(y=0;y<height;y++) {
+        for(j = 0;j < width; j++) {
+#ifdef RGBA_IN
+            RGBA_IN(r, g, b, a, s);
+            ((uint32_t *)(d))[0] = (a << 24) | (r << 16) | (g << 8) | b;
+#else
+            RGB_IN(r, g, b, s);
+            ((uint32_t *)(d))[0] = (0xff << 24) | (r << 16) | (g << 8) | b;
+#endif
+            d += 4;
+            s += BPP;
+        }
+        s += src_wrap;
+        d += dst_wrap;
+    }
+}
+
+#endif /* !defined(FMT_RGBA32) */
+
+#ifndef FMT_RGB24
+
+static void glue(rgb24_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src,
+                                      int width, int height)
+{
+    const uint8_t *s;
+    uint8_t *d;
+    int src_wrap, dst_wrap, j, y;
+    unsigned int r, g, b;
+
+    s = src->data[0];
+    src_wrap = src->linesize[0] - width * 3;
+
+    d = dst->data[0];
+    dst_wrap = dst->linesize[0] - width * BPP;
+
+    for(y=0;y<height;y++) {
+        for(j = 0;j < width; j++) {
+            r = s[0];
+            g = s[1];
+            b = s[2];
+            RGB_OUT(d, r, g, b);
+            s += 3;
+            d += BPP;
+        }
+        s += src_wrap;
+        d += dst_wrap;
+    }
+}
+
+static void glue(RGB_NAME, _to_rgb24)(AVPicture *dst, const AVPicture *src,
+                                      int width, int height)
+{
+    const uint8_t *s;
+    uint8_t *d;
+    int src_wrap, dst_wrap, j, y;
+    unsigned int r, g , b;
+
+    s = src->data[0];
+    src_wrap = src->linesize[0] - width * BPP;
+
+    d = dst->data[0];
+    dst_wrap = dst->linesize[0] - width * 3;
+
+    for(y=0;y<height;y++) {
+        for(j = 0;j < width; j++) {
+            RGB_IN(r, g, b, s)
+            d[0] = r;
+            d[1] = g;
+            d[2] = b;
+            d += 3;
+            s += BPP;
+        }
+        s += src_wrap;
+        d += dst_wrap;
+    }
+}
+
+#endif /* !FMT_RGB24 */
+
+#ifdef FMT_RGB24
+
+static void yuv444p_to_rgb24(AVPicture *dst, const AVPicture *src,
+                             int width, int height)
+{
+    const uint8_t *y1_ptr, *cb_ptr, *cr_ptr;
+    uint8_t *d, *d1;
+    int w, y, cb, cr, r_add, g_add, b_add;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    unsigned int r, g, b;
+
+    d = dst->data[0];
+    y1_ptr = src->data[0];
+    cb_ptr = src->data[1];
+    cr_ptr = src->data[2];
+    for(;height > 0; height --) {
+        d1 = d;
+        for(w = width; w > 0; w--) {
+            YUV_TO_RGB1_CCIR(cb_ptr[0], cr_ptr[0]);
+
+            YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+            d1 += BPP;
+
+            y1_ptr++;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        d += dst->linesize[0];
+        y1_ptr += src->linesize[0] - width;
+        cb_ptr += src->linesize[1] - width;
+        cr_ptr += src->linesize[2] - width;
+    }
+}
+
+static void yuvj444p_to_rgb24(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    const uint8_t *y1_ptr, *cb_ptr, *cr_ptr;
+    uint8_t *d, *d1;
+    int w, y, cb, cr, r_add, g_add, b_add;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    unsigned int r, g, b;
+
+    d = dst->data[0];
+    y1_ptr = src->data[0];
+    cb_ptr = src->data[1];
+    cr_ptr = src->data[2];
+    for(;height > 0; height --) {
+        d1 = d;
+        for(w = width; w > 0; w--) {
+            YUV_TO_RGB1(cb_ptr[0], cr_ptr[0]);
+
+            YUV_TO_RGB2(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+            d1 += BPP;
+
+            y1_ptr++;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        d += dst->linesize[0];
+        y1_ptr += src->linesize[0] - width;
+        cb_ptr += src->linesize[1] - width;
+        cr_ptr += src->linesize[2] - width;
+    }
+}
+
+static void rgb24_to_yuv444p(AVPicture *dst, const AVPicture *src,
+                             int width, int height)
+{
+    int src_wrap, x, y;
+    int r, g, b;
+    uint8_t *lum, *cb, *cr;
+    const uint8_t *p;
+
+    lum = dst->data[0];
+    cb = dst->data[1];
+    cr = dst->data[2];
+
+    src_wrap = src->linesize[0] - width * BPP;
+    p = src->data[0];
+    for(y=0;y<height;y++) {
+        for(x=0;x<width;x++) {
+            RGB_IN(r, g, b, p);
+            lum[0] = RGB_TO_Y_CCIR(r, g, b);
+            cb[0] = RGB_TO_U_CCIR(r, g, b, 0);
+            cr[0] = RGB_TO_V_CCIR(r, g, b, 0);
+            p += BPP;
+            cb++;
+            cr++;
+            lum++;
+        }
+        p += src_wrap;
+        lum += dst->linesize[0] - width;
+        cb += dst->linesize[1] - width;
+        cr += dst->linesize[2] - width;
+    }
+}
+
+static void rgb24_to_yuvj420p(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    int wrap, wrap3, width2;
+    int r, g, b, r1, g1, b1, w;
+    uint8_t *lum, *cb, *cr;
+    const uint8_t *p;
+
+    lum = dst->data[0];
+    cb = dst->data[1];
+    cr = dst->data[2];
+
+    width2 = (width + 1) >> 1;
+    wrap = dst->linesize[0];
+    wrap3 = src->linesize[0];
+    p = src->data[0];
+    for(;height>=2;height -= 2) {
+        for(w = width; w >= 2; w -= 2) {
+            RGB_IN(r, g, b, p);
+            r1 = r;
+            g1 = g;
+            b1 = b;
+            lum[0] = RGB_TO_Y(r, g, b);
+
+            RGB_IN(r, g, b, p + BPP);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[1] = RGB_TO_Y(r, g, b);
+            p += wrap3;
+            lum += wrap;
+
+            RGB_IN(r, g, b, p);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[0] = RGB_TO_Y(r, g, b);
+
+            RGB_IN(r, g, b, p + BPP);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[1] = RGB_TO_Y(r, g, b);
+
+            cb[0] = RGB_TO_U(r1, g1, b1, 2);
+            cr[0] = RGB_TO_V(r1, g1, b1, 2);
+
+            cb++;
+            cr++;
+            p += -wrap3 + 2 * BPP;
+            lum += -wrap + 2;
+        }
+        if (w) {
+            RGB_IN(r, g, b, p);
+            r1 = r;
+            g1 = g;
+            b1 = b;
+            lum[0] = RGB_TO_Y(r, g, b);
+            p += wrap3;
+            lum += wrap;
+            RGB_IN(r, g, b, p);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[0] = RGB_TO_Y(r, g, b);
+            cb[0] = RGB_TO_U(r1, g1, b1, 1);
+            cr[0] = RGB_TO_V(r1, g1, b1, 1);
+            cb++;
+            cr++;
+            p += -wrap3 + BPP;
+            lum += -wrap + 1;
+        }
+        p += wrap3 + (wrap3 - width * BPP);
+        lum += wrap + (wrap - width);
+        cb += dst->linesize[1] - width2;
+        cr += dst->linesize[2] - width2;
+    }
+    /* handle odd height */
+    if (height) {
+        for(w = width; w >= 2; w -= 2) {
+            RGB_IN(r, g, b, p);
+            r1 = r;
+            g1 = g;
+            b1 = b;
+            lum[0] = RGB_TO_Y(r, g, b);
+
+            RGB_IN(r, g, b, p + BPP);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[1] = RGB_TO_Y(r, g, b);
+            cb[0] = RGB_TO_U(r1, g1, b1, 1);
+            cr[0] = RGB_TO_V(r1, g1, b1, 1);
+            cb++;
+            cr++;
+            p += 2 * BPP;
+           lum += 2;
+        }
+        if (w) {
+            RGB_IN(r, g, b, p);
+            lum[0] = RGB_TO_Y(r, g, b);
+            cb[0] = RGB_TO_U(r, g, b, 0);
+            cr[0] = RGB_TO_V(r, g, b, 0);
+        }
+    }
+}
+
+static void rgb24_to_yuvj444p(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    int src_wrap, x, y;
+    int r, g, b;
+    uint8_t *lum, *cb, *cr;
+    const uint8_t *p;
+
+    lum = dst->data[0];
+    cb = dst->data[1];
+    cr = dst->data[2];
+
+    src_wrap = src->linesize[0] - width * BPP;
+    p = src->data[0];
+    for(y=0;y<height;y++) {
+        for(x=0;x<width;x++) {
+            RGB_IN(r, g, b, p);
+            lum[0] = RGB_TO_Y(r, g, b);
+            cb[0] = RGB_TO_U(r, g, b, 0);
+            cr[0] = RGB_TO_V(r, g, b, 0);
+            p += BPP;
+            cb++;
+            cr++;
+            lum++;
+        }
+        p += src_wrap;
+        lum += dst->linesize[0] - width;
+        cb += dst->linesize[1] - width;
+        cr += dst->linesize[2] - width;
+    }
+}
+
+#endif /* FMT_RGB24 */
+
+#if defined(FMT_RGB24) || defined(FMT_RGBA32)
+
+static void glue(RGB_NAME, _to_pal8)(AVPicture *dst, const AVPicture *src,
+                                     int width, int height)
+{
+    const unsigned char *p;
+    unsigned char *q;
+    int dst_wrap, src_wrap;
+    int x, y, has_alpha;
+    unsigned int r, g, b;
+
+    p = src->data[0];
+    src_wrap = src->linesize[0] - BPP * width;
+
+    q = dst->data[0];
+    dst_wrap = dst->linesize[0] - width;
+    has_alpha = 0;
+
+    for(y=0;y<height;y++) {
+        for(x=0;x<width;x++) {
+#ifdef RGBA_IN
+            {
+                unsigned int a;
+                RGBA_IN(r, g, b, a, p);
+                /* crude approximation for alpha ! */
+                if (a < 0x80) {
+                    has_alpha = 1;
+                    q[0] = TRANSP_INDEX;
+                } else {
+                    q[0] = gif_clut_index(r, g, b);
+                }
+            }
+#else
+            RGB_IN(r, g, b, p);
+            q[0] = gif_clut_index(r, g, b);
+#endif
+            q++;
+            p += BPP;
+        }
+        p += src_wrap;
+        q += dst_wrap;
+    }
+
+    build_rgb_palette(dst->data[1], has_alpha);
+}
+
+#endif /* defined(FMT_RGB24) || defined(FMT_RGBA32) */
+
+#ifdef RGBA_IN
+
+static int glue(get_alpha_info_, RGB_NAME)(const AVPicture *src,
+                                           int width, int height)
+{
+    const unsigned char *p;
+    int src_wrap, ret, x, y;
+    unsigned int r, g, b, a;
+
+    p = src->data[0];
+    src_wrap = src->linesize[0] - BPP * width;
+    ret = 0;
+    for(y=0;y<height;y++) {
+        for(x=0;x<width;x++) {
+            RGBA_IN(r, g, b, a, p);
+            if (a == 0x00) {
+                ret |= FF_ALPHA_TRANSP;
+            } else if (a != 0xff) {
+                ret |= FF_ALPHA_SEMI_TRANSP;
+            }
+            p += BPP;
+        }
+        p += src_wrap;
+    }
+    return ret;
+}
+
+#endif /* RGBA_IN */
+
+#undef RGB_IN
+#undef RGBA_IN
+#undef RGB_OUT
+#undef RGBA_OUT
+#undef BPP
+#undef RGB_NAME
+#undef FMT_RGB24
+#undef FMT_RGBA32
diff --git a/contrib/ffmpeg/libavcodec/imgresample.c b/contrib/ffmpeg/libavcodec/imgresample.c
new file mode 100644
index 000000000..ce1a05ce4
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/imgresample.c
@@ -0,0 +1,947 @@
+/*
+ * High quality image resampling with polyphase filters
+ * Copyright (c) 2001 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file imgresample.c
+ * High quality image resampling with polyphase filters .
+ */
+
+#include "avcodec.h"
+#include "swscale.h"
+#include "dsputil.h"
+
+#ifdef USE_FASTMEMCPY
+#include "libvo/fastmemcpy.h"
+#endif
+
+#define NB_COMPONENTS 3
+
+#define PHASE_BITS 4
+#define NB_PHASES  (1 << PHASE_BITS)
+#define NB_TAPS    4
+#define FCENTER    1  /* index of the center of the filter */
+//#define TEST    1  /* Test it */
+
+#define POS_FRAC_BITS 16
+#define POS_FRAC      (1 << POS_FRAC_BITS)
+/* 6 bits precision is needed for MMX */
+#define FILTER_BITS   8
+
+#define LINE_BUF_HEIGHT (NB_TAPS * 4)
+
+struct SwsContext {
+    struct ImgReSampleContext *resampling_ctx;
+    enum PixelFormat src_pix_fmt, dst_pix_fmt;
+};
+
+struct ImgReSampleContext {
+    int iwidth, iheight, owidth, oheight;
+    int topBand, bottomBand, leftBand, rightBand;
+    int padtop, padbottom, padleft, padright;
+    int pad_owidth, pad_oheight;
+    int h_incr, v_incr;
+    DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
+    DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
+    uint8_t *line_buf;
+};
+
+void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);
+
+static inline int get_phase(int pos)
+{
+    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
+}
+
+/* This function must be optimized */
+static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
+                            int src_width, int src_start, int src_incr,
+                            int16_t *filters)
+{
+    int src_pos, phase, sum, i;
+    const uint8_t *s;
+    int16_t *filter;
+
+    src_pos = src_start;
+    for(i=0;i<dst_width;i++) {
+#ifdef TEST
+        /* test */
+        if ((src_pos >> POS_FRAC_BITS) < 0 ||
+            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
+            av_abort();
+#endif
+        s = src + (src_pos >> POS_FRAC_BITS);
+        phase = get_phase(src_pos);
+        filter = filters + phase * NB_TAPS;
+#if NB_TAPS == 4
+        sum = s[0] * filter[0] +
+            s[1] * filter[1] +
+            s[2] * filter[2] +
+            s[3] * filter[3];
+#else
+        {
+            int j;
+            sum = 0;
+            for(j=0;j<NB_TAPS;j++)
+                sum += s[j] * filter[j];
+        }
+#endif
+        sum = sum >> FILTER_BITS;
+        if (sum < 0)
+            sum = 0;
+        else if (sum > 255)
+            sum = 255;
+        dst[0] = sum;
+        src_pos += src_incr;
+        dst++;
+    }
+}
+
+/* This function must be optimized */
+static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
+                       int wrap, int16_t *filter)
+{
+    int sum, i;
+    const uint8_t *s;
+
+    s = src;
+    for(i=0;i<dst_width;i++) {
+#if NB_TAPS == 4
+        sum = s[0 * wrap] * filter[0] +
+            s[1 * wrap] * filter[1] +
+            s[2 * wrap] * filter[2] +
+            s[3 * wrap] * filter[3];
+#else
+        {
+            int j;
+            uint8_t *s1 = s;
+
+            sum = 0;
+            for(j=0;j<NB_TAPS;j++) {
+                sum += s1[0] * filter[j];
+                s1 += wrap;
+            }
+        }
+#endif
+        sum = sum >> FILTER_BITS;
+        if (sum < 0)
+            sum = 0;
+        else if (sum > 255)
+            sum = 255;
+        dst[0] = sum;
+        dst++;
+        s++;
+    }
+}
+
+#ifdef HAVE_MMX
+
+#include "i386/mmx.h"
+
+#define FILTER4(reg) \
+{\
+        s = src + (src_pos >> POS_FRAC_BITS);\
+        phase = get_phase(src_pos);\
+        filter = filters + phase * NB_TAPS;\
+        movq_m2r(*s, reg);\
+        punpcklbw_r2r(mm7, reg);\
+        movq_m2r(*filter, mm6);\
+        pmaddwd_r2r(reg, mm6);\
+        movq_r2r(mm6, reg);\
+        psrlq_i2r(32, reg);\
+        paddd_r2r(mm6, reg);\
+        psrad_i2r(FILTER_BITS, reg);\
+        src_pos += src_incr;\
+}
+
+#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016"PRIx64"\n", tmp.uq);
+
+/* XXX: do four pixels at a time */
+static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
+                                 const uint8_t *src, int src_width,
+                                 int src_start, int src_incr, int16_t *filters)
+{
+    int src_pos, phase;
+    const uint8_t *s;
+    int16_t *filter;
+    mmx_t tmp;
+
+    src_pos = src_start;
+    pxor_r2r(mm7, mm7);
+
+    while (dst_width >= 4) {
+
+        FILTER4(mm0);
+        FILTER4(mm1);
+        FILTER4(mm2);
+        FILTER4(mm3);
+
+        packuswb_r2r(mm7, mm0);
+        packuswb_r2r(mm7, mm1);
+        packuswb_r2r(mm7, mm3);
+        packuswb_r2r(mm7, mm2);
+        movq_r2m(mm0, tmp);
+        dst[0] = tmp.ub[0];
+        movq_r2m(mm1, tmp);
+        dst[1] = tmp.ub[0];
+        movq_r2m(mm2, tmp);
+        dst[2] = tmp.ub[0];
+        movq_r2m(mm3, tmp);
+        dst[3] = tmp.ub[0];
+        dst += 4;
+        dst_width -= 4;
+    }
+    while (dst_width > 0) {
+        FILTER4(mm0);
+        packuswb_r2r(mm7, mm0);
+        movq_r2m(mm0, tmp);
+        dst[0] = tmp.ub[0];
+        dst++;
+        dst_width--;
+    }
+    emms();
+}
+
+static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
+                            int wrap, int16_t *filter)
+{
+    int sum, i, v;
+    const uint8_t *s;
+    mmx_t tmp;
+    mmx_t coefs[4];
+
+    for(i=0;i<4;i++) {
+        v = filter[i];
+        coefs[i].uw[0] = v;
+        coefs[i].uw[1] = v;
+        coefs[i].uw[2] = v;
+        coefs[i].uw[3] = v;
+    }
+
+    pxor_r2r(mm7, mm7);
+    s = src;
+    while (dst_width >= 4) {
+        movq_m2r(s[0 * wrap], mm0);
+        punpcklbw_r2r(mm7, mm0);
+        movq_m2r(s[1 * wrap], mm1);
+        punpcklbw_r2r(mm7, mm1);
+        movq_m2r(s[2 * wrap], mm2);
+        punpcklbw_r2r(mm7, mm2);
+        movq_m2r(s[3 * wrap], mm3);
+        punpcklbw_r2r(mm7, mm3);
+
+        pmullw_m2r(coefs[0], mm0);
+        pmullw_m2r(coefs[1], mm1);
+        pmullw_m2r(coefs[2], mm2);
+        pmullw_m2r(coefs[3], mm3);
+
+        paddw_r2r(mm1, mm0);
+        paddw_r2r(mm3, mm2);
+        paddw_r2r(mm2, mm0);
+        psraw_i2r(FILTER_BITS, mm0);
+
+        packuswb_r2r(mm7, mm0);
+        movq_r2m(mm0, tmp);
+
+        *(uint32_t *)dst = tmp.ud[0];
+        dst += 4;
+        s += 4;
+        dst_width -= 4;
+    }
+    while (dst_width > 0) {
+        sum = s[0 * wrap] * filter[0] +
+            s[1 * wrap] * filter[1] +
+            s[2 * wrap] * filter[2] +
+            s[3 * wrap] * filter[3];
+        sum = sum >> FILTER_BITS;
+        if (sum < 0)
+            sum = 0;
+        else if (sum > 255)
+            sum = 255;
+        dst[0] = sum;
+        dst++;
+        s++;
+        dst_width--;
+    }
+    emms();
+}
+#endif
+
+#ifdef HAVE_ALTIVEC
+typedef         union {
+    vector unsigned char v;
+    unsigned char c[16];
+} vec_uc_t;
+
+typedef         union {
+    vector signed short v;
+    signed short s[8];
+} vec_ss_t;
+
+void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
+                          int wrap, int16_t *filter)
+{
+    int sum, i;
+    const uint8_t *s;
+    vector unsigned char *tv, tmp, dstv, zero;
+    vec_ss_t srchv[4], srclv[4], fv[4];
+    vector signed short zeros, sumhv, sumlv;
+    s = src;
+
+    for(i=0;i<4;i++)
+    {
+        /*
+           The vec_madds later on does an implicit >>15 on the result.
+           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
+           a signed short, we have just enough bits to pre-shift our
+           filter constants <<7 to compensate for vec_madds.
+        */
+        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
+        fv[i].v = vec_splat(fv[i].v, 0);
+    }
+
+    zero = vec_splat_u8(0);
+    zeros = vec_splat_s16(0);
+
+
+    /*
+       When we're resampling, we'd ideally like both our input buffers,
+       and output buffers to be 16-byte aligned, so we can do both aligned
+       reads and writes. Sadly we can't always have this at the moment, so
+       we opt for aligned writes, as unaligned writes have a huge overhead.
+       To do this, do enough scalar resamples to get dst 16-byte aligned.
+    */
+    i = (-(int)dst) & 0xf;
+    while(i>0) {
+        sum = s[0 * wrap] * filter[0] +
+        s[1 * wrap] * filter[1] +
+        s[2 * wrap] * filter[2] +
+        s[3 * wrap] * filter[3];
+        sum = sum >> FILTER_BITS;
+        if (sum<0) sum = 0; else if (sum>255) sum=255;
+        dst[0] = sum;
+        dst++;
+        s++;
+        dst_width--;
+        i--;
+    }
+
+    /* Do our altivec resampling on 16 pixels at once. */
+    while(dst_width>=16) {
+        /*
+           Read 16 (potentially unaligned) bytes from each of
+           4 lines into 4 vectors, and split them into shorts.
+           Interleave the multipy/accumulate for the resample
+           filter with the loads to hide the 3 cycle latency
+           the vec_madds have.
+        */
+        tv = (vector unsigned char *) &s[0 * wrap];
+        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
+        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
+        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
+        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
+        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
+
+        tv = (vector unsigned char *) &s[1 * wrap];
+        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
+        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
+        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
+        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
+        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
+
+        tv = (vector unsigned char *) &s[2 * wrap];
+        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
+        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
+        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
+        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
+        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
+
+        tv = (vector unsigned char *) &s[3 * wrap];
+        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
+        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
+        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
+        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
+        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
+
+        /*
+           Pack the results into our destination vector,
+           and do an aligned write of that back to memory.
+        */
+        dstv = vec_packsu(sumhv, sumlv) ;
+        vec_st(dstv, 0, (vector unsigned char *) dst);
+
+        dst+=16;
+        s+=16;
+        dst_width-=16;
+    }
+
+    /*
+       If there are any leftover pixels, resample them
+       with the slow scalar method.
+    */
+    while(dst_width>0) {
+        sum = s[0 * wrap] * filter[0] +
+        s[1 * wrap] * filter[1] +
+        s[2 * wrap] * filter[2] +
+        s[3 * wrap] * filter[3];
+        sum = sum >> FILTER_BITS;
+        if (sum<0) sum = 0; else if (sum>255) sum=255;
+        dst[0] = sum;
+        dst++;
+        s++;
+        dst_width--;
+    }
+}
+#endif
+
+/* slow version to handle limit cases. Does not need optimisation */
+static void h_resample_slow(uint8_t *dst, int dst_width,
+                            const uint8_t *src, int src_width,
+                            int src_start, int src_incr, int16_t *filters)
+{
+    int src_pos, phase, sum, j, v, i;
+    const uint8_t *s, *src_end;
+    int16_t *filter;
+
+    src_end = src + src_width;
+    src_pos = src_start;
+    for(i=0;i<dst_width;i++) {
+        s = src + (src_pos >> POS_FRAC_BITS);
+        phase = get_phase(src_pos);
+        filter = filters + phase * NB_TAPS;
+        sum = 0;
+        for(j=0;j<NB_TAPS;j++) {
+            if (s < src)
+                v = src[0];
+            else if (s >= src_end)
+                v = src_end[-1];
+            else
+                v = s[0];
+            sum += v * filter[j];
+            s++;
+        }
+        sum = sum >> FILTER_BITS;
+        if (sum < 0)
+            sum = 0;
+        else if (sum > 255)
+            sum = 255;
+        dst[0] = sum;
+        src_pos += src_incr;
+        dst++;
+    }
+}
+
+static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
+                       int src_width, int src_start, int src_incr,
+                       int16_t *filters)
+{
+    int n, src_end;
+
+    if (src_start < 0) {
+        n = (0 - src_start + src_incr - 1) / src_incr;
+        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
+        dst += n;
+        dst_width -= n;
+        src_start += n * src_incr;
+    }
+    src_end = src_start + dst_width * src_incr;
+    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
+        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
+            src_incr;
+    } else {
+        n = dst_width;
+    }
+#ifdef HAVE_MMX
+    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
+        h_resample_fast4_mmx(dst, n,
+                             src, src_width, src_start, src_incr, filters);
+    else
+#endif
+        h_resample_fast(dst, n,
+                        src, src_width, src_start, src_incr, filters);
+    if (n < dst_width) {
+        dst += n;
+        dst_width -= n;
+        src_start += n * src_incr;
+        h_resample_slow(dst, dst_width,
+                        src, src_width, src_start, src_incr, filters);
+    }
+}
+
+static void component_resample(ImgReSampleContext *s,
+                               uint8_t *output, int owrap, int owidth, int oheight,
+                               uint8_t *input, int iwrap, int iwidth, int iheight)
+{
+    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
+    uint8_t *new_line, *src_line;
+
+    last_src_y = - FCENTER - 1;
+    /* position of the bottom of the filter in the source image */
+    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
+    ring_y = NB_TAPS; /* position in ring buffer */
+    for(y=0;y<oheight;y++) {
+        /* apply horizontal filter on new lines from input if needed */
+        src_y1 = src_y >> POS_FRAC_BITS;
+        while (last_src_y < src_y1) {
+            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
+                ring_y = NB_TAPS;
+            last_src_y++;
+            /* handle limit conditions : replicate line (slightly
+               inefficient because we filter multiple times) */
+            y1 = last_src_y;
+            if (y1 < 0) {
+                y1 = 0;
+            } else if (y1 >= iheight) {
+                y1 = iheight - 1;
+            }
+            src_line = input + y1 * iwrap;
+            new_line = s->line_buf + ring_y * owidth;
+            /* apply filter and handle limit cases correctly */
+            h_resample(new_line, owidth,
+                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
+                       &s->h_filters[0][0]);
+            /* handle ring buffer wraping */
+            if (ring_y >= LINE_BUF_HEIGHT) {
+                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
+                       new_line, owidth);
+            }
+        }
+        /* apply vertical filter */
+        phase_y = get_phase(src_y);
+#ifdef HAVE_MMX
+        /* desactivated MMX because loss of precision */
+        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
+            v_resample4_mmx(output, owidth,
+                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
+                            &s->v_filters[phase_y][0]);
+        else
+#endif
+#ifdef HAVE_ALTIVEC
+            if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
+                v_resample16_altivec(output, owidth,
+                                s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
+                                &s->v_filters[phase_y][0]);
+        else
+#endif
+            v_resample(output, owidth,
+                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
+                       &s->v_filters[phase_y][0]);
+
+        src_y += s->v_incr;
+
+        output += owrap;
+    }
+}
+
+ImgReSampleContext *img_resample_init(int owidth, int oheight,
+                                      int iwidth, int iheight)
+{
+    return img_resample_full_init(owidth, oheight, iwidth, iheight,
+            0, 0, 0, 0, 0, 0, 0, 0);
+}
+
+ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
+                                      int iwidth, int iheight,
+                                      int topBand, int bottomBand,
+        int leftBand, int rightBand,
+        int padtop, int padbottom,
+        int padleft, int padright)
+{
+    ImgReSampleContext *s;
+
+    if (!owidth || !oheight || !iwidth || !iheight)
+        return NULL;
+
+    s = av_mallocz(sizeof(ImgReSampleContext));
+    if (!s)
+        return NULL;
+    if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
+        return NULL;
+    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
+    if (!s->line_buf)
+        goto fail;
+
+    s->owidth = owidth;
+    s->oheight = oheight;
+    s->iwidth = iwidth;
+    s->iheight = iheight;
+
+    s->topBand = topBand;
+    s->bottomBand = bottomBand;
+    s->leftBand = leftBand;
+    s->rightBand = rightBand;
+
+    s->padtop = padtop;
+    s->padbottom = padbottom;
+    s->padleft = padleft;
+    s->padright = padright;
+
+    s->pad_owidth = owidth - (padleft + padright);
+    s->pad_oheight = oheight - (padtop + padbottom);
+
+    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
+    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
+
+    av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth  /
+            (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
+    av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
+            (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
+
+    return s;
+fail:
+    av_free(s);
+    return NULL;
+}
+
+void img_resample(ImgReSampleContext *s,
+                  AVPicture *output, const AVPicture *input)
+{
+    int i, shift;
+    uint8_t* optr;
+
+    for (i=0;i<3;i++) {
+        shift = (i == 0) ? 0 : 1;
+
+        optr = output->data[i] + (((output->linesize[i] *
+                        s->padtop) + s->padleft) >> shift);
+
+        component_resample(s, optr, output->linesize[i],
+                s->pad_owidth >> shift, s->pad_oheight >> shift,
+                input->data[i] + (input->linesize[i] *
+                    (s->topBand >> shift)) + (s->leftBand >> shift),
+                input->linesize[i], ((s->iwidth - s->leftBand -
+                        s->rightBand) >> shift),
+                           (s->iheight - s->topBand - s->bottomBand) >> shift);
+    }
+}
+
+void img_resample_close(ImgReSampleContext *s)
+{
+    av_free(s->line_buf);
+    av_free(s);
+}
+
+struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
+                                  int dstW, int dstH, int dstFormat,
+                                  int flags, SwsFilter *srcFilter,
+                                  SwsFilter *dstFilter, double *param)
+{
+    struct SwsContext *ctx;
+
+    ctx = av_malloc(sizeof(struct SwsContext));
+    if (ctx == NULL) {
+        av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
+
+        return NULL;
+    }
+
+    if ((srcH != dstH) || (srcW != dstW)) {
+        if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
+            av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
+        }
+        ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
+    } else {
+        ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
+        ctx->resampling_ctx->iheight = srcH;
+        ctx->resampling_ctx->iwidth = srcW;
+        ctx->resampling_ctx->oheight = dstH;
+        ctx->resampling_ctx->owidth = dstW;
+    }
+    ctx->src_pix_fmt = srcFormat;
+    ctx->dst_pix_fmt = dstFormat;
+
+    return ctx;
+}
+
+void sws_freeContext(struct SwsContext *ctx)
+{
+    if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
+        (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
+        img_resample_close(ctx->resampling_ctx);
+    } else {
+        av_free(ctx->resampling_ctx);
+    }
+    av_free(ctx);
+}
+
+
+/**
+ * Checks if context is valid or reallocs a new one instead.
+ * If context is NULL, just calls sws_getContext() to get a new one.
+ * Otherwise, checks if the parameters are the same already saved in context.
+ * If that is the case, returns the current context.
+ * Otherwise, frees context and gets a new one.
+ *
+ * Be warned that srcFilter, dstFilter are not checked, they are
+ * asumed to remain valid.
+ */
+struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
+                        int srcW, int srcH, int srcFormat,
+                        int dstW, int dstH, int dstFormat, int flags,
+                        SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
+{
+    if (ctx != NULL) {
+        if ((ctx->resampling_ctx->iwidth != srcW) ||
+                        (ctx->resampling_ctx->iheight != srcH) ||
+                        (ctx->src_pix_fmt != srcFormat) ||
+                        (ctx->resampling_ctx->owidth != dstW) ||
+                        (ctx->resampling_ctx->oheight != dstH) ||
+                        (ctx->dst_pix_fmt != dstFormat))
+        {
+            sws_freeContext(ctx);
+            ctx = NULL;
+        }
+    }
+    if (ctx == NULL) {
+        return sws_getContext(srcW, srcH, srcFormat,
+                        dstW, dstH, dstFormat, flags,
+                        srcFilter, dstFilter, param);
+    }
+    return ctx;
+}
+
+int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
+              int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    AVPicture src_pict, dst_pict;
+    int i, res = 0;
+    AVPicture picture_format_temp;
+    AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
+    uint8_t *buf1 = NULL, *buf2 = NULL;
+    enum PixelFormat current_pix_fmt;
+
+    for (i = 0; i < 4; i++) {
+        src_pict.data[i] = src[i];
+        src_pict.linesize[i] = srcStride[i];
+        dst_pict.data[i] = dst[i];
+        dst_pict.linesize[i] = dstStride[i];
+    }
+    if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
+        (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
+        /* We have to rescale the picture, but only YUV420P rescaling is supported... */
+
+        if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
+            int size;
+
+            /* create temporary picture for rescaling input*/
+            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
+            buf1 = av_malloc(size);
+            if (!buf1) {
+                res = -1;
+                goto the_end;
+            }
+            formatted_picture = &picture_format_temp;
+            avpicture_fill((AVPicture*)formatted_picture, buf1,
+                           PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
+
+            if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
+                            &src_pict, ctx->src_pix_fmt,
+                            ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
+
+                av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
+                res = -1;
+                goto the_end;
+            }
+        } else {
+            formatted_picture = &src_pict;
+        }
+
+        if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
+            int size;
+
+            /* create temporary picture for rescaling output*/
+            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
+            buf2 = av_malloc(size);
+            if (!buf2) {
+                res = -1;
+                goto the_end;
+            }
+            resampled_picture = &picture_resample_temp;
+            avpicture_fill((AVPicture*)resampled_picture, buf2,
+                           PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
+
+        } else {
+            resampled_picture = &dst_pict;
+        }
+
+        /* ...and finally rescale!!! */
+        img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
+        current_pix_fmt = PIX_FMT_YUV420P;
+    } else {
+        resampled_picture = &src_pict;
+        current_pix_fmt = ctx->src_pix_fmt;
+    }
+
+    if (current_pix_fmt != ctx->dst_pix_fmt) {
+        if (img_convert(&dst_pict, ctx->dst_pix_fmt,
+                        resampled_picture, current_pix_fmt,
+                        ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
+
+            av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
+
+            res = -1;
+            goto the_end;
+        }
+    } else if (resampled_picture != &dst_pict) {
+        img_copy(&dst_pict, resampled_picture, current_pix_fmt,
+                        ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
+    }
+
+the_end:
+    av_free(buf1);
+    av_free(buf2);
+    return res;
+}
+
+
+#ifdef TEST
+#include <stdio.h>
+
+/* input */
+#define XSIZE 256
+#define YSIZE 256
+uint8_t img[XSIZE * YSIZE];
+
+/* output */
+#define XSIZE1 512
+#define YSIZE1 512
+uint8_t img1[XSIZE1 * YSIZE1];
+uint8_t img2[XSIZE1 * YSIZE1];
+
+void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
+{
+#undef fprintf
+    FILE *f;
+    f=fopen(filename,"w");
+    fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
+    fwrite(img,1, xsize * ysize,f);
+    fclose(f);
+#define fprintf please_use_av_log
+}
+
+static void dump_filter(int16_t *filter)
+{
+    int i, ph;
+
+    for(ph=0;ph<NB_PHASES;ph++) {
+        av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
+        for(i=0;i<NB_TAPS;i++) {
+            av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
+        }
+        av_log(NULL, AV_LOG_INFO, "\n");
+    }
+}
+
+#ifdef HAVE_MMX
+int mm_flags;
+#endif
+
+int main(int argc, char **argv)
+{
+    int x, y, v, i, xsize, ysize;
+    ImgReSampleContext *s;
+    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
+    char buf[256];
+
+    /* build test image */
+    for(y=0;y<YSIZE;y++) {
+        for(x=0;x<XSIZE;x++) {
+            if (x < XSIZE/2 && y < YSIZE/2) {
+                if (x < XSIZE/4 && y < YSIZE/4) {
+                    if ((x % 10) <= 6 &&
+                        (y % 10) <= 6)
+                        v = 0xff;
+                    else
+                        v = 0x00;
+                } else if (x < XSIZE/4) {
+                    if (x & 1)
+                        v = 0xff;
+                    else
+                        v = 0;
+                } else if (y < XSIZE/4) {
+                    if (y & 1)
+                        v = 0xff;
+                    else
+                        v = 0;
+                } else {
+                    if (y < YSIZE*3/8) {
+                        if ((y+x) & 1)
+                            v = 0xff;
+                        else
+                            v = 0;
+                    } else {
+                        if (((x+3) % 4) <= 1 &&
+                            ((y+3) % 4) <= 1)
+                            v = 0xff;
+                        else
+                            v = 0x00;
+                    }
+                }
+            } else if (x < XSIZE/2) {
+                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
+            } else if (y < XSIZE/2) {
+                v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
+            } else {
+                v = ((x + y - XSIZE) * 255) / XSIZE;
+            }
+            img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
+        }
+    }
+    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
+    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
+        fact = factors[i];
+        xsize = (int)(XSIZE * fact);
+        ysize = (int)((YSIZE - 100) * fact);
+        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0);
+        av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
+        dump_filter(&s->h_filters[0][0]);
+        component_resample(s, img1, xsize, xsize, ysize,
+                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
+        img_resample_close(s);
+
+        snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
+        save_pgm(buf, img1, xsize, ysize);
+    }
+
+    /* mmx test */
+#ifdef HAVE_MMX
+    av_log(NULL, AV_LOG_INFO, "MMX test\n");
+    fact = 0.72;
+    xsize = (int)(XSIZE * fact);
+    ysize = (int)(YSIZE * fact);
+    mm_flags = MM_MMX;
+    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
+    component_resample(s, img1, xsize, xsize, ysize,
+                       img, XSIZE, XSIZE, YSIZE);
+
+    mm_flags = 0;
+    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
+    component_resample(s, img2, xsize, xsize, ysize,
+                       img, XSIZE, XSIZE, YSIZE);
+    if (memcmp(img1, img2, xsize * ysize) != 0) {
+        av_log(NULL, AV_LOG_ERROR, "mmx error\n");
+        exit(1);
+    }
+    av_log(NULL, AV_LOG_INFO, "MMX OK\n");
+#endif
+    return 0;
+}
+
+#endif
diff --git a/contrib/ffmpeg/libavcodec/indeo2.c b/contrib/ffmpeg/libavcodec/indeo2.c
new file mode 100644
index 000000000..f3917ff3a
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/indeo2.c
@@ -0,0 +1,221 @@
+/*
+ * Intel Indeo 2 codec
+ * Copyright (c) 2005 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file indeo2.c
+ * Intel Indeo 2 decoder.
+ */
+#define ALT_BITSTREAM_READER_LE
+#include "avcodec.h"
+#include "bitstream.h"
+#include "indeo2data.h"
+
+typedef struct Ir2Context{
+    AVCodecContext *avctx;
+    AVFrame picture;
+    GetBitContext gb;
+    int decode_delta;
+} Ir2Context;
+
+#define CODE_VLC_BITS 14
+static VLC ir2_vlc;
+
+/* Indeo 2 codes are in range 0x01..0x7F and 0x81..0x90 */
+static inline int ir2_get_code(GetBitContext *gb)
+{
+    return get_vlc2(gb, ir2_vlc.table, CODE_VLC_BITS, 1) + 1;
+}
+
+static int ir2_decode_plane(Ir2Context *ctx, int width, int height, uint8_t *dst, int stride,
+                             const uint8_t *table)
+{
+    int i;
+    int j;
+    int out = 0;
+    int c;
+    int t;
+
+    if(width&1)
+        return -1;
+
+    /* first line contain absolute values, other lines contain deltas */
+    while (out < width){
+        c = ir2_get_code(&ctx->gb);
+        if(c >= 0x80) { /* we have a run */
+            c -= 0x7F;
+            if(out + c*2 > width)
+                return -1;
+            for (i = 0; i < c * 2; i++)
+                dst[out++] = 0x80;
+        } else { /* copy two values from table */
+            dst[out++] = table[c * 2];
+            dst[out++] = table[(c * 2) + 1];
+        }
+    }
+    dst += stride;
+
+    for (j = 1; j < height; j++){
+        out = 0;
+        while (out < width){
+            c = ir2_get_code(&ctx->gb);
+            if(c >= 0x80) { /* we have a skip */
+                c -= 0x7F;
+                if(out + c*2 > width)
+                    return -1;
+                for (i = 0; i < c * 2; i++) {
+                    dst[out] = dst[out - stride];
+                    out++;
+                }
+            } else { /* add two deltas from table */
+                t = dst[out - stride] + (table[c * 2] - 128);
+                t= clip_uint8(t);
+                dst[out] = t;
+                out++;
+                t = dst[out - stride] + (table[(c * 2) + 1] - 128);
+                t= clip_uint8(t);
+                dst[out] = t;
+                out++;
+            }
+        }
+        dst += stride;
+    }
+    return 0;
+}
+
+static int ir2_decode_plane_inter(Ir2Context *ctx, int width, int height, uint8_t *dst, int stride,
+                             const uint8_t *table)
+{
+    int j;
+    int out = 0;
+    int c;
+    int t;
+
+    if(width&1)
+        return -1;
+
+    for (j = 0; j < height; j++){
+        out = 0;
+        while (out < width){
+            c = ir2_get_code(&ctx->gb);
+            if(c >= 0x80) { /* we have a skip */
+                c -= 0x7F;
+                out += c * 2;
+            } else { /* add two deltas from table */
+                t = dst[out] + (((table[c * 2] - 128)*3) >> 2);
+                t= clip_uint8(t);
+                dst[out] = t;
+                out++;
+                t = dst[out] + (((table[(c * 2) + 1] - 128)*3) >> 2);
+                t= clip_uint8(t);
+                dst[out] = t;
+                out++;
+            }
+        }
+        dst += stride;
+    }
+    return 0;
+}
+
+static int ir2_decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    Ir2Context * const s = avctx->priv_data;
+    AVFrame *picture = data;
+    AVFrame * const p= (AVFrame*)&s->picture;
+    int start;
+
+    if(p->data[0])
+        avctx->release_buffer(avctx, p);
+
+    p->reference = 1;
+    p->buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if (avctx->reget_buffer(avctx, p)) {
+        av_log(s->avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+        return -1;
+    }
+
+    s->decode_delta = buf[18];
+
+    /* decide whether frame uses deltas or not */
+#ifndef ALT_BITSTREAM_READER_LE
+    for (i = 0; i < buf_size; i++)
+        buf[i] = ff_reverse[buf[i]];
+#endif
+    start = 48; /* hardcoded for now */
+
+    init_get_bits(&s->gb, buf + start, buf_size - start);
+
+    if (s->decode_delta) { /* intraframe */
+        ir2_decode_plane(s, avctx->width, avctx->height,
+                         s->picture.data[0], s->picture.linesize[0], ir2_luma_table);
+        /* swapped U and V */
+        ir2_decode_plane(s, avctx->width >> 2, avctx->height >> 2,
+                         s->picture.data[2], s->picture.linesize[2], ir2_luma_table);
+        ir2_decode_plane(s, avctx->width >> 2, avctx->height >> 2,
+                         s->picture.data[1], s->picture.linesize[1], ir2_luma_table);
+    } else { /* interframe */
+        ir2_decode_plane_inter(s, avctx->width, avctx->height,
+                         s->picture.data[0], s->picture.linesize[0], ir2_luma_table);
+        /* swapped U and V */
+        ir2_decode_plane_inter(s, avctx->width >> 2, avctx->height >> 2,
+                         s->picture.data[2], s->picture.linesize[2], ir2_luma_table);
+        ir2_decode_plane_inter(s, avctx->width >> 2, avctx->height >> 2,
+                         s->picture.data[1], s->picture.linesize[1], ir2_luma_table);
+    }
+
+    *picture= *(AVFrame*)&s->picture;
+    *data_size = sizeof(AVPicture);
+
+    return buf_size;
+}
+
+static int ir2_decode_init(AVCodecContext *avctx){
+    Ir2Context * const ic = avctx->priv_data;
+
+    ic->avctx = avctx;
+
+    avctx->pix_fmt= PIX_FMT_YUV410P;
+
+    if (!ir2_vlc.table)
+        init_vlc(&ir2_vlc, CODE_VLC_BITS, IR2_CODES,
+                 &ir2_codes[0][1], 4, 2,
+#ifdef ALT_BITSTREAM_READER_LE
+                 &ir2_codes[0][0], 4, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+#else
+                 &ir2_codes[0][0], 4, 2, INIT_VLC_USE_STATIC);
+#endif
+
+    return 0;
+}
+
+AVCodec indeo2_decoder = {
+    "indeo2",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_INDEO2,
+    sizeof(Ir2Context),
+    ir2_decode_init,
+    NULL,
+    NULL,
+    ir2_decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/indeo2data.h b/contrib/ffmpeg/libavcodec/indeo2data.h
new file mode 100644
index 000000000..71d250af7
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/indeo2data.h
@@ -0,0 +1,134 @@
+/*
+ * Intel Indeo 2 codec
+ * copyright (c) 2005 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define IR2_CODES 143
+static const uint16_t ir2_codes[IR2_CODES][2] = {
+#ifdef ALT_BITSTREAM_READER_LE
+{0x0000,  3}, {0x0004,  3}, {0x0006,  3}, {0x0001,  5},
+{0x0009,  5}, {0x0019,  5}, {0x000D,  5}, {0x001D,  5},
+{0x0023,  6}, {0x0013,  6}, {0x0033,  6}, {0x000B,  6},
+{0x002B,  6}, {0x001B,  6}, {0x0007,  8}, {0x0087,  8},
+{0x0027,  8}, {0x00A7,  8}, {0x0067,  8}, {0x00E7,  8},
+{0x0097,  8}, {0x0057,  8}, {0x0037,  8}, {0x00B7,  8},
+{0x00F7,  8}, {0x000F,  9}, {0x008F,  9}, {0x018F,  9},
+{0x014F,  9}, {0x00CF,  9}, {0x002F,  9}, {0x012F,  9},
+{0x01AF,  9}, {0x006F,  9}, {0x00EF,  9}, {0x01EF,  9},
+{0x001F, 10}, {0x021F, 10}, {0x011F, 10}, {0x031F, 10},
+{0x009F, 10}, {0x029F, 10}, {0x019F, 10}, {0x039F, 10},
+{0x005F, 10}, {0x025F, 10}, {0x015F, 10}, {0x035F, 10},
+{0x00DF, 10}, {0x02DF, 10}, {0x01DF, 10}, {0x03DF, 10},
+{0x003F, 13}, {0x103F, 13}, {0x083F, 13}, {0x183F, 13},
+{0x043F, 13}, {0x143F, 13}, {0x0C3F, 13}, {0x1C3F, 13},
+{0x023F, 13}, {0x123F, 13}, {0x0A3F, 13}, {0x1A3F, 13},
+{0x063F, 13}, {0x163F, 13}, {0x0E3F, 13}, {0x1E3F, 13},
+{0x013F, 13}, {0x113F, 13}, {0x093F, 13}, {0x193F, 13},
+{0x053F, 13}, {0x153F, 13}, {0x0D3F, 13}, {0x1D3F, 13},
+{0x033F, 13}, {0x133F, 13}, {0x0B3F, 13}, {0x1B3F, 13},
+{0x073F, 13}, {0x173F, 13}, {0x0F3F, 13}, {0x1F3F, 13},
+{0x00BF, 13}, {0x10BF, 13}, {0x08BF, 13}, {0x18BF, 13},
+{0x04BF, 13}, {0x14BF, 13}, {0x0CBF, 13}, {0x1CBF, 13},
+{0x02BF, 13}, {0x12BF, 13}, {0x0ABF, 13}, {0x1ABF, 13},
+{0x06BF, 13}, {0x16BF, 13}, {0x0EBF, 13}, {0x1EBF, 13},
+{0x01BF, 13}, {0x11BF, 13}, {0x09BF, 13}, {0x19BF, 13},
+{0x05BF, 13}, {0x15BF, 13}, {0x0DBF, 13}, {0x1DBF, 13},
+{0x03BF, 13}, {0x13BF, 13}, {0x0BBF, 13}, {0x1BBF, 13},
+{0x07BF, 13}, {0x17BF, 13}, {0x0FBF, 13}, {0x1FBF, 13},
+{0x007F, 14}, {0x207F, 14}, {0x107F, 14}, {0x307F, 14},
+{0x087F, 14}, {0x287F, 14}, {0x187F, 14}, {0x387F, 14},
+{0x047F, 14}, {0x247F, 14}, {0x147F, 14}, {0x0002,  3},
+{0x0011,  5}, {0x0005,  5}, {0x0015,  5}, {0x0003,  6},
+{0x003B,  6}, {0x0047,  8}, {0x00C7,  8}, {0x0017,  8},
+{0x00D7,  8}, {0x0077,  8}, {0x010F,  9}, {0x004F,  9},
+{0x01CF,  9}, {0x00AF,  9}, {0x016F,  9},
+#else
+    {0x0000,  3}, {0x0001,  3}, {0x0003,  3}, {0x0010,  5},
+    {0x0012,  5}, {0x0013,  5}, {0x0016,  5}, {0x0017,  5},
+    {0x0031,  6}, {0x0032,  6}, {0x0033,  6}, {0x0034,  6},
+    {0x0035,  6}, {0x0036,  6}, {0x00E0,  8}, {0x00E1,  8},
+    {0x00E4,  8}, {0x00E5,  8}, {0x00E6,  8}, {0x00E7,  8},
+    {0x00E9,  8}, {0x00EA,  8}, {0x00EC,  8}, {0x00ED,  8},
+    {0x00EF,  8}, {0x01E0,  9}, {0x01E2,  9}, {0x01E3,  9},
+    {0x01E5,  9}, {0x01E6,  9}, {0x01E8,  9}, {0x01E9,  9},
+    {0x01EB,  9}, {0x01EC,  9}, {0x01EE,  9}, {0x01EF,  9},
+    {0x03E0, 10}, {0x03E1, 10}, {0x03E2, 10}, {0x03E3, 10},
+    {0x03E4, 10}, {0x03E5, 10}, {0x03E6, 10}, {0x03E7, 10},
+    {0x03E8, 10}, {0x03E9, 10}, {0x03EA, 10}, {0x03EB, 10},
+    {0x03EC, 10}, {0x03ED, 10}, {0x03EE, 10}, {0x03EF, 10},
+    {0x1F80, 13}, {0x1F81, 13}, {0x1F82, 13}, {0x1F83, 13},
+    {0x1F84, 13}, {0x1F85, 13}, {0x1F86, 13}, {0x1F87, 13},
+    {0x1F88, 13}, {0x1F89, 13}, {0x1F8A, 13}, {0x1F8B, 13},
+    {0x1F8C, 13}, {0x1F8D, 13}, {0x1F8E, 13}, {0x1F8F, 13},
+    {0x1F90, 13}, {0x1F91, 13}, {0x1F92, 13}, {0x1F93, 13},
+    {0x1F94, 13}, {0x1F95, 13}, {0x1F96, 13}, {0x1F97, 13},
+    {0x1F98, 13}, {0x1F99, 13}, {0x1F9A, 13}, {0x1F9B, 13},
+    {0x1F9C, 13}, {0x1F9D, 13}, {0x1F9E, 13}, {0x1F9F, 13},
+    {0x1FA0, 13}, {0x1FA1, 13}, {0x1FA2, 13}, {0x1FA3, 13},
+    {0x1FA4, 13}, {0x1FA5, 13}, {0x1FA6, 13}, {0x1FA7, 13},
+    {0x1FA8, 13}, {0x1FA9, 13}, {0x1FAA, 13}, {0x1FAB, 13},
+    {0x1FAC, 13}, {0x1FAD, 13}, {0x1FAE, 13}, {0x1FAF, 13},
+    {0x1FB0, 13}, {0x1FB1, 13}, {0x1FB2, 13}, {0x1FB3, 13},
+    {0x1FB4, 13}, {0x1FB5, 13}, {0x1FB6, 13}, {0x1FB7, 13},
+    {0x1FB8, 13}, {0x1FB9, 13}, {0x1FBA, 13}, {0x1FBB, 13},
+    {0x1FBC, 13}, {0x1FBD, 13}, {0x1FBE, 13}, {0x1FBF, 13},
+    {0x3F80, 14}, {0x3F81, 14}, {0x3F82, 14}, {0x3F83, 14},
+    {0x3F84, 14}, {0x3F85, 14}, {0x3F86, 14}, {0x3F87, 14},
+    {0x3F88, 14}, {0x3F89, 14}, {0x3F8A, 14}, {0x0002,  3},
+    {0x0011,  5}, {0x0014,  5}, {0x0015,  5}, {0x0030,  6},
+    {0x0037,  6}, {0x00E2,  8}, {0x00E3,  8}, {0x00E8,  8},
+    {0x00EB,  8}, {0x00EE,  8}, {0x01E1,  9}, {0x01E4,  9},
+    {0x01E7,  9}, {0x01EA,  9}, {0x01ED,  9}
+#endif
+};
+
+static const uint8_t ir2_luma_table[256] = {
+ 0x80, 0x80, 0x84, 0x84, 0x7C, 0x7C, 0x7F, 0x85,
+ 0x81, 0x7B, 0x85, 0x7F, 0x7B, 0x81, 0x8C, 0x8C,
+ 0x74, 0x74, 0x83, 0x8D, 0x7D, 0x73, 0x8D, 0x83,
+ 0x73, 0x7D, 0x77, 0x89, 0x89, 0x77, 0x89, 0x77,
+ 0x77, 0x89, 0x8C, 0x95, 0x74, 0x6B, 0x95, 0x8C,
+ 0x6B, 0x74, 0x7C, 0x90, 0x84, 0x70, 0x90, 0x7C,
+ 0x70, 0x84, 0x96, 0x96, 0x6A, 0x6A, 0x82, 0x98,
+ 0x7E, 0x68, 0x98, 0x82, 0x68, 0x7E, 0x97, 0xA2,
+ 0x69, 0x5E, 0xA2, 0x97, 0x5E, 0x69, 0xA2, 0xA2,
+ 0x5E, 0x5E, 0x8B, 0xA3, 0x75, 0x5D, 0xA3, 0x8B,
+ 0x5D, 0x75, 0x71, 0x95, 0x8F, 0x6B, 0x95, 0x71,
+ 0x6B, 0x8F, 0x78, 0x9D, 0x88, 0x63, 0x9D, 0x78,
+ 0x63, 0x88, 0x7F, 0xA7, 0x81, 0x59, 0xA7, 0x7F,
+ 0x59, 0x81, 0xA4, 0xB1, 0x5C, 0x4F, 0xB1, 0xA4,
+ 0x4F, 0x5C, 0x96, 0xB1, 0x6A, 0x4F, 0xB1, 0x96,
+ 0x4F, 0x6A, 0xB2, 0xB2, 0x4E, 0x4E, 0x65, 0x9B,
+ 0x9B, 0x65, 0x9B, 0x65, 0x65, 0x9B, 0x89, 0xB4,
+ 0x77, 0x4C, 0xB4, 0x89, 0x4C, 0x77, 0x6A, 0xA3,
+ 0x96, 0x5D, 0xA3, 0x6A, 0x5D, 0x96, 0x73, 0xAC,
+ 0x8D, 0x54, 0xAC, 0x73, 0x54, 0x8D, 0xB4, 0xC3,
+ 0x4C, 0x3D, 0xC3, 0xB4, 0x3D, 0x4C, 0xA4, 0xC3,
+ 0x5C, 0x3D, 0xC3, 0xA4, 0x3D, 0x5C, 0xC4, 0xC4,
+ 0x3C, 0x3C, 0x96, 0xC6, 0x6A, 0x3A, 0xC6, 0x96,
+ 0x3A, 0x6A, 0x7C, 0xBA, 0x84, 0x46, 0xBA, 0x7C,
+ 0x46, 0x84, 0x5B, 0xAB, 0xA5, 0x55, 0xAB, 0x5B,
+ 0x55, 0xA5, 0x63, 0xB4, 0x9D, 0x4C, 0xB4, 0x63,
+ 0x4C, 0x9D, 0x86, 0xCA, 0x7A, 0x36, 0xCA, 0x86,
+ 0x36, 0x7A, 0xB6, 0xD7, 0x4A, 0x29, 0xD7, 0xB6,
+ 0x29, 0x4A, 0xC8, 0xD7, 0x38, 0x29, 0xD7, 0xC8,
+ 0x29, 0x38, 0xA4, 0xD8, 0x5C, 0x28, 0xD8, 0xA4,
+ 0x28, 0x5C, 0x6C, 0xC1, 0x94, 0x3F, 0xC1, 0x6C,
+ 0x3F, 0x94, 0xD9, 0xD9, 0x27, 0x27, 0x80, 0x80
+};
diff --git a/contrib/ffmpeg/libavcodec/indeo3.c b/contrib/ffmpeg/libavcodec/indeo3.c
new file mode 100644
index 000000000..33dcff820
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/indeo3.c
@@ -0,0 +1,1153 @@
+/*
+ * Intel Indeo 3 (IV31, IV32, etc.) video decoder for ffmpeg
+ * written, produced, and directed by Alan Smithee
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+#include "indeo3data.h"
+
+typedef struct
+{
+  unsigned char *Ybuf;
+  unsigned char *Ubuf;
+  unsigned char *Vbuf;
+  unsigned char *the_buf;
+  unsigned int the_buf_size;
+  unsigned short y_w, y_h;
+  unsigned short uv_w, uv_h;
+} YUVBufs;
+
+typedef struct Indeo3DecodeContext {
+    AVCodecContext *avctx;
+    int width, height;
+    AVFrame frame;
+
+    YUVBufs iv_frame[2];
+    YUVBufs *cur_frame;
+    YUVBufs *ref_frame;
+
+    unsigned char *ModPred;
+    unsigned short *corrector_type;
+} Indeo3DecodeContext;
+
+static int corrector_type_0[24] = {
+  195, 159, 133, 115, 101,  93,  87,  77,
+  195, 159, 133, 115, 101,  93,  87,  77,
+  128,  79,  79,  79,  79,  79,  79,  79
+};
+
+static int corrector_type_2[8] = { 9, 7, 6, 8, 5, 4, 3, 2 };
+
+static void build_modpred(Indeo3DecodeContext *s)
+{
+  int i, j;
+
+  s->ModPred = (unsigned char *) av_malloc (8 * 128);
+
+  for (i=0; i < 128; ++i) {
+    s->ModPred[i+0*128] = (i > 126) ? 254 : 2*((i + 1) - ((i + 1) % 2));
+    s->ModPred[i+1*128] = (i == 7)  ?  20 : ((i == 119 || i == 120)
+                                 ? 236 : 2*((i + 2) - ((i + 1) % 3)));
+    s->ModPred[i+2*128] = (i > 125) ? 248 : 2*((i + 2) - ((i + 2) % 4));
+    s->ModPred[i+3*128] =                        2*((i + 1) - ((i - 3) % 5));
+    s->ModPred[i+4*128] = (i == 8)  ?  20 : 2*((i + 1) - ((i - 3) % 6));
+    s->ModPred[i+5*128] =                        2*((i + 4) - ((i + 3) % 7));
+    s->ModPred[i+6*128] = (i > 123) ? 240 : 2*((i + 4) - ((i + 4) % 8));
+    s->ModPred[i+7*128] =                        2*((i + 5) - ((i + 4) % 9));
+  }
+
+  s->corrector_type = (unsigned short *) av_malloc (24 * 256 * sizeof(unsigned short));
+
+  for (i=0; i < 24; ++i) {
+    for (j=0; j < 256; ++j) {
+      s->corrector_type[i*256+j] = (j < corrector_type_0[i])
+                                ? 1 : ((j < 248 || (i == 16 && j == 248))
+                                       ? 0 : corrector_type_2[j - 248]);
+    }
+  }
+}
+
+static void iv_Decode_Chunk(Indeo3DecodeContext *s, unsigned char *cur,
+  unsigned char *ref, int width, int height, unsigned char *buf1,
+  long fflags2, unsigned char *hdr,
+  unsigned char *buf2, int min_width_160);
+
+#ifndef min
+#define min(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
+/* ---------------------------------------------------------------------- */
+static void iv_alloc_frames(Indeo3DecodeContext *s)
+{
+  int luma_width, luma_height, luma_pixels, chroma_width, chroma_height,
+      chroma_pixels, i;
+  unsigned int bufsize;
+
+  luma_width   = (s->width  + 3) & (~3);
+  luma_height  = (s->height + 3) & (~3);
+
+  s->iv_frame[0].y_w = s->iv_frame[0].y_h =
+    s->iv_frame[0].the_buf_size = 0;
+  s->iv_frame[1].y_w = s->iv_frame[1].y_h =
+    s->iv_frame[1].the_buf_size = 0;
+  s->iv_frame[1].the_buf = NULL;
+
+  chroma_width  = ((luma_width >> 2) + 3) & (~3);
+  chroma_height = ((luma_height>> 2) + 3) & (~3);
+  luma_pixels = luma_width * luma_height;
+  chroma_pixels = chroma_width * chroma_height;
+
+  bufsize = luma_pixels * 2 + luma_width * 3 +
+    (chroma_pixels + chroma_width) * 4;
+
+  if((s->iv_frame[0].the_buf =
+    (s->iv_frame[0].the_buf_size == 0 ? av_malloc(bufsize) :
+      av_realloc(s->iv_frame[0].the_buf, bufsize))) == NULL)
+    return;
+  s->iv_frame[0].y_w = s->iv_frame[1].y_w = luma_width;
+  s->iv_frame[0].y_h = s->iv_frame[1].y_h = luma_height;
+  s->iv_frame[0].uv_w = s->iv_frame[1].uv_w = chroma_width;
+  s->iv_frame[0].uv_h = s->iv_frame[1].uv_h = chroma_height;
+  s->iv_frame[0].the_buf_size = bufsize;
+
+  s->iv_frame[0].Ybuf = s->iv_frame[0].the_buf + luma_width;
+  i = luma_pixels + luma_width * 2;
+  s->iv_frame[1].Ybuf = s->iv_frame[0].the_buf + i;
+  i += (luma_pixels + luma_width);
+  s->iv_frame[0].Ubuf = s->iv_frame[0].the_buf + i;
+  i += (chroma_pixels + chroma_width);
+  s->iv_frame[1].Ubuf = s->iv_frame[0].the_buf + i;
+  i += (chroma_pixels + chroma_width);
+  s->iv_frame[0].Vbuf = s->iv_frame[0].the_buf + i;
+  i += (chroma_pixels + chroma_width);
+  s->iv_frame[1].Vbuf = s->iv_frame[0].the_buf + i;
+
+  for(i = 1; i <= luma_width; i++)
+    s->iv_frame[0].Ybuf[-i] = s->iv_frame[1].Ybuf[-i] =
+      s->iv_frame[0].Ubuf[-i] = 0x80;
+
+  for(i = 1; i <= chroma_width; i++) {
+    s->iv_frame[1].Ubuf[-i] = 0x80;
+    s->iv_frame[0].Vbuf[-i] = 0x80;
+    s->iv_frame[1].Vbuf[-i] = 0x80;
+    s->iv_frame[1].Vbuf[chroma_pixels+i-1] = 0x80;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+static void iv_free_func(Indeo3DecodeContext *s)
+{
+  int i;
+
+  for(i = 0 ; i < 2 ; i++) {
+    if(s->iv_frame[i].the_buf != NULL)
+      av_free(s->iv_frame[i].the_buf);
+    s->iv_frame[i].Ybuf = s->iv_frame[i].Ubuf =
+      s->iv_frame[i].Vbuf = NULL;
+    s->iv_frame[i].the_buf = NULL;
+    s->iv_frame[i].the_buf_size = 0;
+    s->iv_frame[i].y_w = s->iv_frame[i].y_h = 0;
+    s->iv_frame[i].uv_w = s->iv_frame[i].uv_h = 0;
+  }
+
+  av_free(s->ModPred);
+  av_free(s->corrector_type);
+}
+
+/* ---------------------------------------------------------------------- */
+static unsigned long iv_decode_frame(Indeo3DecodeContext *s,
+                                     unsigned char *buf, int buf_size)
+{
+  unsigned int hdr_width, hdr_height,
+    chroma_width, chroma_height;
+  unsigned long fflags1, fflags2, fflags3, offs1, offs2, offs3, offs;
+  unsigned char *hdr_pos, *buf_pos;
+
+  buf_pos = buf;
+  buf_pos += 18;
+
+  fflags1 = le2me_16(*(uint16_t *)buf_pos);
+  buf_pos += 2;
+  fflags3 = le2me_32(*(uint32_t *)buf_pos);
+  buf_pos += 4;
+  fflags2 = *buf_pos++;
+  buf_pos += 3;
+  hdr_height = le2me_16(*(uint16_t *)buf_pos);
+  buf_pos += 2;
+  hdr_width = le2me_16(*(uint16_t *)buf_pos);
+
+  if(avcodec_check_dimensions(NULL, hdr_width, hdr_height))
+      return -1;
+
+  buf_pos += 2;
+  chroma_height = ((hdr_height >> 2) + 3) & 0x7ffc;
+  chroma_width = ((hdr_width >> 2) + 3) & 0x7ffc;
+  offs1 = le2me_32(*(uint32_t *)buf_pos);
+  buf_pos += 4;
+  offs2 = le2me_32(*(uint32_t *)buf_pos);
+  buf_pos += 4;
+  offs3 = le2me_32(*(uint32_t *)buf_pos);
+  buf_pos += 8;
+  hdr_pos = buf_pos;
+  if(fflags3 == 0x80) return 4;
+
+  if(fflags1 & 0x200) {
+    s->cur_frame = s->iv_frame + 1;
+    s->ref_frame = s->iv_frame;
+  } else {
+    s->cur_frame = s->iv_frame;
+    s->ref_frame = s->iv_frame + 1;
+  }
+
+  buf_pos = buf + 16 + offs1;
+  offs = le2me_32(*(uint32_t *)buf_pos);
+  buf_pos += 4;
+
+  iv_Decode_Chunk(s, s->cur_frame->Ybuf, s->ref_frame->Ybuf, hdr_width,
+    hdr_height, buf_pos + offs * 2, fflags2, hdr_pos, buf_pos,
+    min(hdr_width, 160));
+
+  if (!(s->avctx->flags & CODEC_FLAG_GRAY))
+  {
+
+  buf_pos = buf + 16 + offs2;
+  offs = le2me_32(*(uint32_t *)buf_pos);
+  buf_pos += 4;
+
+  iv_Decode_Chunk(s, s->cur_frame->Vbuf, s->ref_frame->Vbuf, chroma_width,
+    chroma_height, buf_pos + offs * 2, fflags2, hdr_pos, buf_pos,
+    min(chroma_width, 40));
+
+  buf_pos = buf + 16 + offs3;
+  offs = le2me_32(*(uint32_t *)buf_pos);
+  buf_pos += 4;
+
+  iv_Decode_Chunk(s, s->cur_frame->Ubuf, s->ref_frame->Ubuf, chroma_width,
+    chroma_height, buf_pos + offs * 2, fflags2, hdr_pos, buf_pos,
+    min(chroma_width, 40));
+
+  }
+
+  return 8;
+}
+
+typedef struct {
+  long xpos;
+  long ypos;
+  long width;
+  long height;
+  long split_flag;
+  long split_direction;
+  long usl7;
+} ustr_t;
+
+/* ---------------------------------------------------------------------- */
+
+#define LV1_CHECK(buf1,rle_v3,lv1,lp2)  \
+  if((lv1 & 0x80) != 0) {   \
+    if(rle_v3 != 0)         \
+      rle_v3 = 0;           \
+    else {                  \
+      rle_v3 = 1;           \
+      buf1 -= 2;            \
+    }                       \
+  }                         \
+  lp2 = 4;
+
+
+#define RLE_V3_CHECK(buf1,rle_v1,rle_v2,rle_v3)  \
+  if(rle_v3 == 0) {         \
+    rle_v2 = *buf1;         \
+    rle_v1 = 1;             \
+    if(rle_v2 > 32) {       \
+      rle_v2 -= 32;         \
+      rle_v1 = 0;           \
+    }                       \
+    rle_v3 = 1;             \
+  }                         \
+  buf1--;
+
+
+#define LP2_CHECK(buf1,rle_v3,lp2)  \
+  if(lp2 == 0 && rle_v3 != 0)     \
+    rle_v3 = 0;           \
+  else {                  \
+    buf1--;               \
+    rle_v3 = 1;           \
+  }
+
+
+#define RLE_V2_CHECK(buf1,rle_v2, rle_v3,lp2) \
+  rle_v2--;             \
+  if(rle_v2 == 0) {     \
+    rle_v3 = 0;         \
+    buf1 += 2;          \
+  }                     \
+  lp2 = 4;
+
+static void iv_Decode_Chunk(Indeo3DecodeContext *s,
+  unsigned char *cur, unsigned char *ref, int width, int height,
+  unsigned char *buf1, long fflags2, unsigned char *hdr,
+  unsigned char *buf2, int min_width_160)
+{
+  unsigned char bit_buf;
+  unsigned long bit_pos, lv, lv1, lv2;
+  long *width_tbl, width_tbl_arr[10];
+  signed char *ref_vectors;
+  unsigned char *cur_frm_pos, *ref_frm_pos, *cp, *cp2;
+  uint32_t *cur_lp, *ref_lp;
+  const uint32_t *correction_lp[2], *correctionloworder_lp[2], *correctionhighorder_lp[2];
+  unsigned short *correction_type_sp[2];
+  ustr_t strip_tbl[20], *strip;
+  int i, j, k, lp1, lp2, flag1, cmd, blks_width, blks_height, region_160_width,
+    rle_v1, rle_v2, rle_v3;
+  unsigned short res;
+
+  bit_buf = 0;
+  ref_vectors = NULL;
+
+  width_tbl = width_tbl_arr + 1;
+  i = (width < 0 ? width + 3 : width)/4;
+  for(j = -1; j < 8; j++)
+    width_tbl[j] = i * j;
+
+  strip = strip_tbl;
+
+  for(region_160_width = 0; region_160_width < (width - min_width_160); region_160_width += min_width_160);
+
+  strip->ypos = strip->xpos = 0;
+  for(strip->width = min_width_160; width > strip->width; strip->width *= 2);
+  strip->height = height;
+  strip->split_direction = 0;
+  strip->split_flag = 0;
+  strip->usl7 = 0;
+
+  bit_pos = 0;
+
+  rle_v1 = rle_v2 = rle_v3 = 0;
+
+  while(strip >= strip_tbl) {
+    if(bit_pos <= 0) {
+      bit_pos = 8;
+      bit_buf = *buf1++;
+    }
+
+    bit_pos -= 2;
+    cmd = (bit_buf >> bit_pos) & 0x03;
+
+    if(cmd == 0) {
+      strip++;
+      memcpy(strip, strip-1, sizeof(ustr_t));
+      strip->split_flag = 1;
+      strip->split_direction = 0;
+      strip->height = (strip->height > 8 ? ((strip->height+8)>>4)<<3 : 4);
+      continue;
+    } else if(cmd == 1) {
+      strip++;
+      memcpy(strip, strip-1, sizeof(ustr_t));
+      strip->split_flag = 1;
+      strip->split_direction = 1;
+      strip->width = (strip->width > 8 ? ((strip->width+8)>>4)<<3 : 4);
+      continue;
+    } else if(cmd == 2) {
+      if(strip->usl7 == 0) {
+        strip->usl7 = 1;
+        ref_vectors = NULL;
+        continue;
+      }
+    } else if(cmd == 3) {
+      if(strip->usl7 == 0) {
+        strip->usl7 = 1;
+        ref_vectors = (signed char*)buf2 + (*buf1 * 2);
+        buf1++;
+        continue;
+      }
+    }
+
+    cur_frm_pos = cur + width * strip->ypos + strip->xpos;
+
+    if((blks_width = strip->width) < 0)
+      blks_width += 3;
+    blks_width >>= 2;
+    blks_height = strip->height;
+
+    if(ref_vectors != NULL) {
+      ref_frm_pos = ref + (ref_vectors[0] + strip->ypos) * width +
+        ref_vectors[1] + strip->xpos;
+    } else
+      ref_frm_pos = cur_frm_pos - width_tbl[4];
+
+    if(cmd == 2) {
+      if(bit_pos <= 0) {
+        bit_pos = 8;
+        bit_buf = *buf1++;
+      }
+
+      bit_pos -= 2;
+      cmd = (bit_buf >> bit_pos) & 0x03;
+
+      if(cmd == 0 || ref_vectors != NULL) {
+        for(lp1 = 0; lp1 < blks_width; lp1++) {
+          for(i = 0, j = 0; i < blks_height; i++, j += width_tbl[1])
+            ((uint32_t *)cur_frm_pos)[j] = ((uint32_t *)ref_frm_pos)[j];
+          cur_frm_pos += 4;
+          ref_frm_pos += 4;
+        }
+      } else if(cmd != 1)
+        return;
+    } else {
+      k = *buf1 >> 4;
+      j = *buf1 & 0x0f;
+      buf1++;
+      lv = j + fflags2;
+
+      if((lv - 8) <= 7 && (k == 0 || k == 3 || k == 10)) {
+        cp2 = s->ModPred + ((lv - 8) << 7);
+        cp = ref_frm_pos;
+        for(i = 0; i < blks_width << 2; i++) {
+            int v = *cp >> 1;
+            *(cp++) = cp2[v];
+        }
+      }
+
+      if(k == 1 || k == 4) {
+        lv = (hdr[j] & 0xf) + fflags2;
+        correction_type_sp[0] = s->corrector_type + (lv << 8);
+        correction_lp[0] = correction + (lv << 8);
+        lv = (hdr[j] >> 4) + fflags2;
+        correction_lp[1] = correction + (lv << 8);
+        correction_type_sp[1] = s->corrector_type + (lv << 8);
+      } else {
+        correctionloworder_lp[0] = correctionloworder_lp[1] = correctionloworder + (lv << 8);
+        correctionhighorder_lp[0] = correctionhighorder_lp[1] = correctionhighorder + (lv << 8);
+        correction_type_sp[0] = correction_type_sp[1] = s->corrector_type + (lv << 8);
+        correction_lp[0] = correction_lp[1] = correction + (lv << 8);
+      }
+
+      switch(k) {
+        case 1:
+        case 0:                    /********** CASE 0 **********/
+          for( ; blks_height > 0; blks_height -= 4) {
+            for(lp1 = 0; lp1 < blks_width; lp1++) {
+              for(lp2 = 0; lp2 < 4; ) {
+                k = *buf1++;
+                cur_lp = ((uint32_t *)cur_frm_pos) + width_tbl[lp2];
+                ref_lp = ((uint32_t *)ref_frm_pos) + width_tbl[lp2];
+
+                switch(correction_type_sp[0][k]) {
+                  case 0:
+                    *cur_lp = le2me_32(((le2me_32(*ref_lp) >> 1) + correction_lp[lp2 & 0x01][k]) << 1);
+                    lp2++;
+                    break;
+                  case 1:
+                    res = ((le2me_16(((unsigned short *)(ref_lp))[0]) >> 1) + correction_lp[lp2 & 0x01][*buf1]) << 1;
+                    ((unsigned short *)cur_lp)[0] = le2me_16(res);
+                    res = ((le2me_16(((unsigned short *)(ref_lp))[1]) >> 1) + correction_lp[lp2 & 0x01][k]) << 1;
+                    ((unsigned short *)cur_lp)[1] = le2me_16(res);
+                    buf1++;
+                    lp2++;
+                    break;
+                  case 2:
+                    if(lp2 == 0) {
+                      for(i = 0, j = 0; i < 2; i++, j += width_tbl[1])
+                        cur_lp[j] = ref_lp[j];
+                      lp2 += 2;
+                    }
+                    break;
+                  case 3:
+                    if(lp2 < 2) {
+                      for(i = 0, j = 0; i < (3 - lp2); i++, j += width_tbl[1])
+                        cur_lp[j] = ref_lp[j];
+                      lp2 = 3;
+                    }
+                    break;
+                  case 8:
+                    if(lp2 == 0) {
+                      RLE_V3_CHECK(buf1,rle_v1,rle_v2,rle_v3)
+
+                      if(rle_v1 == 1 || ref_vectors != NULL) {
+                        for(i = 0, j = 0; i < 4; i++, j += width_tbl[1])
+                          cur_lp[j] = ref_lp[j];
+                      }
+
+                      RLE_V2_CHECK(buf1,rle_v2, rle_v3,lp2)
+                      break;
+                    } else {
+                      rle_v1 = 1;
+                      rle_v2 = *buf1 - 1;
+                    }
+                  case 5:
+                      LP2_CHECK(buf1,rle_v3,lp2)
+                  case 4:
+                    for(i = 0, j = 0; i < (4 - lp2); i++, j += width_tbl[1])
+                      cur_lp[j] = ref_lp[j];
+                    lp2 = 4;
+                    break;
+
+                  case 7:
+                    if(rle_v3 != 0)
+                      rle_v3 = 0;
+                    else {
+                      buf1--;
+                      rle_v3 = 1;
+                    }
+                  case 6:
+                    if(ref_vectors != NULL) {
+                      for(i = 0, j = 0; i < 4; i++, j += width_tbl[1])
+                        cur_lp[j] = ref_lp[j];
+                    }
+                    lp2 = 4;
+                    break;
+
+                  case 9:
+                    lv1 = *buf1++;
+                    lv = (lv1 & 0x7F) << 1;
+                    lv += (lv << 8);
+                    lv += (lv << 16);
+                    for(i = 0, j = 0; i < 4; i++, j += width_tbl[1])
+                      cur_lp[j] = lv;
+
+                    LV1_CHECK(buf1,rle_v3,lv1,lp2)
+                    break;
+                  default:
+                    return;
+                }
+              }
+
+              cur_frm_pos += 4;
+              ref_frm_pos += 4;
+            }
+
+            cur_frm_pos += ((width - blks_width) * 4);
+            ref_frm_pos += ((width - blks_width) * 4);
+          }
+          break;
+
+        case 4:
+        case 3:                    /********** CASE 3 **********/
+          if(ref_vectors != NULL)
+            return;
+          flag1 = 1;
+
+          for( ; blks_height > 0; blks_height -= 8) {
+            for(lp1 = 0; lp1 < blks_width; lp1++) {
+              for(lp2 = 0; lp2 < 4; ) {
+                k = *buf1++;
+
+                cur_lp = ((uint32_t *)cur_frm_pos) + width_tbl[lp2 * 2];
+                ref_lp = ((uint32_t *)cur_frm_pos) + width_tbl[(lp2 * 2) - 1];
+
+                switch(correction_type_sp[lp2 & 0x01][k]) {
+                  case 0:
+                    cur_lp[width_tbl[1]] = le2me_32(((le2me_32(*ref_lp) >> 1) + correction_lp[lp2 & 0x01][k]) << 1);
+                    if(lp2 > 0 || flag1 == 0 || strip->ypos != 0)
+                      cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE;
+                    else
+                      cur_lp[0] = le2me_32(((le2me_32(*ref_lp) >> 1) + correction_lp[lp2 & 0x01][k]) << 1);
+                    lp2++;
+                    break;
+
+                  case 1:
+                    res = ((le2me_16(((unsigned short *)ref_lp)[0]) >> 1) + correction_lp[lp2 & 0x01][*buf1]) << 1;
+                    ((unsigned short *)cur_lp)[width_tbl[2]] = le2me_16(res);
+                    res = ((le2me_16(((unsigned short *)ref_lp)[1]) >> 1) + correction_lp[lp2 & 0x01][k]) << 1;
+                    ((unsigned short *)cur_lp)[width_tbl[2]+1] = le2me_16(res);
+
+                    if(lp2 > 0 || flag1 == 0 || strip->ypos != 0)
+                      cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE;
+                    else
+                      cur_lp[0] = cur_lp[width_tbl[1]];
+                    buf1++;
+                    lp2++;
+                    break;
+
+                  case 2:
+                    if(lp2 == 0) {
+                      for(i = 0, j = 0; i < 4; i++, j += width_tbl[1])
+                        cur_lp[j] = *ref_lp;
+                      lp2 += 2;
+                    }
+                    break;
+
+                  case 3:
+                    if(lp2 < 2) {
+                      for(i = 0, j = 0; i < 6 - (lp2 * 2); i++, j += width_tbl[1])
+                        cur_lp[j] = *ref_lp;
+                      lp2 = 3;
+                    }
+                    break;
+
+                  case 6:
+                    lp2 = 4;
+                    break;
+
+                  case 7:
+                    if(rle_v3 != 0)
+                      rle_v3 = 0;
+                    else {
+                      buf1--;
+                      rle_v3 = 1;
+                    }
+                    lp2 = 4;
+                    break;
+
+                  case 8:
+                    if(lp2 == 0) {
+                      RLE_V3_CHECK(buf1,rle_v1,rle_v2,rle_v3)
+
+                      if(rle_v1 == 1) {
+                        for(i = 0, j = 0; i < 8; i++, j += width_tbl[1])
+                          cur_lp[j] = ref_lp[j];
+                      }
+
+                      RLE_V2_CHECK(buf1,rle_v2, rle_v3,lp2)
+                      break;
+                    } else {
+                      rle_v2 = (*buf1) - 1;
+                      rle_v1 = 1;
+                    }
+                  case 5:
+                      LP2_CHECK(buf1,rle_v3,lp2)
+                  case 4:
+                    for(i = 0, j = 0; i < 8 - (lp2 * 2); i++, j += width_tbl[1])
+                      cur_lp[j] = *ref_lp;
+                    lp2 = 4;
+                    break;
+
+                  case 9:
+                    av_log(s->avctx, AV_LOG_ERROR, "UNTESTED.\n");
+                    lv1 = *buf1++;
+                    lv = (lv1 & 0x7F) << 1;
+                    lv += (lv << 8);
+                    lv += (lv << 16);
+
+                    for(i = 0, j = 0; i < 4; i++, j += width_tbl[1])
+                      cur_lp[j] = lv;
+
+                    LV1_CHECK(buf1,rle_v3,lv1,lp2)
+                    break;
+
+                  default:
+                    return;
+                }
+              }
+
+              cur_frm_pos += 4;
+            }
+
+            cur_frm_pos += (((width * 2) - blks_width) * 4);
+            flag1 = 0;
+          }
+          break;
+
+        case 10:                    /********** CASE 10 **********/
+          if(ref_vectors == NULL) {
+            flag1 = 1;
+
+            for( ; blks_height > 0; blks_height -= 8) {
+              for(lp1 = 0; lp1 < blks_width; lp1 += 2) {
+                for(lp2 = 0; lp2 < 4; ) {
+                  k = *buf1++;
+                  cur_lp = ((uint32_t *)cur_frm_pos) + width_tbl[lp2 * 2];
+                  ref_lp = ((uint32_t *)cur_frm_pos) + width_tbl[(lp2 * 2) - 1];
+                  lv1 = ref_lp[0];
+                  lv2 = ref_lp[1];
+                  if(lp2 == 0 && flag1 != 0) {
+#ifdef WORDS_BIGENDIAN
+                    lv1 = lv1 & 0xFF00FF00;
+                    lv1 = (lv1 >> 8) | lv1;
+                    lv2 = lv2 & 0xFF00FF00;
+                    lv2 = (lv2 >> 8) | lv2;
+#else
+                    lv1 = lv1 & 0x00FF00FF;
+                    lv1 = (lv1 << 8) | lv1;
+                    lv2 = lv2 & 0x00FF00FF;
+                    lv2 = (lv2 << 8) | lv2;
+#endif
+                  }
+
+                  switch(correction_type_sp[lp2 & 0x01][k]) {
+                    case 0:
+                      cur_lp[width_tbl[1]] = le2me_32(((le2me_32(lv1) >> 1) + correctionloworder_lp[lp2 & 0x01][k]) << 1);
+                      cur_lp[width_tbl[1]+1] = le2me_32(((le2me_32(lv2) >> 1) + correctionhighorder_lp[lp2 & 0x01][k]) << 1);
+                      if(lp2 > 0 || strip->ypos != 0 || flag1 == 0) {
+                        cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE;
+                        cur_lp[1] = ((cur_lp[-width_tbl[1]+1] >> 1) + (cur_lp[width_tbl[1]+1] >> 1)) & 0xFEFEFEFE;
+                      } else {
+                        cur_lp[0] = cur_lp[width_tbl[1]];
+                        cur_lp[1] = cur_lp[width_tbl[1]+1];
+                      }
+                      lp2++;
+                      break;
+
+                    case 1:
+                      cur_lp[width_tbl[1]] = le2me_32(((le2me_32(lv1) >> 1) + correctionloworder_lp[lp2 & 0x01][*buf1]) << 1);
+                      cur_lp[width_tbl[1]+1] = le2me_32(((le2me_32(lv2) >> 1) + correctionloworder_lp[lp2 & 0x01][k]) << 1);
+                      if(lp2 > 0 || strip->ypos != 0 || flag1 == 0) {
+                        cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE;
+                        cur_lp[1] = ((cur_lp[-width_tbl[1]+1] >> 1) + (cur_lp[width_tbl[1]+1] >> 1)) & 0xFEFEFEFE;
+                      } else {
+                        cur_lp[0] = cur_lp[width_tbl[1]];
+                        cur_lp[1] = cur_lp[width_tbl[1]+1];
+                      }
+                      buf1++;
+                      lp2++;
+                      break;
+
+                    case 2:
+                      if(lp2 == 0) {
+                        if(flag1 != 0) {
+                          for(i = 0, j = width_tbl[1]; i < 3; i++, j += width_tbl[1]) {
+                            cur_lp[j] = lv1;
+                            cur_lp[j+1] = lv2;
+                          }
+                          cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE;
+                          cur_lp[1] = ((cur_lp[-width_tbl[1]+1] >> 1) + (cur_lp[width_tbl[1]+1] >> 1)) & 0xFEFEFEFE;
+                        } else {
+                          for(i = 0, j = 0; i < 4; i++, j += width_tbl[1]) {
+                            cur_lp[j] = lv1;
+                            cur_lp[j+1] = lv2;
+                          }
+                        }
+                        lp2 += 2;
+                      }
+                      break;
+
+                    case 3:
+                      if(lp2 < 2) {
+                        if(lp2 == 0 && flag1 != 0) {
+                          for(i = 0, j = width_tbl[1]; i < 5; i++, j += width_tbl[1]) {
+                            cur_lp[j] = lv1;
+                            cur_lp[j+1] = lv2;
+                          }
+                          cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE;
+                          cur_lp[1] = ((cur_lp[-width_tbl[1]+1] >> 1) + (cur_lp[width_tbl[1]+1] >> 1)) & 0xFEFEFEFE;
+                        } else {
+                          for(i = 0, j = 0; i < 6 - (lp2 * 2); i++, j += width_tbl[1]) {
+                            cur_lp[j] = lv1;
+                            cur_lp[j+1] = lv2;
+                          }
+                        }
+                        lp2 = 3;
+                      }
+                      break;
+
+                    case 8:
+                      if(lp2 == 0) {
+                        RLE_V3_CHECK(buf1,rle_v1,rle_v2,rle_v3)
+                        if(rle_v1 == 1) {
+                          if(flag1 != 0) {
+                            for(i = 0, j = width_tbl[1]; i < 7; i++, j += width_tbl[1]) {
+                              cur_lp[j] = lv1;
+                              cur_lp[j+1] = lv2;
+                            }
+                            cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE;
+                            cur_lp[1] = ((cur_lp[-width_tbl[1]+1] >> 1) + (cur_lp[width_tbl[1]+1] >> 1)) & 0xFEFEFEFE;
+                          } else {
+                            for(i = 0, j = 0; i < 8; i++, j += width_tbl[1]) {
+                              cur_lp[j] = lv1;
+                              cur_lp[j+1] = lv2;
+                            }
+                          }
+                        }
+                        RLE_V2_CHECK(buf1,rle_v2, rle_v3,lp2)
+                        break;
+                      } else {
+                        rle_v1 = 1;
+                        rle_v2 = (*buf1) - 1;
+                      }
+                    case 5:
+                        LP2_CHECK(buf1,rle_v3,lp2)
+                    case 4:
+                      if(lp2 == 0 && flag1 != 0) {
+                        for(i = 0, j = width_tbl[1]; i < 7; i++, j += width_tbl[1]) {
+                          cur_lp[j] = lv1;
+                          cur_lp[j+1] = lv2;
+                        }
+                        cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE;
+                        cur_lp[1] = ((cur_lp[-width_tbl[1]+1] >> 1) + (cur_lp[width_tbl[1]+1] >> 1)) & 0xFEFEFEFE;
+                      } else {
+                        for(i = 0, j = 0; i < 8 - (lp2 * 2); i++, j += width_tbl[1]) {
+                          cur_lp[j] = lv1;
+                          cur_lp[j+1] = lv2;
+                        }
+                      }
+                      lp2 = 4;
+                      break;
+
+                    case 6:
+                      lp2 = 4;
+                      break;
+
+                    case 7:
+                      if(lp2 == 0) {
+                        if(rle_v3 != 0)
+                          rle_v3 = 0;
+                        else {
+                          buf1--;
+                          rle_v3 = 1;
+                        }
+                        lp2 = 4;
+                      }
+                      break;
+
+                    case 9:
+                      av_log(s->avctx, AV_LOG_ERROR, "UNTESTED.\n");
+                      lv1 = *buf1;
+                      lv = (lv1 & 0x7F) << 1;
+                      lv += (lv << 8);
+                      lv += (lv << 16);
+                      for(i = 0, j = 0; i < 8; i++, j += width_tbl[1])
+                        cur_lp[j] = lv;
+                      LV1_CHECK(buf1,rle_v3,lv1,lp2)
+                      break;
+
+                    default:
+                      return;
+                  }
+                }
+
+                cur_frm_pos += 8;
+              }
+
+              cur_frm_pos += (((width * 2) - blks_width) * 4);
+              flag1 = 0;
+            }
+          } else {
+            for( ; blks_height > 0; blks_height -= 8) {
+              for(lp1 = 0; lp1 < blks_width; lp1 += 2) {
+                for(lp2 = 0; lp2 < 4; ) {
+                  k = *buf1++;
+                  cur_lp = ((uint32_t *)cur_frm_pos) + width_tbl[lp2 * 2];
+                  ref_lp = ((uint32_t *)ref_frm_pos) + width_tbl[lp2 * 2];
+
+                  switch(correction_type_sp[lp2 & 0x01][k]) {
+                    case 0:
+                      lv1 = correctionloworder_lp[lp2 & 0x01][k];
+                      lv2 = correctionhighorder_lp[lp2 & 0x01][k];
+                      cur_lp[0] = le2me_32(((le2me_32(ref_lp[0]) >> 1) + lv1) << 1);
+                      cur_lp[1] = le2me_32(((le2me_32(ref_lp[1]) >> 1) + lv2) << 1);
+                      cur_lp[width_tbl[1]] = le2me_32(((le2me_32(ref_lp[width_tbl[1]]) >> 1) + lv1) << 1);
+                      cur_lp[width_tbl[1]+1] = le2me_32(((le2me_32(ref_lp[width_tbl[1]+1]) >> 1) + lv2) << 1);
+                      lp2++;
+                      break;
+
+                    case 1:
+                      lv1 = correctionloworder_lp[lp2 & 0x01][*buf1++];
+                      lv2 = correctionloworder_lp[lp2 & 0x01][k];
+                      cur_lp[0] = le2me_32(((le2me_32(ref_lp[0]) >> 1) + lv1) << 1);
+                      cur_lp[1] = le2me_32(((le2me_32(ref_lp[1]) >> 1) + lv2) << 1);
+                      cur_lp[width_tbl[1]] = le2me_32(((le2me_32(ref_lp[width_tbl[1]]) >> 1) + lv1) << 1);
+                      cur_lp[width_tbl[1]+1] = le2me_32(((le2me_32(ref_lp[width_tbl[1]+1]) >> 1) + lv2) << 1);
+                      lp2++;
+                      break;
+
+                    case 2:
+                      if(lp2 == 0) {
+                        for(i = 0, j = 0; i < 4; i++, j += width_tbl[1]) {
+                          cur_lp[j] = ref_lp[j];
+                          cur_lp[j+1] = ref_lp[j+1];
+                        }
+                        lp2 += 2;
+                      }
+                      break;
+
+                    case 3:
+                      if(lp2 < 2) {
+                        for(i = 0, j = 0; i < 6 - (lp2 * 2); i++, j += width_tbl[1]) {
+                          cur_lp[j] = ref_lp[j];
+                          cur_lp[j+1] = ref_lp[j+1];
+                        }
+                        lp2 = 3;
+                      }
+                      break;
+
+                    case 8:
+                      if(lp2 == 0) {
+                        RLE_V3_CHECK(buf1,rle_v1,rle_v2,rle_v3)
+                        for(i = 0, j = 0; i < 8; i++, j += width_tbl[1]) {
+                          ((uint32_t *)cur_frm_pos)[j] = ((uint32_t *)ref_frm_pos)[j];
+                          ((uint32_t *)cur_frm_pos)[j+1] = ((uint32_t *)ref_frm_pos)[j+1];
+                        }
+                        RLE_V2_CHECK(buf1,rle_v2, rle_v3,lp2)
+                        break;
+                      } else {
+                        rle_v1 = 1;
+                        rle_v2 = (*buf1) - 1;
+                      }
+                    case 5:
+                    case 7:
+                        LP2_CHECK(buf1,rle_v3,lp2)
+                    case 6:
+                    case 4:
+                      for(i = 0, j = 0; i < 8 - (lp2 * 2); i++, j += width_tbl[1]) {
+                        cur_lp[j] = ref_lp[j];
+                        cur_lp[j+1] = ref_lp[j+1];
+                      }
+                      lp2 = 4;
+                      break;
+
+                    case 9:
+                      av_log(s->avctx, AV_LOG_ERROR, "UNTESTED.\n");
+                      lv1 = *buf1;
+                      lv = (lv1 & 0x7F) << 1;
+                      lv += (lv << 8);
+                      lv += (lv << 16);
+                      for(i = 0, j = 0; i < 8; i++, j += width_tbl[1])
+                        ((uint32_t *)cur_frm_pos)[j] = ((uint32_t *)cur_frm_pos)[j+1] = lv;
+                      LV1_CHECK(buf1,rle_v3,lv1,lp2)
+                      break;
+
+                    default:
+                      return;
+                  }
+                }
+
+                cur_frm_pos += 8;
+                ref_frm_pos += 8;
+              }
+
+              cur_frm_pos += (((width * 2) - blks_width) * 4);
+              ref_frm_pos += (((width * 2) - blks_width) * 4);
+            }
+          }
+          break;
+
+        case 11:                    /********** CASE 11 **********/
+          if(ref_vectors == NULL)
+            return;
+
+          for( ; blks_height > 0; blks_height -= 8) {
+            for(lp1 = 0; lp1 < blks_width; lp1++) {
+              for(lp2 = 0; lp2 < 4; ) {
+                k = *buf1++;
+                cur_lp = ((uint32_t *)cur_frm_pos) + width_tbl[lp2 * 2];
+                ref_lp = ((uint32_t *)ref_frm_pos) + width_tbl[lp2 * 2];
+
+                switch(correction_type_sp[lp2 & 0x01][k]) {
+                  case 0:
+                    cur_lp[0] = le2me_32(((le2me_32(*ref_lp) >> 1) + correction_lp[lp2 & 0x01][k]) << 1);
+                    cur_lp[width_tbl[1]] = le2me_32(((le2me_32(ref_lp[width_tbl[1]]) >> 1) + correction_lp[lp2 & 0x01][k]) << 1);
+                    lp2++;
+                    break;
+
+                  case 1:
+                    lv1 = (unsigned short)(correction_lp[lp2 & 0x01][*buf1++]);
+                    lv2 = (unsigned short)(correction_lp[lp2 & 0x01][k]);
+                    res = (unsigned short)(((le2me_16(((unsigned short *)ref_lp)[0]) >> 1) + lv1) << 1);
+                    ((unsigned short *)cur_lp)[0] = le2me_16(res);
+                    res = (unsigned short)(((le2me_16(((unsigned short *)ref_lp)[1]) >> 1) + lv2) << 1);
+                    ((unsigned short *)cur_lp)[1] = le2me_16(res);
+                    res = (unsigned short)(((le2me_16(((unsigned short *)ref_lp)[width_tbl[2]]) >> 1) + lv1) << 1);
+                    ((unsigned short *)cur_lp)[width_tbl[2]] = le2me_16(res);
+                    res = (unsigned short)(((le2me_16(((unsigned short *)ref_lp)[width_tbl[2]+1]) >> 1) + lv2) << 1);
+                    ((unsigned short *)cur_lp)[width_tbl[2]+1] = le2me_16(res);
+                    lp2++;
+                    break;
+
+                  case 2:
+                    if(lp2 == 0) {
+                      for(i = 0, j = 0; i < 4; i++, j += width_tbl[1])
+                        cur_lp[j] = ref_lp[j];
+                      lp2 += 2;
+                    }
+                    break;
+
+                  case 3:
+                    if(lp2 < 2) {
+                      for(i = 0, j = 0; i < 6 - (lp2 * 2); i++, j += width_tbl[1])
+                        cur_lp[j] = ref_lp[j];
+                      lp2 = 3;
+                    }
+                    break;
+
+                  case 8:
+                    if(lp2 == 0) {
+                      RLE_V3_CHECK(buf1,rle_v1,rle_v2,rle_v3)
+
+                      for(i = 0, j = 0; i < 8; i++, j += width_tbl[1])
+                        cur_lp[j] = ref_lp[j];
+
+                      RLE_V2_CHECK(buf1,rle_v2, rle_v3,lp2)
+                      break;
+                    } else {
+                      rle_v1 = 1;
+                      rle_v2 = (*buf1) - 1;
+                    }
+                  case 5:
+                  case 7:
+                      LP2_CHECK(buf1,rle_v3,lp2)
+                  case 4:
+                  case 6:
+                    for(i = 0, j = 0; i < 8 - (lp2 * 2); i++, j += width_tbl[1])
+                      cur_lp[j] = ref_lp[j];
+                    lp2 = 4;
+                    break;
+
+                case 9:
+                  av_log(s->avctx, AV_LOG_ERROR, "UNTESTED.\n");
+                  lv1 = *buf1++;
+                  lv = (lv1 & 0x7F) << 1;
+                  lv += (lv << 8);
+                  lv += (lv << 16);
+                  for(i = 0, j = 0; i < 4; i++, j += width_tbl[1])
+                    cur_lp[j] = lv;
+                  LV1_CHECK(buf1,rle_v3,lv1,lp2)
+                  break;
+
+                  default:
+                    return;
+                }
+              }
+
+              cur_frm_pos += 4;
+              ref_frm_pos += 4;
+            }
+
+            cur_frm_pos += (((width * 2) - blks_width) * 4);
+            ref_frm_pos += (((width * 2) - blks_width) * 4);
+          }
+          break;
+
+        default:
+          return;
+      }
+    }
+
+    if(strip < strip_tbl)
+      return;
+
+    for( ; strip >= strip_tbl; strip--) {
+      if(strip->split_flag != 0) {
+        strip->split_flag = 0;
+        strip->usl7 = (strip-1)->usl7;
+
+        if(strip->split_direction) {
+          strip->xpos += strip->width;
+          strip->width = (strip-1)->width - strip->width;
+          if(region_160_width <= strip->xpos && width < strip->width + strip->xpos)
+            strip->width = width - strip->xpos;
+        } else {
+          strip->ypos += strip->height;
+          strip->height = (strip-1)->height - strip->height;
+        }
+        break;
+      }
+    }
+  }
+}
+
+static int indeo3_decode_init(AVCodecContext *avctx)
+{
+    Indeo3DecodeContext *s = avctx->priv_data;
+
+    s->avctx = avctx;
+    s->width = avctx->width;
+    s->height = avctx->height;
+    avctx->pix_fmt = PIX_FMT_YUV410P;
+    avctx->has_b_frames = 0;
+
+    build_modpred(s);
+    iv_alloc_frames(s);
+
+    return 0;
+}
+
+static int indeo3_decode_frame(AVCodecContext *avctx,
+                               void *data, int *data_size,
+                               unsigned char *buf, int buf_size)
+{
+    Indeo3DecodeContext *s=avctx->priv_data;
+    unsigned char *src, *dest;
+    int y;
+
+    iv_decode_frame(s, buf, buf_size);
+
+    if(s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    s->frame.reference = 0;
+    if(avctx->get_buffer(avctx, &s->frame) < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    src = s->cur_frame->Ybuf;
+    dest = s->frame.data[0];
+    for (y = 0; y < s->height; y++) {
+      memcpy(dest, src, s->cur_frame->y_w);
+      src += s->cur_frame->y_w;
+      dest += s->frame.linesize[0];
+    }
+
+    if (!(s->avctx->flags & CODEC_FLAG_GRAY))
+    {
+    src = s->cur_frame->Ubuf;
+    dest = s->frame.data[1];
+    for (y = 0; y < s->height / 4; y++) {
+      memcpy(dest, src, s->cur_frame->uv_w);
+      src += s->cur_frame->uv_w;
+      dest += s->frame.linesize[1];
+    }
+
+    src = s->cur_frame->Vbuf;
+    dest = s->frame.data[2];
+    for (y = 0; y < s->height / 4; y++) {
+      memcpy(dest, src, s->cur_frame->uv_w);
+      src += s->cur_frame->uv_w;
+      dest += s->frame.linesize[2];
+    }
+    }
+
+    *data_size=sizeof(AVFrame);
+    *(AVFrame*)data= s->frame;
+
+    return buf_size;
+}
+
+static int indeo3_decode_end(AVCodecContext *avctx)
+{
+    Indeo3DecodeContext *s = avctx->priv_data;
+
+    iv_free_func(s);
+
+    return 0;
+}
+
+AVCodec indeo3_decoder = {
+    "indeo3",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_INDEO3,
+    sizeof(Indeo3DecodeContext),
+    indeo3_decode_init,
+    NULL,
+    indeo3_decode_end,
+    indeo3_decode_frame,
+    0,
+    NULL
+};
diff --git a/contrib/ffmpeg/libavcodec/indeo3data.h b/contrib/ffmpeg/libavcodec/indeo3data.h
new file mode 100644
index 000000000..e69a09f0e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/indeo3data.h
@@ -0,0 +1,2335 @@
+/*
+ * Intel Indeo 3 (IV31, IV32, etc.) video decoder for ffmpeg
+ * written, produced, and directed by Alan Smithee
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+static const uint32_t correction[] = {
+        0x00000000, 0x00000202, 0xfffffdfe, 0x000002ff, 0xfffffd01, 0xffffff03, 0x000000fd, 0x00000404,
+        0xfffffbfc, 0x00000501, 0xfffffaff, 0x00000105, 0xfffffefb, 0x000003fc, 0xfffffc04, 0x000005fe,
+        0xfffffa02, 0xfffffe06, 0x000001fa, 0x00000904, 0xfffff6fc, 0x00000409, 0xfffffbf7, 0x00000909,
+        0xfffff6f7, 0x00000a01, 0xfffff5ff, 0x0000010a, 0xfffffef6, 0x000007fb, 0xfffff805, 0xfffffb08,
+        0x000004f8, 0x00000f09, 0xfffff0f7, 0x0000090f, 0xfffff6f1, 0x00000bfd, 0xfffff403, 0xfffffd0c,
+        0x000002f4, 0x00001004, 0xffffeffc, 0x00000410, 0xfffffbf0, 0x00001010, 0xffffeff0, 0x00001200,
+        0xffffee00, 0x00000012, 0xffffffee, 0x00000bf4, 0xfffff40c, 0x00000ff7, 0xfffff009, 0xfffff710,
+        0x000008f0, 0x00001b0b, 0xffffe4f5, 0x00000b1b, 0xfffff4e5, 0x00001c13, 0xffffe3ed, 0x0000131c,
+        0xffffece4, 0x000015fa, 0xffffea06, 0xfffffa16, 0x000005ea, 0x00001d04, 0xffffe2fc, 0x0000041d,
+        0xfffffbe3, 0x00001e1e, 0xffffe1e2, 0x000020fe, 0xffffdf02, 0xfffffe21, 0x000001df, 0x000016ee,
+        0xffffe912, 0xffffee17, 0x000011e9, 0x00001df1, 0xffffe20f, 0xfffff11e, 0x00000ee2, 0x00002e16,
+        0xffffd1ea, 0x0000162e, 0xffffe9d2, 0x00002f0d, 0xffffd0f3, 0x00000d2f, 0xfffff2d1, 0x00003123,
+        0xffffcedd, 0x00002331, 0xffffdccf, 0x000028f5, 0xffffd70b, 0xfffff529, 0x00000ad7, 0x00003304,
+        0xffffccfc, 0x00000433, 0xfffffbcd, 0x00003636, 0xffffc9ca, 0x000021de, 0xffffde22, 0x000029e3,
+        0xffffd61d, 0xffffe32a, 0x00001cd6, 0x00003bfa, 0xffffc406, 0xfffffa3c, 0x000005c4, 0x00004c1b,
+        0xffffb3e5, 0x00001b4c, 0xffffe4b4, 0x00004d2b, 0xffffb2d5, 0x00002b4d, 0xffffd4b3, 0x000036e8,
+        0xffffc918, 0xffffe837, 0x000017c9, 0x00004f0e, 0xffffb0f2, 0x00000e4f, 0xfffff1b1, 0x0000533f,
+        0xffffacc1, 0x00003f53, 0xffffc0ad, 0x000049ec, 0xffffb614, 0xffffec4a, 0x000013b6, 0x00005802,
+        0xffffa7fe, 0x00000258, 0xfffffda8, 0x00005d5d, 0xffffa2a3, 0x00003ccc, 0xffffc334, 0xffffcc3d,
+        0x000033c3, 0x00007834, 0xffff87cc, 0x00003478, 0xffffcb88, 0x00004ad3, 0xffffb52d, 0xffffd34b,
+        0x00002cb5, 0x00007d4b, 0xffff82b5, 0x00004b7d, 0xffffb483, 0x00007a21, 0xffff85df, 0x0000217a,
+        0xffffde86, 0x000066f3, 0xffff990d, 0xfffff367, 0x00000c99, 0x00005fd8, 0xffffa028, 0xffffd860,
+        0x000027a0, 0x00007ede, 0xffff8122, 0xffffde7f, 0x00002181, 0x000058a7, 0xffffa759, 0x000068b2,
+        0xffff974e, 0xffffb269, 0x00004d97, 0x00000c0c, 0xfffff3f4, 0x00001717, 0xffffe8e9, 0x00002a2a,
+        0xffffd5d6, 0x00004949, 0xffffb6b7, 0x00000000, 0x02020000, 0xfdfe0000, 0x02ff0000, 0xfd010000,
+        0xff030000, 0x00fd0000, 0x00000202, 0x02020202, 0xfdfe0202, 0x02ff0202, 0xfd010202, 0xff030202,
+        0x00fd0202, 0xfffffdfe, 0x0201fdfe, 0xfdfdfdfe, 0x02fefdfe, 0xfd00fdfe, 0xff02fdfe, 0x00fcfdfe,
+        0x000002ff, 0x020202ff, 0xfdfe02ff, 0x02ff02ff, 0xfd0102ff, 0xff0302ff, 0x00fd02ff, 0xfffffd01,
+        0x0201fd01, 0xfdfdfd01, 0x02fefd01, 0xfd00fd01, 0xff02fd01, 0x00fcfd01, 0xffffff03, 0x0201ff03,
+        0xfdfdff03, 0x02feff03, 0xfd00ff03, 0xff02ff03, 0x00fcff03, 0x000000fd, 0x020200fd, 0xfdfe00fd,
+        0x02ff00fd, 0xfd0100fd, 0xff0300fd, 0x00fd00fd, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000303, 0xfffffcfd, 0x000003ff, 0xfffffc01, 0xffffff04, 0x000000fc, 0x00000707,
+        0xfffff8f9, 0x00000802, 0xfffff7fe, 0x00000208, 0xfffffdf8, 0x000008fe, 0xfffff702, 0xfffffe09,
+        0x000001f7, 0x000005fa, 0xfffffa06, 0x00000d06, 0xfffff2fa, 0x0000060d, 0xfffff9f3, 0x00000d0d,
+        0xfffff2f3, 0x00000e01, 0xfffff1ff, 0x0000010e, 0xfffffef2, 0x00000bf8, 0xfffff408, 0xfffff80c,
+        0x000007f4, 0x0000170e, 0xffffe8f2, 0x00000e17, 0xfffff1e9, 0x000011fb, 0xffffee05, 0xfffffb12,
+        0x000004ee, 0x00001806, 0xffffe7fa, 0x00000618, 0xfffff9e8, 0x00001818, 0xffffe7e8, 0x00001aff,
+        0xffffe501, 0xffffff1b, 0x000000e5, 0x000010ef, 0xffffef11, 0x000016f3, 0xffffe90d, 0xfffff317,
+        0x00000ce9, 0x00002810, 0xffffd7f0, 0x00001028, 0xffffefd8, 0x0000291c, 0xffffd6e4, 0x00001c29,
+        0xffffe3d7, 0x000020f7, 0xffffdf09, 0xfffff721, 0x000008df, 0x00002b06, 0xffffd4fa, 0x0000062b,
+        0xfffff9d5, 0x00002e2e, 0xffffd1d2, 0x000031fc, 0xffffce04, 0xfffffc32, 0x000003ce, 0x000021e5,
+        0xffffde1b, 0xffffe522, 0x00001ade, 0x00002cea, 0xffffd316, 0xffffea2d, 0x000015d3, 0x00004522,
+        0xffffbade, 0x00002245, 0xffffddbb, 0x00004613, 0xffffb9ed, 0x00001346, 0xffffecba, 0x00004935,
+        0xffffb6cb, 0x00003549, 0xffffcab7, 0x00003def, 0xffffc211, 0xffffef3e, 0x000010c2, 0x00004d05,
+        0xffffb2fb, 0x0000054d, 0xfffffab3, 0x00005252, 0xffffadae, 0x000032cd, 0xffffcd33, 0x00003fd5,
+        0xffffc02b, 0xffffd540, 0x00002ac0, 0x000059f6, 0xffffa60a, 0xfffff65a, 0x000009a6, 0x00007229,
+        0xffff8dd7, 0x00002972, 0xffffd68e, 0x00007440, 0xffff8bc0, 0x00004074, 0xffffbf8c, 0x000051db,
+        0xffffae25, 0xffffdb52, 0x000024ae, 0x00007716, 0xffff88ea, 0x00001677, 0xffffe989, 0x00007c5f,
+        0xffff83a1, 0x00005f7c, 0xffffa084, 0x00006ee2, 0xffff911e, 0xffffe26f, 0x00001d91, 0x00005bb2,
+        0xffffa44e, 0xffffb25c, 0x00004da4, 0x000070bc, 0xffff8f44, 0xffffbc71, 0x0000438f, 0x00001212,
+        0xffffedee, 0x00002222, 0xffffddde, 0x00003f3f, 0xffffc0c1, 0x00006d6d, 0xffff9293, 0x00000000,
+        0x03030000, 0xfcfd0000, 0x03ff0000, 0xfc010000, 0xff040000, 0x00fc0000, 0x07070000, 0xf8f90000,
+        0x00000303, 0x03030303, 0xfcfd0303, 0x03ff0303, 0xfc010303, 0xff040303, 0x00fc0303, 0x07070303,
+        0xf8f90303, 0xfffffcfd, 0x0302fcfd, 0xfcfcfcfd, 0x03fefcfd, 0xfc00fcfd, 0xff03fcfd, 0x00fbfcfd,
+        0x0706fcfd, 0xf8f8fcfd, 0x000003ff, 0x030303ff, 0xfcfd03ff, 0x03ff03ff, 0xfc0103ff, 0xff0403ff,
+        0x00fc03ff, 0x070703ff, 0xf8f903ff, 0xfffffc01, 0x0302fc01, 0xfcfcfc01, 0x03fefc01, 0xfc00fc01,
+        0xff03fc01, 0x00fbfc01, 0x0706fc01, 0xf8f8fc01, 0xffffff04, 0x0302ff04, 0xfcfcff04, 0x03feff04,
+        0xfc00ff04, 0xff03ff04, 0x00fbff04, 0x0706ff04, 0xf8f8ff04, 0x000000fc, 0x030300fc, 0xfcfd00fc,
+        0x03ff00fc, 0xfc0100fc, 0xff0400fc, 0x00fc00fc, 0x070700fc, 0xf8f900fc, 0x00000707, 0x03030707,
+        0xfcfd0707, 0x03ff0707, 0xfc010707, 0xff040707, 0x00fc0707, 0x07070707, 0xf8f90707, 0xfffff8f9,
+        0x0302f8f9, 0xfcfcf8f9, 0x03fef8f9, 0xfc00f8f9, 0xff03f8f9, 0x00fbf8f9, 0x0706f8f9, 0xf8f8f8f9,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000404, 0xfffffbfc, 0x000004ff, 0xfffffb01, 0xffffff05, 0x000000fb, 0x00000a03,
+        0xfffff5fd, 0x0000030a, 0xfffffcf6, 0x00000909, 0xfffff6f7, 0x000006f9, 0xfffff907, 0x00000bfd,
+        0xfffff403, 0xfffffd0c, 0x000002f4, 0x00001108, 0xffffeef8, 0x00000811, 0xfffff7ef, 0x00001111,
+        0xffffeeef, 0x00001301, 0xffffecff, 0x00000113, 0xfffffeed, 0x00000ff5, 0xfffff00b, 0xfffff510,
+        0x00000af0, 0x000016fa, 0xffffe906, 0xfffffa17, 0x000005e9, 0x00001f12, 0xffffe0ee, 0x0000121f,
+        0xffffede1, 0x00002008, 0xffffdff8, 0x00000820, 0xfffff7e0, 0x00002121, 0xffffdedf, 0x000023ff,
+        0xffffdc01, 0xffffff24, 0x000000dc, 0x000016e9, 0xffffe917, 0x00001eef, 0xffffe111, 0xffffef1f,
+        0x000010e1, 0x00003615, 0xffffc9eb, 0x00001536, 0xffffeaca, 0x00003725, 0xffffc8db, 0x00002537,
+        0xffffdac9, 0x00002bf4, 0xffffd40c, 0xfffff42c, 0x00000bd4, 0x00003908, 0xffffc6f8, 0x00000839,
+        0xfffff7c7, 0x00003d3d, 0xffffc2c3, 0x000041fb, 0xffffbe05, 0xfffffb42, 0x000004be, 0x00002cdc,
+        0xffffd324, 0xffffdc2d, 0x000023d3, 0x00003be3, 0xffffc41d, 0xffffe33c, 0x00001cc4, 0x00005c2d,
+        0xffffa3d3, 0x00002d5c, 0xffffd2a4, 0x00005d19, 0xffffa2e7, 0x0000195d, 0xffffe6a3, 0x00006147,
+        0xffff9eb9, 0x00004761, 0xffffb89f, 0x000052ea, 0xffffad16, 0xffffea53, 0x000015ad, 0x00006607,
+        0xffff99f9, 0x00000766, 0xfffff89a, 0x00006d6d, 0xffff9293, 0x000043bc, 0xffffbc44, 0x000054c7,
+        0xffffab39, 0xffffc755, 0x000038ab, 0x000077f3, 0xffff880d, 0xfffff378, 0x00000c88, 0x00006dcf,
+        0xffff9231, 0xffffcf6e, 0x00003092, 0x00007a98, 0xffff8568, 0xffff987b, 0x00006785, 0x00001818,
+        0xffffe7e8, 0x00002e2e, 0xffffd1d2, 0x00005454, 0xffffabac, 0x00000000, 0x04040000, 0xfbfc0000,
+        0x04ff0000, 0xfb010000, 0xff050000, 0x00fb0000, 0x0a030000, 0xf5fd0000, 0x030a0000, 0x00000404,
+        0x04040404, 0xfbfc0404, 0x04ff0404, 0xfb010404, 0xff050404, 0x00fb0404, 0x0a030404, 0xf5fd0404,
+        0x030a0404, 0xfffffbfc, 0x0403fbfc, 0xfbfbfbfc, 0x04fefbfc, 0xfb00fbfc, 0xff04fbfc, 0x00fafbfc,
+        0x0a02fbfc, 0xf5fcfbfc, 0x0309fbfc, 0x000004ff, 0x040404ff, 0xfbfc04ff, 0x04ff04ff, 0xfb0104ff,
+        0xff0504ff, 0x00fb04ff, 0x0a0304ff, 0xf5fd04ff, 0x030a04ff, 0xfffffb01, 0x0403fb01, 0xfbfbfb01,
+        0x04fefb01, 0xfb00fb01, 0xff04fb01, 0x00fafb01, 0x0a02fb01, 0xf5fcfb01, 0x0309fb01, 0xffffff05,
+        0x0403ff05, 0xfbfbff05, 0x04feff05, 0xfb00ff05, 0xff04ff05, 0x00faff05, 0x0a02ff05, 0xf5fcff05,
+        0x0309ff05, 0x000000fb, 0x040400fb, 0xfbfc00fb, 0x04ff00fb, 0xfb0100fb, 0xff0500fb, 0x00fb00fb,
+        0x0a0300fb, 0xf5fd00fb, 0x030a00fb, 0x00000a03, 0x04040a03, 0xfbfc0a03, 0x04ff0a03, 0xfb010a03,
+        0xff050a03, 0x00fb0a03, 0x0a030a03, 0xf5fd0a03, 0x030a0a03, 0xfffff5fd, 0x0403f5fd, 0xfbfbf5fd,
+        0x04fef5fd, 0xfb00f5fd, 0xff04f5fd, 0x00faf5fd, 0x0a02f5fd, 0xf5fcf5fd, 0x0309f5fd, 0x0000030a,
+        0x0404030a, 0xfbfc030a, 0x04ff030a, 0xfb01030a, 0xff05030a, 0x00fb030a, 0x0a03030a, 0xf5fd030a,
+        0x030a030a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000505, 0xfffffafb, 0x000006fe, 0xfffff902, 0xfffffe07, 0x000001f9, 0x00000b0b,
+        0xfffff4f5, 0x00000d03, 0xfffff2fd, 0x0000030d, 0xfffffcf3, 0x000008f7, 0xfffff709, 0x00000efc,
+        0xfffff104, 0xfffffc0f, 0x000003f1, 0x0000160b, 0xffffe9f5, 0x00000b16, 0xfffff4ea, 0x00001515,
+        0xffffeaeb, 0x00001802, 0xffffe7fe, 0x00000218, 0xfffffde8, 0x000013f2, 0xffffec0e, 0xfffff214,
+        0x00000dec, 0x00002617, 0xffffd9e9, 0x00001726, 0xffffe8da, 0x00001cf8, 0xffffe308, 0xfffff81d,
+        0x000007e3, 0x0000270b, 0xffffd8f5, 0x00000b27, 0xfffff4d9, 0x00002929, 0xffffd6d7, 0x00002cff,
+        0xffffd301, 0xffffff2d, 0x000000d3, 0x00001ce3, 0xffffe31d, 0x000026ea, 0xffffd916, 0xffffea27,
+        0x000015d9, 0x0000431b, 0xffffbce5, 0x00001b43, 0xffffe4bd, 0x0000452f, 0xffffbad1, 0x00002f45,
+        0xffffd0bb, 0x000037f1, 0xffffc80f, 0xfffff138, 0x00000ec8, 0x0000470b, 0xffffb8f5, 0x00000b47,
+        0xfffff4b9, 0x00004c4c, 0xffffb3b4, 0x000052fa, 0xffffad06, 0xfffffa53, 0x000005ad, 0x000038d3,
+        0xffffc72d, 0xffffd339, 0x00002cc7, 0x00004adc, 0xffffb524, 0xffffdc4b, 0x000023b5, 0x00007338,
+        0xffff8cc8, 0x00003873, 0xffffc78d, 0x0000751f, 0xffff8ae1, 0x00001f75, 0xffffe08b, 0x00007a58,
+        0xffff85a8, 0x0000587a, 0xffffa786, 0x000067e4, 0xffff981c, 0xffffe468, 0x00001b98, 0x000054ab,
+        0xffffab55, 0x000069b8, 0xffff9648, 0xffffb86a, 0x00004796, 0x00001e1e, 0xffffe1e2, 0x00003a3a,
+        0xffffc5c6, 0x00006969, 0xffff9697, 0x00000000, 0x05050000, 0xfafb0000, 0x06fe0000, 0xf9020000,
+        0xfe070000, 0x01f90000, 0x0b0b0000, 0xf4f50000, 0x0d030000, 0xf2fd0000, 0x00000505, 0x05050505,
+        0xfafb0505, 0x06fe0505, 0xf9020505, 0xfe070505, 0x01f90505, 0x0b0b0505, 0xf4f50505, 0x0d030505,
+        0xf2fd0505, 0xfffffafb, 0x0504fafb, 0xfafafafb, 0x06fdfafb, 0xf901fafb, 0xfe06fafb, 0x01f8fafb,
+        0x0b0afafb, 0xf4f4fafb, 0x0d02fafb, 0xf2fcfafb, 0x000006fe, 0x050506fe, 0xfafb06fe, 0x06fe06fe,
+        0xf90206fe, 0xfe0706fe, 0x01f906fe, 0x0b0b06fe, 0xf4f506fe, 0x0d0306fe, 0xf2fd06fe, 0xfffff902,
+        0x0504f902, 0xfafaf902, 0x06fdf902, 0xf901f902, 0xfe06f902, 0x01f8f902, 0x0b0af902, 0xf4f4f902,
+        0x0d02f902, 0xf2fcf902, 0xfffffe07, 0x0504fe07, 0xfafafe07, 0x06fdfe07, 0xf901fe07, 0xfe06fe07,
+        0x01f8fe07, 0x0b0afe07, 0xf4f4fe07, 0x0d02fe07, 0xf2fcfe07, 0x000001f9, 0x050501f9, 0xfafb01f9,
+        0x06fe01f9, 0xf90201f9, 0xfe0701f9, 0x01f901f9, 0x0b0b01f9, 0xf4f501f9, 0x0d0301f9, 0xf2fd01f9,
+        0x00000b0b, 0x05050b0b, 0xfafb0b0b, 0x06fe0b0b, 0xf9020b0b, 0xfe070b0b, 0x01f90b0b, 0x0b0b0b0b,
+        0xf4f50b0b, 0x0d030b0b, 0xf2fd0b0b, 0xfffff4f5, 0x0504f4f5, 0xfafaf4f5, 0x06fdf4f5, 0xf901f4f5,
+        0xfe06f4f5, 0x01f8f4f5, 0x0b0af4f5, 0xf4f4f4f5, 0x0d02f4f5, 0xf2fcf4f5, 0x00000d03, 0x05050d03,
+        0xfafb0d03, 0x06fe0d03, 0xf9020d03, 0xfe070d03, 0x01f90d03, 0x0b0b0d03, 0xf4f50d03, 0x0d030d03,
+        0xf2fd0d03, 0xfffff2fd, 0x0504f2fd, 0xfafaf2fd, 0x06fdf2fd, 0xf901f2fd, 0xfe06f2fd, 0x01f8f2fd,
+        0x0b0af2fd, 0xf4f4f2fd, 0x0d02f2fd, 0xf2fcf2fd, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000606, 0xfffff9fa, 0x000007fe, 0xfffff802, 0xfffffe08, 0x000001f8, 0x00000d0d,
+        0xfffff2f3, 0x00000f04, 0xfffff0fc, 0x0000040f, 0xfffffbf1, 0x00000af5, 0xfffff50b, 0x000011fb,
+        0xffffee05, 0xfffffb12, 0x000004ee, 0x00001a0d, 0xffffe5f3, 0x00000d1a, 0xfffff2e6, 0x00001a1a,
+        0xffffe5e6, 0x00001d02, 0xffffe2fe, 0x0000021d, 0xfffffde3, 0x000017f0, 0xffffe810, 0xfffff018,
+        0x00000fe8, 0x00002e1c, 0xffffd1e4, 0x00001c2e, 0xffffe3d2, 0x000022f7, 0xffffdd09, 0xfffff723,
+        0x000008dd, 0x00002f0d, 0xffffd0f3, 0x00000d2f, 0xfffff2d1, 0x00003131, 0xffffcecf, 0x000035ff,
+        0xffffca01, 0xffffff36, 0x000000ca, 0x000022dd, 0xffffdd23, 0x00002ee6, 0xffffd11a, 0xffffe62f,
+        0x000019d1, 0x00005120, 0xffffaee0, 0x00002051, 0xffffdfaf, 0x00005338, 0xffffacc8, 0x00003853,
+        0xffffc7ad, 0x000042ee, 0xffffbd12, 0xffffee43, 0x000011bd, 0x0000560d, 0xffffa9f3, 0x00000d56,
+        0xfffff2aa, 0x00005b5b, 0xffffa4a5, 0x000062f9, 0xffff9d07, 0xfffff963, 0x0000069d, 0x000043ca,
+        0xffffbc36, 0xffffca44, 0x000035bc, 0x000059d4, 0xffffa62c, 0xffffd45a, 0x00002ba6, 0x00007bdf,
+        0xffff8421, 0xffffdf7c, 0x00002084, 0x00006699, 0xffff9967, 0x00007eaa, 0xffff8156, 0xffffaa7f,
+        0x00005581, 0x00002525, 0xffffdadb, 0x00004545, 0xffffbabb, 0x00000000, 0x06060000, 0xf9fa0000,
+        0x07fe0000, 0xf8020000, 0xfe080000, 0x01f80000, 0x0d0d0000, 0xf2f30000, 0x0f040000, 0xf0fc0000,
+        0x040f0000, 0x00000606, 0x06060606, 0xf9fa0606, 0x07fe0606, 0xf8020606, 0xfe080606, 0x01f80606,
+        0x0d0d0606, 0xf2f30606, 0x0f040606, 0xf0fc0606, 0x040f0606, 0xfffff9fa, 0x0605f9fa, 0xf9f9f9fa,
+        0x07fdf9fa, 0xf801f9fa, 0xfe07f9fa, 0x01f7f9fa, 0x0d0cf9fa, 0xf2f2f9fa, 0x0f03f9fa, 0xf0fbf9fa,
+        0x040ef9fa, 0x000007fe, 0x060607fe, 0xf9fa07fe, 0x07fe07fe, 0xf80207fe, 0xfe0807fe, 0x01f807fe,
+        0x0d0d07fe, 0xf2f307fe, 0x0f0407fe, 0xf0fc07fe, 0x040f07fe, 0xfffff802, 0x0605f802, 0xf9f9f802,
+        0x07fdf802, 0xf801f802, 0xfe07f802, 0x01f7f802, 0x0d0cf802, 0xf2f2f802, 0x0f03f802, 0xf0fbf802,
+        0x040ef802, 0xfffffe08, 0x0605fe08, 0xf9f9fe08, 0x07fdfe08, 0xf801fe08, 0xfe07fe08, 0x01f7fe08,
+        0x0d0cfe08, 0xf2f2fe08, 0x0f03fe08, 0xf0fbfe08, 0x040efe08, 0x000001f8, 0x060601f8, 0xf9fa01f8,
+        0x07fe01f8, 0xf80201f8, 0xfe0801f8, 0x01f801f8, 0x0d0d01f8, 0xf2f301f8, 0x0f0401f8, 0xf0fc01f8,
+        0x040f01f8, 0x00000d0d, 0x06060d0d, 0xf9fa0d0d, 0x07fe0d0d, 0xf8020d0d, 0xfe080d0d, 0x01f80d0d,
+        0x0d0d0d0d, 0xf2f30d0d, 0x0f040d0d, 0xf0fc0d0d, 0x040f0d0d, 0xfffff2f3, 0x0605f2f3, 0xf9f9f2f3,
+        0x07fdf2f3, 0xf801f2f3, 0xfe07f2f3, 0x01f7f2f3, 0x0d0cf2f3, 0xf2f2f2f3, 0x0f03f2f3, 0xf0fbf2f3,
+        0x040ef2f3, 0x00000f04, 0x06060f04, 0xf9fa0f04, 0x07fe0f04, 0xf8020f04, 0xfe080f04, 0x01f80f04,
+        0x0d0d0f04, 0xf2f30f04, 0x0f040f04, 0xf0fc0f04, 0x040f0f04, 0xfffff0fc, 0x0605f0fc, 0xf9f9f0fc,
+        0x07fdf0fc, 0xf801f0fc, 0xfe07f0fc, 0x01f7f0fc, 0x0d0cf0fc, 0xf2f2f0fc, 0x0f03f0fc, 0xf0fbf0fc,
+        0x040ef0fc, 0x0000040f, 0x0606040f, 0xf9fa040f, 0x07fe040f, 0xf802040f, 0xfe08040f, 0x01f8040f,
+        0x0d0d040f, 0xf2f3040f, 0x0f04040f, 0xf0fc040f, 0x040f040f, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000707, 0xfffff8f9, 0x000009fd, 0xfffff603, 0xfffffd0a, 0x000002f6, 0x00001010,
+        0xffffeff0, 0x00001205, 0xffffedfb, 0x00000512, 0xfffffaee, 0x00000cf3, 0xfffff30d, 0x000014fa,
+        0xffffeb06, 0xfffffa15, 0x000005eb, 0x00001e0f, 0xffffe1f1, 0x00000f1e, 0xfffff0e2, 0x00001e1e,
+        0xffffe1e2, 0x00002202, 0xffffddfe, 0x00000222, 0xfffffdde, 0x00001bed, 0xffffe413, 0xffffed1c,
+        0x000012e4, 0x00003620, 0xffffc9e0, 0x00002036, 0xffffdfca, 0x000028f5, 0xffffd70b, 0xfffff529,
+        0x00000ad7, 0x0000370f, 0xffffc8f1, 0x00000f37, 0xfffff0c9, 0x00003939, 0xffffc6c7, 0x00003eff,
+        0xffffc101, 0xffffff3f, 0x000000c1, 0x000027d8, 0xffffd828, 0x000036e2, 0xffffc91e, 0xffffe237,
+        0x00001dc9, 0x00005e25, 0xffffa1db, 0x0000255e, 0xffffdaa2, 0x00006041, 0xffff9fbf, 0x00004160,
+        0xffffbea0, 0x00004deb, 0xffffb215, 0xffffeb4e, 0x000014b2, 0x0000640f, 0xffff9bf1, 0x00000f64,
+        0xfffff09c, 0x00006a6a, 0xffff9596, 0x000073f8, 0xffff8c08, 0xfffff874, 0x0000078c, 0x00004ec1,
+        0xffffb13f, 0xffffc14f, 0x00003eb1, 0x000068cd, 0xffff9733, 0xffffcd69, 0x00003297, 0x00007788,
+        0xffff8878, 0x00002b2b, 0xffffd4d5, 0x00005050, 0xffffafb0, 0x00000000, 0x07070000, 0xf8f90000,
+        0x09fd0000, 0xf6030000, 0xfd0a0000, 0x02f60000, 0x10100000, 0xeff00000, 0x12050000, 0xedfb0000,
+        0x05120000, 0x00000707, 0x07070707, 0xf8f90707, 0x09fd0707, 0xf6030707, 0xfd0a0707, 0x02f60707,
+        0x10100707, 0xeff00707, 0x12050707, 0xedfb0707, 0x05120707, 0xfffff8f9, 0x0706f8f9, 0xf8f8f8f9,
+        0x09fcf8f9, 0xf602f8f9, 0xfd09f8f9, 0x02f5f8f9, 0x100ff8f9, 0xefeff8f9, 0x1204f8f9, 0xedfaf8f9,
+        0x0511f8f9, 0x000009fd, 0x070709fd, 0xf8f909fd, 0x09fd09fd, 0xf60309fd, 0xfd0a09fd, 0x02f609fd,
+        0x101009fd, 0xeff009fd, 0x120509fd, 0xedfb09fd, 0x051209fd, 0xfffff603, 0x0706f603, 0xf8f8f603,
+        0x09fcf603, 0xf602f603, 0xfd09f603, 0x02f5f603, 0x100ff603, 0xefeff603, 0x1204f603, 0xedfaf603,
+        0x0511f603, 0xfffffd0a, 0x0706fd0a, 0xf8f8fd0a, 0x09fcfd0a, 0xf602fd0a, 0xfd09fd0a, 0x02f5fd0a,
+        0x100ffd0a, 0xefeffd0a, 0x1204fd0a, 0xedfafd0a, 0x0511fd0a, 0x000002f6, 0x070702f6, 0xf8f902f6,
+        0x09fd02f6, 0xf60302f6, 0xfd0a02f6, 0x02f602f6, 0x101002f6, 0xeff002f6, 0x120502f6, 0xedfb02f6,
+        0x051202f6, 0x00001010, 0x07071010, 0xf8f91010, 0x09fd1010, 0xf6031010, 0xfd0a1010, 0x02f61010,
+        0x10101010, 0xeff01010, 0x12051010, 0xedfb1010, 0x05121010, 0xffffeff0, 0x0706eff0, 0xf8f8eff0,
+        0x09fceff0, 0xf602eff0, 0xfd09eff0, 0x02f5eff0, 0x100feff0, 0xefefeff0, 0x1204eff0, 0xedfaeff0,
+        0x0511eff0, 0x00001205, 0x07071205, 0xf8f91205, 0x09fd1205, 0xf6031205, 0xfd0a1205, 0x02f61205,
+        0x10101205, 0xeff01205, 0x12051205, 0xedfb1205, 0x05121205, 0xffffedfb, 0x0706edfb, 0xf8f8edfb,
+        0x09fcedfb, 0xf602edfb, 0xfd09edfb, 0x02f5edfb, 0x100fedfb, 0xefefedfb, 0x1204edfb, 0xedfaedfb,
+        0x0511edfb, 0x00000512, 0x07070512, 0xf8f90512, 0x09fd0512, 0xf6030512, 0xfd0a0512, 0x02f60512,
+        0x10100512, 0xeff00512, 0x12050512, 0xedfb0512, 0x05120512, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000808, 0xfffff7f8, 0x00000afd, 0xfffff503, 0xfffffd0b, 0x000002f5, 0x00001212,
+        0xffffedee, 0x00001405, 0xffffebfb, 0x00000514, 0xfffffaec, 0x00000ef1, 0xfffff10f, 0x000017f9,
+        0xffffe807, 0xfffff918, 0x000006e8, 0x00002311, 0xffffdcef, 0x00001123, 0xffffeedd, 0x00002222,
+        0xffffddde, 0x00002603, 0xffffd9fd, 0x00000326, 0xfffffcda, 0x00001fea, 0xffffe016, 0xffffea20,
+        0x000015e0, 0x00003d25, 0xffffc2db, 0x0000253d, 0xffffdac3, 0x00002ef3, 0xffffd10d, 0xfffff32f,
+        0x00000cd1, 0x00003f11, 0xffffc0ef, 0x0000113f, 0xffffeec1, 0x00004141, 0xffffbebf, 0x000047ff,
+        0xffffb801, 0xffffff48, 0x000000b8, 0x00002dd2, 0xffffd22e, 0x00003edd, 0xffffc123, 0xffffdd3f,
+        0x000022c1, 0x00006b2b, 0xffff94d5, 0x00002b6b, 0xffffd495, 0x00006e4b, 0xffff91b5, 0x00004b6e,
+        0xffffb492, 0x000058e8, 0xffffa718, 0xffffe859, 0x000017a7, 0x00007211, 0xffff8def, 0x00001172,
+        0xffffee8e, 0x00007979, 0xffff8687, 0x00005ab8, 0xffffa548, 0xffffb85b, 0x000047a5, 0x000077c6,
+        0xffff883a, 0xffffc678, 0x00003988, 0x00003131, 0xffffcecf, 0x00005c5c, 0xffffa3a4, 0x00000000,
+        0x08080000, 0xf7f80000, 0x0afd0000, 0xf5030000, 0xfd0b0000, 0x02f50000, 0x12120000, 0xedee0000,
+        0x14050000, 0xebfb0000, 0x05140000, 0x00000808, 0x08080808, 0xf7f80808, 0x0afd0808, 0xf5030808,
+        0xfd0b0808, 0x02f50808, 0x12120808, 0xedee0808, 0x14050808, 0xebfb0808, 0x05140808, 0xfffff7f8,
+        0x0807f7f8, 0xf7f7f7f8, 0x0afcf7f8, 0xf502f7f8, 0xfd0af7f8, 0x02f4f7f8, 0x1211f7f8, 0xededf7f8,
+        0x1404f7f8, 0xebfaf7f8, 0x0513f7f8, 0x00000afd, 0x08080afd, 0xf7f80afd, 0x0afd0afd, 0xf5030afd,
+        0xfd0b0afd, 0x02f50afd, 0x12120afd, 0xedee0afd, 0x14050afd, 0xebfb0afd, 0x05140afd, 0xfffff503,
+        0x0807f503, 0xf7f7f503, 0x0afcf503, 0xf502f503, 0xfd0af503, 0x02f4f503, 0x1211f503, 0xededf503,
+        0x1404f503, 0xebfaf503, 0x0513f503, 0xfffffd0b, 0x0807fd0b, 0xf7f7fd0b, 0x0afcfd0b, 0xf502fd0b,
+        0xfd0afd0b, 0x02f4fd0b, 0x1211fd0b, 0xededfd0b, 0x1404fd0b, 0xebfafd0b, 0x0513fd0b, 0x000002f5,
+        0x080802f5, 0xf7f802f5, 0x0afd02f5, 0xf50302f5, 0xfd0b02f5, 0x02f502f5, 0x121202f5, 0xedee02f5,
+        0x140502f5, 0xebfb02f5, 0x051402f5, 0x00001212, 0x08081212, 0xf7f81212, 0x0afd1212, 0xf5031212,
+        0xfd0b1212, 0x02f51212, 0x12121212, 0xedee1212, 0x14051212, 0xebfb1212, 0x05141212, 0xffffedee,
+        0x0807edee, 0xf7f7edee, 0x0afcedee, 0xf502edee, 0xfd0aedee, 0x02f4edee, 0x1211edee, 0xedededee,
+        0x1404edee, 0xebfaedee, 0x0513edee, 0x00001405, 0x08081405, 0xf7f81405, 0x0afd1405, 0xf5031405,
+        0xfd0b1405, 0x02f51405, 0x12121405, 0xedee1405, 0x14051405, 0xebfb1405, 0x05141405, 0xffffebfb,
+        0x0807ebfb, 0xf7f7ebfb, 0x0afcebfb, 0xf502ebfb, 0xfd0aebfb, 0x02f4ebfb, 0x1211ebfb, 0xededebfb,
+        0x1404ebfb, 0xebfaebfb, 0x0513ebfb, 0x00000514, 0x08080514, 0xf7f80514, 0x0afd0514, 0xf5030514,
+        0xfd0b0514, 0x02f50514, 0x12120514, 0xedee0514, 0x14050514, 0xebfb0514, 0x05140514, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000909, 0xfffff6f7, 0x00000bfd, 0xfffff403, 0xfffffd0c, 0x000002f4, 0x00001414,
+        0xffffebec, 0x00001706, 0xffffe8fa, 0x00000617, 0xfffff9e9, 0x000010ef, 0xffffef11, 0x00001af9,
+        0xffffe507, 0xfffff91b, 0x000006e5, 0x00002713, 0xffffd8ed, 0x00001327, 0xffffecd9, 0x00002727,
+        0xffffd8d9, 0x00002b03, 0xffffd4fd, 0x0000032b, 0xfffffcd5, 0x000023e8, 0xffffdc18, 0xffffe824,
+        0x000017dc, 0x0000452a, 0xffffbad6, 0x00002a45, 0xffffd5bb, 0x000034f2, 0xffffcb0e, 0xfffff235,
+        0x00000dcb, 0x00004713, 0xffffb8ed, 0x00001347, 0xffffecb9, 0x00004949, 0xffffb6b7, 0x00004ffe,
+        0xffffb002, 0xfffffe50, 0x000001b0, 0x000033cc, 0xffffcc34, 0x000045d9, 0xffffba27, 0xffffd946,
+        0x000026ba, 0x00007930, 0xffff86d0, 0x00003079, 0xffffcf87, 0x00007c54, 0xffff83ac, 0x0000547c,
+        0xffffab84, 0x000063e5, 0xffff9c1b, 0xffffe564, 0x00001a9c, 0x000065af, 0xffff9a51, 0xffffaf66,
+        0x0000509a, 0x00003737, 0xffffc8c9, 0x00006868, 0xffff9798, 0x00000000, 0x09090000, 0xf6f70000,
+        0x0bfd0000, 0xf4030000, 0xfd0c0000, 0x02f40000, 0x14140000, 0xebec0000, 0x17060000, 0xe8fa0000,
+        0x06170000, 0xf9e90000, 0x00000909, 0x09090909, 0xf6f70909, 0x0bfd0909, 0xf4030909, 0xfd0c0909,
+        0x02f40909, 0x14140909, 0xebec0909, 0x17060909, 0xe8fa0909, 0x06170909, 0xf9e90909, 0xfffff6f7,
+        0x0908f6f7, 0xf6f6f6f7, 0x0bfcf6f7, 0xf402f6f7, 0xfd0bf6f7, 0x02f3f6f7, 0x1413f6f7, 0xebebf6f7,
+        0x1705f6f7, 0xe8f9f6f7, 0x0616f6f7, 0xf9e8f6f7, 0x00000bfd, 0x09090bfd, 0xf6f70bfd, 0x0bfd0bfd,
+        0xf4030bfd, 0xfd0c0bfd, 0x02f40bfd, 0x14140bfd, 0xebec0bfd, 0x17060bfd, 0xe8fa0bfd, 0x06170bfd,
+        0xf9e90bfd, 0xfffff403, 0x0908f403, 0xf6f6f403, 0x0bfcf403, 0xf402f403, 0xfd0bf403, 0x02f3f403,
+        0x1413f403, 0xebebf403, 0x1705f403, 0xe8f9f403, 0x0616f403, 0xf9e8f403, 0xfffffd0c, 0x0908fd0c,
+        0xf6f6fd0c, 0x0bfcfd0c, 0xf402fd0c, 0xfd0bfd0c, 0x02f3fd0c, 0x1413fd0c, 0xebebfd0c, 0x1705fd0c,
+        0xe8f9fd0c, 0x0616fd0c, 0xf9e8fd0c, 0x000002f4, 0x090902f4, 0xf6f702f4, 0x0bfd02f4, 0xf40302f4,
+        0xfd0c02f4, 0x02f402f4, 0x141402f4, 0xebec02f4, 0x170602f4, 0xe8fa02f4, 0x061702f4, 0xf9e902f4,
+        0x00001414, 0x09091414, 0xf6f71414, 0x0bfd1414, 0xf4031414, 0xfd0c1414, 0x02f41414, 0x14141414,
+        0xebec1414, 0x17061414, 0xe8fa1414, 0x06171414, 0xf9e91414, 0xffffebec, 0x0908ebec, 0xf6f6ebec,
+        0x0bfcebec, 0xf402ebec, 0xfd0bebec, 0x02f3ebec, 0x1413ebec, 0xebebebec, 0x1705ebec, 0xe8f9ebec,
+        0x0616ebec, 0xf9e8ebec, 0x00001706, 0x09091706, 0xf6f71706, 0x0bfd1706, 0xf4031706, 0xfd0c1706,
+        0x02f41706, 0x14141706, 0xebec1706, 0x17061706, 0xe8fa1706, 0x06171706, 0xf9e91706, 0xffffe8fa,
+        0x0908e8fa, 0xf6f6e8fa, 0x0bfce8fa, 0xf402e8fa, 0xfd0be8fa, 0x02f3e8fa, 0x1413e8fa, 0xebebe8fa,
+        0x1705e8fa, 0xe8f9e8fa, 0x0616e8fa, 0xf9e8e8fa, 0x00000617, 0x09090617, 0xf6f70617, 0x0bfd0617,
+        0xf4030617, 0xfd0c0617, 0x02f40617, 0x14140617, 0xebec0617, 0x17060617, 0xe8fa0617, 0x06170617,
+        0xf9e90617, 0xfffff9e9, 0x0908f9e9, 0xf6f6f9e9, 0x0bfcf9e9, 0xf402f9e9, 0xfd0bf9e9, 0x02f3f9e9,
+        0x1413f9e9, 0xebebf9e9, 0x1705f9e9, 0xe8f9f9e9, 0x0616f9e9, 0xf9e8f9e9, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000202, 0xfffffdfe, 0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000404,
+        0xfffffbfc, 0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x000003fc, 0xfffffc04, 0x000005fe,
+        0xfffffa02, 0xfffffe06, 0x000001fa, 0x00000804, 0xfffff7fc, 0x00000408, 0xfffffbf8, 0x00000808,
+        0xfffff7f8, 0x00000a00, 0xfffff600, 0x0000000a, 0xfffffff6, 0x000007fc, 0xfffff804, 0xfffffc08,
+        0x000003f8, 0x00000e08, 0xfffff1f8, 0x0000080e, 0xfffff7f2, 0x00000bfe, 0xfffff402, 0xfffffe0c,
+        0x000001f4, 0x00001004, 0xffffeffc, 0x00000410, 0xfffffbf0, 0x00001010, 0xffffeff0, 0x00001200,
+        0xffffee00, 0x00000012, 0xffffffee, 0x00000bf4, 0xfffff40c, 0x00000ff8, 0xfffff008, 0xfffff810,
+        0x000007f0, 0x00001a0a, 0xffffe5f6, 0x00000a1a, 0xfffff5e6, 0x00001c12, 0xffffe3ee, 0x0000121c,
+        0xffffede4, 0x000015fa, 0xffffea06, 0xfffffa16, 0x000005ea, 0x00001c04, 0xffffe3fc, 0x0000041c,
+        0xfffffbe4, 0x00001e1e, 0xffffe1e2, 0x00001ffe, 0xffffe002, 0xfffffe20, 0x000001e0, 0x000015ee,
+        0xffffea12, 0xffffee16, 0x000011ea, 0x00001df2, 0xffffe20e, 0xfffff21e, 0x00000de2, 0x00002e16,
+        0xffffd1ea, 0x0000162e, 0xffffe9d2, 0x00002e0c, 0xffffd1f4, 0x00000c2e, 0xfffff3d2, 0x00003022,
+        0xffffcfde, 0x00002230, 0xffffddd0, 0x000027f6, 0xffffd80a, 0xfffff628, 0x000009d8, 0x00003204,
+        0xffffcdfc, 0x00000432, 0xfffffbce, 0x00003636, 0xffffc9ca, 0x000021de, 0xffffde22, 0x000029e4,
+        0xffffd61c, 0xffffe42a, 0x00001bd6, 0x00003bfa, 0xffffc406, 0xfffffa3c, 0x000005c4, 0x00004c1a,
+        0xffffb3e6, 0x00001a4c, 0xffffe5b4, 0x00004c2a, 0xffffb3d6, 0x00002a4c, 0xffffd5b4, 0x000035e8,
+        0xffffca18, 0xffffe836, 0x000017ca, 0x00004e0e, 0xffffb1f2, 0x00000e4e, 0xfffff1b2, 0x0000523e,
+        0xffffadc2, 0x00003e52, 0xffffc1ae, 0x000049ec, 0xffffb614, 0xffffec4a, 0x000013b6, 0x00005802,
+        0xffffa7fe, 0x00000258, 0xfffffda8, 0x00005c5c, 0xffffa3a4, 0x00003bcc, 0xffffc434, 0xffffcc3c,
+        0x000033c4, 0x00007634, 0xffff89cc, 0x00003476, 0xffffcb8a, 0x000049d4, 0xffffb62c, 0xffffd44a,
+        0x00002bb6, 0x0000764a, 0xffff89b6, 0x00004a76, 0xffffb58a, 0x00007620, 0xffff89e0, 0x00002076,
+        0xffffdf8a, 0x000065f4, 0xffff9a0c, 0xfffff466, 0x00000b9a, 0x00005fd8, 0xffffa028, 0xffffd860,
+        0x000027a0, 0x000075de, 0xffff8a22, 0xffffde76, 0x0000218a, 0x000057a8, 0xffffa858, 0x000067b2,
+        0xffff984e, 0xffffb268, 0x00004d98, 0x00000c0c, 0xfffff3f4, 0x00001616, 0xffffe9ea, 0x00002a2a,
+        0xffffd5d6, 0x00004848, 0xffffb7b8, 0x00000000, 0x02020000, 0xfdfe0000, 0x02000000, 0xfe000000,
+        0x00020000, 0xfffe0000, 0x00000202, 0x02020202, 0xfdfe0202, 0x02000202, 0xfe000202, 0x00020202,
+        0xfffe0202, 0xfffffdfe, 0x0201fdfe, 0xfdfdfdfe, 0x01fffdfe, 0xfdfffdfe, 0x0001fdfe, 0xfffdfdfe,
+        0x00000200, 0x02020200, 0xfdfe0200, 0x02000200, 0xfe000200, 0x00020200, 0xfffe0200, 0xfffffe00,
+        0x0201fe00, 0xfdfdfe00, 0x01fffe00, 0xfdfffe00, 0x0001fe00, 0xfffdfe00, 0x00000002, 0x02020002,
+        0xfdfe0002, 0x02000002, 0xfe000002, 0x00020002, 0xfffe0002, 0xfffffffe, 0x0201fffe, 0xfdfdfffe,
+        0x01fffffe, 0xfdfffffe, 0x0001fffe, 0xfffdfffe, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000303, 0xfffffcfd, 0x00000300, 0xfffffd00, 0x00000003, 0xfffffffd, 0x00000606,
+        0xfffff9fa, 0x00000903, 0xfffff6fd, 0x00000309, 0xfffffcf7, 0x000008fd, 0xfffff703, 0xfffffd09,
+        0x000002f7, 0x000005fa, 0xfffffa06, 0x00000c06, 0xfffff3fa, 0x0000060c, 0xfffff9f4, 0x00000c0c,
+        0xfffff3f4, 0x00000f00, 0xfffff100, 0x0000000f, 0xfffffff1, 0x00000bf7, 0xfffff409, 0xfffff70c,
+        0x000008f4, 0x0000180f, 0xffffe7f1, 0x00000f18, 0xfffff0e8, 0x000011fa, 0xffffee06, 0xfffffa12,
+        0x000005ee, 0x00001806, 0xffffe7fa, 0x00000618, 0xfffff9e8, 0x00001818, 0xffffe7e8, 0x00001b00,
+        0xffffe500, 0x0000001b, 0xffffffe5, 0x000011ee, 0xffffee12, 0x000017f4, 0xffffe80c, 0xfffff418,
+        0x00000be8, 0x0000270f, 0xffffd8f1, 0x00000f27, 0xfffff0d9, 0x00002a1b, 0xffffd5e5, 0x00001b2a,
+        0xffffe4d6, 0x000020f7, 0xffffdf09, 0xfffff721, 0x000008df, 0x00002a06, 0xffffd5fa, 0x0000062a,
+        0xfffff9d6, 0x00002d2d, 0xffffd2d3, 0x000032fd, 0xffffcd03, 0xfffffd33, 0x000002cd, 0x000020e5,
+        0xffffdf1b, 0xffffe521, 0x00001adf, 0x00002ceb, 0xffffd315, 0xffffeb2d, 0x000014d3, 0x00004521,
+        0xffffbadf, 0x00002145, 0xffffdebb, 0x00004512, 0xffffbaee, 0x00001245, 0xffffedbb, 0x00004836,
+        0xffffb7ca, 0x00003648, 0xffffc9b8, 0x00003eee, 0xffffc112, 0xffffee3f, 0x000011c1, 0x00004e06,
+        0xffffb1fa, 0x0000064e, 0xfffff9b2, 0x00005151, 0xffffaeaf, 0x000032cd, 0xffffcd33, 0x00003ed6,
+        0xffffc12a, 0xffffd63f, 0x000029c1, 0x000059f7, 0xffffa609, 0xfffff75a, 0x000008a6, 0x0000722a,
+        0xffff8dd6, 0x00002a72, 0xffffd58e, 0x0000753f, 0xffff8ac1, 0x00003f75, 0xffffc08b, 0x000050dc,
+        0xffffaf24, 0xffffdc51, 0x000023af, 0x00007815, 0xffff87eb, 0x00001578, 0xffffea88, 0x00007b60,
+        0xffff84a0, 0x0000607b, 0xffff9f85, 0x00006ee2, 0xffff911e, 0xffffe26f, 0x00001d91, 0x00005cb2,
+        0xffffa34e, 0xffffb25d, 0x00004da3, 0x000071bb, 0xffff8e45, 0xffffbb72, 0x0000448e, 0x00001212,
+        0xffffedee, 0x00002121, 0xffffdedf, 0x00003f3f, 0xffffc0c1, 0x00006c6c, 0xffff9394, 0x00000000,
+        0x03030000, 0xfcfd0000, 0x03000000, 0xfd000000, 0x00030000, 0xfffd0000, 0x06060000, 0xf9fa0000,
+        0x00000303, 0x03030303, 0xfcfd0303, 0x03000303, 0xfd000303, 0x00030303, 0xfffd0303, 0x06060303,
+        0xf9fa0303, 0xfffffcfd, 0x0302fcfd, 0xfcfcfcfd, 0x02fffcfd, 0xfcfffcfd, 0x0002fcfd, 0xfffcfcfd,
+        0x0605fcfd, 0xf9f9fcfd, 0x00000300, 0x03030300, 0xfcfd0300, 0x03000300, 0xfd000300, 0x00030300,
+        0xfffd0300, 0x06060300, 0xf9fa0300, 0xfffffd00, 0x0302fd00, 0xfcfcfd00, 0x02fffd00, 0xfcfffd00,
+        0x0002fd00, 0xfffcfd00, 0x0605fd00, 0xf9f9fd00, 0x00000003, 0x03030003, 0xfcfd0003, 0x03000003,
+        0xfd000003, 0x00030003, 0xfffd0003, 0x06060003, 0xf9fa0003, 0xfffffffd, 0x0302fffd, 0xfcfcfffd,
+        0x02fffffd, 0xfcfffffd, 0x0002fffd, 0xfffcfffd, 0x0605fffd, 0xf9f9fffd, 0x00000606, 0x03030606,
+        0xfcfd0606, 0x03000606, 0xfd000606, 0x00030606, 0xfffd0606, 0x06060606, 0xf9fa0606, 0xfffff9fa,
+        0x0302f9fa, 0xfcfcf9fa, 0x02fff9fa, 0xfcfff9fa, 0x0002f9fa, 0xfffcf9fa, 0x0605f9fa, 0xf9f9f9fa,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000404, 0xfffffbfc, 0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x00000804,
+        0xfffff7fc, 0x00000408, 0xfffffbf8, 0x00000808, 0xfffff7f8, 0x000007f8, 0xfffff808, 0x00000bfc,
+        0xfffff404, 0xfffffc0c, 0x000003f4, 0x00001008, 0xffffeff8, 0x00000810, 0xfffff7f0, 0x00001010,
+        0xffffeff0, 0x00001400, 0xffffec00, 0x00000014, 0xffffffec, 0x00000ff4, 0xfffff00c, 0xfffff410,
+        0x00000bf0, 0x000017fc, 0xffffe804, 0xfffffc18, 0x000003e8, 0x00002010, 0xffffdff0, 0x00001020,
+        0xffffefe0, 0x00002008, 0xffffdff8, 0x00000820, 0xfffff7e0, 0x00002020, 0xffffdfe0, 0x00002400,
+        0xffffdc00, 0x00000024, 0xffffffdc, 0x000017e8, 0xffffe818, 0x00001ff0, 0xffffe010, 0xfffff020,
+        0x00000fe0, 0x00003414, 0xffffcbec, 0x00001434, 0xffffebcc, 0x00003824, 0xffffc7dc, 0x00002438,
+        0xffffdbc8, 0x00002bf4, 0xffffd40c, 0xfffff42c, 0x00000bd4, 0x00003808, 0xffffc7f8, 0x00000838,
+        0xfffff7c8, 0x00003c3c, 0xffffc3c4, 0x00003ffc, 0xffffc004, 0xfffffc40, 0x000003c0, 0x00002bdc,
+        0xffffd424, 0xffffdc2c, 0x000023d4, 0x00003be4, 0xffffc41c, 0xffffe43c, 0x00001bc4, 0x00005c2c,
+        0xffffa3d4, 0x00002c5c, 0xffffd3a4, 0x00005c18, 0xffffa3e8, 0x0000185c, 0xffffe7a4, 0x00006048,
+        0xffff9fb8, 0x00004860, 0xffffb7a0, 0x000053ec, 0xffffac14, 0xffffec54, 0x000013ac, 0x00006408,
+        0xffff9bf8, 0x00000864, 0xfffff79c, 0x00006c6c, 0xffff9394, 0x000043bc, 0xffffbc44, 0x000053c8,
+        0xffffac38, 0xffffc854, 0x000037ac, 0x000077f4, 0xffff880c, 0xfffff478, 0x00000b88, 0x00006bd0,
+        0xffff9430, 0xffffd06c, 0x00002f94, 0x00007b98, 0xffff8468, 0xffff987c, 0x00006784, 0x00001818,
+        0xffffe7e8, 0x00002c2c, 0xffffd3d4, 0x00005454, 0xffffabac, 0x00000000, 0x04040000, 0xfbfc0000,
+        0x04000000, 0xfc000000, 0x00040000, 0xfffc0000, 0x08040000, 0xf7fc0000, 0x04080000, 0x00000404,
+        0x04040404, 0xfbfc0404, 0x04000404, 0xfc000404, 0x00040404, 0xfffc0404, 0x08040404, 0xf7fc0404,
+        0x04080404, 0xfffffbfc, 0x0403fbfc, 0xfbfbfbfc, 0x03fffbfc, 0xfbfffbfc, 0x0003fbfc, 0xfffbfbfc,
+        0x0803fbfc, 0xf7fbfbfc, 0x0407fbfc, 0x00000400, 0x04040400, 0xfbfc0400, 0x04000400, 0xfc000400,
+        0x00040400, 0xfffc0400, 0x08040400, 0xf7fc0400, 0x04080400, 0xfffffc00, 0x0403fc00, 0xfbfbfc00,
+        0x03fffc00, 0xfbfffc00, 0x0003fc00, 0xfffbfc00, 0x0803fc00, 0xf7fbfc00, 0x0407fc00, 0x00000004,
+        0x04040004, 0xfbfc0004, 0x04000004, 0xfc000004, 0x00040004, 0xfffc0004, 0x08040004, 0xf7fc0004,
+        0x04080004, 0xfffffffc, 0x0403fffc, 0xfbfbfffc, 0x03fffffc, 0xfbfffffc, 0x0003fffc, 0xfffbfffc,
+        0x0803fffc, 0xf7fbfffc, 0x0407fffc, 0x00000804, 0x04040804, 0xfbfc0804, 0x04000804, 0xfc000804,
+        0x00040804, 0xfffc0804, 0x08040804, 0xf7fc0804, 0x04080804, 0xfffff7fc, 0x0403f7fc, 0xfbfbf7fc,
+        0x03fff7fc, 0xfbfff7fc, 0x0003f7fc, 0xfffbf7fc, 0x0803f7fc, 0xf7fbf7fc, 0x0407f7fc, 0x00000408,
+        0x04040408, 0xfbfc0408, 0x04000408, 0xfc000408, 0x00040408, 0xfffc0408, 0x08040408, 0xf7fc0408,
+        0x04080408, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000505, 0xfffffafb, 0x00000500, 0xfffffb00, 0x00000005, 0xfffffffb, 0x00000a0a,
+        0xfffff5f6, 0x00000f05, 0xfffff0fb, 0x0000050f, 0xfffffaf1, 0x000009f6, 0xfffff60a, 0x00000efb,
+        0xfffff105, 0xfffffb0f, 0x000004f1, 0x0000140a, 0xffffebf6, 0x00000a14, 0xfffff5ec, 0x00001414,
+        0xffffebec, 0x00001900, 0xffffe700, 0x00000019, 0xffffffe7, 0x000013f1, 0xffffec0f, 0xfffff114,
+        0x00000eec, 0x00002819, 0xffffd7e7, 0x00001928, 0xffffe6d8, 0x00001df6, 0xffffe20a, 0xfffff61e,
+        0x000009e2, 0x0000280a, 0xffffd7f6, 0x00000a28, 0xfffff5d8, 0x00002828, 0xffffd7d8, 0x00002d00,
+        0xffffd300, 0x0000002d, 0xffffffd3, 0x00001de2, 0xffffe21e, 0x000027ec, 0xffffd814, 0xffffec28,
+        0x000013d8, 0x00004119, 0xffffbee7, 0x00001941, 0xffffe6bf, 0x0000462d, 0xffffb9d3, 0x00002d46,
+        0xffffd2ba, 0x000036f1, 0xffffc90f, 0xfffff137, 0x00000ec9, 0x0000460a, 0xffffb9f6, 0x00000a46,
+        0xfffff5ba, 0x00004b4b, 0xffffb4b5, 0x000054fb, 0xffffab05, 0xfffffb55, 0x000004ab, 0x000036d3,
+        0xffffc92d, 0xffffd337, 0x00002cc9, 0x00004add, 0xffffb523, 0xffffdd4b, 0x000022b5, 0x00007337,
+        0xffff8cc9, 0x00003773, 0xffffc88d, 0x0000731e, 0xffff8ce2, 0x00001e73, 0xffffe18d, 0x0000785a,
+        0xffff87a6, 0x00005a78, 0xffffa588, 0x000068e2, 0xffff971e, 0xffffe269, 0x00001d97, 0x000054ab,
+        0xffffab55, 0x000068ba, 0xffff9746, 0xffffba69, 0x00004597, 0x00001e1e, 0xffffe1e2, 0x00003c3c,
+        0xffffc3c4, 0x00006969, 0xffff9697, 0x00000000, 0x05050000, 0xfafb0000, 0x05000000, 0xfb000000,
+        0x00050000, 0xfffb0000, 0x0a0a0000, 0xf5f60000, 0x0f050000, 0xf0fb0000, 0x00000505, 0x05050505,
+        0xfafb0505, 0x05000505, 0xfb000505, 0x00050505, 0xfffb0505, 0x0a0a0505, 0xf5f60505, 0x0f050505,
+        0xf0fb0505, 0xfffffafb, 0x0504fafb, 0xfafafafb, 0x04fffafb, 0xfafffafb, 0x0004fafb, 0xfffafafb,
+        0x0a09fafb, 0xf5f5fafb, 0x0f04fafb, 0xf0fafafb, 0x00000500, 0x05050500, 0xfafb0500, 0x05000500,
+        0xfb000500, 0x00050500, 0xfffb0500, 0x0a0a0500, 0xf5f60500, 0x0f050500, 0xf0fb0500, 0xfffffb00,
+        0x0504fb00, 0xfafafb00, 0x04fffb00, 0xfafffb00, 0x0004fb00, 0xfffafb00, 0x0a09fb00, 0xf5f5fb00,
+        0x0f04fb00, 0xf0fafb00, 0x00000005, 0x05050005, 0xfafb0005, 0x05000005, 0xfb000005, 0x00050005,
+        0xfffb0005, 0x0a0a0005, 0xf5f60005, 0x0f050005, 0xf0fb0005, 0xfffffffb, 0x0504fffb, 0xfafafffb,
+        0x04fffffb, 0xfafffffb, 0x0004fffb, 0xfffafffb, 0x0a09fffb, 0xf5f5fffb, 0x0f04fffb, 0xf0fafffb,
+        0x00000a0a, 0x05050a0a, 0xfafb0a0a, 0x05000a0a, 0xfb000a0a, 0x00050a0a, 0xfffb0a0a, 0x0a0a0a0a,
+        0xf5f60a0a, 0x0f050a0a, 0xf0fb0a0a, 0xfffff5f6, 0x0504f5f6, 0xfafaf5f6, 0x04fff5f6, 0xfafff5f6,
+        0x0004f5f6, 0xfffaf5f6, 0x0a09f5f6, 0xf5f5f5f6, 0x0f04f5f6, 0xf0faf5f6, 0x00000f05, 0x05050f05,
+        0xfafb0f05, 0x05000f05, 0xfb000f05, 0x00050f05, 0xfffb0f05, 0x0a0a0f05, 0xf5f60f05, 0x0f050f05,
+        0xf0fb0f05, 0xfffff0fb, 0x0504f0fb, 0xfafaf0fb, 0x04fff0fb, 0xfafff0fb, 0x0004f0fb, 0xfffaf0fb,
+        0x0a09f0fb, 0xf5f5f0fb, 0x0f04f0fb, 0xf0faf0fb, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000606, 0xfffff9fa, 0x00000600, 0xfffffa00, 0x00000006, 0xfffffffa, 0x00000c0c,
+        0xfffff3f4, 0x00000c06, 0xfffff3fa, 0x0000060c, 0xfffff9f4, 0x00000bf4, 0xfffff40c, 0x000011fa,
+        0xffffee06, 0xfffffa12, 0x000005ee, 0x0000180c, 0xffffe7f4, 0x00000c18, 0xfffff3e8, 0x00001818,
+        0xffffe7e8, 0x00001e00, 0xffffe200, 0x0000001e, 0xffffffe2, 0x000017ee, 0xffffe812, 0xffffee18,
+        0x000011e8, 0x0000301e, 0xffffcfe2, 0x00001e30, 0xffffe1d0, 0x000023fa, 0xffffdc06, 0xfffffa24,
+        0x000005dc, 0x0000300c, 0xffffcff4, 0x00000c30, 0xfffff3d0, 0x00003030, 0xffffcfd0, 0x00003600,
+        0xffffca00, 0x00000036, 0xffffffca, 0x000023dc, 0xffffdc24, 0x00002fe8, 0xffffd018, 0xffffe830,
+        0x000017d0, 0x00004e1e, 0xffffb1e2, 0x00001e4e, 0xffffe1b2, 0x00005436, 0xffffabca, 0x00003654,
+        0xffffc9ac, 0x000041ee, 0xffffbe12, 0xffffee42, 0x000011be, 0x0000540c, 0xffffabf4, 0x00000c54,
+        0xfffff3ac, 0x00005a5a, 0xffffa5a6, 0x00005ffa, 0xffffa006, 0xfffffa60, 0x000005a0, 0x000041ca,
+        0xffffbe36, 0xffffca42, 0x000035be, 0x000059d6, 0xffffa62a, 0xffffd65a, 0x000029a6, 0x00007de2,
+        0xffff821e, 0xffffe27e, 0x00001d82, 0x0000659a, 0xffff9a66, 0x00007dac, 0xffff8254, 0xffffac7e,
+        0x00005382, 0x00002424, 0xffffdbdc, 0x00004242, 0xffffbdbe, 0x00000000, 0x06060000, 0xf9fa0000,
+        0x06000000, 0xfa000000, 0x00060000, 0xfffa0000, 0x0c0c0000, 0xf3f40000, 0x0c060000, 0xf3fa0000,
+        0x060c0000, 0x00000606, 0x06060606, 0xf9fa0606, 0x06000606, 0xfa000606, 0x00060606, 0xfffa0606,
+        0x0c0c0606, 0xf3f40606, 0x0c060606, 0xf3fa0606, 0x060c0606, 0xfffff9fa, 0x0605f9fa, 0xf9f9f9fa,
+        0x05fff9fa, 0xf9fff9fa, 0x0005f9fa, 0xfff9f9fa, 0x0c0bf9fa, 0xf3f3f9fa, 0x0c05f9fa, 0xf3f9f9fa,
+        0x060bf9fa, 0x00000600, 0x06060600, 0xf9fa0600, 0x06000600, 0xfa000600, 0x00060600, 0xfffa0600,
+        0x0c0c0600, 0xf3f40600, 0x0c060600, 0xf3fa0600, 0x060c0600, 0xfffffa00, 0x0605fa00, 0xf9f9fa00,
+        0x05fffa00, 0xf9fffa00, 0x0005fa00, 0xfff9fa00, 0x0c0bfa00, 0xf3f3fa00, 0x0c05fa00, 0xf3f9fa00,
+        0x060bfa00, 0x00000006, 0x06060006, 0xf9fa0006, 0x06000006, 0xfa000006, 0x00060006, 0xfffa0006,
+        0x0c0c0006, 0xf3f40006, 0x0c060006, 0xf3fa0006, 0x060c0006, 0xfffffffa, 0x0605fffa, 0xf9f9fffa,
+        0x05fffffa, 0xf9fffffa, 0x0005fffa, 0xfff9fffa, 0x0c0bfffa, 0xf3f3fffa, 0x0c05fffa, 0xf3f9fffa,
+        0x060bfffa, 0x00000c0c, 0x06060c0c, 0xf9fa0c0c, 0x06000c0c, 0xfa000c0c, 0x00060c0c, 0xfffa0c0c,
+        0x0c0c0c0c, 0xf3f40c0c, 0x0c060c0c, 0xf3fa0c0c, 0x060c0c0c, 0xfffff3f4, 0x0605f3f4, 0xf9f9f3f4,
+        0x05fff3f4, 0xf9fff3f4, 0x0005f3f4, 0xfff9f3f4, 0x0c0bf3f4, 0xf3f3f3f4, 0x0c05f3f4, 0xf3f9f3f4,
+        0x060bf3f4, 0x00000c06, 0x06060c06, 0xf9fa0c06, 0x06000c06, 0xfa000c06, 0x00060c06, 0xfffa0c06,
+        0x0c0c0c06, 0xf3f40c06, 0x0c060c06, 0xf3fa0c06, 0x060c0c06, 0xfffff3fa, 0x0605f3fa, 0xf9f9f3fa,
+        0x05fff3fa, 0xf9fff3fa, 0x0005f3fa, 0xfff9f3fa, 0x0c0bf3fa, 0xf3f3f3fa, 0x0c05f3fa, 0xf3f9f3fa,
+        0x060bf3fa, 0x0000060c, 0x0606060c, 0xf9fa060c, 0x0600060c, 0xfa00060c, 0x0006060c, 0xfffa060c,
+        0x0c0c060c, 0xf3f4060c, 0x0c06060c, 0xf3fa060c, 0x060c060c, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000707, 0xfffff8f9, 0x00000700, 0xfffff900, 0x00000007, 0xfffffff9, 0x00000e0e,
+        0xfffff1f2, 0x00001507, 0xffffeaf9, 0x00000715, 0xfffff8eb, 0x00000df2, 0xfffff20e, 0x000014f9,
+        0xffffeb07, 0xfffff915, 0x000006eb, 0x00001c0e, 0xffffe3f2, 0x00000e1c, 0xfffff1e4, 0x00001c1c,
+        0xffffe3e4, 0x00002300, 0xffffdd00, 0x00000023, 0xffffffdd, 0x00001beb, 0xffffe415, 0xffffeb1c,
+        0x000014e4, 0x00003823, 0xffffc7dd, 0x00002338, 0xffffdcc8, 0x000029f2, 0xffffd60e, 0xfffff22a,
+        0x00000dd6, 0x0000380e, 0xffffc7f2, 0x00000e38, 0xfffff1c8, 0x00003838, 0xffffc7c8, 0x00003f00,
+        0xffffc100, 0x0000003f, 0xffffffc1, 0x000029d6, 0xffffd62a, 0x000037e4, 0xffffc81c, 0xffffe438,
+        0x00001bc8, 0x00005b23, 0xffffa4dd, 0x0000235b, 0xffffdca5, 0x0000623f, 0xffff9dc1, 0x00003f62,
+        0xffffc09e, 0x00004ceb, 0xffffb315, 0xffffeb4d, 0x000014b3, 0x0000620e, 0xffff9df2, 0x00000e62,
+        0xfffff19e, 0x00006969, 0xffff9697, 0x000076f9, 0xffff8907, 0xfffff977, 0x00000689, 0x00004cc1,
+        0xffffb33f, 0xffffc14d, 0x00003eb3, 0x000068cf, 0xffff9731, 0xffffcf69, 0x00003097, 0x00007689,
+        0xffff8977, 0x00002a2a, 0xffffd5d6, 0x00004d4d, 0xffffb2b3, 0x00000000, 0x07070000, 0xf8f90000,
+        0x07000000, 0xf9000000, 0x00070000, 0xfff90000, 0x0e0e0000, 0xf1f20000, 0x15070000, 0xeaf90000,
+        0x07150000, 0x00000707, 0x07070707, 0xf8f90707, 0x07000707, 0xf9000707, 0x00070707, 0xfff90707,
+        0x0e0e0707, 0xf1f20707, 0x15070707, 0xeaf90707, 0x07150707, 0xfffff8f9, 0x0706f8f9, 0xf8f8f8f9,
+        0x06fff8f9, 0xf8fff8f9, 0x0006f8f9, 0xfff8f8f9, 0x0e0df8f9, 0xf1f1f8f9, 0x1506f8f9, 0xeaf8f8f9,
+        0x0714f8f9, 0x00000700, 0x07070700, 0xf8f90700, 0x07000700, 0xf9000700, 0x00070700, 0xfff90700,
+        0x0e0e0700, 0xf1f20700, 0x15070700, 0xeaf90700, 0x07150700, 0xfffff900, 0x0706f900, 0xf8f8f900,
+        0x06fff900, 0xf8fff900, 0x0006f900, 0xfff8f900, 0x0e0df900, 0xf1f1f900, 0x1506f900, 0xeaf8f900,
+        0x0714f900, 0x00000007, 0x07070007, 0xf8f90007, 0x07000007, 0xf9000007, 0x00070007, 0xfff90007,
+        0x0e0e0007, 0xf1f20007, 0x15070007, 0xeaf90007, 0x07150007, 0xfffffff9, 0x0706fff9, 0xf8f8fff9,
+        0x06fffff9, 0xf8fffff9, 0x0006fff9, 0xfff8fff9, 0x0e0dfff9, 0xf1f1fff9, 0x1506fff9, 0xeaf8fff9,
+        0x0714fff9, 0x00000e0e, 0x07070e0e, 0xf8f90e0e, 0x07000e0e, 0xf9000e0e, 0x00070e0e, 0xfff90e0e,
+        0x0e0e0e0e, 0xf1f20e0e, 0x15070e0e, 0xeaf90e0e, 0x07150e0e, 0xfffff1f2, 0x0706f1f2, 0xf8f8f1f2,
+        0x06fff1f2, 0xf8fff1f2, 0x0006f1f2, 0xfff8f1f2, 0x0e0df1f2, 0xf1f1f1f2, 0x1506f1f2, 0xeaf8f1f2,
+        0x0714f1f2, 0x00001507, 0x07071507, 0xf8f91507, 0x07001507, 0xf9001507, 0x00071507, 0xfff91507,
+        0x0e0e1507, 0xf1f21507, 0x15071507, 0xeaf91507, 0x07151507, 0xffffeaf9, 0x0706eaf9, 0xf8f8eaf9,
+        0x06ffeaf9, 0xf8ffeaf9, 0x0006eaf9, 0xfff8eaf9, 0x0e0deaf9, 0xf1f1eaf9, 0x1506eaf9, 0xeaf8eaf9,
+        0x0714eaf9, 0x00000715, 0x07070715, 0xf8f90715, 0x07000715, 0xf9000715, 0x00070715, 0xfff90715,
+        0x0e0e0715, 0xf1f20715, 0x15070715, 0xeaf90715, 0x07150715, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000808, 0xfffff7f8, 0x00000800, 0xfffff800, 0x00000008, 0xfffffff8, 0x00001010,
+        0xffffeff0, 0x00001008, 0xffffeff8, 0x00000810, 0xfffff7f0, 0x00000ff0, 0xfffff010, 0x000017f8,
+        0xffffe808, 0xfffff818, 0x000007e8, 0x00002010, 0xffffdff0, 0x00001020, 0xffffefe0, 0x00002020,
+        0xffffdfe0, 0x00002800, 0xffffd800, 0x00000028, 0xffffffd8, 0x00001fe8, 0xffffe018, 0xffffe820,
+        0x000017e0, 0x00004028, 0xffffbfd8, 0x00002840, 0xffffd7c0, 0x00002ff0, 0xffffd010, 0xfffff030,
+        0x00000fd0, 0x00004010, 0xffffbff0, 0x00001040, 0xffffefc0, 0x00004040, 0xffffbfc0, 0x00004800,
+        0xffffb800, 0x00000048, 0xffffffb8, 0x00002fd0, 0xffffd030, 0x00003fe0, 0xffffc020, 0xffffe040,
+        0x00001fc0, 0x00006828, 0xffff97d8, 0x00002868, 0xffffd798, 0x00007048, 0xffff8fb8, 0x00004870,
+        0xffffb790, 0x000057e8, 0xffffa818, 0xffffe858, 0x000017a8, 0x00007010, 0xffff8ff0, 0x00001070,
+        0xffffef90, 0x00007878, 0xffff8788, 0x000057b8, 0xffffa848, 0xffffb858, 0x000047a8, 0x000077c8,
+        0xffff8838, 0xffffc878, 0x00003788, 0x00003030, 0xffffcfd0, 0x00005858, 0xffffa7a8, 0x00000000,
+        0x08080000, 0xf7f80000, 0x08000000, 0xf8000000, 0x00080000, 0xfff80000, 0x10100000, 0xeff00000,
+        0x10080000, 0xeff80000, 0x08100000, 0x00000808, 0x08080808, 0xf7f80808, 0x08000808, 0xf8000808,
+        0x00080808, 0xfff80808, 0x10100808, 0xeff00808, 0x10080808, 0xeff80808, 0x08100808, 0xfffff7f8,
+        0x0807f7f8, 0xf7f7f7f8, 0x07fff7f8, 0xf7fff7f8, 0x0007f7f8, 0xfff7f7f8, 0x100ff7f8, 0xefeff7f8,
+        0x1007f7f8, 0xeff7f7f8, 0x080ff7f8, 0x00000800, 0x08080800, 0xf7f80800, 0x08000800, 0xf8000800,
+        0x00080800, 0xfff80800, 0x10100800, 0xeff00800, 0x10080800, 0xeff80800, 0x08100800, 0xfffff800,
+        0x0807f800, 0xf7f7f800, 0x07fff800, 0xf7fff800, 0x0007f800, 0xfff7f800, 0x100ff800, 0xefeff800,
+        0x1007f800, 0xeff7f800, 0x080ff800, 0x00000008, 0x08080008, 0xf7f80008, 0x08000008, 0xf8000008,
+        0x00080008, 0xfff80008, 0x10100008, 0xeff00008, 0x10080008, 0xeff80008, 0x08100008, 0xfffffff8,
+        0x0807fff8, 0xf7f7fff8, 0x07fffff8, 0xf7fffff8, 0x0007fff8, 0xfff7fff8, 0x100ffff8, 0xefeffff8,
+        0x1007fff8, 0xeff7fff8, 0x080ffff8, 0x00001010, 0x08081010, 0xf7f81010, 0x08001010, 0xf8001010,
+        0x00081010, 0xfff81010, 0x10101010, 0xeff01010, 0x10081010, 0xeff81010, 0x08101010, 0xffffeff0,
+        0x0807eff0, 0xf7f7eff0, 0x07ffeff0, 0xf7ffeff0, 0x0007eff0, 0xfff7eff0, 0x100feff0, 0xefefeff0,
+        0x1007eff0, 0xeff7eff0, 0x080feff0, 0x00001008, 0x08081008, 0xf7f81008, 0x08001008, 0xf8001008,
+        0x00081008, 0xfff81008, 0x10101008, 0xeff01008, 0x10081008, 0xeff81008, 0x08101008, 0xffffeff8,
+        0x0807eff8, 0xf7f7eff8, 0x07ffeff8, 0xf7ffeff8, 0x0007eff8, 0xfff7eff8, 0x100feff8, 0xefefeff8,
+        0x1007eff8, 0xeff7eff8, 0x080feff8, 0x00000810, 0x08080810, 0xf7f80810, 0x08000810, 0xf8000810,
+        0x00080810, 0xfff80810, 0x10100810, 0xeff00810, 0x10080810, 0xeff80810, 0x08100810, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000909, 0xfffff6f7, 0x00000900, 0xfffff700, 0x00000009, 0xfffffff7, 0x00001212,
+        0xffffedee, 0x00001b09, 0xffffe4f7, 0x0000091b, 0xfffff6e5, 0x000011ee, 0xffffee12, 0x00001af7,
+        0xffffe509, 0xfffff71b, 0x000008e5, 0x00002412, 0xffffdbee, 0x00001224, 0xffffeddc, 0x00002424,
+        0xffffdbdc, 0x00002d00, 0xffffd300, 0x0000002d, 0xffffffd3, 0x000023e5, 0xffffdc1b, 0xffffe524,
+        0x00001adc, 0x0000482d, 0xffffb7d3, 0x00002d48, 0xffffd2b8, 0x000035ee, 0xffffca12, 0xffffee36,
+        0x000011ca, 0x00004812, 0xffffb7ee, 0x00001248, 0xffffedb8, 0x00004848, 0xffffb7b8, 0x00005100,
+        0xffffaf00, 0x00000051, 0xffffffaf, 0x000035ca, 0xffffca36, 0x000047dc, 0xffffb824, 0xffffdc48,
+        0x000023b8, 0x0000752d, 0xffff8ad3, 0x00002d75, 0xffffd28b, 0x00007e51, 0xffff81af, 0x0000517e,
+        0xffffae82, 0x000062e5, 0xffff9d1b, 0xffffe563, 0x00001a9d, 0x000062af, 0xffff9d51, 0xffffaf63,
+        0x0000509d, 0x00003636, 0xffffc9ca, 0x00006c6c, 0xffff9394, 0x00000000, 0x09090000, 0xf6f70000,
+        0x09000000, 0xf7000000, 0x00090000, 0xfff70000, 0x12120000, 0xedee0000, 0x1b090000, 0xe4f70000,
+        0x091b0000, 0xf6e50000, 0x00000909, 0x09090909, 0xf6f70909, 0x09000909, 0xf7000909, 0x00090909,
+        0xfff70909, 0x12120909, 0xedee0909, 0x1b090909, 0xe4f70909, 0x091b0909, 0xf6e50909, 0xfffff6f7,
+        0x0908f6f7, 0xf6f6f6f7, 0x08fff6f7, 0xf6fff6f7, 0x0008f6f7, 0xfff6f6f7, 0x1211f6f7, 0xededf6f7,
+        0x1b08f6f7, 0xe4f6f6f7, 0x091af6f7, 0xf6e4f6f7, 0x00000900, 0x09090900, 0xf6f70900, 0x09000900,
+        0xf7000900, 0x00090900, 0xfff70900, 0x12120900, 0xedee0900, 0x1b090900, 0xe4f70900, 0x091b0900,
+        0xf6e50900, 0xfffff700, 0x0908f700, 0xf6f6f700, 0x08fff700, 0xf6fff700, 0x0008f700, 0xfff6f700,
+        0x1211f700, 0xededf700, 0x1b08f700, 0xe4f6f700, 0x091af700, 0xf6e4f700, 0x00000009, 0x09090009,
+        0xf6f70009, 0x09000009, 0xf7000009, 0x00090009, 0xfff70009, 0x12120009, 0xedee0009, 0x1b090009,
+        0xe4f70009, 0x091b0009, 0xf6e50009, 0xfffffff7, 0x0908fff7, 0xf6f6fff7, 0x08fffff7, 0xf6fffff7,
+        0x0008fff7, 0xfff6fff7, 0x1211fff7, 0xededfff7, 0x1b08fff7, 0xe4f6fff7, 0x091afff7, 0xf6e4fff7,
+        0x00001212, 0x09091212, 0xf6f71212, 0x09001212, 0xf7001212, 0x00091212, 0xfff71212, 0x12121212,
+        0xedee1212, 0x1b091212, 0xe4f71212, 0x091b1212, 0xf6e51212, 0xffffedee, 0x0908edee, 0xf6f6edee,
+        0x08ffedee, 0xf6ffedee, 0x0008edee, 0xfff6edee, 0x1211edee, 0xedededee, 0x1b08edee, 0xe4f6edee,
+        0x091aedee, 0xf6e4edee, 0x00001b09, 0x09091b09, 0xf6f71b09, 0x09001b09, 0xf7001b09, 0x00091b09,
+        0xfff71b09, 0x12121b09, 0xedee1b09, 0x1b091b09, 0xe4f71b09, 0x091b1b09, 0xf6e51b09, 0xffffe4f7,
+        0x0908e4f7, 0xf6f6e4f7, 0x08ffe4f7, 0xf6ffe4f7, 0x0008e4f7, 0xfff6e4f7, 0x1211e4f7, 0xedede4f7,
+        0x1b08e4f7, 0xe4f6e4f7, 0x091ae4f7, 0xf6e4e4f7, 0x0000091b, 0x0909091b, 0xf6f7091b, 0x0900091b,
+        0xf700091b, 0x0009091b, 0xfff7091b, 0x1212091b, 0xedee091b, 0x1b09091b, 0xe4f7091b, 0x091b091b,
+        0xf6e5091b, 0xfffff6e5, 0x0908f6e5, 0xf6f6f6e5, 0x08fff6e5, 0xf6fff6e5, 0x0008f6e5, 0xfff6f6e5,
+        0x1211f6e5, 0xededf6e5, 0x1b08f6e5, 0xe4f6f6e5, 0x091af6e5, 0xf6e4f6e5, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000202, 0xfffffdfe, 0x00000300, 0xfffffd00, 0x00000003, 0xfffffffd, 0x00000606,
+        0xfffff9fa, 0x00000700, 0xfffff900, 0x00000007, 0xfffffff9, 0x000004fb, 0xfffffb05, 0xfffffb05,
+        0x000004fb, 0x00000b06, 0xfffff4fa, 0x0000060b, 0xfffff9f5, 0x00000800, 0xfffff800, 0x00000008,
+        0xfffffff8, 0x00000b0b, 0xfffff4f5, 0x00000c00, 0xfffff400, 0x0000000c, 0xfffffff4, 0x0000110c,
+        0xffffeef4, 0x00000c11, 0xfffff3ef, 0x00001111, 0xffffeeef, 0x00001206, 0xffffedfa, 0x00000612,
+        0xfffff9ee, 0x00000af8, 0xfffff508, 0xfffff80b, 0x000007f5, 0x00000f00, 0xfffff100, 0x0000000f,
+        0xfffffff1, 0x00001400, 0xffffec00, 0x00000014, 0xffffffec, 0x00001912, 0xffffe6ee, 0x00001219,
+        0xffffede7, 0x0000190b, 0xffffe6f5, 0x00000b19, 0xfffff4e7, 0x00001919, 0xffffe6e7, 0x00000df2,
+        0xfffff20e, 0xfffff20e, 0x00000df2, 0x00001a00, 0xffffe600, 0x0000001a, 0xffffffe6, 0x000011f5,
+        0xffffee0b, 0xfffff512, 0x00000aee, 0x000015f9, 0xffffea07, 0xfffff916, 0x000006ea, 0x0000221a,
+        0xffffdde6, 0x00001a22, 0xffffe5de, 0x00002212, 0xffffddee, 0x00001222, 0xffffedde, 0x00002222,
+        0xffffddde, 0x0000230b, 0xffffdcf5, 0x00000b23, 0xfffff4dd, 0x00001d00, 0xffffe300, 0x0000001d,
+        0xffffffe3, 0x000015ed, 0xffffea13, 0xffffed16, 0x000012ea, 0x000019f1, 0xffffe60f, 0xfffff11a,
+        0x00000ee6, 0x00002500, 0xffffdb00, 0x00000025, 0xffffffdb, 0x00002c1b, 0xffffd3e5, 0x00001b2c,
+        0xffffe4d4, 0x00002c24, 0xffffd3dc, 0x0000242c, 0xffffdbd4, 0x00002c12, 0xffffd3ee, 0x0000122c,
+        0xffffedd4, 0x000020f6, 0xffffdf0a, 0xfffff621, 0x000009df, 0x00002d2d, 0xffffd2d3, 0x00000000,
+        0x00000000, 0x00000202, 0xfffffdfe, 0x00000300, 0xfffffd00, 0x00000003, 0xfffffffd, 0x00000606,
+        0xfffff9fa, 0x00000700, 0xfffff900, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020300, 0x0201fd00,
+        0x02020003, 0x0201fffd, 0x02020606, 0x0201f9fa, 0x02020700, 0x0201f900, 0xfdfe0000, 0xfdfe0202,
+        0xfdfdfdfe, 0xfdfe0300, 0xfdfdfd00, 0xfdfe0003, 0xfdfdfffd, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0700,
+        0xfdfdf900, 0x03000000, 0x03000202, 0x02fffdfe, 0x03000300, 0x02fffd00, 0x03000003, 0x02fffffd,
+        0x03000606, 0x02fff9fa, 0x03000700, 0x02fff900, 0xfd000000, 0xfd000202, 0xfcfffdfe, 0xfd000300,
+        0xfcfffd00, 0xfd000003, 0xfcfffffd, 0xfd000606, 0xfcfff9fa, 0xfd000700, 0xfcfff900, 0x00030000,
+        0x00030202, 0x0002fdfe, 0x00030300, 0x0002fd00, 0x00030003, 0x0002fffd, 0x00030606, 0x0002f9fa,
+        0x00030700, 0x0002f900, 0xfffd0000, 0xfffd0202, 0xfffcfdfe, 0xfffd0300, 0xfffcfd00, 0xfffd0003,
+        0xfffcfffd, 0xfffd0606, 0xfffcf9fa, 0xfffd0700, 0xfffcf900, 0x06060000, 0x06060202, 0x0605fdfe,
+        0x06060300, 0x0605fd00, 0x06060003, 0x0605fffd, 0x06060606, 0x0605f9fa, 0x06060700, 0x0605f900,
+        0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0300, 0xf9f9fd00, 0xf9fa0003, 0xf9f9fffd, 0xf9fa0606,
+        0xf9f9f9fa, 0xf9fa0700, 0xf9f9f900, 0x07000000, 0x07000202, 0x06fffdfe, 0x07000300, 0x06fffd00,
+        0x07000003, 0x06fffffd, 0x07000606, 0x06fff9fa, 0x07000700, 0x06fff900, 0xf9000000, 0xf9000202,
+        0xf8fffdfe, 0xf9000300, 0xf8fffd00, 0xf9000003, 0xf8fffffd, 0xf9000606, 0xf8fff9fa, 0xf9000700,
+        0xf8fff900, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000202, 0xfffffdfe, 0x00000606,
+        0xfffff9fa, 0x00000600, 0xfffffa00, 0x00000006, 0xfffffffa, 0x000003fc, 0xfffffc04, 0xfffffa0a,
+        0x000005f6, 0xfffff400, 0x00000c00, 0xfffff3fa, 0xfffff406, 0x00000bfa, 0x00000c06, 0xfffffff2,
+        0x0000000e, 0x00000c0c, 0xfffff3f4, 0xffffee00, 0x00001200, 0xfffff40e, 0x00000bf2, 0xfffff9ee,
+        0xfffffa12, 0x000005ee, 0x00000612, 0xffffedf6, 0xffffee0a, 0x000011f6, 0x0000120a, 0xffffffea,
+        0x00000016, 0xffffe800, 0x00001800, 0xfffff3ea, 0xfffff416, 0x00000bea, 0x00000c16, 0xffffe7f8,
+        0xffffe808, 0x000017f8, 0x00001808, 0xfffff9e6, 0xfffffa1a, 0x000005e6, 0x0000061a, 0xffffffe4,
+        0x0000001c, 0x00001414, 0xffffebec, 0xffffe5f2, 0x00001a0e, 0xfffff3e2, 0x00000c1e, 0xffffdff6,
+        0x0000200a, 0xffffdfee, 0x00002012, 0xffffe5e6, 0x00001a1a, 0xffffebde, 0x00001422, 0xfffff3da,
+        0x00000c26, 0xffffdfe0, 0x00002020, 0x00002020, 0xffffd7ea, 0xffffddde, 0x00002222, 0x00000000,
+        0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa,
+        0x00000600, 0xfffffa00, 0x00000006, 0xfffffffa, 0x02000000, 0x02000200, 0x01fffe00, 0x02000002,
+        0x01fffffe, 0x02000202, 0x01fffdfe, 0x02000606, 0x01fff9fa, 0x02000600, 0x01fffa00, 0x02000006,
+        0x01fffffa, 0xfe000000, 0xfe000200, 0xfdfffe00, 0xfe000002, 0xfdfffffe, 0xfe000202, 0xfdfffdfe,
+        0xfe000606, 0xfdfff9fa, 0xfe000600, 0xfdfffa00, 0xfe000006, 0xfdfffffa, 0x00020000, 0x00020200,
+        0x0001fe00, 0x00020002, 0x0001fffe, 0x00020202, 0x0001fdfe, 0x00020606, 0x0001f9fa, 0x00020600,
+        0x0001fa00, 0x00020006, 0x0001fffa, 0xfffe0000, 0xfffe0200, 0xfffdfe00, 0xfffe0002, 0xfffdfffe,
+        0xfffe0202, 0xfffdfdfe, 0xfffe0606, 0xfffdf9fa, 0xfffe0600, 0xfffdfa00, 0xfffe0006, 0xfffdfffa,
+        0x02020000, 0x02020200, 0x0201fe00, 0x02020002, 0x0201fffe, 0x02020202, 0x0201fdfe, 0x02020606,
+        0x0201f9fa, 0x02020600, 0x0201fa00, 0x02020006, 0x0201fffa, 0xfdfe0000, 0xfdfe0200, 0xfdfdfe00,
+        0xfdfe0002, 0xfdfdfffe, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0600, 0xfdfdfa00,
+        0xfdfe0006, 0xfdfdfffa, 0x06060000, 0x06060200, 0x0605fe00, 0x06060002, 0x0605fffe, 0x06060202,
+        0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060600, 0x0605fa00, 0x06060006, 0x0605fffa, 0xf9fa0000,
+        0xf9fa0200, 0xf9f9fe00, 0xf9fa0002, 0xf9f9fffe, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa,
+        0xf9fa0600, 0xf9f9fa00, 0xf9fa0006, 0xf9f9fffa, 0x06000000, 0x06000200, 0x05fffe00, 0x06000002,
+        0x05fffffe, 0x06000202, 0x05fffdfe, 0x06000606, 0x05fff9fa, 0x06000600, 0x05fffa00, 0x06000006,
+        0x05fffffa, 0xfa000000, 0xfa000200, 0xf9fffe00, 0xfa000002, 0xf9fffffe, 0xfa000202, 0xf9fffdfe,
+        0xfa000606, 0xf9fff9fa, 0xfa000600, 0xf9fffa00, 0xfa000006, 0xf9fffffa, 0x00060000, 0x00060200,
+        0x0005fe00, 0x00060002, 0x0005fffe, 0x00060202, 0x0005fdfe, 0x00060606, 0x0005f9fa, 0x00060600,
+        0x0005fa00, 0x00060006, 0x0005fffa, 0xfffa0000, 0xfffa0200, 0xfff9fe00, 0xfffa0002, 0xfff9fffe,
+        0xfffa0202, 0xfff9fdfe, 0xfffa0606, 0xfff9f9fa, 0xfffa0600, 0xfff9fa00, 0xfffa0006, 0xfff9fffa,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000404, 0xfffffbfc, 0x00000a0a,
+        0xfffff5f6, 0x00000a00, 0xfffff600, 0x0000000a, 0xfffffff6, 0x000005fa, 0xfffffa06, 0xfffff80e,
+        0x000007f2, 0xffffffee, 0x00000012, 0xfffff00a, 0x00000ff6, 0xffffe800, 0x00001800, 0xfffff7e8,
+        0xfffff818, 0x000007e8, 0x00000818, 0x00001212, 0xffffedee, 0xfffff014, 0x00000fec, 0xffffe5f2,
+        0xffffe60e, 0x000019f2, 0x00001a0e, 0xffffffe2, 0x0000001e, 0xffffde00, 0x00002200, 0xfffff7de,
+        0xfffff822, 0x000007de, 0x00000822, 0xffffede2, 0xffffee1e, 0x000011e2, 0x0000121e, 0xffffddf6,
+        0xffffde0a, 0x000021f6, 0x0000220a, 0xffffddec, 0x00002214, 0xffffffd8, 0x00000028, 0x00001e1e,
+        0xffffe1e2, 0xffffedd8, 0x00001228, 0xffffd400, 0x00002c00, 0xffffd3f0, 0x00002c10, 0xffffdbdc,
+        0xffffdbdc, 0x00002424, 0xffffd3e6, 0x00002c1a, 0xffffe5d2, 0x00001a2e, 0xffffedcc, 0x00001234,
+        0xffffc9ec, 0xffffd3d4, 0x00002c2c, 0xffffc9e0, 0xffffd1d2, 0xffffd1d2, 0x00002e2e, 0x00000000,
+        0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000404, 0xfffffbfc, 0x00000a0a, 0xfffff5f6,
+        0x00000a00, 0xfffff600, 0x0000000a, 0xfffffff6, 0x02000000, 0x02000200, 0x01fffe00, 0x02000002,
+        0x01fffffe, 0x02000404, 0x01fffbfc, 0x02000a0a, 0x01fff5f6, 0x02000a00, 0x01fff600, 0x0200000a,
+        0x01fffff6, 0xfe000000, 0xfe000200, 0xfdfffe00, 0xfe000002, 0xfdfffffe, 0xfe000404, 0xfdfffbfc,
+        0xfe000a0a, 0xfdfff5f6, 0xfe000a00, 0xfdfff600, 0xfe00000a, 0xfdfffff6, 0x00020000, 0x00020200,
+        0x0001fe00, 0x00020002, 0x0001fffe, 0x00020404, 0x0001fbfc, 0x00020a0a, 0x0001f5f6, 0x00020a00,
+        0x0001f600, 0x0002000a, 0x0001fff6, 0xfffe0000, 0xfffe0200, 0xfffdfe00, 0xfffe0002, 0xfffdfffe,
+        0xfffe0404, 0xfffdfbfc, 0xfffe0a0a, 0xfffdf5f6, 0xfffe0a00, 0xfffdf600, 0xfffe000a, 0xfffdfff6,
+        0x04040000, 0x04040200, 0x0403fe00, 0x04040002, 0x0403fffe, 0x04040404, 0x0403fbfc, 0x04040a0a,
+        0x0403f5f6, 0x04040a00, 0x0403f600, 0x0404000a, 0x0403fff6, 0xfbfc0000, 0xfbfc0200, 0xfbfbfe00,
+        0xfbfc0002, 0xfbfbfffe, 0xfbfc0404, 0xfbfbfbfc, 0xfbfc0a0a, 0xfbfbf5f6, 0xfbfc0a00, 0xfbfbf600,
+        0xfbfc000a, 0xfbfbfff6, 0x0a0a0000, 0x0a0a0200, 0x0a09fe00, 0x0a0a0002, 0x0a09fffe, 0x0a0a0404,
+        0x0a09fbfc, 0x0a0a0a0a, 0x0a09f5f6, 0x0a0a0a00, 0x0a09f600, 0x0a0a000a, 0x0a09fff6, 0xf5f60000,
+        0xf5f60200, 0xf5f5fe00, 0xf5f60002, 0xf5f5fffe, 0xf5f60404, 0xf5f5fbfc, 0xf5f60a0a, 0xf5f5f5f6,
+        0xf5f60a00, 0xf5f5f600, 0xf5f6000a, 0xf5f5fff6, 0x0a000000, 0x0a000200, 0x09fffe00, 0x0a000002,
+        0x09fffffe, 0x0a000404, 0x09fffbfc, 0x0a000a0a, 0x09fff5f6, 0x0a000a00, 0x09fff600, 0x0a00000a,
+        0x09fffff6, 0xf6000000, 0xf6000200, 0xf5fffe00, 0xf6000002, 0xf5fffffe, 0xf6000404, 0xf5fffbfc,
+        0xf6000a0a, 0xf5fff5f6, 0xf6000a00, 0xf5fff600, 0xf600000a, 0xf5fffff6, 0x000a0000, 0x000a0200,
+        0x0009fe00, 0x000a0002, 0x0009fffe, 0x000a0404, 0x0009fbfc, 0x000a0a0a, 0x0009f5f6, 0x000a0a00,
+        0x0009f600, 0x000a000a, 0x0009fff6, 0xfff60000, 0xfff60200, 0xfff5fe00, 0xfff60002, 0xfff5fffe,
+        0xfff60404, 0xfff5fbfc, 0xfff60a0a, 0xfff5f5f6, 0xfff60a00, 0xfff5f600, 0xfff6000a, 0xfff5fff6,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x00000404, 0xfffffbfc, 0x00000c0c,
+        0xfffff3f4, 0x00000c00, 0xfffff400, 0x0000000c, 0xfffffff4, 0x000007f8, 0xfffff808, 0xfffff008,
+        0x00000ff8, 0xffffe800, 0x00001800, 0xfffff7e8, 0xfffff818, 0x000007e8, 0x00000818, 0xfffff014,
+        0x00000fec, 0xffffffe4, 0x0000001c, 0xffffe7f0, 0xffffe810, 0x000017f0, 0x00001810, 0xffffe000,
+        0x00002000, 0xffffefe4, 0xfffff01c, 0x00000fe4, 0x0000101c, 0xffffdff8, 0xffffe008, 0xfffff7e0,
+        0xfffff820, 0x000007e0, 0x00000820, 0x00001ff8, 0x00002008, 0x00001818, 0xffffe7e8, 0xffffe818,
+        0x000017e8, 0xffffdfec, 0x00002014, 0xffffffd8, 0x00000028, 0xffffefd8, 0x00001028, 0xffffd400,
+        0xffffd400, 0xffffffd4, 0x0000002c, 0x00002c00, 0x00002c00, 0xffffdfe0, 0x00002020, 0xffffd3f0,
+        0x00002c10, 0xffffd3e8, 0xffffe7d4, 0x0000182c, 0x00002c18, 0xffffefd0, 0x00001030, 0xffffdbdc,
+        0xffffdbdc, 0x00002424, 0x00002424, 0xffffcbec, 0x00002828, 0xffffd7d8, 0xffffcbe0, 0x00000000,
+        0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x00000404, 0xfffffbfc, 0x00000c0c, 0xfffff3f4,
+        0x00000c00, 0xfffff400, 0x0000000c, 0xfffffff4, 0x04000000, 0x04000400, 0x03fffc00, 0x04000004,
+        0x03fffffc, 0x04000404, 0x03fffbfc, 0x04000c0c, 0x03fff3f4, 0x04000c00, 0x03fff400, 0x0400000c,
+        0x03fffff4, 0xfc000000, 0xfc000400, 0xfbfffc00, 0xfc000004, 0xfbfffffc, 0xfc000404, 0xfbfffbfc,
+        0xfc000c0c, 0xfbfff3f4, 0xfc000c00, 0xfbfff400, 0xfc00000c, 0xfbfffff4, 0x00040000, 0x00040400,
+        0x0003fc00, 0x00040004, 0x0003fffc, 0x00040404, 0x0003fbfc, 0x00040c0c, 0x0003f3f4, 0x00040c00,
+        0x0003f400, 0x0004000c, 0x0003fff4, 0xfffc0000, 0xfffc0400, 0xfffbfc00, 0xfffc0004, 0xfffbfffc,
+        0xfffc0404, 0xfffbfbfc, 0xfffc0c0c, 0xfffbf3f4, 0xfffc0c00, 0xfffbf400, 0xfffc000c, 0xfffbfff4,
+        0x04040000, 0x04040400, 0x0403fc00, 0x04040004, 0x0403fffc, 0x04040404, 0x0403fbfc, 0x04040c0c,
+        0x0403f3f4, 0x04040c00, 0x0403f400, 0x0404000c, 0x0403fff4, 0xfbfc0000, 0xfbfc0400, 0xfbfbfc00,
+        0xfbfc0004, 0xfbfbfffc, 0xfbfc0404, 0xfbfbfbfc, 0xfbfc0c0c, 0xfbfbf3f4, 0xfbfc0c00, 0xfbfbf400,
+        0xfbfc000c, 0xfbfbfff4, 0x0c0c0000, 0x0c0c0400, 0x0c0bfc00, 0x0c0c0004, 0x0c0bfffc, 0x0c0c0404,
+        0x0c0bfbfc, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c0c00, 0x0c0bf400, 0x0c0c000c, 0x0c0bfff4, 0xf3f40000,
+        0xf3f40400, 0xf3f3fc00, 0xf3f40004, 0xf3f3fffc, 0xf3f40404, 0xf3f3fbfc, 0xf3f40c0c, 0xf3f3f3f4,
+        0xf3f40c00, 0xf3f3f400, 0xf3f4000c, 0xf3f3fff4, 0x0c000000, 0x0c000400, 0x0bfffc00, 0x0c000004,
+        0x0bfffffc, 0x0c000404, 0x0bfffbfc, 0x0c000c0c, 0x0bfff3f4, 0x0c000c00, 0x0bfff400, 0x0c00000c,
+        0x0bfffff4, 0xf4000000, 0xf4000400, 0xf3fffc00, 0xf4000004, 0xf3fffffc, 0xf4000404, 0xf3fffbfc,
+        0xf4000c0c, 0xf3fff3f4, 0xf4000c00, 0xf3fff400, 0xf400000c, 0xf3fffff4, 0x000c0000, 0x000c0400,
+        0x000bfc00, 0x000c0004, 0x000bfffc, 0x000c0404, 0x000bfbfc, 0x000c0c0c, 0x000bf3f4, 0x000c0c00,
+        0x000bf400, 0x000c000c, 0x000bfff4, 0xfff40000, 0xfff40400, 0xfff3fc00, 0xfff40004, 0xfff3fffc,
+        0xfff40404, 0xfff3fbfc, 0xfff40c0c, 0xfff3f3f4, 0xfff40c00, 0xfff3f400, 0xfff4000c, 0xfff3fff4,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414,
+        0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec,
+        0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606,
+        0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e,
+        0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4,
+        0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202,
+        0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020,
+        0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa,
+        0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2,
+        0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414,
+        0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe,
+        0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0,
+        0xf3f42e2e, 0xf3f3d1d2, 0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c,
+        0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000,
+        0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec,
+        0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606,
+        0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e,
+        0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4,
+        0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202,
+        0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020,
+        0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa,
+        0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414,
+        0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec,
+        0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606,
+        0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e,
+        0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4,
+        0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202,
+        0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020,
+        0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa,
+        0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2,
+        0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414,
+        0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe,
+        0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0,
+        0xf3f42e2e, 0xf3f3d1d2, 0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c,
+        0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000,
+        0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec,
+        0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606,
+        0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e,
+        0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4,
+        0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202,
+        0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020,
+        0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa,
+        0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414,
+        0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec,
+        0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606,
+        0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e,
+        0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4,
+        0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202,
+        0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020,
+        0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa,
+        0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2,
+        0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414,
+        0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe,
+        0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0,
+        0xf3f42e2e, 0xf3f3d1d2, 0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c,
+        0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000,
+        0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec,
+        0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606,
+        0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e,
+        0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4,
+        0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202,
+        0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020,
+        0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa,
+        0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414,
+        0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec,
+        0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606,
+        0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e,
+        0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4,
+        0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202,
+        0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020,
+        0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa,
+        0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2,
+        0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414,
+        0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe,
+        0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0,
+        0xf3f42e2e, 0xf3f3d1d2, 0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c,
+        0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000,
+        0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec,
+        0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606,
+        0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e,
+        0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4,
+        0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202,
+        0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020,
+        0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa,
+        0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
+};
+
+
+static const uint32_t correctionloworder[] = {
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x04040404,
+        0xfbfbfbfc, 0x05050101, 0xfafafeff, 0x01010505, 0xfefefafb, 0x0403fbfc, 0xfbfc0404, 0x0605fdfe,
+        0xf9fa0202, 0xfdfe0606, 0x0201f9fa, 0x09090404, 0xf6f6fbfc, 0x04040909, 0xfbfbf6f7, 0x09090909,
+        0xf6f6f6f7, 0x0a0a0101, 0xf5f5feff, 0x01010a0a, 0xfefef5f6, 0x0807fafb, 0xf7f80505, 0xfafb0808,
+        0x0504f7f8, 0x0f0f0909, 0xf0f0f6f7, 0x09090f0f, 0xf6f6f0f1, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c,
+        0x0302f3f4, 0x10100404, 0xefeffbfc, 0x04041010, 0xfbfbeff0, 0x10101010, 0xefefeff0, 0x12120000,
+        0xedee0000, 0x00001212, 0xffffedee, 0x0c0bf3f4, 0xf3f40c0c, 0x100ff6f7, 0xeff00909, 0xf6f71010,
+        0x0908eff0, 0x1b1b0b0b, 0xe4e4f4f5, 0x0b0b1b1b, 0xf4f4e4e5, 0x1c1c1313, 0xe3e3eced, 0x13131c1c,
+        0xecece3e4, 0x1615f9fa, 0xe9ea0606, 0xf9fa1616, 0x0605e9ea, 0x1d1d0404, 0xe2e2fbfc, 0x04041d1d,
+        0xfbfbe2e3, 0x1e1e1e1e, 0xe1e1e1e2, 0x2120fdfe, 0xdedf0202, 0xfdfe2121, 0x0201dedf, 0x1716edee,
+        0xe8e91212, 0xedee1717, 0x1211e8e9, 0x1e1df0f1, 0xe1e20f0f, 0xf0f11e1e, 0x0f0ee1e2, 0x2e2e1616,
+        0xd1d1e9ea, 0x16162e2e, 0xe9e9d1d2, 0x2f2f0d0d, 0xd0d0f2f3, 0x0d0d2f2f, 0xf2f2d0d1, 0x31312323,
+        0xcecedcdd, 0x23233131, 0xdcdccecf, 0x2928f4f5, 0xd6d70b0b, 0xf4f52929, 0x0b0ad6d7, 0x33330404,
+        0xccccfbfc, 0x04043333, 0xfbfbcccd, 0x36363636, 0xc9c9c9ca, 0x2221ddde, 0xddde2222, 0x2a29e2e3,
+        0xd5d61d1d, 0xe2e32a2a, 0x1d1cd5d6, 0x3c3bf9fa, 0xc3c40606, 0xf9fa3c3c, 0x0605c3c4, 0x4c4c1b1b,
+        0xb3b3e4e5, 0x1b1b4c4c, 0xe4e4b3b4, 0x4d4d2b2b, 0xb2b2d4d5, 0x2b2b4d4d, 0xd4d4b2b3, 0x3736e7e8,
+        0xc8c91818, 0xe7e83737, 0x1817c8c9, 0x4f4f0e0e, 0xb0b0f1f2, 0x0e0e4f4f, 0xf1f1b0b1, 0x53533f3f,
+        0xacacc0c1, 0x3f3f5353, 0xc0c0acad, 0x4a49ebec, 0xb5b61414, 0xebec4a4a, 0x1413b5b6, 0x58580202,
+        0xa7a7fdfe, 0x02025858, 0xfdfda7a8, 0x5d5d5d5d, 0xa2a2a2a3, 0x3d3ccbcc, 0xc2c33434, 0xcbcc3d3d,
+        0x3433c2c3, 0x78783434, 0x8787cbcc, 0x34347878, 0xcbcb8788, 0x4b4ad2d3, 0xb4b52d2d, 0xd2d34b4b,
+        0x2d2cb4b5, 0x7d7d4b4b, 0x8282b4b5, 0x4b4b7d7d, 0xb4b48283, 0x7a7a2121, 0x8585dedf, 0x21217a7a,
+        0xdede8586, 0x6766f2f3, 0x98990d0d, 0xf2f36767, 0x0d0c9899, 0x605fd7d8, 0x9fa02828, 0xd7d86060,
+        0x28279fa0, 0x7f7eddde, 0x80812222, 0xddde7f7f, 0x22218081, 0x5958a6a7, 0xa6a75959, 0x6968b1b2,
+        0x96974e4e, 0xb1b26969, 0x4e4d9697, 0x0c0c0c0c, 0xf3f3f3f4, 0x17171717, 0xe8e8e8e9, 0x2a2a2a2a,
+        0xd5d5d5d6, 0x49494949, 0xb6b6b6b7, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0xfcfd0101,
+        0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfeff0303, 0xfeff0303,
+        0xfeff0303, 0xfeff0303, 0xfeff0303, 0xfeff0303, 0xfeff0303, 0x0100fcfd, 0x0100fcfd, 0x0100fcfd,
+        0x0100fcfd, 0x0100fcfd, 0x0100fcfd, 0x0100fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707,
+        0xf8f8f8f9, 0x08080202, 0xf7f7fdfe, 0x02020808, 0xfdfdf7f8, 0x0908fdfe, 0xf6f70202, 0xfdfe0909,
+        0x0201f6f7, 0x0605f9fa, 0xf9fa0606, 0x0d0d0606, 0xf2f2f9fa, 0x06060d0d, 0xf9f9f2f3, 0x0d0d0d0d,
+        0xf2f2f2f3, 0x0e0e0101, 0xf1f1feff, 0x01010e0e, 0xfefef1f2, 0x0c0bf7f8, 0xf3f40808, 0xf7f80c0c,
+        0x0807f3f4, 0x17170e0e, 0xe8e8f1f2, 0x0e0e1717, 0xf1f1e8e9, 0x1211fafb, 0xedee0505, 0xfafb1212,
+        0x0504edee, 0x18180606, 0xe7e7f9fa, 0x06061818, 0xf9f9e7e8, 0x18181818, 0xe7e7e7e8, 0x1b1afeff,
+        0xe4e50101, 0xfeff1b1b, 0x0100e4e5, 0x1110eeef, 0xeeef1111, 0x1716f2f3, 0xe8e90d0d, 0xf2f31717,
+        0x0d0ce8e9, 0x28281010, 0xd7d7eff0, 0x10102828, 0xefefd7d8, 0x29291c1c, 0xd6d6e3e4, 0x1c1c2929,
+        0xe3e3d6d7, 0x2120f6f7, 0xdedf0909, 0xf6f72121, 0x0908dedf, 0x2b2b0606, 0xd4d4f9fa, 0x06062b2b,
+        0xf9f9d4d5, 0x2e2e2e2e, 0xd1d1d1d2, 0x3231fbfc, 0xcdce0404, 0xfbfc3232, 0x0403cdce, 0x2221e4e5,
+        0xddde1b1b, 0xe4e52222, 0x1b1addde, 0x2d2ce9ea, 0xd2d31616, 0xe9ea2d2d, 0x1615d2d3, 0x45452222,
+        0xbabaddde, 0x22224545, 0xddddbabb, 0x46461313, 0xb9b9eced, 0x13134646, 0xececb9ba, 0x49493535,
+        0xb6b6cacb, 0x35354949, 0xcacab6b7, 0x3e3deeef, 0xc1c21111, 0xeeef3e3e, 0x1110c1c2, 0x4d4d0505,
+        0xb2b2fafb, 0x05054d4d, 0xfafab2b3, 0x52525252, 0xadadadae, 0x3332cccd, 0xcccd3333, 0x403fd4d5,
+        0xbfc02b2b, 0xd4d54040, 0x2b2abfc0, 0x5a59f5f6, 0xa5a60a0a, 0xf5f65a5a, 0x0a09a5a6, 0x72722929,
+        0x8d8dd6d7, 0x29297272, 0xd6d68d8e, 0x74744040, 0x8b8bbfc0, 0x40407474, 0xbfbf8b8c, 0x5251dadb,
+        0xadae2525, 0xdadb5252, 0x2524adae, 0x77771616, 0x8888e9ea, 0x16167777, 0xe9e98889, 0x7c7c5f5f,
+        0x8383a0a1, 0x5f5f7c7c, 0xa0a08384, 0x6f6ee1e2, 0x90911e1e, 0xe1e26f6f, 0x1e1d9091, 0x5c5bb1b2,
+        0xa3a44e4e, 0xb1b25c5c, 0x4e4da3a4, 0x7170bbbc, 0x8e8f4444, 0xbbbc7171, 0x44438e8f, 0x12121212,
+        0xedededee, 0x22222222, 0xddddddde, 0x3f3f3f3f, 0xc0c0c0c1, 0x6d6d6d6d, 0x92929293, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303,
+        0x03030303, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd,
+        0xfcfcfcfd, 0xfcfcfcfd, 0x0403feff, 0x0403feff, 0x0403feff, 0x0403feff, 0x0403feff, 0x0403feff,
+        0x0403feff, 0x0403feff, 0x0403feff, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101,
+        0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfeff0404, 0xfeff0404, 0xfeff0404, 0xfeff0404,
+        0xfeff0404, 0xfeff0404, 0xfeff0404, 0xfeff0404, 0xfeff0404, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc,
+        0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x07070707, 0x07070707,
+        0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0xf8f8f8f9,
+        0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303,
+        0xf5f5fcfd, 0x03030a0a, 0xfcfcf5f6, 0x09090909, 0xf6f6f6f7, 0x0706f8f9, 0xf8f90707, 0x0c0bfcfd,
+        0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x11110808, 0xeeeef7f8, 0x08081111, 0xf7f7eeef, 0x11111111,
+        0xeeeeeeef, 0x13130101, 0xececfeff, 0x01011313, 0xfefeeced, 0x100ff4f5, 0xeff00b0b, 0xf4f51010,
+        0x0b0aeff0, 0x1716f9fa, 0xe8e90606, 0xf9fa1717, 0x0605e8e9, 0x1f1f1212, 0xe0e0edee, 0x12121f1f,
+        0xedede0e1, 0x20200808, 0xdfdff7f8, 0x08082020, 0xf7f7dfe0, 0x21212121, 0xdedededf, 0x2423feff,
+        0xdbdc0101, 0xfeff2424, 0x0100dbdc, 0x1716e8e9, 0xe8e91717, 0x1f1eeeef, 0xe0e11111, 0xeeef1f1f,
+        0x1110e0e1, 0x36361515, 0xc9c9eaeb, 0x15153636, 0xeaeac9ca, 0x37372525, 0xc8c8dadb, 0x25253737,
+        0xdadac8c9, 0x2c2bf3f4, 0xd3d40c0c, 0xf3f42c2c, 0x0c0bd3d4, 0x39390808, 0xc6c6f7f8, 0x08083939,
+        0xf7f7c6c7, 0x3d3d3d3d, 0xc2c2c2c3, 0x4241fafb, 0xbdbe0505, 0xfafb4242, 0x0504bdbe, 0x2d2cdbdc,
+        0xd2d32424, 0xdbdc2d2d, 0x2423d2d3, 0x3c3be2e3, 0xc3c41d1d, 0xe2e33c3c, 0x1d1cc3c4, 0x5c5c2d2d,
+        0xa3a3d2d3, 0x2d2d5c5c, 0xd2d2a3a4, 0x5d5d1919, 0xa2a2e6e7, 0x19195d5d, 0xe6e6a2a3, 0x61614747,
+        0x9e9eb8b9, 0x47476161, 0xb8b89e9f, 0x5352e9ea, 0xacad1616, 0xe9ea5353, 0x1615acad, 0x66660707,
+        0x9999f8f9, 0x07076666, 0xf8f8999a, 0x6d6d6d6d, 0x92929293, 0x4443bbbc, 0xbbbc4444, 0x5554c6c7,
+        0xaaab3939, 0xc6c75555, 0x3938aaab, 0x7877f2f3, 0x87880d0d, 0xf2f37878, 0x0d0c8788, 0x6e6dcecf,
+        0x91923131, 0xcecf6e6e, 0x31309192, 0x7b7a9798, 0x84856868, 0x97987b7b, 0x68678485, 0x18181818,
+        0xe7e7e7e8, 0x2e2e2e2e, 0xd1d1d1d2, 0x54545454, 0xabababac, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04040404,
+        0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404,
+        0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc,
+        0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff,
+        0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff, 0xfafb0101, 0xfafb0101, 0xfafb0101,
+        0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfeff0505,
+        0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505,
+        0xfeff0505, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb,
+        0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303,
+        0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd,
+        0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0x03030a0a,
+        0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a,
+        0x03030a0a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b,
+        0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x03030d0d, 0xfcfcf2f3, 0x0908f6f7, 0xf6f70909, 0x0f0efbfc,
+        0xf0f10404, 0xfbfc0f0f, 0x0403f0f1, 0x16160b0b, 0xe9e9f4f5, 0x0b0b1616, 0xf4f4e9ea, 0x15151515,
+        0xeaeaeaeb, 0x18180202, 0xe7e7fdfe, 0x02021818, 0xfdfde7e8, 0x1413f1f2, 0xebec0e0e, 0xf1f21414,
+        0x0e0debec, 0x26261717, 0xd9d9e8e9, 0x17172626, 0xe8e8d9da, 0x1d1cf7f8, 0xe2e30808, 0xf7f81d1d,
+        0x0807e2e3, 0x27270b0b, 0xd8d8f4f5, 0x0b0b2727, 0xf4f4d8d9, 0x29292929, 0xd6d6d6d7, 0x2d2cfeff,
+        0xd2d30101, 0xfeff2d2d, 0x0100d2d3, 0x1d1ce2e3, 0xe2e31d1d, 0x2726e9ea, 0xd8d91616, 0xe9ea2727,
+        0x1615d8d9, 0x43431b1b, 0xbcbce4e5, 0x1b1b4343, 0xe4e4bcbd, 0x45452f2f, 0xbabad0d1, 0x2f2f4545,
+        0xd0d0babb, 0x3837f0f1, 0xc7c80f0f, 0xf0f13838, 0x0f0ec7c8, 0x47470b0b, 0xb8b8f4f5, 0x0b0b4747,
+        0xf4f4b8b9, 0x4c4c4c4c, 0xb3b3b3b4, 0x5352f9fa, 0xacad0606, 0xf9fa5353, 0x0605acad, 0x3938d2d3,
+        0xc6c72d2d, 0xd2d33939, 0x2d2cc6c7, 0x4b4adbdc, 0xb4b52424, 0xdbdc4b4b, 0x2423b4b5, 0x73733838,
+        0x8c8cc7c8, 0x38387373, 0xc7c78c8d, 0x75751f1f, 0x8a8ae0e1, 0x1f1f7575, 0xe0e08a8b, 0x7a7a5858,
+        0x8585a7a8, 0x58587a7a, 0xa7a78586, 0x6867e3e4, 0x97981c1c, 0xe3e46868, 0x1c1b9798, 0x5554aaab,
+        0xaaab5555, 0x6a69b7b8, 0x95964848, 0xb7b86a6a, 0x48479596, 0x1e1e1e1e, 0xe1e1e1e2, 0x3a3a3a3a,
+        0xc5c5c5c6, 0x69696969, 0x96969697, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x05050505, 0x05050505,
+        0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505,
+        0x05050505, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb,
+        0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe,
+        0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0xf8f90202,
+        0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202,
+        0xf8f90202, 0xf8f90202, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707,
+        0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9,
+        0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9,
+        0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b,
+        0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5,
+        0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0x0d0d0303, 0x0d0d0303,
+        0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303,
+        0x0d0d0303, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd,
+        0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d,
+        0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0xfbfbf0f1, 0x0b0af4f5, 0xf4f50b0b, 0x1211fafb,
+        0xedee0505, 0xfafb1212, 0x0504edee, 0x1a1a0d0d, 0xe5e5f2f3, 0x0d0d1a1a, 0xf2f2e5e6, 0x1a1a1a1a,
+        0xe5e5e5e6, 0x1d1d0202, 0xe2e2fdfe, 0x02021d1d, 0xfdfde2e3, 0x1817eff0, 0xe7e81010, 0xeff01818,
+        0x100fe7e8, 0x2e2e1c1c, 0xd1d1e3e4, 0x1c1c2e2e, 0xe3e3d1d2, 0x2322f6f7, 0xdcdd0909, 0xf6f72323,
+        0x0908dcdd, 0x2f2f0d0d, 0xd0d0f2f3, 0x0d0d2f2f, 0xf2f2d0d1, 0x31313131, 0xcecececf, 0x3635feff,
+        0xc9ca0101, 0xfeff3636, 0x0100c9ca, 0x2322dcdd, 0xdcdd2323, 0x2f2ee5e6, 0xd0d11a1a, 0xe5e62f2f,
+        0x1a19d0d1, 0x51512020, 0xaeaedfe0, 0x20205151, 0xdfdfaeaf, 0x53533838, 0xacacc7c8, 0x38385353,
+        0xc7c7acad, 0x4342edee, 0xbcbd1212, 0xedee4343, 0x1211bcbd, 0x56560d0d, 0xa9a9f2f3, 0x0d0d5656,
+        0xf2f2a9aa, 0x5b5b5b5b, 0xa4a4a4a5, 0x6362f8f9, 0x9c9d0707, 0xf8f96363, 0x07069c9d, 0x4443c9ca,
+        0xbbbc3636, 0xc9ca4444, 0x3635bbbc, 0x5a59d3d4, 0xa5a62c2c, 0xd3d45a5a, 0x2c2ba5a6, 0x7c7bdedf,
+        0x83842121, 0xdedf7c7c, 0x21208384, 0x67669899, 0x98996767, 0x7f7ea9aa, 0x80815656, 0xa9aa7f7f,
+        0x56558081, 0x25252525, 0xdadadadb, 0x45454545, 0xbabababb, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe,
+        0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0xf7f80202, 0xf7f80202, 0xf7f80202,
+        0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202,
+        0xf7f80202, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808,
+        0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8,
+        0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8,
+        0x0201f7f8, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d,
+        0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3,
+        0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3,
+        0xf2f2f2f3, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404,
+        0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc,
+        0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc,
+        0xf0f0fbfc, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f,
+        0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010,
+        0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0xfafaedee, 0x0d0cf2f3, 0xf2f30d0d, 0x1514f9fa,
+        0xeaeb0606, 0xf9fa1515, 0x0605eaeb, 0x1e1e0f0f, 0xe1e1f0f1, 0x0f0f1e1e, 0xf0f0e1e2, 0x1e1e1e1e,
+        0xe1e1e1e2, 0x22220202, 0xddddfdfe, 0x02022222, 0xfdfdddde, 0x1c1beced, 0xe3e41313, 0xeced1c1c,
+        0x1312e3e4, 0x36362020, 0xc9c9dfe0, 0x20203636, 0xdfdfc9ca, 0x2928f4f5, 0xd6d70b0b, 0xf4f52929,
+        0x0b0ad6d7, 0x37370f0f, 0xc8c8f0f1, 0x0f0f3737, 0xf0f0c8c9, 0x39393939, 0xc6c6c6c7, 0x3f3efeff,
+        0xc0c10101, 0xfeff3f3f, 0x0100c0c1, 0x2827d7d8, 0xd7d82828, 0x3736e1e2, 0xc8c91e1e, 0xe1e23737,
+        0x1e1dc8c9, 0x5e5e2525, 0xa1a1dadb, 0x25255e5e, 0xdadaa1a2, 0x60604141, 0x9f9fbebf, 0x41416060,
+        0xbebe9fa0, 0x4e4deaeb, 0xb1b21515, 0xeaeb4e4e, 0x1514b1b2, 0x64640f0f, 0x9b9bf0f1, 0x0f0f6464,
+        0xf0f09b9c, 0x6a6a6a6a, 0x95959596, 0x7473f7f8, 0x8b8c0808, 0xf7f87474, 0x08078b8c, 0x4f4ec0c1,
+        0xb0b13f3f, 0xc0c14f4f, 0x3f3eb0b1, 0x6968cccd, 0x96973333, 0xcccd6969, 0x33329697, 0x78778788,
+        0x87887878, 0x2b2b2b2b, 0xd4d4d4d5, 0x50505050, 0xafafafb0, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707,
+        0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9,
+        0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9,
+        0xf8f8f8f9, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd,
+        0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0xf5f60303, 0xf5f60303, 0xf5f60303,
+        0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303,
+        0xf5f60303, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a,
+        0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6,
+        0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6,
+        0x0302f5f6, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010,
+        0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0xefefeff0, 0xefefeff0, 0xefefeff0,
+        0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0,
+        0xefefeff0, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505,
+        0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0xededfafb, 0xededfafb, 0xededfafb,
+        0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb,
+        0xededfafb, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x05051212,
+        0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212,
+        0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0xfafaebec, 0x0f0ef0f1, 0xf0f10f0f, 0x1817f8f9,
+        0xe7e80707, 0xf8f91818, 0x0706e7e8, 0x23231111, 0xdcdceeef, 0x11112323, 0xeeeedcdd, 0x22222222,
+        0xddddddde, 0x26260303, 0xd9d9fcfd, 0x03032626, 0xfcfcd9da, 0x201fe9ea, 0xdfe01616, 0xe9ea2020,
+        0x1615dfe0, 0x3d3d2525, 0xc2c2dadb, 0x25253d3d, 0xdadac2c3, 0x2f2ef2f3, 0xd0d10d0d, 0xf2f32f2f,
+        0x0d0cd0d1, 0x3f3f1111, 0xc0c0eeef, 0x11113f3f, 0xeeeec0c1, 0x41414141, 0xbebebebf, 0x4847feff,
+        0xb7b80101, 0xfeff4848, 0x0100b7b8, 0x2e2dd1d2, 0xd1d22e2e, 0x3f3edcdd, 0xc0c12323, 0xdcdd3f3f,
+        0x2322c0c1, 0x6b6b2b2b, 0x9494d4d5, 0x2b2b6b6b, 0xd4d49495, 0x6e6e4b4b, 0x9191b4b5, 0x4b4b6e6e,
+        0xb4b49192, 0x5958e7e8, 0xa6a71818, 0xe7e85959, 0x1817a6a7, 0x72721111, 0x8d8deeef, 0x11117272,
+        0xeeee8d8e, 0x79797979, 0x86868687, 0x5b5ab7b8, 0xa4a54848, 0xb7b85b5b, 0x4847a4a5, 0x7877c5c6,
+        0x87883a3a, 0xc5c67878, 0x3a398788, 0x31313131, 0xcecececf, 0x5c5c5c5c, 0xa3a3a3a4, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808,
+        0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0xf7f7f7f8,
+        0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8,
+        0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd,
+        0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0xf4f50303,
+        0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303,
+        0xf4f50303, 0xf4f50303, 0xf4f50303, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b,
+        0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0x0302f4f5,
+        0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5,
+        0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212,
+        0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0xedededee,
+        0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee,
+        0xedededee, 0xedededee, 0xedededee, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505,
+        0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0xebebfafb,
+        0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb,
+        0xebebfafb, 0xebebfafb, 0xebebfafb, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414,
+        0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414,
+        0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x1110eeef, 0xeeef1111, 0x1b1af8f9,
+        0xe4e50707, 0xf8f91b1b, 0x0706e4e5, 0x27271313, 0xd8d8eced, 0x13132727, 0xececd8d9, 0x27272727,
+        0xd8d8d8d9, 0x2b2b0303, 0xd4d4fcfd, 0x03032b2b, 0xfcfcd4d5, 0x2423e7e8, 0xdbdc1818, 0xe7e82424,
+        0x1817dbdc, 0x45452a2a, 0xbabad5d6, 0x2a2a4545, 0xd5d5babb, 0x3534f1f2, 0xcacb0e0e, 0xf1f23535,
+        0x0e0dcacb, 0x47471313, 0xb8b8eced, 0x13134747, 0xececb8b9, 0x49494949, 0xb6b6b6b7, 0x504ffdfe,
+        0xafb00202, 0xfdfe5050, 0x0201afb0, 0x3433cbcc, 0xcbcc3434, 0x4645d8d9, 0xb9ba2727, 0xd8d94646,
+        0x2726b9ba, 0x79793030, 0x8686cfd0, 0x30307979, 0xcfcf8687, 0x7c7c5454, 0x8383abac, 0x54547c7c,
+        0xabab8384, 0x6463e4e5, 0x9b9c1b1b, 0xe4e56464, 0x1b1a9b9c, 0x6665aeaf, 0x999a5151, 0xaeaf6666,
+        0x5150999a, 0x37373737, 0xc8c8c8c9, 0x68686868, 0x97979798, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909,
+        0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0xf6f6f6f7,
+        0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7,
+        0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd,
+        0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd,
+        0x0c0bfcfd, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303,
+        0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xfcfd0c0c, 0xfcfd0c0c,
+        0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c,
+        0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4,
+        0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4,
+        0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414,
+        0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec, 0xebebebec, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec,
+        0xebebebec, 0xebebebec, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606,
+        0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0xe8e8f9fa,
+        0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa,
+        0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0x06061717, 0x06061717, 0x06061717, 0x06061717,
+        0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717,
+        0x06061717, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9,
+        0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404,
+        0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x0403fbfc, 0xfbfc0404, 0x0605fdfe,
+        0xf9fa0202, 0xfdfe0606, 0x0201f9fa, 0x08080404, 0xf7f7fbfc, 0x04040808, 0xfbfbf7f8, 0x08080808,
+        0xf7f7f7f8, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x0807fbfc, 0xf7f80404, 0xfbfc0808,
+        0x0403f7f8, 0x0e0e0808, 0xf1f1f7f8, 0x08080e0e, 0xf7f7f1f2, 0x0c0bfdfe, 0xf3f40202, 0xfdfe0c0c,
+        0x0201f3f4, 0x10100404, 0xefeffbfc, 0x04041010, 0xfbfbeff0, 0x10101010, 0xefefeff0, 0x12120000,
+        0xedee0000, 0x00001212, 0xffffedee, 0x0c0bf3f4, 0xf3f40c0c, 0x100ff7f8, 0xeff00808, 0xf7f81010,
+        0x0807eff0, 0x1a1a0a0a, 0xe5e5f5f6, 0x0a0a1a1a, 0xf5f5e5e6, 0x1c1c1212, 0xe3e3edee, 0x12121c1c,
+        0xedede3e4, 0x1615f9fa, 0xe9ea0606, 0xf9fa1616, 0x0605e9ea, 0x1c1c0404, 0xe3e3fbfc, 0x04041c1c,
+        0xfbfbe3e4, 0x1e1e1e1e, 0xe1e1e1e2, 0x201ffdfe, 0xdfe00202, 0xfdfe2020, 0x0201dfe0, 0x1615edee,
+        0xe9ea1212, 0xedee1616, 0x1211e9ea, 0x1e1df1f2, 0xe1e20e0e, 0xf1f21e1e, 0x0e0de1e2, 0x2e2e1616,
+        0xd1d1e9ea, 0x16162e2e, 0xe9e9d1d2, 0x2e2e0c0c, 0xd1d1f3f4, 0x0c0c2e2e, 0xf3f3d1d2, 0x30302222,
+        0xcfcfddde, 0x22223030, 0xddddcfd0, 0x2827f5f6, 0xd7d80a0a, 0xf5f62828, 0x0a09d7d8, 0x32320404,
+        0xcdcdfbfc, 0x04043232, 0xfbfbcdce, 0x36363636, 0xc9c9c9ca, 0x2221ddde, 0xddde2222, 0x2a29e3e4,
+        0xd5d61c1c, 0xe3e42a2a, 0x1c1bd5d6, 0x3c3bf9fa, 0xc3c40606, 0xf9fa3c3c, 0x0605c3c4, 0x4c4c1a1a,
+        0xb3b3e5e6, 0x1a1a4c4c, 0xe5e5b3b4, 0x4c4c2a2a, 0xb3b3d5d6, 0x2a2a4c4c, 0xd5d5b3b4, 0x3635e7e8,
+        0xc9ca1818, 0xe7e83636, 0x1817c9ca, 0x4e4e0e0e, 0xb1b1f1f2, 0x0e0e4e4e, 0xf1f1b1b2, 0x52523e3e,
+        0xadadc1c2, 0x3e3e5252, 0xc1c1adae, 0x4a49ebec, 0xb5b61414, 0xebec4a4a, 0x1413b5b6, 0x58580202,
+        0xa7a7fdfe, 0x02025858, 0xfdfda7a8, 0x5c5c5c5c, 0xa3a3a3a4, 0x3c3bcbcc, 0xc3c43434, 0xcbcc3c3c,
+        0x3433c3c4, 0x76763434, 0x8989cbcc, 0x34347676, 0xcbcb898a, 0x4a49d3d4, 0xb5b62c2c, 0xd3d44a4a,
+        0x2c2bb5b6, 0x76764a4a, 0x8989b5b6, 0x4a4a7676, 0xb5b5898a, 0x76762020, 0x8989dfe0, 0x20207676,
+        0xdfdf898a, 0x6665f3f4, 0x999a0c0c, 0xf3f46666, 0x0c0b999a, 0x605fd7d8, 0x9fa02828, 0xd7d86060,
+        0x28279fa0, 0x7675ddde, 0x898a2222, 0xddde7676, 0x2221898a, 0x5857a7a8, 0xa7a85858, 0x6867b1b2,
+        0x97984e4e, 0xb1b26868, 0x4e4d9798, 0x0c0c0c0c, 0xf3f3f3f4, 0x16161616, 0xe9e9e9ea, 0x2a2a2a2a,
+        0xd5d5d5d6, 0x48484848, 0xb7b7b7b8, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0xfdfe0000,
+        0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0x00000202, 0x00000202,
+        0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe,
+        0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606,
+        0xf9f9f9fa, 0x09090303, 0xf6f6fcfd, 0x03030909, 0xfcfcf6f7, 0x0908fcfd, 0xf6f70303, 0xfcfd0909,
+        0x0302f6f7, 0x0605f9fa, 0xf9fa0606, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0xf9f9f3f4, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x0f0f0000, 0xf0f10000, 0x00000f0f, 0xfffff0f1, 0x0c0bf6f7, 0xf3f40909, 0xf6f70c0c,
+        0x0908f3f4, 0x18180f0f, 0xe7e7f0f1, 0x0f0f1818, 0xf0f0e7e8, 0x1211f9fa, 0xedee0606, 0xf9fa1212,
+        0x0605edee, 0x18180606, 0xe7e7f9fa, 0x06061818, 0xf9f9e7e8, 0x18181818, 0xe7e7e7e8, 0x1b1b0000,
+        0xe4e50000, 0x00001b1b, 0xffffe4e5, 0x1211edee, 0xedee1212, 0x1817f3f4, 0xe7e80c0c, 0xf3f41818,
+        0x0c0be7e8, 0x27270f0f, 0xd8d8f0f1, 0x0f0f2727, 0xf0f0d8d9, 0x2a2a1b1b, 0xd5d5e4e5, 0x1b1b2a2a,
+        0xe4e4d5d6, 0x2120f6f7, 0xdedf0909, 0xf6f72121, 0x0908dedf, 0x2a2a0606, 0xd5d5f9fa, 0x06062a2a,
+        0xf9f9d5d6, 0x2d2d2d2d, 0xd2d2d2d3, 0x3332fcfd, 0xcccd0303, 0xfcfd3333, 0x0302cccd, 0x2120e4e5,
+        0xdedf1b1b, 0xe4e52121, 0x1b1adedf, 0x2d2ceaeb, 0xd2d31515, 0xeaeb2d2d, 0x1514d2d3, 0x45452121,
+        0xbabadedf, 0x21214545, 0xdedebabb, 0x45451212, 0xbabaedee, 0x12124545, 0xededbabb, 0x48483636,
+        0xb7b7c9ca, 0x36364848, 0xc9c9b7b8, 0x3f3eedee, 0xc0c11212, 0xedee3f3f, 0x1211c0c1, 0x4e4e0606,
+        0xb1b1f9fa, 0x06064e4e, 0xf9f9b1b2, 0x51515151, 0xaeaeaeaf, 0x3332cccd, 0xcccd3333, 0x3f3ed5d6,
+        0xc0c12a2a, 0xd5d63f3f, 0x2a29c0c1, 0x5a59f6f7, 0xa5a60909, 0xf6f75a5a, 0x0908a5a6, 0x72722a2a,
+        0x8d8dd5d6, 0x2a2a7272, 0xd5d58d8e, 0x75753f3f, 0x8a8ac0c1, 0x3f3f7575, 0xc0c08a8b, 0x5150dbdc,
+        0xaeaf2424, 0xdbdc5151, 0x2423aeaf, 0x78781515, 0x8787eaeb, 0x15157878, 0xeaea8788, 0x7b7b6060,
+        0x84849fa0, 0x60607b7b, 0x9f9f8485, 0x6f6ee1e2, 0x90911e1e, 0xe1e26f6f, 0x1e1d9091, 0x5d5cb1b2,
+        0xa2a34e4e, 0xb1b25d5d, 0x4e4da2a3, 0x7271babb, 0x8d8e4545, 0xbabb7272, 0x45448d8e, 0x12121212,
+        0xedededee, 0x21212121, 0xdedededf, 0x3f3f3f3f, 0xc0c0c0c1, 0x6c6c6c6c, 0x93939394, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303,
+        0x03030303, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd,
+        0xfcfcfcfd, 0xfcfcfcfd, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000,
+        0x03030000, 0x03030000, 0x03030000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000,
+        0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0x00000303, 0x00000303, 0x00000303, 0x00000303,
+        0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd,
+        0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404,
+        0xf7f7fbfc, 0x04040808, 0xfbfbf7f8, 0x08080808, 0xf7f7f7f8, 0x0807f7f8, 0xf7f80808, 0x0c0bfbfc,
+        0xf3f40404, 0xfbfc0c0c, 0x0403f3f4, 0x10100808, 0xefeff7f8, 0x08081010, 0xf7f7eff0, 0x10101010,
+        0xefefeff0, 0x14140000, 0xebec0000, 0x00001414, 0xffffebec, 0x100ff3f4, 0xeff00c0c, 0xf3f41010,
+        0x0c0beff0, 0x1817fbfc, 0xe7e80404, 0xfbfc1818, 0x0403e7e8, 0x20201010, 0xdfdfeff0, 0x10102020,
+        0xefefdfe0, 0x20200808, 0xdfdff7f8, 0x08082020, 0xf7f7dfe0, 0x20202020, 0xdfdfdfe0, 0x24240000,
+        0xdbdc0000, 0x00002424, 0xffffdbdc, 0x1817e7e8, 0xe7e81818, 0x201feff0, 0xdfe01010, 0xeff02020,
+        0x100fdfe0, 0x34341414, 0xcbcbebec, 0x14143434, 0xebebcbcc, 0x38382424, 0xc7c7dbdc, 0x24243838,
+        0xdbdbc7c8, 0x2c2bf3f4, 0xd3d40c0c, 0xf3f42c2c, 0x0c0bd3d4, 0x38380808, 0xc7c7f7f8, 0x08083838,
+        0xf7f7c7c8, 0x3c3c3c3c, 0xc3c3c3c4, 0x403ffbfc, 0xbfc00404, 0xfbfc4040, 0x0403bfc0, 0x2c2bdbdc,
+        0xd3d42424, 0xdbdc2c2c, 0x2423d3d4, 0x3c3be3e4, 0xc3c41c1c, 0xe3e43c3c, 0x1c1bc3c4, 0x5c5c2c2c,
+        0xa3a3d3d4, 0x2c2c5c5c, 0xd3d3a3a4, 0x5c5c1818, 0xa3a3e7e8, 0x18185c5c, 0xe7e7a3a4, 0x60604848,
+        0x9f9fb7b8, 0x48486060, 0xb7b79fa0, 0x5453ebec, 0xabac1414, 0xebec5454, 0x1413abac, 0x64640808,
+        0x9b9bf7f8, 0x08086464, 0xf7f79b9c, 0x6c6c6c6c, 0x93939394, 0x4443bbbc, 0xbbbc4444, 0x5453c7c8,
+        0xabac3838, 0xc7c85454, 0x3837abac, 0x7877f3f4, 0x87880c0c, 0xf3f47878, 0x0c0b8788, 0x6c6bcfd0,
+        0x93943030, 0xcfd06c6c, 0x302f9394, 0x7c7b9798, 0x83846868, 0x97987c7c, 0x68678384, 0x18181818,
+        0xe7e7e7e8, 0x2c2c2c2c, 0xd3d3d3d4, 0x54545454, 0xabababac, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04040404,
+        0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404,
+        0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc,
+        0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000,
+        0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000,
+        0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0x00000404,
+        0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404,
+        0x00000404, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc,
+        0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0x08080404, 0x08080404, 0x08080404, 0x08080404, 0x08080404,
+        0x08080404, 0x08080404, 0x08080404, 0x08080404, 0x08080404, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc,
+        0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0x04040808,
+        0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808,
+        0x04040808, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a,
+        0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x05050f0f, 0xfafaf0f1, 0x0a09f5f6, 0xf5f60a0a, 0x0f0efafb,
+        0xf0f10505, 0xfafb0f0f, 0x0504f0f1, 0x14140a0a, 0xebebf5f6, 0x0a0a1414, 0xf5f5ebec, 0x14141414,
+        0xebebebec, 0x19190000, 0xe6e70000, 0x00001919, 0xffffe6e7, 0x1413f0f1, 0xebec0f0f, 0xf0f11414,
+        0x0f0eebec, 0x28281919, 0xd7d7e6e7, 0x19192828, 0xe6e6d7d8, 0x1e1df5f6, 0xe1e20a0a, 0xf5f61e1e,
+        0x0a09e1e2, 0x28280a0a, 0xd7d7f5f6, 0x0a0a2828, 0xf5f5d7d8, 0x28282828, 0xd7d7d7d8, 0x2d2d0000,
+        0xd2d30000, 0x00002d2d, 0xffffd2d3, 0x1e1de1e2, 0xe1e21e1e, 0x2827ebec, 0xd7d81414, 0xebec2828,
+        0x1413d7d8, 0x41411919, 0xbebee6e7, 0x19194141, 0xe6e6bebf, 0x46462d2d, 0xb9b9d2d3, 0x2d2d4646,
+        0xd2d2b9ba, 0x3736f0f1, 0xc8c90f0f, 0xf0f13737, 0x0f0ec8c9, 0x46460a0a, 0xb9b9f5f6, 0x0a0a4646,
+        0xf5f5b9ba, 0x4b4b4b4b, 0xb4b4b4b5, 0x5554fafb, 0xaaab0505, 0xfafb5555, 0x0504aaab, 0x3736d2d3,
+        0xc8c92d2d, 0xd2d33737, 0x2d2cc8c9, 0x4b4adcdd, 0xb4b52323, 0xdcdd4b4b, 0x2322b4b5, 0x73733737,
+        0x8c8cc8c9, 0x37377373, 0xc8c88c8d, 0x73731e1e, 0x8c8ce1e2, 0x1e1e7373, 0xe1e18c8d, 0x78785a5a,
+        0x8787a5a6, 0x5a5a7878, 0xa5a58788, 0x6968e1e2, 0x96971e1e, 0xe1e26969, 0x1e1d9697, 0x5554aaab,
+        0xaaab5555, 0x6968b9ba, 0x96974646, 0xb9ba6969, 0x46459697, 0x1e1e1e1e, 0xe1e1e1e2, 0x3c3c3c3c,
+        0xc3c3c3c4, 0x69696969, 0x96969697, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x05050505, 0x05050505,
+        0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505,
+        0x05050505, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb,
+        0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0x05050000, 0x05050000, 0x05050000, 0x05050000,
+        0x05050000, 0x05050000, 0x05050000, 0x05050000, 0x05050000, 0x05050000, 0x05050000, 0xfafb0000,
+        0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000,
+        0xfafb0000, 0xfafb0000, 0x00000505, 0x00000505, 0x00000505, 0x00000505, 0x00000505, 0x00000505,
+        0x00000505, 0x00000505, 0x00000505, 0x00000505, 0x00000505, 0xfffffafb, 0xfffffafb, 0xfffffafb,
+        0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb,
+        0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a,
+        0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6,
+        0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0x0f0f0505, 0x0f0f0505,
+        0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505,
+        0x0f0f0505, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb,
+        0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0xf9f9f3f4, 0x0c0bf3f4, 0xf3f40c0c, 0x1211f9fa,
+        0xedee0606, 0xf9fa1212, 0x0605edee, 0x18180c0c, 0xe7e7f3f4, 0x0c0c1818, 0xf3f3e7e8, 0x18181818,
+        0xe7e7e7e8, 0x1e1e0000, 0xe1e20000, 0x00001e1e, 0xffffe1e2, 0x1817edee, 0xe7e81212, 0xedee1818,
+        0x1211e7e8, 0x30301e1e, 0xcfcfe1e2, 0x1e1e3030, 0xe1e1cfd0, 0x2423f9fa, 0xdbdc0606, 0xf9fa2424,
+        0x0605dbdc, 0x30300c0c, 0xcfcff3f4, 0x0c0c3030, 0xf3f3cfd0, 0x30303030, 0xcfcfcfd0, 0x36360000,
+        0xc9ca0000, 0x00003636, 0xffffc9ca, 0x2423dbdc, 0xdbdc2424, 0x302fe7e8, 0xcfd01818, 0xe7e83030,
+        0x1817cfd0, 0x4e4e1e1e, 0xb1b1e1e2, 0x1e1e4e4e, 0xe1e1b1b2, 0x54543636, 0xababc9ca, 0x36365454,
+        0xc9c9abac, 0x4241edee, 0xbdbe1212, 0xedee4242, 0x1211bdbe, 0x54540c0c, 0xababf3f4, 0x0c0c5454,
+        0xf3f3abac, 0x5a5a5a5a, 0xa5a5a5a6, 0x605ff9fa, 0x9fa00606, 0xf9fa6060, 0x06059fa0, 0x4241c9ca,
+        0xbdbe3636, 0xc9ca4242, 0x3635bdbe, 0x5a59d5d6, 0xa5a62a2a, 0xd5d65a5a, 0x2a29a5a6, 0x7e7de1e2,
+        0x81821e1e, 0xe1e27e7e, 0x1e1d8182, 0x6665999a, 0x999a6666, 0x7e7dabac, 0x81825454, 0xabac7e7e,
+        0x54538182, 0x24242424, 0xdbdbdbdc, 0x42424242, 0xbdbdbdbe, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000,
+        0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000,
+        0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000,
+        0xf9fa0000, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606,
+        0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa,
+        0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa,
+        0xfffff9fa, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606,
+        0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa,
+        0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa,
+        0xf3f3f9fa, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c,
+        0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e,
+        0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0xf8f8eaeb, 0x0e0df1f2, 0xf1f20e0e, 0x1514f8f9,
+        0xeaeb0707, 0xf8f91515, 0x0706eaeb, 0x1c1c0e0e, 0xe3e3f1f2, 0x0e0e1c1c, 0xf1f1e3e4, 0x1c1c1c1c,
+        0xe3e3e3e4, 0x23230000, 0xdcdd0000, 0x00002323, 0xffffdcdd, 0x1c1beaeb, 0xe3e41515, 0xeaeb1c1c,
+        0x1514e3e4, 0x38382323, 0xc7c7dcdd, 0x23233838, 0xdcdcc7c8, 0x2a29f1f2, 0xd5d60e0e, 0xf1f22a2a,
+        0x0e0dd5d6, 0x38380e0e, 0xc7c7f1f2, 0x0e0e3838, 0xf1f1c7c8, 0x38383838, 0xc7c7c7c8, 0x3f3f0000,
+        0xc0c10000, 0x00003f3f, 0xffffc0c1, 0x2a29d5d6, 0xd5d62a2a, 0x3837e3e4, 0xc7c81c1c, 0xe3e43838,
+        0x1c1bc7c8, 0x5b5b2323, 0xa4a4dcdd, 0x23235b5b, 0xdcdca4a5, 0x62623f3f, 0x9d9dc0c1, 0x3f3f6262,
+        0xc0c09d9e, 0x4d4ceaeb, 0xb2b31515, 0xeaeb4d4d, 0x1514b2b3, 0x62620e0e, 0x9d9df1f2, 0x0e0e6262,
+        0xf1f19d9e, 0x69696969, 0x96969697, 0x7776f8f9, 0x88890707, 0xf8f97777, 0x07068889, 0x4d4cc0c1,
+        0xb2b33f3f, 0xc0c14d4d, 0x3f3eb2b3, 0x6968cecf, 0x96973131, 0xcecf6969, 0x31309697, 0x77768889,
+        0x88897777, 0x2a2a2a2a, 0xd5d5d5d6, 0x4d4d4d4d, 0xb2b2b2b3, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707,
+        0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9,
+        0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9,
+        0xf8f8f8f9, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000,
+        0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0xf8f90000, 0xf8f90000, 0xf8f90000,
+        0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000,
+        0xf8f90000, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707,
+        0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9,
+        0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9,
+        0xfffff8f9, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e,
+        0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2,
+        0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2,
+        0xf1f1f1f2, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707,
+        0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9,
+        0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9,
+        0xeaeaf8f9, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515,
+        0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010,
+        0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0xf7f7eff0, 0x100feff0, 0xeff01010, 0x1817f7f8,
+        0xe7e80808, 0xf7f81818, 0x0807e7e8, 0x20201010, 0xdfdfeff0, 0x10102020, 0xefefdfe0, 0x20202020,
+        0xdfdfdfe0, 0x28280000, 0xd7d80000, 0x00002828, 0xffffd7d8, 0x201fe7e8, 0xdfe01818, 0xe7e82020,
+        0x1817dfe0, 0x40402828, 0xbfbfd7d8, 0x28284040, 0xd7d7bfc0, 0x302feff0, 0xcfd01010, 0xeff03030,
+        0x100fcfd0, 0x40401010, 0xbfbfeff0, 0x10104040, 0xefefbfc0, 0x40404040, 0xbfbfbfc0, 0x48480000,
+        0xb7b80000, 0x00004848, 0xffffb7b8, 0x302fcfd0, 0xcfd03030, 0x403fdfe0, 0xbfc02020, 0xdfe04040,
+        0x201fbfc0, 0x68682828, 0x9797d7d8, 0x28286868, 0xd7d79798, 0x70704848, 0x8f8fb7b8, 0x48487070,
+        0xb7b78f90, 0x5857e7e8, 0xa7a81818, 0xe7e85858, 0x1817a7a8, 0x70701010, 0x8f8feff0, 0x10107070,
+        0xefef8f90, 0x78787878, 0x87878788, 0x5857b7b8, 0xa7a84848, 0xb7b85858, 0x4847a7a8, 0x7877c7c8,
+        0x87883838, 0xc7c87878, 0x38378788, 0x30303030, 0xcfcfcfd0, 0x58585858, 0xa7a7a7a8, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808,
+        0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0xf7f7f7f8,
+        0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8,
+        0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000,
+        0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0xf7f80000,
+        0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000,
+        0xf7f80000, 0xf7f80000, 0xf7f80000, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808,
+        0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0xfffff7f8,
+        0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8,
+        0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010,
+        0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0xefefeff0,
+        0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0,
+        0xefefeff0, 0xefefeff0, 0xefefeff0, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808,
+        0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0xefeff7f8,
+        0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8,
+        0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010,
+        0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212,
+        0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x1211edee, 0xedee1212, 0x1b1af6f7,
+        0xe4e50909, 0xf6f71b1b, 0x0908e4e5, 0x24241212, 0xdbdbedee, 0x12122424, 0xededdbdc, 0x24242424,
+        0xdbdbdbdc, 0x2d2d0000, 0xd2d30000, 0x00002d2d, 0xffffd2d3, 0x2423e4e5, 0xdbdc1b1b, 0xe4e52424,
+        0x1b1adbdc, 0x48482d2d, 0xb7b7d2d3, 0x2d2d4848, 0xd2d2b7b8, 0x3635edee, 0xc9ca1212, 0xedee3636,
+        0x1211c9ca, 0x48481212, 0xb7b7edee, 0x12124848, 0xededb7b8, 0x48484848, 0xb7b7b7b8, 0x51510000,
+        0xaeaf0000, 0x00005151, 0xffffaeaf, 0x3635c9ca, 0xc9ca3636, 0x4847dbdc, 0xb7b82424, 0xdbdc4848,
+        0x2423b7b8, 0x75752d2d, 0x8a8ad2d3, 0x2d2d7575, 0xd2d28a8b, 0x7e7e5151, 0x8181aeaf, 0x51517e7e,
+        0xaeae8182, 0x6362e4e5, 0x9c9d1b1b, 0xe4e56363, 0x1b1a9c9d, 0x6362aeaf, 0x9c9d5151, 0xaeaf6363,
+        0x51509c9d, 0x36363636, 0xc9c9c9ca, 0x6c6c6c6c, 0x93939394, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909,
+        0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0xf6f6f6f7,
+        0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7,
+        0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0x09090000, 0x09090000, 0x09090000, 0x09090000,
+        0x09090000, 0x09090000, 0x09090000, 0x09090000, 0x09090000, 0x09090000, 0x09090000, 0x09090000,
+        0x09090000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000,
+        0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0x00000909, 0x00000909,
+        0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909,
+        0x00000909, 0x00000909, 0x00000909, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7,
+        0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7,
+        0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212,
+        0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0xedededee, 0xedededee, 0xedededee,
+        0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee,
+        0xedededee, 0xedededee, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909,
+        0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0xe4e4f6f7,
+        0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7,
+        0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b,
+        0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b,
+        0x09091b1b, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5,
+        0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606,
+        0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0504fafb, 0xfafb0505, 0xfafb0505,
+        0x0504fafb, 0x0b0b0606, 0xf4f4f9fa, 0x06060b0b, 0xf9f9f4f5, 0x08080000, 0xf7f80000, 0x00000808,
+        0xfffff7f8, 0x0b0b0b0b, 0xf4f4f4f5, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x11110c0c,
+        0xeeeef3f4, 0x0c0c1111, 0xf3f3eeef, 0x11111111, 0xeeeeeeef, 0x12120606, 0xededf9fa, 0x06061212,
+        0xf9f9edee, 0x0b0af7f8, 0xf4f50808, 0xf7f80b0b, 0x0807f4f5, 0x0f0f0000, 0xf0f10000, 0x00000f0f,
+        0xfffff0f1, 0x14140000, 0xebec0000, 0x00001414, 0xffffebec, 0x19191212, 0xe6e6edee, 0x12121919,
+        0xedede6e7, 0x19190b0b, 0xe6e6f4f5, 0x0b0b1919, 0xf4f4e6e7, 0x19191919, 0xe6e6e6e7, 0x0e0df1f2,
+        0xf1f20e0e, 0xf1f20e0e, 0x0e0df1f2, 0x1a1a0000, 0xe5e60000, 0x00001a1a, 0xffffe5e6, 0x1211f4f5,
+        0xedee0b0b, 0xf4f51212, 0x0b0aedee, 0x1615f8f9, 0xe9ea0707, 0xf8f91616, 0x0706e9ea, 0x22221a1a,
+        0xdddde5e6, 0x1a1a2222, 0xe5e5ddde, 0x22221212, 0xddddedee, 0x12122222, 0xededddde, 0x22222222,
+        0xddddddde, 0x23230b0b, 0xdcdcf4f5, 0x0b0b2323, 0xf4f4dcdd, 0x1d1d0000, 0xe2e30000, 0x00001d1d,
+        0xffffe2e3, 0x1615eced, 0xe9ea1313, 0xeced1616, 0x1312e9ea, 0x1a19f0f1, 0xe5e60f0f, 0xf0f11a1a,
+        0x0f0ee5e6, 0x25250000, 0xdadb0000, 0x00002525, 0xffffdadb, 0x2c2c1b1b, 0xd3d3e4e5, 0x1b1b2c2c,
+        0xe4e4d3d4, 0x2c2c2424, 0xd3d3dbdc, 0x24242c2c, 0xdbdbd3d4, 0x2c2c1212, 0xd3d3edee, 0x12122c2c,
+        0xededd3d4, 0x2120f5f6, 0xdedf0a0a, 0xf5f62121, 0x0a09dedf, 0x2d2d2d2d, 0xd2d2d2d3, 0x00000000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606,
+        0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000,
+        0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000,
+        0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd,
+        0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000,
+        0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa,
+        0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303,
+        0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe,
+        0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606,
+        0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000,
+        0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000,
+        0xf8f90000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0403fbfc, 0xfbfc0404, 0xf9fa0a0a,
+        0x0605f5f6, 0xf3f40000, 0x0c0c0000, 0xf3f3f9fa, 0xf3f40606, 0x0c0bf9fa, 0x0c0c0606, 0xfffff1f2,
+        0x00000e0e, 0x0c0c0c0c, 0xf3f3f3f4, 0xedee0000, 0x12120000, 0xf3f40e0e, 0x0c0bf1f2, 0xf9f9edee,
+        0xf9fa1212, 0x0605edee, 0x06061212, 0xededf5f6, 0xedee0a0a, 0x1211f5f6, 0x12120a0a, 0xffffe9ea,
+        0x00001616, 0xe7e80000, 0x18180000, 0xf3f3e9ea, 0xf3f41616, 0x0c0be9ea, 0x0c0c1616, 0xe7e7f7f8,
+        0xe7e80808, 0x1817f7f8, 0x18180808, 0xf9f9e5e6, 0xf9fa1a1a, 0x0605e5e6, 0x06061a1a, 0xffffe3e4,
+        0x00001c1c, 0x14141414, 0xebebebec, 0xe5e5f1f2, 0x1a1a0e0e, 0xf3f3e1e2, 0x0c0c1e1e, 0xdfdff5f6,
+        0x20200a0a, 0xdfdfedee, 0x20201212, 0xe5e5e5e6, 0x1a1a1a1a, 0xebebddde, 0x14142222, 0xf3f3d9da,
+        0x0c0c2626, 0xdfdfdfe0, 0x20202020, 0x20202020, 0xd7d7e9ea, 0xddddddde, 0x22222222, 0x00000000,
+        0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202,
+        0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606,
+        0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe,
+        0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000,
+        0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000,
+        0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000,
+        0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000,
+        0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000,
+        0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202,
+        0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606,
+        0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe,
+        0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000,
+        0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000,
+        0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a,
+        0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x0605f9fa, 0xf9fa0606, 0xf7f80e0e,
+        0x0807f1f2, 0xffffedee, 0x00001212, 0xeff00a0a, 0x100ff5f6, 0xe7e80000, 0x18180000, 0xf7f7e7e8,
+        0xf7f81818, 0x0807e7e8, 0x08081818, 0x12121212, 0xedededee, 0xeff01414, 0x100febec, 0xe5e5f1f2,
+        0xe5e60e0e, 0x1a19f1f2, 0x1a1a0e0e, 0xffffe1e2, 0x00001e1e, 0xddde0000, 0x22220000, 0xf7f7ddde,
+        0xf7f82222, 0x0807ddde, 0x08082222, 0xedede1e2, 0xedee1e1e, 0x1211e1e2, 0x12121e1e, 0xddddf5f6,
+        0xddde0a0a, 0x2221f5f6, 0x22220a0a, 0xddddebec, 0x22221414, 0xffffd7d8, 0x00002828, 0x1e1e1e1e,
+        0xe1e1e1e2, 0xededd7d8, 0x12122828, 0xd3d40000, 0x2c2c0000, 0xd3d3eff0, 0x2c2c1010, 0xdbdbdbdc,
+        0xdbdbdbdc, 0x24242424, 0xd3d3e5e6, 0x2c2c1a1a, 0xe5e5d1d2, 0x1a1a2e2e, 0xededcbcc, 0x12123434,
+        0xc9c9ebec, 0xd3d3d3d4, 0x2c2c2c2c, 0xc9c9dfe0, 0xd1d1d1d2, 0xd1d1d1d2, 0x2e2e2e2e, 0x00000000,
+        0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6,
+        0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202,
+        0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a,
+        0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc,
+        0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000,
+        0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000,
+        0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe,
+        0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6,
+        0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a,
+        0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000,
+        0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000,
+        0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404,
+        0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000,
+        0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6,
+        0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202,
+        0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a,
+        0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc,
+        0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000,
+        0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000,
+        0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe,
+        0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x0807f7f8, 0xf7f80808, 0xeff00808,
+        0x100ff7f8, 0xe7e80000, 0x18180000, 0xf7f7e7e8, 0xf7f81818, 0x0807e7e8, 0x08081818, 0xeff01414,
+        0x100febec, 0xffffe3e4, 0x00001c1c, 0xe7e7eff0, 0xe7e81010, 0x1817eff0, 0x18181010, 0xdfe00000,
+        0x20200000, 0xefefe3e4, 0xeff01c1c, 0x100fe3e4, 0x10101c1c, 0xdfdff7f8, 0xdfe00808, 0xf7f7dfe0,
+        0xf7f82020, 0x0807dfe0, 0x08082020, 0x201ff7f8, 0x20200808, 0x18181818, 0xe7e7e7e8, 0xe7e81818,
+        0x1817e7e8, 0xdfdfebec, 0x20201414, 0xffffd7d8, 0x00002828, 0xefefd7d8, 0x10102828, 0xd3d40000,
+        0xd3d40000, 0xffffd3d4, 0x00002c2c, 0x2c2c0000, 0x2c2c0000, 0xdfdfdfe0, 0x20202020, 0xd3d3eff0,
+        0x2c2c1010, 0xd3d3e7e8, 0xe7e7d3d4, 0x18182c2c, 0x2c2c1818, 0xefefcfd0, 0x10103030, 0xdbdbdbdc,
+        0xdbdbdbdc, 0x24242424, 0x24242424, 0xcbcbebec, 0x28282828, 0xd7d7d7d8, 0xcbcbdfe0, 0x00000000,
+        0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404,
+        0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c,
+        0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000,
+        0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000,
+        0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc,
+        0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4,
+        0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000,
+        0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000,
+        0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404,
+        0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000,
+        0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404,
+        0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c,
+        0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000,
+        0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000,
+        0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc,
+        0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe,
+        0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0,
+        0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe,
+        0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0,
+        0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe,
+        0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0,
+        0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe,
+        0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0,
+        0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
+};
+
+
+static const uint32_t correctionhighorder[] = {
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101,
+        0xfeff0303, 0x0100fcfd, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303,
+        0x0100fcfd, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000, 0x02020202, 0xfdfdfdfe,
+        0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9,
+        0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707,
+        0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc,
+        0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404,
+        0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101,
+        0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff,
+        0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd,
+        0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303,
+        0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000,
+        0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x04040404, 0xfbfbfbfc,
+        0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000,
+        0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd,
+        0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb,
+        0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101,
+        0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc,
+        0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000,
+        0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd,
+        0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb,
+        0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101,
+        0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc,
+        0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000,
+        0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd,
+        0x03030a0a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202,
+        0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505,
+        0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303,
+        0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9,
+        0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe,
+        0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000,
+        0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5,
+        0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707,
+        0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb,
+        0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd,
+        0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b,
+        0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202,
+        0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505,
+        0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303,
+        0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9,
+        0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc,
+        0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8,
+        0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc,
+        0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8,
+        0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc,
+        0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8,
+        0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc,
+        0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8,
+        0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc,
+        0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8,
+        0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc,
+        0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8,
+        0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb,
+        0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6,
+        0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb,
+        0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6,
+        0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb,
+        0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6,
+        0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb,
+        0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6,
+        0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb,
+        0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6,
+        0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb,
+        0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6,
+        0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee,
+        0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303,
+        0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee,
+        0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303,
+        0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee,
+        0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303,
+        0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee,
+        0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303,
+        0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee,
+        0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303,
+        0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee,
+        0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303,
+        0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x09090909, 0xf6f6f6f7,
+        0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa,
+        0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c,
+        0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000,
+        0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec,
+        0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd,
+        0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717,
+        0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4,
+        0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909,
+        0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606,
+        0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303,
+        0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9,
+        0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414,
+        0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7,
+        0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa,
+        0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c,
+        0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000,
+        0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec,
+        0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd,
+        0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717,
+        0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4,
+        0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000,
+        0x00000202, 0xfffffdfe, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202,
+        0xfffffdfe, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000, 0x02020202, 0xfdfdfdfe,
+        0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa,
+        0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606,
+        0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd,
+        0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303,
+        0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000,
+        0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000,
+        0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd,
+        0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303,
+        0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000,
+        0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x04040404, 0xfbfbfbfc,
+        0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000,
+        0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc,
+        0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc,
+        0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000,
+        0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc,
+        0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000,
+        0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc,
+        0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc,
+        0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000,
+        0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc,
+        0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000,
+        0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc,
+        0x04040808, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000,
+        0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505,
+        0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505,
+        0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb,
+        0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000,
+        0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000,
+        0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6,
+        0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505,
+        0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb,
+        0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb,
+        0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a,
+        0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000,
+        0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505,
+        0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505,
+        0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb,
+        0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa,
+        0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa,
+        0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa,
+        0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa,
+        0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa,
+        0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa,
+        0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9,
+        0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9,
+        0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9,
+        0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9,
+        0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9,
+        0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9,
+        0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9,
+        0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9,
+        0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9,
+        0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9,
+        0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9,
+        0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9,
+        0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0,
+        0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000,
+        0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0,
+        0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000,
+        0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0,
+        0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000,
+        0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0,
+        0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000,
+        0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0,
+        0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000,
+        0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0,
+        0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000,
+        0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x09090909, 0xf6f6f6f7,
+        0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7,
+        0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909,
+        0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000,
+        0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee,
+        0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000,
+        0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b,
+        0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7,
+        0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909,
+        0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909,
+        0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000,
+        0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5,
+        0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212,
+        0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7,
+        0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7,
+        0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909,
+        0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000,
+        0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee,
+        0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000,
+        0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b,
+        0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7,
+        0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000,
+        0x03030000, 0x03030000, 0x03030000, 0x03030000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000,
+        0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0x00000303,
+        0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303,
+        0x00000303, 0x00000303, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd,
+        0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000,
+        0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0xf8f90000, 0xf8f90000,
+        0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000,
+        0xf8f90000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020000, 0x02020000, 0x02020000, 0x02020000,
+        0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000,
+        0x02020000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000,
+        0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0x00000202, 0x00000202,
+        0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202,
+        0x00000202, 0x00000202, 0x00000202, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe,
+        0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe,
+        0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0x06060000, 0x06060000, 0x06060000, 0x06060000,
+        0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000,
+        0x06060000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000,
+        0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0x00000606, 0x00000606,
+        0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606,
+        0x00000606, 0x00000606, 0x00000606, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa,
+        0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020000, 0x02020000, 0x02020000, 0x02020000,
+        0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000,
+        0x02020000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000,
+        0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0x00000202, 0x00000202,
+        0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202,
+        0x00000202, 0x00000202, 0x00000202, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe,
+        0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe,
+        0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404,
+        0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc,
+        0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc,
+        0xfbfbfbfc, 0xfbfbfbfc, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a,
+        0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0xf5f5f5f6,
+        0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6,
+        0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000,
+        0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000,
+        0x0a0a0000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000,
+        0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0x00000a0a, 0x00000a0a,
+        0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a,
+        0x00000a0a, 0x00000a0a, 0x00000a0a, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6,
+        0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04040000, 0x04040000, 0x04040000, 0x04040000,
+        0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000,
+        0x04040000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000,
+        0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0x00000404, 0x00000404,
+        0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404,
+        0x00000404, 0x00000404, 0x00000404, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc,
+        0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc,
+        0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404,
+        0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc,
+        0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc,
+        0xfbfbfbfc, 0xfbfbfbfc, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000,
+        0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000,
+        0x0c0c0000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000,
+        0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0x00000c0c, 0x00000c0c,
+        0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c,
+        0x00000c0c, 0x00000c0c, 0x00000c0c, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4,
+        0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414,
+        0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0,
+        0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414,
+        0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0,
+        0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414,
+        0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0,
+        0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414,
+        0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0,
+        0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
+};
diff --git a/contrib/ffmpeg/libavcodec/interplayvideo.c b/contrib/ffmpeg/libavcodec/interplayvideo.c
new file mode 100644
index 000000000..95059c365
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/interplayvideo.c
@@ -0,0 +1,964 @@
+/*
+ * Interplay MVE Video Decoder
+ * Copyright (C) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file interplayvideo.c
+ * Interplay MVE Video Decoder by Mike Melanson (melanson@pcisys.net)
+ * For more information about the Interplay MVE format, visit:
+ *   http://www.pcisys.net/~melanson/codecs/interplay-mve.txt
+ * This code is written in such a way that the identifiers match up
+ * with the encoding descriptions in the document.
+ *
+ * This decoder presently only supports a PAL8 output colorspace.
+ *
+ * An Interplay video frame consists of 2 parts: The decoding map and
+ * the video data. A demuxer must load these 2 parts together in a single
+ * buffer before sending it through the stream to this decoder.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+
+#define PALETTE_COUNT 256
+
+/* debugging support */
+#define DEBUG_INTERPLAY 0
+#if DEBUG_INTERPLAY
+#define debug_interplay(x,...) av_log(NULL, AV_LOG_DEBUG, x, __VA_ARGS__)
+#else
+static inline void debug_interplay(const char *format, ...) { }
+#endif
+
+typedef struct IpvideoContext {
+
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    AVFrame second_last_frame;
+    AVFrame last_frame;
+    AVFrame current_frame;
+    unsigned char *decoding_map;
+    int decoding_map_size;
+
+    unsigned char *buf;
+    int size;
+
+    unsigned char *stream_ptr;
+    unsigned char *stream_end;
+    unsigned char *pixel_ptr;
+    int line_inc;
+    int stride;
+    int upper_motion_limit_offset;
+
+} IpvideoContext;
+
+#define CHECK_STREAM_PTR(n) \
+  if ((s->stream_ptr + n) > s->stream_end) { \
+    av_log(s->avctx, AV_LOG_ERROR, "Interplay video warning: stream_ptr out of bounds (%p >= %p)\n", \
+      s->stream_ptr + n, s->stream_end); \
+    return -1; \
+  }
+
+#define COPY_FROM_CURRENT() \
+    motion_offset = current_offset; \
+    motion_offset += y * s->stride; \
+    motion_offset += x; \
+    if (motion_offset < 0) { \
+        av_log(s->avctx, AV_LOG_ERROR, " Interplay video: motion offset < 0 (%d)\n", motion_offset); \
+        return -1; \
+    } else if (motion_offset > s->upper_motion_limit_offset) { \
+        av_log(s->avctx, AV_LOG_ERROR, " Interplay video: motion offset above limit (%d >= %d)\n", \
+            motion_offset, s->upper_motion_limit_offset); \
+        return -1; \
+    } \
+    s->dsp.put_pixels_tab[0][0](s->pixel_ptr, \
+        s->current_frame.data[0] + motion_offset, s->stride, 8);
+
+#define COPY_FROM_PREVIOUS() \
+    motion_offset = current_offset; \
+    motion_offset += y * s->stride; \
+    motion_offset += x; \
+    if (motion_offset < 0) { \
+        av_log(s->avctx, AV_LOG_ERROR, " Interplay video: motion offset < 0 (%d)\n", motion_offset); \
+        return -1; \
+    } else if (motion_offset > s->upper_motion_limit_offset) { \
+        av_log(s->avctx, AV_LOG_ERROR, " Interplay video: motion offset above limit (%d >= %d)\n", \
+            motion_offset, s->upper_motion_limit_offset); \
+        return -1; \
+    } \
+    s->dsp.put_pixels_tab[0][0](s->pixel_ptr, \
+        s->last_frame.data[0] + motion_offset, s->stride, 8);
+
+#define COPY_FROM_SECOND_LAST() \
+    motion_offset = current_offset; \
+    motion_offset += y * s->stride; \
+    motion_offset += x; \
+    if (motion_offset < 0) { \
+        av_log(s->avctx, AV_LOG_ERROR, " Interplay video: motion offset < 0 (%d)\n", motion_offset); \
+        return -1; \
+    } else if (motion_offset > s->upper_motion_limit_offset) { \
+        av_log(s->avctx, AV_LOG_ERROR, " Interplay video: motion offset above limit (%d >= %d)\n", \
+            motion_offset, s->upper_motion_limit_offset); \
+        return -1; \
+    } \
+    s->dsp.put_pixels_tab[0][0](s->pixel_ptr, \
+        s->second_last_frame.data[0] + motion_offset, s->stride, 8);
+
+static int ipvideo_decode_block_opcode_0x0(IpvideoContext *s)
+{
+    int x, y;
+    int motion_offset;
+    int current_offset = s->pixel_ptr - s->current_frame.data[0];
+
+    /* copy a block from the previous frame */
+    x = y = 0;
+    COPY_FROM_PREVIOUS();
+
+    /* report success */
+    return 0;
+}
+
+static int ipvideo_decode_block_opcode_0x1(IpvideoContext *s)
+{
+    int x, y;
+    int motion_offset;
+    int current_offset = s->pixel_ptr - s->current_frame.data[0];
+
+    /* copy block from 2 frames ago */
+    x = y = 0;
+    COPY_FROM_SECOND_LAST();
+
+    /* report success */
+    return 0;
+}
+
+static int ipvideo_decode_block_opcode_0x2(IpvideoContext *s)
+{
+    unsigned char B;
+    int x, y;
+    int motion_offset;
+    int current_offset = s->pixel_ptr - s->current_frame.data[0];
+
+    /* copy block from 2 frames ago using a motion vector; need 1 more byte */
+    CHECK_STREAM_PTR(1);
+    B = *s->stream_ptr++;
+
+    if (B < 56) {
+        x = 8 + (B % 7);
+        y = B / 7;
+    } else {
+        x = -14 + ((B - 56) % 29);
+        y =   8 + ((B - 56) / 29);
+    }
+
+    debug_interplay ("    motion byte = %d, (x, y) = (%d, %d)\n", B, x, y);
+    COPY_FROM_SECOND_LAST();
+
+    /* report success */
+    return 0;
+}
+
+static int ipvideo_decode_block_opcode_0x3(IpvideoContext *s)
+{
+    unsigned char B;
+    int x, y;
+    int motion_offset;
+    int current_offset = s->pixel_ptr - s->current_frame.data[0];
+
+    /* copy 8x8 block from current frame from an up/left block */
+
+    /* need 1 more byte for motion */
+    CHECK_STREAM_PTR(1);
+    B = *s->stream_ptr++;
+
+    if (B < 56) {
+        x = -(8 + (B % 7));
+        y = -(B / 7);
+    } else {
+        x = -(-14 + ((B - 56) % 29));
+        y = -(  8 + ((B - 56) / 29));
+    }
+
+    debug_interplay ("    motion byte = %d, (x, y) = (%d, %d)\n", B, x, y);
+    COPY_FROM_CURRENT();
+
+    /* report success */
+    return 0;
+}
+
+static int ipvideo_decode_block_opcode_0x4(IpvideoContext *s)
+{
+    int x, y;
+    unsigned char B, BL, BH;
+    int motion_offset;
+    int current_offset = s->pixel_ptr - s->current_frame.data[0];
+
+    /* copy a block from the previous frame; need 1 more byte */
+    CHECK_STREAM_PTR(1);
+
+    B = *s->stream_ptr++;
+    BL = B & 0x0F;
+    BH = (B >> 4) & 0x0F;
+    x = -8 + BL;
+    y = -8 + BH;
+
+    debug_interplay ("    motion byte = %d, (x, y) = (%d, %d)\n", B, x, y);
+    COPY_FROM_PREVIOUS();
+
+    /* report success */
+    return 0;
+}
+
+static int ipvideo_decode_block_opcode_0x5(IpvideoContext *s)
+{
+    signed char x, y;
+    int motion_offset;
+    int current_offset = s->pixel_ptr - s->current_frame.data[0];
+
+    /* copy a block from the previous frame using an expanded range;
+     * need 2 more bytes */
+    CHECK_STREAM_PTR(2);
+
+    x = *s->stream_ptr++;
+    y = *s->stream_ptr++;
+
+    debug_interplay ("    motion bytes = %d, %d\n", x, y);
+    COPY_FROM_PREVIOUS();
+
+    /* report success */
+    return 0;
+}
+
+static int ipvideo_decode_block_opcode_0x6(IpvideoContext *s)
+{
+    /* mystery opcode? skip multiple blocks? */
+    av_log(s->avctx, AV_LOG_ERROR, "  Interplay video: Help! Mystery opcode 0x6 seen\n");
+
+    /* report success */
+    return 0;
+}
+
+static int ipvideo_decode_block_opcode_0x7(IpvideoContext *s)
+{
+    int x, y;
+    unsigned char P0, P1;
+    unsigned char B[8];
+    unsigned int flags;
+    int bitmask;
+
+    /* 2-color encoding */
+    CHECK_STREAM_PTR(2);
+
+    P0 = *s->stream_ptr++;
+    P1 = *s->stream_ptr++;
+
+    if (P0 <= P1) {
+
+        /* need 8 more bytes from the stream */
+        CHECK_STREAM_PTR(8);
+        for (y = 0; y < 8; y++)
+            B[y] = *s->stream_ptr++;
+
+        for (y = 0; y < 8; y++) {
+            flags = B[y];
+            for (x = 0x01; x <= 0x80; x <<= 1) {
+                if (flags & x)
+                    *s->pixel_ptr++ = P1;
+                else
+                    *s->pixel_ptr++ = P0;
+            }
+            s->pixel_ptr += s->line_inc;
+        }
+
+    } else {
+
+        /* need 2 more bytes from the stream */
+        CHECK_STREAM_PTR(2);
+        B[0] = *s->stream_ptr++;
+        B[1] = *s->stream_ptr++;
+
+        flags = (B[1] << 8) | B[0];
+        bitmask = 0x0001;
+        for (y = 0; y < 8; y += 2) {
+            for (x = 0; x < 8; x += 2, bitmask <<= 1) {
+                if (flags & bitmask) {
+                    *(s->pixel_ptr + x) = P1;
+                    *(s->pixel_ptr + x + 1) = P1;
+                    *(s->pixel_ptr + s->stride + x) = P1;
+                    *(s->pixel_ptr + s->stride + x + 1) = P1;
+                } else {
+                    *(s->pixel_ptr + x) = P0;
+                    *(s->pixel_ptr + x + 1) = P0;
+                    *(s->pixel_ptr + s->stride + x) = P0;
+                    *(s->pixel_ptr + s->stride + x + 1) = P0;
+                }
+            }
+            s->pixel_ptr += s->stride * 2;
+        }
+    }
+
+    /* report success */
+    return 0;
+}
+
+static int ipvideo_decode_block_opcode_0x8(IpvideoContext *s)
+{
+    int x, y;
+    unsigned char P[8];
+    unsigned char B[8];
+    unsigned int flags = 0;
+    unsigned int bitmask = 0;
+    unsigned char P0 = 0, P1 = 0;
+    int lower_half = 0;
+
+    /* 2-color encoding for each 4x4 quadrant, or 2-color encoding on
+     * either top and bottom or left and right halves */
+    CHECK_STREAM_PTR(2);
+
+    P[0] = *s->stream_ptr++;
+    P[1] = *s->stream_ptr++;
+
+    if (P[0] <= P[1]) {
+
+        /* need 12 more bytes */
+        CHECK_STREAM_PTR(12);
+        B[0] = *s->stream_ptr++;  B[1] = *s->stream_ptr++;
+        P[2] = *s->stream_ptr++;  P[3] = *s->stream_ptr++;
+        B[2] = *s->stream_ptr++;  B[3] = *s->stream_ptr++;
+        P[4] = *s->stream_ptr++;  P[5] = *s->stream_ptr++;
+        B[4] = *s->stream_ptr++;  B[5] = *s->stream_ptr++;
+        P[6] = *s->stream_ptr++;  P[7] = *s->stream_ptr++;
+        B[6] = *s->stream_ptr++;  B[7] = *s->stream_ptr++;
+
+        for (y = 0; y < 8; y++) {
+
+            /* time to reload flags? */
+            if (y == 0) {
+                flags =
+                    ((B[0] & 0xF0) <<  4) | ((B[4] & 0xF0) <<  8) |
+                    ((B[0] & 0x0F)      ) | ((B[4] & 0x0F) <<  4) |
+                    ((B[1] & 0xF0) << 20) | ((B[5] & 0xF0) << 24) |
+                    ((B[1] & 0x0F) << 16) | ((B[5] & 0x0F) << 20);
+                bitmask = 0x00000001;
+                lower_half = 0;  /* still on top half */
+            } else if (y == 4) {
+                flags =
+                    ((B[2] & 0xF0) <<  4) | ((B[6] & 0xF0) <<  8) |
+                    ((B[2] & 0x0F)      ) | ((B[6] & 0x0F) <<  4) |
+                    ((B[3] & 0xF0) << 20) | ((B[7] & 0xF0) << 24) |
+                    ((B[3] & 0x0F) << 16) | ((B[7] & 0x0F) << 20);
+                bitmask = 0x00000001;
+                lower_half = 2;
+            }
+
+            for (x = 0; x < 8; x++, bitmask <<= 1) {
+                /* get the pixel values ready for this quadrant */
+                if (x == 0) {
+                    P0 = P[lower_half + 0];
+                    P1 = P[lower_half + 1];
+                } else if (x == 4) {
+                    P0 = P[lower_half + 4];
+                    P1 = P[lower_half + 5];
+                }
+
+                if (flags & bitmask)
+                    *s->pixel_ptr++ = P1;
+                else
+                    *s->pixel_ptr++ = P0;
+            }
+            s->pixel_ptr += s->line_inc;
+        }
+
+    } else {
+
+        /* need 10 more bytes */
+        CHECK_STREAM_PTR(10);
+        B[0] = *s->stream_ptr++;  B[1] = *s->stream_ptr++;
+        B[2] = *s->stream_ptr++;  B[3] = *s->stream_ptr++;
+        P[2] = *s->stream_ptr++;  P[3] = *s->stream_ptr++;
+        B[4] = *s->stream_ptr++;  B[5] = *s->stream_ptr++;
+        B[6] = *s->stream_ptr++;  B[7] = *s->stream_ptr++;
+
+        if (P[2] <= P[3]) {
+
+            /* vertical split; left & right halves are 2-color encoded */
+
+            for (y = 0; y < 8; y++) {
+
+                /* time to reload flags? */
+                if (y == 0) {
+                    flags =
+                        ((B[0] & 0xF0) <<  4) | ((B[4] & 0xF0) <<  8) |
+                        ((B[0] & 0x0F)      ) | ((B[4] & 0x0F) <<  4) |
+                        ((B[1] & 0xF0) << 20) | ((B[5] & 0xF0) << 24) |
+                        ((B[1] & 0x0F) << 16) | ((B[5] & 0x0F) << 20);
+                    bitmask = 0x00000001;
+                } else if (y == 4) {
+                    flags =
+                        ((B[2] & 0xF0) <<  4) | ((B[6] & 0xF0) <<  8) |
+                        ((B[2] & 0x0F)      ) | ((B[6] & 0x0F) <<  4) |
+                        ((B[3] & 0xF0) << 20) | ((B[7] & 0xF0) << 24) |
+                        ((B[3] & 0x0F) << 16) | ((B[7] & 0x0F) << 20);
+                    bitmask = 0x00000001;
+                }
+
+                for (x = 0; x < 8; x++, bitmask <<= 1) {
+                    /* get the pixel values ready for this half */
+                    if (x == 0) {
+                        P0 = P[0];
+                        P1 = P[1];
+                    } else if (x == 4) {
+                        P0 = P[2];
+                        P1 = P[3];
+                    }
+
+                    if (flags & bitmask)
+                        *s->pixel_ptr++ = P1;
+                    else
+                        *s->pixel_ptr++ = P0;
+                }
+                s->pixel_ptr += s->line_inc;
+            }
+
+        } else {
+
+            /* horizontal split; top & bottom halves are 2-color encoded */
+
+            for (y = 0; y < 8; y++) {
+
+                flags = B[y];
+                if (y == 0) {
+                    P0 = P[0];
+                    P1 = P[1];
+                } else if (y == 4) {
+                    P0 = P[2];
+                    P1 = P[3];
+                }
+
+                for (bitmask = 0x01; bitmask <= 0x80; bitmask <<= 1) {
+
+                    if (flags & bitmask)
+                        *s->pixel_ptr++ = P1;
+                    else
+                        *s->pixel_ptr++ = P0;
+                }
+                s->pixel_ptr += s->line_inc;
+            }
+        }
+    }
+
+    /* report success */
+    return 0;
+}
+
+static int ipvideo_decode_block_opcode_0x9(IpvideoContext *s)
+{
+    int x, y;
+    unsigned char P[4];
+    unsigned char B[4];
+    unsigned int flags = 0;
+    int shifter = 0;
+    unsigned char pix;
+
+    /* 4-color encoding */
+    CHECK_STREAM_PTR(4);
+
+    for (y = 0; y < 4; y++)
+        P[y] = *s->stream_ptr++;
+
+    if ((P[0] <= P[1]) && (P[2] <= P[3])) {
+
+        /* 1 of 4 colors for each pixel, need 16 more bytes */
+        CHECK_STREAM_PTR(16);
+
+        for (y = 0; y < 8; y++) {
+            /* get the next set of 8 2-bit flags */
+            flags = (s->stream_ptr[1] << 8) | s->stream_ptr[0];
+            s->stream_ptr += 2;
+            for (x = 0, shifter = 0; x < 8; x++, shifter += 2) {
+                *s->pixel_ptr++ = P[(flags >> shifter) & 0x03];
+            }
+            s->pixel_ptr += s->line_inc;
+        }
+
+    } else if ((P[0] <= P[1]) && (P[2] > P[3])) {
+
+        /* 1 of 4 colors for each 2x2 block, need 4 more bytes */
+        CHECK_STREAM_PTR(4);
+
+        B[0] = *s->stream_ptr++;
+        B[1] = *s->stream_ptr++;
+        B[2] = *s->stream_ptr++;
+        B[3] = *s->stream_ptr++;
+        flags = (B[3] << 24) | (B[2] << 16) | (B[1] << 8) | B[0];
+        shifter = 0;
+
+        for (y = 0; y < 8; y += 2) {
+            for (x = 0; x < 8; x += 2, shifter += 2) {
+                pix = P[(flags >> shifter) & 0x03];
+                *(s->pixel_ptr + x) = pix;
+                *(s->pixel_ptr + x + 1) = pix;
+                *(s->pixel_ptr + s->stride + x) = pix;
+                *(s->pixel_ptr + s->stride + x + 1) = pix;
+            }
+            s->pixel_ptr += s->stride * 2;
+        }
+
+    } else if ((P[0] > P[1]) && (P[2] <= P[3])) {
+
+        /* 1 of 4 colors for each 2x1 block, need 8 more bytes */
+        CHECK_STREAM_PTR(8);
+
+        for (y = 0; y < 8; y++) {
+            /* time to reload flags? */
+            if ((y == 0) || (y == 4)) {
+                B[0] = *s->stream_ptr++;
+                B[1] = *s->stream_ptr++;
+                B[2] = *s->stream_ptr++;
+                B[3] = *s->stream_ptr++;
+                flags = (B[3] << 24) | (B[2] << 16) | (B[1] << 8) | B[0];
+                shifter = 0;
+            }
+            for (x = 0; x < 8; x += 2, shifter += 2) {
+                pix = P[(flags >> shifter) & 0x03];
+                *(s->pixel_ptr + x) = pix;
+                *(s->pixel_ptr + x + 1) = pix;
+            }
+            s->pixel_ptr += s->stride;
+        }
+
+    } else {
+
+        /* 1 of 4 colors for each 1x2 block, need 8 more bytes */
+        CHECK_STREAM_PTR(8);
+
+        for (y = 0; y < 8; y += 2) {
+            /* time to reload flags? */
+            if ((y == 0) || (y == 4)) {
+                B[0] = *s->stream_ptr++;
+                B[1] = *s->stream_ptr++;
+                B[2] = *s->stream_ptr++;
+                B[3] = *s->stream_ptr++;
+                flags = (B[3] << 24) | (B[2] << 16) | (B[1] << 8) | B[0];
+                shifter = 0;
+            }
+            for (x = 0; x < 8; x++, shifter += 2) {
+                pix = P[(flags >> shifter) & 0x03];
+                *(s->pixel_ptr + x) = pix;
+                *(s->pixel_ptr + s->stride + x) = pix;
+            }
+            s->pixel_ptr += s->stride * 2;
+        }
+    }
+
+    /* report success */
+    return 0;
+}
+
+static int ipvideo_decode_block_opcode_0xA(IpvideoContext *s)
+{
+    int x, y;
+    unsigned char P[16];
+    unsigned char B[16];
+    int flags = 0;
+    int shifter = 0;
+    int index;
+    int split;
+    int lower_half;
+
+    /* 4-color encoding for each 4x4 quadrant, or 4-color encoding on
+     * either top and bottom or left and right halves */
+    CHECK_STREAM_PTR(4);
+
+    for (y = 0; y < 4; y++)
+        P[y] = *s->stream_ptr++;
+
+    if (P[0] <= P[1]) {
+
+        /* 4-color encoding for each quadrant; need 28 more bytes */
+        CHECK_STREAM_PTR(28);
+
+        for (y = 0; y < 4; y++)
+            B[y] = *s->stream_ptr++;
+        for (y = 4; y < 16; y += 4) {
+            for (x = y; x < y + 4; x++)
+                P[x] = *s->stream_ptr++;
+            for (x = y; x < y + 4; x++)
+                B[x] = *s->stream_ptr++;
+        }
+
+        for (y = 0; y < 8; y++) {
+
+            lower_half = (y >= 4) ? 4 : 0;
+            flags = (B[y + 8] << 8) | B[y];
+
+            for (x = 0, shifter = 0; x < 8; x++, shifter += 2) {
+                split = (x >= 4) ? 8 : 0;
+                index = split + lower_half + ((flags >> shifter) & 0x03);
+                *s->pixel_ptr++ = P[index];
+            }
+
+            s->pixel_ptr += s->line_inc;
+        }
+
+    } else {
+
+        /* 4-color encoding for either left and right or top and bottom
+         * halves; need 20 more bytes */
+        CHECK_STREAM_PTR(20);
+
+        for (y = 0; y < 8; y++)
+            B[y] = *s->stream_ptr++;
+        for (y = 4; y < 8; y++)
+            P[y] = *s->stream_ptr++;
+        for (y = 8; y < 16; y++)
+            B[y] = *s->stream_ptr++;
+
+        if (P[4] <= P[5]) {
+
+            /* block is divided into left and right halves */
+            for (y = 0; y < 8; y++) {
+
+                flags = (B[y + 8] << 8) | B[y];
+                split = 0;
+
+                for (x = 0, shifter = 0; x < 8; x++, shifter += 2) {
+                    if (x == 4)
+                        split = 4;
+                    *s->pixel_ptr++ = P[split + ((flags >> shifter) & 0x03)];
+                }
+
+                s->pixel_ptr += s->line_inc;
+            }
+
+        } else {
+
+            /* block is divided into top and bottom halves */
+            split = 0;
+            for (y = 0; y < 8; y++) {
+
+                flags = (B[y * 2 + 1] << 8) | B[y * 2];
+                if (y == 4)
+                    split = 4;
+
+                for (x = 0, shifter = 0; x < 8; x++, shifter += 2)
+                    *s->pixel_ptr++ = P[split + ((flags >> shifter) & 0x03)];
+
+                s->pixel_ptr += s->line_inc;
+            }
+        }
+    }
+
+    /* report success */
+    return 0;
+}
+
+static int ipvideo_decode_block_opcode_0xB(IpvideoContext *s)
+{
+    int x, y;
+
+    /* 64-color encoding (each pixel in block is a different color) */
+    CHECK_STREAM_PTR(64);
+
+    for (y = 0; y < 8; y++) {
+        for (x = 0; x < 8; x++) {
+            *s->pixel_ptr++ = *s->stream_ptr++;
+        }
+        s->pixel_ptr += s->line_inc;
+    }
+
+    /* report success */
+    return 0;
+}
+
+static int ipvideo_decode_block_opcode_0xC(IpvideoContext *s)
+{
+    int x, y;
+    unsigned char pix;
+
+    /* 16-color block encoding: each 2x2 block is a different color */
+    CHECK_STREAM_PTR(16);
+
+    for (y = 0; y < 8; y += 2) {
+        for (x = 0; x < 8; x += 2) {
+            pix = *s->stream_ptr++;
+            *(s->pixel_ptr + x) = pix;
+            *(s->pixel_ptr + x + 1) = pix;
+            *(s->pixel_ptr + s->stride + x) = pix;
+            *(s->pixel_ptr + s->stride + x + 1) = pix;
+        }
+        s->pixel_ptr += s->stride * 2;
+    }
+
+    /* report success */
+    return 0;
+}
+
+static int ipvideo_decode_block_opcode_0xD(IpvideoContext *s)
+{
+    int x, y;
+    unsigned char P[4];
+    unsigned char index = 0;
+
+    /* 4-color block encoding: each 4x4 block is a different color */
+    CHECK_STREAM_PTR(4);
+
+    for (y = 0; y < 4; y++)
+        P[y] = *s->stream_ptr++;
+
+    for (y = 0; y < 8; y++) {
+        if (y < 4)
+            index = 0;
+        else
+            index = 2;
+
+        for (x = 0; x < 8; x++) {
+            if (x == 4)
+                index++;
+            *s->pixel_ptr++ = P[index];
+        }
+        s->pixel_ptr += s->line_inc;
+    }
+
+    /* report success */
+    return 0;
+}
+
+static int ipvideo_decode_block_opcode_0xE(IpvideoContext *s)
+{
+    int x, y;
+    unsigned char pix;
+
+    /* 1-color encoding: the whole block is 1 solid color */
+    CHECK_STREAM_PTR(1);
+    pix = *s->stream_ptr++;
+
+    for (y = 0; y < 8; y++) {
+        for (x = 0; x < 8; x++) {
+            *s->pixel_ptr++ = pix;
+        }
+        s->pixel_ptr += s->line_inc;
+    }
+
+    /* report success */
+    return 0;
+}
+
+static int ipvideo_decode_block_opcode_0xF(IpvideoContext *s)
+{
+    int x, y;
+    unsigned char sample0, sample1;
+
+    /* dithered encoding */
+    CHECK_STREAM_PTR(2);
+    sample0 = *s->stream_ptr++;
+    sample1 = *s->stream_ptr++;
+
+    for (y = 0; y < 8; y++) {
+        for (x = 0; x < 8; x += 2) {
+            if (y & 1) {
+                *s->pixel_ptr++ = sample1;
+                *s->pixel_ptr++ = sample0;
+            } else {
+                *s->pixel_ptr++ = sample0;
+                *s->pixel_ptr++ = sample1;
+            }
+        }
+        s->pixel_ptr += s->line_inc;
+    }
+
+    /* report success */
+    return 0;
+}
+
+static int (*ipvideo_decode_block[16])(IpvideoContext *s);
+
+static void ipvideo_decode_opcodes(IpvideoContext *s)
+{
+    int x, y;
+    int index = 0;
+    unsigned char opcode;
+    int ret;
+    int code_counts[16];
+    static int frame = 0;
+
+    debug_interplay("------------------ frame %d\n", frame);
+    frame++;
+
+    for (x = 0; x < 16; x++)
+        code_counts[x] = 0;
+
+    /* this is PAL8, so make the palette available */
+    memcpy(s->current_frame.data[1], s->avctx->palctrl->palette, PALETTE_COUNT * 4);
+
+    s->stride = s->current_frame.linesize[0];
+    s->stream_ptr = s->buf + 14;  /* data starts 14 bytes in */
+    s->stream_end = s->buf + s->size;
+    s->line_inc = s->stride - 8;
+    s->upper_motion_limit_offset = (s->avctx->height - 8) * s->stride
+        + s->avctx->width - 8;
+    s->dsp = s->dsp;
+
+    for (y = 0; y < (s->stride * s->avctx->height); y += s->stride * 8) {
+        for (x = y; x < y + s->avctx->width; x += 8) {
+            /* bottom nibble first, then top nibble (which makes it
+             * hard to use a GetBitcontext) */
+            if (index & 1)
+                opcode = s->decoding_map[index >> 1] >> 4;
+            else
+                opcode = s->decoding_map[index >> 1] & 0xF;
+            index++;
+
+            debug_interplay("  block @ (%3d, %3d): encoding 0x%X, data ptr @ %p\n",
+                x - y, y / s->stride, opcode, s->stream_ptr);
+            code_counts[opcode]++;
+
+            s->pixel_ptr = s->current_frame.data[0] + x;
+            ret = ipvideo_decode_block[opcode](s);
+            if (ret != 0) {
+                av_log(s->avctx, AV_LOG_ERROR, " Interplay video: decode problem on frame %d, @ block (%d, %d)\n",
+                    frame, x - y, y / s->stride);
+                return;
+            }
+        }
+    }
+    if ((s->stream_ptr != s->stream_end) &&
+        (s->stream_ptr + 1 != s->stream_end)) {
+        av_log(s->avctx, AV_LOG_ERROR, " Interplay video: decode finished with %td bytes left over\n",
+            s->stream_end - s->stream_ptr);
+    }
+}
+
+static int ipvideo_decode_init(AVCodecContext *avctx)
+{
+    IpvideoContext *s = avctx->priv_data;
+
+    s->avctx = avctx;
+
+    if (s->avctx->palctrl == NULL) {
+        av_log(avctx, AV_LOG_ERROR, " Interplay video: palette expected.\n");
+        return -1;
+    }
+
+    avctx->pix_fmt = PIX_FMT_PAL8;
+    avctx->has_b_frames = 0;
+    dsputil_init(&s->dsp, avctx);
+
+    /* decoding map contains 4 bits of information per 8x8 block */
+    s->decoding_map_size = avctx->width * avctx->height / (8 * 8 * 2);
+
+    /* assign block decode functions */
+    ipvideo_decode_block[0x0] = ipvideo_decode_block_opcode_0x0;
+    ipvideo_decode_block[0x1] = ipvideo_decode_block_opcode_0x1;
+    ipvideo_decode_block[0x2] = ipvideo_decode_block_opcode_0x2;
+    ipvideo_decode_block[0x3] = ipvideo_decode_block_opcode_0x3;
+    ipvideo_decode_block[0x4] = ipvideo_decode_block_opcode_0x4;
+    ipvideo_decode_block[0x5] = ipvideo_decode_block_opcode_0x5;
+    ipvideo_decode_block[0x6] = ipvideo_decode_block_opcode_0x6;
+    ipvideo_decode_block[0x7] = ipvideo_decode_block_opcode_0x7;
+    ipvideo_decode_block[0x8] = ipvideo_decode_block_opcode_0x8;
+    ipvideo_decode_block[0x9] = ipvideo_decode_block_opcode_0x9;
+    ipvideo_decode_block[0xA] = ipvideo_decode_block_opcode_0xA;
+    ipvideo_decode_block[0xB] = ipvideo_decode_block_opcode_0xB;
+    ipvideo_decode_block[0xC] = ipvideo_decode_block_opcode_0xC;
+    ipvideo_decode_block[0xD] = ipvideo_decode_block_opcode_0xD;
+    ipvideo_decode_block[0xE] = ipvideo_decode_block_opcode_0xE;
+    ipvideo_decode_block[0xF] = ipvideo_decode_block_opcode_0xF;
+
+    s->current_frame.data[0] = s->last_frame.data[0] =
+    s->second_last_frame.data[0] = NULL;
+
+    return 0;
+}
+
+static int ipvideo_decode_frame(AVCodecContext *avctx,
+                                void *data, int *data_size,
+                                uint8_t *buf, int buf_size)
+{
+    IpvideoContext *s = avctx->priv_data;
+    AVPaletteControl *palette_control = avctx->palctrl;
+
+    /* compressed buffer needs to be large enough to at least hold an entire
+     * decoding map */
+    if (buf_size < s->decoding_map_size)
+        return buf_size;
+
+    s->decoding_map = buf;
+    s->buf = buf + s->decoding_map_size;
+    s->size = buf_size - s->decoding_map_size;
+
+    s->current_frame.reference = 3;
+    if (avctx->get_buffer(avctx, &s->current_frame)) {
+        av_log(avctx, AV_LOG_ERROR, "  Interplay Video: get_buffer() failed\n");
+        return -1;
+    }
+
+    ipvideo_decode_opcodes(s);
+
+    if (palette_control->palette_changed) {
+        palette_control->palette_changed = 0;
+        s->current_frame.palette_has_changed = 1;
+    }
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->current_frame;
+
+    /* shuffle frames */
+    if (s->second_last_frame.data[0])
+        avctx->release_buffer(avctx, &s->second_last_frame);
+    s->second_last_frame = s->last_frame;
+    s->last_frame = s->current_frame;
+    s->current_frame.data[0] = NULL;  /* catch any access attempts */
+
+    /* report that the buffer was completely consumed */
+    return buf_size;
+}
+
+static int ipvideo_decode_end(AVCodecContext *avctx)
+{
+    IpvideoContext *s = avctx->priv_data;
+
+    /* release the last frame */
+    if (s->last_frame.data[0])
+        avctx->release_buffer(avctx, &s->last_frame);
+    if (s->second_last_frame.data[0])
+        avctx->release_buffer(avctx, &s->second_last_frame);
+
+    return 0;
+}
+
+AVCodec interplay_video_decoder = {
+    "interplayvideo",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_INTERPLAY_VIDEO,
+    sizeof(IpvideoContext),
+    ipvideo_decode_init,
+    NULL,
+    ipvideo_decode_end,
+    ipvideo_decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/jfdctfst.c b/contrib/ffmpeg/libavcodec/jfdctfst.c
new file mode 100644
index 000000000..38424563d
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/jfdctfst.c
@@ -0,0 +1,338 @@
+/*
+ * jfdctfst.c
+ *
+ * This file is part of the Independent JPEG Group's software.
+ *
+ * The authors make NO WARRANTY or representation, either express or implied,
+ * with respect to this software, its quality, accuracy, merchantability, or
+ * fitness for a particular purpose.  This software is provided "AS IS", and
+ * you, its user, assume the entire risk as to its quality and accuracy.
+ *
+ * This software is copyright (C) 1994-1996, Thomas G. Lane.
+ * All Rights Reserved except as specified below.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute this
+ * software (or portions thereof) for any purpose, without fee, subject to
+ * these conditions:
+ * (1) If any part of the source code for this software is distributed, then
+ * this README file must be included, with this copyright and no-warranty
+ * notice unaltered; and any additions, deletions, or changes to the original
+ * files must be clearly indicated in accompanying documentation.
+ * (2) If only executable code is distributed, then the accompanying
+ * documentation must state that "this software is based in part on the work
+ * of the Independent JPEG Group".
+ * (3) Permission for use of this software is granted only if the user accepts
+ * full responsibility for any undesirable consequences; the authors accept
+ * NO LIABILITY for damages of any kind.
+ *
+ * These conditions apply to any software derived from or based on the IJG
+ * code, not just to the unmodified library.  If you use our work, you ought
+ * to acknowledge us.
+ *
+ * Permission is NOT granted for the use of any IJG author's name or company
+ * name in advertising or publicity relating to this software or products
+ * derived from it.  This software may be referred to only as "the Independent
+ * JPEG Group's software".
+ *
+ * We specifically permit and encourage the use of this software as the basis
+ * of commercial products, provided that all warranty or liability claims are
+ * assumed by the product vendor.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+/**
+ * @file jfdctfst.c
+ * Independent JPEG Group's fast AAN dct.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "common.h"
+#include "dsputil.h"
+
+#define DCTSIZE 8
+#define GLOBAL(x) x
+#define RIGHT_SHIFT(x, n) ((x) >> (n))
+#define SHIFT_TEMPS
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jfdctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * Again to save a few shifts, the intermediate results between pass 1 and
+ * pass 2 are not upscaled, but are represented only to integral precision.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#define CONST_BITS  8
+
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8
+#define FIX_0_382683433  ((int32_t)   98)       /* FIX(0.382683433) */
+#define FIX_0_541196100  ((int32_t)  139)       /* FIX(0.541196100) */
+#define FIX_0_707106781  ((int32_t)  181)       /* FIX(0.707106781) */
+#define FIX_1_306562965  ((int32_t)  334)       /* FIX(1.306562965) */
+#else
+#define FIX_0_382683433  FIX(0.382683433)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_707106781  FIX(0.707106781)
+#define FIX_1_306562965  FIX(1.306562965)
+#endif
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING
+#undef DESCALE
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
+#endif
+
+
+/* Multiply a DCTELEM variable by an int32_t constant, and immediately
+ * descale to yield a DCTELEM result.
+ */
+
+#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
+
+static always_inline void row_fdct(DCTELEM * data){
+  int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int_fast16_t tmp10, tmp11, tmp12, tmp13;
+  int_fast16_t z1, z2, z3, z4, z5, z11, z13;
+  DCTELEM *dataptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[4] = tmp10 - tmp11;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[2] = tmp13 + z1;    /* phase 5 */
+    dataptr[6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5;    /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5;    /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781);         /* c4 */
+
+    z11 = tmp7 + z3;            /* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[5] = z13 + z2;      /* phase 6 */
+    dataptr[3] = z13 - z2;
+    dataptr[1] = z11 + z4;
+    dataptr[7] = z11 - z4;
+
+    dataptr += DCTSIZE;         /* advance pointer to next row */
+  }
+}
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+
+GLOBAL(void)
+fdct_ifast (DCTELEM * data)
+{
+  int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int_fast16_t tmp10, tmp11, tmp12, tmp13;
+  int_fast16_t z1, z2, z3, z4, z5, z11, z13;
+  DCTELEM *dataptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  row_fdct(data);
+
+  /* Pass 2: process columns. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[DCTSIZE*4] = tmp10 - tmp11;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
+    dataptr[DCTSIZE*6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;            /* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
+    dataptr[DCTSIZE*3] = z13 - z2;
+    dataptr[DCTSIZE*1] = z11 + z4;
+    dataptr[DCTSIZE*7] = z11 - z4;
+
+    dataptr++;                  /* advance pointer to next column */
+  }
+}
+
+/*
+ * Perform the forward 2-4-8 DCT on one block of samples.
+ */
+
+GLOBAL(void)
+fdct_ifast248 (DCTELEM * data)
+{
+  int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int_fast16_t tmp10, tmp11, tmp12, tmp13;
+  int_fast16_t z1;
+  DCTELEM *dataptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  row_fdct(data);
+
+  /* Pass 2: process columns. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
+    tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
+    tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
+    tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
+    tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
+    tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    tmp13 = tmp0 - tmp3;
+
+    dataptr[DCTSIZE*0] = tmp10 + tmp11;
+    dataptr[DCTSIZE*4] = tmp10 - tmp11;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
+    dataptr[DCTSIZE*2] = tmp13 + z1;
+    dataptr[DCTSIZE*6] = tmp13 - z1;
+
+    tmp10 = tmp4 + tmp7;
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp5 - tmp6;
+    tmp13 = tmp4 - tmp7;
+
+    dataptr[DCTSIZE*1] = tmp10 + tmp11;
+    dataptr[DCTSIZE*5] = tmp10 - tmp11;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
+    dataptr[DCTSIZE*3] = tmp13 + z1;
+    dataptr[DCTSIZE*7] = tmp13 - z1;
+
+    dataptr++;                        /* advance pointer to next column */
+  }
+}
+
+
+#undef GLOBAL
+#undef CONST_BITS
+#undef DESCALE
+#undef FIX_0_541196100
+#undef FIX_1_306562965
diff --git a/contrib/ffmpeg/libavcodec/jfdctint.c b/contrib/ffmpeg/libavcodec/jfdctint.c
new file mode 100644
index 000000000..58f3a1446
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/jfdctint.c
@@ -0,0 +1,406 @@
+/*
+ * jfdctint.c
+ *
+ * This file is part of the Independent JPEG Group's software.
+ *
+ * The authors make NO WARRANTY or representation, either express or implied,
+ * with respect to this software, its quality, accuracy, merchantability, or
+ * fitness for a particular purpose.  This software is provided "AS IS", and
+ * you, its user, assume the entire risk as to its quality and accuracy.
+ *
+ * This software is copyright (C) 1991-1996, Thomas G. Lane.
+ * All Rights Reserved except as specified below.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute this
+ * software (or portions thereof) for any purpose, without fee, subject to
+ * these conditions:
+ * (1) If any part of the source code for this software is distributed, then
+ * this README file must be included, with this copyright and no-warranty
+ * notice unaltered; and any additions, deletions, or changes to the original
+ * files must be clearly indicated in accompanying documentation.
+ * (2) If only executable code is distributed, then the accompanying
+ * documentation must state that "this software is based in part on the work
+ * of the Independent JPEG Group".
+ * (3) Permission for use of this software is granted only if the user accepts
+ * full responsibility for any undesirable consequences; the authors accept
+ * NO LIABILITY for damages of any kind.
+ *
+ * These conditions apply to any software derived from or based on the IJG
+ * code, not just to the unmodified library.  If you use our work, you ought
+ * to acknowledge us.
+ *
+ * Permission is NOT granted for the use of any IJG author's name or company
+ * name in advertising or publicity relating to this software or products
+ * derived from it.  This software may be referred to only as "the Independent
+ * JPEG Group's software".
+ *
+ * We specifically permit and encourage the use of this software as the basis
+ * of commercial products, provided that all warranty or liability claims are
+ * assumed by the product vendor.
+ *
+ * This file contains a slow-but-accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ */
+
+/**
+ * @file jfdctint.c
+ * Independent JPEG Group's slow & accurate dct.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "common.h"
+#include "dsputil.h"
+
+#define SHIFT_TEMPS
+#define DCTSIZE 8
+#define BITS_IN_JSAMPLE 8
+#define GLOBAL(x) x
+#define RIGHT_SHIFT(x, n) ((x) >> (n))
+#define MULTIPLY16C16(var,const) ((var)*(const))
+
+#if 1 //def USE_ACCURATE_ROUNDING
+#define DESCALE(x,n)  RIGHT_SHIFT((x) + (1 << ((n) - 1)), n)
+#else
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
+#endif
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/*
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true DCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D DCT,
+ * because the y0 and y4 outputs need not be divided by sqrt(N).
+ * In the IJG code, this factor of 8 is removed by the quantization step
+ * (in jcdctmgr.c), NOT in this module.
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (For 12-bit sample data, the intermediate
+ * array is int32_t anyway.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13
+#define PASS1_BITS  4   /* set this to 2 if 16x16 multiplies are faster */
+#else
+#define CONST_BITS  13
+#define PASS1_BITS  1   /* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_298631336  ((int32_t)  2446)      /* FIX(0.298631336) */
+#define FIX_0_390180644  ((int32_t)  3196)      /* FIX(0.390180644) */
+#define FIX_0_541196100  ((int32_t)  4433)      /* FIX(0.541196100) */
+#define FIX_0_765366865  ((int32_t)  6270)      /* FIX(0.765366865) */
+#define FIX_0_899976223  ((int32_t)  7373)      /* FIX(0.899976223) */
+#define FIX_1_175875602  ((int32_t)  9633)      /* FIX(1.175875602) */
+#define FIX_1_501321110  ((int32_t)  12299)     /* FIX(1.501321110) */
+#define FIX_1_847759065  ((int32_t)  15137)     /* FIX(1.847759065) */
+#define FIX_1_961570560  ((int32_t)  16069)     /* FIX(1.961570560) */
+#define FIX_2_053119869  ((int32_t)  16819)     /* FIX(2.053119869) */
+#define FIX_2_562915447  ((int32_t)  20995)     /* FIX(2.562915447) */
+#define FIX_3_072711026  ((int32_t)  25172)     /* FIX(3.072711026) */
+#else
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif
+
+
+/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8 && CONST_BITS<=13 && PASS1_BITS<=2
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+static always_inline void row_fdct(DCTELEM * data){
+  int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int_fast32_t tmp10, tmp11, tmp12, tmp13;
+  int_fast32_t z1, z2, z3, z4, z5;
+  DCTELEM *dataptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
+    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+                                   CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+                                   CONST_BITS-PASS1_BITS);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents cos(K*pi/16).
+     * i0..i3 in the paper are tmp4..tmp7 here.
+     */
+
+    z1 = tmp4 + tmp7;
+    z2 = tmp5 + tmp6;
+    z3 = tmp4 + tmp6;
+    z4 = tmp5 + tmp7;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+    z3 += z5;
+    z4 += z5;
+
+    dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
+    dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;         /* advance pointer to next row */
+  }
+}
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+
+GLOBAL(void)
+ff_jpeg_fdct_islow (DCTELEM * data)
+{
+  int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int_fast32_t tmp10, tmp11, tmp12, tmp13;
+  int_fast32_t z1, z2, z3, z4, z5;
+  DCTELEM *dataptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  row_fdct(data);
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+                                           CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+                                           CONST_BITS+PASS1_BITS);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents cos(K*pi/16).
+     * i0..i3 in the paper are tmp4..tmp7 here.
+     */
+
+    z1 = tmp4 + tmp7;
+    z2 = tmp5 + tmp6;
+    z3 = tmp4 + tmp6;
+    z4 = tmp5 + tmp7;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+    z3 += z5;
+    z4 += z5;
+
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,
+                                           CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,
+                                           CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,
+                                           CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,
+                                           CONST_BITS+PASS1_BITS);
+
+    dataptr++;                  /* advance pointer to next column */
+  }
+}
+
+/*
+ * The secret of DCT2-4-8 is really simple -- you do the usual 1-DCT
+ * on the rows and then, instead of doing even and odd, part on the colums
+ * you do even part two times.
+ */
+GLOBAL(void)
+ff_fdct248_islow (DCTELEM * data)
+{
+  int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int_fast32_t tmp10, tmp11, tmp12, tmp13;
+  int_fast32_t z1;
+  DCTELEM *dataptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  row_fdct(data);
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
+     tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
+     tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
+     tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
+     tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
+     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
+     tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
+     tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
+
+     tmp10 = tmp0 + tmp3;
+     tmp11 = tmp1 + tmp2;
+     tmp12 = tmp1 - tmp2;
+     tmp13 = tmp0 - tmp3;
+
+     dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
+     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
+
+     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+                                            CONST_BITS+PASS1_BITS);
+     dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+                                            CONST_BITS+PASS1_BITS);
+
+     tmp10 = tmp4 + tmp7;
+     tmp11 = tmp5 + tmp6;
+     tmp12 = tmp5 - tmp6;
+     tmp13 = tmp4 - tmp7;
+
+     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
+     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
+
+     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+                                            CONST_BITS+PASS1_BITS);
+     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+                                            CONST_BITS+PASS1_BITS);
+
+     dataptr++;                 /* advance pointer to next column */
+  }
+}
diff --git a/contrib/ffmpeg/libavcodec/jpeg_ls.c b/contrib/ffmpeg/libavcodec/jpeg_ls.c
new file mode 100644
index 000000000..1b4df2b1a
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/jpeg_ls.c
@@ -0,0 +1,855 @@
+/*
+ * JPEG-LS encoder and decoder
+ * Copyright (c) 2003 Michael Niedermayer
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "golomb.h"
+
+/**
+ * @file jpeg_ls.c
+ * JPEG-LS encoder and decoder.
+ */
+
+typedef struct JpeglsContext{
+    AVCodecContext *avctx;
+    AVFrame picture;
+}JpeglsContext;
+
+typedef struct JLSState{
+    int T1, T2, T3;
+    int A[367], B[367], C[365], N[367];
+    int limit, reset, bpp, qbpp, maxval, range;
+    int near, twonear;
+    int run_index[3];
+}JLSState;
+
+static const uint8_t log2_run[32]={
+ 0, 0, 0, 0, 1, 1, 1, 1,
+ 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 5, 5, 6, 6, 7, 7,
+ 8, 9,10,11,12,13,14,15
+};
+
+/*
+* Uncomment this to significantly speed up decoding of broken JPEG-LS
+* (or test broken JPEG-LS decoder) and slow down ordinary decoding a bit.
+*
+* There is no Golomb code with length >= 32 bits possible, so check and
+* avoid situation of 32 zeros, FFmpeg Golomb decoder is painfully slow
+* on this errors.
+*/
+//#define JLS_BROKEN
+
+/********** Functions for both encoder and decoder **********/
+
+/**
+ * Calculate initial JPEG-LS parameters
+ */
+static void ls_init_state(JLSState *state){
+    int i;
+
+    state->twonear = state->near * 2 + 1;
+    state->range = ((state->maxval + state->twonear - 1) / state->twonear) + 1;
+
+    // QBPP = ceil(log2(RANGE))
+    for(state->qbpp = 0; (1 << state->qbpp) < state->range; state->qbpp++);
+
+    if(state->bpp < 8)
+        state->limit = 16 + 2 * state->bpp - state->qbpp;
+    else
+        state->limit = (4 * state->bpp) - state->qbpp;
+
+    for(i = 0; i < 367; i++) {
+        state->A[i] = FFMAX((state->range + 32) >> 6, 2);
+        state->N[i] = 1;
+    }
+
+}
+
+/**
+ * Calculate quantized gradient value, used for context determination
+ */
+static inline int quantize(JLSState *s, int v){ //FIXME optimize
+    if(v==0) return 0;
+    if(v < 0){
+        if(v <= -s->T3) return -4;
+        if(v <= -s->T2) return -3;
+        if(v <= -s->T1) return -2;
+        if(v <  -s->near) return -1;
+        return 0;
+    }else{
+        if(v <= s->near) return 0;
+        if(v <  s->T1) return 1;
+        if(v <  s->T2) return 2;
+        if(v <  s->T3) return 3;
+        return 4;
+    }
+}
+
+/**
+ * Custom value clipping function used in T1, T2, T3 calculation
+ */
+static inline int iso_clip(int v, int vmin, int vmax){
+    if(v > vmax || v < vmin) return vmin;
+    else                     return v;
+}
+
+/**
+ * Calculate JPEG-LS codec values
+ */
+static void reset_ls_coding_parameters(JLSState *s, int reset_all){
+    const int basic_t1= 3;
+    const int basic_t2= 7;
+    const int basic_t3= 21;
+    int factor;
+
+    if(s->maxval==0 || reset_all) s->maxval= (1 << s->bpp) - 1;
+
+    if(s->maxval >=128){
+        factor= (FFMIN(s->maxval, 4095) + 128)>>8;
+
+        if(s->T1==0     || reset_all)
+            s->T1= iso_clip(factor*(basic_t1-2) + 2 + 3*s->near, s->near+1, s->maxval);
+        if(s->T2==0     || reset_all)
+            s->T2= iso_clip(factor*(basic_t2-3) + 3 + 5*s->near, s->T1, s->maxval);
+        if(s->T3==0     || reset_all)
+            s->T3= iso_clip(factor*(basic_t3-4) + 4 + 7*s->near, s->T2, s->maxval);
+    }else{
+        factor= 256 / (s->maxval + 1);
+
+        if(s->T1==0     || reset_all)
+            s->T1= iso_clip(FFMAX(2, basic_t1/factor + 3*s->near), s->near+1, s->maxval);
+        if(s->T2==0     || reset_all)
+            s->T2= iso_clip(FFMAX(3, basic_t2/factor + 5*s->near), s->T1, s->maxval);
+        if(s->T3==0     || reset_all)
+            s->T3= iso_clip(FFMAX(4, basic_t3/factor + 6*s->near), s->T2, s->maxval);
+    }
+
+    if(s->reset==0  || reset_all) s->reset= 64;
+//    av_log(NULL, AV_LOG_DEBUG, "[JPEG-LS RESET] T=%i,%i,%i\n", s->T1, s->T2, s->T3);
+}
+
+
+/********** Decoder-specific functions **********/
+
+/**
+ * Decode LSE block with initialization parameters
+ */
+static int decode_lse(MJpegDecodeContext *s)
+{
+    int len, id;
+
+    /* XXX: verify len field validity */
+    len = get_bits(&s->gb, 16);
+    id = get_bits(&s->gb, 8);
+
+    switch(id){
+    case 1:
+        s->maxval= get_bits(&s->gb, 16);
+        s->t1= get_bits(&s->gb, 16);
+        s->t2= get_bits(&s->gb, 16);
+        s->t3= get_bits(&s->gb, 16);
+        s->reset= get_bits(&s->gb, 16);
+
+//        reset_ls_coding_parameters(s, 0);
+        //FIXME quant table?
+        break;
+    case 2:
+    case 3:
+        av_log(s->avctx, AV_LOG_ERROR, "palette not supported\n");
+        return -1;
+    case 4:
+        av_log(s->avctx, AV_LOG_ERROR, "oversize image not supported\n");
+        return -1;
+    default:
+        av_log(s->avctx, AV_LOG_ERROR, "invalid id %d\n", id);
+        return -1;
+    }
+//    av_log(s->avctx, AV_LOG_DEBUG, "ID=%i, T=%i,%i,%i\n", id, s->t1, s->t2, s->t3);
+
+    return 0;
+}
+
+static void inline downscale_state(JLSState *state, int Q){
+    if(state->N[Q] == state->reset){
+        state->A[Q] >>=1;
+        state->B[Q] >>=1;
+        state->N[Q] >>=1;
+    }
+    state->N[Q]++;
+}
+
+static inline int update_state_regular(JLSState *state, int Q, int err){
+    state->A[Q] += FFABS(err);
+    err *= state->twonear;
+    state->B[Q] += err;
+
+    downscale_state(state, Q);
+
+    if(state->B[Q] <= -state->N[Q]) {
+        state->B[Q]= FFMAX(state->B[Q] + state->N[Q], 1-state->N[Q]);
+        if(state->C[Q] > -128)
+            state->C[Q]--;
+    }else if(state->B[Q] > 0){
+        state->B[Q]= FFMIN(state->B[Q] - state->N[Q], 0);
+        if(state->C[Q] < 127)
+            state->C[Q]++;
+    }
+
+    return err;
+}
+
+/**
+ * Get context-dependent Golomb code, decode it and update context
+ */
+static inline int ls_get_code_regular(GetBitContext *gb, JLSState *state, int Q){
+    int k, ret;
+
+    for(k = 0; (state->N[Q] << k) < state->A[Q]; k++);
+
+#ifdef JLS_BROKEN
+    if(!show_bits_long(gb, 32))return -1;
+#endif
+    ret = get_ur_golomb_jpegls(gb, k, state->limit, state->qbpp);
+
+    /* decode mapped error */
+    if(ret & 1)
+        ret = -((ret + 1) >> 1);
+    else
+        ret >>= 1;
+
+    /* for NEAR=0, k=0 and 2*B[Q] <= - N[Q] mapping is reversed */
+    if(!state->near && !k && (2 * state->B[Q] <= -state->N[Q]))
+        ret = -(ret + 1);
+
+    ret= update_state_regular(state, Q, ret);
+
+    return ret;
+}
+
+/**
+ * Get Golomb code, decode it and update state for run termination
+ */
+static inline int ls_get_code_runterm(GetBitContext *gb, JLSState *state, int RItype, int limit_add){
+    int k, ret, temp, map;
+    int Q = 365 + RItype;
+
+    temp=  state->A[Q];
+    if(RItype)
+        temp += state->N[Q] >> 1;
+
+    for(k = 0; (state->N[Q] << k) < temp; k++);
+
+#ifdef JLS_BROKEN
+    if(!show_bits_long(gb, 32))return -1;
+#endif
+    ret = get_ur_golomb_jpegls(gb, k, state->limit - limit_add - 1, state->qbpp);
+
+    /* decode mapped error */
+    map = 0;
+    if(!k && (RItype || ret) && (2 * state->B[Q] < state->N[Q]))
+        map = 1;
+    ret += RItype + map;
+
+    if(ret & 1){
+        ret = map - ((ret + 1) >> 1);
+        state->B[Q]++;
+    } else {
+        ret = ret >> 1;
+    }
+
+    /* update state */
+    state->A[Q] += FFABS(ret) - RItype;
+    ret *= state->twonear;
+    downscale_state(state, Q);
+
+    return ret;
+}
+
+#define R(a, i   ) (bits == 8 ?  ((uint8_t*)(a))[i]    :  ((uint16_t*)(a))[i]  )
+#define W(a, i, v) (bits == 8 ? (((uint8_t*)(a))[i]=v) : (((uint16_t*)(a))[i]=v))
+/**
+ * Decode one line of image
+ */
+static inline void ls_decode_line(JLSState *state, MJpegDecodeContext *s, void *last, void *dst, int last2, int w, int stride, int comp, int bits){
+    int i, x = 0;
+    int Ra, Rb, Rc, Rd;
+    int D0, D1, D2;
+
+    while(x < w) {
+        int err, pred;
+
+        /* compute gradients */
+        Ra = x ? R(dst, x - stride) : R(last, x);
+        Rb = R(last, x);
+        Rc = x ? R(last, x - stride) : last2;
+        Rd = (x >= w - stride) ? R(last, x) : R(last, x + stride);
+        D0 = Rd - Rb;
+        D1 = Rb - Rc;
+        D2 = Rc - Ra;
+        /* run mode */
+        if((FFABS(D0) <= state->near) && (FFABS(D1) <= state->near) && (FFABS(D2) <= state->near)) {
+            int r;
+            int RItype;
+
+            /* decode full runs while available */
+            while(get_bits1(&s->gb)) {
+                int r;
+                r = 1 << log2_run[state->run_index[comp]];
+                if(x + r * stride > w) {
+                    r = (w - x) / stride;
+                }
+                for(i = 0; i < r; i++) {
+                    W(dst, x, Ra);
+                    x += stride;
+                }
+                /* if EOL reached, we stop decoding */
+                if(r != (1 << log2_run[state->run_index[comp]]))
+                    return;
+                if(state->run_index[comp] < 31)
+                    state->run_index[comp]++;
+                if(x + stride > w)
+                    return;
+            }
+            /* decode aborted run */
+            r = log2_run[state->run_index[comp]];
+            if(r)
+                r = get_bits_long(&s->gb, r);
+            for(i = 0; i < r; i++) {
+                W(dst, x, Ra);
+                x += stride;
+            }
+
+            /* decode run termination value */
+            Rb = R(last, x);
+            RItype = (FFABS(Ra - Rb) <= state->near) ? 1 : 0;
+            err = ls_get_code_runterm(&s->gb, state, RItype, log2_run[state->run_index[comp]]);
+            if(state->run_index[comp])
+                state->run_index[comp]--;
+
+            if(state->near && RItype){
+                pred = Ra + err;
+            } else {
+                if(Rb < Ra)
+                    pred = Rb - err;
+                else
+                    pred = Rb + err;
+            }
+        } else { /* regular mode */
+            int context, sign;
+
+            context = quantize(state, D0) * 81 + quantize(state, D1) * 9 + quantize(state, D2);
+            pred = mid_pred(Ra, Ra + Rb - Rc, Rb);
+
+            if(context < 0){
+                context = -context;
+                sign = 1;
+            }else{
+                sign = 0;
+            }
+
+            if(sign){
+                pred = clip(pred - state->C[context], 0, state->maxval);
+                err = -ls_get_code_regular(&s->gb, state, context);
+            } else {
+                pred = clip(pred + state->C[context], 0, state->maxval);
+                err = ls_get_code_regular(&s->gb, state, context);
+            }
+
+            /* we have to do something more for near-lossless coding */
+            pred += err;
+        }
+        if(state->near){
+            if(pred < -state->near)
+                pred += state->range * state->twonear;
+            else if(pred > state->maxval + state->near)
+                pred -= state->range * state->twonear;
+            pred = clip(pred, 0, state->maxval);
+        }
+
+        pred &= state->maxval;
+        W(dst, x, pred);
+        x += stride;
+    }
+}
+
+static int ls_decode_picture(MJpegDecodeContext *s, int near, int point_transform, int ilv){
+    int i, t = 0;
+    uint8_t *zero, *last, *cur;
+    JLSState *state;
+    int off = 0, stride = 1, width, shift;
+
+    zero = av_mallocz(s->picture.linesize[0]);
+    last = zero;
+    cur = s->picture.data[0];
+
+    state = av_mallocz(sizeof(JLSState));
+    /* initialize JPEG-LS state from JPEG parameters */
+    state->near = near;
+    state->bpp = (s->bits < 2) ? 2 : s->bits;
+    state->maxval = s->maxval;
+    state->T1 = s->t1;
+    state->T2 = s->t2;
+    state->T3 = s->t3;
+    state->reset = s->reset;
+    reset_ls_coding_parameters(state, 0);
+    ls_init_state(state);
+
+    if(s->bits <= 8)
+        shift = point_transform + (8 - s->bits);
+    else
+        shift = point_transform + (16 - s->bits);
+
+//    av_log(s->avctx, AV_LOG_DEBUG, "JPEG-LS params: %ix%i NEAR=%i MV=%i T(%i,%i,%i) RESET=%i, LIMIT=%i, qbpp=%i, RANGE=%i\n",s->width,s->height,state->near,state->maxval,state->T1,state->T2,state->T3,state->reset,state->limit,state->qbpp, state->range);
+//    av_log(s->avctx, AV_LOG_DEBUG, "JPEG params: ILV=%i Pt=%i BPP=%i, scan = %i\n", ilv, point_transform, s->bits, s->cur_scan);
+    if(ilv == 0) { /* separate planes */
+        off = s->cur_scan - 1;
+        stride = (s->nb_components > 1) ? 3 : 1;
+        width = s->width * stride;
+        cur += off;
+        for(i = 0; i < s->height; i++) {
+            if(s->bits <= 8){
+                ls_decode_line(state, s, last, cur, t, width, stride, off,  8);
+                t = last[0];
+            }else{
+                ls_decode_line(state, s, last, cur, t, width, stride, off, 16);
+                t = *((uint16_t*)last);
+            }
+            last = cur;
+            cur += s->picture.linesize[0];
+
+            if (s->restart_interval && !--s->restart_count) {
+                align_get_bits(&s->gb);
+                skip_bits(&s->gb, 16); /* skip RSTn */
+            }
+        }
+    } else if(ilv == 1) { /* line interleaving */
+        int j;
+        int Rc[3] = {0, 0, 0};
+        memset(cur, 0, s->picture.linesize[0]);
+        width = s->width * 3;
+        for(i = 0; i < s->height; i++) {
+            for(j = 0; j < 3; j++) {
+                ls_decode_line(state, s, last + j, cur + j, Rc[j], width, 3, j, 8);
+                Rc[j] = last[j];
+
+                if (s->restart_interval && !--s->restart_count) {
+                    align_get_bits(&s->gb);
+                    skip_bits(&s->gb, 16); /* skip RSTn */
+                }
+            }
+            last = cur;
+            cur += s->picture.linesize[0];
+        }
+    } else if(ilv == 2) { /* sample interleaving */
+        av_log(s->avctx, AV_LOG_ERROR, "Sample interleaved images are not supported.\n");
+        av_free(state);
+        av_free(zero);
+        return -1;
+    }
+
+    if(shift){ /* we need to do point transform or normalize samples */
+        int x, w;
+
+        w = s->width * s->nb_components;
+
+        if(s->bits <= 8){
+            uint8_t *src = s->picture.data[0];
+
+            for(i = 0; i < s->height; i++){
+                for(x = off; x < w; x+= stride){
+                    src[x] <<= shift;
+                }
+                src += s->picture.linesize[0];
+            }
+        }else{
+            uint16_t *src = s->picture.data[0];
+
+            for(i = 0; i < s->height; i++){
+                for(x = 0; x < w; x++){
+                    src[x] <<= shift;
+                }
+                src += s->picture.linesize[0]/2;
+            }
+        }
+    }
+    av_free(state);
+    av_free(zero);
+
+    return 0;
+}
+
+#if defined(CONFIG_ENCODERS) && defined(CONFIG_JPEGLS_ENCODER)
+/********** Encoder-specific functions **********/
+
+/**
+ * Encode error from regular symbol
+ */
+static inline void ls_encode_regular(JLSState *state, PutBitContext *pb, int Q, int err){
+    int k;
+    int val;
+    int map;
+
+    for(k = 0; (state->N[Q] << k) < state->A[Q]; k++);
+
+    map = !state->near && !k && (2 * state->B[Q] <= -state->N[Q]);
+
+    if(err < 0)
+        err += state->range;
+    if(err >= ((state->range + 1) >> 1)) {
+        err -= state->range;
+        val = 2 * FFABS(err) - 1 - map;
+    } else
+        val = 2 * err + map;
+
+    set_ur_golomb_jpegls(pb, val, k, state->limit, state->qbpp);
+
+    update_state_regular(state, Q, err);
+}
+
+/**
+ * Encode error from run termination
+ */
+static inline void ls_encode_runterm(JLSState *state, PutBitContext *pb, int RItype, int err, int limit_add){
+    int k;
+    int val, map;
+    int Q = 365 + RItype;
+    int temp;
+
+    temp = state->A[Q];
+    if(RItype)
+        temp += state->N[Q] >> 1;
+    for(k = 0; (state->N[Q] << k) < temp; k++);
+    map = 0;
+    if(!k && err && (2 * state->B[Q] < state->N[Q]))
+        map = 1;
+
+    if(err < 0)
+        val = - (2 * err) - 1 - RItype + map;
+    else
+        val = 2 * err - RItype - map;
+    set_ur_golomb_jpegls(pb, val, k, state->limit - limit_add - 1, state->qbpp);
+
+    if(err < 0)
+        state->B[Q]++;
+    state->A[Q] += (val + 1 - RItype) >> 1;
+
+    downscale_state(state, Q);
+}
+
+/**
+ * Encode run value as specified by JPEG-LS standard
+ */
+static inline void ls_encode_run(JLSState *state, PutBitContext *pb, int run, int comp, int trail){
+    while(run >= (1 << log2_run[state->run_index[comp]])){
+        put_bits(pb, 1, 1);
+        run -= 1 << log2_run[state->run_index[comp]];
+        if(state->run_index[comp] < 31)
+            state->run_index[comp]++;
+    }
+    /* if hit EOL, encode another full run, else encode aborted run */
+    if(!trail && run) {
+        put_bits(pb, 1, 1);
+    }else if(trail){
+        put_bits(pb, 1, 0);
+        if(log2_run[state->run_index[comp]])
+            put_bits(pb, log2_run[state->run_index[comp]], run);
+    }
+}
+
+/**
+ * Encode one line of image
+ */
+static inline void ls_encode_line(JLSState *state, PutBitContext *pb, void *last, void *cur, int last2, int w, int stride, int comp, int bits){
+    int x = 0;
+    int Ra, Rb, Rc, Rd;
+    int D0, D1, D2;
+
+    while(x < w) {
+        int err, pred, sign;
+
+        /* compute gradients */
+        Ra = x ? R(cur, x - stride) : R(last, x);
+        Rb = R(last, x);
+        Rc = x ? R(last, x - stride) : last2;
+        Rd = (x >= w - stride) ? R(last, x) : R(last, x + stride);
+        D0 = Rd - Rb;
+        D1 = Rb - Rc;
+        D2 = Rc - Ra;
+
+        /* run mode */
+        if((FFABS(D0) <= state->near) && (FFABS(D1) <= state->near) && (FFABS(D2) <= state->near)) {
+            int RUNval, RItype, run;
+
+            run = 0;
+            RUNval = Ra;
+            while(x < w && (FFABS(R(cur, x) - RUNval) <= state->near)){
+                run++;
+                W(cur, x, Ra);
+                x += stride;
+            }
+            ls_encode_run(state, pb, run, comp, x < w);
+            if(x >= w)
+                return;
+            Rb = R(last, x);
+            RItype = (FFABS(Ra - Rb) <= state->near);
+            pred = RItype ? Ra : Rb;
+            err = R(cur, x) - pred;
+
+            if(!RItype && Ra > Rb)
+                err = -err;
+
+            if(state->near){
+                if(err > 0)
+                    err = (state->near + err) / state->twonear;
+                else
+                    err = -(state->near - err) / state->twonear;
+
+                if(RItype || (Rb >= Ra))
+                    Ra = clip(pred + err * state->twonear, 0, state->maxval);
+                else
+                    Ra = clip(pred - err * state->twonear, 0, state->maxval);
+                W(cur, x, Ra);
+            }
+            if(err < 0)
+                err += state->range;
+            if(err >= ((state->range + 1) >> 1))
+                err -= state->range;
+
+            ls_encode_runterm(state, pb, RItype, err, log2_run[state->run_index[comp]]);
+
+            if(state->run_index[comp] > 0)
+                state->run_index[comp]--;
+        } else { /* regular mode */
+            int context;
+
+            context = quantize(state, D0) * 81 + quantize(state, D1) * 9 + quantize(state, D2);
+            pred = mid_pred(Ra, Ra + Rb - Rc, Rb);
+
+            if(context < 0){
+                context = -context;
+                sign = 1;
+                pred = clip(pred - state->C[context], 0, state->maxval);
+                err = pred - R(cur, x);
+            }else{
+                sign = 0;
+                pred = clip(pred + state->C[context], 0, state->maxval);
+                err = R(cur, x) - pred;
+            }
+
+            if(state->near){
+                if(err > 0)
+                    err = (state->near + err) / state->twonear;
+                else
+                    err = -(state->near - err) / state->twonear;
+                if(!sign)
+                    Ra = clip(pred + err * state->twonear, 0, state->maxval);
+                else
+                    Ra = clip(pred - err * state->twonear, 0, state->maxval);
+                W(cur, x, Ra);
+            }
+
+            ls_encode_regular(state, pb, context, err);
+        }
+        x += stride;
+    }
+}
+
+static void ls_store_lse(JLSState *state, PutBitContext *pb){
+    /* Test if we have default params and don't need to store LSE */
+    JLSState state2;
+    memset(&state2, 0, sizeof(JLSState));
+    state2.bpp = state->bpp;
+    state2.near = state->near;
+    reset_ls_coding_parameters(&state2, 1);
+    if(state->T1 == state2.T1 && state->T2 == state2.T2 && state->T3 == state2.T3 && state->reset == state2.reset)
+        return;
+    /* store LSE type 1 */
+    put_marker(pb, LSE);
+    put_bits(pb, 16, 13);
+    put_bits(pb, 8,   1);
+    put_bits(pb, 16, state->maxval);
+    put_bits(pb, 16, state->T1);
+    put_bits(pb, 16, state->T2);
+    put_bits(pb, 16, state->T3);
+    put_bits(pb, 16, state->reset);
+}
+
+static int encode_picture_ls(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
+    JpeglsContext * const s = avctx->priv_data;
+    AVFrame *pict = data;
+    AVFrame * const p= (AVFrame*)&s->picture;
+    const int near = avctx->prediction_method;
+    PutBitContext pb, pb2;
+    GetBitContext gb;
+    uint8_t *buf2, *zero, *cur, *last;
+    JLSState *state;
+    int i, size;
+    int comps;
+
+    buf2 = av_malloc(buf_size);
+
+    init_put_bits(&pb, buf, buf_size);
+    init_put_bits(&pb2, buf2, buf_size);
+
+    *p = *pict;
+    p->pict_type= FF_I_TYPE;
+    p->key_frame= 1;
+
+    if(avctx->pix_fmt == PIX_FMT_GRAY8 || avctx->pix_fmt == PIX_FMT_GRAY16)
+        comps = 1;
+    else
+        comps = 3;
+
+    /* write our own JPEG header, can't use mjpeg_picture_header */
+    put_marker(&pb, SOI);
+    put_marker(&pb, SOF48);
+    put_bits(&pb, 16, 8 + comps * 3); // header size depends on components
+    put_bits(&pb,  8, (avctx->pix_fmt == PIX_FMT_GRAY16) ? 16 : 8); // bpp
+    put_bits(&pb, 16, avctx->height);
+    put_bits(&pb, 16, avctx->width);
+    put_bits(&pb,  8, comps);         // components
+    for(i = 1; i <= comps; i++) {
+        put_bits(&pb,  8, i);    // component ID
+        put_bits(&pb,  8, 0x11); // subsampling: none
+        put_bits(&pb,  8, 0);    // Tiq, used by JPEG-LS ext
+    }
+
+    put_marker(&pb, SOS);
+    put_bits(&pb, 16, 6 + comps * 2);
+    put_bits(&pb,  8, comps);
+    for(i = 1; i <= comps; i++) {
+        put_bits(&pb,  8, i);  // component ID
+        put_bits(&pb,  8, 0);  // mapping index: none
+    }
+    put_bits(&pb,  8, near);
+    put_bits(&pb,  8, (comps > 1) ? 1 : 0); // interleaving: 0 - plane, 1 - line
+    put_bits(&pb,  8, 0); // point transform: none
+
+    state = av_mallocz(sizeof(JLSState));
+    /* initialize JPEG-LS state from JPEG parameters */
+    state->near = near;
+    state->bpp = (avctx->pix_fmt == PIX_FMT_GRAY16) ? 16 : 8;
+    reset_ls_coding_parameters(state, 0);
+    ls_init_state(state);
+
+    ls_store_lse(state, &pb);
+
+    zero = av_mallocz(p->linesize[0]);
+    last = zero;
+    cur = p->data[0];
+    if(avctx->pix_fmt == PIX_FMT_GRAY8){
+        int t = 0;
+
+        for(i = 0; i < avctx->height; i++) {
+            ls_encode_line(state, &pb2, last, cur, t, avctx->width, 1, 0,  8);
+            t = last[0];
+            last = cur;
+            cur += p->linesize[0];
+        }
+    }else if(avctx->pix_fmt == PIX_FMT_GRAY16){
+        int t = 0;
+
+        for(i = 0; i < avctx->height; i++) {
+            ls_encode_line(state, &pb2, last, cur, t, avctx->width, 1, 0, 16);
+            t = *((uint16_t*)last);
+            last = cur;
+            cur += p->linesize[0];
+        }
+    }else if(avctx->pix_fmt == PIX_FMT_RGB24){
+        int j, width;
+        int Rc[3] = {0, 0, 0};
+
+        width = avctx->width * 3;
+        for(i = 0; i < avctx->height; i++) {
+            for(j = 0; j < 3; j++) {
+                ls_encode_line(state, &pb2, last + j, cur + j, Rc[j], width, 3, j, 8);
+                Rc[j] = last[j];
+            }
+            last = cur;
+            cur += s->picture.linesize[0];
+        }
+    }else if(avctx->pix_fmt == PIX_FMT_BGR24){
+        int j, width;
+        int Rc[3] = {0, 0, 0};
+
+        width = avctx->width * 3;
+        for(i = 0; i < avctx->height; i++) {
+            for(j = 2; j >= 0; j--) {
+                ls_encode_line(state, &pb2, last + j, cur + j, Rc[j], width, 3, j, 8);
+                Rc[j] = last[j];
+            }
+            last = cur;
+            cur += s->picture.linesize[0];
+        }
+    }
+
+    av_free(zero);
+    av_free(state);
+
+    flush_put_bits(&pb2);
+    /* do escape coding */
+    size = put_bits_count(&pb2) >> 3;
+    init_get_bits(&gb, buf2, size);
+    while(get_bits_count(&gb) < size * 8){
+        int v;
+        v = get_bits(&gb, 8);
+        put_bits(&pb, 8, v);
+        if(v == 0xFF){
+            v = get_bits(&gb, 7);
+            put_bits(&pb, 8, v);
+        }
+    }
+    align_put_bits(&pb);
+    av_free(buf2);
+
+    /* End of image */
+    put_marker(&pb, EOI);
+    flush_put_bits(&pb);
+
+    emms_c();
+
+    return put_bits_count(&pb) >> 3;
+}
+
+static int encode_init_ls(AVCodecContext *ctx) {
+    JpeglsContext *c = (JpeglsContext*)ctx->priv_data;
+
+    c->avctx = ctx;
+    ctx->coded_frame = &c->picture;
+
+    if(ctx->pix_fmt != PIX_FMT_GRAY8 && ctx->pix_fmt != PIX_FMT_GRAY16 && ctx->pix_fmt != PIX_FMT_RGB24 && ctx->pix_fmt != PIX_FMT_BGR24){
+        av_log(ctx, AV_LOG_ERROR, "Only grayscale and RGB24/BGR24 images are supported\n");
+        return -1;
+    }
+    return 0;
+}
+
+AVCodec jpegls_encoder = { //FIXME avoid MPV_* lossless jpeg shouldnt need them
+    "jpegls",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_JPEGLS,
+    sizeof(JpeglsContext),
+    encode_init_ls,
+    encode_picture_ls,
+    NULL,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_BGR24, PIX_FMT_RGB24, PIX_FMT_GRAY8, PIX_FMT_GRAY16, -1},
+};
+#endif
diff --git a/contrib/ffmpeg/libavcodec/jrevdct.c b/contrib/ffmpeg/libavcodec/jrevdct.c
new file mode 100644
index 000000000..f055cc1ac
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/jrevdct.c
@@ -0,0 +1,1159 @@
+/*
+ * jrevdct.c
+ *
+ * This file is part of the Independent JPEG Group's software.
+ *
+ * The authors make NO WARRANTY or representation, either express or implied,
+ * with respect to this software, its quality, accuracy, merchantability, or
+ * fitness for a particular purpose.  This software is provided "AS IS", and
+ * you, its user, assume the entire risk as to its quality and accuracy.
+ *
+ * This software is copyright (C) 1991, 1992, Thomas G. Lane.
+ * All Rights Reserved except as specified below.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute this
+ * software (or portions thereof) for any purpose, without fee, subject to
+ * these conditions:
+ * (1) If any part of the source code for this software is distributed, then
+ * this README file must be included, with this copyright and no-warranty
+ * notice unaltered; and any additions, deletions, or changes to the original
+ * files must be clearly indicated in accompanying documentation.
+ * (2) If only executable code is distributed, then the accompanying
+ * documentation must state that "this software is based in part on the work
+ * of the Independent JPEG Group".
+ * (3) Permission for use of this software is granted only if the user accepts
+ * full responsibility for any undesirable consequences; the authors accept
+ * NO LIABILITY for damages of any kind.
+ *
+ * These conditions apply to any software derived from or based on the IJG
+ * code, not just to the unmodified library.  If you use our work, you ought
+ * to acknowledge us.
+ *
+ * Permission is NOT granted for the use of any IJG author's name or company
+ * name in advertising or publicity relating to this software or products
+ * derived from it.  This software may be referred to only as "the Independent
+ * JPEG Group's software".
+ *
+ * We specifically permit and encourage the use of this software as the basis
+ * of commercial products, provided that all warranty or liability claims are
+ * assumed by the product vendor.
+ *
+ * This file contains the basic inverse-DCT transformation subroutine.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ *
+ * I've made lots of modifications to attempt to take advantage of the
+ * sparse nature of the DCT matrices we're getting.  Although the logic
+ * is cumbersome, it's straightforward and the resulting code is much
+ * faster.
+ *
+ * A better way to do this would be to pass in the DCT block as a sparse
+ * matrix, perhaps with the difference cases encoded.
+ */
+
+/**
+ * @file jrevdct.c
+ * Independent JPEG Group's LLM idct.
+ */
+
+#include "common.h"
+#include "dsputil.h"
+
+#define EIGHT_BIT_SAMPLES
+
+#define DCTSIZE 8
+#define DCTSIZE2 64
+
+#define GLOBAL
+
+#define RIGHT_SHIFT(x, n) ((x) >> (n))
+
+typedef DCTELEM DCTBLOCK[DCTSIZE2];
+
+#define CONST_BITS 13
+
+/*
+ * This routine is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/*
+ * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true IDCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D IDCT,
+ * because the y0 and y4 inputs need not be divided by sqrt(N).
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (To scale up 12-bit sample data further, an
+ * intermediate int32 array would be needed.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#ifdef EIGHT_BIT_SAMPLES
+#define PASS1_BITS  2
+#else
+#define PASS1_BITS  1   /* lose a little precision to avoid overflow */
+#endif
+
+#define ONE         ((int32_t) 1)
+
+#define CONST_SCALE (ONE << CONST_BITS)
+
+/* Convert a positive real constant to an integer scaled by CONST_SCALE.
+ * IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
+ * you will pay a significant penalty in run time.  In that case, figure
+ * the correct integer constant values and insert them by hand.
+ */
+
+/* Actually FIX is no longer used, we precomputed them all */
+#define FIX(x)  ((int32_t) ((x) * CONST_SCALE + 0.5))
+
+/* Descale and correctly round an int32_t value that's scaled by N bits.
+ * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
+ * the fudge factor is correct for either sign of X.
+ */
+
+#define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
+
+/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
+ * this provides a useful speedup on many machines.
+ * There is no way to specify a 16x16->32 multiply in portable C, but
+ * some C compilers will do the right thing if you provide the correct
+ * combination of casts.
+ * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#ifdef EIGHT_BIT_SAMPLES
+#ifdef SHORTxSHORT_32           /* may work if 'int' is 32 bits */
+#define MULTIPLY(var,const)  (((int16_t) (var)) * ((int16_t) (const)))
+#endif
+#ifdef SHORTxLCONST_32          /* known to work with Microsoft C 6.0 */
+#define MULTIPLY(var,const)  (((int16_t) (var)) * ((int32_t) (const)))
+#endif
+#endif
+
+#ifndef MULTIPLY                /* default definition */
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+/*
+  Unlike our decoder where we approximate the FIXes, we need to use exact
+ones here or successive P-frames will drift too much with Reference frame coding
+*/
+#define FIX_0_211164243 1730
+#define FIX_0_275899380 2260
+#define FIX_0_298631336 2446
+#define FIX_0_390180644 3196
+#define FIX_0_509795579 4176
+#define FIX_0_541196100 4433
+#define FIX_0_601344887 4926
+#define FIX_0_765366865 6270
+#define FIX_0_785694958 6436
+#define FIX_0_899976223 7373
+#define FIX_1_061594337 8697
+#define FIX_1_111140466 9102
+#define FIX_1_175875602 9633
+#define FIX_1_306562965 10703
+#define FIX_1_387039845 11363
+#define FIX_1_451774981 11893
+#define FIX_1_501321110 12299
+#define FIX_1_662939225 13623
+#define FIX_1_847759065 15137
+#define FIX_1_961570560 16069
+#define FIX_2_053119869 16819
+#define FIX_2_172734803 17799
+#define FIX_2_562915447 20995
+#define FIX_3_072711026 25172
+
+/*
+ * Perform the inverse DCT on one block of coefficients.
+ */
+
+void j_rev_dct(DCTBLOCK data)
+{
+  int32_t tmp0, tmp1, tmp2, tmp3;
+  int32_t tmp10, tmp11, tmp12, tmp13;
+  int32_t z1, z2, z3, z4, z5;
+  int32_t d0, d1, d2, d3, d4, d5, d6, d7;
+  register DCTELEM *dataptr;
+  int rowctr;
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  dataptr = data;
+
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any row in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * row DCT calculations can be simplified this way.
+     */
+
+    register int *idataptr = (int*)dataptr;
+
+    /* WARNING: we do the same permutation as MMX idct to simplify the
+       video core */
+    d0 = dataptr[0];
+    d2 = dataptr[1];
+    d4 = dataptr[2];
+    d6 = dataptr[3];
+    d1 = dataptr[4];
+    d3 = dataptr[5];
+    d5 = dataptr[6];
+    d7 = dataptr[7];
+
+    if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) {
+      /* AC terms all zero */
+      if (d0) {
+          /* Compute a 32 bit value to assign. */
+          DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
+          register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
+
+          idataptr[0] = v;
+          idataptr[1] = v;
+          idataptr[2] = v;
+          idataptr[3] = v;
+      }
+
+      dataptr += DCTSIZE;       /* advance pointer to next row */
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+{
+    if (d6) {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
+    } else {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
+                    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
+            }
+      }
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    if (d7) {
+        if (d5) {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z2 = d5 + d3;
+                    z3 = d7 + d3;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
+                    z2 = d5 + d3;
+                    z3 = d7 + d3;
+                    z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 = z1 + z4;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(d7 + z4, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 = z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
+                    tmp0 = MULTIPLY(-d7, FIX_0_601344887);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+                    z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z3;
+                    tmp1 += z4;
+                    tmp2 = z2 + z3;
+                    tmp3 = z1 + z4;
+                }
+            }
+        } else {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z3 = d7 + d3;
+                    z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-d3, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-d1, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 = z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
+                    z3 = d7 + d3;
+
+                    tmp0 = MULTIPLY(-d7, FIX_0_601344887);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    tmp2 = MULTIPLY(d3, FIX_0_509795579);
+                    z2 = MULTIPLY(-d3, FIX_2_562915447);
+                    z5 = MULTIPLY(z3, FIX_1_175875602);
+                    z3 = MULTIPLY(-z3, FIX_0_785694958);
+
+                    tmp0 += z3;
+                    tmp1 = z2 + z5;
+                    tmp2 += z3;
+                    tmp3 = z1 + z5;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z5 = MULTIPLY(z1, FIX_1_175875602);
+
+                    z1 = MULTIPLY(z1, FIX_0_275899380);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    tmp0 = MULTIPLY(-d7, FIX_1_662939225);
+                    z4 = MULTIPLY(-d1, FIX_0_390180644);
+                    tmp3 = MULTIPLY(d1, FIX_1_111140466);
+
+                    tmp0 += z1;
+                    tmp1 = z4 + z5;
+                    tmp2 = z3 + z5;
+                    tmp3 += z1;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
+                    tmp0 = MULTIPLY(-d7, FIX_1_387039845);
+                    tmp1 = MULTIPLY(d7, FIX_1_175875602);
+                    tmp2 = MULTIPLY(-d7, FIX_0_785694958);
+                    tmp3 = MULTIPLY(d7, FIX_0_275899380);
+                }
+            }
+        }
+    } else {
+        if (d5) {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
+                    z2 = d5 + d3;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
+
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-d1, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-d3, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 = z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
+                    z2 = d5 + d3;
+
+                    z5 = MULTIPLY(z2, FIX_1_175875602);
+                    tmp1 = MULTIPLY(d5, FIX_1_662939225);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+                    z2 = MULTIPLY(-z2, FIX_1_387039845);
+                    tmp2 = MULTIPLY(d3, FIX_1_111140466);
+                    z3 = MULTIPLY(-d3, FIX_1_961570560);
+
+                    tmp0 = z3 + z5;
+                    tmp1 += z2;
+                    tmp2 += z2;
+                    tmp3 = z4 + z5;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
+                    z4 = d5 + d1;
+
+                    z5 = MULTIPLY(z4, FIX_1_175875602);
+                    z1 = MULTIPLY(-d1, FIX_0_899976223);
+                    tmp3 = MULTIPLY(d1, FIX_0_601344887);
+                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z4 = MULTIPLY(z4, FIX_0_785694958);
+
+                    tmp0 = z1 + z5;
+                    tmp1 += z4;
+                    tmp2 = z2 + z5;
+                    tmp3 += z4;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
+                    tmp0 = MULTIPLY(d5, FIX_1_175875602);
+                    tmp1 = MULTIPLY(d5, FIX_0_275899380);
+                    tmp2 = MULTIPLY(-d5, FIX_1_387039845);
+                    tmp3 = MULTIPLY(d5, FIX_0_785694958);
+                }
+            }
+        } else {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
+                    z5 = d1 + d3;
+                    tmp3 = MULTIPLY(d1, FIX_0_211164243);
+                    tmp2 = MULTIPLY(-d3, FIX_1_451774981);
+                    z1 = MULTIPLY(d1, FIX_1_061594337);
+                    z2 = MULTIPLY(-d3, FIX_2_172734803);
+                    z4 = MULTIPLY(z5, FIX_0_785694958);
+                    z5 = MULTIPLY(z5, FIX_1_175875602);
+
+                    tmp0 = z1 - z4;
+                    tmp1 = z2 + z4;
+                    tmp2 += z5;
+                    tmp3 += z5;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
+                    tmp0 = MULTIPLY(-d3, FIX_0_785694958);
+                    tmp1 = MULTIPLY(-d3, FIX_1_387039845);
+                    tmp2 = MULTIPLY(-d3, FIX_0_275899380);
+                    tmp3 = MULTIPLY(d3, FIX_1_175875602);
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
+                    tmp0 = MULTIPLY(d1, FIX_0_275899380);
+                    tmp1 = MULTIPLY(d1, FIX_0_785694958);
+                    tmp2 = MULTIPLY(d1, FIX_1_175875602);
+                    tmp3 = MULTIPLY(d1, FIX_1_387039845);
+                } else {
+                    /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
+                    tmp0 = tmp1 = tmp2 = tmp3 = 0;
+                }
+            }
+        }
+    }
+}
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+    dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+    dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+    dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;         /* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  dataptr = data;
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
+    /* Columns of zeroes can be exploited in the same way as we did with rows.
+     * However, the row calculation has created many nonzero AC terms, so the
+     * simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+
+    d0 = dataptr[DCTSIZE*0];
+    d1 = dataptr[DCTSIZE*1];
+    d2 = dataptr[DCTSIZE*2];
+    d3 = dataptr[DCTSIZE*3];
+    d4 = dataptr[DCTSIZE*4];
+    d5 = dataptr[DCTSIZE*5];
+    d6 = dataptr[DCTSIZE*6];
+    d7 = dataptr[DCTSIZE*7];
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    if (d6) {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
+    } else {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
+                    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
+            }
+    }
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+    if (d7) {
+        if (d5) {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z2 = d5 + d3;
+                    z3 = d7 + d3;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
+                    z1 = d7;
+                    z2 = d5 + d3;
+                    z3 = d7 + d3;
+                    z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 = z1 + z4;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z2 = d5;
+                    z3 = d7;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 = z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
+                    tmp0 = MULTIPLY(-d7, FIX_0_601344887);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+                    z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z3;
+                    tmp1 += z4;
+                    tmp2 = z2 + z3;
+                    tmp3 = z1 + z4;
+                }
+            }
+        } else {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z3 = d7 + d3;
+                    z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-d3, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-d1, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 = z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
+                    z3 = d7 + d3;
+
+                    tmp0 = MULTIPLY(-d7, FIX_0_601344887);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    tmp2 = MULTIPLY(d3, FIX_0_509795579);
+                    z2 = MULTIPLY(-d3, FIX_2_562915447);
+                    z5 = MULTIPLY(z3, FIX_1_175875602);
+                    z3 = MULTIPLY(-z3, FIX_0_785694958);
+
+                    tmp0 += z3;
+                    tmp1 = z2 + z5;
+                    tmp2 += z3;
+                    tmp3 = z1 + z5;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z5 = MULTIPLY(z1, FIX_1_175875602);
+
+                    z1 = MULTIPLY(z1, FIX_0_275899380);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    tmp0 = MULTIPLY(-d7, FIX_1_662939225);
+                    z4 = MULTIPLY(-d1, FIX_0_390180644);
+                    tmp3 = MULTIPLY(d1, FIX_1_111140466);
+
+                    tmp0 += z1;
+                    tmp1 = z4 + z5;
+                    tmp2 = z3 + z5;
+                    tmp3 += z1;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
+                    tmp0 = MULTIPLY(-d7, FIX_1_387039845);
+                    tmp1 = MULTIPLY(d7, FIX_1_175875602);
+                    tmp2 = MULTIPLY(-d7, FIX_0_785694958);
+                    tmp3 = MULTIPLY(d7, FIX_0_275899380);
+                }
+            }
+        }
+    } else {
+        if (d5) {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
+                    z2 = d5 + d3;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
+
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-d1, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-d3, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 = z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
+                    z2 = d5 + d3;
+
+                    z5 = MULTIPLY(z2, FIX_1_175875602);
+                    tmp1 = MULTIPLY(d5, FIX_1_662939225);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+                    z2 = MULTIPLY(-z2, FIX_1_387039845);
+                    tmp2 = MULTIPLY(d3, FIX_1_111140466);
+                    z3 = MULTIPLY(-d3, FIX_1_961570560);
+
+                    tmp0 = z3 + z5;
+                    tmp1 += z2;
+                    tmp2 += z2;
+                    tmp3 = z4 + z5;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
+                    z4 = d5 + d1;
+
+                    z5 = MULTIPLY(z4, FIX_1_175875602);
+                    z1 = MULTIPLY(-d1, FIX_0_899976223);
+                    tmp3 = MULTIPLY(d1, FIX_0_601344887);
+                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z4 = MULTIPLY(z4, FIX_0_785694958);
+
+                    tmp0 = z1 + z5;
+                    tmp1 += z4;
+                    tmp2 = z2 + z5;
+                    tmp3 += z4;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
+                    tmp0 = MULTIPLY(d5, FIX_1_175875602);
+                    tmp1 = MULTIPLY(d5, FIX_0_275899380);
+                    tmp2 = MULTIPLY(-d5, FIX_1_387039845);
+                    tmp3 = MULTIPLY(d5, FIX_0_785694958);
+                }
+            }
+        } else {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
+                    z5 = d1 + d3;
+                    tmp3 = MULTIPLY(d1, FIX_0_211164243);
+                    tmp2 = MULTIPLY(-d3, FIX_1_451774981);
+                    z1 = MULTIPLY(d1, FIX_1_061594337);
+                    z2 = MULTIPLY(-d3, FIX_2_172734803);
+                    z4 = MULTIPLY(z5, FIX_0_785694958);
+                    z5 = MULTIPLY(z5, FIX_1_175875602);
+
+                    tmp0 = z1 - z4;
+                    tmp1 = z2 + z4;
+                    tmp2 += z5;
+                    tmp3 += z5;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
+                    tmp0 = MULTIPLY(-d3, FIX_0_785694958);
+                    tmp1 = MULTIPLY(-d3, FIX_1_387039845);
+                    tmp2 = MULTIPLY(-d3, FIX_0_275899380);
+                    tmp3 = MULTIPLY(d3, FIX_1_175875602);
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
+                    tmp0 = MULTIPLY(d1, FIX_0_275899380);
+                    tmp1 = MULTIPLY(d1, FIX_0_785694958);
+                    tmp2 = MULTIPLY(d1, FIX_1_175875602);
+                    tmp3 = MULTIPLY(d1, FIX_1_387039845);
+                } else {
+                    /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
+                    tmp0 = tmp1 = tmp2 = tmp3 = 0;
+                }
+            }
+        }
+    }
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
+                                           CONST_BITS+PASS1_BITS+3);
+
+    dataptr++;                  /* advance pointer to next column */
+  }
+}
+
+#undef DCTSIZE
+#define DCTSIZE 4
+#define DCTSTRIDE 8
+
+void j_rev_dct4(DCTBLOCK data)
+{
+  int32_t tmp0, tmp1, tmp2, tmp3;
+  int32_t tmp10, tmp11, tmp12, tmp13;
+  int32_t z1;
+  int32_t d0, d2, d4, d6;
+  register DCTELEM *dataptr;
+  int rowctr;
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  data[0] += 4;
+
+  dataptr = data;
+
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any row in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * row DCT calculations can be simplified this way.
+     */
+
+    register int *idataptr = (int*)dataptr;
+
+    d0 = dataptr[0];
+    d2 = dataptr[1];
+    d4 = dataptr[2];
+    d6 = dataptr[3];
+
+    if ((d2 | d4 | d6) == 0) {
+      /* AC terms all zero */
+      if (d0) {
+          /* Compute a 32 bit value to assign. */
+          DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
+          register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
+
+          idataptr[0] = v;
+          idataptr[1] = v;
+      }
+
+      dataptr += DCTSTRIDE;     /* advance pointer to next row */
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    if (d6) {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
+    } else {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
+                    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
+            }
+      }
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
+    dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
+    dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSTRIDE;       /* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  dataptr = data;
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
+    /* Columns of zeroes can be exploited in the same way as we did with rows.
+     * However, the row calculation has created many nonzero AC terms, so the
+     * simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+
+    d0 = dataptr[DCTSTRIDE*0];
+    d2 = dataptr[DCTSTRIDE*1];
+    d4 = dataptr[DCTSTRIDE*2];
+    d6 = dataptr[DCTSTRIDE*3];
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    if (d6) {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
+    } else {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
+                    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
+            }
+    }
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3);
+
+    dataptr++;                  /* advance pointer to next column */
+  }
+}
+
+void j_rev_dct2(DCTBLOCK data){
+  int d00, d01, d10, d11;
+
+  data[0] += 4;
+  d00 = data[0+0*DCTSTRIDE] + data[1+0*DCTSTRIDE];
+  d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE];
+  d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE];
+  d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE];
+
+  data[0+0*DCTSTRIDE]= (d00 + d10)>>3;
+  data[1+0*DCTSTRIDE]= (d01 + d11)>>3;
+  data[0+1*DCTSTRIDE]= (d00 - d10)>>3;
+  data[1+1*DCTSTRIDE]= (d01 - d11)>>3;
+}
+
+void j_rev_dct1(DCTBLOCK data){
+  data[0] = (data[0] + 4)>>3;
+}
+
+#undef FIX
+#undef CONST_BITS
diff --git a/contrib/ffmpeg/libavcodec/kmvc.c b/contrib/ffmpeg/libavcodec/kmvc.c
new file mode 100644
index 000000000..e8f39fca1
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/kmvc.c
@@ -0,0 +1,417 @@
+/*
+ * KMVC decoder
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file kmvc.c
+ * Karl Morton's Video Codec decoder
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "common.h"
+#include "avcodec.h"
+
+#define KMVC_KEYFRAME 0x80
+#define KMVC_PALETTE  0x40
+#define KMVC_METHOD   0x0F
+
+/*
+ * Decoder context
+ */
+typedef struct KmvcContext {
+    AVCodecContext *avctx;
+    AVFrame pic;
+
+    int setpal;
+    int palsize;
+    uint32_t pal[256];
+    uint8_t *cur, *prev;
+    uint8_t *frm0, *frm1;
+} KmvcContext;
+
+typedef struct BitBuf {
+    int bits;
+    int bitbuf;
+} BitBuf;
+
+#define BLK(data, x, y)  data[(x) + (y) * 320]
+
+#define kmvc_init_getbits(bb, src)  bb.bits = 7; bb.bitbuf = *src++;
+
+#define kmvc_getbit(bb, src, res) {\
+    res = 0; \
+    if (bb.bitbuf & (1 << bb.bits)) res = 1; \
+    bb.bits--; \
+    if(bb.bits == -1) { \
+        bb.bitbuf = *src++; \
+        bb.bits = 7; \
+    } \
+}
+
+static void kmvc_decode_intra_8x8(KmvcContext * ctx, uint8_t * src, int w, int h)
+{
+    BitBuf bb;
+    int res, val;
+    int i, j;
+    int bx, by;
+    int l0x, l1x, l0y, l1y;
+    int mx, my;
+
+    kmvc_init_getbits(bb, src);
+
+    for (by = 0; by < h; by += 8)
+        for (bx = 0; bx < w; bx += 8) {
+            kmvc_getbit(bb, src, res);
+            if (!res) {         // fill whole 8x8 block
+                val = *src++;
+                for (i = 0; i < 64; i++)
+                    BLK(ctx->cur, bx + (i & 0x7), by + (i >> 3)) = val;
+            } else {            // handle four 4x4 subblocks
+                for (i = 0; i < 4; i++) {
+                    l0x = bx + (i & 1) * 4;
+                    l0y = by + (i & 2) * 2;
+                    kmvc_getbit(bb, src, res);
+                    if (!res) {
+                        kmvc_getbit(bb, src, res);
+                        if (!res) {     // fill whole 4x4 block
+                            val = *src++;
+                            for (j = 0; j < 16; j++)
+                                BLK(ctx->cur, l0x + (j & 3), l0y + (j >> 2)) = val;
+                        } else {        // copy block from already decoded place
+                            val = *src++;
+                            mx = val & 0xF;
+                            my = val >> 4;
+                            for (j = 0; j < 16; j++)
+                                BLK(ctx->cur, l0x + (j & 3), l0y + (j >> 2)) =
+                                    BLK(ctx->cur, l0x + (j & 3) - mx, l0y + (j >> 2) - my);
+                        }
+                    } else {    // descend to 2x2 sub-sub-blocks
+                        for (j = 0; j < 4; j++) {
+                            l1x = l0x + (j & 1) * 2;
+                            l1y = l0y + (j & 2);
+                            kmvc_getbit(bb, src, res);
+                            if (!res) {
+                                kmvc_getbit(bb, src, res);
+                                if (!res) {     // fill whole 2x2 block
+                                    val = *src++;
+                                    BLK(ctx->cur, l1x, l1y) = val;
+                                    BLK(ctx->cur, l1x + 1, l1y) = val;
+                                    BLK(ctx->cur, l1x, l1y + 1) = val;
+                                    BLK(ctx->cur, l1x + 1, l1y + 1) = val;
+                                } else {        // copy block from already decoded place
+                                    val = *src++;
+                                    mx = val & 0xF;
+                                    my = val >> 4;
+                                    BLK(ctx->cur, l1x, l1y) = BLK(ctx->cur, l1x - mx, l1y - my);
+                                    BLK(ctx->cur, l1x + 1, l1y) =
+                                        BLK(ctx->cur, l1x + 1 - mx, l1y - my);
+                                    BLK(ctx->cur, l1x, l1y + 1) =
+                                        BLK(ctx->cur, l1x - mx, l1y + 1 - my);
+                                    BLK(ctx->cur, l1x + 1, l1y + 1) =
+                                        BLK(ctx->cur, l1x + 1 - mx, l1y + 1 - my);
+                                }
+                            } else {    // read values for block
+                                BLK(ctx->cur, l1x, l1y) = *src++;
+                                BLK(ctx->cur, l1x + 1, l1y) = *src++;
+                                BLK(ctx->cur, l1x, l1y + 1) = *src++;
+                                BLK(ctx->cur, l1x + 1, l1y + 1) = *src++;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+}
+
+static void kmvc_decode_inter_8x8(KmvcContext * ctx, uint8_t * src, int w, int h)
+{
+    BitBuf bb;
+    int res, val;
+    int i, j;
+    int bx, by;
+    int l0x, l1x, l0y, l1y;
+    int mx, my;
+
+    kmvc_init_getbits(bb, src);
+
+    for (by = 0; by < h; by += 8)
+        for (bx = 0; bx < w; bx += 8) {
+            kmvc_getbit(bb, src, res);
+            if (!res) {
+                kmvc_getbit(bb, src, res);
+                if (!res) {     // fill whole 8x8 block
+                    val = *src++;
+                    for (i = 0; i < 64; i++)
+                        BLK(ctx->cur, bx + (i & 0x7), by + (i >> 3)) = val;
+                } else {        // copy block from previous frame
+                    for (i = 0; i < 64; i++)
+                        BLK(ctx->cur, bx + (i & 0x7), by + (i >> 3)) =
+                            BLK(ctx->prev, bx + (i & 0x7), by + (i >> 3));
+                }
+            } else {            // handle four 4x4 subblocks
+                for (i = 0; i < 4; i++) {
+                    l0x = bx + (i & 1) * 4;
+                    l0y = by + (i & 2) * 2;
+                    kmvc_getbit(bb, src, res);
+                    if (!res) {
+                        kmvc_getbit(bb, src, res);
+                        if (!res) {     // fill whole 4x4 block
+                            val = *src++;
+                            for (j = 0; j < 16; j++)
+                                BLK(ctx->cur, l0x + (j & 3), l0y + (j >> 2)) = val;
+                        } else {        // copy block
+                            val = *src++;
+                            mx = (val & 0xF) - 8;
+                            my = (val >> 4) - 8;
+                            for (j = 0; j < 16; j++)
+                                BLK(ctx->cur, l0x + (j & 3), l0y + (j >> 2)) =
+                                    BLK(ctx->prev, l0x + (j & 3) + mx, l0y + (j >> 2) + my);
+                        }
+                    } else {    // descend to 2x2 sub-sub-blocks
+                        for (j = 0; j < 4; j++) {
+                            l1x = l0x + (j & 1) * 2;
+                            l1y = l0y + (j & 2);
+                            kmvc_getbit(bb, src, res);
+                            if (!res) {
+                                kmvc_getbit(bb, src, res);
+                                if (!res) {     // fill whole 2x2 block
+                                    val = *src++;
+                                    BLK(ctx->cur, l1x, l1y) = val;
+                                    BLK(ctx->cur, l1x + 1, l1y) = val;
+                                    BLK(ctx->cur, l1x, l1y + 1) = val;
+                                    BLK(ctx->cur, l1x + 1, l1y + 1) = val;
+                                } else {        // copy block
+                                    val = *src++;
+                                    mx = (val & 0xF) - 8;
+                                    my = (val >> 4) - 8;
+                                    BLK(ctx->cur, l1x, l1y) = BLK(ctx->prev, l1x + mx, l1y + my);
+                                    BLK(ctx->cur, l1x + 1, l1y) =
+                                        BLK(ctx->prev, l1x + 1 + mx, l1y + my);
+                                    BLK(ctx->cur, l1x, l1y + 1) =
+                                        BLK(ctx->prev, l1x + mx, l1y + 1 + my);
+                                    BLK(ctx->cur, l1x + 1, l1y + 1) =
+                                        BLK(ctx->prev, l1x + 1 + mx, l1y + 1 + my);
+                                }
+                            } else {    // read values for block
+                                BLK(ctx->cur, l1x, l1y) = *src++;
+                                BLK(ctx->cur, l1x + 1, l1y) = *src++;
+                                BLK(ctx->cur, l1x, l1y + 1) = *src++;
+                                BLK(ctx->cur, l1x + 1, l1y + 1) = *src++;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+}
+
+static int decode_frame(AVCodecContext * avctx, void *data, int *data_size, uint8_t * buf,
+                        int buf_size)
+{
+    KmvcContext *const ctx = (KmvcContext *) avctx->priv_data;
+    uint8_t *out, *src;
+    int i;
+    int header;
+    int blocksize;
+
+    if (ctx->pic.data[0])
+        avctx->release_buffer(avctx, &ctx->pic);
+
+    ctx->pic.reference = 1;
+    ctx->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
+    if (avctx->get_buffer(avctx, &ctx->pic) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    header = *buf++;
+
+    /* blocksize 127 is really palette change event */
+    if (buf[0] == 127) {
+        buf += 3;
+        for (i = 0; i < 127; i++) {
+            ctx->pal[i + (header & 0x81)] = (buf[0] << 16) | (buf[1] << 8) | buf[2];
+            buf += 4;
+        }
+        buf -= 127 * 4 + 3;
+    }
+
+    if (header & KMVC_KEYFRAME) {
+        ctx->pic.key_frame = 1;
+        ctx->pic.pict_type = FF_I_TYPE;
+    } else {
+        ctx->pic.key_frame = 0;
+        ctx->pic.pict_type = FF_P_TYPE;
+    }
+
+    /* if palette has been changed, copy it from palctrl */
+    if (ctx->avctx->palctrl && ctx->avctx->palctrl->palette_changed) {
+        memcpy(ctx->pal, ctx->avctx->palctrl->palette, AVPALETTE_SIZE);
+        ctx->setpal = 1;
+        ctx->avctx->palctrl->palette_changed = 0;
+    }
+
+    if (header & KMVC_PALETTE) {
+        ctx->pic.palette_has_changed = 1;
+        // palette starts from index 1 and has 127 entries
+        for (i = 1; i <= ctx->palsize; i++) {
+            ctx->pal[i] = (buf[0] << 16) | (buf[1] << 8) | buf[2];
+            buf += 3;
+        }
+    }
+
+    if (ctx->setpal) {
+        ctx->setpal = 0;
+        ctx->pic.palette_has_changed = 1;
+    }
+
+    /* make the palette available on the way out */
+    memcpy(ctx->pic.data[1], ctx->pal, 1024);
+
+    blocksize = *buf++;
+
+    if (blocksize != 8 && blocksize != 127) {
+        av_log(avctx, AV_LOG_ERROR, "Block size = %i\n", blocksize);
+        return -1;
+    }
+    memset(ctx->cur, 0, 320 * 200);
+    switch (header & KMVC_METHOD) {
+    case 0:
+    case 1: // used in palette changed event
+        memcpy(ctx->cur, ctx->prev, 320 * 200);
+        break;
+    case 3:
+        kmvc_decode_intra_8x8(ctx, buf, avctx->width, avctx->height);
+        break;
+    case 4:
+        kmvc_decode_inter_8x8(ctx, buf, avctx->width, avctx->height);
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unknown compression method %i\n", header & KMVC_METHOD);
+        return -1;
+    }
+
+    out = ctx->pic.data[0];
+    src = ctx->cur;
+    for (i = 0; i < avctx->height; i++) {
+        memcpy(out, src, avctx->width);
+        src += 320;
+        out += ctx->pic.linesize[0];
+    }
+
+    /* flip buffers */
+    if (ctx->cur == ctx->frm0) {
+        ctx->cur = ctx->frm1;
+        ctx->prev = ctx->frm0;
+    } else {
+        ctx->cur = ctx->frm0;
+        ctx->prev = ctx->frm1;
+    }
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame *) data = ctx->pic;
+
+    /* always report that the buffer was completely consumed */
+    return buf_size;
+}
+
+
+
+/*
+ * Init kmvc decoder
+ */
+static int decode_init(AVCodecContext * avctx)
+{
+    KmvcContext *const c = (KmvcContext *) avctx->priv_data;
+    int i;
+
+    c->avctx = avctx;
+    avctx->has_b_frames = 0;
+
+    c->pic.data[0] = NULL;
+
+    if (avctx->width > 320 || avctx->height > 200) {
+        av_log(avctx, AV_LOG_ERROR, "KMVC supports frames <= 320x200\n");
+        return -1;
+    }
+
+    c->frm0 = av_mallocz(320 * 200);
+    c->frm1 = av_mallocz(320 * 200);
+    c->cur = c->frm0;
+    c->prev = c->frm1;
+
+    for (i = 0; i < 256; i++) {
+        c->pal[i] = i * 0x10101;
+    }
+
+    if (avctx->extradata_size < 12) {
+        av_log(NULL, 0, "Extradata missing, decoding may not work properly...\n");
+        c->palsize = 127;
+    } else {
+        c->palsize = LE_16(avctx->extradata + 10);
+    }
+
+    if (avctx->extradata_size == 1036) {        // palette in extradata
+        uint8_t *src = avctx->extradata + 12;
+        for (i = 0; i < 256; i++) {
+            c->pal[i] = LE_32(src);
+            src += 4;
+        }
+        c->setpal = 1;
+        if (c->avctx->palctrl) {
+            c->avctx->palctrl->palette_changed = 0;
+        }
+    }
+
+    avctx->pix_fmt = PIX_FMT_PAL8;
+
+    return 0;
+}
+
+
+
+/*
+ * Uninit kmvc decoder
+ */
+static int decode_end(AVCodecContext * avctx)
+{
+    KmvcContext *const c = (KmvcContext *) avctx->priv_data;
+
+    av_freep(&c->frm0);
+    av_freep(&c->frm1);
+    if (c->pic.data[0])
+        avctx->release_buffer(avctx, &c->pic);
+
+    return 0;
+}
+
+AVCodec kmvc_decoder = {
+    "kmvc",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_KMVC,
+    sizeof(KmvcContext),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame
+};
diff --git a/contrib/ffmpeg/libavcodec/lcl.c b/contrib/ffmpeg/libavcodec/lcl.c
new file mode 100644
index 000000000..b02ea1543
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/lcl.c
@@ -0,0 +1,928 @@
+/*
+ * LCL (LossLess Codec Library) Codec
+ * Copyright (c) 2002-2004 Roberto Togni
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file lcl.c
+ * LCL (LossLess Codec Library) Video Codec
+ * Decoder for MSZH and ZLIB codecs
+ * Experimental encoder for ZLIB RGB24
+ *
+ * Fourcc: MSZH, ZLIB
+ *
+ * Original Win32 dll:
+ * Ver2.23 By Kenji Oshima 2000.09.20
+ * avimszh.dll, avizlib.dll
+ *
+ * A description of the decoding algorithm can be found here:
+ *   http://www.pcisys.net/~melanson/codecs
+ *
+ * Supports: BGR24 (RGB 24bpp)
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "common.h"
+#include "bitstream.h"
+#include "avcodec.h"
+
+#ifdef CONFIG_ZLIB
+#include <zlib.h>
+#endif
+
+
+#define BMPTYPE_YUV 1
+#define BMPTYPE_RGB 2
+
+#define IMGTYPE_YUV111 0
+#define IMGTYPE_YUV422 1
+#define IMGTYPE_RGB24 2
+#define IMGTYPE_YUV411 3
+#define IMGTYPE_YUV211 4
+#define IMGTYPE_YUV420 5
+
+#define COMP_MSZH 0
+#define COMP_MSZH_NOCOMP 1
+#define COMP_ZLIB_HISPEED 1
+#define COMP_ZLIB_HICOMP 9
+#define COMP_ZLIB_NORMAL -1
+
+#define FLAG_MULTITHREAD 1
+#define FLAG_NULLFRAME 2
+#define FLAG_PNGFILTER 4
+#define FLAGMASK_UNUSED 0xf8
+
+#define CODEC_MSZH 1
+#define CODEC_ZLIB 3
+
+#define FOURCC_MSZH mmioFOURCC('M','S','Z','H')
+#define FOURCC_ZLIB mmioFOURCC('Z','L','I','B')
+
+/*
+ * Decoder context
+ */
+typedef struct LclContext {
+
+        AVCodecContext *avctx;
+        AVFrame pic;
+    PutBitContext pb;
+
+    // Image type
+    int imgtype;
+    // Compression type
+    int compression;
+    // Flags
+    int flags;
+    // Decompressed data size
+    unsigned int decomp_size;
+    // Decompression buffer
+    unsigned char* decomp_buf;
+    // Maximum compressed data size
+    unsigned int max_comp_size;
+    // Compression buffer
+    unsigned char* comp_buf;
+#ifdef CONFIG_ZLIB
+    z_stream zstream;
+#endif
+} LclContext;
+
+
+/*
+ *
+ * Helper functions
+ *
+ */
+static inline unsigned char fix (int pix14)
+{
+    int tmp;
+
+    tmp = (pix14 + 0x80000) >> 20;
+    if (tmp < 0)
+        return 0;
+    if (tmp > 255)
+        return 255;
+    return tmp;
+}
+
+
+
+static inline unsigned char get_b (unsigned char yq, signed char bq)
+{
+    return fix((yq << 20) + bq * 1858076);
+}
+
+
+
+static inline unsigned char get_g (unsigned char yq, signed char bq, signed char rq)
+{
+    return fix((yq << 20) - bq * 360857 - rq * 748830);
+}
+
+
+
+static inline unsigned char get_r (unsigned char yq, signed char rq)
+{
+    return fix((yq << 20) + rq * 1470103);
+}
+
+
+
+static unsigned int mszh_decomp(unsigned char * srcptr, int srclen, unsigned char * destptr, unsigned int destsize)
+{
+    unsigned char *destptr_bak = destptr;
+    unsigned char *destptr_end = destptr + destsize;
+    unsigned char mask = 0;
+    unsigned char maskbit = 0;
+    unsigned int ofs, cnt;
+
+    while ((srclen > 0) && (destptr < destptr_end)) {
+        if (maskbit == 0) {
+            mask = *(srcptr++);
+            maskbit = 8;
+            srclen--;
+            continue;
+        }
+        if ((mask & (1 << (--maskbit))) == 0) {
+            if (destptr + 4 > destptr_end)
+                break;
+            *(int*)destptr = *(int*)srcptr;
+            srclen -= 4;
+            destptr += 4;
+            srcptr += 4;
+        } else {
+            ofs = *(srcptr++);
+            cnt = *(srcptr++);
+            ofs += cnt * 256;;
+            cnt = ((cnt >> 3) & 0x1f) + 1;
+            ofs &= 0x7ff;
+            srclen -= 2;
+            cnt *= 4;
+            if (destptr + cnt > destptr_end) {
+                cnt =  destptr_end - destptr;
+            }
+            for (; cnt > 0; cnt--) {
+                *(destptr) = *(destptr - ofs);
+                destptr++;
+            }
+        }
+    }
+
+    return (destptr - destptr_bak);
+}
+
+
+
+#ifdef CONFIG_DECODERS
+/*
+ *
+ * Decode a frame
+ *
+ */
+static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
+{
+        LclContext * const c = (LclContext *)avctx->priv_data;
+        unsigned char *encoded = (unsigned char *)buf;
+    unsigned int pixel_ptr;
+    int row, col;
+    unsigned char *outptr;
+    unsigned int width = avctx->width; // Real image width
+    unsigned int height = avctx->height; // Real image height
+    unsigned int mszh_dlen;
+    unsigned char yq, y1q, uq, vq;
+    int uqvq;
+    unsigned int mthread_inlen, mthread_outlen;
+#ifdef CONFIG_ZLIB
+    int zret; // Zlib return code
+#endif
+    unsigned int len = buf_size;
+
+        if(c->pic.data[0])
+                avctx->release_buffer(avctx, &c->pic);
+
+        c->pic.reference = 0;
+        c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
+        if(avctx->get_buffer(avctx, &c->pic) < 0){
+                av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+                return -1;
+        }
+
+    outptr = c->pic.data[0]; // Output image pointer
+
+    /* Decompress frame */
+    switch (avctx->codec_id) {
+        case CODEC_ID_MSZH:
+            switch (c->compression) {
+                case COMP_MSZH:
+                    if (c->flags & FLAG_MULTITHREAD) {
+                        mthread_inlen = *((unsigned int*)encoded);
+                        mthread_outlen = *((unsigned int*)(encoded+4));
+                        if (mthread_outlen > c->decomp_size) // this should not happen
+                            mthread_outlen = c->decomp_size;
+                        mszh_dlen = mszh_decomp(encoded + 8, mthread_inlen, c->decomp_buf, c->decomp_size);
+                        if (mthread_outlen != mszh_dlen) {
+                            av_log(avctx, AV_LOG_ERROR, "Mthread1 decoded size differs (%d != %d)\n",
+                                   mthread_outlen, mszh_dlen);
+                            return -1;
+                        }
+                        mszh_dlen = mszh_decomp(encoded + 8 + mthread_inlen, len - mthread_inlen,
+                                                c->decomp_buf + mthread_outlen, c->decomp_size - mthread_outlen);
+                        if (mthread_outlen != mszh_dlen) {
+                            av_log(avctx, AV_LOG_ERROR, "Mthread2 decoded size differs (%d != %d)\n",
+                                   mthread_outlen, mszh_dlen);
+                            return -1;
+                        }
+                        encoded = c->decomp_buf;
+                        len = c->decomp_size;
+                    } else {
+                        mszh_dlen = mszh_decomp(encoded, len, c->decomp_buf, c->decomp_size);
+                        if (c->decomp_size != mszh_dlen) {
+                            av_log(avctx, AV_LOG_ERROR, "Decoded size differs (%d != %d)\n",
+                                   c->decomp_size, mszh_dlen);
+                            return -1;
+                        }
+                        encoded = c->decomp_buf;
+                        len = mszh_dlen;
+                    }
+                    break;
+                case COMP_MSZH_NOCOMP:
+                    break;
+                default:
+                    av_log(avctx, AV_LOG_ERROR, "BUG! Unknown MSZH compression in frame decoder.\n");
+                    return -1;
+            }
+            break;
+        case CODEC_ID_ZLIB:
+#ifdef CONFIG_ZLIB
+            /* Using the original dll with normal compression (-1) and RGB format
+             * gives a file with ZLIB fourcc, but frame is really uncompressed.
+             * To be sure that's true check also frame size */
+            if ((c->compression == COMP_ZLIB_NORMAL) && (c->imgtype == IMGTYPE_RGB24) &&
+               (len == width * height * 3))
+                break;
+            zret = inflateReset(&(c->zstream));
+            if (zret != Z_OK) {
+                av_log(avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", zret);
+                return -1;
+            }
+            if (c->flags & FLAG_MULTITHREAD) {
+                mthread_inlen = *((unsigned int*)encoded);
+                mthread_outlen = *((unsigned int*)(encoded+4));
+                if (mthread_outlen > c->decomp_size)
+                    mthread_outlen = c->decomp_size;
+                c->zstream.next_in = encoded + 8;
+                c->zstream.avail_in = mthread_inlen;
+                c->zstream.next_out = c->decomp_buf;
+                c->zstream.avail_out = c->decomp_size;
+                zret = inflate(&(c->zstream), Z_FINISH);
+                if ((zret != Z_OK) && (zret != Z_STREAM_END)) {
+                    av_log(avctx, AV_LOG_ERROR, "Mthread1 inflate error: %d\n", zret);
+                    return -1;
+                }
+                if (mthread_outlen != (unsigned int)(c->zstream.total_out)) {
+                    av_log(avctx, AV_LOG_ERROR, "Mthread1 decoded size differs (%u != %lu)\n",
+                           mthread_outlen, c->zstream.total_out);
+                    return -1;
+                }
+                zret = inflateReset(&(c->zstream));
+                if (zret != Z_OK) {
+                    av_log(avctx, AV_LOG_ERROR, "Mthread2 inflate reset error: %d\n", zret);
+                    return -1;
+                }
+                c->zstream.next_in = encoded + 8 + mthread_inlen;
+                c->zstream.avail_in = len - mthread_inlen;
+                c->zstream.next_out = c->decomp_buf + mthread_outlen;
+                c->zstream.avail_out = c->decomp_size - mthread_outlen;
+                zret = inflate(&(c->zstream), Z_FINISH);
+                if ((zret != Z_OK) && (zret != Z_STREAM_END)) {
+                    av_log(avctx, AV_LOG_ERROR, "Mthread2 inflate error: %d\n", zret);
+                    return -1;
+                }
+                if (mthread_outlen != (unsigned int)(c->zstream.total_out)) {
+                    av_log(avctx, AV_LOG_ERROR, "Mthread2 decoded size differs (%d != %lu)\n",
+                           mthread_outlen, c->zstream.total_out);
+                    return -1;
+                }
+            } else {
+                c->zstream.next_in = encoded;
+                c->zstream.avail_in = len;
+                c->zstream.next_out = c->decomp_buf;
+                c->zstream.avail_out = c->decomp_size;
+                zret = inflate(&(c->zstream), Z_FINISH);
+                if ((zret != Z_OK) && (zret != Z_STREAM_END)) {
+                    av_log(avctx, AV_LOG_ERROR, "Inflate error: %d\n", zret);
+                    return -1;
+                }
+                if (c->decomp_size != (unsigned int)(c->zstream.total_out)) {
+                    av_log(avctx, AV_LOG_ERROR, "Decoded size differs (%d != %lu)\n",
+                           c->decomp_size, c->zstream.total_out);
+                    return -1;
+                }
+            }
+            encoded = c->decomp_buf;
+            len = c->decomp_size;;
+#else
+            av_log(avctx, AV_LOG_ERROR, "BUG! Zlib support not compiled in frame decoder.\n");
+            return -1;
+#endif
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "BUG! Unknown codec in frame decoder compression switch.\n");
+            return -1;
+    }
+
+
+    /* Apply PNG filter */
+    if ((avctx->codec_id == CODEC_ID_ZLIB) && (c->flags & FLAG_PNGFILTER)) {
+        switch (c->imgtype) {
+            case IMGTYPE_YUV111:
+            case IMGTYPE_RGB24:
+                for (row = 0; row < height; row++) {
+                    pixel_ptr = row * width * 3;
+                    yq = encoded[pixel_ptr++];
+                    uqvq = encoded[pixel_ptr++];
+                    uqvq+=(encoded[pixel_ptr++] << 8);
+                    for (col = 1; col < width; col++) {
+                        encoded[pixel_ptr] = yq -= encoded[pixel_ptr];
+                        uqvq -= (encoded[pixel_ptr+1] | (encoded[pixel_ptr+2]<<8));
+                        encoded[pixel_ptr+1] = (uqvq) & 0xff;
+                        encoded[pixel_ptr+2] = ((uqvq)>>8) & 0xff;
+                        pixel_ptr += 3;
+                    }
+                }
+                break;
+            case IMGTYPE_YUV422:
+                for (row = 0; row < height; row++) {
+                    pixel_ptr = row * width * 2;
+                    yq = uq = vq =0;
+                    for (col = 0; col < width/4; col++) {
+                        encoded[pixel_ptr] = yq -= encoded[pixel_ptr];
+                        encoded[pixel_ptr+1] = yq -= encoded[pixel_ptr+1];
+                        encoded[pixel_ptr+2] = yq -= encoded[pixel_ptr+2];
+                        encoded[pixel_ptr+3] = yq -= encoded[pixel_ptr+3];
+                        encoded[pixel_ptr+4] = uq -= encoded[pixel_ptr+4];
+                        encoded[pixel_ptr+5] = uq -= encoded[pixel_ptr+5];
+                        encoded[pixel_ptr+6] = vq -= encoded[pixel_ptr+6];
+                        encoded[pixel_ptr+7] = vq -= encoded[pixel_ptr+7];
+                        pixel_ptr += 8;
+                    }
+                }
+                break;
+            case IMGTYPE_YUV411:
+                for (row = 0; row < height; row++) {
+                    pixel_ptr = row * width / 2 * 3;
+                    yq = uq = vq =0;
+                    for (col = 0; col < width/4; col++) {
+                        encoded[pixel_ptr] = yq -= encoded[pixel_ptr];
+                        encoded[pixel_ptr+1] = yq -= encoded[pixel_ptr+1];
+                        encoded[pixel_ptr+2] = yq -= encoded[pixel_ptr+2];
+                        encoded[pixel_ptr+3] = yq -= encoded[pixel_ptr+3];
+                        encoded[pixel_ptr+4] = uq -= encoded[pixel_ptr+4];
+                        encoded[pixel_ptr+5] = vq -= encoded[pixel_ptr+5];
+                        pixel_ptr += 6;
+                    }
+                }
+                break;
+            case IMGTYPE_YUV211:
+                for (row = 0; row < height; row++) {
+                    pixel_ptr = row * width * 2;
+                    yq = uq = vq =0;
+                    for (col = 0; col < width/2; col++) {
+                        encoded[pixel_ptr] = yq -= encoded[pixel_ptr];
+                        encoded[pixel_ptr+1] = yq -= encoded[pixel_ptr+1];
+                        encoded[pixel_ptr+2] = uq -= encoded[pixel_ptr+2];
+                        encoded[pixel_ptr+3] = vq -= encoded[pixel_ptr+3];
+                        pixel_ptr += 4;
+                    }
+                }
+                break;
+            case IMGTYPE_YUV420:
+                for (row = 0; row < height/2; row++) {
+                    pixel_ptr = row * width * 3;
+                    yq = y1q = uq = vq =0;
+                    for (col = 0; col < width/2; col++) {
+                        encoded[pixel_ptr] = yq -= encoded[pixel_ptr];
+                        encoded[pixel_ptr+1] = yq -= encoded[pixel_ptr+1];
+                        encoded[pixel_ptr+2] = y1q -= encoded[pixel_ptr+2];
+                        encoded[pixel_ptr+3] = y1q -= encoded[pixel_ptr+3];
+                        encoded[pixel_ptr+4] = uq -= encoded[pixel_ptr+4];
+                        encoded[pixel_ptr+5] = vq -= encoded[pixel_ptr+5];
+                        pixel_ptr += 6;
+                    }
+                }
+                break;
+            default:
+                av_log(avctx, AV_LOG_ERROR, "BUG! Unknown imagetype in pngfilter switch.\n");
+                return -1;
+        }
+    }
+
+    /* Convert colorspace */
+    switch (c->imgtype) {
+        case IMGTYPE_YUV111:
+            for (row = height - 1; row >= 0; row--) {
+                pixel_ptr = row * c->pic.linesize[0];
+                for (col = 0; col < width; col++) {
+                    outptr[pixel_ptr++] = get_b(encoded[0], encoded[1]);
+                    outptr[pixel_ptr++] = get_g(encoded[0], encoded[1], encoded[2]);
+                    outptr[pixel_ptr++] = get_r(encoded[0], encoded[2]);
+                    encoded += 3;
+                }
+            }
+            break;
+        case IMGTYPE_YUV422:
+            for (row = height - 1; row >= 0; row--) {
+                pixel_ptr = row * c->pic.linesize[0];
+                for (col = 0; col < width/4; col++) {
+                    outptr[pixel_ptr++] = get_b(encoded[0], encoded[4]);
+                    outptr[pixel_ptr++] = get_g(encoded[0], encoded[4], encoded[6]);
+                    outptr[pixel_ptr++] = get_r(encoded[0], encoded[6]);
+                    outptr[pixel_ptr++] = get_b(encoded[1], encoded[4]);
+                    outptr[pixel_ptr++] = get_g(encoded[1], encoded[4], encoded[6]);
+                    outptr[pixel_ptr++] = get_r(encoded[1], encoded[6]);
+                    outptr[pixel_ptr++] = get_b(encoded[2], encoded[5]);
+                    outptr[pixel_ptr++] = get_g(encoded[2], encoded[5], encoded[7]);
+                    outptr[pixel_ptr++] = get_r(encoded[2], encoded[7]);
+                    outptr[pixel_ptr++] = get_b(encoded[3], encoded[5]);
+                    outptr[pixel_ptr++] = get_g(encoded[3], encoded[5], encoded[7]);
+                    outptr[pixel_ptr++] = get_r(encoded[3], encoded[7]);
+                    encoded += 8;
+                }
+            }
+            break;
+        case IMGTYPE_RGB24:
+            for (row = height - 1; row >= 0; row--) {
+                pixel_ptr = row * c->pic.linesize[0];
+                for (col = 0; col < width; col++) {
+                    outptr[pixel_ptr++] = encoded[0];
+                    outptr[pixel_ptr++] = encoded[1];
+                    outptr[pixel_ptr++] = encoded[2];
+                    encoded += 3;
+                }
+            }
+            break;
+        case IMGTYPE_YUV411:
+            for (row = height - 1; row >= 0; row--) {
+                pixel_ptr = row * c->pic.linesize[0];
+                for (col = 0; col < width/4; col++) {
+                    outptr[pixel_ptr++] = get_b(encoded[0], encoded[4]);
+                    outptr[pixel_ptr++] = get_g(encoded[0], encoded[4], encoded[5]);
+                    outptr[pixel_ptr++] = get_r(encoded[0], encoded[5]);
+                    outptr[pixel_ptr++] = get_b(encoded[1], encoded[4]);
+                    outptr[pixel_ptr++] = get_g(encoded[1], encoded[4], encoded[5]);
+                    outptr[pixel_ptr++] = get_r(encoded[1], encoded[5]);
+                    outptr[pixel_ptr++] = get_b(encoded[2], encoded[4]);
+                    outptr[pixel_ptr++] = get_g(encoded[2], encoded[4], encoded[5]);
+                    outptr[pixel_ptr++] = get_r(encoded[2], encoded[5]);
+                    outptr[pixel_ptr++] = get_b(encoded[3], encoded[4]);
+                    outptr[pixel_ptr++] = get_g(encoded[3], encoded[4], encoded[5]);
+                    outptr[pixel_ptr++] = get_r(encoded[3], encoded[5]);
+                    encoded += 6;
+                }
+            }
+            break;
+        case IMGTYPE_YUV211:
+            for (row = height - 1; row >= 0; row--) {
+                pixel_ptr = row * c->pic.linesize[0];
+                for (col = 0; col < width/2; col++) {
+                    outptr[pixel_ptr++] = get_b(encoded[0], encoded[2]);
+                    outptr[pixel_ptr++] = get_g(encoded[0], encoded[2], encoded[3]);
+                    outptr[pixel_ptr++] = get_r(encoded[0], encoded[3]);
+                    outptr[pixel_ptr++] = get_b(encoded[1], encoded[2]);
+                    outptr[pixel_ptr++] = get_g(encoded[1], encoded[2], encoded[3]);
+                    outptr[pixel_ptr++] = get_r(encoded[1], encoded[3]);
+                    encoded += 4;
+                }
+            }
+            break;
+        case IMGTYPE_YUV420:
+            for (row = height / 2 - 1; row >= 0; row--) {
+                pixel_ptr = 2 * row * c->pic.linesize[0];
+                for (col = 0; col < width/2; col++) {
+                    outptr[pixel_ptr] = get_b(encoded[0], encoded[4]);
+                    outptr[pixel_ptr+1] = get_g(encoded[0], encoded[4], encoded[5]);
+                    outptr[pixel_ptr+2] = get_r(encoded[0], encoded[5]);
+                    outptr[pixel_ptr+3] = get_b(encoded[1], encoded[4]);
+                    outptr[pixel_ptr+4] = get_g(encoded[1], encoded[4], encoded[5]);
+                    outptr[pixel_ptr+5] = get_r(encoded[1], encoded[5]);
+                    outptr[pixel_ptr-c->pic.linesize[0]] = get_b(encoded[2], encoded[4]);
+                    outptr[pixel_ptr-c->pic.linesize[0]+1] = get_g(encoded[2], encoded[4], encoded[5]);
+                    outptr[pixel_ptr-c->pic.linesize[0]+2] = get_r(encoded[2], encoded[5]);
+                    outptr[pixel_ptr-c->pic.linesize[0]+3] = get_b(encoded[3], encoded[4]);
+                    outptr[pixel_ptr-c->pic.linesize[0]+4] = get_g(encoded[3], encoded[4], encoded[5]);
+                    outptr[pixel_ptr-c->pic.linesize[0]+5] = get_r(encoded[3], encoded[5]);
+                    pixel_ptr += 6;
+                    encoded += 6;
+                }
+            }
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "BUG! Unknown imagetype in image decoder.\n");
+            return -1;
+    }
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = c->pic;
+
+    /* always report that the buffer was completely consumed */
+    return buf_size;
+}
+#endif
+
+#ifdef CONFIG_ENCODERS
+/*
+ *
+ * Encode a frame
+ *
+ */
+static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
+    LclContext *c = avctx->priv_data;
+    AVFrame *pict = data;
+    AVFrame * const p = &c->pic;
+    int i;
+    int zret; // Zlib return code
+
+#ifndef CONFIG_ZLIB
+    av_log(avctx, AV_LOG_ERROR, "Zlib support not compiled in.\n");
+    return -1;
+#else
+
+    init_put_bits(&c->pb, buf, buf_size);
+
+    *p = *pict;
+    p->pict_type= FF_I_TYPE;
+    p->key_frame= 1;
+
+    if(avctx->pix_fmt != PIX_FMT_BGR24){
+        av_log(avctx, AV_LOG_ERROR, "Format not supported!\n");
+        return -1;
+    }
+
+    zret = deflateReset(&(c->zstream));
+    if (zret != Z_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Deflate reset error: %d\n", zret);
+        return -1;
+    }
+    c->zstream.next_out = c->comp_buf;
+    c->zstream.avail_out = c->max_comp_size;
+
+    for(i = avctx->height - 1; i >= 0; i--) {
+        c->zstream.next_in = p->data[0]+p->linesize[0]*i;
+        c->zstream.avail_in = avctx->width*3;
+        zret = deflate(&(c->zstream), Z_NO_FLUSH);
+        if (zret != Z_OK) {
+            av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret);
+            return -1;
+        }
+    }
+    zret = deflate(&(c->zstream), Z_FINISH);
+    if (zret != Z_STREAM_END) {
+        av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret);
+        return -1;
+    }
+
+    for (i = 0; i < c->zstream.total_out; i++)
+        put_bits(&c->pb, 8, c->comp_buf[i]);
+    flush_put_bits(&c->pb);
+
+    return c->zstream.total_out;
+#endif
+}
+#endif /* CONFIG_ENCODERS */
+
+#ifdef CONFIG_DECODERS
+/*
+ *
+ * Init lcl decoder
+ *
+ */
+static int decode_init(AVCodecContext *avctx)
+{
+    LclContext * const c = (LclContext *)avctx->priv_data;
+    unsigned int basesize = avctx->width * avctx->height;
+    unsigned int max_basesize = ((avctx->width + 3) & ~3) * ((avctx->height + 3) & ~3);
+    unsigned int max_decomp_size;
+    int zret; // Zlib return code
+
+    c->avctx = avctx;
+    avctx->has_b_frames = 0;
+
+    c->pic.data[0] = NULL;
+
+#ifdef CONFIG_ZLIB
+    // Needed if zlib unused or init aborted before inflateInit
+    memset(&(c->zstream), 0, sizeof(z_stream));
+#endif
+
+    if (avctx->extradata_size < 8) {
+        av_log(avctx, AV_LOG_ERROR, "Extradata size too small.\n");
+        return 1;
+    }
+
+    if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
+        return 1;
+    }
+
+    /* Check codec type */
+    if (((avctx->codec_id == CODEC_ID_MSZH)  && (*((char *)avctx->extradata + 7) != CODEC_MSZH)) ||
+        ((avctx->codec_id == CODEC_ID_ZLIB)  && (*((char *)avctx->extradata + 7) != CODEC_ZLIB))) {
+        av_log(avctx, AV_LOG_ERROR, "Codec id and codec type mismatch. This should not happen.\n");
+    }
+
+    /* Detect image type */
+    switch (c->imgtype = *((char *)avctx->extradata + 4)) {
+        case IMGTYPE_YUV111:
+            c->decomp_size = basesize * 3;
+            max_decomp_size = max_basesize * 3;
+            av_log(avctx, AV_LOG_INFO, "Image type is YUV 1:1:1.\n");
+            break;
+        case IMGTYPE_YUV422:
+            c->decomp_size = basesize * 2;
+            max_decomp_size = max_basesize * 2;
+            av_log(avctx, AV_LOG_INFO, "Image type is YUV 4:2:2.\n");
+            break;
+        case IMGTYPE_RGB24:
+            c->decomp_size = basesize * 3;
+            max_decomp_size = max_basesize * 3;
+            av_log(avctx, AV_LOG_INFO, "Image type is RGB 24.\n");
+            break;
+        case IMGTYPE_YUV411:
+            c->decomp_size = basesize / 2 * 3;
+            max_decomp_size = max_basesize / 2 * 3;
+            av_log(avctx, AV_LOG_INFO, "Image type is YUV 4:1:1.\n");
+            break;
+        case IMGTYPE_YUV211:
+            c->decomp_size = basesize * 2;
+            max_decomp_size = max_basesize * 2;
+            av_log(avctx, AV_LOG_INFO, "Image type is YUV 2:1:1.\n");
+            break;
+        case IMGTYPE_YUV420:
+            c->decomp_size = basesize / 2 * 3;
+            max_decomp_size = max_basesize / 2 * 3;
+            av_log(avctx, AV_LOG_INFO, "Image type is YUV 4:2:0.\n");
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Unsupported image format %d.\n", c->imgtype);
+            return 1;
+    }
+
+    /* Detect compression method */
+    c->compression = *((char *)avctx->extradata + 5);
+    switch (avctx->codec_id) {
+        case CODEC_ID_MSZH:
+            switch (c->compression) {
+                case COMP_MSZH:
+                    av_log(avctx, AV_LOG_INFO, "Compression enabled.\n");
+                    break;
+                case COMP_MSZH_NOCOMP:
+                    c->decomp_size = 0;
+                    av_log(avctx, AV_LOG_INFO, "No compression.\n");
+                    break;
+                default:
+                    av_log(avctx, AV_LOG_ERROR, "Unsupported compression format for MSZH (%d).\n", c->compression);
+                    return 1;
+            }
+            break;
+        case CODEC_ID_ZLIB:
+#ifdef CONFIG_ZLIB
+            switch (c->compression) {
+                case COMP_ZLIB_HISPEED:
+                    av_log(avctx, AV_LOG_INFO, "High speed compression.\n");
+                    break;
+                case COMP_ZLIB_HICOMP:
+                    av_log(avctx, AV_LOG_INFO, "High compression.\n");
+                    break;
+                case COMP_ZLIB_NORMAL:
+                    av_log(avctx, AV_LOG_INFO, "Normal compression.\n");
+                    break;
+                default:
+                    if ((c->compression < Z_NO_COMPRESSION) || (c->compression > Z_BEST_COMPRESSION)) {
+                            av_log(avctx, AV_LOG_ERROR, "Unsupported compression level for ZLIB: (%d).\n", c->compression);
+                        return 1;
+                    }
+                    av_log(avctx, AV_LOG_INFO, "Compression level for ZLIB: (%d).\n", c->compression);
+            }
+#else
+            av_log(avctx, AV_LOG_ERROR, "Zlib support not compiled.\n");
+            return 1;
+#endif
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "BUG! Unknown codec in compression switch.\n");
+            return 1;
+    }
+
+    /* Allocate decompression buffer */
+    if (c->decomp_size) {
+        if ((c->decomp_buf = av_malloc(max_decomp_size)) == NULL) {
+            av_log(avctx, AV_LOG_ERROR, "Can't allocate decompression buffer.\n");
+            return 1;
+        }
+    }
+
+    /* Detect flags */
+    c->flags = *((char *)avctx->extradata + 6);
+    if (c->flags & FLAG_MULTITHREAD)
+        av_log(avctx, AV_LOG_INFO, "Multithread encoder flag set.\n");
+    if (c->flags & FLAG_NULLFRAME)
+        av_log(avctx, AV_LOG_INFO, "Nullframe insertion flag set.\n");
+    if ((avctx->codec_id == CODEC_ID_ZLIB) && (c->flags & FLAG_PNGFILTER))
+        av_log(avctx, AV_LOG_INFO, "PNG filter flag set.\n");
+    if (c->flags & FLAGMASK_UNUSED)
+        av_log(avctx, AV_LOG_ERROR, "Unknown flag set (%d).\n", c->flags);
+
+    /* If needed init zlib */
+    if (avctx->codec_id == CODEC_ID_ZLIB) {
+#ifdef CONFIG_ZLIB
+        c->zstream.zalloc = Z_NULL;
+        c->zstream.zfree = Z_NULL;
+        c->zstream.opaque = Z_NULL;
+        zret = inflateInit(&(c->zstream));
+        if (zret != Z_OK) {
+            av_log(avctx, AV_LOG_ERROR, "Inflate init error: %d\n", zret);
+            return 1;
+        }
+#else
+    av_log(avctx, AV_LOG_ERROR, "Zlib support not compiled.\n");
+    return 1;
+#endif
+    }
+
+    avctx->pix_fmt = PIX_FMT_BGR24;
+
+    return 0;
+}
+#endif /* CONFIG_DECODERS */
+
+#ifdef CONFIG_ENCODERS
+/*
+ *
+ * Init lcl encoder
+ *
+ */
+static int encode_init(AVCodecContext *avctx)
+{
+    LclContext *c = avctx->priv_data;
+    int zret; // Zlib return code
+
+#ifndef CONFIG_ZLIB
+    av_log(avctx, AV_LOG_ERROR, "Zlib support not compiled.\n");
+    return 1;
+#else
+
+    c->avctx= avctx;
+
+    assert(avctx->width && avctx->height);
+
+    avctx->extradata= av_mallocz(8);
+    avctx->coded_frame= &c->pic;
+
+    // Will be user settable someday
+    c->compression = 6;
+    c->flags = 0;
+
+    switch(avctx->pix_fmt){
+        case PIX_FMT_BGR24:
+            c->imgtype = IMGTYPE_RGB24;
+            c->decomp_size = avctx->width * avctx->height * 3;
+            avctx->bits_per_sample= 24;
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Format %d not supported\n", avctx->pix_fmt);
+            return -1;
+    }
+
+    ((uint8_t*)avctx->extradata)[0]= 4;
+    ((uint8_t*)avctx->extradata)[1]= 0;
+    ((uint8_t*)avctx->extradata)[2]= 0;
+    ((uint8_t*)avctx->extradata)[3]= 0;
+    ((uint8_t*)avctx->extradata)[4]= c->imgtype;
+    ((uint8_t*)avctx->extradata)[5]= c->compression;
+    ((uint8_t*)avctx->extradata)[6]= c->flags;
+    ((uint8_t*)avctx->extradata)[7]= CODEC_ZLIB;
+    c->avctx->extradata_size= 8;
+
+    c->zstream.zalloc = Z_NULL;
+    c->zstream.zfree = Z_NULL;
+    c->zstream.opaque = Z_NULL;
+    zret = deflateInit(&(c->zstream), c->compression);
+    if (zret != Z_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Deflate init error: %d\n", zret);
+        return 1;
+    }
+
+        /* Conservative upper bound taken from zlib v1.2.1 source */
+        c->max_comp_size = c->decomp_size + ((c->decomp_size + 7) >> 3) +
+                           ((c->decomp_size + 63) >> 6) + 11;
+    if ((c->comp_buf = av_malloc(c->max_comp_size)) == NULL) {
+        av_log(avctx, AV_LOG_ERROR, "Can't allocate compression buffer.\n");
+        return 1;
+    }
+
+    return 0;
+#endif
+}
+#endif /* CONFIG_ENCODERS */
+
+
+
+#ifdef CONFIG_DECODERS
+/*
+ *
+ * Uninit lcl decoder
+ *
+ */
+static int decode_end(AVCodecContext *avctx)
+{
+        LclContext * const c = (LclContext *)avctx->priv_data;
+
+        if (c->pic.data[0])
+                avctx->release_buffer(avctx, &c->pic);
+#ifdef CONFIG_ZLIB
+    inflateEnd(&(c->zstream));
+#endif
+
+        return 0;
+}
+#endif
+
+#ifdef CONFIG_ENCODERS
+/*
+ *
+ * Uninit lcl encoder
+ *
+ */
+static int encode_end(AVCodecContext *avctx)
+{
+    LclContext *c = avctx->priv_data;
+
+    av_freep(&avctx->extradata);
+    av_freep(&c->comp_buf);
+#ifdef CONFIG_ZLIB
+    deflateEnd(&(c->zstream));
+#endif
+
+    return 0;
+}
+#endif
+
+#ifdef CONFIG_MSZH_DECODER
+AVCodec mszh_decoder = {
+        "mszh",
+        CODEC_TYPE_VIDEO,
+        CODEC_ID_MSZH,
+        sizeof(LclContext),
+        decode_init,
+        NULL,
+        decode_end,
+        decode_frame,
+        CODEC_CAP_DR1,
+};
+#endif
+
+#ifdef CONFIG_ZLIB_DECODER
+AVCodec zlib_decoder = {
+        "zlib",
+        CODEC_TYPE_VIDEO,
+        CODEC_ID_ZLIB,
+        sizeof(LclContext),
+        decode_init,
+        NULL,
+        decode_end,
+        decode_frame,
+        CODEC_CAP_DR1,
+};
+#endif
+
+#ifdef CONFIG_ENCODERS
+
+AVCodec zlib_encoder = {
+    "zlib",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_ZLIB,
+    sizeof(LclContext),
+    encode_init,
+    encode_frame,
+    encode_end,
+};
+
+#endif //CONFIG_ENCODERS
diff --git a/contrib/ffmpeg/libavcodec/liba52/a52.h b/contrib/ffmpeg/libavcodec/liba52/a52.h
new file mode 100644
index 000000000..f2ea5f836
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/liba52/a52.h
@@ -0,0 +1,73 @@
+/*
+ * a52.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef A52_H
+#define A52_H
+
+#include "../avcodec.h"
+
+#undef malloc
+#undef free
+#undef realloc
+
+#if defined(LIBA52_FIXED)
+typedef int32_t sample_t;
+typedef int32_t level_t;
+#elif defined(LIBA52_DOUBLE)
+typedef double sample_t;
+typedef double level_t;
+#else
+typedef float sample_t;
+typedef float level_t;
+#endif
+
+typedef struct a52_state_s a52_state_t;
+
+#define A52_CHANNEL 0
+#define A52_MONO 1
+#define A52_STEREO 2
+#define A52_3F 3
+#define A52_2F1R 4
+#define A52_3F1R 5
+#define A52_2F2R 6
+#define A52_3F2R 7
+#define A52_CHANNEL1 8
+#define A52_CHANNEL2 9
+#define A52_DOLBY 10
+#define A52_CHANNEL_MASK 15
+
+#define A52_LFE 16
+#define A52_ADJUST_LEVEL 32
+
+a52_state_t * a52_init (uint32_t mm_accel);
+sample_t * a52_samples (a52_state_t * state);
+int a52_syncinfo (uint8_t * buf, int * flags,
+		  int * sample_rate, int * bit_rate);
+int a52_frame (a52_state_t * state, uint8_t * buf, int * flags,
+	       level_t * level, sample_t bias);
+void a52_dynrng (a52_state_t * state,
+		 level_t (* call) (level_t, void *), void * data);
+int a52_block (a52_state_t * state);
+void a52_free (a52_state_t * state);
+
+#endif /* A52_H */
diff --git a/contrib/ffmpeg/libavcodec/liba52/a52_internal.h b/contrib/ffmpeg/libavcodec/liba52/a52_internal.h
new file mode 100644
index 000000000..49fd4ef99
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/liba52/a52_internal.h
@@ -0,0 +1,162 @@
+/*
+ * a52_internal.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+typedef struct {
+    uint8_t bai;		/* fine SNR offset, fast gain */
+    uint8_t deltbae;		/* delta bit allocation exists */
+    int8_t deltba[50];		/* per-band delta bit allocation */
+} ba_t;
+
+typedef struct {
+    uint8_t exp[256];		/* decoded channel exponents */
+    int8_t bap[256];		/* derived channel bit allocation */
+} expbap_t;
+
+struct a52_state_s {
+    uint8_t fscod;		/* sample rate */
+    uint8_t halfrate;		/* halfrate factor */
+    uint8_t acmod;		/* coded channels */
+    uint8_t lfeon;		/* coded lfe channel */
+    level_t clev;		/* centre channel mix level */
+    level_t slev;		/* surround channels mix level */
+
+    int output;			/* type of output */
+    level_t level;		/* output level */
+    sample_t bias;		/* output bias */
+
+    int dynrnge;		/* apply dynamic range */
+    level_t dynrng;		/* dynamic range */
+    void * dynrngdata;		/* dynamic range callback funtion and data */
+    level_t (* dynrngcall) (level_t range, void * dynrngdata);
+
+    uint8_t chincpl;		/* channel coupled */
+    uint8_t phsflginu;		/* phase flags in use (stereo only) */
+    uint8_t cplstrtmant;	/* coupling channel start mantissa */
+    uint8_t cplendmant;		/* coupling channel end mantissa */
+    uint32_t cplbndstrc;	/* coupling band structure */
+    level_t cplco[5][18];	/* coupling coordinates */
+
+    /* derived information */
+    uint8_t cplstrtbnd;		/* coupling start band (for bit allocation) */
+    uint8_t ncplbnd;		/* number of coupling bands */
+
+    uint8_t rematflg;		/* stereo rematrixing */
+
+    uint8_t endmant[5];		/* channel end mantissa */
+
+    uint16_t bai;		/* bit allocation information */
+
+    uint32_t * buffer_start;
+    uint16_t lfsr_state;	/* dither state */
+    uint32_t bits_left;
+    uint32_t current_word;
+
+    uint8_t csnroffst;		/* coarse SNR offset */
+    ba_t cplba;			/* coupling bit allocation parameters */
+    ba_t ba[5];			/* channel bit allocation parameters */
+    ba_t lfeba;			/* lfe bit allocation parameters */
+
+    uint8_t cplfleak;		/* coupling fast leak init */
+    uint8_t cplsleak;		/* coupling slow leak init */
+
+    expbap_t cpl_expbap;
+    expbap_t fbw_expbap[5];
+    expbap_t lfe_expbap;
+
+    sample_t * samples;
+    int downmixed;
+};
+
+#define LEVEL_PLUS6DB 2.0
+#define LEVEL_PLUS3DB 1.4142135623730951
+#define LEVEL_3DB 0.7071067811865476
+#define LEVEL_45DB 0.5946035575013605
+#define LEVEL_6DB 0.5
+
+#define EXP_REUSE (0)
+#define EXP_D15   (1)
+#define EXP_D25   (2)
+#define EXP_D45   (3)
+
+#define DELTA_BIT_REUSE (0)
+#define DELTA_BIT_NEW (1)
+#define DELTA_BIT_NONE (2)
+#define DELTA_BIT_RESERVED (3)
+
+void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart,
+		       int start, int end, int fastleak, int slowleak,
+		       expbap_t * expbap);
+
+int a52_downmix_init (int input, int flags, level_t * level,
+		      level_t clev, level_t slev);
+int a52_downmix_coeff (level_t * coeff, int acmod, int output, level_t level,
+		       level_t clev, level_t slev);
+void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias,
+		  level_t clev, level_t slev);
+void a52_upmix (sample_t * samples, int acmod, int output);
+
+void a52_imdct_init (uint32_t mm_accel);
+void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias);
+void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias);
+//extern void (* a52_imdct_256) (sample_t data[], sample_t delay[], sample_t bias);
+//extern void (* a52_imdct_512) (sample_t data[], sample_t delay[], sample_t bias);
+
+#define ROUND(x) ((int)((x) + ((x) > 0 ? 0.5 : -0.5)))
+
+#ifndef LIBA52_FIXED
+
+typedef sample_t quantizer_t;
+#define SAMPLE(x) (x)
+#define LEVEL(x) (x)
+#define MUL(a,b) ((a) * (b))
+#define MUL_L(a,b) ((a) * (b))
+#define MUL_C(a,b) ((a) * (b))
+#define DIV(a,b) ((a) / (b))
+#define BIAS(x) ((x) + bias)
+
+#else /* LIBA52_FIXED */
+
+typedef int16_t quantizer_t;
+#define SAMPLE(x) (sample_t)((x) * (1 << 30))
+#define LEVEL(x) (level_t)((x) * (1 << 26))
+
+#if 0
+#define MUL(a,b) ((int)(((int64_t)(a) * (b) + (1 << 29)) >> 30))
+#define MUL_L(a,b) ((int)(((int64_t)(a) * (b) + (1 << 25)) >> 26))
+#elif 1
+#define MUL(a,b) \
+({ int32_t _ta=(a), _tb=(b), _tc; \
+   _tc=(_ta & 0xffff)*(_tb >> 16)+(_ta >> 16)*(_tb & 0xffff); (int32_t)(((_tc >> 14))+ (((_ta >> 16)*(_tb >> 16)) << 2 )); })
+#define MUL_L(a,b) \
+({ int32_t _ta=(a), _tb=(b), _tc; \
+   _tc=(_ta & 0xffff)*(_tb >> 16)+(_ta >> 16)*(_tb & 0xffff); (int32_t)((_tc >> 10) + (((_ta >> 16)*(_tb >> 16)) << 6)); })
+#else
+#define MUL(a,b) (((a) >> 15) * ((b) >> 15))
+#define MUL_L(a,b) (((a) >> 13) * ((b) >> 13))
+#endif
+
+#define MUL_C(a,b) MUL_L (a, LEVEL (b))
+#define DIV(a,b) ((((int64_t)LEVEL (a)) << 26) / (b))
+#define BIAS(x) (x)
+
+#endif
diff --git a/contrib/ffmpeg/libavcodec/liba52/a52_util.h b/contrib/ffmpeg/libavcodec/liba52/a52_util.h
new file mode 100644
index 000000000..8ef2cece9
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/liba52/a52_util.h
@@ -0,0 +1,32 @@
+/*
+ * a52_util.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef A52_UTIL_H
+#define A52_UTIL_H
+
+uint16_t a52_crc16_block(uint8_t *data,uint32_t num_bytes);
+
+void* a52_resample_init(uint32_t mm_accel,int flags,int chans);
+extern int (* a52_resample) (float * _f, int16_t * s16);
+
+#endif /* A52_H */
diff --git a/contrib/ffmpeg/libavcodec/liba52/bit_allocate.c b/contrib/ffmpeg/libavcodec/liba52/bit_allocate.c
new file mode 100644
index 000000000..415a08d21
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/liba52/bit_allocate.c
@@ -0,0 +1,260 @@
+/*
+ * bit_allocate.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "a52.h"
+#include "a52_internal.h"
+
+static int hthtab[3][50] = {
+    {0x730, 0x730, 0x7c0, 0x800, 0x820, 0x840, 0x850, 0x850, 0x860, 0x860,
+     0x860, 0x860, 0x860, 0x870, 0x870, 0x870, 0x880, 0x880, 0x890, 0x890,
+     0x8a0, 0x8a0, 0x8b0, 0x8b0, 0x8c0, 0x8c0, 0x8d0, 0x8e0, 0x8f0, 0x900,
+     0x910, 0x910, 0x910, 0x910, 0x900, 0x8f0, 0x8c0, 0x870, 0x820, 0x7e0,
+     0x7a0, 0x770, 0x760, 0x7a0, 0x7c0, 0x7c0, 0x6e0, 0x400, 0x3c0, 0x3c0},
+    {0x710, 0x710, 0x7a0, 0x7f0, 0x820, 0x830, 0x840, 0x850, 0x850, 0x860,
+     0x860, 0x860, 0x860, 0x860, 0x870, 0x870, 0x870, 0x880, 0x880, 0x880,
+     0x890, 0x890, 0x8a0, 0x8a0, 0x8b0, 0x8b0, 0x8c0, 0x8c0, 0x8e0, 0x8f0,
+     0x900, 0x910, 0x910, 0x910, 0x910, 0x900, 0x8e0, 0x8b0, 0x870, 0x820,
+     0x7e0, 0x7b0, 0x760, 0x770, 0x7a0, 0x7c0, 0x780, 0x5d0, 0x3c0, 0x3c0},
+    {0x680, 0x680, 0x750, 0x7b0, 0x7e0, 0x810, 0x820, 0x830, 0x840, 0x850,
+     0x850, 0x850, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860,
+     0x870, 0x870, 0x870, 0x870, 0x880, 0x880, 0x880, 0x890, 0x8a0, 0x8b0,
+     0x8c0, 0x8d0, 0x8e0, 0x8f0, 0x900, 0x910, 0x910, 0x910, 0x900, 0x8f0,
+     0x8d0, 0x8b0, 0x840, 0x7f0, 0x790, 0x760, 0x7a0, 0x7c0, 0x7b0, 0x720}
+};
+
+static int8_t baptab[305] = {
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,	/* 93 padding elems */
+
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 14, 14, 14, 14, 14, 14, 14,
+    14, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10,  9,  9,  9,
+     9,  8,  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  5,  5,  5,
+     5,  4,  4, -3, -3,  3,  3,  3, -2, -2, -1, -1, -1, -1, -1,  0,
+
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0					/* 148 padding elems */
+};
+
+static int bndtab[30] = {21, 22,  23,  24,  25,  26,  27,  28,  31,  34,
+			 37, 40,  43,  46,  49,  55,  61,  67,  73,  79,
+			 85, 97, 109, 121, 133, 157, 181, 205, 229, 253};
+
+static int8_t latab[256] = {
+    -64, -63, -62, -61, -60, -59, -58, -57, -56, -55, -54, -53,
+    -52, -52, -51, -50, -49, -48, -47, -47, -46, -45, -44, -44,
+    -43, -42, -41, -41, -40, -39, -38, -38, -37, -36, -36, -35,
+    -35, -34, -33, -33, -32, -32, -31, -30, -30, -29, -29, -28,
+    -28, -27, -27, -26, -26, -25, -25, -24, -24, -23, -23, -22,
+    -22, -21, -21, -21, -20, -20, -19, -19, -19, -18, -18, -18,
+    -17, -17, -17, -16, -16, -16, -15, -15, -15, -14, -14, -14,
+    -13, -13, -13, -13, -12, -12, -12, -12, -11, -11, -11, -11,
+    -10, -10, -10, -10, -10,  -9,  -9,  -9,  -9,  -9,  -8,  -8,
+     -8,  -8,  -8,  -8,  -7,  -7,  -7,  -7,  -7,  -7,  -6,  -6,
+     -6,  -6,  -6,  -6,  -6,  -6,  -5,  -5,  -5,  -5,  -5,  -5,
+     -5,  -5,  -4,  -4,  -4,  -4,  -4,  -4,  -4,  -4,  -4,  -4,
+     -4,  -3,  -3,  -3,  -3,  -3,  -3,  -3,  -3,  -3,  -3,  -3,
+     -3,  -3,  -3,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,
+     -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -1,  -1,
+     -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+     -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+     -1,  -1,  -1,  -1,  -1,  -1,   0,   0,   0,   0,   0,   0,
+      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+      0,   0,   0,   0
+};
+
+#define UPDATE_LEAK() 		\
+do {				\
+    fastleak += fdecay;		\
+    if (fastleak > psd + fgain)	\
+	fastleak = psd + fgain;	\
+    slowleak += sdecay;		\
+    if (slowleak > psd + sgain)	\
+	slowleak = psd + sgain;	\
+} while (0)
+
+#define COMPUTE_MASK()				\
+do {						\
+    if (psd > dbknee)				\
+	mask -= (psd - dbknee) >> 2;		\
+    if (mask > hth [i >> halfrate])		\
+	mask = hth [i >> halfrate];		\
+    mask -= snroffset + 128 * deltba[i];	\
+    mask = (mask > 0) ? 0 : ((-mask) >> 5);	\
+    mask -= floor;				\
+} while (0)
+
+void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart,
+		       int start, int end, int fastleak, int slowleak,
+		       expbap_t * expbap)
+{
+    static int slowgain[4] = {0x540, 0x4d8, 0x478, 0x410};
+    static int dbpbtab[4]  = {0xc00, 0x500, 0x300, 0x100};
+    static int floortab[8] = {0x910, 0x950, 0x990, 0x9d0,
+			      0xa10, 0xa90, 0xb10, 0x1400};
+
+    int i, j;
+    uint8_t * exp;
+    int8_t * bap;
+    int fdecay, fgain, sdecay, sgain, dbknee, floor, snroffset;
+    int psd, mask;
+    int8_t * deltba;
+    int * hth;
+    int halfrate;
+
+    halfrate = state->halfrate;
+    fdecay = (63 + 20 * ((state->bai >> 7) & 3)) >> halfrate;	/* fdcycod */
+    fgain = 128 + 128 * (ba->bai & 7);				/* fgaincod */
+    sdecay = (15 + 2 * (state->bai >> 9)) >> halfrate;		/* sdcycod */
+    sgain = slowgain[(state->bai >> 5) & 3];			/* sgaincod */
+    dbknee = dbpbtab[(state->bai >> 3) & 3];			/* dbpbcod */
+    hth = hthtab[state->fscod];
+    /*
+     * if there is no delta bit allocation, make deltba point to an area
+     * known to contain zeroes. baptab+156 here.
+     */
+    deltba = (ba->deltbae == DELTA_BIT_NONE) ? baptab + 156 : ba->deltba;
+    floor = floortab[state->bai & 7];				/* floorcod */
+    snroffset = 960 - 64 * state->csnroffst - 4 * (ba->bai >> 3) + floor;
+    floor >>= 5;
+
+    exp = expbap->exp;
+    bap = expbap->bap;
+
+    i = bndstart;
+    j = start;
+    if (start == 0) {	/* not the coupling channel */
+	int lowcomp;
+
+	lowcomp = 0;
+	j = end - 1;
+	do {
+	    if (i < j) {
+		if (exp[i+1] == exp[i] - 2)
+		    lowcomp = 384;
+		else if (lowcomp && (exp[i+1] > exp[i]))
+		    lowcomp -= 64;
+	    }
+	    psd = 128 * exp[i];
+	    mask = psd + fgain + lowcomp;
+	    COMPUTE_MASK ();
+	    bap[i] = (baptab+156)[mask + 4 * exp[i]];
+	    i++;
+	} while ((i < 3) || ((i < 7) && (exp[i] > exp[i-1])));
+	fastleak = psd + fgain;
+	slowleak = psd + sgain;
+
+	while (i < 7) {
+	    if (i < j) {
+		if (exp[i+1] == exp[i] - 2)
+		    lowcomp = 384;
+		else if (lowcomp && (exp[i+1] > exp[i]))
+		    lowcomp -= 64;
+	    }
+	    psd = 128 * exp[i];
+	    UPDATE_LEAK ();
+	    mask = ((fastleak + lowcomp < slowleak) ?
+		    fastleak + lowcomp : slowleak);
+	    COMPUTE_MASK ();
+	    bap[i] = (baptab+156)[mask + 4 * exp[i]];
+	    i++;
+	}
+
+	if (end == 7)	/* lfe channel */
+	    return;
+
+	do {
+	    if (exp[i+1] == exp[i] - 2)
+		lowcomp = 320;
+	    else if (lowcomp && (exp[i+1] > exp[i]))
+		lowcomp -= 64;
+	    psd = 128 * exp[i];
+	    UPDATE_LEAK ();
+	    mask = ((fastleak + lowcomp < slowleak) ?
+		    fastleak + lowcomp : slowleak);
+	    COMPUTE_MASK ();
+	    bap[i] = (baptab+156)[mask + 4 * exp[i]];
+	    i++;
+	} while (i < 20);
+
+	while (lowcomp > 128) {		/* two iterations maximum */
+	    lowcomp -= 128;
+	    psd = 128 * exp[i];
+	    UPDATE_LEAK ();
+	    mask = ((fastleak + lowcomp < slowleak) ?
+		    fastleak + lowcomp : slowleak);
+	    COMPUTE_MASK ();
+	    bap[i] = (baptab+156)[mask + 4 * exp[i]];
+	    i++;
+	}
+	j = i;
+    }
+
+    do {
+	int startband, endband;
+
+	startband = j;
+	endband = (bndtab[i-20] < end) ? bndtab[i-20] : end;
+	psd = 128 * exp[j++];
+	while (j < endband) {
+	    int next, delta;
+
+	    next = 128 * exp[j++];
+	    delta = next - psd;
+	    switch (delta >> 9) {
+	    case -6: case -5: case -4: case -3: case -2:
+		psd = next;
+		break;
+	    case -1:
+		psd = next + latab[(-delta) >> 1];
+		break;
+	    case 0:
+		psd += latab[delta >> 1];
+		break;
+	    }
+	}
+	/* minpsd = -289 */
+	UPDATE_LEAK ();
+	mask = (fastleak < slowleak) ? fastleak : slowleak;
+	COMPUTE_MASK ();
+	i++;
+	j = startband;
+	do {
+	    /* max(mask+4*exp)=147=-(minpsd+fgain-deltba-snroffset)>>5+4*exp */
+	    /* min(mask+4*exp)=-156=-(sgain-deltba-snroffset)>>5 */
+	    bap[j] = (baptab+156)[mask + 4 * exp[j]];
+	} while (++j < endband);
+    } while (j < end);
+}
diff --git a/contrib/ffmpeg/libavcodec/liba52/bitstream.c b/contrib/ffmpeg/libavcodec/liba52/bitstream.c
new file mode 100644
index 000000000..f6b05c5e6
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/liba52/bitstream.c
@@ -0,0 +1,91 @@
+/*
+ * bitstream.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "a52.h"
+#include "a52_internal.h"
+#include "bitstream.h"
+
+#define BUFFER_SIZE 4096
+
+void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf)
+{
+    int align;
+
+    align = (long)buf & 3;
+    state->buffer_start = (uint32_t *) (buf - align);
+    state->bits_left = 0;
+    state->current_word = 0;
+    bitstream_get (state, align * 8);
+}
+
+static inline void bitstream_fill_current (a52_state_t * state)
+{
+    uint32_t tmp;
+
+    tmp = *(state->buffer_start++);
+    state->current_word = swab32 (tmp);
+}
+
+/*
+ * The fast paths for _get is in the
+ * bitstream.h header file so it can be inlined.
+ *
+ * The "bottom half" of this routine is suffixed _bh
+ *
+ * -ah
+ */
+
+uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits)
+{
+    uint32_t result;
+
+    num_bits -= state->bits_left;
+    result = ((state->current_word << (32 - state->bits_left)) >>
+	      (32 - state->bits_left));
+
+    bitstream_fill_current (state);
+
+    if (num_bits != 0)
+	result = (result << num_bits) | (state->current_word >> (32 - num_bits));
+
+    state->bits_left = 32 - num_bits;
+
+    return result;
+}
+
+int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits)
+{
+    int32_t result;
+
+    num_bits -= state->bits_left;
+    result = ((((int32_t)state->current_word) << (32 - state->bits_left)) >>
+	      (32 - state->bits_left));
+
+    bitstream_fill_current(state);
+
+    if (num_bits != 0)
+	result = (result << num_bits) | (state->current_word >> (32 - num_bits));
+
+    state->bits_left = 32 - num_bits;
+
+    return result;
+}
diff --git a/contrib/ffmpeg/libavcodec/liba52/bitstream.h b/contrib/ffmpeg/libavcodec/liba52/bitstream.h
new file mode 100644
index 000000000..4a64bf3d9
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/liba52/bitstream.h
@@ -0,0 +1,77 @@
+/*
+ * bitstream.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* (stolen from the kernel) */
+#ifdef WORDS_BIGENDIAN
+
+#	define swab32(x) (x)
+
+#else
+
+#	if 0 && defined (__i386__)
+
+#	define swab32(x) __i386_swab32(x)
+	static inline const uint32_t __i386_swab32(uint32_t x)
+	{
+		__asm__("bswap %0" : "=r" (x) : "0" (x));
+		return x;
+	}
+
+#	else
+
+#	define swab32(x)\
+((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) |  \
+ (((uint8_t*)&x)[2] << 8)  | (((uint8_t*)&x)[3]))
+
+#	endif
+#endif
+
+void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf);
+uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits);
+int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits);
+
+static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits)
+{
+    uint32_t result;
+
+    if (num_bits < state->bits_left) {
+	result = (state->current_word << (32 - state->bits_left)) >> (32 - num_bits);
+	state->bits_left -= num_bits;
+	return result;
+    }
+
+    return a52_bitstream_get_bh (state, num_bits);
+}
+
+static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits)
+{
+    int32_t result;
+
+    if (num_bits < state->bits_left) {
+	result = (((int32_t)state->current_word) << (32 - state->bits_left)) >> (32 - num_bits);
+	state->bits_left -= num_bits;
+	return result;
+    }
+
+    return a52_bitstream_get_bh_2 (state, num_bits);
+}
diff --git a/contrib/ffmpeg/libavcodec/liba52/crc.c b/contrib/ffmpeg/libavcodec/liba52/crc.c
new file mode 100644
index 000000000..1ec4b085f
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/liba52/crc.c
@@ -0,0 +1,73 @@
+/*
+ *    crc.c
+ *
+ *	Copyright (C) Aaron Holtzman - May 1999
+ *
+ *  This file is part of ac3dec, a free Dolby AC-3 stream decoder.
+ *
+ *  ac3dec is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  ac3dec is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+
+static const uint16_t crc_lut[256] =
+{
+	0x0000,0x8005,0x800f,0x000a,0x801b,0x001e,0x0014,0x8011,
+	0x8033,0x0036,0x003c,0x8039,0x0028,0x802d,0x8027,0x0022,
+	0x8063,0x0066,0x006c,0x8069,0x0078,0x807d,0x8077,0x0072,
+	0x0050,0x8055,0x805f,0x005a,0x804b,0x004e,0x0044,0x8041,
+	0x80c3,0x00c6,0x00cc,0x80c9,0x00d8,0x80dd,0x80d7,0x00d2,
+	0x00f0,0x80f5,0x80ff,0x00fa,0x80eb,0x00ee,0x00e4,0x80e1,
+	0x00a0,0x80a5,0x80af,0x00aa,0x80bb,0x00be,0x00b4,0x80b1,
+	0x8093,0x0096,0x009c,0x8099,0x0088,0x808d,0x8087,0x0082,
+	0x8183,0x0186,0x018c,0x8189,0x0198,0x819d,0x8197,0x0192,
+	0x01b0,0x81b5,0x81bf,0x01ba,0x81ab,0x01ae,0x01a4,0x81a1,
+	0x01e0,0x81e5,0x81ef,0x01ea,0x81fb,0x01fe,0x01f4,0x81f1,
+	0x81d3,0x01d6,0x01dc,0x81d9,0x01c8,0x81cd,0x81c7,0x01c2,
+	0x0140,0x8145,0x814f,0x014a,0x815b,0x015e,0x0154,0x8151,
+	0x8173,0x0176,0x017c,0x8179,0x0168,0x816d,0x8167,0x0162,
+	0x8123,0x0126,0x012c,0x8129,0x0138,0x813d,0x8137,0x0132,
+	0x0110,0x8115,0x811f,0x011a,0x810b,0x010e,0x0104,0x8101,
+	0x8303,0x0306,0x030c,0x8309,0x0318,0x831d,0x8317,0x0312,
+	0x0330,0x8335,0x833f,0x033a,0x832b,0x032e,0x0324,0x8321,
+	0x0360,0x8365,0x836f,0x036a,0x837b,0x037e,0x0374,0x8371,
+	0x8353,0x0356,0x035c,0x8359,0x0348,0x834d,0x8347,0x0342,
+	0x03c0,0x83c5,0x83cf,0x03ca,0x83db,0x03de,0x03d4,0x83d1,
+	0x83f3,0x03f6,0x03fc,0x83f9,0x03e8,0x83ed,0x83e7,0x03e2,
+	0x83a3,0x03a6,0x03ac,0x83a9,0x03b8,0x83bd,0x83b7,0x03b2,
+	0x0390,0x8395,0x839f,0x039a,0x838b,0x038e,0x0384,0x8381,
+	0x0280,0x8285,0x828f,0x028a,0x829b,0x029e,0x0294,0x8291,
+	0x82b3,0x02b6,0x02bc,0x82b9,0x02a8,0x82ad,0x82a7,0x02a2,
+	0x82e3,0x02e6,0x02ec,0x82e9,0x02f8,0x82fd,0x82f7,0x02f2,
+	0x02d0,0x82d5,0x82df,0x02da,0x82cb,0x02ce,0x02c4,0x82c1,
+	0x8243,0x0246,0x024c,0x8249,0x0258,0x825d,0x8257,0x0252,
+	0x0270,0x8275,0x827f,0x027a,0x826b,0x026e,0x0264,0x8261,
+	0x0220,0x8225,0x822f,0x022a,0x823b,0x023e,0x0234,0x8231,
+	0x8213,0x0216,0x021c,0x8219,0x0208,0x820d,0x8207,0x0202
+};
+
+uint16_t a52_crc16_block(uint8_t *data,uint32_t num_bytes)
+{
+	uint32_t i;
+	uint16_t state=0;
+
+	for(i=0;i<num_bytes;i++)
+		state = crc_lut[data[i] ^ (state>>8)] ^ (state<<8);
+
+	return state;
+}
diff --git a/contrib/ffmpeg/libavcodec/liba52/downmix.c b/contrib/ffmpeg/libavcodec/liba52/downmix.c
new file mode 100644
index 000000000..7999b7db0
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/liba52/downmix.c
@@ -0,0 +1,679 @@
+/*
+ * downmix.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "a52.h"
+#include "a52_internal.h"
+
+#define CONVERT(acmod,output) (((output) << 3) + (acmod))
+
+int a52_downmix_init (int input, int flags, level_t * level,
+		      level_t clev, level_t slev)
+{
+    static uint8_t table[11][8] = {
+	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_STEREO,
+	 A52_STEREO,	A52_STEREO,	A52_STEREO,	A52_STEREO},
+	{A52_MONO,	A52_MONO,	A52_MONO,	A52_MONO,
+	 A52_MONO,	A52_MONO,	A52_MONO,	A52_MONO},
+	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_STEREO,
+	 A52_STEREO,	A52_STEREO,	A52_STEREO,	A52_STEREO},
+	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_3F,
+	 A52_STEREO,	A52_3F,		A52_STEREO,	A52_3F},
+	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_STEREO,
+	 A52_2F1R,	A52_2F1R,	A52_2F1R,	A52_2F1R},
+	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_STEREO,
+	 A52_2F1R,	A52_3F1R,	A52_2F1R,	A52_3F1R},
+	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_3F,
+	 A52_2F2R,	A52_2F2R,	A52_2F2R,	A52_2F2R},
+	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_3F,
+	 A52_2F2R,	A52_3F2R,	A52_2F2R,	A52_3F2R},
+	{A52_CHANNEL1,	A52_MONO,	A52_MONO,	A52_MONO,
+	 A52_MONO,	A52_MONO,	A52_MONO,	A52_MONO},
+	{A52_CHANNEL2,	A52_MONO,	A52_MONO,	A52_MONO,
+	 A52_MONO,	A52_MONO,	A52_MONO,	A52_MONO},
+	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_DOLBY,
+	 A52_DOLBY,	A52_DOLBY,	A52_DOLBY,	A52_DOLBY}
+    };
+    int output;
+
+    output = flags & A52_CHANNEL_MASK;
+    if (output > A52_DOLBY)
+	return -1;
+
+    output = table[output][input & 7];
+
+    if (output == A52_STEREO &&
+	(input == A52_DOLBY || (input == A52_3F && clev == LEVEL (LEVEL_3DB))))
+	output = A52_DOLBY;
+
+    if (flags & A52_ADJUST_LEVEL) {
+	level_t adjust;
+
+	switch (CONVERT (input & 7, output)) {
+
+	case CONVERT (A52_3F, A52_MONO):
+	    adjust = DIV (LEVEL_3DB, LEVEL (1) + clev);
+	    break;
+
+	case CONVERT (A52_STEREO, A52_MONO):
+	case CONVERT (A52_2F2R, A52_2F1R):
+	case CONVERT (A52_3F2R, A52_3F1R):
+	level_3db:
+	    adjust = LEVEL (LEVEL_3DB);
+	    break;
+
+	case CONVERT (A52_3F2R, A52_2F1R):
+	    if (clev < LEVEL (LEVEL_PLUS3DB - 1))
+		goto level_3db;
+	    /* break thru */
+	case CONVERT (A52_3F, A52_STEREO):
+	case CONVERT (A52_3F1R, A52_2F1R):
+	case CONVERT (A52_3F1R, A52_2F2R):
+	case CONVERT (A52_3F2R, A52_2F2R):
+	    adjust = DIV (1, LEVEL (1) + clev);
+	    break;
+
+	case CONVERT (A52_2F1R, A52_MONO):
+	    adjust = DIV (LEVEL_PLUS3DB, LEVEL (2) + slev);
+	    break;
+
+	case CONVERT (A52_2F1R, A52_STEREO):
+	case CONVERT (A52_3F1R, A52_3F):
+	    adjust = DIV (1, LEVEL (1) + MUL_C (slev, LEVEL_3DB));
+	    break;
+
+	case CONVERT (A52_3F1R, A52_MONO):
+	    adjust = DIV (LEVEL_3DB, LEVEL (1) + clev + MUL_C (slev, 0.5));
+	    break;
+
+	case CONVERT (A52_3F1R, A52_STEREO):
+	    adjust = DIV (1, LEVEL (1) + clev + MUL_C (slev, LEVEL_3DB));
+	    break;
+
+	case CONVERT (A52_2F2R, A52_MONO):
+	    adjust = DIV (LEVEL_3DB, LEVEL (1) + slev);
+	    break;
+
+	case CONVERT (A52_2F2R, A52_STEREO):
+	case CONVERT (A52_3F2R, A52_3F):
+	    adjust = DIV (1, LEVEL (1) + slev);
+	    break;
+
+	case CONVERT (A52_3F2R, A52_MONO):
+	    adjust = DIV (LEVEL_3DB, LEVEL (1) + clev + slev);
+	    break;
+
+	case CONVERT (A52_3F2R, A52_STEREO):
+	    adjust = DIV (1, LEVEL (1) + clev + slev);
+	    break;
+
+	case CONVERT (A52_MONO, A52_DOLBY):
+	    adjust = LEVEL (LEVEL_PLUS3DB);
+	    break;
+
+	case CONVERT (A52_3F, A52_DOLBY):
+	case CONVERT (A52_2F1R, A52_DOLBY):
+	    adjust = LEVEL (1 / (1 + LEVEL_3DB));
+	    break;
+
+	case CONVERT (A52_3F1R, A52_DOLBY):
+	case CONVERT (A52_2F2R, A52_DOLBY):
+	    adjust = LEVEL (1 / (1 + 2 * LEVEL_3DB));
+	    break;
+
+	case CONVERT (A52_3F2R, A52_DOLBY):
+	    adjust = LEVEL (1 / (1 + 3 * LEVEL_3DB));
+	    break;
+
+	default:
+	    return output;
+	}
+
+	*level = MUL_L (*level, adjust);
+    }
+
+    return output;
+}
+
+int a52_downmix_coeff (level_t * coeff, int acmod, int output, level_t level,
+		       level_t clev, level_t slev)
+{
+    level_t level_3db;
+
+    level_3db = MUL_C (level, LEVEL_3DB);
+
+    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
+
+    case CONVERT (A52_CHANNEL, A52_CHANNEL):
+    case CONVERT (A52_MONO, A52_MONO):
+    case CONVERT (A52_STEREO, A52_STEREO):
+    case CONVERT (A52_3F, A52_3F):
+    case CONVERT (A52_2F1R, A52_2F1R):
+    case CONVERT (A52_3F1R, A52_3F1R):
+    case CONVERT (A52_2F2R, A52_2F2R):
+    case CONVERT (A52_3F2R, A52_3F2R):
+    case CONVERT (A52_STEREO, A52_DOLBY):
+	coeff[0] = coeff[1] = coeff[2] = coeff[3] = coeff[4] = level;
+	return 0;
+
+    case CONVERT (A52_CHANNEL, A52_MONO):
+	coeff[0] = coeff[1] = MUL_C (level, LEVEL_6DB);
+	return 3;
+
+    case CONVERT (A52_STEREO, A52_MONO):
+	coeff[0] = coeff[1] = level_3db;
+	return 3;
+
+    case CONVERT (A52_3F, A52_MONO):
+	coeff[0] = coeff[2] = level_3db;
+	coeff[1] = MUL_C (MUL_L (level_3db, clev), LEVEL_PLUS6DB);
+	return 7;
+
+    case CONVERT (A52_2F1R, A52_MONO):
+	coeff[0] = coeff[1] = level_3db;
+	coeff[2] = MUL_L (level_3db, slev);
+	return 7;
+
+    case CONVERT (A52_2F2R, A52_MONO):
+	coeff[0] = coeff[1] = level_3db;
+	coeff[2] = coeff[3] = MUL_L (level_3db, slev);
+	return 15;
+
+    case CONVERT (A52_3F1R, A52_MONO):
+	coeff[0] = coeff[2] = level_3db;
+	coeff[1] = MUL_C (MUL_L (level_3db, clev), LEVEL_PLUS6DB);
+	coeff[3] = MUL_L (level_3db, slev);
+	return 15;
+
+    case CONVERT (A52_3F2R, A52_MONO):
+	coeff[0] = coeff[2] = level_3db;
+	coeff[1] = MUL_C (MUL_L (level_3db, clev), LEVEL_PLUS6DB);
+	coeff[3] = coeff[4] = MUL_L (level_3db, slev);
+	return 31;
+
+    case CONVERT (A52_MONO, A52_DOLBY):
+	coeff[0] = level_3db;
+	return 0;
+
+    case CONVERT (A52_3F, A52_DOLBY):
+	coeff[0] = coeff[2] = coeff[3] = coeff[4] = level;
+	coeff[1] = level_3db;
+	return 7;
+
+    case CONVERT (A52_3F, A52_STEREO):
+    case CONVERT (A52_3F1R, A52_2F1R):
+    case CONVERT (A52_3F2R, A52_2F2R):
+	coeff[0] = coeff[2] = coeff[3] = coeff[4] = level;
+	coeff[1] = MUL_L (level, clev);
+	return 7;
+
+    case CONVERT (A52_2F1R, A52_DOLBY):
+	coeff[0] = coeff[1] = level;
+	coeff[2] = level_3db;
+	return 7;
+
+    case CONVERT (A52_2F1R, A52_STEREO):
+	coeff[0] = coeff[1] = level;
+	coeff[2] = MUL_L (level_3db, slev);
+	return 7;
+
+    case CONVERT (A52_3F1R, A52_DOLBY):
+	coeff[0] = coeff[2] = level;
+	coeff[1] = coeff[3] = level_3db;
+	return 15;
+
+    case CONVERT (A52_3F1R, A52_STEREO):
+	coeff[0] = coeff[2] = level;
+	coeff[1] = MUL_L (level, clev);
+	coeff[3] = MUL_L (level_3db, slev);
+	return 15;
+
+    case CONVERT (A52_2F2R, A52_DOLBY):
+	coeff[0] = coeff[1] = level;
+	coeff[2] = coeff[3] = level_3db;
+	return 15;
+
+    case CONVERT (A52_2F2R, A52_STEREO):
+	coeff[0] = coeff[1] = level;
+	coeff[2] = coeff[3] = MUL_L (level, slev);
+	return 15;
+
+    case CONVERT (A52_3F2R, A52_DOLBY):
+	coeff[0] = coeff[2] = level;
+	coeff[1] = coeff[3] = coeff[4] = level_3db;
+	return 31;
+
+    case CONVERT (A52_3F2R, A52_2F1R):
+	coeff[0] = coeff[2] = level;
+	coeff[1] = MUL_L (level, clev);
+	coeff[3] = coeff[4] = level_3db;
+	return 31;
+
+    case CONVERT (A52_3F2R, A52_STEREO):
+	coeff[0] = coeff[2] = level;
+	coeff[1] = MUL_L (level, clev);
+	coeff[3] = coeff[4] = MUL_L (level, slev);
+	return 31;
+
+    case CONVERT (A52_3F1R, A52_3F):
+	coeff[0] = coeff[1] = coeff[2] = level;
+	coeff[3] = MUL_L (level_3db, slev);
+	return 13;
+
+    case CONVERT (A52_3F2R, A52_3F):
+	coeff[0] = coeff[1] = coeff[2] = level;
+	coeff[3] = coeff[4] = MUL_L (level, slev);
+	return 29;
+
+    case CONVERT (A52_2F2R, A52_2F1R):
+	coeff[0] = coeff[1] = level;
+	coeff[2] = coeff[3] = level_3db;
+	return 12;
+
+    case CONVERT (A52_3F2R, A52_3F1R):
+	coeff[0] = coeff[1] = coeff[2] = level;
+	coeff[3] = coeff[4] = level_3db;
+	return 24;
+
+    case CONVERT (A52_2F1R, A52_2F2R):
+	coeff[0] = coeff[1] = level;
+	coeff[2] = level_3db;
+	return 0;
+
+    case CONVERT (A52_3F1R, A52_2F2R):
+	coeff[0] = coeff[2] = level;
+	coeff[1] = MUL_L (level, clev);
+	coeff[3] = level_3db;
+	return 7;
+
+    case CONVERT (A52_3F1R, A52_3F2R):
+	coeff[0] = coeff[1] = coeff[2] = level;
+	coeff[3] = level_3db;
+	return 0;
+
+    case CONVERT (A52_CHANNEL, A52_CHANNEL1):
+	coeff[0] = level;
+	coeff[1] = 0;
+	return 0;
+
+    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
+	coeff[0] = 0;
+	coeff[1] = level;
+	return 0;
+    }
+
+    return -1;	/* NOTREACHED */
+}
+
+static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias)
+{
+    int i;
+
+    for (i = 0; i < 256; i++)
+	dest[i] += BIAS (src[i]);
+}
+
+static void mix3to1 (sample_t * samples, sample_t bias)
+{
+    int i;
+
+    for (i = 0; i < 256; i++)
+	samples[i] += BIAS (samples[i + 256] + samples[i + 512]);
+}
+
+static void mix4to1 (sample_t * samples, sample_t bias)
+{
+    int i;
+
+    for (i = 0; i < 256; i++)
+	samples[i] += BIAS (samples[i + 256] + samples[i + 512] +
+			    samples[i + 768]);
+}
+
+static void mix5to1 (sample_t * samples, sample_t bias)
+{
+    int i;
+
+    for (i = 0; i < 256; i++)
+	samples[i] += BIAS (samples[i + 256] + samples[i + 512] +
+			    samples[i + 768] + samples[i + 1024]);
+}
+
+static void mix3to2 (sample_t * samples, sample_t bias)
+{
+    int i;
+    sample_t common;
+
+    for (i = 0; i < 256; i++) {
+	common = BIAS (samples[i + 256]);
+	samples[i] += common;
+	samples[i + 256] = samples[i + 512] + common;
+    }
+}
+
+static void mix21to2 (sample_t * left, sample_t * right, sample_t bias)
+{
+    int i;
+    sample_t common;
+
+    for (i = 0; i < 256; i++) {
+	common = BIAS (right[i + 256]);
+	left[i] += common;
+	right[i] += common;
+    }
+}
+
+static void mix21toS (sample_t * samples, sample_t bias)
+{
+    int i;
+    sample_t surround;
+
+    for (i = 0; i < 256; i++) {
+	surround = samples[i + 512];
+	samples[i] += BIAS (-surround);
+	samples[i + 256] += BIAS (surround);
+    }
+}
+
+static void mix31to2 (sample_t * samples, sample_t bias)
+{
+    int i;
+    sample_t common;
+
+    for (i = 0; i < 256; i++) {
+	common = BIAS (samples[i + 256] + samples[i + 768]);
+	samples[i] += common;
+	samples[i + 256] = samples[i + 512] + common;
+    }
+}
+
+static void mix31toS (sample_t * samples, sample_t bias)
+{
+    int i;
+    sample_t common, surround;
+
+    for (i = 0; i < 256; i++) {
+	common = BIAS (samples[i + 256]);
+	surround = samples[i + 768];
+	samples[i] += common - surround;
+	samples[i + 256] = samples[i + 512] + common + surround;
+    }
+}
+
+static void mix22toS (sample_t * samples, sample_t bias)
+{
+    int i;
+    sample_t surround;
+
+    for (i = 0; i < 256; i++) {
+	surround = samples[i + 512] + samples[i + 768];
+	samples[i] += BIAS (-surround);
+	samples[i + 256] += BIAS (surround);
+    }
+}
+
+static void mix32to2 (sample_t * samples, sample_t bias)
+{
+    int i;
+    sample_t common;
+
+    for (i = 0; i < 256; i++) {
+	common = BIAS (samples[i + 256]);
+	samples[i] += common + samples[i + 768];
+	samples[i + 256] = common + samples[i + 512] + samples[i + 1024];
+    }
+}
+
+static void mix32toS (sample_t * samples, sample_t bias)
+{
+    int i;
+    sample_t common, surround;
+
+    for (i = 0; i < 256; i++) {
+	common = BIAS (samples[i + 256]);
+	surround = samples[i + 768] + samples[i + 1024];
+	samples[i] += common - surround;
+	samples[i + 256] = samples[i + 512] + common + surround;
+    }
+}
+
+static void move2to1 (sample_t * src, sample_t * dest, sample_t bias)
+{
+    int i;
+
+    for (i = 0; i < 256; i++)
+	dest[i] = BIAS (src[i] + src[i + 256]);
+}
+
+static void zero (sample_t * samples)
+{
+    int i;
+
+    for (i = 0; i < 256; i++)
+	samples[i] = 0;
+}
+
+void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias,
+		  level_t clev, level_t slev)
+{
+    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
+
+    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
+	memcpy (samples, samples + 256, 256 * sizeof (sample_t));
+	break;
+
+    case CONVERT (A52_CHANNEL, A52_MONO):
+    case CONVERT (A52_STEREO, A52_MONO):
+    mix_2to1:
+	mix2to1 (samples, samples + 256, bias);
+	break;
+
+    case CONVERT (A52_2F1R, A52_MONO):
+	if (slev == 0)
+	    goto mix_2to1;
+    case CONVERT (A52_3F, A52_MONO):
+    mix_3to1:
+	mix3to1 (samples, bias);
+	break;
+
+    case CONVERT (A52_3F1R, A52_MONO):
+	if (slev == 0)
+	    goto mix_3to1;
+    case CONVERT (A52_2F2R, A52_MONO):
+	if (slev == 0)
+	    goto mix_2to1;
+	mix4to1 (samples, bias);
+	break;
+
+    case CONVERT (A52_3F2R, A52_MONO):
+	if (slev == 0)
+	    goto mix_3to1;
+	mix5to1 (samples, bias);
+	break;
+
+    case CONVERT (A52_MONO, A52_DOLBY):
+	memcpy (samples + 256, samples, 256 * sizeof (sample_t));
+	break;
+
+    case CONVERT (A52_3F, A52_STEREO):
+    case CONVERT (A52_3F, A52_DOLBY):
+    mix_3to2:
+	mix3to2 (samples, bias);
+	break;
+
+    case CONVERT (A52_2F1R, A52_STEREO):
+	if (slev == 0)
+	    break;
+	mix21to2 (samples, samples + 256, bias);
+	break;
+
+    case CONVERT (A52_2F1R, A52_DOLBY):
+	mix21toS (samples, bias);
+	break;
+
+    case CONVERT (A52_3F1R, A52_STEREO):
+	if (slev == 0)
+	    goto mix_3to2;
+	mix31to2 (samples, bias);
+	break;
+
+    case CONVERT (A52_3F1R, A52_DOLBY):
+	mix31toS (samples, bias);
+	break;
+
+    case CONVERT (A52_2F2R, A52_STEREO):
+	if (slev == 0)
+	    break;
+	mix2to1 (samples, samples + 512, bias);
+	mix2to1 (samples + 256, samples + 768, bias);
+	break;
+
+    case CONVERT (A52_2F2R, A52_DOLBY):
+	mix22toS (samples, bias);
+	break;
+
+    case CONVERT (A52_3F2R, A52_STEREO):
+	if (slev == 0)
+	    goto mix_3to2;
+	mix32to2 (samples, bias);
+	break;
+
+    case CONVERT (A52_3F2R, A52_DOLBY):
+	mix32toS (samples, bias);
+	break;
+
+    case CONVERT (A52_3F1R, A52_3F):
+	if (slev == 0)
+	    break;
+	mix21to2 (samples, samples + 512, bias);
+	break;
+
+    case CONVERT (A52_3F2R, A52_3F):
+	if (slev == 0)
+	    break;
+	mix2to1 (samples, samples + 768, bias);
+	mix2to1 (samples + 512, samples + 1024, bias);
+	break;
+
+    case CONVERT (A52_3F1R, A52_2F1R):
+	mix3to2 (samples, bias);
+	memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
+	break;
+
+    case CONVERT (A52_2F2R, A52_2F1R):
+	mix2to1 (samples + 512, samples + 768, bias);
+	break;
+
+    case CONVERT (A52_3F2R, A52_2F1R):
+	mix3to2 (samples, bias);
+	move2to1 (samples + 768, samples + 512, bias);
+	break;
+
+    case CONVERT (A52_3F2R, A52_3F1R):
+	mix2to1 (samples + 768, samples + 1024, bias);
+	break;
+
+    case CONVERT (A52_2F1R, A52_2F2R):
+	memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
+	break;
+
+    case CONVERT (A52_3F1R, A52_2F2R):
+	mix3to2 (samples, bias);
+	memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
+	break;
+
+    case CONVERT (A52_3F2R, A52_2F2R):
+	mix3to2 (samples, bias);
+	memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
+	memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
+	break;
+
+    case CONVERT (A52_3F1R, A52_3F2R):
+	memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
+	break;
+    }
+}
+
+void a52_upmix (sample_t * samples, int acmod, int output)
+{
+    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
+
+    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
+	memcpy (samples + 256, samples, 256 * sizeof (sample_t));
+	break;
+
+    case CONVERT (A52_3F2R, A52_MONO):
+	zero (samples + 1024);
+    case CONVERT (A52_3F1R, A52_MONO):
+    case CONVERT (A52_2F2R, A52_MONO):
+	zero (samples + 768);
+    case CONVERT (A52_3F, A52_MONO):
+    case CONVERT (A52_2F1R, A52_MONO):
+	zero (samples + 512);
+    case CONVERT (A52_CHANNEL, A52_MONO):
+    case CONVERT (A52_STEREO, A52_MONO):
+	zero (samples + 256);
+	break;
+
+    case CONVERT (A52_3F2R, A52_STEREO):
+    case CONVERT (A52_3F2R, A52_DOLBY):
+	zero (samples + 1024);
+    case CONVERT (A52_3F1R, A52_STEREO):
+    case CONVERT (A52_3F1R, A52_DOLBY):
+	zero (samples + 768);
+    case CONVERT (A52_3F, A52_STEREO):
+    case CONVERT (A52_3F, A52_DOLBY):
+    mix_3to2:
+	memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
+	zero (samples + 256);
+	break;
+
+    case CONVERT (A52_2F2R, A52_STEREO):
+    case CONVERT (A52_2F2R, A52_DOLBY):
+	zero (samples + 768);
+    case CONVERT (A52_2F1R, A52_STEREO):
+    case CONVERT (A52_2F1R, A52_DOLBY):
+	zero (samples + 512);
+	break;
+
+    case CONVERT (A52_3F2R, A52_3F):
+	zero (samples + 1024);
+    case CONVERT (A52_3F1R, A52_3F):
+    case CONVERT (A52_2F2R, A52_2F1R):
+	zero (samples + 768);
+	break;
+
+    case CONVERT (A52_3F2R, A52_3F1R):
+	zero (samples + 1024);
+	break;
+
+    case CONVERT (A52_3F2R, A52_2F1R):
+	zero (samples + 1024);
+    case CONVERT (A52_3F1R, A52_2F1R):
+    mix_31to21:
+	memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
+	goto mix_3to2;
+
+    case CONVERT (A52_3F2R, A52_2F2R):
+	memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
+	goto mix_31to21;
+    }
+}
diff --git a/contrib/ffmpeg/libavcodec/liba52/imdct.c b/contrib/ffmpeg/libavcodec/liba52/imdct.c
new file mode 100644
index 000000000..21a2a6565
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/liba52/imdct.c
@@ -0,0 +1,411 @@
+/*
+ * imdct.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * The ifft algorithms in this file have been largely inspired by Dan
+ * Bernstein's work, djbfft, available at http://cr.yp.to/djbfft.html
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "a52.h"
+#include "a52_internal.h"
+#include "mm_accel.h"
+
+typedef struct complex_s {
+    sample_t real;
+    sample_t imag;
+} complex_t;
+
+static uint8_t fftorder[] = {
+      0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176,
+      8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88,
+      4,132, 68,196, 36,164,228,100, 20,148, 84,212,244,116, 52,180,
+    252,124, 60,188, 28,156,220, 92, 12,140, 76,204,236,108, 44,172,
+      2,130, 66,194, 34,162,226, 98, 18,146, 82,210,242,114, 50,178,
+     10,138, 74,202, 42,170,234,106,250,122, 58,186, 26,154,218, 90,
+    254,126, 62,190, 30,158,222, 94, 14,142, 78,206,238,110, 46,174,
+      6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86
+};
+
+/* Root values for IFFT */
+static sample_t roots16[3];
+static sample_t roots32[7];
+static sample_t roots64[15];
+static sample_t roots128[31];
+
+/* Twiddle factors for IMDCT */
+static complex_t pre1[128];
+static complex_t post1[64];
+static complex_t pre2[64];
+static complex_t post2[32];
+
+static sample_t a52_imdct_window[256];
+
+static void (* ifft128) (complex_t * buf);
+static void (* ifft64) (complex_t * buf);
+
+static inline void ifft2 (complex_t * buf)
+{
+    sample_t r, i;
+
+    r = buf[0].real;
+    i = buf[0].imag;
+    buf[0].real += buf[1].real;
+    buf[0].imag += buf[1].imag;
+    buf[1].real = r - buf[1].real;
+    buf[1].imag = i - buf[1].imag;
+}
+
+static inline void ifft4 (complex_t * buf)
+{
+    sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+
+    tmp1 = buf[0].real + buf[1].real;
+    tmp2 = buf[3].real + buf[2].real;
+    tmp3 = buf[0].imag + buf[1].imag;
+    tmp4 = buf[2].imag + buf[3].imag;
+    tmp5 = buf[0].real - buf[1].real;
+    tmp6 = buf[0].imag - buf[1].imag;
+    tmp7 = buf[2].imag - buf[3].imag;
+    tmp8 = buf[3].real - buf[2].real;
+
+    buf[0].real = tmp1 + tmp2;
+    buf[0].imag = tmp3 + tmp4;
+    buf[2].real = tmp1 - tmp2;
+    buf[2].imag = tmp3 - tmp4;
+    buf[1].real = tmp5 + tmp7;
+    buf[1].imag = tmp6 + tmp8;
+    buf[3].real = tmp5 - tmp7;
+    buf[3].imag = tmp6 - tmp8;
+}
+
+/* basic radix-2 ifft butterfly */
+
+#define BUTTERFLY_0(t0,t1,W0,W1,d0,d1) do {	\
+    t0 = MUL (W1, d1) + MUL (W0, d0);		\
+    t1 = MUL (W0, d1) - MUL (W1, d0);		\
+} while (0)
+
+/* radix-2 ifft butterfly with bias */
+
+#define BUTTERFLY_B(t0,t1,W0,W1,d0,d1) do {	\
+    t0 = BIAS (MUL (d1, W1) + MUL (d0, W0));	\
+    t1 = BIAS (MUL (d1, W0) - MUL (d0, W1));	\
+} while (0)
+
+/* the basic split-radix ifft butterfly */
+
+#define BUTTERFLY(a0,a1,a2,a3,wr,wi) do {		\
+    BUTTERFLY_0 (tmp5, tmp6, wr, wi, a2.real, a2.imag);	\
+    BUTTERFLY_0 (tmp8, tmp7, wr, wi, a3.imag, a3.real);	\
+    tmp1 = tmp5 + tmp7;					\
+    tmp2 = tmp6 + tmp8;					\
+    tmp3 = tmp6 - tmp8;					\
+    tmp4 = tmp7 - tmp5;					\
+    a2.real = a0.real - tmp1;				\
+    a2.imag = a0.imag - tmp2;				\
+    a3.real = a1.real - tmp3;				\
+    a3.imag = a1.imag - tmp4;				\
+    a0.real += tmp1;					\
+    a0.imag += tmp2;					\
+    a1.real += tmp3;					\
+    a1.imag += tmp4;					\
+} while (0)
+
+/* split-radix ifft butterfly, specialized for wr=1 wi=0 */
+
+#define BUTTERFLY_ZERO(a0,a1,a2,a3) do {	\
+    tmp1 = a2.real + a3.real;			\
+    tmp2 = a2.imag + a3.imag;			\
+    tmp3 = a2.imag - a3.imag;			\
+    tmp4 = a3.real - a2.real;			\
+    a2.real = a0.real - tmp1;			\
+    a2.imag = a0.imag - tmp2;			\
+    a3.real = a1.real - tmp3;			\
+    a3.imag = a1.imag - tmp4;			\
+    a0.real += tmp1;				\
+    a0.imag += tmp2;				\
+    a1.real += tmp3;				\
+    a1.imag += tmp4;				\
+} while (0)
+
+/* split-radix ifft butterfly, specialized for wr=wi */
+
+#define BUTTERFLY_HALF(a0,a1,a2,a3,w) do {	\
+    tmp5 = MUL (a2.real + a2.imag, w);		\
+    tmp6 = MUL (a2.imag - a2.real, w);		\
+    tmp7 = MUL (a3.real - a3.imag, w);		\
+    tmp8 = MUL (a3.imag + a3.real, w);		\
+    tmp1 = tmp5 + tmp7;				\
+    tmp2 = tmp6 + tmp8;				\
+    tmp3 = tmp6 - tmp8;				\
+    tmp4 = tmp7 - tmp5;				\
+    a2.real = a0.real - tmp1;			\
+    a2.imag = a0.imag - tmp2;			\
+    a3.real = a1.real - tmp3;			\
+    a3.imag = a1.imag - tmp4;			\
+    a0.real += tmp1;				\
+    a0.imag += tmp2;				\
+    a1.real += tmp3;				\
+    a1.imag += tmp4;				\
+} while (0)
+
+static inline void ifft8 (complex_t * buf)
+{
+    sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+
+    ifft4 (buf);
+    ifft2 (buf + 4);
+    ifft2 (buf + 6);
+    BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]);
+    BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]);
+}
+
+static void ifft_pass (complex_t * buf, sample_t * weight, int n)
+{
+    complex_t * buf1;
+    complex_t * buf2;
+    complex_t * buf3;
+    sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+    int i;
+
+    buf++;
+    buf1 = buf + n;
+    buf2 = buf + 2 * n;
+    buf3 = buf + 3 * n;
+
+    BUTTERFLY_ZERO (buf[-1], buf1[-1], buf2[-1], buf3[-1]);
+
+    i = n - 1;
+
+    do {
+	BUTTERFLY (buf[0], buf1[0], buf2[0], buf3[0],
+		   weight[0], weight[2*i-n]);
+	buf++;
+	buf1++;
+	buf2++;
+	buf3++;
+	weight++;
+    } while (--i);
+}
+
+static void ifft16 (complex_t * buf)
+{
+    ifft8 (buf);
+    ifft4 (buf + 8);
+    ifft4 (buf + 12);
+    ifft_pass (buf, roots16, 4);
+}
+
+static void ifft32 (complex_t * buf)
+{
+    ifft16 (buf);
+    ifft8 (buf + 16);
+    ifft8 (buf + 24);
+    ifft_pass (buf, roots32, 8);
+}
+
+static void ifft64_c (complex_t * buf)
+{
+    ifft32 (buf);
+    ifft16 (buf + 32);
+    ifft16 (buf + 48);
+    ifft_pass (buf, roots64, 16);
+}
+
+static void ifft128_c (complex_t * buf)
+{
+    ifft32 (buf);
+    ifft16 (buf + 32);
+    ifft16 (buf + 48);
+    ifft_pass (buf, roots64, 16);
+
+    ifft32 (buf + 64);
+    ifft32 (buf + 96);
+    ifft_pass (buf, roots128, 32);
+}
+
+void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias)
+{
+    int i, k;
+    sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2;
+    const sample_t * window = a52_imdct_window;
+    complex_t buf[128];
+
+    for (i = 0; i < 128; i++) {
+	k = fftorder[i];
+	t_r = pre1[i].real;
+	t_i = pre1[i].imag;
+	BUTTERFLY_0 (buf[i].real, buf[i].imag, t_r, t_i, data[k], data[255-k]);
+    }
+
+    ifft128 (buf);
+
+    /* Post IFFT complex multiply plus IFFT complex conjugate*/
+    /* Window and convert to real valued signal */
+    for (i = 0; i < 64; i++) {
+	/* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
+	t_r = post1[i].real;
+	t_i = post1[i].imag;
+	BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf[i].imag, buf[i].real);
+	BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf[127-i].imag, buf[127-i].real);
+
+	w_1 = window[2*i];
+	w_2 = window[255-2*i];
+	BUTTERFLY_B (data[255-2*i], data[2*i], w_2, w_1, a_r, delay[2*i]);
+	delay[2*i] = a_i;
+
+	w_1 = window[2*i+1];
+	w_2 = window[254-2*i];
+	BUTTERFLY_B (data[2*i+1], data[254-2*i], w_1, w_2, b_r, delay[2*i+1]);
+	delay[2*i+1] = b_i;
+    }
+}
+
+void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias)
+{
+    int i, k;
+    sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2;
+    const sample_t * window = a52_imdct_window;
+    complex_t buf1[64], buf2[64];
+
+    /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
+    for (i = 0; i < 64; i++) {
+	k = fftorder[i];
+	t_r = pre2[i].real;
+	t_i = pre2[i].imag;
+	BUTTERFLY_0 (buf1[i].real, buf1[i].imag, t_r, t_i, data[k], data[254-k]);
+	BUTTERFLY_0 (buf2[i].real, buf2[i].imag, t_r, t_i, data[k+1], data[255-k]);
+    }
+
+    ifft64 (buf1);
+    ifft64 (buf2);
+
+    /* Post IFFT complex multiply */
+    /* Window and convert to real valued signal */
+    for (i = 0; i < 32; i++) {
+	/* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
+	t_r = post2[i].real;
+	t_i = post2[i].imag;
+	BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf1[i].imag, buf1[i].real);
+	BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf1[63-i].imag, buf1[63-i].real);
+	BUTTERFLY_0 (c_r, c_i, t_i, t_r, buf2[i].imag, buf2[i].real);
+	BUTTERFLY_0 (d_r, d_i, t_r, t_i, buf2[63-i].imag, buf2[63-i].real);
+
+	w_1 = window[2*i];
+	w_2 = window[255-2*i];
+	BUTTERFLY_B (data[255-2*i], data[2*i], w_2, w_1, a_r, delay[2*i]);
+	delay[2*i] = c_i;
+
+	w_1 = window[128+2*i];
+	w_2 = window[127-2*i];
+	BUTTERFLY_B (data[128+2*i], data[127-2*i], w_1, w_2, a_i, delay[127-2*i]);
+	delay[127-2*i] = c_r;
+
+	w_1 = window[2*i+1];
+	w_2 = window[254-2*i];
+	BUTTERFLY_B (data[254-2*i], data[2*i+1], w_2, w_1, b_i, delay[2*i+1]);
+	delay[2*i+1] = d_r;
+
+	w_1 = window[129+2*i];
+	w_2 = window[126-2*i];
+	BUTTERFLY_B (data[129+2*i], data[126-2*i], w_1, w_2, b_r, delay[126-2*i]);
+	delay[126-2*i] = d_i;
+    }
+}
+
+static double besselI0 (double x)
+{
+    double bessel = 1;
+    int i = 100;
+
+    do
+	bessel = bessel * x / (i * i) + 1;
+    while (--i);
+    return bessel;
+}
+
+void a52_imdct_init (uint32_t mm_accel)
+{
+    int i, k;
+    double sum;
+    double local_imdct_window[256];
+
+    /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */
+    sum = 0;
+    for (i = 0; i < 256; i++) {
+	sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256));
+	local_imdct_window[i] = sum;
+    }
+    sum++;
+    for (i = 0; i < 256; i++)
+	a52_imdct_window[i] = SAMPLE (sqrt (local_imdct_window[i] / sum));
+
+    for (i = 0; i < 3; i++)
+	roots16[i] = SAMPLE (cos ((M_PI / 8) * (i + 1)));
+
+    for (i = 0; i < 7; i++)
+	roots32[i] = SAMPLE (cos ((M_PI / 16) * (i + 1)));
+
+    for (i = 0; i < 15; i++)
+	roots64[i] = SAMPLE (cos ((M_PI / 32) * (i + 1)));
+
+    for (i = 0; i < 31; i++)
+	roots128[i] = SAMPLE (cos ((M_PI / 64) * (i + 1)));
+
+    for (i = 0; i < 64; i++) {
+	k = fftorder[i] / 2 + 64;
+	pre1[i].real = SAMPLE (cos ((M_PI / 256) * (k - 0.25)));
+	pre1[i].imag = SAMPLE (sin ((M_PI / 256) * (k - 0.25)));
+    }
+
+    for (i = 64; i < 128; i++) {
+	k = fftorder[i] / 2 + 64;
+	pre1[i].real = SAMPLE (-cos ((M_PI / 256) * (k - 0.25)));
+	pre1[i].imag = SAMPLE (-sin ((M_PI / 256) * (k - 0.25)));
+    }
+
+    for (i = 0; i < 64; i++) {
+	post1[i].real = SAMPLE (cos ((M_PI / 256) * (i + 0.5)));
+	post1[i].imag = SAMPLE (sin ((M_PI / 256) * (i + 0.5)));
+    }
+
+    for (i = 0; i < 64; i++) {
+	k = fftorder[i] / 4;
+	pre2[i].real = SAMPLE (cos ((M_PI / 128) * (k - 0.25)));
+	pre2[i].imag = SAMPLE (sin ((M_PI / 128) * (k - 0.25)));
+    }
+
+    for (i = 0; i < 32; i++) {
+	post2[i].real = SAMPLE (cos ((M_PI / 128) * (i + 0.5)));
+	post2[i].imag = SAMPLE (sin ((M_PI / 128) * (i + 0.5)));
+    }
+
+#ifdef LIBA52_DJBFFT
+    if (mm_accel & MM_ACCEL_DJBFFT) {
+	ifft128 = (void (*) (complex_t *)) fftc4_un128;
+	ifft64 = (void (*) (complex_t *)) fftc4_un64;
+    } else
+#endif
+    {
+	ifft128 = ifft128_c;
+	ifft64 = ifft64_c;
+    }
+}
diff --git a/contrib/ffmpeg/libavcodec/liba52/mm_accel.h b/contrib/ffmpeg/libavcodec/liba52/mm_accel.h
new file mode 100644
index 000000000..9a475f5a2
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/liba52/mm_accel.h
@@ -0,0 +1,42 @@
+/*
+ * mm_accel.h
+ * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef MM_ACCEL_H
+#define MM_ACCEL_H
+
+/* generic accelerations */
+#define MM_ACCEL_DJBFFT		0x00000001
+
+/* x86 accelerations */
+#define MM_ACCEL_X86_MMX	0x80000000
+#define MM_ACCEL_X86_3DNOW	0x40000000
+#define MM_ACCEL_X86_MMXEXT	0x20000000
+#define MM_ACCEL_X86_SSE	0x10000000
+#define MM_ACCEL_X86_3DNOWEXT	0x08000000
+
+/* PPC accelerations */
+#define MM_ACCEL_PPC_ALTIVEC	0x00010000
+
+uint32_t mm_accel (void);
+
+#endif /* MM_ACCEL_H */
diff --git a/contrib/ffmpeg/libavcodec/liba52/parse.c b/contrib/ffmpeg/libavcodec/liba52/parse.c
new file mode 100644
index 000000000..5a0701564
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/liba52/parse.c
@@ -0,0 +1,939 @@
+/*
+ * parse.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "a52.h"
+#include "a52_internal.h"
+#include "bitstream.h"
+#include "tables.h"
+
+#if defined(HAVE_MEMALIGN) && !defined(__cplusplus)
+/* some systems have memalign() but no declaration for it */
+void * memalign (size_t align, size_t size);
+#else
+/* assume malloc alignment is sufficient */
+#define memalign(align,size) malloc (size)
+#endif
+
+typedef struct {
+    quantizer_t q1[2];
+    quantizer_t q2[2];
+    quantizer_t q4;
+    int q1_ptr;
+    int q2_ptr;
+    int q4_ptr;
+} quantizer_set_t;
+
+static uint8_t halfrate[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3};
+
+a52_state_t * a52_init (uint32_t mm_accel)
+{
+    a52_state_t * state;
+    int i;
+
+    state = (a52_state_t *) malloc (sizeof (a52_state_t));
+    if (state == NULL)
+	return NULL;
+
+    state->samples = (sample_t *) memalign (16, 256 * 12 * sizeof (sample_t));
+    if (state->samples == NULL) {
+	free (state);
+	return NULL;
+    }
+
+    for (i = 0; i < 256 * 12; i++)
+	state->samples[i] = 0;
+
+    state->downmixed = 1;
+
+    state->lfsr_state = 1;
+
+    a52_imdct_init (mm_accel);
+
+    return state;
+}
+
+sample_t * a52_samples (a52_state_t * state)
+{
+    return state->samples;
+}
+
+int a52_syncinfo (uint8_t * buf, int * flags,
+		  int * sample_rate, int * bit_rate)
+{
+    static int rate[] = { 32,  40,  48,  56,  64,  80,  96, 112,
+			 128, 160, 192, 224, 256, 320, 384, 448,
+			 512, 576, 640};
+    static uint8_t lfeon[8] = {0x10, 0x10, 0x04, 0x04, 0x04, 0x01, 0x04, 0x01};
+    int frmsizecod;
+    int bitrate;
+    int half;
+    int acmod;
+
+    if ((buf[0] != 0x0b) || (buf[1] != 0x77))	/* syncword */
+	return 0;
+
+    if (buf[5] >= 0x60)		/* bsid >= 12 */
+	return 0;
+    half = halfrate[buf[5] >> 3];
+
+    /* acmod, dsurmod and lfeon */
+    acmod = buf[6] >> 5;
+    *flags = ((((buf[6] & 0xf8) == 0x50) ? A52_DOLBY : acmod) |
+	      ((buf[6] & lfeon[acmod]) ? A52_LFE : 0));
+
+    frmsizecod = buf[4] & 63;
+    if (frmsizecod >= 38)
+	return 0;
+    bitrate = rate [frmsizecod >> 1];
+    *bit_rate = (bitrate * 1000) >> half;
+
+    switch (buf[4] & 0xc0) {
+    case 0:
+	*sample_rate = 48000 >> half;
+	return 4 * bitrate;
+    case 0x40:
+	*sample_rate = 44100 >> half;
+	return 2 * (320 * bitrate / 147 + (frmsizecod & 1));
+    case 0x80:
+	*sample_rate = 32000 >> half;
+	return 6 * bitrate;
+    default:
+	return 0;
+    }
+}
+
+int a52_frame (a52_state_t * state, uint8_t * buf, int * flags,
+	       level_t * level, sample_t bias)
+{
+    static level_t clev[4] = { LEVEL (LEVEL_3DB), LEVEL (LEVEL_45DB),
+			       LEVEL (LEVEL_6DB), LEVEL (LEVEL_45DB) };
+    static level_t slev[4] = { LEVEL (LEVEL_3DB), LEVEL (LEVEL_6DB),
+			       0,                 LEVEL (LEVEL_6DB) };
+    int chaninfo;
+    int acmod;
+
+    state->fscod = buf[4] >> 6;
+    state->halfrate = halfrate[buf[5] >> 3];
+    state->acmod = acmod = buf[6] >> 5;
+
+    a52_bitstream_set_ptr (state, buf + 6);
+    bitstream_get (state, 3);	/* skip acmod we already parsed */
+
+    if ((acmod == 2) && (bitstream_get (state, 2) == 2))	/* dsurmod */
+	acmod = A52_DOLBY;
+
+    state->clev = state->slev = 0;
+
+    if ((acmod & 1) && (acmod != 1))
+	state->clev = clev[bitstream_get (state, 2)];	/* cmixlev */
+
+    if (acmod & 4)
+	state->slev = slev[bitstream_get (state, 2)];	/* surmixlev */
+
+    state->lfeon = bitstream_get (state, 1);
+
+    state->output = a52_downmix_init (acmod, *flags, level,
+				      state->clev, state->slev);
+    if (state->output < 0)
+	return 1;
+    if (state->lfeon && (*flags & A52_LFE))
+	state->output |= A52_LFE;
+    *flags = state->output;
+    /* the 2* compensates for differences in imdct */
+    state->dynrng = state->level = MUL_C (*level, 2);
+    state->bias = bias;
+    state->dynrnge = 1;
+    state->dynrngcall = NULL;
+    state->cplba.deltbae = DELTA_BIT_NONE;
+    state->ba[0].deltbae = state->ba[1].deltbae = state->ba[2].deltbae =
+	state->ba[3].deltbae = state->ba[4].deltbae = DELTA_BIT_NONE;
+
+    chaninfo = !acmod;
+    do {
+	bitstream_get (state, 5);	/* dialnorm */
+	if (bitstream_get (state, 1))	/* compre */
+	    bitstream_get (state, 8);	/* compr */
+	if (bitstream_get (state, 1))	/* langcode */
+	    bitstream_get (state, 8);	/* langcod */
+	if (bitstream_get (state, 1))	/* audprodie */
+	    bitstream_get (state, 7);	/* mixlevel + roomtyp */
+    } while (chaninfo--);
+
+    bitstream_get (state, 2);		/* copyrightb + origbs */
+
+    if (bitstream_get (state, 1))	/* timecod1e */
+	bitstream_get (state, 14);	/* timecod1 */
+    if (bitstream_get (state, 1))	/* timecod2e */
+	bitstream_get (state, 14);	/* timecod2 */
+
+    if (bitstream_get (state, 1)) {	/* addbsie */
+	int addbsil;
+
+	addbsil = bitstream_get (state, 6);
+	do {
+	    bitstream_get (state, 8);	/* addbsi */
+	} while (addbsil--);
+    }
+
+    return 0;
+}
+
+void a52_dynrng (a52_state_t * state,
+		 level_t (* call) (level_t, void *), void * data)
+{
+    state->dynrnge = 0;
+    if (call) {
+	state->dynrnge = 1;
+	state->dynrngcall = call;
+	state->dynrngdata = data;
+    }
+}
+
+static int parse_exponents (a52_state_t * state, int expstr, int ngrps,
+			    uint8_t exponent, uint8_t * dest)
+{
+    int exps;
+
+    while (ngrps--) {
+	exps = bitstream_get (state, 7);
+
+	exponent += exp_1[exps];
+	if (exponent > 24)
+	    return 1;
+
+	switch (expstr) {
+	case EXP_D45:
+	    *(dest++) = exponent;
+	    *(dest++) = exponent;
+	case EXP_D25:
+	    *(dest++) = exponent;
+	case EXP_D15:
+	    *(dest++) = exponent;
+	}
+
+	exponent += exp_2[exps];
+	if (exponent > 24)
+	    return 1;
+
+	switch (expstr) {
+	case EXP_D45:
+	    *(dest++) = exponent;
+	    *(dest++) = exponent;
+	case EXP_D25:
+	    *(dest++) = exponent;
+	case EXP_D15:
+	    *(dest++) = exponent;
+	}
+
+	exponent += exp_3[exps];
+	if (exponent > 24)
+	    return 1;
+
+	switch (expstr) {
+	case EXP_D45:
+	    *(dest++) = exponent;
+	    *(dest++) = exponent;
+	case EXP_D25:
+	    *(dest++) = exponent;
+	case EXP_D15:
+	    *(dest++) = exponent;
+	}
+    }
+
+    return 0;
+}
+
+static int parse_deltba (a52_state_t * state, int8_t * deltba)
+{
+    int deltnseg, deltlen, delta, j;
+
+    memset (deltba, 0, 50);
+
+    deltnseg = bitstream_get (state, 3);
+    j = 0;
+    do {
+	j += bitstream_get (state, 5);
+	deltlen = bitstream_get (state, 4);
+	delta = bitstream_get (state, 3);
+	delta -= (delta >= 4) ? 3 : 4;
+	if (!deltlen)
+	    continue;
+	if (j + deltlen >= 50)
+	    return 1;
+	while (deltlen--)
+	    deltba[j++] = delta;
+    } while (deltnseg--);
+
+    return 0;
+}
+
+static inline int zero_snr_offsets (int nfchans, a52_state_t * state)
+{
+    int i;
+
+    if ((state->csnroffst) ||
+	(state->chincpl && state->cplba.bai >> 3) ||	/* cplinu, fsnroffst */
+	(state->lfeon && state->lfeba.bai >> 3))	/* fsnroffst */
+	return 0;
+    for (i = 0; i < nfchans; i++)
+	if (state->ba[i].bai >> 3)			/* fsnroffst */
+	    return 0;
+    return 1;
+}
+
+static inline int16_t dither_gen (a52_state_t * state)
+{
+    int16_t nstate;
+
+    nstate = dither_lut[state->lfsr_state >> 8] ^ (state->lfsr_state << 8);
+
+    state->lfsr_state = (uint16_t) nstate;
+
+    return (3 * nstate) >> 2;
+}
+
+#ifndef LIBA52_FIXED
+#define COEFF(c,t,l,s,e) (c) = (t) * (s)[e]
+#else
+#define COEFF(c,_t,_l,s,e) do {					\
+    quantizer_t t = (_t);					\
+    level_t l = (_l);						\
+    int shift = e - 5;						\
+    sample_t tmp = t * (l >> 16) + ((t * (l & 0xffff)) >> 16);	\
+    if (shift >= 0)						\
+	(c) = tmp >> shift;					\
+    else							\
+	(c) = tmp << -shift;					\
+} while (0)
+#endif
+
+static void coeff_get (a52_state_t * state, sample_t * coeff,
+		       expbap_t * expbap, quantizer_set_t * quant,
+		       level_t level, int dither, int end)
+{
+    int i;
+    uint8_t * exp;
+    int8_t * bap;
+
+#ifndef LIBA52_FIXED
+    sample_t factor[25];
+
+    for (i = 0; i <= 24; i++)
+	factor[i] = scale_factor[i] * level;
+#endif
+
+    exp = expbap->exp;
+    bap = expbap->bap;
+
+    for (i = 0; i < end; i++) {
+	int bapi;
+
+	bapi = bap[i];
+	switch (bapi) {
+	case 0:
+	    if (dither) {
+		COEFF (coeff[i], dither_gen (state), level, factor, exp[i]);
+		continue;
+	    } else {
+		coeff[i] = 0;
+		continue;
+	    }
+
+	case -1:
+	    if (quant->q1_ptr >= 0) {
+		COEFF (coeff[i], quant->q1[quant->q1_ptr--], level,
+		       factor, exp[i]);
+		continue;
+	    } else {
+		int code;
+
+		code = bitstream_get (state, 5);
+
+		quant->q1_ptr = 1;
+		quant->q1[0] = q_1_2[code];
+		quant->q1[1] = q_1_1[code];
+		COEFF (coeff[i], q_1_0[code], level, factor, exp[i]);
+		continue;
+	    }
+
+	case -2:
+	    if (quant->q2_ptr >= 0) {
+		COEFF (coeff[i], quant->q2[quant->q2_ptr--], level,
+		       factor, exp[i]);
+		continue;
+	    } else {
+		int code;
+
+		code = bitstream_get (state, 7);
+
+		quant->q2_ptr = 1;
+		quant->q2[0] = q_2_2[code];
+		quant->q2[1] = q_2_1[code];
+		COEFF (coeff[i], q_2_0[code], level, factor, exp[i]);
+		continue;
+	    }
+
+	case 3:
+	    COEFF (coeff[i], q_3[bitstream_get (state, 3)], level,
+		   factor, exp[i]);
+	    continue;
+
+	case -3:
+	    if (quant->q4_ptr == 0) {
+		quant->q4_ptr = -1;
+		COEFF (coeff[i], quant->q4, level, factor, exp[i]);
+		continue;
+	    } else {
+		int code;
+
+		code = bitstream_get (state, 7);
+
+		quant->q4_ptr = 0;
+		quant->q4 = q_4_1[code];
+		COEFF (coeff[i], q_4_0[code], level, factor, exp[i]);
+		continue;
+	    }
+
+	case 4:
+	    COEFF (coeff[i], q_5[bitstream_get (state, 4)], level,
+		   factor, exp[i]);
+	    continue;
+
+	default:
+	    COEFF (coeff[i], bitstream_get_2 (state, bapi) << (16 - bapi),
+		   level, factor, exp[i]);
+	}
+    }
+}
+
+static void coeff_get_coupling (a52_state_t * state, int nfchans,
+				level_t * coeff, sample_t (* samples)[256],
+				quantizer_set_t * quant, uint8_t dithflag[5])
+{
+    int cplbndstrc, bnd, i, i_end, ch;
+    uint8_t * exp;
+    int8_t * bap;
+    level_t cplco[5];
+
+    exp = state->cpl_expbap.exp;
+    bap = state->cpl_expbap.bap;
+    bnd = 0;
+    cplbndstrc = state->cplbndstrc;
+    i = state->cplstrtmant;
+    while (i < state->cplendmant) {
+	i_end = i + 12;
+	while (cplbndstrc & 1) {
+	    cplbndstrc >>= 1;
+	    i_end += 12;
+	}
+	cplbndstrc >>= 1;
+	for (ch = 0; ch < nfchans; ch++)
+	    cplco[ch] = MUL_L (state->cplco[ch][bnd], coeff[ch]);
+	bnd++;
+
+	while (i < i_end) {
+	    quantizer_t cplcoeff;
+	    int bapi;
+
+	    bapi = bap[i];
+	    switch (bapi) {
+	    case 0:
+		for (ch = 0; ch < nfchans; ch++)
+		    if ((state->chincpl >> ch) & 1) {
+			if (dithflag[ch])
+#ifndef LIBA52_FIXED
+			    samples[ch][i] = (scale_factor[exp[i]] *
+					      cplco[ch] * dither_gen (state));
+#else
+			    COEFF (samples[ch][i], dither_gen (state),
+				   cplco[ch], scale_factor, exp[i]);
+#endif
+			else
+			    samples[ch][i] = 0;
+		    }
+		i++;
+		continue;
+
+	    case -1:
+		if (quant->q1_ptr >= 0) {
+		    cplcoeff = quant->q1[quant->q1_ptr--];
+		    break;
+		} else {
+		    int code;
+
+		    code = bitstream_get (state, 5);
+
+		    quant->q1_ptr = 1;
+		    quant->q1[0] = q_1_2[code];
+		    quant->q1[1] = q_1_1[code];
+		    cplcoeff = q_1_0[code];
+		    break;
+		}
+
+	    case -2:
+		if (quant->q2_ptr >= 0) {
+		    cplcoeff = quant->q2[quant->q2_ptr--];
+		    break;
+		} else {
+		    int code;
+
+		    code = bitstream_get (state, 7);
+
+		    quant->q2_ptr = 1;
+		    quant->q2[0] = q_2_2[code];
+		    quant->q2[1] = q_2_1[code];
+		    cplcoeff = q_2_0[code];
+		    break;
+		}
+
+	    case 3:
+		cplcoeff = q_3[bitstream_get (state, 3)];
+		break;
+
+	    case -3:
+		if (quant->q4_ptr == 0) {
+		    quant->q4_ptr = -1;
+		    cplcoeff = quant->q4;
+		    break;
+		} else {
+		    int code;
+
+		    code = bitstream_get (state, 7);
+
+		    quant->q4_ptr = 0;
+		    quant->q4 = q_4_1[code];
+		    cplcoeff = q_4_0[code];
+		    break;
+		}
+
+	    case 4:
+		cplcoeff = q_5[bitstream_get (state, 4)];
+		break;
+
+	    default:
+		cplcoeff = bitstream_get_2 (state, bapi) << (16 - bapi);
+	    }
+#ifndef LIBA52_FIXED
+	    cplcoeff *= scale_factor[exp[i]];
+#endif
+	    for (ch = 0; ch < nfchans; ch++)
+	       if ((state->chincpl >> ch) & 1)
+#ifndef LIBA52_FIXED
+		    samples[ch][i] = cplcoeff * cplco[ch];
+#else
+		    COEFF (samples[ch][i], cplcoeff, cplco[ch],
+			   scale_factor, exp[i]);
+#endif
+	    i++;
+	}
+    }
+}
+
+int a52_block (a52_state_t * state)
+{
+    static const uint8_t nfchans_tbl[] = {2, 1, 2, 3, 3, 4, 4, 5, 1, 1, 2};
+    static int rematrix_band[4] = {25, 37, 61, 253};
+    int i, nfchans, chaninfo;
+    uint8_t cplexpstr, chexpstr[5], lfeexpstr, do_bit_alloc, done_cpl;
+    uint8_t blksw[5], dithflag[5];
+    level_t coeff[5];
+    int chanbias;
+    quantizer_set_t quant;
+    sample_t * samples;
+
+    nfchans = nfchans_tbl[state->acmod];
+
+    for (i = 0; i < nfchans; i++)
+	blksw[i] = bitstream_get (state, 1);
+
+    for (i = 0; i < nfchans; i++)
+	dithflag[i] = bitstream_get (state, 1);
+
+    chaninfo = !state->acmod;
+    do {
+	if (bitstream_get (state, 1)) {	/* dynrnge */
+	    int dynrng;
+
+	    dynrng = bitstream_get_2 (state, 8);
+	    if (state->dynrnge) {
+		level_t range;
+
+#if !defined(LIBA52_FIXED)
+		range = ((((dynrng & 0x1f) | 0x20) << 13) *
+			 scale_factor[3 - (dynrng >> 5)]);
+#else
+		range = ((dynrng & 0x1f) | 0x20) << (21 + (dynrng >> 5));
+#endif
+		if (state->dynrngcall)
+		    range = state->dynrngcall (range, state->dynrngdata);
+		state->dynrng = MUL_L (state->level, range);
+	    }
+	}
+    } while (chaninfo--);
+
+    if (bitstream_get (state, 1)) {	/* cplstre */
+	state->chincpl = 0;
+	if (bitstream_get (state, 1)) {	/* cplinu */
+	    static uint8_t bndtab[16] = {31, 35, 37, 39, 41, 42, 43, 44,
+					 45, 45, 46, 46, 47, 47, 48, 48};
+	    int cplbegf;
+	    int cplendf;
+	    int ncplsubnd;
+
+	    for (i = 0; i < nfchans; i++)
+		state->chincpl |= bitstream_get (state, 1) << i;
+	    switch (state->acmod) {
+	    case 0: case 1:
+		return 1;
+	    case 2:
+		state->phsflginu = bitstream_get (state, 1);
+	    }
+	    cplbegf = bitstream_get (state, 4);
+	    cplendf = bitstream_get (state, 4);
+
+	    if (cplendf + 3 - cplbegf < 0)
+		return 1;
+	    state->ncplbnd = ncplsubnd = cplendf + 3 - cplbegf;
+	    state->cplstrtbnd = bndtab[cplbegf];
+	    state->cplstrtmant = cplbegf * 12 + 37;
+	    state->cplendmant = cplendf * 12 + 73;
+
+	    state->cplbndstrc = 0;
+	    for (i = 0; i < ncplsubnd - 1; i++)
+		if (bitstream_get (state, 1)) {
+		    state->cplbndstrc |= 1 << i;
+		    state->ncplbnd--;
+		}
+	}
+    }
+
+    if (state->chincpl) {	/* cplinu */
+	int j, cplcoe;
+
+	cplcoe = 0;
+	for (i = 0; i < nfchans; i++)
+	    if ((state->chincpl) >> i & 1)
+		if (bitstream_get (state, 1)) {	/* cplcoe */
+		    int mstrcplco, cplcoexp, cplcomant;
+
+		    cplcoe = 1;
+		    mstrcplco = 3 * bitstream_get (state, 2);
+		    for (j = 0; j < state->ncplbnd; j++) {
+			cplcoexp = bitstream_get (state, 4);
+			cplcomant = bitstream_get (state, 4);
+			if (cplcoexp == 15)
+			    cplcomant <<= 14;
+			else
+			    cplcomant = (cplcomant | 0x10) << 13;
+#ifndef LIBA52_FIXED
+			state->cplco[i][j] =
+			    cplcomant * scale_factor[cplcoexp + mstrcplco];
+#else
+			state->cplco[i][j] = (cplcomant << 11) >> (cplcoexp + mstrcplco);
+#endif
+
+		    }
+		}
+	if ((state->acmod == 2) && state->phsflginu && cplcoe)
+	    for (j = 0; j < state->ncplbnd; j++)
+		if (bitstream_get (state, 1))	/* phsflg */
+		    state->cplco[1][j] = -state->cplco[1][j];
+    }
+
+    if ((state->acmod == 2) && (bitstream_get (state, 1))) {	/* rematstr */
+	int end;
+
+	state->rematflg = 0;
+	end = (state->chincpl) ? state->cplstrtmant : 253;	/* cplinu */
+	i = 0;
+	do
+	    state->rematflg |= bitstream_get (state, 1) << i;
+	while (rematrix_band[i++] < end);
+    }
+
+    cplexpstr = EXP_REUSE;
+    lfeexpstr = EXP_REUSE;
+    if (state->chincpl)	/* cplinu */
+	cplexpstr = bitstream_get (state, 2);
+    for (i = 0; i < nfchans; i++)
+	chexpstr[i] = bitstream_get (state, 2);
+    if (state->lfeon)
+	lfeexpstr = bitstream_get (state, 1);
+
+    for (i = 0; i < nfchans; i++)
+	if (chexpstr[i] != EXP_REUSE) {
+	    if ((state->chincpl >> i) & 1)
+		state->endmant[i] = state->cplstrtmant;
+	    else {
+		int chbwcod;
+
+		chbwcod = bitstream_get (state, 6);
+		if (chbwcod > 60)
+		    return 1;
+		state->endmant[i] = chbwcod * 3 + 73;
+	    }
+	}
+
+    do_bit_alloc = 0;
+
+    if (cplexpstr != EXP_REUSE) {
+	int cplabsexp, ncplgrps;
+
+	do_bit_alloc = 64;
+	ncplgrps = ((state->cplendmant - state->cplstrtmant) /
+		    (3 << (cplexpstr - 1)));
+	cplabsexp = bitstream_get (state, 4) << 1;
+	if (parse_exponents (state, cplexpstr, ncplgrps, cplabsexp,
+			     state->cpl_expbap.exp + state->cplstrtmant))
+	    return 1;
+    }
+    for (i = 0; i < nfchans; i++)
+	if (chexpstr[i] != EXP_REUSE) {
+	    int grp_size, nchgrps;
+
+	    do_bit_alloc |= 1 << i;
+	    grp_size = 3 << (chexpstr[i] - 1);
+	    nchgrps = (state->endmant[i] + grp_size - 4) / grp_size;
+	    state->fbw_expbap[i].exp[0] = bitstream_get (state, 4);
+	    if (parse_exponents (state, chexpstr[i], nchgrps,
+				 state->fbw_expbap[i].exp[0],
+				 state->fbw_expbap[i].exp + 1))
+		return 1;
+	    bitstream_get (state, 2);	/* gainrng */
+	}
+    if (lfeexpstr != EXP_REUSE) {
+	do_bit_alloc |= 32;
+	state->lfe_expbap.exp[0] = bitstream_get (state, 4);
+	if (parse_exponents (state, lfeexpstr, 2, state->lfe_expbap.exp[0],
+			     state->lfe_expbap.exp + 1))
+	    return 1;
+    }
+
+    if (bitstream_get (state, 1)) {	/* baie */
+	do_bit_alloc = 127;
+	state->bai = bitstream_get (state, 11);
+    }
+    if (bitstream_get (state, 1)) {	/* snroffste */
+	do_bit_alloc = 127;
+	state->csnroffst = bitstream_get (state, 6);
+	if (state->chincpl)	/* cplinu */
+	    state->cplba.bai = bitstream_get (state, 7);
+	for (i = 0; i < nfchans; i++)
+	    state->ba[i].bai = bitstream_get (state, 7);
+	if (state->lfeon)
+	    state->lfeba.bai = bitstream_get (state, 7);
+    }
+    if ((state->chincpl) && (bitstream_get (state, 1))) { /* cplleake */
+	do_bit_alloc |= 64;
+	state->cplfleak = 9 - bitstream_get (state, 3);
+	state->cplsleak = 9 - bitstream_get (state, 3);
+    }
+
+    if (bitstream_get (state, 1)) {	/* deltbaie */
+	do_bit_alloc = 127;
+	if (state->chincpl)	/* cplinu */
+	    state->cplba.deltbae = bitstream_get (state, 2);
+	for (i = 0; i < nfchans; i++)
+	    state->ba[i].deltbae = bitstream_get (state, 2);
+	if (state->chincpl &&	/* cplinu */
+	    (state->cplba.deltbae == DELTA_BIT_NEW) &&
+	    parse_deltba (state, state->cplba.deltba))
+	    return 1;
+	for (i = 0; i < nfchans; i++)
+	    if ((state->ba[i].deltbae == DELTA_BIT_NEW) &&
+		parse_deltba (state, state->ba[i].deltba))
+		return 1;
+    }
+
+    if (do_bit_alloc) {
+	if (zero_snr_offsets (nfchans, state)) {
+	    memset (state->cpl_expbap.bap, 0, sizeof (state->cpl_expbap.bap));
+	    for (i = 0; i < nfchans; i++)
+		memset (state->fbw_expbap[i].bap, 0,
+			sizeof (state->fbw_expbap[i].bap));
+	    memset (state->lfe_expbap.bap, 0, sizeof (state->lfe_expbap.bap));
+	} else {
+	    if (state->chincpl && (do_bit_alloc & 64))	/* cplinu */
+		a52_bit_allocate (state, &state->cplba, state->cplstrtbnd,
+				  state->cplstrtmant, state->cplendmant,
+				  state->cplfleak << 8, state->cplsleak << 8,
+				  &state->cpl_expbap);
+	    for (i = 0; i < nfchans; i++)
+		if (do_bit_alloc & (1 << i))
+		    a52_bit_allocate (state, state->ba + i, 0, 0,
+				      state->endmant[i], 0, 0,
+				      state->fbw_expbap +i);
+	    if (state->lfeon && (do_bit_alloc & 32)) {
+		state->lfeba.deltbae = DELTA_BIT_NONE;
+		a52_bit_allocate (state, &state->lfeba, 0, 0, 7, 0, 0,
+				  &state->lfe_expbap);
+	    }
+	}
+    }
+
+    if (bitstream_get (state, 1)) {	/* skiple */
+	i = bitstream_get (state, 9);	/* skipl */
+	while (i--)
+	    bitstream_get (state, 8);
+    }
+
+    samples = state->samples;
+    if (state->output & A52_LFE)
+	samples += 256;	/* shift for LFE channel */
+
+    chanbias = a52_downmix_coeff (coeff, state->acmod, state->output,
+				  state->dynrng, state->clev, state->slev);
+
+    quant.q1_ptr = quant.q2_ptr = quant.q4_ptr = -1;
+    done_cpl = 0;
+
+    for (i = 0; i < nfchans; i++) {
+	int j;
+
+	coeff_get (state, samples + 256 * i, state->fbw_expbap +i, &quant,
+		   coeff[i], dithflag[i], state->endmant[i]);
+
+	if ((state->chincpl >> i) & 1) {
+	    if (!done_cpl) {
+		done_cpl = 1;
+		coeff_get_coupling (state, nfchans, coeff,
+				    (sample_t (*)[256])samples, &quant,
+				    dithflag);
+	    }
+	    j = state->cplendmant;
+	} else
+	    j = state->endmant[i];
+	do
+	    (samples + 256 * i)[j] = 0;
+	while (++j < 256);
+    }
+
+    if (state->acmod == 2) {
+	int j, end, band, rematflg;
+
+	end = ((state->endmant[0] < state->endmant[1]) ?
+	       state->endmant[0] : state->endmant[1]);
+
+	i = 0;
+	j = 13;
+	rematflg = state->rematflg;
+	do {
+	    if (! (rematflg & 1)) {
+		rematflg >>= 1;
+		j = rematrix_band[i++];
+		continue;
+	    }
+	    rematflg >>= 1;
+	    band = rematrix_band[i++];
+	    if (band > end)
+		band = end;
+	    do {
+		sample_t tmp0, tmp1;
+
+		tmp0 = samples[j];
+		tmp1 = (samples+256)[j];
+		samples[j] = tmp0 + tmp1;
+		(samples+256)[j] = tmp0 - tmp1;
+	    } while (++j < band);
+	} while (j < end);
+    }
+
+    if (state->lfeon) {
+	if (state->output & A52_LFE) {
+	    coeff_get (state, samples - 256, &state->lfe_expbap, &quant,
+		       state->dynrng, 0, 7);
+	    for (i = 7; i < 256; i++)
+		(samples-256)[i] = 0;
+	    a52_imdct_512 (samples - 256, samples + 1536 - 256, state->bias);
+	} else {
+	    /* just skip the LFE coefficients */
+	    coeff_get (state, samples + 1280, &state->lfe_expbap, &quant,
+		       0, 0, 7);
+	}
+    }
+
+    i = 0;
+    if (nfchans_tbl[state->output & A52_CHANNEL_MASK] < nfchans)
+	for (i = 1; i < nfchans; i++)
+	    if (blksw[i] != blksw[0])
+		break;
+
+    if (i < nfchans) {
+	if (state->downmixed) {
+	    state->downmixed = 0;
+	    a52_upmix (samples + 1536, state->acmod, state->output);
+	}
+
+	for (i = 0; i < nfchans; i++) {
+	    sample_t bias;
+
+	    bias = 0;
+	    if (!(chanbias & (1 << i)))
+		bias = state->bias;
+
+	    if (coeff[i]) {
+		if (blksw[i])
+		    a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i,
+				   bias);
+		else
+		    a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i,
+				   bias);
+	    } else {
+		int j;
+
+		for (j = 0; j < 256; j++)
+		    (samples + 256 * i)[j] = bias;
+	    }
+	}
+
+	a52_downmix (samples, state->acmod, state->output, state->bias,
+		     state->clev, state->slev);
+    } else {
+	nfchans = nfchans_tbl[state->output & A52_CHANNEL_MASK];
+
+	a52_downmix (samples, state->acmod, state->output, 0,
+		     state->clev, state->slev);
+
+	if (!state->downmixed) {
+	    state->downmixed = 1;
+	    a52_downmix (samples + 1536, state->acmod, state->output, 0,
+			 state->clev, state->slev);
+	}
+
+	if (blksw[0])
+	    for (i = 0; i < nfchans; i++)
+		a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i,
+			       state->bias);
+	else
+	    for (i = 0; i < nfchans; i++)
+		a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i,
+			       state->bias);
+    }
+
+    return 0;
+}
+
+void a52_free (a52_state_t * state)
+{
+    free (state->samples);
+    free (state);
+}
diff --git a/contrib/ffmpeg/libavcodec/liba52/resample.c b/contrib/ffmpeg/libavcodec/liba52/resample.c
new file mode 100644
index 000000000..3f06aba1f
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/liba52/resample.c
@@ -0,0 +1,63 @@
+/*
+ * copyright (C) 2001 Arpad Gereoffy
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+// a52_resample_init should find the requested converter (from type flags ->
+// given number of channels) and set up some function pointers...
+
+// a52_resample() should do the conversion.
+
+#include "a52.h"
+#include "mm_accel.h"
+#include "config.h"
+#include "../../libpostproc/mangle.h"
+
+int (* a52_resample) (float * _f, int16_t * s16)=NULL;
+
+#include "resample_c.c"
+
+#ifdef ARCH_X86_32
+#include "resample_mmx.c"
+#endif
+
+void* a52_resample_init(uint32_t mm_accel,int flags,int chans){
+void* tmp;
+
+#ifdef ARCH_X86_32
+    if(mm_accel&MM_ACCEL_X86_MMX){
+	tmp=a52_resample_MMX(flags,chans);
+	if(tmp){
+	    if(a52_resample==NULL) av_log(NULL, AV_LOG_INFO, "Using MMX optimized resampler\n");
+	    a52_resample=tmp;
+	    return tmp;
+	}
+    }
+#endif
+
+    tmp=a52_resample_C(flags,chans);
+    if(tmp){
+	if(a52_resample==NULL) av_log(NULL, AV_LOG_INFO, "No accelerated resampler found\n");
+	a52_resample=tmp;
+	return tmp;
+    }
+
+    av_log(NULL, AV_LOG_ERROR, "Unimplemented resampler for mode 0x%X -> %d channels conversion - Contact MPlayer developers!\n", flags, chans);
+    return NULL;
+}
diff --git a/contrib/ffmpeg/libavcodec/liba52/resample_c.c b/contrib/ffmpeg/libavcodec/liba52/resample_c.c
new file mode 100644
index 000000000..905146a7d
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/liba52/resample_c.c
@@ -0,0 +1,203 @@
+/*
+ * this code is based on a52dec/libao/audio_out_oss.c
+ * copyright (C) 2001 Arpad Gereoffy
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+static inline int16_t convert (int32_t i)
+{
+    if (i > 0x43c07fff)
+	return 32767;
+    else if (i < 0x43bf8000)
+	return -32768;
+    else
+	return i - 0x43c00000;
+}
+
+static int a52_resample_MONO_to_5_C(float * _f, int16_t * s16){
+    int i;
+    int32_t * f = (int32_t *) _f;
+	for (i = 0; i < 256; i++) {
+	    s16[5*i] = s16[5*i+1] = s16[5*i+2] = s16[5*i+3] = 0;
+	    s16[5*i+4] = convert (f[i]);
+	}
+    return 5*256;
+}
+
+static int a52_resample_MONO_to_1_C(float * _f, int16_t * s16){
+    int i;
+    int32_t * f = (int32_t *) _f;
+	for (i = 0; i < 256; i++) {
+	    s16[i] = convert (f[i]);
+	}
+    return 1*256;
+}
+
+static int a52_resample_STEREO_to_2_C(float * _f, int16_t * s16){
+    int i;
+    int32_t * f = (int32_t *) _f;
+	for (i = 0; i < 256; i++) {
+	    s16[2*i] = convert (f[i]);
+	    s16[2*i+1] = convert (f[i+256]);
+	}
+    return 2*256;
+}
+
+static int a52_resample_3F_to_5_C(float * _f, int16_t * s16){
+    int i;
+    int32_t * f = (int32_t *) _f;
+	for (i = 0; i < 256; i++) {
+	    s16[5*i] = convert (f[i]);
+	    s16[5*i+1] = convert (f[i+512]);
+	    s16[5*i+2] = s16[5*i+3] = 0;
+	    s16[5*i+4] = convert (f[i+256]);
+	}
+    return 5*256;
+}
+
+static int a52_resample_2F_2R_to_4_C(float * _f, int16_t * s16){
+    int i;
+    int32_t * f = (int32_t *) _f;
+	for (i = 0; i < 256; i++) {
+	    s16[4*i] = convert (f[i]);
+	    s16[4*i+1] = convert (f[i+256]);
+	    s16[4*i+2] = convert (f[i+512]);
+	    s16[4*i+3] = convert (f[i+768]);
+	}
+    return 4*256;
+}
+
+static int a52_resample_3F_2R_to_5_C(float * _f, int16_t * s16){
+    int i;
+    int32_t * f = (int32_t *) _f;
+	for (i = 0; i < 256; i++) {
+	    s16[5*i] = convert (f[i]);
+	    s16[5*i+1] = convert (f[i+512]);
+	    s16[5*i+2] = convert (f[i+768]);
+	    s16[5*i+3] = convert (f[i+1024]);
+	    s16[5*i+4] = convert (f[i+256]);
+	}
+    return 5*256;
+}
+
+static int a52_resample_MONO_LFE_to_6_C(float * _f, int16_t * s16){
+    int i;
+    int32_t * f = (int32_t *) _f;
+	for (i = 0; i < 256; i++) {
+	    s16[6*i] = s16[6*i+1] = s16[6*i+2] = s16[6*i+3] = 0;
+	    s16[6*i+4] = convert (f[i+256]);
+	    s16[6*i+5] = convert (f[i]);
+	}
+    return 6*256;
+}
+
+static int a52_resample_STEREO_LFE_to_6_C(float * _f, int16_t * s16){
+    int i;
+    int32_t * f = (int32_t *) _f;
+	for (i = 0; i < 256; i++) {
+	    s16[6*i] = convert (f[i+256]);
+	    s16[6*i+1] = convert (f[i+512]);
+	    s16[6*i+2] = s16[6*i+3] = s16[6*i+4] = 0;
+	    s16[6*i+5] = convert (f[i]);
+	}
+    return 6*256;
+}
+
+static int a52_resample_3F_LFE_to_6_C(float * _f, int16_t * s16){
+    int i;
+    int32_t * f = (int32_t *) _f;
+	for (i = 0; i < 256; i++) {
+	    s16[6*i] = convert (f[i+256]);
+	    s16[6*i+1] = convert (f[i+768]);
+	    s16[6*i+2] = s16[6*i+3] = 0;
+	    s16[6*i+4] = convert (f[i+512]);
+	    s16[6*i+5] = convert (f[i]);
+	}
+    return 6*256;
+}
+
+static int a52_resample_2F_2R_LFE_to_6_C(float * _f, int16_t * s16){
+    int i;
+    int32_t * f = (int32_t *) _f;
+	for (i = 0; i < 256; i++) {
+	    s16[6*i] = convert (f[i+256]);
+	    s16[6*i+1] = convert (f[i+512]);
+	    s16[6*i+2] = convert (f[i+768]);
+	    s16[6*i+3] = convert (f[i+1024]);
+	    s16[6*i+4] = 0;
+	    s16[6*i+5] = convert (f[i]);
+	}
+    return 6*256;
+}
+
+static int a52_resample_3F_2R_LFE_to_6_C(float * _f, int16_t * s16){
+    int i;
+    int32_t * f = (int32_t *) _f;
+	for (i = 0; i < 256; i++) {
+	    s16[6*i] = convert (f[i+256]);
+	    s16[6*i+1] = convert (f[i+768]);
+	    s16[6*i+2] = convert (f[i+1024]);
+	    s16[6*i+3] = convert (f[i+1280]);
+	    s16[6*i+4] = convert (f[i+512]);
+	    s16[6*i+5] = convert (f[i]);
+	}
+    return 6*256;
+}
+
+
+static void* a52_resample_C(int flags, int ch){
+    switch (flags) {
+    case A52_MONO:
+	if(ch==5) return a52_resample_MONO_to_5_C;
+	if(ch==1) return a52_resample_MONO_to_1_C;
+	break;
+    case A52_CHANNEL:
+    case A52_STEREO:
+    case A52_DOLBY:
+	if(ch==2) return a52_resample_STEREO_to_2_C;
+	break;
+    case A52_3F:
+	if(ch==5) return a52_resample_3F_to_5_C;
+	break;
+    case A52_2F2R:
+	if(ch==4) return a52_resample_2F_2R_to_4_C;
+	break;
+    case A52_3F2R:
+	if(ch==5) return a52_resample_3F_2R_to_5_C;
+	break;
+    case A52_MONO | A52_LFE:
+	if(ch==6) return a52_resample_MONO_LFE_to_6_C;
+	break;
+    case A52_CHANNEL | A52_LFE:
+    case A52_STEREO | A52_LFE:
+    case A52_DOLBY | A52_LFE:
+	if(ch==6) return a52_resample_STEREO_LFE_to_6_C;
+	break;
+    case A52_3F | A52_LFE:
+	if(ch==6) return a52_resample_3F_LFE_to_6_C;
+	break;
+    case A52_2F2R | A52_LFE:
+	if(ch==6) return a52_resample_2F_2R_LFE_to_6_C;
+	break;
+    case A52_3F2R | A52_LFE:
+	if(ch==6) return a52_resample_3F_2R_LFE_to_6_C;
+	break;
+    }
+    return NULL;
+}
diff --git a/contrib/ffmpeg/libavcodec/liba52/resample_mmx.c b/contrib/ffmpeg/libavcodec/liba52/resample_mmx.c
new file mode 100644
index 000000000..173c804d9
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/liba52/resample_mmx.c
@@ -0,0 +1,537 @@
+/*
+ * resample_mmx.c
+ * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* optimization TODO / NOTES
+    movntq is slightly faster (0.5% with the current test.c benchmark)
+	(but thats just test.c so that needs to be testd in reallity)
+	and it would mean (C / MMX2 / MMX / 3DNOW) versions
+*/
+
+static uint64_t __attribute__((aligned(8))) attribute_used magicF2W= 0x43c0000043c00000LL;
+static uint64_t __attribute__((aligned(8))) attribute_used wm1010= 0xFFFF0000FFFF0000LL;
+static uint64_t __attribute__((aligned(8))) attribute_used wm0101= 0x0000FFFF0000FFFFLL;
+static uint64_t __attribute__((aligned(8))) attribute_used wm1100= 0xFFFFFFFF00000000LL;
+
+static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){
+    int32_t * f = (int32_t *) _f;
+	asm volatile(
+		"movl $-512, %%esi		\n\t"
+		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
+		"movq "MANGLE(wm1100)", %%mm3	\n\t"
+		"movq "MANGLE(wm0101)", %%mm4	\n\t"
+		"movq "MANGLE(wm1010)", %%mm5	\n\t"
+		"pxor %%mm6, %%mm6		\n\t"
+		"1:				\n\t"
+		"movq (%1, %%esi, 2), %%mm0	\n\t"
+		"movq 8(%1, %%esi, 2), %%mm1	\n\t"
+		"leal (%%esi, %%esi, 4), %%edi	\n\t"
+		"psubd %%mm7, %%mm0		\n\t"
+		"psubd %%mm7, %%mm1		\n\t"
+		"packssdw %%mm1, %%mm0		\n\t"
+		"movq %%mm0, %%mm1		\n\t"
+		"pand %%mm4, %%mm0		\n\t"
+		"pand %%mm5, %%mm1		\n\t"
+		"movq %%mm6, (%0, %%edi)	\n\t" // 0 0 0 0
+		"movd %%mm0, 8(%0, %%edi)	\n\t" // A 0
+		"pand %%mm3, %%mm0		\n\t"
+		"movd %%mm6, 12(%0, %%edi)	\n\t" // 0 0
+		"movd %%mm1, 16(%0, %%edi)	\n\t" // 0 B
+		"pand %%mm3, %%mm1		\n\t"
+		"movd %%mm6, 20(%0, %%edi)	\n\t" // 0 0
+		"movq %%mm0, 24(%0, %%edi)	\n\t" // 0 0 C 0
+		"movq %%mm1, 32(%0, %%edi)	\n\t" // 0 0 0 B
+		"addl $8, %%esi			\n\t"
+		" jnz 1b			\n\t"
+		"emms				\n\t"
+		:: "r" (s16+1280), "r" (f+256)
+		:"%esi", "%edi", "memory"
+	);
+    return 5*256;
+}
+
+static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
+    int32_t * f = (int32_t *) _f;
+/* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it
+#ifdef HAVE_SSE
+	asm volatile(
+		"movl $-1024, %%esi		\n\t"
+		"1:				\n\t"
+		"cvtps2pi (%1, %%esi), %%mm0	\n\t"
+		"cvtps2pi 1024(%1, %%esi), %%mm2\n\t"
+		"movq %%mm0, %%mm1		\n\t"
+		"punpcklwd %%mm2, %%mm0		\n\t"
+		"punpckhwd %%mm2, %%mm1		\n\t"
+		"movq %%mm0, (%0, %%esi)	\n\t"
+		"movq %%mm1, 8(%0, %%esi)	\n\t"
+		"addl $16, %%esi		\n\t"
+		" jnz 1b			\n\t"
+		"emms				\n\t"
+		:: "r" (s16+512), "r" (f+256)
+		:"%esi", "memory"
+	);*/
+	asm volatile(
+		"movl $-1024, %%esi		\n\t"
+		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
+		"1:				\n\t"
+		"movq (%1, %%esi), %%mm0	\n\t"
+		"movq 8(%1, %%esi), %%mm1	\n\t"
+		"movq 1024(%1, %%esi), %%mm2	\n\t"
+		"movq 1032(%1, %%esi), %%mm3	\n\t"
+		"psubd %%mm7, %%mm0		\n\t"
+		"psubd %%mm7, %%mm1		\n\t"
+		"psubd %%mm7, %%mm2		\n\t"
+		"psubd %%mm7, %%mm3		\n\t"
+		"packssdw %%mm1, %%mm0		\n\t"
+		"packssdw %%mm3, %%mm2		\n\t"
+		"movq %%mm0, %%mm1		\n\t"
+		"punpcklwd %%mm2, %%mm0		\n\t"
+		"punpckhwd %%mm2, %%mm1		\n\t"
+		"movq %%mm0, (%0, %%esi)	\n\t"
+		"movq %%mm1, 8(%0, %%esi)	\n\t"
+		"addl $16, %%esi		\n\t"
+		" jnz 1b			\n\t"
+		"emms				\n\t"
+		:: "r" (s16+512), "r" (f+256)
+		:"%esi", "memory"
+	);
+    return 2*256;
+}
+
+static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){
+    int32_t * f = (int32_t *) _f;
+	asm volatile(
+		"movl $-1024, %%esi		\n\t"
+		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
+		"pxor %%mm6, %%mm6		\n\t"
+		"movq %%mm7, %%mm5		\n\t"
+		"punpckldq %%mm6, %%mm5		\n\t"
+		"1:				\n\t"
+		"movd (%1, %%esi), %%mm0	\n\t"
+		"punpckldq 2048(%1, %%esi), %%mm0\n\t"
+		"movd 1024(%1, %%esi), %%mm1	\n\t"
+		"punpckldq 4(%1, %%esi), %%mm1	\n\t"
+		"movd 2052(%1, %%esi), %%mm2	\n\t"
+		"movq %%mm7, %%mm3		\n\t"
+		"punpckldq 1028(%1, %%esi), %%mm3\n\t"
+		"movd 8(%1, %%esi), %%mm4	\n\t"
+		"punpckldq 2056(%1, %%esi), %%mm4\n\t"
+		"leal (%%esi, %%esi, 4), %%edi	\n\t"
+		"sarl $1, %%edi			\n\t"
+		"psubd %%mm7, %%mm0		\n\t"
+		"psubd %%mm7, %%mm1		\n\t"
+		"psubd %%mm5, %%mm2		\n\t"
+		"psubd %%mm7, %%mm3		\n\t"
+		"psubd %%mm7, %%mm4		\n\t"
+		"packssdw %%mm6, %%mm0		\n\t"
+		"packssdw %%mm2, %%mm1		\n\t"
+		"packssdw %%mm4, %%mm3		\n\t"
+		"movq %%mm0, (%0, %%edi)	\n\t"
+		"movq %%mm1, 8(%0, %%edi)	\n\t"
+		"movq %%mm3, 16(%0, %%edi)	\n\t"
+
+		"movd 1032(%1, %%esi), %%mm1	\n\t"
+		"punpckldq 12(%1, %%esi), %%mm1\n\t"
+		"movd 2060(%1, %%esi), %%mm2	\n\t"
+		"movq %%mm7, %%mm3		\n\t"
+		"punpckldq 1036(%1, %%esi), %%mm3\n\t"
+		"pxor %%mm0, %%mm0		\n\t"
+		"psubd %%mm7, %%mm1		\n\t"
+		"psubd %%mm5, %%mm2		\n\t"
+		"psubd %%mm7, %%mm3		\n\t"
+		"packssdw %%mm1, %%mm0		\n\t"
+		"packssdw %%mm3, %%mm2		\n\t"
+		"movq %%mm0, 24(%0, %%edi)	\n\t"
+		"movq %%mm2, 32(%0, %%edi)	\n\t"
+
+		"addl $16, %%esi		\n\t"
+		" jnz 1b			\n\t"
+		"emms				\n\t"
+		:: "r" (s16+1280), "r" (f+256)
+		:"%esi", "%edi", "memory"
+	);
+    return 5*256;
+}
+
+static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){
+    int32_t * f = (int32_t *) _f;
+	asm volatile(
+		"movl $-1024, %%esi		\n\t"
+		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
+		"1:				\n\t"
+		"movq (%1, %%esi), %%mm0	\n\t"
+		"movq 8(%1, %%esi), %%mm1	\n\t"
+		"movq 1024(%1, %%esi), %%mm2	\n\t"
+		"movq 1032(%1, %%esi), %%mm3	\n\t"
+		"psubd %%mm7, %%mm0		\n\t"
+		"psubd %%mm7, %%mm1		\n\t"
+		"psubd %%mm7, %%mm2		\n\t"
+		"psubd %%mm7, %%mm3		\n\t"
+		"packssdw %%mm1, %%mm0		\n\t"
+		"packssdw %%mm3, %%mm2		\n\t"
+		"movq 2048(%1, %%esi), %%mm3	\n\t"
+		"movq 2056(%1, %%esi), %%mm4	\n\t"
+		"movq 3072(%1, %%esi), %%mm5	\n\t"
+		"movq 3080(%1, %%esi), %%mm6	\n\t"
+		"psubd %%mm7, %%mm3		\n\t"
+		"psubd %%mm7, %%mm4		\n\t"
+		"psubd %%mm7, %%mm5		\n\t"
+		"psubd %%mm7, %%mm6		\n\t"
+		"packssdw %%mm4, %%mm3		\n\t"
+		"packssdw %%mm6, %%mm5		\n\t"
+		"movq %%mm0, %%mm1		\n\t"
+		"movq %%mm3, %%mm4		\n\t"
+		"punpcklwd %%mm2, %%mm0		\n\t"
+		"punpckhwd %%mm2, %%mm1		\n\t"
+		"punpcklwd %%mm5, %%mm3		\n\t"
+		"punpckhwd %%mm5, %%mm4		\n\t"
+		"movq %%mm0, %%mm2		\n\t"
+		"movq %%mm1, %%mm5		\n\t"
+		"punpckldq %%mm3, %%mm0		\n\t"
+		"punpckhdq %%mm3, %%mm2		\n\t"
+		"punpckldq %%mm4, %%mm1		\n\t"
+		"punpckhdq %%mm4, %%mm5		\n\t"
+		"movq %%mm0, (%0, %%esi,2)	\n\t"
+		"movq %%mm2, 8(%0, %%esi,2)	\n\t"
+		"movq %%mm1, 16(%0, %%esi,2)	\n\t"
+		"movq %%mm5, 24(%0, %%esi,2)	\n\t"
+		"addl $16, %%esi		\n\t"
+		" jnz 1b			\n\t"
+		"emms				\n\t"
+		:: "r" (s16+1024), "r" (f+256)
+		:"%esi", "memory"
+	);
+    return 4*256;
+}
+
+static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){
+    int32_t * f = (int32_t *) _f;
+	asm volatile(
+		"movl $-1024, %%esi		\n\t"
+		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
+		"1:				\n\t"
+		"movd (%1, %%esi), %%mm0	\n\t"
+		"punpckldq 2048(%1, %%esi), %%mm0\n\t"
+		"movd 3072(%1, %%esi), %%mm1	\n\t"
+		"punpckldq 4096(%1, %%esi), %%mm1\n\t"
+		"movd 1024(%1, %%esi), %%mm2	\n\t"
+		"punpckldq 4(%1, %%esi), %%mm2	\n\t"
+		"movd 2052(%1, %%esi), %%mm3	\n\t"
+		"punpckldq 3076(%1, %%esi), %%mm3\n\t"
+		"movd 4100(%1, %%esi), %%mm4	\n\t"
+		"punpckldq 1028(%1, %%esi), %%mm4\n\t"
+		"movd 8(%1, %%esi), %%mm5	\n\t"
+		"punpckldq 2056(%1, %%esi), %%mm5\n\t"
+		"leal (%%esi, %%esi, 4), %%edi	\n\t"
+		"sarl $1, %%edi			\n\t"
+		"psubd %%mm7, %%mm0		\n\t"
+		"psubd %%mm7, %%mm1		\n\t"
+		"psubd %%mm7, %%mm2		\n\t"
+		"psubd %%mm7, %%mm3		\n\t"
+		"psubd %%mm7, %%mm4		\n\t"
+		"psubd %%mm7, %%mm5		\n\t"
+		"packssdw %%mm1, %%mm0		\n\t"
+		"packssdw %%mm3, %%mm2		\n\t"
+		"packssdw %%mm5, %%mm4		\n\t"
+		"movq %%mm0, (%0, %%edi)	\n\t"
+		"movq %%mm2, 8(%0, %%edi)	\n\t"
+		"movq %%mm4, 16(%0, %%edi)	\n\t"
+
+		"movd 3080(%1, %%esi), %%mm0	\n\t"
+		"punpckldq 4104(%1, %%esi), %%mm0\n\t"
+		"movd 1032(%1, %%esi), %%mm1	\n\t"
+		"punpckldq 12(%1, %%esi), %%mm1\n\t"
+		"movd 2060(%1, %%esi), %%mm2	\n\t"
+		"punpckldq 3084(%1, %%esi), %%mm2\n\t"
+		"movd 4108(%1, %%esi), %%mm3	\n\t"
+		"punpckldq 1036(%1, %%esi), %%mm3\n\t"
+		"psubd %%mm7, %%mm0		\n\t"
+		"psubd %%mm7, %%mm1		\n\t"
+		"psubd %%mm7, %%mm2		\n\t"
+		"psubd %%mm7, %%mm3		\n\t"
+		"packssdw %%mm1, %%mm0		\n\t"
+		"packssdw %%mm3, %%mm2		\n\t"
+		"movq %%mm0, 24(%0, %%edi)	\n\t"
+		"movq %%mm2, 32(%0, %%edi)	\n\t"
+
+		"addl $16, %%esi		\n\t"
+		" jnz 1b			\n\t"
+		"emms				\n\t"
+		:: "r" (s16+1280), "r" (f+256)
+		:"%esi", "%edi", "memory"
+	);
+    return 5*256;
+}
+
+static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){
+    int32_t * f = (int32_t *) _f;
+	asm volatile(
+		"movl $-1024, %%esi		\n\t"
+		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
+		"pxor %%mm6, %%mm6		\n\t"
+		"1:				\n\t"
+		"movq 1024(%1, %%esi), %%mm0	\n\t"
+		"movq 1032(%1, %%esi), %%mm1	\n\t"
+		"movq (%1, %%esi), %%mm2	\n\t"
+		"movq 8(%1, %%esi), %%mm3	\n\t"
+		"psubd %%mm7, %%mm0		\n\t"
+		"psubd %%mm7, %%mm1		\n\t"
+		"psubd %%mm7, %%mm2		\n\t"
+		"psubd %%mm7, %%mm3		\n\t"
+		"packssdw %%mm1, %%mm0		\n\t"
+		"packssdw %%mm3, %%mm2		\n\t"
+		"movq %%mm0, %%mm1		\n\t"
+		"punpcklwd %%mm2, %%mm0		\n\t"
+		"punpckhwd %%mm2, %%mm1		\n\t"
+		"leal (%%esi, %%esi, 2), %%edi	\n\t"
+		"movq %%mm6, (%0, %%edi)	\n\t"
+		"movd %%mm0, 8(%0, %%edi)	\n\t"
+		"punpckhdq %%mm0, %%mm0		\n\t"
+		"movq %%mm6, 12(%0, %%edi)	\n\t"
+		"movd %%mm0, 20(%0, %%edi)	\n\t"
+		"movq %%mm6, 24(%0, %%edi)	\n\t"
+		"movd %%mm1, 32(%0, %%edi)	\n\t"
+		"punpckhdq %%mm1, %%mm1		\n\t"
+		"movq %%mm6, 36(%0, %%edi)	\n\t"
+		"movd %%mm1, 44(%0, %%edi)	\n\t"
+		"addl $16, %%esi		\n\t"
+		" jnz 1b			\n\t"
+		"emms				\n\t"
+		:: "r" (s16+1536), "r" (f+256)
+		:"%esi", "%edi", "memory"
+	);
+    return 6*256;
+}
+
+static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){
+    int32_t * f = (int32_t *) _f;
+	asm volatile(
+		"movl $-1024, %%esi		\n\t"
+		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
+		"pxor %%mm6, %%mm6		\n\t"
+		"1:				\n\t"
+		"movq 1024(%1, %%esi), %%mm0	\n\t"
+		"movq 2048(%1, %%esi), %%mm1	\n\t"
+		"movq (%1, %%esi), %%mm5	\n\t"
+		"psubd %%mm7, %%mm0		\n\t"
+		"psubd %%mm7, %%mm1		\n\t"
+		"psubd %%mm7, %%mm5		\n\t"
+		"leal (%%esi, %%esi, 2), %%edi	\n\t"
+
+		"pxor %%mm4, %%mm4		\n\t"
+		"packssdw %%mm5, %%mm0		\n\t" // FfAa
+		"packssdw %%mm4, %%mm1		\n\t" // 00Bb
+		"punpckhwd %%mm0, %%mm4		\n\t" // F0f0
+		"punpcklwd %%mm1, %%mm0		\n\t" // BAba
+		"movq %%mm0, %%mm1		\n\t" // BAba
+		"punpckldq %%mm4, %%mm3		\n\t" // f0XX
+		"punpckldq %%mm6, %%mm0		\n\t" // 00ba
+		"punpckhdq %%mm1, %%mm3		\n\t" // BAf0
+
+		"movq %%mm0, (%0, %%edi)	\n\t" // 00ba
+		"punpckhdq %%mm4, %%mm0		\n\t" // F000
+		"movq %%mm3, 8(%0, %%edi)	\n\t" // BAf0
+		"movq %%mm0, 16(%0, %%edi)	\n\t" // F000
+		"addl $8, %%esi			\n\t"
+		" jnz 1b			\n\t"
+		"emms				\n\t"
+		:: "r" (s16+1536), "r" (f+256)
+		:"%esi", "%edi", "memory"
+	);
+    return 6*256;
+}
+
+static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){
+    int32_t * f = (int32_t *) _f;
+	asm volatile(
+		"movl $-1024, %%esi		\n\t"
+		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
+		"pxor %%mm6, %%mm6		\n\t"
+		"1:				\n\t"
+		"movq 1024(%1, %%esi), %%mm0	\n\t"
+		"movq 3072(%1, %%esi), %%mm1	\n\t"
+		"movq 2048(%1, %%esi), %%mm4	\n\t"
+		"movq (%1, %%esi), %%mm5	\n\t"
+		"psubd %%mm7, %%mm0		\n\t"
+		"psubd %%mm7, %%mm1		\n\t"
+		"psubd %%mm7, %%mm4		\n\t"
+		"psubd %%mm7, %%mm5		\n\t"
+		"leal (%%esi, %%esi, 2), %%edi	\n\t"
+
+		"packssdw %%mm4, %%mm0		\n\t" // EeAa
+		"packssdw %%mm5, %%mm1		\n\t" // FfBb
+		"movq %%mm0, %%mm2		\n\t" // EeAa
+		"punpcklwd %%mm1, %%mm0		\n\t" // BAba
+		"punpckhwd %%mm1, %%mm2		\n\t" // FEfe
+		"movq %%mm0, %%mm1		\n\t" // BAba
+		"punpckldq %%mm6, %%mm0		\n\t" // 00ba
+		"punpckhdq %%mm1, %%mm1		\n\t" // BABA
+
+		"movq %%mm0, (%0, %%edi)	\n\t"
+		"punpckhdq %%mm2, %%mm0		\n\t" // FE00
+		"punpckldq %%mm1, %%mm2		\n\t" // BAfe
+		"movq %%mm2, 8(%0, %%edi)	\n\t"
+		"movq %%mm0, 16(%0, %%edi)	\n\t"
+		"addl $8, %%esi			\n\t"
+		" jnz 1b			\n\t"
+		"emms				\n\t"
+		:: "r" (s16+1536), "r" (f+256)
+		:"%esi", "%edi", "memory"
+	);
+    return 6*256;
+}
+
+static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
+    int32_t * f = (int32_t *) _f;
+	asm volatile(
+		"movl $-1024, %%esi		\n\t"
+		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
+//		"pxor %%mm6, %%mm6		\n\t"
+		"1:				\n\t"
+		"movq 1024(%1, %%esi), %%mm0	\n\t"
+		"movq 2048(%1, %%esi), %%mm1	\n\t"
+		"movq 3072(%1, %%esi), %%mm2	\n\t"
+		"movq 4096(%1, %%esi), %%mm3	\n\t"
+		"movq (%1, %%esi), %%mm5	\n\t"
+		"psubd %%mm7, %%mm0		\n\t"
+		"psubd %%mm7, %%mm1		\n\t"
+		"psubd %%mm7, %%mm2		\n\t"
+		"psubd %%mm7, %%mm3		\n\t"
+		"psubd %%mm7, %%mm5		\n\t"
+		"leal (%%esi, %%esi, 2), %%edi	\n\t"
+
+		"packssdw %%mm2, %%mm0		\n\t" // CcAa
+		"packssdw %%mm3, %%mm1		\n\t" // DdBb
+		"packssdw %%mm5, %%mm5		\n\t" // FfFf
+		"movq %%mm0, %%mm2		\n\t" // CcAa
+		"punpcklwd %%mm1, %%mm0		\n\t" // BAba
+		"punpckhwd %%mm1, %%mm2		\n\t" // DCdc
+		"pxor %%mm4, %%mm4		\n\t" // 0000
+		"punpcklwd %%mm5, %%mm4		\n\t" // F0f0
+		"movq %%mm0, %%mm1		\n\t" // BAba
+		"movq %%mm4, %%mm3		\n\t" // F0f0
+		"punpckldq %%mm2, %%mm0		\n\t" // dcba
+		"punpckhdq %%mm1, %%mm1		\n\t" // BABA
+		"punpckldq %%mm1, %%mm4		\n\t" // BAf0
+		"punpckhdq %%mm3, %%mm2		\n\t" // F0DC
+
+		"movq %%mm0, (%0, %%edi)	\n\t"
+		"movq %%mm4, 8(%0, %%edi)	\n\t"
+		"movq %%mm2, 16(%0, %%edi)	\n\t"
+		"addl $8, %%esi			\n\t"
+		" jnz 1b			\n\t"
+		"emms				\n\t"
+		:: "r" (s16+1536), "r" (f+256)
+		:"%esi", "%edi", "memory"
+	);
+    return 6*256;
+}
+
+static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
+    int32_t * f = (int32_t *) _f;
+	asm volatile(
+		"movl $-1024, %%esi		\n\t"
+		"movq "MANGLE(magicF2W)", %%mm7	\n\t"
+//		"pxor %%mm6, %%mm6		\n\t"
+		"1:				\n\t"
+		"movq 1024(%1, %%esi), %%mm0	\n\t"
+		"movq 3072(%1, %%esi), %%mm1	\n\t"
+		"movq 4096(%1, %%esi), %%mm2	\n\t"
+		"movq 5120(%1, %%esi), %%mm3	\n\t"
+		"movq 2048(%1, %%esi), %%mm4	\n\t"
+		"movq (%1, %%esi), %%mm5	\n\t"
+		"psubd %%mm7, %%mm0		\n\t"
+		"psubd %%mm7, %%mm1		\n\t"
+		"psubd %%mm7, %%mm2		\n\t"
+		"psubd %%mm7, %%mm3		\n\t"
+		"psubd %%mm7, %%mm4		\n\t"
+		"psubd %%mm7, %%mm5		\n\t"
+		"leal (%%esi, %%esi, 2), %%edi	\n\t"
+
+		"packssdw %%mm2, %%mm0		\n\t" // CcAa
+		"packssdw %%mm3, %%mm1		\n\t" // DdBb
+		"packssdw %%mm4, %%mm4		\n\t" // EeEe
+		"packssdw %%mm5, %%mm5		\n\t" // FfFf
+		"movq %%mm0, %%mm2		\n\t" // CcAa
+		"punpcklwd %%mm1, %%mm0		\n\t" // BAba
+		"punpckhwd %%mm1, %%mm2		\n\t" // DCdc
+		"punpcklwd %%mm5, %%mm4		\n\t" // FEfe
+		"movq %%mm0, %%mm1		\n\t" // BAba
+		"movq %%mm4, %%mm3		\n\t" // FEfe
+		"punpckldq %%mm2, %%mm0		\n\t" // dcba
+		"punpckhdq %%mm1, %%mm1		\n\t" // BABA
+		"punpckldq %%mm1, %%mm4		\n\t" // BAfe
+		"punpckhdq %%mm3, %%mm2		\n\t" // FEDC
+
+		"movq %%mm0, (%0, %%edi)	\n\t"
+		"movq %%mm4, 8(%0, %%edi)	\n\t"
+		"movq %%mm2, 16(%0, %%edi)	\n\t"
+		"addl $8, %%esi			\n\t"
+		" jnz 1b			\n\t"
+		"emms				\n\t"
+		:: "r" (s16+1536), "r" (f+256)
+		:"%esi", "%edi", "memory"
+	);
+    return 6*256;
+}
+
+
+static void* a52_resample_MMX(int flags, int ch){
+    switch (flags) {
+    case A52_MONO:
+	if(ch==5) return a52_resample_MONO_to_5_MMX;
+	break;
+    case A52_CHANNEL:
+    case A52_STEREO:
+    case A52_DOLBY:
+	if(ch==2) return a52_resample_STEREO_to_2_MMX;
+	break;
+    case A52_3F:
+	if(ch==5) return a52_resample_3F_to_5_MMX;
+	break;
+    case A52_2F2R:
+	if(ch==4) return a52_resample_2F_2R_to_4_MMX;
+	break;
+    case A52_3F2R:
+	if(ch==5) return a52_resample_3F_2R_to_5_MMX;
+	break;
+    case A52_MONO | A52_LFE:
+	if(ch==6) return a52_resample_MONO_LFE_to_6_MMX;
+	break;
+    case A52_CHANNEL | A52_LFE:
+    case A52_STEREO | A52_LFE:
+    case A52_DOLBY | A52_LFE:
+	if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX;
+	break;
+    case A52_3F | A52_LFE:
+	if(ch==6) return a52_resample_3F_LFE_to_6_MMX;
+	break;
+    case A52_2F2R | A52_LFE:
+	if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX;
+	break;
+    case A52_3F2R | A52_LFE:
+	if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX;
+	break;
+    }
+    return NULL;
+}
+
+
diff --git a/contrib/ffmpeg/libavcodec/liba52/tables.h b/contrib/ffmpeg/libavcodec/liba52/tables.h
new file mode 100644
index 000000000..7f921c9d0
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/liba52/tables.h
@@ -0,0 +1,246 @@
+/*
+ * tables.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+static const int8_t exp_1[128] = {
+    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    25,25,25
+};
+static const int8_t exp_2[128] = {
+    -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+    -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+    -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+    -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+    -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+    25,25,25
+};
+static const int8_t exp_3[128] = {
+    -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
+    -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
+    -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
+    -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
+    -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
+    25,25,25
+};
+
+#define Q(x) ROUND (32768.0 * x)
+
+#define Q0 Q (-2/3)
+#define Q1 Q (0)
+#define Q2 Q (2/3)
+
+static const quantizer_t q_1_0[32] = {
+    Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0,
+    Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1,
+    Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2,
+    0,  0,  0,  0,  0
+};
+
+static const quantizer_t q_1_1[32] = {
+    Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2,
+    Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2,
+    Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2,
+    0,  0,  0,  0,  0
+};
+
+static const quantizer_t q_1_2[32] = {
+    Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2,
+    Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2,
+    Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2,
+    0,  0,  0,  0,  0
+};
+
+#undef Q0
+#undef Q1
+#undef Q2
+
+#define Q0 Q (-4/5)
+#define Q1 Q (-2/5)
+#define Q2 Q (0)
+#define Q3 Q (2/5)
+#define Q4 Q (4/5)
+
+static const quantizer_t q_2_0[128] = {
+    Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,
+    Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,
+    Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,
+    Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,
+    Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,
+    0,0,0
+};
+
+static const quantizer_t q_2_1[128] = {
+    Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+    Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+    Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+    Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+    Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+    0,0,0
+};
+
+static const quantizer_t q_2_2[128] = {
+    Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+    Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+    Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+    Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+    Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+    0,0,0
+};
+
+#undef Q0
+#undef Q1
+#undef Q2
+#undef Q3
+#undef Q4
+
+static const quantizer_t q_3[8] = {
+    Q (-6/7), Q (-4/7), Q (-2/7), Q (0), Q (2/7), Q (4/7), Q (6/7), 0
+};
+
+#define Q0 Q (-10/11)
+#define Q1 Q (-8/11)
+#define Q2 Q (-6/11)
+#define Q3 Q (-4/11)
+#define Q4 Q (-2/11)
+#define Q5 Q (0)
+#define Q6 Q (2/11)
+#define Q7 Q (4/11)
+#define Q8 Q (6/11)
+#define Q9 Q (8/11)
+#define QA Q (10/11)
+
+static const quantizer_t q_4_0[128] = {
+    Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0,
+    Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1,
+    Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2,
+    Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3,
+    Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4,
+    Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5,
+    Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6,
+    Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7,
+    Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8,
+    Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9,
+    QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, QA,
+    0,  0,  0,  0,  0,  0,  0
+};
+
+static const quantizer_t q_4_1[128] = {
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    0,  0,  0,  0,  0,  0,  0
+};
+
+#undef Q0
+#undef Q1
+#undef Q2
+#undef Q3
+#undef Q4
+#undef Q5
+#undef Q6
+#undef Q7
+#undef Q8
+#undef Q9
+#undef QA
+
+static const quantizer_t q_5[16] = {
+    Q (-14/15), Q (-12/15), Q (-10/15), Q (-8/15), Q (-6/15),
+    Q (-4/15), Q (-2/15), Q (0), Q (2/15), Q (4/15),
+    Q (6/15), Q (8/15), Q (10/15), Q (12/15), Q (14/15), 0
+};
+
+#ifndef LIBA52_FIXED
+static const sample_t scale_factor[25] = {
+    0.000030517578125,
+    0.0000152587890625,
+    0.00000762939453125,
+    0.000003814697265625,
+    0.0000019073486328125,
+    0.00000095367431640625,
+    0.000000476837158203125,
+    0.0000002384185791015625,
+    0.00000011920928955078125,
+    0.000000059604644775390625,
+    0.0000000298023223876953125,
+    0.00000001490116119384765625,
+    0.000000007450580596923828125,
+    0.0000000037252902984619140625,
+    0.00000000186264514923095703125,
+    0.000000000931322574615478515625,
+    0.0000000004656612873077392578125,
+    0.00000000023283064365386962890625,
+    0.000000000116415321826934814453125,
+    0.0000000000582076609134674072265625,
+    0.00000000002910383045673370361328125,
+    0.000000000014551915228366851806640625,
+    0.0000000000072759576141834259033203125,
+    0.00000000000363797880709171295166015625,
+    0.000000000001818989403545856475830078125
+};
+#endif
+
+static const uint16_t dither_lut[256] = {
+    0x0000, 0xa011, 0xe033, 0x4022, 0x6077, 0xc066, 0x8044, 0x2055,
+    0xc0ee, 0x60ff, 0x20dd, 0x80cc, 0xa099, 0x0088, 0x40aa, 0xe0bb,
+    0x21cd, 0x81dc, 0xc1fe, 0x61ef, 0x41ba, 0xe1ab, 0xa189, 0x0198,
+    0xe123, 0x4132, 0x0110, 0xa101, 0x8154, 0x2145, 0x6167, 0xc176,
+    0x439a, 0xe38b, 0xa3a9, 0x03b8, 0x23ed, 0x83fc, 0xc3de, 0x63cf,
+    0x8374, 0x2365, 0x6347, 0xc356, 0xe303, 0x4312, 0x0330, 0xa321,
+    0x6257, 0xc246, 0x8264, 0x2275, 0x0220, 0xa231, 0xe213, 0x4202,
+    0xa2b9, 0x02a8, 0x428a, 0xe29b, 0xc2ce, 0x62df, 0x22fd, 0x82ec,
+    0x8734, 0x2725, 0x6707, 0xc716, 0xe743, 0x4752, 0x0770, 0xa761,
+    0x47da, 0xe7cb, 0xa7e9, 0x07f8, 0x27ad, 0x87bc, 0xc79e, 0x678f,
+    0xa6f9, 0x06e8, 0x46ca, 0xe6db, 0xc68e, 0x669f, 0x26bd, 0x86ac,
+    0x6617, 0xc606, 0x8624, 0x2635, 0x0660, 0xa671, 0xe653, 0x4642,
+    0xc4ae, 0x64bf, 0x249d, 0x848c, 0xa4d9, 0x04c8, 0x44ea, 0xe4fb,
+    0x0440, 0xa451, 0xe473, 0x4462, 0x6437, 0xc426, 0x8404, 0x2415,
+    0xe563, 0x4572, 0x0550, 0xa541, 0x8514, 0x2505, 0x6527, 0xc536,
+    0x258d, 0x859c, 0xc5be, 0x65af, 0x45fa, 0xe5eb, 0xa5c9, 0x05d8,
+    0xae79, 0x0e68, 0x4e4a, 0xee5b, 0xce0e, 0x6e1f, 0x2e3d, 0x8e2c,
+    0x6e97, 0xce86, 0x8ea4, 0x2eb5, 0x0ee0, 0xaef1, 0xeed3, 0x4ec2,
+    0x8fb4, 0x2fa5, 0x6f87, 0xcf96, 0xefc3, 0x4fd2, 0x0ff0, 0xafe1,
+    0x4f5a, 0xef4b, 0xaf69, 0x0f78, 0x2f2d, 0x8f3c, 0xcf1e, 0x6f0f,
+    0xede3, 0x4df2, 0x0dd0, 0xadc1, 0x8d94, 0x2d85, 0x6da7, 0xcdb6,
+    0x2d0d, 0x8d1c, 0xcd3e, 0x6d2f, 0x4d7a, 0xed6b, 0xad49, 0x0d58,
+    0xcc2e, 0x6c3f, 0x2c1d, 0x8c0c, 0xac59, 0x0c48, 0x4c6a, 0xec7b,
+    0x0cc0, 0xacd1, 0xecf3, 0x4ce2, 0x6cb7, 0xcca6, 0x8c84, 0x2c95,
+    0x294d, 0x895c, 0xc97e, 0x696f, 0x493a, 0xe92b, 0xa909, 0x0918,
+    0xe9a3, 0x49b2, 0x0990, 0xa981, 0x89d4, 0x29c5, 0x69e7, 0xc9f6,
+    0x0880, 0xa891, 0xe8b3, 0x48a2, 0x68f7, 0xc8e6, 0x88c4, 0x28d5,
+    0xc86e, 0x687f, 0x285d, 0x884c, 0xa819, 0x0808, 0x482a, 0xe83b,
+    0x6ad7, 0xcac6, 0x8ae4, 0x2af5, 0x0aa0, 0xaab1, 0xea93, 0x4a82,
+    0xaa39, 0x0a28, 0x4a0a, 0xea1b, 0xca4e, 0x6a5f, 0x2a7d, 0x8a6c,
+    0x4b1a, 0xeb0b, 0xab29, 0x0b38, 0x2b6d, 0x8b7c, 0xcb5e, 0x6b4f,
+    0x8bf4, 0x2be5, 0x6bc7, 0xcbd6, 0xeb83, 0x4b92, 0x0bb0, 0xaba1
+};
diff --git a/contrib/ffmpeg/libavcodec/libgsm.c b/contrib/ffmpeg/libavcodec/libgsm.c
new file mode 100644
index 000000000..7cf59be7a
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/libgsm.c
@@ -0,0 +1,97 @@
+/*
+ * Interface to libgsm for gsm encoding/decoding
+ * Copyright (c) 2005 Alban Bedel <albeu@free.fr>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libgsm.c
+ * Interface to libgsm for gsm encoding/decoding
+ */
+
+#include "avcodec.h"
+#include <gsm.h>
+
+// gsm.h miss some essential constants
+#define GSM_BLOCK_SIZE 33
+#define GSM_FRAME_SIZE 160
+
+static int libgsm_init(AVCodecContext *avctx) {
+    if (avctx->channels > 1 || avctx->sample_rate != 8000)
+        return -1;
+
+    avctx->frame_size = GSM_FRAME_SIZE;
+    avctx->block_align = GSM_BLOCK_SIZE;
+
+    avctx->priv_data = gsm_create();
+
+    avctx->coded_frame= avcodec_alloc_frame();
+    avctx->coded_frame->key_frame= 1;
+
+    return 0;
+}
+
+static int libgsm_close(AVCodecContext *avctx) {
+    gsm_destroy(avctx->priv_data);
+    avctx->priv_data = NULL;
+    return 0;
+}
+
+static int libgsm_encode_frame(AVCodecContext *avctx,
+                               unsigned char *frame, int buf_size, void *data) {
+    // we need a full block
+    if(buf_size < GSM_BLOCK_SIZE) return 0;
+
+    gsm_encode(avctx->priv_data,data,frame);
+
+    return GSM_BLOCK_SIZE;
+}
+
+
+AVCodec libgsm_encoder = {
+    "gsm",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_GSM,
+    0,
+    libgsm_init,
+    libgsm_encode_frame,
+    libgsm_close,
+};
+
+static int libgsm_decode_frame(AVCodecContext *avctx,
+                               void *data, int *data_size,
+                               uint8_t *buf, int buf_size) {
+
+    if(buf_size < GSM_BLOCK_SIZE) return 0;
+
+    if(gsm_decode(avctx->priv_data,buf,data)) return -1;
+
+    *data_size = GSM_FRAME_SIZE*2;
+    return GSM_BLOCK_SIZE;
+}
+
+AVCodec libgsm_decoder = {
+    "gsm",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_GSM,
+    0,
+    libgsm_init,
+    NULL,
+    libgsm_close,
+    libgsm_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/loco.c b/contrib/ffmpeg/libavcodec/loco.c
new file mode 100644
index 000000000..2ec850ed0
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/loco.c
@@ -0,0 +1,287 @@
+/*
+ * LOCO codec
+ * Copyright (c) 2005 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file loco.c
+ * LOCO codec.
+ */
+
+#include "avcodec.h"
+#include "common.h"
+#include "bitstream.h"
+#include "golomb.h"
+
+enum LOCO_MODE {LOCO_UNKN=0, LOCO_CYUY2=-1, LOCO_CRGB=-2, LOCO_CRGBA=-3, LOCO_CYV12=-4,
+ LOCO_YUY2=1, LOCO_UYVY=2, LOCO_RGB=3, LOCO_RGBA=4, LOCO_YV12=5};
+
+typedef struct LOCOContext{
+    AVCodecContext *avctx;
+    AVFrame pic;
+    int lossy;
+    int mode;
+} LOCOContext;
+
+typedef struct RICEContext{
+    GetBitContext gb;
+    int save, run, run2; /* internal rice decoder state */
+    int sum, count; /* sum and count for getting rice parameter */
+    int lossy;
+}RICEContext;
+
+static int loco_get_rice_param(RICEContext *r)
+{
+    int cnt = 0;
+    int val = r->count;
+
+    while(r->sum > val && cnt < 9) {
+        val <<= 1;
+        cnt++;
+    }
+
+    return cnt;
+}
+
+static inline void loco_update_rice_param(RICEContext *r, int val)
+{
+    r->sum += val;
+    r->count++;
+
+    if(r->count == 16) {
+        r->sum >>= 1;
+        r->count >>= 1;
+    }
+}
+
+static inline int loco_get_rice(RICEContext *r)
+{
+    int v;
+    if (r->run > 0) { /* we have zero run */
+        r->run--;
+        loco_update_rice_param(r, 0);
+        return 0;
+    }
+    v = get_ur_golomb_jpegls(&r->gb, loco_get_rice_param(r), INT_MAX, 0);
+    loco_update_rice_param(r, (v+1)>>1);
+    if (!v) {
+        if (r->save >= 0) {
+            r->run = get_ur_golomb_jpegls(&r->gb, 2, INT_MAX, 0);
+            if(r->run > 1)
+                r->save += r->run + 1;
+            else
+                r->save -= 3;
+        }
+        else
+            r->run2++;
+    } else {
+        v = ((v>>1) + r->lossy) ^ -(v&1);
+        if (r->run2 > 0) {
+            if (r->run2 > 2)
+                r->save += r->run2;
+            else
+                r->save -= 3;
+            r->run2 = 0;
+        }
+    }
+
+    return v;
+}
+
+/* LOCO main predictor - LOCO-I/JPEG-LS predictor */
+static inline int loco_predict(uint8_t* data, int stride, int step)
+{
+    int a, b, c;
+
+    a = data[-stride];
+    b = data[-step];
+    c = data[-stride - step];
+
+    return mid_pred(a, a + b - c, b);
+}
+
+static int loco_decode_plane(LOCOContext *l, uint8_t *data, int width, int height,
+                             int stride, uint8_t *buf, int buf_size, int step)
+{
+    RICEContext rc;
+    int val;
+    int i, j;
+
+    init_get_bits(&rc.gb, buf, buf_size*8);
+    rc.save = 0;
+    rc.run = 0;
+    rc.run2 = 0;
+    rc.lossy = l->lossy;
+
+    rc.sum = 8;
+    rc.count = 1;
+
+    /* restore top left pixel */
+    val = loco_get_rice(&rc);
+    data[0] = 128 + val;
+    /* restore top line */
+    for (i = 1; i < width; i++) {
+        val = loco_get_rice(&rc);
+        data[i * step] = data[i * step - step] + val;
+    }
+    data += stride;
+    for (j = 1; j < height; j++) {
+        /* restore left column */
+        val = loco_get_rice(&rc);
+        data[0] = data[-stride] + val;
+        /* restore all other pixels */
+        for (i = 1; i < width; i++) {
+            val = loco_get_rice(&rc);
+            data[i * step] = loco_predict(&data[i * step], stride, step) + val;
+        }
+        data += stride;
+    }
+
+    return ((get_bits_count(&rc.gb) + 7) >> 3);
+}
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    LOCOContext * const l = avctx->priv_data;
+    AVFrame * const p= (AVFrame*)&l->pic;
+    int decoded;
+
+    if(p->data[0])
+        avctx->release_buffer(avctx, p);
+
+    p->reference = 0;
+    if(avctx->get_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    p->key_frame = 1;
+
+    switch(l->mode) {
+    case LOCO_CYUY2: case LOCO_YUY2: case LOCO_UYVY:
+        decoded = loco_decode_plane(l, p->data[0], avctx->width, avctx->height,
+                                    p->linesize[0], buf, buf_size, 1);
+        buf += decoded; buf_size -= decoded;
+        decoded = loco_decode_plane(l, p->data[1], avctx->width / 2, avctx->height,
+                                    p->linesize[1], buf, buf_size, 1);
+        buf += decoded; buf_size -= decoded;
+        decoded = loco_decode_plane(l, p->data[2], avctx->width / 2, avctx->height,
+                                    p->linesize[2], buf, buf_size, 1);
+        break;
+    case LOCO_CYV12: case LOCO_YV12:
+        decoded = loco_decode_plane(l, p->data[0], avctx->width, avctx->height,
+                                    p->linesize[0], buf, buf_size, 1);
+        buf += decoded; buf_size -= decoded;
+        decoded = loco_decode_plane(l, p->data[2], avctx->width / 2, avctx->height / 2,
+                                    p->linesize[2], buf, buf_size, 1);
+        buf += decoded; buf_size -= decoded;
+        decoded = loco_decode_plane(l, p->data[1], avctx->width / 2, avctx->height / 2,
+                                    p->linesize[1], buf, buf_size, 1);
+        break;
+    case LOCO_CRGB: case LOCO_RGB:
+        decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1), avctx->width, avctx->height,
+                                    -p->linesize[0], buf, buf_size, 3);
+        buf += decoded; buf_size -= decoded;
+        decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1) + 1, avctx->width, avctx->height,
+                                    -p->linesize[0], buf, buf_size, 3);
+        buf += decoded; buf_size -= decoded;
+        decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1) + 2, avctx->width, avctx->height,
+                                    -p->linesize[0], buf, buf_size, 3);
+        break;
+    case LOCO_RGBA:
+        decoded = loco_decode_plane(l, p->data[0], avctx->width, avctx->height,
+                                    p->linesize[0], buf, buf_size, 4);
+        buf += decoded; buf_size -= decoded;
+        decoded = loco_decode_plane(l, p->data[0] + 1, avctx->width, avctx->height,
+                                    p->linesize[0], buf, buf_size, 4);
+        buf += decoded; buf_size -= decoded;
+        decoded = loco_decode_plane(l, p->data[0] + 2, avctx->width, avctx->height,
+                                    p->linesize[0], buf, buf_size, 4);
+        buf += decoded; buf_size -= decoded;
+        decoded = loco_decode_plane(l, p->data[0] + 3, avctx->width, avctx->height,
+                                    p->linesize[0], buf, buf_size, 4);
+        break;
+    }
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = l->pic;
+
+    return buf_size;
+}
+
+static int decode_init(AVCodecContext *avctx){
+    LOCOContext * const l = avctx->priv_data;
+    int version;
+
+    l->avctx = avctx;
+    if (avctx->extradata_size < 12) {
+        av_log(avctx, AV_LOG_ERROR, "Extradata size must be >= 12 instead of %i\n",
+               avctx->extradata_size);
+        return -1;
+    }
+    version = LE_32(avctx->extradata);
+    switch(version) {
+    case 1:
+        l->lossy = 0;
+        break;
+    case 2:
+        l->lossy = LE_32(avctx->extradata + 8);
+        break;
+    default:
+        l->lossy = LE_32(avctx->extradata + 8);
+        av_log(avctx, AV_LOG_INFO, "This is LOCO codec version %i, please upload file for study\n", version);
+    }
+
+    l->mode = LE_32(avctx->extradata + 4);
+    switch(l->mode) {
+    case LOCO_CYUY2: case LOCO_YUY2: case LOCO_UYVY:
+        avctx->pix_fmt = PIX_FMT_YUV422P;
+        break;
+    case LOCO_CRGB: case LOCO_RGB:
+        avctx->pix_fmt = PIX_FMT_BGR24;
+        break;
+    case LOCO_CYV12: case LOCO_YV12:
+        avctx->pix_fmt = PIX_FMT_YUV420P;
+        break;
+    case LOCO_CRGBA: case LOCO_RGBA:
+        avctx->pix_fmt = PIX_FMT_RGBA32;
+        break;
+    default:
+        av_log(avctx, AV_LOG_INFO, "Unknown colorspace, index = %i\n", l->mode);
+        return -1;
+    }
+    if(avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(avctx, AV_LOG_INFO, "lossy:%i, version:%i, mode: %i\n", l->lossy, version, l->mode);
+
+    return 0;
+}
+
+AVCodec loco_decoder = {
+    "loco",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_LOCO,
+    sizeof(LOCOContext),
+    decode_init,
+    NULL,
+    NULL,
+    decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/lzo.c b/contrib/ffmpeg/libavcodec/lzo.c
new file mode 100644
index 000000000..015c80d0d
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/lzo.c
@@ -0,0 +1,226 @@
+/*
+ * LZO 1x decompression
+ * Copyright (c) 2006 Reimar Doeffinger
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "common.h"
+//! avoid e.g. MPlayers fast_memcpy, it slows things down here
+#undef memcpy
+#include <string.h>
+#include "lzo.h"
+
+//! define if we may write up to 12 bytes beyond the output buffer
+#define OUTBUF_PADDED 1
+//! define if we may read up to 4 bytes beyond the input buffer
+#define INBUF_PADDED 1
+typedef struct LZOContext {
+    uint8_t *in, *in_end;
+    uint8_t *out_start, *out, *out_end;
+    int error;
+} LZOContext;
+
+/**
+ * \brief read one byte from input buffer, avoiding overrun
+ * \return byte read
+ */
+static inline int get_byte(LZOContext *c) {
+    if (c->in < c->in_end)
+        return *c->in++;
+    c->error |= LZO_INPUT_DEPLETED;
+    return 1;
+}
+
+/**
+ * \brief decode a length value in the coding used by lzo
+ * \param x previous byte value
+ * \param mask bits used from x
+ * \return decoded length value
+ */
+static inline int get_len(LZOContext *c, int x, int mask) {
+    int cnt = x & mask;
+    if (!cnt) {
+        while (!(x = get_byte(c))) cnt += 255;
+        cnt += mask + x;
+    }
+    return cnt;
+}
+
+/**
+ * \brief copy bytes from input to output buffer with checking
+ * \param cnt number of bytes to copy, must be > 0
+ */
+static inline void copy(LZOContext *c, int cnt) {
+    register uint8_t *src = c->in;
+    register uint8_t *dst = c->out;
+    if (src + cnt > c->in_end) {
+        cnt = c->in_end - src;
+        c->error |= LZO_INPUT_DEPLETED;
+    }
+    if (dst + cnt > c->out_end) {
+        cnt = c->out_end - dst;
+        c->error |= LZO_OUTPUT_FULL;
+    }
+#if defined(INBUF_PADDED) && defined(OUTBUF_PADDED)
+    dst[0] = src[0];
+    dst[1] = src[1];
+    dst[2] = src[2];
+    dst[3] = src[3];
+    src += 4;
+    dst += 4;
+    cnt -= 4;
+    if (cnt > 0)
+#endif
+        memcpy(dst, src, cnt);
+    c->in = src + cnt;
+    c->out = dst + cnt;
+}
+
+/**
+ * \brief copy previously decoded bytes to current position
+ * \param back how many bytes back we start
+ * \param cnt number of bytes to copy, must be > 0
+ *
+ * cnt > back is valid, this will copy the bytes we just copied,
+ * thus creating a repeating pattern with a period length of back.
+ */
+static inline void copy_backptr(LZOContext *c, int back, int cnt) {
+    register uint8_t *src = &c->out[-back];
+    register uint8_t *dst = c->out;
+    if (src < c->out_start) {
+        c->error |= LZO_INVALID_BACKPTR;
+        return;
+    }
+    if (dst + cnt > c->out_end) {
+        cnt = c->out_end - dst;
+        c->error |= LZO_OUTPUT_FULL;
+    }
+    if (back == 1) {
+        memset(dst, *src, cnt);
+        dst += cnt;
+    } else {
+#ifdef OUTBUF_PADDED
+        dst[0] = src[0];
+        dst[1] = src[1];
+        dst[2] = src[2];
+        dst[3] = src[3];
+        src += 4;
+        dst += 4;
+        cnt -= 4;
+        if (cnt > 0) {
+            dst[0] = src[0];
+            dst[1] = src[1];
+            dst[2] = src[2];
+            dst[3] = src[3];
+            dst[4] = src[4];
+            dst[5] = src[5];
+            dst[6] = src[6];
+            dst[7] = src[7];
+            src += 8;
+            dst += 8;
+            cnt -= 8;
+        }
+#endif
+        if (cnt > 0) {
+            int blocklen = back;
+            while (cnt > blocklen) {
+                memcpy(dst, src, blocklen);
+                dst += blocklen;
+                cnt -= blocklen;
+                blocklen <<= 1;
+            }
+            memcpy(dst, src, cnt);
+        }
+        dst += cnt;
+    }
+    c->out = dst;
+}
+
+/**
+ * \brief decode LZO 1x compressed data
+ * \param out output buffer
+ * \param outlen size of output buffer, number of bytes left are returned here
+ * \param in input buffer
+ * \param inlen size of input buffer, number of bytes left are returned here
+ * \return 0 on success, otherwise error flags, see lzo.h
+ *
+ * make sure all buffers are appropriately padded, in must provide
+ * LZO_INPUT_PADDING, out must provide LZO_OUTPUT_PADDING additional bytes
+ */
+int lzo1x_decode(void *out, int *outlen, void *in, int *inlen) {
+    enum {COPY, BACKPTR} state = COPY;
+    int x;
+    LZOContext c;
+    c.in = in;
+    c.in_end = (uint8_t *)in + *inlen;
+    c.out = c.out_start = out;
+    c.out_end = (uint8_t *)out + * outlen;
+    c.error = 0;
+    x = get_byte(&c);
+    if (x > 17) {
+        copy(&c, x - 17);
+        x = get_byte(&c);
+        if (x < 16) c.error |= LZO_ERROR;
+    }
+    while (!c.error) {
+        int cnt, back;
+        if (x >> 4) {
+            if (x >> 6) {
+                cnt = (x >> 5) - 1;
+                back = (get_byte(&c) << 3) + ((x >> 2) & 7) + 1;
+            } else if (x >> 5) {
+                cnt = get_len(&c, x, 31);
+                x = get_byte(&c);
+                back = (get_byte(&c) << 6) + (x >> 2) + 1;
+            } else {
+                cnt = get_len(&c, x, 7);
+                back = (1 << 14) + ((x & 8) << 11);
+                x = get_byte(&c);
+                back += (get_byte(&c) << 6) + (x >> 2);
+                if (back == (1 << 14)) {
+                    if (cnt != 1)
+                        c.error |= LZO_ERROR;
+                    break;
+                }
+            }
+        } else
+        switch (state) {
+            case COPY:
+                cnt = get_len(&c, x, 15);
+                copy(&c, cnt + 3);
+                x = get_byte(&c);
+                if (x >> 4)
+                    continue;
+                cnt = 1;
+                back = (1 << 11) + (get_byte(&c) << 2) + (x >> 2) + 1;
+                break;
+            case BACKPTR:
+                cnt = 0;
+                back = (get_byte(&c) << 2) + (x >> 2) + 1;
+                break;
+        }
+        copy_backptr(&c, back, cnt + 2);
+        cnt = x & 3;
+        state = cnt ? BACKPTR : COPY;
+        if (cnt)
+            copy(&c, cnt);
+        x = get_byte(&c);
+    }
+    *inlen = c.in_end - c.in;
+    *outlen = c.out_end - c.out;
+    return c.error;
+}
diff --git a/contrib/ffmpeg/libavcodec/lzo.h b/contrib/ffmpeg/libavcodec/lzo.h
new file mode 100644
index 000000000..4d00dd721
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/lzo.h
@@ -0,0 +1,35 @@
+/*
+ * LZO 1x decompression
+ * copyright (c) 2006 Reimar Doeffinger
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _LZO_H
+#define LZO_H
+
+#define LZO_INPUT_DEPLETED 1
+#define LZO_OUTPUT_FULL 2
+#define LZO_INVALID_BACKPTR 4
+#define LZO_ERROR 8
+
+#define LZO_INPUT_PADDING 4
+#define LZO_OUTPUT_PADDING 12
+
+int lzo1x_decode(void *out, int *outlen, void *in, int *inlen);
+
+#endif
diff --git a/contrib/ffmpeg/libavcodec/lzw.c b/contrib/ffmpeg/libavcodec/lzw.c
new file mode 100644
index 000000000..b5bb33a21
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/lzw.c
@@ -0,0 +1,262 @@
+/*
+ * LZW decoder
+ * Copyright (c) 2003 Fabrice Bellard.
+ * Copyright (c) 2006 Konstantin Shishkov.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file lzw.c
+ * @brief LZW decoding routines
+ * @author Fabrice Bellard
+ * Modified for use in TIFF by Konstantin Shishkov
+ */
+
+#include "avcodec.h"
+#include "lzw.h"
+
+#define LZW_MAXBITS                 12
+#define LZW_SIZTABLE                (1<<LZW_MAXBITS)
+
+static const uint16_t mask[17] =
+{
+    0x0000, 0x0001, 0x0003, 0x0007,
+    0x000F, 0x001F, 0x003F, 0x007F,
+    0x00FF, 0x01FF, 0x03FF, 0x07FF,
+    0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF
+};
+
+struct LZWState {
+    int eob_reached;
+    uint8_t *pbuf, *ebuf;
+    int bbits;
+    unsigned int bbuf;
+
+    int mode;                   ///< Decoder mode
+    int cursize;                ///< The current code size
+    int curmask;
+    int codesize;
+    int clear_code;
+    int end_code;
+    int newcodes;               ///< First available code
+    int top_slot;               ///< Highest code for current size
+    int top_slot2;              ///< Highest possible code for current size (<=top_slot)
+    int slot;                   ///< Last read code
+    int fc, oc;
+    uint8_t *sp;
+    uint8_t stack[LZW_SIZTABLE];
+    uint8_t suffix[LZW_SIZTABLE];
+    uint16_t prefix[LZW_SIZTABLE];
+    int bs;                     ///< current buffer size for GIF
+};
+
+/* get one code from stream */
+static int lzw_get_code(struct LZWState * s)
+{
+    int c, sizbuf;
+
+    if(s->mode == FF_LZW_GIF) {
+        while (s->bbits < s->cursize) {
+            if (!s->bs) {
+                sizbuf = *s->pbuf++;
+                s->bs = sizbuf;
+                if(!sizbuf) {
+                    s->eob_reached = 1;
+                }
+            }
+            s->bbuf |= (*s->pbuf++) << s->bbits;
+            s->bbits += 8;
+            s->bs--;
+        }
+        c = s->bbuf & s->curmask;
+       s->bbuf >>= s->cursize;
+    } else { // TIFF
+        while (s->bbits < s->cursize) {
+            if (s->pbuf >= s->ebuf) {
+                s->eob_reached = 1;
+            }
+            s->bbuf = (s->bbuf << 8) | (*s->pbuf++);
+            s->bbits += 8;
+        }
+        c = (s->bbuf >> (s->bbits - s->cursize)) & s->curmask;
+    }
+    s->bbits -= s->cursize;
+    return c;
+}
+
+uint8_t* ff_lzw_cur_ptr(LZWState *p)
+{
+    return ((struct LZWState*)p)->pbuf;
+}
+
+void ff_lzw_decode_tail(LZWState *p)
+{
+    struct LZWState *s = (struct LZWState *)p;
+    while(!s->eob_reached)
+        lzw_get_code(s);
+}
+
+void ff_lzw_decode_open(LZWState **p)
+{
+    *p = av_mallocz(sizeof(struct LZWState));
+}
+
+void ff_lzw_decode_close(LZWState **p)
+{
+    av_freep(p);
+}
+
+/**
+ * Initialize LZW decoder
+ * @param s LZW context
+ * @param csize initial code size in bits
+ * @param buf input data
+ * @param buf_size input data size
+ * @param mode decoder working mode - either GIF or TIFF
+ */
+int ff_lzw_decode_init(LZWState *p, int csize, uint8_t *buf, int buf_size, int mode)
+{
+    struct LZWState *s = (struct LZWState *)p;
+
+    if(csize < 1 || csize > LZW_MAXBITS)
+        return -1;
+    /* read buffer */
+    s->eob_reached = 0;
+    s->pbuf = buf;
+    s->ebuf = s->pbuf + buf_size;
+    s->bbuf = 0;
+    s->bbits = 0;
+    s->bs = 0;
+
+    /* decoder */
+    s->codesize = csize;
+    s->cursize = s->codesize + 1;
+    s->curmask = mask[s->cursize];
+    s->top_slot = 1 << s->cursize;
+    s->clear_code = 1 << s->codesize;
+    s->end_code = s->clear_code + 1;
+    s->slot = s->newcodes = s->clear_code + 2;
+    s->oc = s->fc = 0;
+    s->sp = s->stack;
+
+    s->mode = mode;
+    switch(s->mode){
+    case FF_LZW_GIF:
+        s->top_slot2 = s->top_slot;
+        break;
+    case FF_LZW_TIFF:
+        s->top_slot2 = s->top_slot - 1;
+        break;
+    default:
+        return -1;
+    }
+    return 0;
+}
+
+/**
+ * Decode given number of bytes
+ * NOTE: the algorithm here is inspired from the LZW GIF decoder
+ *  written by Steven A. Bennett in 1987.
+ *
+ * @param s LZW context
+ * @param buf output buffer
+ * @param len number of bytes to decode
+ * @return number of bytes decoded
+ */
+int ff_lzw_decode(LZWState *p, uint8_t *buf, int len){
+    int l, c, code, oc, fc;
+    uint8_t *sp;
+    struct LZWState *s = (struct LZWState *)p;
+
+    if (s->end_code < 0)
+        return 0;
+
+    l = len;
+    sp = s->sp;
+    oc = s->oc;
+    fc = s->fc;
+
+    while (sp > s->stack) {
+        *buf++ = *(--sp);
+        if ((--l) == 0)
+            goto the_end;
+    }
+
+    for (;;) {
+        c = lzw_get_code(s);
+        if (c == s->end_code) {
+            s->end_code = -1;
+            break;
+        } else if (c == s->clear_code) {
+            s->cursize = s->codesize + 1;
+            s->curmask = mask[s->cursize];
+            s->slot = s->newcodes;
+            s->top_slot = 1 << s->cursize;
+            s->top_slot2 = s->top_slot;
+            if(s->mode == FF_LZW_TIFF)
+                s->top_slot2--;
+            while ((c = lzw_get_code(s)) == s->clear_code);
+            if (c == s->end_code) {
+                s->end_code = -1;
+                break;
+            }
+            /* test error */
+            if (c >= s->slot)
+                c = 0;
+            fc = oc = c;
+            *buf++ = c;
+            if ((--l) == 0)
+                break;
+        } else {
+            code = c;
+            if (code >= s->slot) {
+                *sp++ = fc;
+                code = oc;
+            }
+            while (code >= s->newcodes) {
+                *sp++ = s->suffix[code];
+                code = s->prefix[code];
+            }
+            *sp++ = code;
+            if (s->slot < s->top_slot) {
+                s->suffix[s->slot] = fc = code;
+                s->prefix[s->slot++] = oc;
+                oc = c;
+            }
+            if (s->slot >= s->top_slot2) {
+                if (s->cursize < LZW_MAXBITS) {
+                    s->top_slot <<= 1;
+                    s->top_slot2 = s->top_slot;
+                    if(s->mode == FF_LZW_TIFF)
+                        s->top_slot2--;
+                    s->curmask = mask[++s->cursize];
+                }
+            }
+            while (sp > s->stack) {
+                *buf++ = *(--sp);
+                if ((--l) == 0)
+                    goto the_end;
+            }
+        }
+    }
+  the_end:
+    s->sp = sp;
+    s->oc = oc;
+    s->fc = fc;
+    return len - l;
+}
diff --git a/contrib/ffmpeg/libavcodec/lzw.h b/contrib/ffmpeg/libavcodec/lzw.h
new file mode 100644
index 000000000..60f115caf
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/lzw.h
@@ -0,0 +1,49 @@
+/*
+ * LZW decoder
+ * Copyright (c) 2003 Fabrice Bellard.
+ * Copyright (c) 2006 Konstantin Shishkov.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file lzw.h
+ * @brief LZW decoding routines
+ * @author Fabrice Bellard
+ * Modified for use in TIFF by Konstantin Shishkov
+ */
+
+#ifndef LZW_H
+#define LZW_H
+
+enum FF_LZW_MODES{
+    FF_LZW_GIF,
+    FF_LZW_TIFF
+};
+
+/* clients should not know what LZWState is */
+typedef void LZWState;
+
+/* first two functions de/allocate memory for LZWState */
+void ff_lzw_decode_open(LZWState **p);
+void ff_lzw_decode_close(LZWState **p);
+int ff_lzw_decode_init(LZWState *s, int csize, uint8_t *buf, int buf_size, int mode);
+int ff_lzw_decode(LZWState *s, uint8_t *buf, int len);
+uint8_t* ff_lzw_cur_ptr(LZWState *lzw);
+void ff_lzw_decode_tail(LZWState *lzw);
+
+#endif
diff --git a/contrib/ffmpeg/libavcodec/mace.c b/contrib/ffmpeg/libavcodec/mace.c
new file mode 100644
index 000000000..95839379a
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mace.c
@@ -0,0 +1,456 @@
+/*
+ * MACE decoder
+ * Copyright (c) 2002 Laszlo Torok <torokl@alpha.dfmk.hu>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file mace.c
+ * MACE decoder.
+ */
+
+#include "avcodec.h"
+
+/*
+ * Adapted to ffmpeg by Francois Revol <revol@free.fr>
+ * (removed 68k REG stuff, changed types, added some statics and consts,
+ * libavcodec api, context stuff, interlaced stereo out).
+ */
+
+static const uint16_t MACEtab1[] = { 0xfff3, 0x0008, 0x004c, 0x00de, 0x00de, 0x004c, 0x0008, 0xfff3 };
+
+static const uint16_t MACEtab3[] = { 0xffee, 0x008c, 0x008c, 0xffee };
+
+static const uint16_t MACEtab2[][8] = {
+    { 0x0025, 0x0074, 0x00CE, 0x014A, 0xFEB5, 0xFF31, 0xFF8B, 0xFFDA },
+    { 0x0027, 0x0079, 0x00D8, 0x015A, 0xFEA5, 0xFF27, 0xFF86, 0xFFD8 },
+    { 0x0029, 0x007F, 0x00E1, 0x0169, 0xFE96, 0xFF1E, 0xFF80, 0xFFD6 },
+    { 0x002A, 0x0084, 0x00EB, 0x0179, 0xFE86, 0xFF14, 0xFF7B, 0xFFD5 },
+    { 0x002C, 0x0089, 0x00F5, 0x0188, 0xFE77, 0xFF0A, 0xFF76, 0xFFD3 },
+    { 0x002E, 0x0090, 0x0100, 0x019A, 0xFE65, 0xFEFF, 0xFF6F, 0xFFD1 },
+    { 0x0030, 0x0096, 0x010B, 0x01AC, 0xFE53, 0xFEF4, 0xFF69, 0xFFCF },
+    { 0x0033, 0x009D, 0x0118, 0x01C1, 0xFE3E, 0xFEE7, 0xFF62, 0xFFCC },
+    { 0x0035, 0x00A5, 0x0125, 0x01D6, 0xFE29, 0xFEDA, 0xFF5A, 0xFFCA },
+    { 0x0037, 0x00AC, 0x0132, 0x01EA, 0xFE15, 0xFECD, 0xFF53, 0xFFC8 },
+    { 0x003A, 0x00B3, 0x013F, 0x01FF, 0xFE00, 0xFEC0, 0xFF4C, 0xFFC5 },
+    { 0x003C, 0x00BB, 0x014D, 0x0216, 0xFDE9, 0xFEB2, 0xFF44, 0xFFC3 },
+    { 0x003F, 0x00C3, 0x015C, 0x022D, 0xFDD2, 0xFEA3, 0xFF3C, 0xFFC0 },
+    { 0x0042, 0x00CD, 0x016C, 0x0247, 0xFDB8, 0xFE93, 0xFF32, 0xFFBD },
+    { 0x0045, 0x00D6, 0x017C, 0x0261, 0xFD9E, 0xFE83, 0xFF29, 0xFFBA },
+    { 0x0048, 0x00DF, 0x018C, 0x027B, 0xFD84, 0xFE73, 0xFF20, 0xFFB7 },
+    { 0x004B, 0x00E9, 0x019E, 0x0297, 0xFD68, 0xFE61, 0xFF16, 0xFFB4 },
+    { 0x004F, 0x00F4, 0x01B1, 0x02B6, 0xFD49, 0xFE4E, 0xFF0B, 0xFFB0 },
+    { 0x0052, 0x00FE, 0x01C5, 0x02D5, 0xFD2A, 0xFE3A, 0xFF01, 0xFFAD },
+    { 0x0056, 0x0109, 0x01D8, 0x02F4, 0xFD0B, 0xFE27, 0xFEF6, 0xFFA9 },
+    { 0x005A, 0x0116, 0x01EF, 0x0318, 0xFCE7, 0xFE10, 0xFEE9, 0xFFA5 },
+    { 0x005E, 0x0122, 0x0204, 0x033A, 0xFCC5, 0xFDFB, 0xFEDD, 0xFFA1 },
+    { 0x0062, 0x012F, 0x021A, 0x035E, 0xFCA1, 0xFDE5, 0xFED0, 0xFF9D },
+    { 0x0066, 0x013C, 0x0232, 0x0385, 0xFC7A, 0xFDCD, 0xFEC3, 0xFF99 },
+    { 0x006B, 0x014B, 0x024C, 0x03AE, 0xFC51, 0xFDB3, 0xFEB4, 0xFF94 },
+    { 0x0070, 0x0159, 0x0266, 0x03D7, 0xFC28, 0xFD99, 0xFEA6, 0xFF8F },
+    { 0x0075, 0x0169, 0x0281, 0x0403, 0xFBFC, 0xFD7E, 0xFE96, 0xFF8A },
+    { 0x007A, 0x0179, 0x029E, 0x0432, 0xFBCD, 0xFD61, 0xFE86, 0xFF85 },
+    { 0x007F, 0x018A, 0x02BD, 0x0463, 0xFB9C, 0xFD42, 0xFE75, 0xFF80 },
+    { 0x0085, 0x019B, 0x02DC, 0x0494, 0xFB6B, 0xFD23, 0xFE64, 0xFF7A },
+    { 0x008B, 0x01AE, 0x02FC, 0x04C8, 0xFB37, 0xFD03, 0xFE51, 0xFF74 },
+    { 0x0091, 0x01C1, 0x031F, 0x0500, 0xFAFF, 0xFCE0, 0xFE3E, 0xFF6E },
+    { 0x0098, 0x01D5, 0x0343, 0x0539, 0xFAC6, 0xFCBC, 0xFE2A, 0xFF67 },
+    { 0x009F, 0x01EA, 0x0368, 0x0575, 0xFA8A, 0xFC97, 0xFE15, 0xFF60 },
+    { 0x00A6, 0x0200, 0x038F, 0x05B3, 0xFA4C, 0xFC70, 0xFDFF, 0xFF59 },
+    { 0x00AD, 0x0217, 0x03B7, 0x05F3, 0xFA0C, 0xFC48, 0xFDE8, 0xFF52 },
+    { 0x00B5, 0x022E, 0x03E1, 0x0636, 0xF9C9, 0xFC1E, 0xFDD1, 0xFF4A },
+    { 0x00BD, 0x0248, 0x040E, 0x067F, 0xF980, 0xFBF1, 0xFDB7, 0xFF42 },
+    { 0x00C5, 0x0262, 0x043D, 0x06CA, 0xF935, 0xFBC2, 0xFD9D, 0xFF3A },
+    { 0x00CE, 0x027D, 0x046D, 0x0717, 0xF8E8, 0xFB92, 0xFD82, 0xFF31 },
+    { 0x00D7, 0x0299, 0x049F, 0x0767, 0xF898, 0xFB60, 0xFD66, 0xFF28 },
+    { 0x00E1, 0x02B7, 0x04D5, 0x07BC, 0xF843, 0xFB2A, 0xFD48, 0xFF1E },
+    { 0x00EB, 0x02D6, 0x050B, 0x0814, 0xF7EB, 0xFAF4, 0xFD29, 0xFF14 },
+    { 0x00F6, 0x02F7, 0x0545, 0x0871, 0xF78E, 0xFABA, 0xFD08, 0xFF09 },
+    { 0x0101, 0x0318, 0x0581, 0x08D1, 0xF72E, 0xFA7E, 0xFCE7, 0xFEFE },
+    { 0x010C, 0x033C, 0x05C0, 0x0935, 0xF6CA, 0xFA3F, 0xFCC3, 0xFEF3 },
+    { 0x0118, 0x0361, 0x0602, 0x099F, 0xF660, 0xF9FD, 0xFC9E, 0xFEE7 },
+    { 0x0125, 0x0387, 0x0646, 0x0A0C, 0xF5F3, 0xF9B9, 0xFC78, 0xFEDA },
+    { 0x0132, 0x03B0, 0x068E, 0x0A80, 0xF57F, 0xF971, 0xFC4F, 0xFECD },
+    { 0x013F, 0x03DA, 0x06D9, 0x0AF7, 0xF508, 0xF926, 0xFC25, 0xFEC0 },
+    { 0x014E, 0x0406, 0x0728, 0x0B75, 0xF48A, 0xF8D7, 0xFBF9, 0xFEB1 },
+    { 0x015D, 0x0434, 0x077A, 0x0BF9, 0xF406, 0xF885, 0xFBCB, 0xFEA2 },
+    { 0x016C, 0x0464, 0x07CF, 0x0C82, 0xF37D, 0xF830, 0xFB9B, 0xFE93 },
+    { 0x017C, 0x0496, 0x0828, 0x0D10, 0xF2EF, 0xF7D7, 0xFB69, 0xFE83 },
+    { 0x018E, 0x04CB, 0x0886, 0x0DA6, 0xF259, 0xF779, 0xFB34, 0xFE71 },
+    { 0x019F, 0x0501, 0x08E6, 0x0E41, 0xF1BE, 0xF719, 0xFAFE, 0xFE60 },
+    { 0x01B2, 0x053B, 0x094C, 0x0EE3, 0xF11C, 0xF6B3, 0xFAC4, 0xFE4D },
+    { 0x01C5, 0x0576, 0x09B6, 0x0F8E, 0xF071, 0xF649, 0xFA89, 0xFE3A },
+    { 0x01D9, 0x05B5, 0x0A26, 0x1040, 0xEFBF, 0xF5D9, 0xFA4A, 0xFE26 },
+    { 0x01EF, 0x05F6, 0x0A9A, 0x10FA, 0xEF05, 0xF565, 0xFA09, 0xFE10 },
+    { 0x0205, 0x063A, 0x0B13, 0x11BC, 0xEE43, 0xF4EC, 0xF9C5, 0xFDFA },
+    { 0x021C, 0x0681, 0x0B91, 0x1285, 0xED7A, 0xF46E, 0xF97E, 0xFDE3 },
+    { 0x0234, 0x06CC, 0x0C15, 0x1359, 0xECA6, 0xF3EA, 0xF933, 0xFDCB },
+    { 0x024D, 0x071A, 0x0CA0, 0x1437, 0xEBC8, 0xF35F, 0xF8E5, 0xFDB2 },
+    { 0x0267, 0x076A, 0x0D2F, 0x151D, 0xEAE2, 0xF2D0, 0xF895, 0xFD98 },
+    { 0x0283, 0x07C0, 0x0DC7, 0x160F, 0xE9F0, 0xF238, 0xF83F, 0xFD7C },
+    { 0x029F, 0x0818, 0x0E63, 0x170A, 0xE8F5, 0xF19C, 0xF7E7, 0xFD60 },
+    { 0x02BD, 0x0874, 0x0F08, 0x1811, 0xE7EE, 0xF0F7, 0xF78B, 0xFD42 },
+    { 0x02DD, 0x08D5, 0x0FB4, 0x1926, 0xE6D9, 0xF04B, 0xF72A, 0xFD22 },
+    { 0x02FE, 0x093A, 0x1067, 0x1A44, 0xE5BB, 0xEF98, 0xF6C5, 0xFD01 },
+    { 0x0320, 0x09A3, 0x1122, 0x1B70, 0xE48F, 0xEEDD, 0xF65C, 0xFCDF },
+    { 0x0344, 0x0A12, 0x11E7, 0x1CAB, 0xE354, 0xEE18, 0xF5ED, 0xFCBB },
+    { 0x0369, 0x0A84, 0x12B2, 0x1DF0, 0xE20F, 0xED4D, 0xF57B, 0xFC96 },
+    { 0x0390, 0x0AFD, 0x1389, 0x1F48, 0xE0B7, 0xEC76, 0xF502, 0xFC6F },
+    { 0x03B8, 0x0B7A, 0x1467, 0x20AC, 0xDF53, 0xEB98, 0xF485, 0xFC47 },
+    { 0x03E3, 0x0BFE, 0x1551, 0x2223, 0xDDDC, 0xEAAE, 0xF401, 0xFC1C },
+    { 0x040F, 0x0C87, 0x1645, 0x23A9, 0xDC56, 0xE9BA, 0xF378, 0xFBF0 },
+    { 0x043E, 0x0D16, 0x1744, 0x2541, 0xDABE, 0xE8BB, 0xF2E9, 0xFBC1 },
+    { 0x046E, 0x0DAB, 0x184C, 0x26E8, 0xD917, 0xE7B3, 0xF254, 0xFB91 },
+    { 0x04A1, 0x0E47, 0x1961, 0x28A4, 0xD75B, 0xE69E, 0xF1B8, 0xFB5E },
+    { 0x04D6, 0x0EEA, 0x1A84, 0x2A75, 0xD58A, 0xE57B, 0xF115, 0xFB29 },
+    { 0x050D, 0x0F95, 0x1BB3, 0x2C5B, 0xD3A4, 0xE44C, 0xF06A, 0xFAF2 },
+    { 0x0547, 0x1046, 0x1CEF, 0x2E55, 0xD1AA, 0xE310, 0xEFB9, 0xFAB8 },
+    { 0x0583, 0x1100, 0x1E3A, 0x3066, 0xCF99, 0xE1C5, 0xEEFF, 0xFA7C },
+    { 0x05C2, 0x11C3, 0x1F94, 0x3292, 0xCD6D, 0xE06B, 0xEE3C, 0xFA3D },
+    { 0x0604, 0x128E, 0x20FC, 0x34D2, 0xCB2D, 0xDF03, 0xED71, 0xF9FB },
+    { 0x0649, 0x1362, 0x2275, 0x372E, 0xC8D1, 0xDD8A, 0xEC9D, 0xF9B6 },
+    { 0x0690, 0x143F, 0x23FF, 0x39A4, 0xC65B, 0xDC00, 0xEBC0, 0xF96F },
+    { 0x06DC, 0x1527, 0x259A, 0x3C37, 0xC3C8, 0xDA65, 0xEAD8, 0xF923 },
+    { 0x072A, 0x1619, 0x2749, 0x3EE8, 0xC117, 0xD8B6, 0xE9E6, 0xF8D5 },
+    { 0x077C, 0x1715, 0x2909, 0x41B6, 0xBE49, 0xD6F6, 0xE8EA, 0xF883 },
+    { 0x07D1, 0x181D, 0x2ADF, 0x44A6, 0xBB59, 0xD520, 0xE7E2, 0xF82E },
+    { 0x082B, 0x1930, 0x2CC7, 0x47B4, 0xB84B, 0xD338, 0xE6CF, 0xF7D4 },
+    { 0x0888, 0x1A50, 0x2EC6, 0x4AE7, 0xB518, 0xD139, 0xE5AF, 0xF777 },
+    { 0x08EA, 0x1B7D, 0x30DE, 0x4E40, 0xB1BF, 0xCF21, 0xE482, 0xF715 },
+    { 0x094F, 0x1CB7, 0x330C, 0x51BE, 0xAE41, 0xCCF3, 0xE348, 0xF6B0 },
+    { 0x09BA, 0x1DFF, 0x3554, 0x5565, 0xAA9A, 0xCAAB, 0xE200, 0xF645 },
+    { 0x0A29, 0x1F55, 0x37B4, 0x5932, 0xA6CD, 0xC84B, 0xE0AA, 0xF5D6 },
+    { 0x0A9D, 0x20BC, 0x3A31, 0x5D2E, 0xA2D1, 0xC5CE, 0xDF43, 0xF562 },
+    { 0x0B16, 0x2231, 0x3CC9, 0x6156, 0x9EA9, 0xC336, 0xDDCE, 0xF4E9 },
+    { 0x0B95, 0x23B8, 0x3F80, 0x65AF, 0x9A50, 0xC07F, 0xDC47, 0xF46A },
+    { 0x0C19, 0x2551, 0x4256, 0x6A39, 0x95C6, 0xBDA9, 0xDAAE, 0xF3E6 },
+    { 0x0CA4, 0x26FB, 0x454C, 0x6EF7, 0x9108, 0xBAB3, 0xD904, 0xF35B },
+    { 0x0D34, 0x28B8, 0x4864, 0x73EB, 0x8C14, 0xB79B, 0xD747, 0xF2CB },
+    { 0x0DCB, 0x2A8A, 0x4B9F, 0x7918, 0x86E7, 0xB460, 0xD575, 0xF234 },
+    { 0x0E68, 0x2C6F, 0x4EFE, 0x7E7E, 0x8181, 0xB101, 0xD390, 0xF197 },
+    { 0x0F0D, 0x2E6B, 0x5285, 0x7FFF, 0x8000, 0xAD7A, 0xD194, 0xF0F2 },
+    { 0x0FB9, 0x307E, 0x5635, 0x7FFF, 0x8000, 0xA9CA, 0xCF81, 0xF046 },
+    { 0x106D, 0x32A7, 0x5A0D, 0x7FFF, 0x8000, 0xA5F2, 0xCD58, 0xEF92 },
+    { 0x1128, 0x34EA, 0x5E12, 0x7FFF, 0x8000, 0xA1ED, 0xCB15, 0xEED7 },
+    { 0x11ED, 0x3747, 0x6245, 0x7FFF, 0x8000, 0x9DBA, 0xC8B8, 0xEE12 },
+    { 0x12B9, 0x39BF, 0x66A8, 0x7FFF, 0x8000, 0x9957, 0xC640, 0xED46 },
+    { 0x138F, 0x3C52, 0x6B3C, 0x7FFF, 0x8000, 0x94C3, 0xC3AD, 0xEC70 },
+    { 0x146F, 0x3F04, 0x7006, 0x7FFF, 0x8000, 0x8FF9, 0xC0FB, 0xEB90 },
+    { 0x1558, 0x41D3, 0x7505, 0x7FFF, 0x8000, 0x8AFA, 0xBE2C, 0xEAA7 },
+    { 0x164C, 0x44C3, 0x7A3E, 0x7FFF, 0x8000, 0x85C1, 0xBB3C, 0xE9B3 },
+    { 0x174B, 0x47D5, 0x7FB3, 0x7FFF, 0x8000, 0x804C, 0xB82A, 0xE8B4 },
+    { 0x1855, 0x4B0A, 0x7FFF, 0x7FFF, 0x8000, 0x8000, 0xB4F5, 0xE7AA },
+    { 0x196B, 0x4E63, 0x7FFF, 0x7FFF, 0x8000, 0x8000, 0xB19C, 0xE694 },
+    { 0x1A8D, 0x51E3, 0x7FFF, 0x7FFF, 0x8000, 0x8000, 0xAE1C, 0xE572 },
+    { 0x1BBD, 0x558B, 0x7FFF, 0x7FFF, 0x8000, 0x8000, 0xAA74, 0xE442 },
+    { 0x1CFA, 0x595C, 0x7FFF, 0x7FFF, 0x8000, 0x8000, 0xA6A3, 0xE305 },
+    { 0x1E45, 0x5D59, 0x7FFF, 0x7FFF, 0x8000, 0x8000, 0xA2A6, 0xE1BA },
+    { 0x1F9F, 0x6184, 0x7FFF, 0x7FFF, 0x8000, 0x8000, 0x9E7B, 0xE060 },
+    { 0x2108, 0x65DE, 0x7FFF, 0x7FFF, 0x8000, 0x8000, 0x9A21, 0xDEF7 },
+    { 0x2281, 0x6A6A, 0x7FFF, 0x7FFF, 0x8000, 0x8000, 0x9595, 0xDD7E },
+    { 0x240C, 0x6F29, 0x7FFF, 0x7FFF, 0x8000, 0x8000, 0x90D6, 0xDBF3 },
+    { 0x25A7, 0x741F, 0x7FFF, 0x7FFF, 0x8000, 0x8000, 0x8BE0, 0xDA58 },
+};
+
+static const uint16_t MACEtab4[][8] = {
+    { 0x0040, 0x00D8, 0xFF27, 0xFFBF, 0, 0, 0, 0 },  { 0x0043, 0x00E2, 0xFF1D, 0xFFBC, 0, 0, 0, 0 },
+    { 0x0046, 0x00EC, 0xFF13, 0xFFB9, 0, 0, 0, 0 },  { 0x004A, 0x00F6, 0xFF09, 0xFFB5, 0, 0, 0, 0 },
+    { 0x004D, 0x0101, 0xFEFE, 0xFFB2, 0, 0, 0, 0 },  { 0x0050, 0x010C, 0xFEF3, 0xFFAF, 0, 0, 0, 0 },
+    { 0x0054, 0x0118, 0xFEE7, 0xFFAB, 0, 0, 0, 0 },  { 0x0058, 0x0126, 0xFED9, 0xFFA7, 0, 0, 0, 0 },
+    { 0x005C, 0x0133, 0xFECC, 0xFFA3, 0, 0, 0, 0 },  { 0x0060, 0x0141, 0xFEBE, 0xFF9F, 0, 0, 0, 0 },
+    { 0x0064, 0x014E, 0xFEB1, 0xFF9B, 0, 0, 0, 0 },  { 0x0068, 0x015E, 0xFEA1, 0xFF97, 0, 0, 0, 0 },
+    { 0x006D, 0x016D, 0xFE92, 0xFF92, 0, 0, 0, 0 },  { 0x0072, 0x017E, 0xFE81, 0xFF8D, 0, 0, 0, 0 },
+    { 0x0077, 0x018F, 0xFE70, 0xFF88, 0, 0, 0, 0 },  { 0x007C, 0x01A0, 0xFE5F, 0xFF83, 0, 0, 0, 0 },
+    { 0x0082, 0x01B2, 0xFE4D, 0xFF7D, 0, 0, 0, 0 },  { 0x0088, 0x01C6, 0xFE39, 0xFF77, 0, 0, 0, 0 },
+    { 0x008E, 0x01DB, 0xFE24, 0xFF71, 0, 0, 0, 0 },  { 0x0094, 0x01EF, 0xFE10, 0xFF6B, 0, 0, 0, 0 },
+    { 0x009B, 0x0207, 0xFDF8, 0xFF64, 0, 0, 0, 0 },  { 0x00A2, 0x021D, 0xFDE2, 0xFF5D, 0, 0, 0, 0 },
+    { 0x00A9, 0x0234, 0xFDCB, 0xFF56, 0, 0, 0, 0 },  { 0x00B0, 0x024E, 0xFDB1, 0xFF4F, 0, 0, 0, 0 },
+    { 0x00B9, 0x0269, 0xFD96, 0xFF46, 0, 0, 0, 0 },  { 0x00C1, 0x0284, 0xFD7B, 0xFF3E, 0, 0, 0, 0 },
+    { 0x00C9, 0x02A1, 0xFD5E, 0xFF36, 0, 0, 0, 0 },  { 0x00D2, 0x02BF, 0xFD40, 0xFF2D, 0, 0, 0, 0 },
+    { 0x00DC, 0x02DF, 0xFD20, 0xFF23, 0, 0, 0, 0 },  { 0x00E6, 0x02FF, 0xFD00, 0xFF19, 0, 0, 0, 0 },
+    { 0x00F0, 0x0321, 0xFCDE, 0xFF0F, 0, 0, 0, 0 },  { 0x00FB, 0x0346, 0xFCB9, 0xFF04, 0, 0, 0, 0 },
+    { 0x0106, 0x036C, 0xFC93, 0xFEF9, 0, 0, 0, 0 },  { 0x0112, 0x0392, 0xFC6D, 0xFEED, 0, 0, 0, 0 },
+    { 0x011E, 0x03BB, 0xFC44, 0xFEE1, 0, 0, 0, 0 },  { 0x012B, 0x03E5, 0xFC1A, 0xFED4, 0, 0, 0, 0 },
+    { 0x0138, 0x0411, 0xFBEE, 0xFEC7, 0, 0, 0, 0 },  { 0x0146, 0x0441, 0xFBBE, 0xFEB9, 0, 0, 0, 0 },
+    { 0x0155, 0x0472, 0xFB8D, 0xFEAA, 0, 0, 0, 0 },  { 0x0164, 0x04A4, 0xFB5B, 0xFE9B, 0, 0, 0, 0 },
+    { 0x0174, 0x04D9, 0xFB26, 0xFE8B, 0, 0, 0, 0 },  { 0x0184, 0x0511, 0xFAEE, 0xFE7B, 0, 0, 0, 0 },
+    { 0x0196, 0x054A, 0xFAB5, 0xFE69, 0, 0, 0, 0 },  { 0x01A8, 0x0587, 0xFA78, 0xFE57, 0, 0, 0, 0 },
+    { 0x01BB, 0x05C6, 0xFA39, 0xFE44, 0, 0, 0, 0 },  { 0x01CE, 0x0608, 0xF9F7, 0xFE31, 0, 0, 0, 0 },
+    { 0x01E3, 0x064D, 0xF9B2, 0xFE1C, 0, 0, 0, 0 },  { 0x01F9, 0x0694, 0xF96B, 0xFE06, 0, 0, 0, 0 },
+    { 0x020F, 0x06E0, 0xF91F, 0xFDF0, 0, 0, 0, 0 },  { 0x0227, 0x072E, 0xF8D1, 0xFDD8, 0, 0, 0, 0 },
+    { 0x0240, 0x0781, 0xF87E, 0xFDBF, 0, 0, 0, 0 },  { 0x0259, 0x07D7, 0xF828, 0xFDA6, 0, 0, 0, 0 },
+    { 0x0274, 0x0831, 0xF7CE, 0xFD8B, 0, 0, 0, 0 },  { 0x0290, 0x088E, 0xF771, 0xFD6F, 0, 0, 0, 0 },
+    { 0x02AE, 0x08F0, 0xF70F, 0xFD51, 0, 0, 0, 0 },  { 0x02CC, 0x0955, 0xF6AA, 0xFD33, 0, 0, 0, 0 },
+    { 0x02EC, 0x09C0, 0xF63F, 0xFD13, 0, 0, 0, 0 },  { 0x030D, 0x0A2F, 0xF5D0, 0xFCF2, 0, 0, 0, 0 },
+    { 0x0330, 0x0AA4, 0xF55B, 0xFCCF, 0, 0, 0, 0 },  { 0x0355, 0x0B1E, 0xF4E1, 0xFCAA, 0, 0, 0, 0 },
+    { 0x037B, 0x0B9D, 0xF462, 0xFC84, 0, 0, 0, 0 },  { 0x03A2, 0x0C20, 0xF3DF, 0xFC5D, 0, 0, 0, 0 },
+    { 0x03CC, 0x0CAB, 0xF354, 0xFC33, 0, 0, 0, 0 },  { 0x03F8, 0x0D3D, 0xF2C2, 0xFC07, 0, 0, 0, 0 },
+    { 0x0425, 0x0DD3, 0xF22C, 0xFBDA, 0, 0, 0, 0 },  { 0x0454, 0x0E72, 0xF18D, 0xFBAB, 0, 0, 0, 0 },
+    { 0x0486, 0x0F16, 0xF0E9, 0xFB79, 0, 0, 0, 0 },  { 0x04B9, 0x0FC3, 0xF03C, 0xFB46, 0, 0, 0, 0 },
+    { 0x04F0, 0x1078, 0xEF87, 0xFB0F, 0, 0, 0, 0 },  { 0x0528, 0x1133, 0xEECC, 0xFAD7, 0, 0, 0, 0 },
+    { 0x0563, 0x11F7, 0xEE08, 0xFA9C, 0, 0, 0, 0 },  { 0x05A1, 0x12C6, 0xED39, 0xFA5E, 0, 0, 0, 0 },
+    { 0x05E1, 0x139B, 0xEC64, 0xFA1E, 0, 0, 0, 0 },  { 0x0624, 0x147C, 0xEB83, 0xF9DB, 0, 0, 0, 0 },
+    { 0x066A, 0x1565, 0xEA9A, 0xF995, 0, 0, 0, 0 },  { 0x06B3, 0x165A, 0xE9A5, 0xF94C, 0, 0, 0, 0 },
+    { 0x0700, 0x175A, 0xE8A5, 0xF8FF, 0, 0, 0, 0 },  { 0x0750, 0x1865, 0xE79A, 0xF8AF, 0, 0, 0, 0 },
+    { 0x07A3, 0x197A, 0xE685, 0xF85C, 0, 0, 0, 0 },  { 0x07FB, 0x1A9D, 0xE562, 0xF804, 0, 0, 0, 0 },
+    { 0x0856, 0x1BCE, 0xE431, 0xF7A9, 0, 0, 0, 0 },  { 0x08B5, 0x1D0C, 0xE2F3, 0xF74A, 0, 0, 0, 0 },
+    { 0x0919, 0x1E57, 0xE1A8, 0xF6E6, 0, 0, 0, 0 },  { 0x0980, 0x1FB2, 0xE04D, 0xF67F, 0, 0, 0, 0 },
+    { 0x09ED, 0x211D, 0xDEE2, 0xF612, 0, 0, 0, 0 },  { 0x0A5F, 0x2296, 0xDD69, 0xF5A0, 0, 0, 0, 0 },
+    { 0x0AD5, 0x2422, 0xDBDD, 0xF52A, 0, 0, 0, 0 },  { 0x0B51, 0x25BF, 0xDA40, 0xF4AE, 0, 0, 0, 0 },
+    { 0x0BD2, 0x276E, 0xD891, 0xF42D, 0, 0, 0, 0 },  { 0x0C5A, 0x2932, 0xD6CD, 0xF3A5, 0, 0, 0, 0 },
+    { 0x0CE7, 0x2B08, 0xD4F7, 0xF318, 0, 0, 0, 0 },  { 0x0D7A, 0x2CF4, 0xD30B, 0xF285, 0, 0, 0, 0 },
+    { 0x0E14, 0x2EF4, 0xD10B, 0xF1EB, 0, 0, 0, 0 },  { 0x0EB5, 0x310C, 0xCEF3, 0xF14A, 0, 0, 0, 0 },
+    { 0x0F5D, 0x333E, 0xCCC1, 0xF0A2, 0, 0, 0, 0 },  { 0x100C, 0x3587, 0xCA78, 0xEFF3, 0, 0, 0, 0 },
+    { 0x10C4, 0x37EB, 0xC814, 0xEF3B, 0, 0, 0, 0 },  { 0x1183, 0x3A69, 0xC596, 0xEE7C, 0, 0, 0, 0 },
+    { 0x124B, 0x3D05, 0xC2FA, 0xEDB4, 0, 0, 0, 0 },  { 0x131C, 0x3FBE, 0xC041, 0xECE3, 0, 0, 0, 0 },
+    { 0x13F7, 0x4296, 0xBD69, 0xEC08, 0, 0, 0, 0 },  { 0x14DB, 0x458F, 0xBA70, 0xEB24, 0, 0, 0, 0 },
+    { 0x15C9, 0x48AA, 0xB755, 0xEA36, 0, 0, 0, 0 },  { 0x16C2, 0x4BE9, 0xB416, 0xE93D, 0, 0, 0, 0 },
+    { 0x17C6, 0x4F4C, 0xB0B3, 0xE839, 0, 0, 0, 0 },  { 0x18D6, 0x52D5, 0xAD2A, 0xE729, 0, 0, 0, 0 },
+    { 0x19F2, 0x5688, 0xA977, 0xE60D, 0, 0, 0, 0 },  { 0x1B1A, 0x5A65, 0xA59A, 0xE4E5, 0, 0, 0, 0 },
+    { 0x1C50, 0x5E6D, 0xA192, 0xE3AF, 0, 0, 0, 0 },  { 0x1D93, 0x62A4, 0x9D5B, 0xE26C, 0, 0, 0, 0 },
+    { 0x1EE5, 0x670C, 0x98F3, 0xE11A, 0, 0, 0, 0 },  { 0x2046, 0x6BA5, 0x945A, 0xDFB9, 0, 0, 0, 0 },
+    { 0x21B7, 0x7072, 0x8F8D, 0xDE48, 0, 0, 0, 0 },  { 0x2338, 0x7578, 0x8A87, 0xDCC7, 0, 0, 0, 0 },
+    { 0x24CB, 0x7AB5, 0x854A, 0xDB34, 0, 0, 0, 0 },  { 0x266F, 0x7FFF, 0x8000, 0xD990, 0, 0, 0, 0 },
+    { 0x2826, 0x7FFF, 0x8000, 0xD7D9, 0, 0, 0, 0 },  { 0x29F1, 0x7FFF, 0x8000, 0xD60E, 0, 0, 0, 0 },
+    { 0x2BD0, 0x7FFF, 0x8000, 0xD42F, 0, 0, 0, 0 },  { 0x2DC5, 0x7FFF, 0x8000, 0xD23A, 0, 0, 0, 0 },
+    { 0x2FD0, 0x7FFF, 0x8000, 0xD02F, 0, 0, 0, 0 },  { 0x31F2, 0x7FFF, 0x8000, 0xCE0D, 0, 0, 0, 0 },
+    { 0x342C, 0x7FFF, 0x8000, 0xCBD3, 0, 0, 0, 0 },  { 0x3681, 0x7FFF, 0x8000, 0xC97E, 0, 0, 0, 0 },
+    { 0x38F0, 0x7FFF, 0x8000, 0xC70F, 0, 0, 0, 0 },  { 0x3B7A, 0x7FFF, 0x8000, 0xC485, 0, 0, 0, 0 },
+    { 0x3E22, 0x7FFF, 0x8000, 0xC1DD, 0, 0, 0, 0 },  { 0x40E7, 0x7FFF, 0x8000, 0xBF18, 0, 0, 0, 0 },
+};
+/* end of constants */
+
+typedef struct MACEContext {
+  short index, lev, factor, prev2, previous, level;
+  short *outPtr;
+} MACEContext;
+
+/* /// "chomp3()" */
+static void chomp3(MACEContext *ctx,
+            uint8_t val,
+            const uint16_t tab1[],
+            const uint16_t tab2[][8],
+            uint32_t numChannels)
+{
+  short current;
+
+  current=(short)tab2[(ctx->index & 0x7f0) >> 4][val];
+  if (current+ctx->lev > 32767) current=32767;
+  else if (current+ctx->lev < -32768) current=-32767;
+  else current+=ctx->lev;
+  ctx->lev=current-(current >> 3);
+//  *ctx->outPtr++=current >> 8;
+  *ctx->outPtr=current;
+  ctx->outPtr+=numChannels;
+  if ( ( ctx->index += tab1[val]-(ctx->index>>5) ) < 0 ) ctx->index = 0;
+}
+/* \\\ */
+
+/* /// "Exp1to3()" */
+static void Exp1to3(MACEContext *ctx,
+             uint8_t *inBuffer,
+             void *outBuffer,
+             uint32_t cnt,
+             uint32_t numChannels,
+             uint32_t whichChannel)
+{
+   uint8_t pkt;
+
+/*
+   if (inState) {
+     ctx->index=inState[0];
+     ctx->lev=inState[1];
+   } else
+*/
+   ctx->index=ctx->lev=0;
+
+   inBuffer+=(whichChannel-1)*2;
+
+   ctx->outPtr=outBuffer;
+
+   while (cnt>0) {
+     pkt=inBuffer[0];
+     chomp3(ctx, pkt       & 7, MACEtab1, MACEtab2, numChannels);
+     chomp3(ctx,(pkt >> 3) & 3, MACEtab3, MACEtab4, numChannels);
+     chomp3(ctx, pkt >> 5     , MACEtab1, MACEtab2, numChannels);
+     pkt=inBuffer[1];
+     chomp3(ctx, pkt       & 7, MACEtab1, MACEtab2, numChannels);
+     chomp3(ctx,(pkt >> 3) & 3, MACEtab3, MACEtab4, numChannels);
+     chomp3(ctx, pkt >> 5     , MACEtab1, MACEtab2, numChannels);
+
+     inBuffer+=numChannels*2;
+     --cnt;
+   }
+
+/*
+   if (outState) {
+     outState[0]=ctx->index;
+     outState[1]=ctx->lev;
+   }
+*/
+}
+/* \\\ */
+
+/* /// "chomp6()" */
+static void chomp6(MACEContext *ctx,
+            uint8_t val,
+            const uint16_t tab1[],
+            const uint16_t tab2[][8],
+            uint32_t numChannels)
+{
+  short current;
+
+  current=(short)tab2[(ctx->index & 0x7f0) >> 4][val];
+
+  if ((ctx->previous^current)>=0) {
+    if (ctx->factor+506>32767) ctx->factor=32767;
+    else ctx->factor+=506;
+  } else {
+    if (ctx->factor-314<-32768) ctx->factor=-32767;
+    else ctx->factor-=314;
+  }
+
+  if (current+ctx->level>32767) current=32767;
+  else if (current+ctx->level<-32768) current=-32767;
+  else current+=ctx->level;
+
+  ctx->level=((current*ctx->factor) >> 15);
+  current>>=1;
+
+//  *ctx->outPtr++=(ctx->previous+ctx->prev2-((ctx->prev2-current) >> 2)) >> 8;
+//  *ctx->outPtr++=(ctx->previous+current+((ctx->prev2-current) >> 2)) >> 8;
+  *ctx->outPtr=(ctx->previous+ctx->prev2-((ctx->prev2-current) >> 2));
+  ctx->outPtr+=numChannels;
+  *ctx->outPtr=(ctx->previous+current+((ctx->prev2-current) >> 2));
+  ctx->outPtr+=numChannels;
+  ctx->prev2=ctx->previous;
+  ctx->previous=current;
+
+  if( ( ctx->index += tab1[val]-(ctx->index>>5) ) < 0 ) ctx->index = 0;
+}
+/* \\\ */
+
+/* /// "Exp1to6()" */
+static void Exp1to6(MACEContext *ctx,
+             uint8_t *inBuffer,
+             void *outBuffer,
+             uint32_t cnt,
+             uint32_t numChannels,
+             uint32_t whichChannel)
+{
+   uint8_t pkt;
+
+/*
+   if (inState) {
+     ctx->previous=inState[0];
+     ctx->prev2=inState[1];
+     ctx->index=inState[2];
+     ctx->level=inState[3];
+     ctx->factor=inState[4];
+   } else
+*/
+   ctx->previous=ctx->prev2=ctx->index=ctx->level=ctx->factor=0;
+
+   inBuffer+=(whichChannel-1);
+   ctx->outPtr=outBuffer;
+
+   while (cnt>0) {
+     pkt=*inBuffer;
+
+     chomp6(ctx, pkt >> 5     , MACEtab1, MACEtab2, numChannels);
+     chomp6(ctx,(pkt >> 3) & 3, MACEtab3, MACEtab4, numChannels);
+     chomp6(ctx, pkt       & 7, MACEtab1, MACEtab2, numChannels);
+
+     inBuffer+=numChannels;
+     --cnt;
+   }
+
+/*
+   if (outState) {
+     outState[0]=ctx->previous;
+     outState[1]=ctx->prev2;
+     outState[2]=ctx->index;
+     outState[3]=ctx->level;
+     outState[4]=ctx->factor;
+   }
+*/
+}
+/* \\\ */
+
+static int mace_decode_init(AVCodecContext * avctx)
+{
+    if (avctx->channels > 2)
+        return -1;
+    return 0;
+}
+
+static int mace_decode_frame(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    short *samples;
+    MACEContext *c = avctx->priv_data;
+
+    samples = (short *)data;
+    switch (avctx->codec->id) {
+    case CODEC_ID_MACE3:
+#ifdef DEBUG
+puts("mace_decode_frame[3]()");
+#endif
+        Exp1to3(c, buf, samples, buf_size / 2 / avctx->channels, avctx->channels, 1);
+        if (avctx->channels == 2)
+            Exp1to3(c, buf, samples+1, buf_size / 2 / 2, 2, 2);
+        *data_size = 2 * 3 * buf_size;
+        break;
+    case CODEC_ID_MACE6:
+#ifdef DEBUG
+puts("mace_decode_frame[6]()");
+#endif
+        Exp1to6(c, buf, samples, buf_size / avctx->channels, avctx->channels, 1);
+        if (avctx->channels == 2)
+            Exp1to6(c, buf, samples+1, buf_size / 2, 2, 2);
+        *data_size = 2 * 6 * buf_size;
+        break;
+    default:
+        return -1;
+    }
+    return buf_size;
+}
+
+AVCodec mace3_decoder = {
+    "mace3",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_MACE3,
+    sizeof(MACEContext),
+    mace_decode_init,
+    NULL,
+    NULL,
+    mace_decode_frame,
+};
+
+AVCodec mace6_decoder = {
+    "mace6",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_MACE6,
+    sizeof(MACEContext),
+    mace_decode_init,
+    NULL,
+    NULL,
+    mace_decode_frame,
+};
+
diff --git a/contrib/ffmpeg/libavcodec/mathops.h b/contrib/ffmpeg/libavcodec/mathops.h
new file mode 100644
index 000000000..9ae34d71b
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mathops.h
@@ -0,0 +1,69 @@
+/*
+ * simple math operations
+ * Copyright (c) 2001, 2002 Fabrice Bellard.
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef MATHOPS_H
+#define MATHOPS_H
+
+#ifdef ARCH_X86_32
+
+#include "i386/mathops.h"
+
+#elif defined(ARCH_ARMV4L)
+
+#include "armv4l/mathops.h"
+
+#elif defined(ARCH_PPC)
+
+#include "ppc/mathops.h"
+
+#endif
+
+/* generic implementation */
+
+#ifndef MULL
+#   define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS)
+#endif
+
+#ifndef MULH
+//gcc 3.4 creates an incredibly bloated mess out of this
+//#    define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32)
+
+static always_inline int MULH(int a, int b){
+    return ((int64_t)(a) * (int64_t)(b))>>32;
+}
+#endif
+
+#ifndef MUL64
+#   define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
+#endif
+
+/* signed 16x16 -> 32 multiply add accumulate */
+#ifndef MAC16
+#   define MAC16(rt, ra, rb) rt += (ra) * (rb)
+#endif
+
+/* signed 16x16 -> 32 multiply */
+#ifndef MUL16
+#   define MUL16(ra, rb) ((ra) * (rb))
+#endif
+
+#endif //MATHOPS_H
+
diff --git a/contrib/ffmpeg/libavcodec/mdct.c b/contrib/ffmpeg/libavcodec/mdct.c
new file mode 100644
index 000000000..de3275289
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mdct.c
@@ -0,0 +1,177 @@
+/*
+ * MDCT/IMDCT transforms
+ * Copyright (c) 2002 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "dsputil.h"
+
+/**
+ * @file mdct.c
+ * MDCT/IMDCT transforms.
+ */
+
+/**
+ * init MDCT or IMDCT computation.
+ */
+int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
+{
+    int n, n4, i;
+    float alpha;
+
+    memset(s, 0, sizeof(*s));
+    n = 1 << nbits;
+    s->nbits = nbits;
+    s->n = n;
+    n4 = n >> 2;
+    s->tcos = av_malloc(n4 * sizeof(FFTSample));
+    if (!s->tcos)
+        goto fail;
+    s->tsin = av_malloc(n4 * sizeof(FFTSample));
+    if (!s->tsin)
+        goto fail;
+
+    for(i=0;i<n4;i++) {
+        alpha = 2 * M_PI * (i + 1.0 / 8.0) / n;
+        s->tcos[i] = -cos(alpha);
+        s->tsin[i] = -sin(alpha);
+    }
+    if (ff_fft_init(&s->fft, s->nbits - 2, inverse) < 0)
+        goto fail;
+    return 0;
+ fail:
+    av_freep(&s->tcos);
+    av_freep(&s->tsin);
+    return -1;
+}
+
+/* complex multiplication: p = a * b */
+#define CMUL(pre, pim, are, aim, bre, bim) \
+{\
+    float _are = (are);\
+    float _aim = (aim);\
+    float _bre = (bre);\
+    float _bim = (bim);\
+    (pre) = _are * _bre - _aim * _bim;\
+    (pim) = _are * _bim + _aim * _bre;\
+}
+
+/**
+ * Compute inverse MDCT of size N = 2^nbits
+ * @param output N samples
+ * @param input N/2 samples
+ * @param tmp N/2 samples
+ */
+void ff_imdct_calc(MDCTContext *s, FFTSample *output,
+                   const FFTSample *input, FFTSample *tmp)
+{
+    int k, n8, n4, n2, n, j;
+    const uint16_t *revtab = s->fft.revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    const FFTSample *in1, *in2;
+    FFTComplex *z = (FFTComplex *)tmp;
+
+    n = 1 << s->nbits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+
+    /* pre rotation */
+    in1 = input;
+    in2 = input + n2 - 1;
+    for(k = 0; k < n4; k++) {
+        j=revtab[k];
+        CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);
+        in1 += 2;
+        in2 -= 2;
+    }
+    ff_fft_calc(&s->fft, z);
+
+    /* post rotation + reordering */
+    /* XXX: optimize */
+    for(k = 0; k < n4; k++) {
+        CMUL(z[k].re, z[k].im, z[k].re, z[k].im, tcos[k], tsin[k]);
+    }
+    for(k = 0; k < n8; k++) {
+        output[2*k] = -z[n8 + k].im;
+        output[n2-1-2*k] = z[n8 + k].im;
+
+        output[2*k+1] = z[n8-1-k].re;
+        output[n2-1-2*k-1] = -z[n8-1-k].re;
+
+        output[n2 + 2*k]=-z[k+n8].re;
+        output[n-1- 2*k]=-z[k+n8].re;
+
+        output[n2 + 2*k+1]=z[n8-k-1].im;
+        output[n-2 - 2 * k] = z[n8-k-1].im;
+    }
+}
+
+/**
+ * Compute MDCT of size N = 2^nbits
+ * @param input N samples
+ * @param out N/2 samples
+ * @param tmp temporary storage of N/2 samples
+ */
+void ff_mdct_calc(MDCTContext *s, FFTSample *out,
+                  const FFTSample *input, FFTSample *tmp)
+{
+    int i, j, n, n8, n4, n2, n3;
+    FFTSample re, im, re1, im1;
+    const uint16_t *revtab = s->fft.revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    FFTComplex *x = (FFTComplex *)tmp;
+
+    n = 1 << s->nbits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+    n3 = 3 * n4;
+
+    /* pre rotation */
+    for(i=0;i<n8;i++) {
+        re = -input[2*i+3*n4] - input[n3-1-2*i];
+        im = -input[n4+2*i] + input[n4-1-2*i];
+        j = revtab[i];
+        CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);
+
+        re = input[2*i] - input[n2-1-2*i];
+        im = -(input[n2+2*i] + input[n-1-2*i]);
+        j = revtab[n8 + i];
+        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
+    }
+
+    ff_fft_calc(&s->fft, x);
+
+    /* post rotation */
+    for(i=0;i<n4;i++) {
+        re = x[i].re;
+        im = x[i].im;
+        CMUL(re1, im1, re, im, -tsin[i], -tcos[i]);
+        out[2*i] = im1;
+        out[n2-1-2*i] = re1;
+    }
+}
+
+void ff_mdct_end(MDCTContext *s)
+{
+    av_freep(&s->tcos);
+    av_freep(&s->tsin);
+    ff_fft_end(&s->fft);
+}
diff --git a/contrib/ffmpeg/libavcodec/mdec.c b/contrib/ffmpeg/libavcodec/mdec.c
new file mode 100644
index 000000000..ee43b2777
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mdec.c
@@ -0,0 +1,270 @@
+/*
+ * PSX MDEC codec
+ * Copyright (c) 2003 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * based upon code from Sebastian Jedruszkiewicz <elf@frogger.rules.pl>
+ */
+
+/**
+ * @file mdec.c
+ * PSX MDEC codec.
+ * This is very similar to intra only MPEG1.
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+//#undef NDEBUG
+//#include <assert.h>
+
+typedef struct MDECContext{
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    AVFrame picture;
+    PutBitContext pb;
+    GetBitContext gb;
+    ScanTable scantable;
+    int version;
+    int qscale;
+    int last_dc[3];
+    int mb_width;
+    int mb_height;
+    int mb_x, mb_y;
+    DECLARE_ALIGNED_8(DCTELEM, block[6][64]);
+    DECLARE_ALIGNED_8(uint16_t, intra_matrix[64]);
+    DECLARE_ALIGNED_8(int, q_intra_matrix[64]);
+    uint8_t *bitstream_buffer;
+    unsigned int bitstream_buffer_size;
+    int block_last_index[6];
+} MDECContext;
+
+//very similar to mpeg1
+static inline int mdec_decode_block_intra(MDECContext *a, DCTELEM *block, int n)
+{
+    int level, diff, i, j, run;
+    int component;
+    RLTable *rl = &rl_mpeg1;
+    uint8_t * const scantable= a->scantable.permutated;
+    const uint16_t *quant_matrix= ff_mpeg1_default_intra_matrix;
+    const int qscale= a->qscale;
+
+    /* DC coef */
+    if(a->version==2){
+        block[0]= 2*get_sbits(&a->gb, 10) + 1024;
+    }else{
+        component = (n <= 3 ? 0 : n - 4 + 1);
+        diff = decode_dc(&a->gb, component);
+        if (diff >= 0xffff)
+            return -1;
+        a->last_dc[component]+= diff;
+        block[0] = a->last_dc[component]<<3;
+    }
+
+    i = 0;
+    {
+        OPEN_READER(re, &a->gb);
+        /* now quantify & encode AC coefs */
+        for(;;) {
+            UPDATE_CACHE(re, &a->gb);
+            GET_RL_VLC(level, run, re, &a->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
+
+            if(level == 127){
+                break;
+            } else if(level != 0) {
+                i += run;
+                j = scantable[i];
+                level= (level*qscale*quant_matrix[j])>>3;
+//                level= (level-1)|1;
+                level = (level ^ SHOW_SBITS(re, &a->gb, 1)) - SHOW_SBITS(re, &a->gb, 1);
+                LAST_SKIP_BITS(re, &a->gb, 1);
+            } else {
+                /* escape */
+                run = SHOW_UBITS(re, &a->gb, 6)+1; LAST_SKIP_BITS(re, &a->gb, 6);
+                UPDATE_CACHE(re, &a->gb);
+                level = SHOW_SBITS(re, &a->gb, 10); SKIP_BITS(re, &a->gb, 10);
+                i += run;
+                j = scantable[i];
+                if(level<0){
+                    level= -level;
+                    level= (level*qscale*quant_matrix[j])>>3;
+                    level= (level-1)|1;
+                    level= -level;
+                }else{
+                    level= (level*qscale*quant_matrix[j])>>3;
+                    level= (level-1)|1;
+                }
+            }
+            if (i > 63){
+                av_log(a->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", a->mb_x, a->mb_y);
+                return -1;
+            }
+
+            block[j] = level;
+        }
+        CLOSE_READER(re, &a->gb);
+    }
+    a->block_last_index[n] = i;
+    return 0;
+}
+
+static inline int decode_mb(MDECContext *a, DCTELEM block[6][64]){
+    int i;
+    const int block_index[6]= {5,4,0,1,2,3};
+
+    a->dsp.clear_blocks(block[0]);
+
+    for(i=0; i<6; i++){
+        if( mdec_decode_block_intra(a, block[ block_index[i] ], block_index[i]) < 0)
+            return -1;
+    }
+    return 0;
+}
+
+static inline void idct_put(MDECContext *a, int mb_x, int mb_y){
+    DCTELEM (*block)[64]= a->block;
+    int linesize= a->picture.linesize[0];
+
+    uint8_t *dest_y  = a->picture.data[0] + (mb_y * 16* linesize              ) + mb_x * 16;
+    uint8_t *dest_cb = a->picture.data[1] + (mb_y * 8 * a->picture.linesize[1]) + mb_x * 8;
+    uint8_t *dest_cr = a->picture.data[2] + (mb_y * 8 * a->picture.linesize[2]) + mb_x * 8;
+
+    a->dsp.idct_put(dest_y                 , linesize, block[0]);
+    a->dsp.idct_put(dest_y              + 8, linesize, block[1]);
+    a->dsp.idct_put(dest_y + 8*linesize    , linesize, block[2]);
+    a->dsp.idct_put(dest_y + 8*linesize + 8, linesize, block[3]);
+
+    if(!(a->avctx->flags&CODEC_FLAG_GRAY)){
+        a->dsp.idct_put(dest_cb, a->picture.linesize[1], block[4]);
+        a->dsp.idct_put(dest_cr, a->picture.linesize[2], block[5]);
+    }
+}
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    MDECContext * const a = avctx->priv_data;
+    AVFrame *picture = data;
+    AVFrame * const p= (AVFrame*)&a->picture;
+    int i;
+
+    if(p->data[0])
+        avctx->release_buffer(avctx, p);
+
+    p->reference= 0;
+    if(avctx->get_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    p->pict_type= I_TYPE;
+    p->key_frame= 1;
+    a->last_dc[0]=
+    a->last_dc[1]=
+    a->last_dc[2]= 0;
+
+    a->bitstream_buffer= av_fast_realloc(a->bitstream_buffer, &a->bitstream_buffer_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    for(i=0; i<buf_size; i+=2){
+        a->bitstream_buffer[i]  = buf[i+1];
+        a->bitstream_buffer[i+1]= buf[i  ];
+    }
+    init_get_bits(&a->gb, a->bitstream_buffer, buf_size*8);
+
+    /* skip over 4 preamble bytes in stream (typically 0xXX 0xXX 0x00 0x38) */
+    skip_bits(&a->gb, 32);
+
+    a->qscale=  get_bits(&a->gb, 16);
+    a->version= get_bits(&a->gb, 16);
+
+//    printf("qscale:%d (0x%X), version:%d (0x%X)\n", a->qscale, a->qscale, a->version, a->version);
+
+    for(a->mb_x=0; a->mb_x<a->mb_width; a->mb_x++){
+        for(a->mb_y=0; a->mb_y<a->mb_height; a->mb_y++){
+            if( decode_mb(a, a->block) <0)
+                return -1;
+
+            idct_put(a, a->mb_x, a->mb_y);
+        }
+    }
+
+//    p->quality= (32 + a->inv_qscale/2)/a->inv_qscale;
+//    memset(p->qscale_table, p->quality, p->qstride*a->mb_height);
+
+    *picture= *(AVFrame*)&a->picture;
+    *data_size = sizeof(AVPicture);
+
+    emms_c();
+
+    return (get_bits_count(&a->gb)+31)/32*4;
+}
+
+static void mdec_common_init(AVCodecContext *avctx){
+    MDECContext * const a = avctx->priv_data;
+
+    dsputil_init(&a->dsp, avctx);
+
+    a->mb_width   = (avctx->coded_width  + 15) / 16;
+    a->mb_height  = (avctx->coded_height + 15) / 16;
+
+    avctx->coded_frame= (AVFrame*)&a->picture;
+    a->avctx= avctx;
+}
+
+static int decode_init(AVCodecContext *avctx){
+    MDECContext * const a = avctx->priv_data;
+    AVFrame *p= (AVFrame*)&a->picture;
+
+    mdec_common_init(avctx);
+    init_vlcs();
+    ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_zigzag_direct);
+/*
+    for(i=0; i<64; i++){
+        int index= ff_zigzag_direct[i];
+        a->intra_matrix[i]= 64*ff_mpeg1_default_intra_matrix[index] / a->inv_qscale;
+    }
+*/
+    p->qstride= a->mb_width;
+    p->qscale_table= av_mallocz( p->qstride * a->mb_height);
+    avctx->pix_fmt= PIX_FMT_YUV420P;
+
+    return 0;
+}
+
+static int decode_end(AVCodecContext *avctx){
+    MDECContext * const a = avctx->priv_data;
+
+    av_freep(&a->bitstream_buffer);
+    av_freep(&a->picture.qscale_table);
+    a->bitstream_buffer_size=0;
+
+    return 0;
+}
+
+AVCodec mdec_decoder = {
+    "mdec",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MDEC,
+    sizeof(MDECContext),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame,
+    CODEC_CAP_DR1,
+};
+
diff --git a/contrib/ffmpeg/libavcodec/mjpeg.c b/contrib/ffmpeg/libavcodec/mjpeg.c
new file mode 100644
index 000000000..3d8383e7b
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mjpeg.c
@@ -0,0 +1,2625 @@
+/*
+ * MJPEG encoder and decoder
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ * Copyright (c) 2003 Alex Beregszaszi
+ * Copyright (c) 2003-2004 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Support for external huffman table, various fixes (AVID workaround),
+ * aspecting, new decode_frame mechanism and apple mjpeg-b support
+ *                                  by Alex Beregszaszi <alex@naxine.org>
+ */
+
+/**
+ * @file mjpeg.c
+ * MJPEG encoder and decoder.
+ */
+
+//#define DEBUG
+#include <assert.h>
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+#include "bytestream.h"
+
+/* use two quantizer tables (one for luminance and one for chrominance) */
+/* not yet working */
+#undef TWOMATRIXES
+
+typedef struct MJpegContext {
+    uint8_t huff_size_dc_luminance[12]; //FIXME use array [3] instead of lumi / chrom, for easier addressing
+    uint16_t huff_code_dc_luminance[12];
+    uint8_t huff_size_dc_chrominance[12];
+    uint16_t huff_code_dc_chrominance[12];
+
+    uint8_t huff_size_ac_luminance[256];
+    uint16_t huff_code_ac_luminance[256];
+    uint8_t huff_size_ac_chrominance[256];
+    uint16_t huff_code_ac_chrominance[256];
+} MJpegContext;
+
+/* JPEG marker codes */
+typedef enum {
+    /* start of frame */
+    SOF0  = 0xc0,       /* baseline */
+    SOF1  = 0xc1,       /* extended sequential, huffman */
+    SOF2  = 0xc2,       /* progressive, huffman */
+    SOF3  = 0xc3,       /* lossless, huffman */
+
+    SOF5  = 0xc5,       /* differential sequential, huffman */
+    SOF6  = 0xc6,       /* differential progressive, huffman */
+    SOF7  = 0xc7,       /* differential lossless, huffman */
+    JPG   = 0xc8,       /* reserved for JPEG extension */
+    SOF9  = 0xc9,       /* extended sequential, arithmetic */
+    SOF10 = 0xca,       /* progressive, arithmetic */
+    SOF11 = 0xcb,       /* lossless, arithmetic */
+
+    SOF13 = 0xcd,       /* differential sequential, arithmetic */
+    SOF14 = 0xce,       /* differential progressive, arithmetic */
+    SOF15 = 0xcf,       /* differential lossless, arithmetic */
+
+    DHT   = 0xc4,       /* define huffman tables */
+
+    DAC   = 0xcc,       /* define arithmetic-coding conditioning */
+
+    /* restart with modulo 8 count "m" */
+    RST0  = 0xd0,
+    RST1  = 0xd1,
+    RST2  = 0xd2,
+    RST3  = 0xd3,
+    RST4  = 0xd4,
+    RST5  = 0xd5,
+    RST6  = 0xd6,
+    RST7  = 0xd7,
+
+    SOI   = 0xd8,       /* start of image */
+    EOI   = 0xd9,       /* end of image */
+    SOS   = 0xda,       /* start of scan */
+    DQT   = 0xdb,       /* define quantization tables */
+    DNL   = 0xdc,       /* define number of lines */
+    DRI   = 0xdd,       /* define restart interval */
+    DHP   = 0xde,       /* define hierarchical progression */
+    EXP   = 0xdf,       /* expand reference components */
+
+    APP0  = 0xe0,
+    APP1  = 0xe1,
+    APP2  = 0xe2,
+    APP3  = 0xe3,
+    APP4  = 0xe4,
+    APP5  = 0xe5,
+    APP6  = 0xe6,
+    APP7  = 0xe7,
+    APP8  = 0xe8,
+    APP9  = 0xe9,
+    APP10 = 0xea,
+    APP11 = 0xeb,
+    APP12 = 0xec,
+    APP13 = 0xed,
+    APP14 = 0xee,
+    APP15 = 0xef,
+
+    JPG0  = 0xf0,
+    JPG1  = 0xf1,
+    JPG2  = 0xf2,
+    JPG3  = 0xf3,
+    JPG4  = 0xf4,
+    JPG5  = 0xf5,
+    JPG6  = 0xf6,
+    SOF48 = 0xf7,       ///< JPEG-LS
+    LSE   = 0xf8,       ///< JPEG-LS extension parameters
+    JPG9  = 0xf9,
+    JPG10 = 0xfa,
+    JPG11 = 0xfb,
+    JPG12 = 0xfc,
+    JPG13 = 0xfd,
+
+    COM   = 0xfe,       /* comment */
+
+    TEM   = 0x01,       /* temporary private use for arithmetic coding */
+
+    /* 0x02 -> 0xbf reserved */
+} JPEG_MARKER;
+
+#if 0
+/* These are the sample quantization tables given in JPEG spec section K.1.
+ * The spec says that the values given produce "good" quality, and
+ * when divided by 2, "very good" quality.
+ */
+static const unsigned char std_luminance_quant_tbl[64] = {
+    16,  11,  10,  16,  24,  40,  51,  61,
+    12,  12,  14,  19,  26,  58,  60,  55,
+    14,  13,  16,  24,  40,  57,  69,  56,
+    14,  17,  22,  29,  51,  87,  80,  62,
+    18,  22,  37,  56,  68, 109, 103,  77,
+    24,  35,  55,  64,  81, 104, 113,  92,
+    49,  64,  78,  87, 103, 121, 120, 101,
+    72,  92,  95,  98, 112, 100, 103,  99
+};
+static const unsigned char std_chrominance_quant_tbl[64] = {
+    17,  18,  24,  47,  99,  99,  99,  99,
+    18,  21,  26,  66,  99,  99,  99,  99,
+    24,  26,  56,  99,  99,  99,  99,  99,
+    47,  66,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99
+};
+#endif
+
+/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */
+/* IMPORTANT: these are only valid for 8-bit data precision! */
+static const uint8_t bits_dc_luminance[17] =
+{ /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
+static const uint8_t val_dc_luminance[] =
+{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+
+static const uint8_t bits_dc_chrominance[17] =
+{ /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
+static const uint8_t val_dc_chrominance[] =
+{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+
+static const uint8_t bits_ac_luminance[17] =
+{ /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
+static const uint8_t val_ac_luminance[] =
+{ 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+  0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+  0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+  0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+  0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+  0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+  0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+  0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+  0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+  0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+  0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+  0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+  0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+  0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+  0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+  0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+  0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+  0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+  0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+  0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+  0xf9, 0xfa
+};
+
+static const uint8_t bits_ac_chrominance[17] =
+{ /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
+
+static const uint8_t val_ac_chrominance[] =
+{ 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+  0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+  0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+  0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+  0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+  0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+  0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+  0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+  0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+  0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+  0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+  0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+  0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+  0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+  0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+  0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+  0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+  0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+  0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+  0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+  0xf9, 0xfa
+};
+
+/* isn't this function nicer than the one in the libjpeg ? */
+static void build_huffman_codes(uint8_t *huff_size, uint16_t *huff_code,
+                                const uint8_t *bits_table, const uint8_t *val_table)
+{
+    int i, j, k,nb, code, sym;
+
+    code = 0;
+    k = 0;
+    for(i=1;i<=16;i++) {
+        nb = bits_table[i];
+        for(j=0;j<nb;j++) {
+            sym = val_table[k++];
+            huff_size[sym] = i;
+            huff_code[sym] = code;
+            code++;
+        }
+        code <<= 1;
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+int mjpeg_init(MpegEncContext *s)
+{
+    MJpegContext *m;
+
+    m = av_malloc(sizeof(MJpegContext));
+    if (!m)
+        return -1;
+
+    s->min_qcoeff=-1023;
+    s->max_qcoeff= 1023;
+
+    /* build all the huffman tables */
+    build_huffman_codes(m->huff_size_dc_luminance,
+                        m->huff_code_dc_luminance,
+                        bits_dc_luminance,
+                        val_dc_luminance);
+    build_huffman_codes(m->huff_size_dc_chrominance,
+                        m->huff_code_dc_chrominance,
+                        bits_dc_chrominance,
+                        val_dc_chrominance);
+    build_huffman_codes(m->huff_size_ac_luminance,
+                        m->huff_code_ac_luminance,
+                        bits_ac_luminance,
+                        val_ac_luminance);
+    build_huffman_codes(m->huff_size_ac_chrominance,
+                        m->huff_code_ac_chrominance,
+                        bits_ac_chrominance,
+                        val_ac_chrominance);
+
+    s->mjpeg_ctx = m;
+    return 0;
+}
+
+void mjpeg_close(MpegEncContext *s)
+{
+    av_free(s->mjpeg_ctx);
+}
+#endif //CONFIG_ENCODERS
+
+#define PREDICT(ret, topleft, top, left, predictor)\
+    switch(predictor){\
+        case 1: ret= left; break;\
+        case 2: ret= top; break;\
+        case 3: ret= topleft; break;\
+        case 4: ret= left   +   top - topleft; break;\
+        case 5: ret= left   + ((top - topleft)>>1); break;\
+        case 6: ret= top + ((left   - topleft)>>1); break;\
+        default:\
+        case 7: ret= (left + top)>>1; break;\
+    }
+
+#ifdef CONFIG_ENCODERS
+static inline void put_marker(PutBitContext *p, int code)
+{
+    put_bits(p, 8, 0xff);
+    put_bits(p, 8, code);
+}
+
+/* table_class: 0 = DC coef, 1 = AC coefs */
+static int put_huffman_table(MpegEncContext *s, int table_class, int table_id,
+                             const uint8_t *bits_table, const uint8_t *value_table)
+{
+    PutBitContext *p = &s->pb;
+    int n, i;
+
+    put_bits(p, 4, table_class);
+    put_bits(p, 4, table_id);
+
+    n = 0;
+    for(i=1;i<=16;i++) {
+        n += bits_table[i];
+        put_bits(p, 8, bits_table[i]);
+    }
+
+    for(i=0;i<n;i++)
+        put_bits(p, 8, value_table[i]);
+
+    return n + 17;
+}
+
+static void jpeg_table_header(MpegEncContext *s)
+{
+    PutBitContext *p = &s->pb;
+    int i, j, size;
+    uint8_t *ptr;
+
+    /* quant matrixes */
+    put_marker(p, DQT);
+#ifdef TWOMATRIXES
+    put_bits(p, 16, 2 + 2 * (1 + 64));
+#else
+    put_bits(p, 16, 2 + 1 * (1 + 64));
+#endif
+    put_bits(p, 4, 0); /* 8 bit precision */
+    put_bits(p, 4, 0); /* table 0 */
+    for(i=0;i<64;i++) {
+        j = s->intra_scantable.permutated[i];
+        put_bits(p, 8, s->intra_matrix[j]);
+    }
+#ifdef TWOMATRIXES
+    put_bits(p, 4, 0); /* 8 bit precision */
+    put_bits(p, 4, 1); /* table 1 */
+    for(i=0;i<64;i++) {
+        j = s->intra_scantable.permutated[i];
+        put_bits(p, 8, s->chroma_intra_matrix[j]);
+    }
+#endif
+
+    /* huffman table */
+    put_marker(p, DHT);
+    flush_put_bits(p);
+    ptr = pbBufPtr(p);
+    put_bits(p, 16, 0); /* patched later */
+    size = 2;
+    size += put_huffman_table(s, 0, 0, bits_dc_luminance, val_dc_luminance);
+    size += put_huffman_table(s, 0, 1, bits_dc_chrominance, val_dc_chrominance);
+
+    size += put_huffman_table(s, 1, 0, bits_ac_luminance, val_ac_luminance);
+    size += put_huffman_table(s, 1, 1, bits_ac_chrominance, val_ac_chrominance);
+    ptr[0] = size >> 8;
+    ptr[1] = size;
+}
+
+static void jpeg_put_comments(MpegEncContext *s)
+{
+    PutBitContext *p = &s->pb;
+    int size;
+    uint8_t *ptr;
+
+    if (s->aspect_ratio_info /* && !lossless */)
+    {
+    /* JFIF header */
+    put_marker(p, APP0);
+    put_bits(p, 16, 16);
+    ff_put_string(p, "JFIF", 1); /* this puts the trailing zero-byte too */
+    put_bits(p, 16, 0x0201); /* v 1.02 */
+    put_bits(p, 8, 0); /* units type: 0 - aspect ratio */
+    put_bits(p, 16, s->avctx->sample_aspect_ratio.num);
+    put_bits(p, 16, s->avctx->sample_aspect_ratio.den);
+    put_bits(p, 8, 0); /* thumbnail width */
+    put_bits(p, 8, 0); /* thumbnail height */
+    }
+
+    /* comment */
+    if(!(s->flags & CODEC_FLAG_BITEXACT)){
+        put_marker(p, COM);
+        flush_put_bits(p);
+        ptr = pbBufPtr(p);
+        put_bits(p, 16, 0); /* patched later */
+        ff_put_string(p, LIBAVCODEC_IDENT, 1);
+        size = strlen(LIBAVCODEC_IDENT)+3;
+        ptr[0] = size >> 8;
+        ptr[1] = size;
+    }
+
+    if(  s->avctx->pix_fmt == PIX_FMT_YUV420P
+       ||s->avctx->pix_fmt == PIX_FMT_YUV422P
+       ||s->avctx->pix_fmt == PIX_FMT_YUV444P){
+        put_marker(p, COM);
+        flush_put_bits(p);
+        ptr = pbBufPtr(p);
+        put_bits(p, 16, 0); /* patched later */
+        ff_put_string(p, "CS=ITU601", 1);
+        size = strlen("CS=ITU601")+3;
+        ptr[0] = size >> 8;
+        ptr[1] = size;
+    }
+}
+
+void mjpeg_picture_header(MpegEncContext *s)
+{
+    const int lossless= s->avctx->codec_id != CODEC_ID_MJPEG;
+    const int ls      = s->avctx->codec_id == CODEC_ID_JPEGLS;
+
+    assert(!(ls && s->mjpeg_write_tables));
+
+    put_marker(&s->pb, SOI);
+
+    if (!s->mjpeg_data_only_frames)
+    {
+    jpeg_put_comments(s);
+
+    if (s->mjpeg_write_tables) jpeg_table_header(s);
+
+    switch(s->avctx->codec_id){
+    case CODEC_ID_MJPEG:  put_marker(&s->pb, SOF0 ); break;
+    case CODEC_ID_LJPEG:  put_marker(&s->pb, SOF3 ); break;
+    case CODEC_ID_JPEGLS: put_marker(&s->pb, SOF48); break;
+    default: assert(0);
+    }
+
+    put_bits(&s->pb, 16, 17);
+    if(lossless && s->avctx->pix_fmt == PIX_FMT_RGBA32)
+        put_bits(&s->pb, 8, 9); /* 9 bits/component RCT */
+    else
+        put_bits(&s->pb, 8, 8); /* 8 bits/component */
+    put_bits(&s->pb, 16, s->height);
+    put_bits(&s->pb, 16, s->width);
+    put_bits(&s->pb, 8, 3); /* 3 components */
+
+    /* Y component */
+    put_bits(&s->pb, 8, 1); /* component number */
+    put_bits(&s->pb, 4, s->mjpeg_hsample[0]); /* H factor */
+    put_bits(&s->pb, 4, s->mjpeg_vsample[0]); /* V factor */
+    put_bits(&s->pb, 8, 0); /* select matrix */
+
+    /* Cb component */
+    put_bits(&s->pb, 8, 2); /* component number */
+    put_bits(&s->pb, 4, s->mjpeg_hsample[1]); /* H factor */
+    put_bits(&s->pb, 4, s->mjpeg_vsample[1]); /* V factor */
+#ifdef TWOMATRIXES
+    put_bits(&s->pb, 8, lossless ? 0 : 1); /* select matrix */
+#else
+    put_bits(&s->pb, 8, 0); /* select matrix */
+#endif
+
+    /* Cr component */
+    put_bits(&s->pb, 8, 3); /* component number */
+    put_bits(&s->pb, 4, s->mjpeg_hsample[2]); /* H factor */
+    put_bits(&s->pb, 4, s->mjpeg_vsample[2]); /* V factor */
+#ifdef TWOMATRIXES
+    put_bits(&s->pb, 8, lossless ? 0 : 1); /* select matrix */
+#else
+    put_bits(&s->pb, 8, 0); /* select matrix */
+#endif
+    }
+
+    /* scan header */
+    put_marker(&s->pb, SOS);
+    put_bits(&s->pb, 16, 12); /* length */
+    put_bits(&s->pb, 8, 3); /* 3 components */
+
+    /* Y component */
+    put_bits(&s->pb, 8, 1); /* index */
+    put_bits(&s->pb, 4, 0); /* DC huffman table index */
+    put_bits(&s->pb, 4, 0); /* AC huffman table index */
+
+    /* Cb component */
+    put_bits(&s->pb, 8, 2); /* index */
+    put_bits(&s->pb, 4, 1); /* DC huffman table index */
+    put_bits(&s->pb, 4, lossless ? 0 : 1); /* AC huffman table index */
+
+    /* Cr component */
+    put_bits(&s->pb, 8, 3); /* index */
+    put_bits(&s->pb, 4, 1); /* DC huffman table index */
+    put_bits(&s->pb, 4, lossless ? 0 : 1); /* AC huffman table index */
+
+    put_bits(&s->pb, 8, (lossless && !ls) ? s->avctx->prediction_method+1 : 0); /* Ss (not used) */
+
+    switch(s->avctx->codec_id){
+    case CODEC_ID_MJPEG:  put_bits(&s->pb, 8, 63); break; /* Se (not used) */
+    case CODEC_ID_LJPEG:  put_bits(&s->pb, 8,  0); break; /* not used */
+    case CODEC_ID_JPEGLS: put_bits(&s->pb, 8,  1); break; /* ILV = line interleaved */
+    default: assert(0);
+    }
+
+    put_bits(&s->pb, 8, 0); /* Ah/Al (not used) */
+
+    //FIXME DC/AC entropy table selectors stuff in jpegls
+}
+
+static void escape_FF(MpegEncContext *s, int start)
+{
+    int size= put_bits_count(&s->pb) - start*8;
+    int i, ff_count;
+    uint8_t *buf= s->pb.buf + start;
+    int align= (-(size_t)(buf))&3;
+
+    assert((size&7) == 0);
+    size >>= 3;
+
+    ff_count=0;
+    for(i=0; i<size && i<align; i++){
+        if(buf[i]==0xFF) ff_count++;
+    }
+    for(; i<size-15; i+=16){
+        int acc, v;
+
+        v= *(uint32_t*)(&buf[i]);
+        acc= (((v & (v>>4))&0x0F0F0F0F)+0x01010101)&0x10101010;
+        v= *(uint32_t*)(&buf[i+4]);
+        acc+=(((v & (v>>4))&0x0F0F0F0F)+0x01010101)&0x10101010;
+        v= *(uint32_t*)(&buf[i+8]);
+        acc+=(((v & (v>>4))&0x0F0F0F0F)+0x01010101)&0x10101010;
+        v= *(uint32_t*)(&buf[i+12]);
+        acc+=(((v & (v>>4))&0x0F0F0F0F)+0x01010101)&0x10101010;
+
+        acc>>=4;
+        acc+= (acc>>16);
+        acc+= (acc>>8);
+        ff_count+= acc&0xFF;
+    }
+    for(; i<size; i++){
+        if(buf[i]==0xFF) ff_count++;
+    }
+
+    if(ff_count==0) return;
+
+    /* skip put bits */
+    for(i=0; i<ff_count-3; i+=4)
+        put_bits(&s->pb, 32, 0);
+    put_bits(&s->pb, (ff_count-i)*8, 0);
+    flush_put_bits(&s->pb);
+
+    for(i=size-1; ff_count; i--){
+        int v= buf[i];
+
+        if(v==0xFF){
+//printf("%d %d\n", i, ff_count);
+            buf[i+ff_count]= 0;
+            ff_count--;
+        }
+
+        buf[i+ff_count]= v;
+    }
+}
+
+void ff_mjpeg_stuffing(PutBitContext * pbc)
+{
+    int length;
+    length= (-put_bits_count(pbc))&7;
+    if(length) put_bits(pbc, length, (1<<length)-1);
+}
+
+void mjpeg_picture_trailer(MpegEncContext *s)
+{
+    ff_mjpeg_stuffing(&s->pb);
+    flush_put_bits(&s->pb);
+
+    assert((s->header_bits&7)==0);
+
+    escape_FF(s, s->header_bits>>3);
+
+    put_marker(&s->pb, EOI);
+}
+
+static inline void mjpeg_encode_dc(MpegEncContext *s, int val,
+                                   uint8_t *huff_size, uint16_t *huff_code)
+{
+    int mant, nbits;
+
+    if (val == 0) {
+        put_bits(&s->pb, huff_size[0], huff_code[0]);
+    } else {
+        mant = val;
+        if (val < 0) {
+            val = -val;
+            mant--;
+        }
+
+        nbits= av_log2_16bit(val) + 1;
+
+        put_bits(&s->pb, huff_size[nbits], huff_code[nbits]);
+
+        put_bits(&s->pb, nbits, mant & ((1 << nbits) - 1));
+    }
+}
+
+static void encode_block(MpegEncContext *s, DCTELEM *block, int n)
+{
+    int mant, nbits, code, i, j;
+    int component, dc, run, last_index, val;
+    MJpegContext *m = s->mjpeg_ctx;
+    uint8_t *huff_size_ac;
+    uint16_t *huff_code_ac;
+
+    /* DC coef */
+    component = (n <= 3 ? 0 : (n&1) + 1);
+    dc = block[0]; /* overflow is impossible */
+    val = dc - s->last_dc[component];
+    if (n < 4) {
+        mjpeg_encode_dc(s, val, m->huff_size_dc_luminance, m->huff_code_dc_luminance);
+        huff_size_ac = m->huff_size_ac_luminance;
+        huff_code_ac = m->huff_code_ac_luminance;
+    } else {
+        mjpeg_encode_dc(s, val, m->huff_size_dc_chrominance, m->huff_code_dc_chrominance);
+        huff_size_ac = m->huff_size_ac_chrominance;
+        huff_code_ac = m->huff_code_ac_chrominance;
+    }
+    s->last_dc[component] = dc;
+
+    /* AC coefs */
+
+    run = 0;
+    last_index = s->block_last_index[n];
+    for(i=1;i<=last_index;i++) {
+        j = s->intra_scantable.permutated[i];
+        val = block[j];
+        if (val == 0) {
+            run++;
+        } else {
+            while (run >= 16) {
+                put_bits(&s->pb, huff_size_ac[0xf0], huff_code_ac[0xf0]);
+                run -= 16;
+            }
+            mant = val;
+            if (val < 0) {
+                val = -val;
+                mant--;
+            }
+
+            nbits= av_log2(val) + 1;
+            code = (run << 4) | nbits;
+
+            put_bits(&s->pb, huff_size_ac[code], huff_code_ac[code]);
+
+            put_bits(&s->pb, nbits, mant & ((1 << nbits) - 1));
+            run = 0;
+        }
+    }
+
+    /* output EOB only if not already 64 values */
+    if (last_index < 63 || run != 0)
+        put_bits(&s->pb, huff_size_ac[0], huff_code_ac[0]);
+}
+
+void mjpeg_encode_mb(MpegEncContext *s,
+                     DCTELEM block[6][64])
+{
+    int i;
+    for(i=0;i<5;i++) {
+        encode_block(s, block[i], i);
+    }
+    if (s->chroma_format == CHROMA_420) {
+        encode_block(s, block[5], 5);
+    } else {
+        encode_block(s, block[6], 6);
+        encode_block(s, block[5], 5);
+        encode_block(s, block[7], 7);
+    }
+}
+
+static int encode_picture_lossless(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
+    MpegEncContext * const s = avctx->priv_data;
+    MJpegContext * const m = s->mjpeg_ctx;
+    AVFrame *pict = data;
+    const int width= s->width;
+    const int height= s->height;
+    AVFrame * const p= (AVFrame*)&s->current_picture;
+    const int predictor= avctx->prediction_method+1;
+
+    init_put_bits(&s->pb, buf, buf_size);
+
+    *p = *pict;
+    p->pict_type= FF_I_TYPE;
+    p->key_frame= 1;
+
+    mjpeg_picture_header(s);
+
+    s->header_bits= put_bits_count(&s->pb);
+
+    if(avctx->pix_fmt == PIX_FMT_RGBA32){
+        int x, y, i;
+        const int linesize= p->linesize[0];
+        uint16_t (*buffer)[4]= (void *) s->rd_scratchpad;
+        int left[3], top[3], topleft[3];
+
+        for(i=0; i<3; i++){
+            buffer[0][i]= 1 << (9 - 1);
+        }
+
+        for(y = 0; y < height; y++) {
+            const int modified_predictor= y ? predictor : 1;
+            uint8_t *ptr = p->data[0] + (linesize * y);
+
+            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < width*3*4){
+                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+                return -1;
+            }
+
+            for(i=0; i<3; i++){
+                top[i]= left[i]= topleft[i]= buffer[0][i];
+            }
+            for(x = 0; x < width; x++) {
+                buffer[x][1] = ptr[4*x+0] - ptr[4*x+1] + 0x100;
+                buffer[x][2] = ptr[4*x+2] - ptr[4*x+1] + 0x100;
+                buffer[x][0] = (ptr[4*x+0] + 2*ptr[4*x+1] + ptr[4*x+2])>>2;
+
+                for(i=0;i<3;i++) {
+                    int pred, diff;
+
+                    PREDICT(pred, topleft[i], top[i], left[i], modified_predictor);
+
+                    topleft[i]= top[i];
+                    top[i]= buffer[x+1][i];
+
+                    left[i]= buffer[x][i];
+
+                    diff= ((left[i] - pred + 0x100)&0x1FF) - 0x100;
+
+                    if(i==0)
+                        mjpeg_encode_dc(s, diff, m->huff_size_dc_luminance, m->huff_code_dc_luminance); //FIXME ugly
+                    else
+                        mjpeg_encode_dc(s, diff, m->huff_size_dc_chrominance, m->huff_code_dc_chrominance);
+                }
+            }
+        }
+    }else{
+        int mb_x, mb_y, i;
+        const int mb_width  = (width  + s->mjpeg_hsample[0] - 1) / s->mjpeg_hsample[0];
+        const int mb_height = (height + s->mjpeg_vsample[0] - 1) / s->mjpeg_vsample[0];
+
+        for(mb_y = 0; mb_y < mb_height; mb_y++) {
+            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < mb_width * 4 * 3 * s->mjpeg_hsample[0] * s->mjpeg_vsample[0]){
+                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+                return -1;
+            }
+            for(mb_x = 0; mb_x < mb_width; mb_x++) {
+                if(mb_x==0 || mb_y==0){
+                    for(i=0;i<3;i++) {
+                        uint8_t *ptr;
+                        int x, y, h, v, linesize;
+                        h = s->mjpeg_hsample[i];
+                        v = s->mjpeg_vsample[i];
+                        linesize= p->linesize[i];
+
+                        for(y=0; y<v; y++){
+                            for(x=0; x<h; x++){
+                                int pred;
+
+                                ptr = p->data[i] + (linesize * (v * mb_y + y)) + (h * mb_x + x); //FIXME optimize this crap
+                                if(y==0 && mb_y==0){
+                                    if(x==0 && mb_x==0){
+                                        pred= 128;
+                                    }else{
+                                        pred= ptr[-1];
+                                    }
+                                }else{
+                                    if(x==0 && mb_x==0){
+                                        pred= ptr[-linesize];
+                                    }else{
+                                        PREDICT(pred, ptr[-linesize-1], ptr[-linesize], ptr[-1], predictor);
+                                    }
+                                }
+
+                                if(i==0)
+                                    mjpeg_encode_dc(s, (int8_t)(*ptr - pred), m->huff_size_dc_luminance, m->huff_code_dc_luminance); //FIXME ugly
+                                else
+                                    mjpeg_encode_dc(s, (int8_t)(*ptr - pred), m->huff_size_dc_chrominance, m->huff_code_dc_chrominance);
+                            }
+                        }
+                    }
+                }else{
+                    for(i=0;i<3;i++) {
+                        uint8_t *ptr;
+                        int x, y, h, v, linesize;
+                        h = s->mjpeg_hsample[i];
+                        v = s->mjpeg_vsample[i];
+                        linesize= p->linesize[i];
+
+                        for(y=0; y<v; y++){
+                            for(x=0; x<h; x++){
+                                int pred;
+
+                                ptr = p->data[i] + (linesize * (v * mb_y + y)) + (h * mb_x + x); //FIXME optimize this crap
+//printf("%d %d %d %d %8X\n", mb_x, mb_y, x, y, ptr);
+                                PREDICT(pred, ptr[-linesize-1], ptr[-linesize], ptr[-1], predictor);
+
+                                if(i==0)
+                                    mjpeg_encode_dc(s, (int8_t)(*ptr - pred), m->huff_size_dc_luminance, m->huff_code_dc_luminance); //FIXME ugly
+                                else
+                                    mjpeg_encode_dc(s, (int8_t)(*ptr - pred), m->huff_size_dc_chrominance, m->huff_code_dc_chrominance);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    emms_c();
+
+    mjpeg_picture_trailer(s);
+    s->picture_number++;
+
+    flush_put_bits(&s->pb);
+    return pbBufPtr(&s->pb) - s->pb.buf;
+//    return (put_bits_count(&f->pb)+7)/8;
+}
+
+#endif //CONFIG_ENCODERS
+
+/******************************************/
+/* decoding */
+
+#define MAX_COMPONENTS 4
+
+typedef struct MJpegDecodeContext {
+    AVCodecContext *avctx;
+    GetBitContext gb;
+    int mpeg_enc_ctx_allocated; /* true if decoding context allocated */
+
+    int start_code; /* current start code */
+    int buffer_size;
+    uint8_t *buffer;
+
+    int16_t quant_matrixes[4][64];
+    VLC vlcs[2][4];
+    int qscale[4];      ///< quantizer scale calculated from quant_matrixes
+
+    int org_height;  /* size given at codec init */
+    int first_picture;    /* true if decoding first picture */
+    int interlaced;     /* true if interlaced */
+    int bottom_field;   /* true if bottom field */
+    int lossless;
+    int ls;
+    int progressive;
+    int rgb;
+    int rct;            /* standard rct */
+    int pegasus_rct;    /* pegasus reversible colorspace transform */
+    int bits;           /* bits per component */
+
+    int maxval;
+    int near;         ///< near lossless bound (si 0 for lossless)
+    int t1,t2,t3;
+    int reset;        ///< context halfing intervall ?rename
+
+    int width, height;
+    int mb_width, mb_height;
+    int nb_components;
+    int component_id[MAX_COMPONENTS];
+    int h_count[MAX_COMPONENTS]; /* horizontal and vertical count for each component */
+    int v_count[MAX_COMPONENTS];
+    int comp_index[MAX_COMPONENTS];
+    int dc_index[MAX_COMPONENTS];
+    int ac_index[MAX_COMPONENTS];
+    int nb_blocks[MAX_COMPONENTS];
+    int h_scount[MAX_COMPONENTS];
+    int v_scount[MAX_COMPONENTS];
+    int h_max, v_max; /* maximum h and v counts */
+    int quant_index[4];   /* quant table index for each component */
+    int last_dc[MAX_COMPONENTS]; /* last DEQUANTIZED dc (XXX: am I right to do that ?) */
+    AVFrame picture; /* picture structure */
+    int linesize[MAX_COMPONENTS];                   ///< linesize << interlaced
+    int8_t *qscale_table;
+    DECLARE_ALIGNED_8(DCTELEM, block[64]);
+    ScanTable scantable;
+    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+
+    int restart_interval;
+    int restart_count;
+
+    int buggy_avid;
+    int cs_itu601;
+    int interlace_polarity;
+
+    int mjpb_skiptosod;
+
+    int cur_scan; /* current scan, used by JPEG-LS */
+} MJpegDecodeContext;
+
+#include "jpeg_ls.c" //FIXME make jpeg-ls more independant
+
+static int mjpeg_decode_dht(MJpegDecodeContext *s);
+
+static int build_vlc(VLC *vlc, const uint8_t *bits_table, const uint8_t *val_table,
+                      int nb_codes, int use_static, int is_ac)
+{
+    uint8_t huff_size[256+16];
+    uint16_t huff_code[256+16];
+
+    assert(nb_codes <= 256);
+
+    memset(huff_size, 0, sizeof(huff_size));
+    build_huffman_codes(huff_size, huff_code, bits_table, val_table);
+
+    if(is_ac){
+        memmove(huff_size+16, huff_size, sizeof(uint8_t)*nb_codes);
+        memmove(huff_code+16, huff_code, sizeof(uint16_t)*nb_codes);
+        memset(huff_size, 0, sizeof(uint8_t)*16);
+        memset(huff_code, 0, sizeof(uint16_t)*16);
+        nb_codes += 16;
+    }
+
+    return init_vlc(vlc, 9, nb_codes, huff_size, 1, 1, huff_code, 2, 2, use_static);
+}
+
+static int mjpeg_decode_init(AVCodecContext *avctx)
+{
+    MJpegDecodeContext *s = avctx->priv_data;
+    MpegEncContext s2;
+    memset(s, 0, sizeof(MJpegDecodeContext));
+
+    s->avctx = avctx;
+
+    /* ugly way to get the idct & scantable FIXME */
+    memset(&s2, 0, sizeof(MpegEncContext));
+    s2.avctx= avctx;
+//    s2->out_format = FMT_MJPEG;
+    dsputil_init(&s2.dsp, avctx);
+    DCT_common_init(&s2);
+
+    s->scantable= s2.intra_scantable;
+    s->idct_put= s2.dsp.idct_put;
+    s->idct_add= s2.dsp.idct_add;
+
+    s->mpeg_enc_ctx_allocated = 0;
+    s->buffer_size = 0;
+    s->buffer = NULL;
+    s->start_code = -1;
+    s->first_picture = 1;
+    s->org_height = avctx->coded_height;
+
+    build_vlc(&s->vlcs[0][0], bits_dc_luminance, val_dc_luminance, 12, 0, 0);
+    build_vlc(&s->vlcs[0][1], bits_dc_chrominance, val_dc_chrominance, 12, 0, 0);
+    build_vlc(&s->vlcs[1][0], bits_ac_luminance, val_ac_luminance, 251, 0, 1);
+    build_vlc(&s->vlcs[1][1], bits_ac_chrominance, val_ac_chrominance, 251, 0, 1);
+
+    if (avctx->flags & CODEC_FLAG_EXTERN_HUFF)
+    {
+        av_log(avctx, AV_LOG_INFO, "mjpeg: using external huffman table\n");
+        init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8);
+        mjpeg_decode_dht(s);
+        /* should check for error - but dunno */
+    }
+
+    return 0;
+}
+
+
+/**
+ * finds the end of the current frame in the bitstream.
+ * @return the position of the first byte of the next frame, or -1
+ */
+static int find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){
+    int vop_found, i;
+    uint16_t state;
+
+    vop_found= pc->frame_start_found;
+    state= pc->state;
+
+    i=0;
+    if(!vop_found){
+        for(i=0; i<buf_size; i++){
+            state= (state<<8) | buf[i];
+            if(state == 0xFFD8){
+                i++;
+                vop_found=1;
+                break;
+            }
+        }
+    }
+
+    if(vop_found){
+        /* EOF considered as end of frame */
+        if (buf_size == 0)
+            return 0;
+        for(; i<buf_size; i++){
+            state= (state<<8) | buf[i];
+            if(state == 0xFFD8){
+                pc->frame_start_found=0;
+                pc->state=0;
+                return i-1;
+            }
+        }
+    }
+    pc->frame_start_found= vop_found;
+    pc->state= state;
+    return END_NOT_FOUND;
+}
+
+static int jpeg_parse(AVCodecParserContext *s,
+                           AVCodecContext *avctx,
+                           uint8_t **poutbuf, int *poutbuf_size,
+                           const uint8_t *buf, int buf_size)
+{
+    ParseContext *pc = s->priv_data;
+    int next;
+
+    next= find_frame_end(pc, buf, buf_size);
+
+    if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
+        *poutbuf = NULL;
+        *poutbuf_size = 0;
+        return buf_size;
+    }
+
+    *poutbuf = (uint8_t *)buf;
+    *poutbuf_size = buf_size;
+    return next;
+}
+
+/* quantize tables */
+static int mjpeg_decode_dqt(MJpegDecodeContext *s)
+{
+    int len, index, i, j;
+
+    len = get_bits(&s->gb, 16) - 2;
+
+    while (len >= 65) {
+        /* only 8 bit precision handled */
+        if (get_bits(&s->gb, 4) != 0)
+        {
+            dprintf("dqt: 16bit precision\n");
+            return -1;
+        }
+        index = get_bits(&s->gb, 4);
+        if (index >= 4)
+            return -1;
+        dprintf("index=%d\n", index);
+        /* read quant table */
+        for(i=0;i<64;i++) {
+            j = s->scantable.permutated[i];
+            s->quant_matrixes[index][j] = get_bits(&s->gb, 8);
+        }
+
+        //XXX FIXME finetune, and perhaps add dc too
+        s->qscale[index]= FFMAX(
+            s->quant_matrixes[index][s->scantable.permutated[1]],
+            s->quant_matrixes[index][s->scantable.permutated[8]]) >> 1;
+        dprintf("qscale[%d]: %d\n", index, s->qscale[index]);
+        len -= 65;
+    }
+
+    return 0;
+}
+
+/* decode huffman tables and build VLC decoders */
+static int mjpeg_decode_dht(MJpegDecodeContext *s)
+{
+    int len, index, i, class, n, v, code_max;
+    uint8_t bits_table[17];
+    uint8_t val_table[256];
+
+    len = get_bits(&s->gb, 16) - 2;
+
+    while (len > 0) {
+        if (len < 17)
+            return -1;
+        class = get_bits(&s->gb, 4);
+        if (class >= 2)
+            return -1;
+        index = get_bits(&s->gb, 4);
+        if (index >= 4)
+            return -1;
+        n = 0;
+        for(i=1;i<=16;i++) {
+            bits_table[i] = get_bits(&s->gb, 8);
+            n += bits_table[i];
+        }
+        len -= 17;
+        if (len < n || n > 256)
+            return -1;
+
+        code_max = 0;
+        for(i=0;i<n;i++) {
+            v = get_bits(&s->gb, 8);
+            if (v > code_max)
+                code_max = v;
+            val_table[i] = v;
+        }
+        len -= n;
+
+        /* build VLC and flush previous vlc if present */
+        free_vlc(&s->vlcs[class][index]);
+        dprintf("class=%d index=%d nb_codes=%d\n",
+               class, index, code_max + 1);
+        if(build_vlc(&s->vlcs[class][index], bits_table, val_table, code_max + 1, 0, class > 0) < 0){
+            return -1;
+        }
+    }
+    return 0;
+}
+
+static int mjpeg_decode_sof(MJpegDecodeContext *s)
+{
+    int len, nb_components, i, width, height, pix_fmt_id;
+
+    /* XXX: verify len field validity */
+    len = get_bits(&s->gb, 16);
+    s->bits= get_bits(&s->gb, 8);
+
+    if(s->pegasus_rct) s->bits=9;
+    if(s->bits==9 && !s->pegasus_rct) s->rct=1;    //FIXME ugly
+
+    if (s->bits != 8 && !s->lossless){
+        av_log(s->avctx, AV_LOG_ERROR, "only 8 bits/component accepted\n");
+        return -1;
+    }
+
+    height = get_bits(&s->gb, 16);
+    width = get_bits(&s->gb, 16);
+
+    dprintf("sof0: picture: %dx%d\n", width, height);
+    if(avcodec_check_dimensions(s->avctx, width, height))
+        return -1;
+
+    nb_components = get_bits(&s->gb, 8);
+    if (nb_components <= 0 ||
+        nb_components > MAX_COMPONENTS)
+        return -1;
+    if (s->ls && !(s->bits <= 8 || nb_components == 1)){
+        av_log(s->avctx, AV_LOG_ERROR, "only <= 8 bits/component or 16-bit gray accepted for JPEG-LS\n");
+        return -1;
+    }
+    s->nb_components = nb_components;
+    s->h_max = 1;
+    s->v_max = 1;
+    for(i=0;i<nb_components;i++) {
+        /* component id */
+        s->component_id[i] = get_bits(&s->gb, 8) - 1;
+        s->h_count[i] = get_bits(&s->gb, 4);
+        s->v_count[i] = get_bits(&s->gb, 4);
+        /* compute hmax and vmax (only used in interleaved case) */
+        if (s->h_count[i] > s->h_max)
+            s->h_max = s->h_count[i];
+        if (s->v_count[i] > s->v_max)
+            s->v_max = s->v_count[i];
+        s->quant_index[i] = get_bits(&s->gb, 8);
+        if (s->quant_index[i] >= 4)
+            return -1;
+        dprintf("component %d %d:%d id: %d quant:%d\n", i, s->h_count[i],
+            s->v_count[i], s->component_id[i], s->quant_index[i]);
+    }
+
+    if(s->ls && (s->h_max > 1 || s->v_max > 1)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Subsampling in JPEG-LS is not supported.\n");
+        return -1;
+    }
+
+    if(s->v_max==1 && s->h_max==1 && s->lossless==1) s->rgb=1;
+
+    /* if different size, realloc/alloc picture */
+    /* XXX: also check h_count and v_count */
+    if (width != s->width || height != s->height) {
+        av_freep(&s->qscale_table);
+
+        s->width = width;
+        s->height = height;
+
+        /* test interlaced mode */
+        if (s->first_picture &&
+            s->org_height != 0 &&
+            s->height < ((s->org_height * 3) / 4)) {
+            s->interlaced = 1;
+//            s->bottom_field = (s->interlace_polarity) ? 1 : 0;
+            s->bottom_field = 0;
+            height *= 2;
+        }
+
+        avcodec_set_dimensions(s->avctx, width, height);
+
+        s->qscale_table= av_mallocz((s->width+15)/16);
+
+        s->first_picture = 0;
+    }
+
+    if(s->interlaced && s->bottom_field)
+        return 0;
+
+    /* XXX: not complete test ! */
+    pix_fmt_id = (s->h_count[0] << 20) | (s->v_count[0] << 16) |
+                 (s->h_count[1] << 12) | (s->v_count[1] <<  8) |
+                 (s->h_count[2] <<  4) |  s->v_count[2];
+    dprintf("pix fmt id %x\n", pix_fmt_id);
+    switch(pix_fmt_id){
+    case 0x222222:
+    case 0x111111:
+        if(s->rgb){
+            s->avctx->pix_fmt = PIX_FMT_RGBA32;
+        }else if(s->nb_components==3)
+            s->avctx->pix_fmt = s->cs_itu601 ? PIX_FMT_YUV444P : PIX_FMT_YUVJ444P;
+        else
+            s->avctx->pix_fmt = PIX_FMT_GRAY8;
+        break;
+    case 0x211111:
+    case 0x221212:
+        s->avctx->pix_fmt = s->cs_itu601 ? PIX_FMT_YUV422P : PIX_FMT_YUVJ422P;
+        break;
+    default:
+    case 0x221111:
+        s->avctx->pix_fmt = s->cs_itu601 ? PIX_FMT_YUV420P : PIX_FMT_YUVJ420P;
+        break;
+    }
+    if(s->ls){
+        if(s->nb_components > 1)
+            s->avctx->pix_fmt = PIX_FMT_RGB24;
+        else if(s->bits <= 8)
+            s->avctx->pix_fmt = PIX_FMT_GRAY8;
+        else
+            s->avctx->pix_fmt = PIX_FMT_GRAY16;
+    }
+
+    if(s->picture.data[0])
+        s->avctx->release_buffer(s->avctx, &s->picture);
+
+    s->picture.reference= 0;
+    if(s->avctx->get_buffer(s->avctx, &s->picture) < 0){
+        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    s->picture.pict_type= I_TYPE;
+    s->picture.key_frame= 1;
+
+    for(i=0; i<3; i++){
+        s->linesize[i]= s->picture.linesize[i] << s->interlaced;
+    }
+
+//    printf("%d %d %d %d %d %d\n", s->width, s->height, s->linesize[0], s->linesize[1], s->interlaced, s->avctx->height);
+
+    if (len != (8+(3*nb_components)))
+    {
+        dprintf("decode_sof0: error, len(%d) mismatch\n", len);
+    }
+
+    /* totally blank picture as progressive JPEG will only add details to it */
+    if(s->progressive){
+        memset(s->picture.data[0], 0, s->picture.linesize[0] * s->height);
+        memset(s->picture.data[1], 0, s->picture.linesize[1] * s->height >> (s->v_max - s->v_count[1]));
+        memset(s->picture.data[2], 0, s->picture.linesize[2] * s->height >> (s->v_max - s->v_count[2]));
+    }
+    return 0;
+}
+
+static inline int mjpeg_decode_dc(MJpegDecodeContext *s, int dc_index)
+{
+    int code;
+    code = get_vlc2(&s->gb, s->vlcs[0][dc_index].table, 9, 2);
+    if (code < 0)
+    {
+        dprintf("mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index,
+                &s->vlcs[0][dc_index]);
+        return 0xffff;
+    }
+
+    if(code)
+        return get_xbits(&s->gb, code);
+    else
+        return 0;
+}
+
+/* decode block and dequantize */
+static int decode_block(MJpegDecodeContext *s, DCTELEM *block,
+                        int component, int dc_index, int ac_index, int16_t *quant_matrix)
+{
+    int code, i, j, level, val;
+
+    /* DC coef */
+    val = mjpeg_decode_dc(s, dc_index);
+    if (val == 0xffff) {
+        dprintf("error dc\n");
+        return -1;
+    }
+    val = val * quant_matrix[0] + s->last_dc[component];
+    s->last_dc[component] = val;
+    block[0] = val;
+    /* AC coefs */
+    i = 0;
+    {OPEN_READER(re, &s->gb)
+    for(;;) {
+        UPDATE_CACHE(re, &s->gb);
+        GET_VLC(code, re, &s->gb, s->vlcs[1][ac_index].table, 9, 2)
+
+        /* EOB */
+        if (code == 0x10)
+            break;
+        i += ((unsigned)code) >> 4;
+        if(code != 0x100){
+            code &= 0xf;
+            if(code > MIN_CACHE_BITS - 16){
+                UPDATE_CACHE(re, &s->gb)
+            }
+            {
+                int cache=GET_CACHE(re,&s->gb);
+                int sign=(~cache)>>31;
+                level = (NEG_USR32(sign ^ cache,code) ^ sign) - sign;
+            }
+
+            LAST_SKIP_BITS(re, &s->gb, code)
+
+            if (i >= 63) {
+                if(i == 63){
+                    j = s->scantable.permutated[63];
+                    block[j] = level * quant_matrix[j];
+                    break;
+                }
+                dprintf("error count: %d\n", i);
+                return -1;
+            }
+            j = s->scantable.permutated[i];
+            block[j] = level * quant_matrix[j];
+        }
+    }
+    CLOSE_READER(re, &s->gb)}
+
+    return 0;
+}
+
+/* decode block and dequantize - progressive JPEG version */
+static int decode_block_progressive(MJpegDecodeContext *s, DCTELEM *block,
+                        int component, int dc_index, int ac_index, int16_t *quant_matrix,
+                        int ss, int se, int Ah, int Al, int *EOBRUN)
+{
+    int code, i, j, level, val, run;
+
+    /* DC coef */
+    if(!ss){
+        val = mjpeg_decode_dc(s, dc_index);
+        if (val == 0xffff) {
+            dprintf("error dc\n");
+            return -1;
+        }
+        val = (val * quant_matrix[0] << Al) + s->last_dc[component];
+    }else
+        val = 0;
+    s->last_dc[component] = val;
+    block[0] = val;
+    if(!se) return 0;
+    /* AC coefs */
+    if(*EOBRUN){
+        (*EOBRUN)--;
+        return 0;
+    }
+    {OPEN_READER(re, &s->gb)
+    for(i=ss;;i++) {
+        UPDATE_CACHE(re, &s->gb);
+        GET_VLC(code, re, &s->gb, s->vlcs[1][ac_index].table, 9, 2)
+        /* Progressive JPEG use AC coeffs from zero and this decoder sets offset 16 by default */
+        code -= 16;
+        if(code & 0xF) {
+            i += ((unsigned) code) >> 4;
+            code &= 0xf;
+            if(code > MIN_CACHE_BITS - 16){
+                UPDATE_CACHE(re, &s->gb)
+            }
+            {
+                int cache=GET_CACHE(re,&s->gb);
+                int sign=(~cache)>>31;
+                level = (NEG_USR32(sign ^ cache,code) ^ sign) - sign;
+            }
+
+            LAST_SKIP_BITS(re, &s->gb, code)
+
+            if (i >= se) {
+                if(i == se){
+                    j = s->scantable.permutated[se];
+                    block[j] = level * quant_matrix[j] << Al;
+                    break;
+                }
+                dprintf("error count: %d\n", i);
+                return -1;
+            }
+            j = s->scantable.permutated[i];
+            block[j] = level * quant_matrix[j] << Al;
+        }else{
+            run = ((unsigned) code) >> 4;
+            if(run == 0xF){// ZRL - skip 15 coefficients
+                i += 15;
+            }else{
+                val = run;
+                run = (1 << run);
+                UPDATE_CACHE(re, &s->gb);
+                run += (GET_CACHE(re, &s->gb) >> (32 - val)) & (run - 1);
+                if(val)
+                    LAST_SKIP_BITS(re, &s->gb, val);
+                *EOBRUN = run - 1;
+                break;
+            }
+        }
+    }
+    CLOSE_READER(re, &s->gb)}
+
+    return 0;
+}
+
+static int ljpeg_decode_rgb_scan(MJpegDecodeContext *s, int predictor, int point_transform){
+    int i, mb_x, mb_y;
+    uint16_t buffer[32768][4];
+    int left[3], top[3], topleft[3];
+    const int linesize= s->linesize[0];
+    const int mask= (1<<s->bits)-1;
+
+    if((unsigned)s->mb_width > 32768) //dynamic alloc
+        return -1;
+
+    for(i=0; i<3; i++){
+        buffer[0][i]= 1 << (s->bits + point_transform - 1);
+    }
+    for(mb_y = 0; mb_y < s->mb_height; mb_y++) {
+        const int modified_predictor= mb_y ? predictor : 1;
+        uint8_t *ptr = s->picture.data[0] + (linesize * mb_y);
+
+        if (s->interlaced && s->bottom_field)
+            ptr += linesize >> 1;
+
+        for(i=0; i<3; i++){
+            top[i]= left[i]= topleft[i]= buffer[0][i];
+        }
+        for(mb_x = 0; mb_x < s->mb_width; mb_x++) {
+            if (s->restart_interval && !s->restart_count)
+                s->restart_count = s->restart_interval;
+
+            for(i=0;i<3;i++) {
+                int pred;
+
+                topleft[i]= top[i];
+                top[i]= buffer[mb_x][i];
+
+                PREDICT(pred, topleft[i], top[i], left[i], modified_predictor);
+
+                left[i]=
+                buffer[mb_x][i]= mask & (pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform));
+            }
+
+            if (s->restart_interval && !--s->restart_count) {
+                align_get_bits(&s->gb);
+                skip_bits(&s->gb, 16); /* skip RSTn */
+            }
+        }
+
+        if(s->rct){
+            for(mb_x = 0; mb_x < s->mb_width; mb_x++) {
+                ptr[4*mb_x+1] = buffer[mb_x][0] - ((buffer[mb_x][1] + buffer[mb_x][2] - 0x200)>>2);
+                ptr[4*mb_x+0] = buffer[mb_x][1] + ptr[4*mb_x+1];
+                ptr[4*mb_x+2] = buffer[mb_x][2] + ptr[4*mb_x+1];
+            }
+        }else if(s->pegasus_rct){
+            for(mb_x = 0; mb_x < s->mb_width; mb_x++) {
+                ptr[4*mb_x+1] = buffer[mb_x][0] - ((buffer[mb_x][1] + buffer[mb_x][2])>>2);
+                ptr[4*mb_x+0] = buffer[mb_x][1] + ptr[4*mb_x+1];
+                ptr[4*mb_x+2] = buffer[mb_x][2] + ptr[4*mb_x+1];
+            }
+        }else{
+            for(mb_x = 0; mb_x < s->mb_width; mb_x++) {
+                ptr[4*mb_x+0] = buffer[mb_x][0];
+                ptr[4*mb_x+1] = buffer[mb_x][1];
+                ptr[4*mb_x+2] = buffer[mb_x][2];
+            }
+        }
+    }
+    return 0;
+}
+
+static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor, int point_transform){
+    int i, mb_x, mb_y;
+    const int nb_components=3;
+
+    for(mb_y = 0; mb_y < s->mb_height; mb_y++) {
+        for(mb_x = 0; mb_x < s->mb_width; mb_x++) {
+            if (s->restart_interval && !s->restart_count)
+                s->restart_count = s->restart_interval;
+
+            if(mb_x==0 || mb_y==0 || s->interlaced){
+                for(i=0;i<nb_components;i++) {
+                    uint8_t *ptr;
+                    int n, h, v, x, y, c, j, linesize;
+                    n = s->nb_blocks[i];
+                    c = s->comp_index[i];
+                    h = s->h_scount[i];
+                    v = s->v_scount[i];
+                    x = 0;
+                    y = 0;
+                    linesize= s->linesize[c];
+
+                    for(j=0; j<n; j++) {
+                        int pred;
+
+                        ptr = s->picture.data[c] + (linesize * (v * mb_y + y)) + (h * mb_x + x); //FIXME optimize this crap
+                        if(y==0 && mb_y==0){
+                            if(x==0 && mb_x==0){
+                                pred= 128 << point_transform;
+                            }else{
+                                pred= ptr[-1];
+                            }
+                        }else{
+                            if(x==0 && mb_x==0){
+                                pred= ptr[-linesize];
+                            }else{
+                                PREDICT(pred, ptr[-linesize-1], ptr[-linesize], ptr[-1], predictor);
+                            }
+                        }
+
+                        if (s->interlaced && s->bottom_field)
+                            ptr += linesize >> 1;
+                        *ptr= pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform);
+
+                        if (++x == h) {
+                            x = 0;
+                            y++;
+                        }
+                    }
+                }
+            }else{
+                for(i=0;i<nb_components;i++) {
+                    uint8_t *ptr;
+                    int n, h, v, x, y, c, j, linesize;
+                    n = s->nb_blocks[i];
+                    c = s->comp_index[i];
+                    h = s->h_scount[i];
+                    v = s->v_scount[i];
+                    x = 0;
+                    y = 0;
+                    linesize= s->linesize[c];
+
+                    for(j=0; j<n; j++) {
+                        int pred;
+
+                        ptr = s->picture.data[c] + (linesize * (v * mb_y + y)) + (h * mb_x + x); //FIXME optimize this crap
+                        PREDICT(pred, ptr[-linesize-1], ptr[-linesize], ptr[-1], predictor);
+                        *ptr= pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform);
+                        if (++x == h) {
+                            x = 0;
+                            y++;
+                        }
+                    }
+                }
+            }
+            if (s->restart_interval && !--s->restart_count) {
+                align_get_bits(&s->gb);
+                skip_bits(&s->gb, 16); /* skip RSTn */
+            }
+        }
+    }
+    return 0;
+}
+
+static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int ss, int se, int Ah, int Al){
+    int i, mb_x, mb_y;
+    int EOBRUN = 0;
+
+    if(Ah) return 0; /* TODO decode refinement planes too */
+    for(mb_y = 0; mb_y < s->mb_height; mb_y++) {
+        for(mb_x = 0; mb_x < s->mb_width; mb_x++) {
+            if (s->restart_interval && !s->restart_count)
+                s->restart_count = s->restart_interval;
+
+            for(i=0;i<nb_components;i++) {
+                uint8_t *ptr;
+                int n, h, v, x, y, c, j;
+                n = s->nb_blocks[i];
+                c = s->comp_index[i];
+                h = s->h_scount[i];
+                v = s->v_scount[i];
+                x = 0;
+                y = 0;
+                for(j=0;j<n;j++) {
+                    memset(s->block, 0, sizeof(s->block));
+                    if (!s->progressive && decode_block(s, s->block, i,
+                                     s->dc_index[i], s->ac_index[i],
+                                     s->quant_matrixes[ s->quant_index[c] ]) < 0) {
+                        dprintf("error y=%d x=%d\n", mb_y, mb_x);
+                        return -1;
+                    }
+                    if (s->progressive && decode_block_progressive(s, s->block, i,
+                                     s->dc_index[i], s->ac_index[i],
+                                     s->quant_matrixes[ s->quant_index[c] ], ss, se, Ah, Al, &EOBRUN) < 0) {
+                        dprintf("error y=%d x=%d\n", mb_y, mb_x);
+                        return -1;
+                    }
+//                    dprintf("mb: %d %d processed\n", mb_y, mb_x);
+                    ptr = s->picture.data[c] +
+                        (((s->linesize[c] * (v * mb_y + y) * 8) +
+                        (h * mb_x + x) * 8) >> s->avctx->lowres);
+                    if (s->interlaced && s->bottom_field)
+                        ptr += s->linesize[c] >> 1;
+//av_log(NULL, AV_LOG_DEBUG, "%d %d %d %d %d %d %d %d \n", mb_x, mb_y, x, y, c, s->bottom_field, (v * mb_y + y) * 8, (h * mb_x + x) * 8);
+                    if(!s->progressive)
+                        s->idct_put(ptr, s->linesize[c], s->block);
+                    else
+                        s->idct_add(ptr, s->linesize[c], s->block);
+                    if (++x == h) {
+                        x = 0;
+                        y++;
+                    }
+                }
+            }
+            /* (< 1350) buggy workaround for Spectralfan.mov, should be fixed */
+            if (s->restart_interval && (s->restart_interval < 1350) &&
+                !--s->restart_count) {
+                align_get_bits(&s->gb);
+                skip_bits(&s->gb, 16); /* skip RSTn */
+                for (i=0; i<nb_components; i++) /* reset dc */
+                    s->last_dc[i] = 1024;
+            }
+        }
+    }
+    return 0;
+}
+
+static int mjpeg_decode_sos(MJpegDecodeContext *s)
+{
+    int len, nb_components, i, h, v, predictor, point_transform;
+    int vmax, hmax, index, id;
+    const int block_size= s->lossless ? 1 : 8;
+    int ilv, prev_shift;
+
+    /* XXX: verify len field validity */
+    len = get_bits(&s->gb, 16);
+    nb_components = get_bits(&s->gb, 8);
+    if (len != 6+2*nb_components)
+    {
+        dprintf("decode_sos: invalid len (%d)\n", len);
+        return -1;
+    }
+    vmax = 0;
+    hmax = 0;
+    for(i=0;i<nb_components;i++) {
+        id = get_bits(&s->gb, 8) - 1;
+        dprintf("component: %d\n", id);
+        /* find component index */
+        for(index=0;index<s->nb_components;index++)
+            if (id == s->component_id[index])
+                break;
+        if (index == s->nb_components)
+        {
+            dprintf("decode_sos: index(%d) out of components\n", index);
+            return -1;
+        }
+
+        s->comp_index[i] = index;
+
+        s->nb_blocks[i] = s->h_count[index] * s->v_count[index];
+        s->h_scount[i] = s->h_count[index];
+        s->v_scount[i] = s->v_count[index];
+
+        s->dc_index[i] = get_bits(&s->gb, 4);
+        s->ac_index[i] = get_bits(&s->gb, 4);
+
+        if (s->dc_index[i] <  0 || s->ac_index[i] < 0 ||
+            s->dc_index[i] >= 4 || s->ac_index[i] >= 4)
+            goto out_of_range;
+#if 0 //buggy
+        switch(s->start_code)
+        {
+            case SOF0:
+                if (dc_index[i] > 1 || ac_index[i] > 1)
+                    goto out_of_range;
+                break;
+            case SOF1:
+            case SOF2:
+                if (dc_index[i] > 3 || ac_index[i] > 3)
+                    goto out_of_range;
+                break;
+            case SOF3:
+                if (dc_index[i] > 3 || ac_index[i] != 0)
+                    goto out_of_range;
+                break;
+        }
+#endif
+    }
+
+    predictor= get_bits(&s->gb, 8); /* JPEG Ss / lossless JPEG predictor /JPEG-LS NEAR */
+    ilv= get_bits(&s->gb, 8);    /* JPEG Se / JPEG-LS ILV */
+    prev_shift = get_bits(&s->gb, 4); /* Ah */
+    point_transform= get_bits(&s->gb, 4); /* Al */
+
+    for(i=0;i<nb_components;i++)
+        s->last_dc[i] = 1024;
+
+    if (nb_components > 1) {
+        /* interleaved stream */
+        s->mb_width  = (s->width  + s->h_max * block_size - 1) / (s->h_max * block_size);
+        s->mb_height = (s->height + s->v_max * block_size - 1) / (s->v_max * block_size);
+    } else if(!s->ls) { /* skip this for JPEG-LS */
+        h = s->h_max / s->h_scount[0];
+        v = s->v_max / s->v_scount[0];
+        s->mb_width  = (s->width  + h * block_size - 1) / (h * block_size);
+        s->mb_height = (s->height + v * block_size - 1) / (v * block_size);
+        s->nb_blocks[0] = 1;
+        s->h_scount[0] = 1;
+        s->v_scount[0] = 1;
+    }
+
+    if(s->avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(s->avctx, AV_LOG_DEBUG, "%s %s p:%d >>:%d ilv:%d bits:%d %s\n", s->lossless ? "lossless" : "sequencial DCT", s->rgb ? "RGB" : "",
+               predictor, point_transform, ilv, s->bits,
+               s->pegasus_rct ? "PRCT" : (s->rct ? "RCT" : ""));
+
+
+    /* mjpeg-b can have padding bytes between sos and image data, skip them */
+    for (i = s->mjpb_skiptosod; i > 0; i--)
+        skip_bits(&s->gb, 8);
+
+    if(s->lossless){
+        if(s->ls){
+//            for(){
+//            reset_ls_coding_parameters(s, 0);
+
+            ls_decode_picture(s, predictor, point_transform, ilv);
+        }else{
+            if(s->rgb){
+                if(ljpeg_decode_rgb_scan(s, predictor, point_transform) < 0)
+                    return -1;
+            }else{
+                if(ljpeg_decode_yuv_scan(s, predictor, point_transform) < 0)
+                    return -1;
+            }
+        }
+    }else{
+        if(mjpeg_decode_scan(s, nb_components, predictor, ilv, prev_shift, point_transform) < 0)
+            return -1;
+    }
+    emms_c();
+    return 0;
+ out_of_range:
+    dprintf("decode_sos: ac/dc index out of range\n");
+    return -1;
+}
+
+static int mjpeg_decode_dri(MJpegDecodeContext *s)
+{
+    if (get_bits(&s->gb, 16) != 4)
+        return -1;
+    s->restart_interval = get_bits(&s->gb, 16);
+    s->restart_count = 0;
+    dprintf("restart interval: %d\n", s->restart_interval);
+
+    return 0;
+}
+
+static int mjpeg_decode_app(MJpegDecodeContext *s)
+{
+    int len, id;
+
+    len = get_bits(&s->gb, 16);
+    if (len < 5)
+        return -1;
+    if(8*len + get_bits_count(&s->gb) > s->gb.size_in_bits)
+        return -1;
+
+    id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16);
+    id = be2me_32(id);
+    len -= 6;
+
+    if(s->avctx->debug & FF_DEBUG_STARTCODE){
+        av_log(s->avctx, AV_LOG_DEBUG, "APPx %8X\n", id);
+    }
+
+    /* buggy AVID, it puts EOI only at every 10th frame */
+    /* also this fourcc is used by non-avid files too, it holds some
+       informations, but it's always present in AVID creates files */
+    if (id == ff_get_fourcc("AVI1"))
+    {
+        /* structure:
+            4bytes      AVI1
+            1bytes      polarity
+            1bytes      always zero
+            4bytes      field_size
+            4bytes      field_size_less_padding
+        */
+            s->buggy_avid = 1;
+//        if (s->first_picture)
+//            printf("mjpeg: workarounding buggy AVID\n");
+        s->interlace_polarity = get_bits(&s->gb, 8);
+#if 0
+        skip_bits(&s->gb, 8);
+        skip_bits(&s->gb, 32);
+        skip_bits(&s->gb, 32);
+        len -= 10;
+#endif
+//        if (s->interlace_polarity)
+//            printf("mjpeg: interlace polarity: %d\n", s->interlace_polarity);
+        goto out;
+    }
+
+//    len -= 2;
+
+    if (id == ff_get_fourcc("JFIF"))
+    {
+        int t_w, t_h, v1, v2;
+        skip_bits(&s->gb, 8); /* the trailing zero-byte */
+        v1= get_bits(&s->gb, 8);
+        v2= get_bits(&s->gb, 8);
+        skip_bits(&s->gb, 8);
+
+        s->avctx->sample_aspect_ratio.num= get_bits(&s->gb, 16);
+        s->avctx->sample_aspect_ratio.den= get_bits(&s->gb, 16);
+
+        if (s->avctx->debug & FF_DEBUG_PICT_INFO)
+            av_log(s->avctx, AV_LOG_INFO, "mjpeg: JFIF header found (version: %x.%x) SAR=%d/%d\n",
+                v1, v2,
+                s->avctx->sample_aspect_ratio.num,
+                s->avctx->sample_aspect_ratio.den
+            );
+
+        t_w = get_bits(&s->gb, 8);
+        t_h = get_bits(&s->gb, 8);
+        if (t_w && t_h)
+        {
+            /* skip thumbnail */
+            if (len-10-(t_w*t_h*3) > 0)
+                len -= t_w*t_h*3;
+        }
+        len -= 10;
+        goto out;
+    }
+
+    if (id == ff_get_fourcc("Adob") && (get_bits(&s->gb, 8) == 'e'))
+    {
+        if (s->avctx->debug & FF_DEBUG_PICT_INFO)
+            av_log(s->avctx, AV_LOG_INFO, "mjpeg: Adobe header found\n");
+        skip_bits(&s->gb, 16); /* version */
+        skip_bits(&s->gb, 16); /* flags0 */
+        skip_bits(&s->gb, 16); /* flags1 */
+        skip_bits(&s->gb, 8);  /* transform */
+        len -= 7;
+        goto out;
+    }
+
+    if (id == ff_get_fourcc("LJIF")){
+        if (s->avctx->debug & FF_DEBUG_PICT_INFO)
+            av_log(s->avctx, AV_LOG_INFO, "Pegasus lossless jpeg header found\n");
+        skip_bits(&s->gb, 16); /* version ? */
+        skip_bits(&s->gb, 16); /* unknwon always 0? */
+        skip_bits(&s->gb, 16); /* unknwon always 0? */
+        skip_bits(&s->gb, 16); /* unknwon always 0? */
+        switch( get_bits(&s->gb, 8)){
+        case 1:
+            s->rgb= 1;
+            s->pegasus_rct=0;
+            break;
+        case 2:
+            s->rgb= 1;
+            s->pegasus_rct=1;
+            break;
+        default:
+            av_log(s->avctx, AV_LOG_ERROR, "unknown colorspace\n");
+        }
+        len -= 9;
+        goto out;
+    }
+
+    /* Apple MJPEG-A */
+    if ((s->start_code == APP1) && (len > (0x28 - 8)))
+    {
+        id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16);
+        id = be2me_32(id);
+        len -= 4;
+        if (id == ff_get_fourcc("mjpg")) /* Apple MJPEG-A */
+        {
+#if 0
+            skip_bits(&s->gb, 32); /* field size */
+            skip_bits(&s->gb, 32); /* pad field size */
+            skip_bits(&s->gb, 32); /* next off */
+            skip_bits(&s->gb, 32); /* quant off */
+            skip_bits(&s->gb, 32); /* huff off */
+            skip_bits(&s->gb, 32); /* image off */
+            skip_bits(&s->gb, 32); /* scan off */
+            skip_bits(&s->gb, 32); /* data off */
+#endif
+            if (s->avctx->debug & FF_DEBUG_PICT_INFO)
+                av_log(s->avctx, AV_LOG_INFO, "mjpeg: Apple MJPEG-A header found\n");
+        }
+    }
+
+out:
+    /* slow but needed for extreme adobe jpegs */
+    if (len < 0)
+        av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error, decode_app parser read over the end\n");
+    while(--len > 0)
+        skip_bits(&s->gb, 8);
+
+    return 0;
+}
+
+static int mjpeg_decode_com(MJpegDecodeContext *s)
+{
+    int len = get_bits(&s->gb, 16);
+    if (len >= 2 && 8*len - 16 + get_bits_count(&s->gb) <= s->gb.size_in_bits) {
+        char *cbuf = av_malloc(len - 1);
+        if (cbuf) {
+            int i;
+            for (i = 0; i < len - 2; i++)
+                cbuf[i] = get_bits(&s->gb, 8);
+            if (i > 0 && cbuf[i-1] == '\n')
+                cbuf[i-1] = 0;
+            else
+                cbuf[i] = 0;
+
+            if(s->avctx->debug & FF_DEBUG_PICT_INFO)
+                av_log(s->avctx, AV_LOG_INFO, "mjpeg comment: '%s'\n", cbuf);
+
+            /* buggy avid, it puts EOI only at every 10th frame */
+            if (!strcmp(cbuf, "AVID"))
+            {
+                s->buggy_avid = 1;
+                //        if (s->first_picture)
+                //            printf("mjpeg: workarounding buggy AVID\n");
+            }
+            else if(!strcmp(cbuf, "CS=ITU601")){
+                s->cs_itu601= 1;
+            }
+
+            av_free(cbuf);
+        }
+    }
+
+    return 0;
+}
+
+#if 0
+static int valid_marker_list[] =
+{
+        /* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f */
+/* 0 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/* 1 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/* 2 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/* 3 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/* 4 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/* 5 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/* 6 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/* 7 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/* 8 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/* 9 */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/* a */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/* b */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/* c */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/* d */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/* e */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/* f */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+}
+#endif
+
+/* return the 8 bit start code value and update the search
+   state. Return -1 if no start code found */
+static int find_marker(uint8_t **pbuf_ptr, uint8_t *buf_end)
+{
+    uint8_t *buf_ptr;
+    unsigned int v, v2;
+    int val;
+#ifdef DEBUG
+    int skipped=0;
+#endif
+
+    buf_ptr = *pbuf_ptr;
+    while (buf_ptr < buf_end) {
+        v = *buf_ptr++;
+        v2 = *buf_ptr;
+        if ((v == 0xff) && (v2 >= 0xc0) && (v2 <= 0xfe) && buf_ptr < buf_end) {
+            val = *buf_ptr++;
+            goto found;
+        }
+#ifdef DEBUG
+        skipped++;
+#endif
+    }
+    val = -1;
+found:
+#ifdef DEBUG
+    dprintf("find_marker skipped %d bytes\n", skipped);
+#endif
+    *pbuf_ptr = buf_ptr;
+    return val;
+}
+
+static int mjpeg_decode_frame(AVCodecContext *avctx,
+                              void *data, int *data_size,
+                              uint8_t *buf, int buf_size)
+{
+    MJpegDecodeContext *s = avctx->priv_data;
+    uint8_t *buf_end, *buf_ptr;
+    int start_code;
+    AVFrame *picture = data;
+
+    buf_ptr = buf;
+    buf_end = buf + buf_size;
+    while (buf_ptr < buf_end) {
+        /* find start next marker */
+        start_code = find_marker(&buf_ptr, buf_end);
+        {
+            /* EOF */
+            if (start_code < 0) {
+                goto the_end;
+            } else {
+                dprintf("marker=%x avail_size_in_buf=%d\n", start_code, buf_end - buf_ptr);
+
+                if ((buf_end - buf_ptr) > s->buffer_size)
+                {
+                    av_free(s->buffer);
+                    s->buffer_size = buf_end-buf_ptr;
+                    s->buffer = av_malloc(s->buffer_size + FF_INPUT_BUFFER_PADDING_SIZE);
+                    dprintf("buffer too small, expanding to %d bytes\n",
+                        s->buffer_size);
+                }
+
+                /* unescape buffer of SOS, use special treatment for JPEG-LS */
+                if (start_code == SOS && !s->ls)
+                {
+                    uint8_t *src = buf_ptr;
+                    uint8_t *dst = s->buffer;
+
+                    while (src<buf_end)
+                    {
+                        uint8_t x = *(src++);
+
+                        *(dst++) = x;
+                        if (x == 0xff)
+                        {
+                            while(src<buf_end && x == 0xff)
+                                x = *(src++);
+
+                            if (x >= 0xd0 && x <= 0xd7)
+                                *(dst++) = x;
+                            else if (x)
+                                break;
+                        }
+                    }
+                    init_get_bits(&s->gb, s->buffer, (dst - s->buffer)*8);
+
+                    dprintf("escaping removed %d bytes\n",
+                        (buf_end - buf_ptr) - (dst - s->buffer));
+                }
+                else if(start_code == SOS && s->ls){
+                    uint8_t *src = buf_ptr;
+                    uint8_t *dst = s->buffer;
+                    int bit_count = 0;
+                    int t = 0, b = 0;
+                    PutBitContext pb;
+
+                    s->cur_scan++;
+
+                    /* find marker */
+                    while (src + t < buf_end){
+                        uint8_t x = src[t++];
+                        if (x == 0xff){
+                            while((src + t < buf_end) && x == 0xff)
+                                x = src[t++];
+                            if (x & 0x80) {
+                                t -= 2;
+                                break;
+                            }
+                        }
+                    }
+                    bit_count = t * 8;
+
+                    init_put_bits(&pb, dst, t);
+
+                    /* unescape bitstream */
+                    while(b < t){
+                        uint8_t x = src[b++];
+                        put_bits(&pb, 8, x);
+                        if(x == 0xFF){
+                            x = src[b++];
+                            put_bits(&pb, 7, x);
+                            bit_count--;
+                        }
+                    }
+                    flush_put_bits(&pb);
+
+                    init_get_bits(&s->gb, dst, bit_count);
+                }
+                else
+                    init_get_bits(&s->gb, buf_ptr, (buf_end - buf_ptr)*8);
+
+                s->start_code = start_code;
+                if(s->avctx->debug & FF_DEBUG_STARTCODE){
+                    av_log(s->avctx, AV_LOG_DEBUG, "startcode: %X\n", start_code);
+                }
+
+                /* process markers */
+                if (start_code >= 0xd0 && start_code <= 0xd7) {
+                    dprintf("restart marker: %d\n", start_code&0x0f);
+                    /* APP fields */
+                } else if (start_code >= APP0 && start_code <= APP15) {
+                    mjpeg_decode_app(s);
+                    /* Comment */
+                } else if (start_code == COM){
+                    mjpeg_decode_com(s);
+                }
+
+                switch(start_code) {
+                case SOI:
+                    s->restart_interval = 0;
+
+                    s->restart_count = 0;
+                    /* nothing to do on SOI */
+                    break;
+                case DQT:
+                    mjpeg_decode_dqt(s);
+                    break;
+                case DHT:
+                    if(mjpeg_decode_dht(s) < 0){
+                        av_log(s->avctx, AV_LOG_ERROR, "huffman table decode error\n");
+                        return -1;
+                    }
+                    break;
+                case SOF0:
+                    s->lossless=0;
+                    s->progressive=0;
+                    if (mjpeg_decode_sof(s) < 0)
+                        return -1;
+                    break;
+                case SOF2:
+                    s->lossless=0;
+                    s->progressive=1;
+                    if (mjpeg_decode_sof(s) < 0)
+                        return -1;
+                    break;
+                case SOF3:
+                    s->lossless=1;
+                    s->progressive=0;
+                    if (mjpeg_decode_sof(s) < 0)
+                        return -1;
+                    break;
+                case SOF48:
+                    s->lossless=1;
+                    s->ls=1;
+                    s->progressive=0;
+                    if (mjpeg_decode_sof(s) < 0)
+                        return -1;
+                    break;
+                case LSE:
+                    if (decode_lse(s) < 0)
+                        return -1;
+                    break;
+                case EOI:
+                    s->cur_scan = 0;
+                    if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
+                        break;
+eoi_parser:
+                    {
+                        if (s->interlaced) {
+                            s->bottom_field ^= 1;
+                            /* if not bottom field, do not output image yet */
+                            if (s->bottom_field)
+                                goto not_the_end;
+                        }
+                        *picture = s->picture;
+                        *data_size = sizeof(AVFrame);
+
+                        if(!s->lossless){
+                            picture->quality= FFMAX(FFMAX(s->qscale[0], s->qscale[1]), s->qscale[2]);
+                            picture->qstride= 0;
+                            picture->qscale_table= s->qscale_table;
+                            memset(picture->qscale_table, picture->quality, (s->width+15)/16);
+                            if(avctx->debug & FF_DEBUG_QP)
+                                av_log(s->avctx, AV_LOG_DEBUG, "QP: %d\n", picture->quality);
+                            picture->quality*= FF_QP2LAMBDA;
+                        }
+
+                        goto the_end;
+                    }
+                    break;
+                case SOS:
+                    mjpeg_decode_sos(s);
+                    /* buggy avid puts EOI every 10-20th frame */
+                    /* if restart period is over process EOI */
+                    if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
+                        goto eoi_parser;
+                    break;
+                case DRI:
+                    mjpeg_decode_dri(s);
+                    break;
+                case SOF1:
+                case SOF5:
+                case SOF6:
+                case SOF7:
+                case SOF9:
+                case SOF10:
+                case SOF11:
+                case SOF13:
+                case SOF14:
+                case SOF15:
+                case JPG:
+                    av_log(s->avctx, AV_LOG_ERROR, "mjpeg: unsupported coding type (%x)\n", start_code);
+                    break;
+//                default:
+//                    printf("mjpeg: unsupported marker (%x)\n", start_code);
+//                    break;
+                }
+
+not_the_end:
+                /* eof process start code */
+                buf_ptr += (get_bits_count(&s->gb)+7)/8;
+                dprintf("marker parser used %d bytes (%d bits)\n",
+                    (get_bits_count(&s->gb)+7)/8, get_bits_count(&s->gb));
+            }
+        }
+    }
+the_end:
+    dprintf("mjpeg decode frame unused %d bytes\n", buf_end - buf_ptr);
+//    return buf_end - buf_ptr;
+    return buf_ptr - buf;
+}
+
+static int mjpegb_decode_frame(AVCodecContext *avctx,
+                              void *data, int *data_size,
+                              uint8_t *buf, int buf_size)
+{
+    MJpegDecodeContext *s = avctx->priv_data;
+    uint8_t *buf_end, *buf_ptr;
+    AVFrame *picture = data;
+    GetBitContext hgb; /* for the header */
+    uint32_t dqt_offs, dht_offs, sof_offs, sos_offs, second_field_offs;
+    uint32_t field_size, sod_offs;
+
+    buf_ptr = buf;
+    buf_end = buf + buf_size;
+
+read_header:
+    /* reset on every SOI */
+    s->restart_interval = 0;
+    s->restart_count = 0;
+    s->mjpb_skiptosod = 0;
+
+    init_get_bits(&hgb, buf_ptr, /*buf_size*/(buf_end - buf_ptr)*8);
+
+    skip_bits(&hgb, 32); /* reserved zeros */
+
+    if (get_bits_long(&hgb, 32) != be2me_32(ff_get_fourcc("mjpg")))
+    {
+        dprintf("not mjpeg-b (bad fourcc)\n");
+        return 0;
+    }
+
+    field_size = get_bits_long(&hgb, 32); /* field size */
+    dprintf("field size: 0x%x\n", field_size);
+    skip_bits(&hgb, 32); /* padded field size */
+    second_field_offs = get_bits_long(&hgb, 32);
+    dprintf("second field offs: 0x%x\n", second_field_offs);
+    if (second_field_offs)
+        s->interlaced = 1;
+
+    dqt_offs = get_bits_long(&hgb, 32);
+    dprintf("dqt offs: 0x%x\n", dqt_offs);
+    if (dqt_offs)
+    {
+        init_get_bits(&s->gb, buf+dqt_offs, (buf_end - (buf+dqt_offs))*8);
+        s->start_code = DQT;
+        mjpeg_decode_dqt(s);
+    }
+
+    dht_offs = get_bits_long(&hgb, 32);
+    dprintf("dht offs: 0x%x\n", dht_offs);
+    if (dht_offs)
+    {
+        init_get_bits(&s->gb, buf+dht_offs, (buf_end - (buf+dht_offs))*8);
+        s->start_code = DHT;
+        mjpeg_decode_dht(s);
+    }
+
+    sof_offs = get_bits_long(&hgb, 32);
+    dprintf("sof offs: 0x%x\n", sof_offs);
+    if (sof_offs)
+    {
+        init_get_bits(&s->gb, buf+sof_offs, (buf_end - (buf+sof_offs))*8);
+        s->start_code = SOF0;
+        if (mjpeg_decode_sof(s) < 0)
+            return -1;
+    }
+
+    sos_offs = get_bits_long(&hgb, 32);
+    dprintf("sos offs: 0x%x\n", sos_offs);
+    sod_offs = get_bits_long(&hgb, 32);
+    dprintf("sod offs: 0x%x\n", sod_offs);
+    if (sos_offs)
+    {
+//        init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8);
+        init_get_bits(&s->gb, buf+sos_offs, field_size*8);
+        s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16));
+        s->start_code = SOS;
+        mjpeg_decode_sos(s);
+    }
+
+    if (s->interlaced) {
+        s->bottom_field ^= 1;
+        /* if not bottom field, do not output image yet */
+        if (s->bottom_field && second_field_offs)
+        {
+            buf_ptr = buf + second_field_offs;
+            second_field_offs = 0;
+            goto read_header;
+            }
+    }
+
+    //XXX FIXME factorize, this looks very similar to the EOI code
+
+    *picture= s->picture;
+    *data_size = sizeof(AVFrame);
+
+    if(!s->lossless){
+        picture->quality= FFMAX(FFMAX(s->qscale[0], s->qscale[1]), s->qscale[2]);
+        picture->qstride= 0;
+        picture->qscale_table= s->qscale_table;
+        memset(picture->qscale_table, picture->quality, (s->width+15)/16);
+        if(avctx->debug & FF_DEBUG_QP)
+            av_log(avctx, AV_LOG_DEBUG, "QP: %d\n", picture->quality);
+        picture->quality*= FF_QP2LAMBDA;
+    }
+
+    return buf_ptr - buf;
+}
+
+#include "sp5x.h"
+
+static int sp5x_decode_frame(AVCodecContext *avctx,
+                              void *data, int *data_size,
+                              uint8_t *buf, int buf_size)
+{
+#if 0
+    MJpegDecodeContext *s = avctx->priv_data;
+#endif
+    const int qscale = 5;
+    uint8_t *buf_ptr, *buf_end, *recoded;
+    int i = 0, j = 0;
+
+    if (!avctx->width || !avctx->height)
+        return -1;
+
+    buf_ptr = buf;
+    buf_end = buf + buf_size;
+
+#if 1
+    recoded = av_mallocz(buf_size + 1024);
+    if (!recoded)
+        return -1;
+
+    /* SOI */
+    recoded[j++] = 0xFF;
+    recoded[j++] = 0xD8;
+
+    memcpy(recoded+j, &sp5x_data_dqt[0], sizeof(sp5x_data_dqt));
+    memcpy(recoded+j+5, &sp5x_quant_table[qscale * 2], 64);
+    memcpy(recoded+j+70, &sp5x_quant_table[(qscale * 2) + 1], 64);
+    j += sizeof(sp5x_data_dqt);
+
+    memcpy(recoded+j, &sp5x_data_dht[0], sizeof(sp5x_data_dht));
+    j += sizeof(sp5x_data_dht);
+
+    memcpy(recoded+j, &sp5x_data_sof[0], sizeof(sp5x_data_sof));
+    recoded[j+5] = (avctx->coded_height >> 8) & 0xFF;
+    recoded[j+6] = avctx->coded_height & 0xFF;
+    recoded[j+7] = (avctx->coded_width >> 8) & 0xFF;
+    recoded[j+8] = avctx->coded_width & 0xFF;
+    j += sizeof(sp5x_data_sof);
+
+    memcpy(recoded+j, &sp5x_data_sos[0], sizeof(sp5x_data_sos));
+    j += sizeof(sp5x_data_sos);
+
+    for (i = 14; i < buf_size && j < buf_size+1024-2; i++)
+    {
+        recoded[j++] = buf[i];
+        if (buf[i] == 0xff)
+            recoded[j++] = 0;
+    }
+
+    /* EOI */
+    recoded[j++] = 0xFF;
+    recoded[j++] = 0xD9;
+
+    i = mjpeg_decode_frame(avctx, data, data_size, recoded, j);
+
+    av_free(recoded);
+
+#else
+    /* SOF */
+    s->bits = 8;
+    s->width  = avctx->coded_width;
+    s->height = avctx->coded_height;
+    s->nb_components = 3;
+    s->component_id[0] = 0;
+    s->h_count[0] = 2;
+    s->v_count[0] = 2;
+    s->quant_index[0] = 0;
+    s->component_id[1] = 1;
+    s->h_count[1] = 1;
+    s->v_count[1] = 1;
+    s->quant_index[1] = 1;
+    s->component_id[2] = 2;
+    s->h_count[2] = 1;
+    s->v_count[2] = 1;
+    s->quant_index[2] = 1;
+    s->h_max = 2;
+    s->v_max = 2;
+
+    s->qscale_table = av_mallocz((s->width+15)/16);
+    avctx->pix_fmt = s->cs_itu601 ? PIX_FMT_YUV420P : PIX_FMT_YUVJ420;
+    s->interlaced = 0;
+
+    s->picture.reference = 0;
+    if (avctx->get_buffer(avctx, &s->picture) < 0)
+    {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    s->picture.pict_type = I_TYPE;
+    s->picture.key_frame = 1;
+
+    for (i = 0; i < 3; i++)
+        s->linesize[i] = s->picture.linesize[i] << s->interlaced;
+
+    /* DQT */
+    for (i = 0; i < 64; i++)
+    {
+        j = s->scantable.permutated[i];
+        s->quant_matrixes[0][j] = sp5x_quant_table[(qscale * 2) + i];
+    }
+    s->qscale[0] = FFMAX(
+        s->quant_matrixes[0][s->scantable.permutated[1]],
+        s->quant_matrixes[0][s->scantable.permutated[8]]) >> 1;
+
+    for (i = 0; i < 64; i++)
+    {
+        j = s->scantable.permutated[i];
+        s->quant_matrixes[1][j] = sp5x_quant_table[(qscale * 2) + 1 + i];
+    }
+    s->qscale[1] = FFMAX(
+        s->quant_matrixes[1][s->scantable.permutated[1]],
+        s->quant_matrixes[1][s->scantable.permutated[8]]) >> 1;
+
+    /* DHT */
+
+    /* SOS */
+    s->comp_index[0] = 0;
+    s->nb_blocks[0] = s->h_count[0] * s->v_count[0];
+    s->h_scount[0] = s->h_count[0];
+    s->v_scount[0] = s->v_count[0];
+    s->dc_index[0] = 0;
+    s->ac_index[0] = 0;
+
+    s->comp_index[1] = 1;
+    s->nb_blocks[1] = s->h_count[1] * s->v_count[1];
+    s->h_scount[1] = s->h_count[1];
+    s->v_scount[1] = s->v_count[1];
+    s->dc_index[1] = 1;
+    s->ac_index[1] = 1;
+
+    s->comp_index[2] = 2;
+    s->nb_blocks[2] = s->h_count[2] * s->v_count[2];
+    s->h_scount[2] = s->h_count[2];
+    s->v_scount[2] = s->v_count[2];
+    s->dc_index[2] = 1;
+    s->ac_index[2] = 1;
+
+    for (i = 0; i < 3; i++)
+        s->last_dc[i] = 1024;
+
+    s->mb_width = (s->width * s->h_max * 8 -1) / (s->h_max * 8);
+    s->mb_height = (s->height * s->v_max * 8 -1) / (s->v_max * 8);
+
+    init_get_bits(&s->gb, buf+14, (buf_size-14)*8);
+
+    return mjpeg_decode_scan(s);
+#endif
+
+    return i;
+}
+
+static int mjpeg_decode_end(AVCodecContext *avctx)
+{
+    MJpegDecodeContext *s = avctx->priv_data;
+    int i, j;
+
+    av_free(s->buffer);
+    av_free(s->qscale_table);
+
+    for(i=0;i<2;i++) {
+        for(j=0;j<4;j++)
+            free_vlc(&s->vlcs[i][j]);
+    }
+    return 0;
+}
+
+static int mjpega_dump_header(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, const char *args,
+                              uint8_t **poutbuf, int *poutbuf_size,
+                              const uint8_t *buf, int buf_size, int keyframe)
+{
+    uint8_t *poutbufp;
+    int i;
+
+    if (avctx->codec_id != CODEC_ID_MJPEG) {
+        av_log(avctx, AV_LOG_ERROR, "mjpega bitstream filter only applies to mjpeg codec\n");
+        return 0;
+    }
+
+    *poutbuf_size = 0;
+    *poutbuf = av_malloc(buf_size + 44 + FF_INPUT_BUFFER_PADDING_SIZE);
+    poutbufp = *poutbuf;
+    bytestream_put_byte(&poutbufp, 0xff);
+    bytestream_put_byte(&poutbufp, SOI);
+    bytestream_put_byte(&poutbufp, 0xff);
+    bytestream_put_byte(&poutbufp, APP1);
+    bytestream_put_be16(&poutbufp, 42); /* size */
+    bytestream_put_be32(&poutbufp, 0);
+    bytestream_put_buffer(&poutbufp, "mjpg", 4);
+    bytestream_put_be32(&poutbufp, buf_size + 44); /* field size */
+    bytestream_put_be32(&poutbufp, buf_size + 44); /* pad field size */
+    bytestream_put_be32(&poutbufp, 0);             /* next ptr */
+
+    for (i = 0; i < buf_size - 1; i++) {
+        if (buf[i] == 0xff) {
+            switch (buf[i + 1]) {
+            case DQT:  /* quant off */
+            case DHT:  /* huff  off */
+            case SOF0: /* image off */
+                bytestream_put_be32(&poutbufp, i + 46);
+                break;
+            case SOS:
+                bytestream_put_be32(&poutbufp, i + 46); /* scan off */
+                bytestream_put_be32(&poutbufp, i + 46 + BE_16(buf + i + 2)); /* data off */
+                bytestream_put_buffer(&poutbufp, buf + 2, buf_size - 2); /* skip already written SOI */
+                *poutbuf_size = poutbufp - *poutbuf;
+                return 1;
+            case APP1:
+                if (i + 8 < buf_size && LE_32(buf + i + 8) == ff_get_fourcc("mjpg")) {
+                    av_log(avctx, AV_LOG_ERROR, "bitstream already formatted\n");
+                    memcpy(*poutbuf, buf, buf_size);
+                    *poutbuf_size = buf_size;
+                    return 1;
+                }
+            }
+        }
+    }
+    av_freep(poutbuf);
+    av_log(avctx, AV_LOG_ERROR, "could not find SOS marker in bitstream\n");
+    return 0;
+}
+
+AVCodec mjpeg_decoder = {
+    "mjpeg",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MJPEG,
+    sizeof(MJpegDecodeContext),
+    mjpeg_decode_init,
+    NULL,
+    mjpeg_decode_end,
+    mjpeg_decode_frame,
+    CODEC_CAP_DR1,
+    NULL
+};
+
+AVCodec mjpegb_decoder = {
+    "mjpegb",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MJPEGB,
+    sizeof(MJpegDecodeContext),
+    mjpeg_decode_init,
+    NULL,
+    mjpeg_decode_end,
+    mjpegb_decode_frame,
+    CODEC_CAP_DR1,
+    NULL
+};
+
+AVCodec sp5x_decoder = {
+    "sp5x",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_SP5X,
+    sizeof(MJpegDecodeContext),
+    mjpeg_decode_init,
+    NULL,
+    mjpeg_decode_end,
+    sp5x_decode_frame,
+    CODEC_CAP_DR1,
+    NULL
+};
+
+#ifdef CONFIG_ENCODERS
+AVCodec ljpeg_encoder = { //FIXME avoid MPV_* lossless jpeg shouldnt need them
+    "ljpeg",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_LJPEG,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    encode_picture_lossless,
+    MPV_encode_end,
+};
+#endif
+
+AVCodecParser mjpeg_parser = {
+    { CODEC_ID_MJPEG },
+    sizeof(ParseContext),
+    NULL,
+    jpeg_parse,
+    ff_parse_close,
+};
+
+AVBitStreamFilter mjpega_dump_header_bsf = {
+    "mjpegadump",
+    0,
+    mjpega_dump_header,
+};
diff --git a/contrib/ffmpeg/libavcodec/mlib/dsputil_mlib.c b/contrib/ffmpeg/libavcodec/mlib/dsputil_mlib.c
new file mode 100644
index 000000000..b78a54e0e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mlib/dsputil_mlib.c
@@ -0,0 +1,464 @@
+/*
+ * Sun mediaLib optimized DSP utils
+ * Copyright (c) 2001 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+
+#include <mlib_types.h>
+#include <mlib_status.h>
+#include <mlib_sys.h>
+#include <mlib_algebra.h>
+#include <mlib_video.h>
+
+/* misc */
+
+static void get_pixels_mlib(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
+{
+  int i;
+
+  for (i=0;i<8;i++) {
+    mlib_VectorConvert_S16_U8_Mod((mlib_s16 *)block, (mlib_u8 *)pixels, 8);
+
+    pixels += line_size;
+    block += 8;
+  }
+}
+
+static void diff_pixels_mlib(DCTELEM *restrict block, const uint8_t *s1, const uint8_t *s2, int line_size)
+{
+  int i;
+
+  for (i=0;i<8;i++) {
+    mlib_VectorSub_S16_U8_Mod((mlib_s16 *)block, (mlib_u8 *)s1, (mlib_u8 *)s2, 8);
+
+    s1 += line_size;
+    s2 += line_size;
+    block += 8;
+  }
+}
+
+static void add_pixels_clamped_mlib(const DCTELEM *block, uint8_t *pixels, int line_size)
+{
+    mlib_VideoAddBlock_U8_S16(pixels, (mlib_s16 *)block, line_size);
+}
+
+/* put block, width 16 pixel, height 8/16 */
+
+static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+  switch (height) {
+    case 8:
+      mlib_VideoCopyRef_U8_U8_16x8(dest, (uint8_t *)ref, stride);
+    break;
+
+    case 16:
+      mlib_VideoCopyRef_U8_U8_16x16(dest, (uint8_t *)ref, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
+                                  int stride, int height)
+{
+  switch (height) {
+    case 8:
+      mlib_VideoInterpX_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 16:
+      mlib_VideoInterpX_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
+                                  int stride, int height)
+{
+  switch (height) {
+    case 8:
+      mlib_VideoInterpY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 16:
+      mlib_VideoInterpY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
+                                  int stride, int height)
+{
+  switch (height) {
+    case 8:
+      mlib_VideoInterpXY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 16:
+      mlib_VideoInterpXY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+/* put block, width 8 pixel, height 4/8/16 */
+
+static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+  switch (height) {
+    case 4:
+      mlib_VideoCopyRef_U8_U8_8x4(dest, (uint8_t *)ref, stride);
+    break;
+
+    case 8:
+      mlib_VideoCopyRef_U8_U8_8x8(dest, (uint8_t *)ref, stride);
+    break;
+
+    case 16:
+      mlib_VideoCopyRef_U8_U8_8x16(dest, (uint8_t *)ref, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
+                                  int stride, int height)
+{
+  switch (height) {
+    case 4:
+      mlib_VideoInterpX_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 8:
+      mlib_VideoInterpX_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 16:
+      mlib_VideoInterpX_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
+                                  int stride, int height)
+{
+  switch (height) {
+    case 4:
+      mlib_VideoInterpY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 8:
+      mlib_VideoInterpY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 16:
+      mlib_VideoInterpY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
+                                  int stride, int height)
+{
+  switch (height) {
+    case 4:
+      mlib_VideoInterpXY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 8:
+      mlib_VideoInterpXY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 16:
+      mlib_VideoInterpXY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+/* average block, width 16 pixel, height 8/16 */
+
+static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+  switch (height) {
+    case 8:
+      mlib_VideoCopyRefAve_U8_U8_16x8(dest, (uint8_t *)ref, stride);
+    break;
+
+    case 16:
+      mlib_VideoCopyRefAve_U8_U8_16x16(dest, (uint8_t *)ref, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
+                                  int stride, int height)
+{
+  switch (height) {
+    case 8:
+      mlib_VideoInterpAveX_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 16:
+      mlib_VideoInterpAveX_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
+                                  int stride, int height)
+{
+  switch (height) {
+    case 8:
+      mlib_VideoInterpAveY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 16:
+      mlib_VideoInterpAveY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
+                                  int stride, int height)
+{
+  switch (height) {
+    case 8:
+      mlib_VideoInterpAveXY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 16:
+      mlib_VideoInterpAveXY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+/* average block, width 8 pixel, height 4/8/16 */
+
+static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
+                               int stride, int height)
+{
+  switch (height) {
+    case 4:
+      mlib_VideoCopyRefAve_U8_U8_8x4(dest, (uint8_t *)ref, stride);
+    break;
+
+    case 8:
+      mlib_VideoCopyRefAve_U8_U8_8x8(dest, (uint8_t *)ref, stride);
+    break;
+
+    case 16:
+      mlib_VideoCopyRefAve_U8_U8_8x16(dest, (uint8_t *)ref, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
+                                  int stride, int height)
+{
+  switch (height) {
+    case 4:
+      mlib_VideoInterpAveX_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 8:
+      mlib_VideoInterpAveX_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 16:
+      mlib_VideoInterpAveX_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
+                                  int stride, int height)
+{
+  switch (height) {
+    case 4:
+      mlib_VideoInterpAveY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 8:
+      mlib_VideoInterpAveY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 16:
+      mlib_VideoInterpAveY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+static void avg_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
+                                  int stride, int height)
+{
+  switch (height) {
+    case 4:
+      mlib_VideoInterpAveXY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 8:
+      mlib_VideoInterpAveXY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    case 16:
+      mlib_VideoInterpAveXY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
+    break;
+
+    default:
+      assert(0);
+  }
+}
+
+/* swap byte order of a buffer */
+
+static void bswap_buf_mlib(uint32_t *dst, uint32_t *src, int w)
+{
+  mlib_VectorReverseByteOrder_U32_U32(dst, src, w);
+}
+
+/* transformations */
+
+static void ff_idct_put_mlib(uint8_t *dest, int line_size, DCTELEM *data)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    mlib_VideoIDCT8x8_S16_S16 (data, data);
+
+    for(i=0;i<8;i++) {
+        dest[0] = cm[data[0]];
+        dest[1] = cm[data[1]];
+        dest[2] = cm[data[2]];
+        dest[3] = cm[data[3]];
+        dest[4] = cm[data[4]];
+        dest[5] = cm[data[5]];
+        dest[6] = cm[data[6]];
+        dest[7] = cm[data[7]];
+
+        dest += line_size;
+        data += 8;
+    }
+}
+
+static void ff_idct_add_mlib(uint8_t *dest, int line_size, DCTELEM *data)
+{
+    mlib_VideoIDCT8x8_S16_S16 (data, data);
+    mlib_VideoAddBlock_U8_S16(dest, (mlib_s16 *)data, line_size);
+}
+
+static void ff_idct_mlib(DCTELEM *data)
+{
+    mlib_VideoIDCT8x8_S16_S16 (data, data);
+}
+
+static void ff_fdct_mlib(DCTELEM *data)
+{
+    mlib_VideoDCT8x8_S16_S16 (data, data);
+}
+
+void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
+{
+    c->get_pixels  = get_pixels_mlib;
+    c->diff_pixels = diff_pixels_mlib;
+    c->add_pixels_clamped = add_pixels_clamped_mlib;
+
+    c->put_pixels_tab[0][0] = put_pixels16_mlib;
+    c->put_pixels_tab[0][1] = put_pixels16_x2_mlib;
+    c->put_pixels_tab[0][2] = put_pixels16_y2_mlib;
+    c->put_pixels_tab[0][3] = put_pixels16_xy2_mlib;
+    c->put_pixels_tab[1][0] = put_pixels8_mlib;
+    c->put_pixels_tab[1][1] = put_pixels8_x2_mlib;
+    c->put_pixels_tab[1][2] = put_pixels8_y2_mlib;
+    c->put_pixels_tab[1][3] = put_pixels8_xy2_mlib;
+
+    c->avg_pixels_tab[0][0] = avg_pixels16_mlib;
+    c->avg_pixels_tab[0][1] = avg_pixels16_x2_mlib;
+    c->avg_pixels_tab[0][2] = avg_pixels16_y2_mlib;
+    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mlib;
+    c->avg_pixels_tab[1][0] = avg_pixels8_mlib;
+    c->avg_pixels_tab[1][1] = avg_pixels8_x2_mlib;
+    c->avg_pixels_tab[1][2] = avg_pixels8_y2_mlib;
+    c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mlib;
+
+    c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mlib;
+    c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mlib;
+
+    c->bswap_buf = bswap_buf_mlib;
+}
+
+void MPV_common_init_mlib(MpegEncContext *s)
+{
+    if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){
+        s->dsp.fdct = ff_fdct_mlib;
+    }
+
+    if(s->avctx->idct_algo==FF_IDCT_AUTO || s->avctx->idct_algo==FF_IDCT_MLIB){
+        s->dsp.idct_put= ff_idct_put_mlib;
+        s->dsp.idct_add= ff_idct_add_mlib;
+        s->dsp.idct    = ff_idct_mlib;
+        s->dsp.idct_permutation_type= FF_NO_IDCT_PERM;
+    }
+}
diff --git a/contrib/ffmpeg/libavcodec/mmvideo.c b/contrib/ffmpeg/libavcodec/mmvideo.c
new file mode 100644
index 000000000..07d3f3fc5
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mmvideo.c
@@ -0,0 +1,206 @@
+/*
+ * American Laser Games MM Video Decoder
+ * Copyright (c) 2006 Peter Ross
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/**
+ * @file mm.c
+ * American Laser Games MM Video Decoder
+ * by Peter Ross (suxen_drol at hotmail dot com)
+ *
+ * The MM format was used by IBM-PC ports of ALG's "arcade shooter" games,
+ * including Mad Dog McCree and Crime Patrol.
+ *
+ * Technical details here:
+ *  http://wiki.multimedia.cx/index.php?title=American_Laser_Games_MM
+ */
+
+#include "avcodec.h"
+
+#define MM_PREAMBLE_SIZE    6
+
+#define MM_TYPE_INTER       0x5
+#define MM_TYPE_INTRA       0x8
+#define MM_TYPE_INTRA_HH    0xc
+#define MM_TYPE_INTER_HH    0xd
+#define MM_TYPE_INTRA_HHV   0xe
+#define MM_TYPE_INTER_HHV   0xf
+
+typedef struct MmContext {
+    AVCodecContext *avctx;
+    AVFrame frame;
+} MmContext;
+
+static int mm_decode_init(AVCodecContext *avctx)
+{
+    MmContext *s = avctx->priv_data;
+
+    s->avctx = avctx;
+
+    if (s->avctx->palctrl == NULL) {
+        av_log(avctx, AV_LOG_ERROR, "mmvideo: palette expected.\n");
+        return -1;
+    }
+
+    avctx->pix_fmt = PIX_FMT_PAL8;
+    avctx->has_b_frames = 0;
+
+    if (avcodec_check_dimensions(avctx, avctx->width, avctx->height))
+        return -1;
+
+    s->frame.reference = 1;
+    if (avctx->get_buffer(avctx, &s->frame)) {
+        av_log(s->avctx, AV_LOG_ERROR, "mmvideo: get_buffer() failed\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static void mm_decode_intra(MmContext * s, int half_horiz, int half_vert, const uint8_t *buf, int buf_size)
+{
+    int i, x, y;
+    i=0; x=0; y=0;
+
+    while(i<buf_size) {
+        int run_length, color;
+
+        if (buf[i] & 0x80) {
+            run_length = 1;
+            color = buf[i];
+            i++;
+        }else{
+            run_length = (buf[i] & 0x7f) + 2;
+            color = buf[i+1];
+            i+=2;
+        }
+
+        if (half_horiz)
+            run_length *=2;
+
+        if (color) {
+            memset(s->frame.data[0] + y*s->frame.linesize[0] + x, color, run_length);
+            if (half_vert)
+                memset(s->frame.data[0] + (y+1)*s->frame.linesize[0] + x, color, run_length);
+        }
+        x+= run_length;
+
+        if (x >= s->avctx->width) {
+            x=0;
+            y += half_vert ? 2 : 1;
+        }
+    }
+}
+
+static void mm_decode_inter(MmContext * s, int half_horiz, int half_vert, const uint8_t *buf, int buf_size)
+{
+    const int data_ptr = 2 + LE_16(&buf[0]);
+    int d, r, y;
+    d = data_ptr; r = 2; y = 0;
+
+    while(r < data_ptr) {
+        int i, j;
+        int length = buf[r] & 0x7f;
+        int x = buf[r+1] + ((buf[r] & 0x80) << 1);
+        r += 2;
+
+        if (length==0) {
+            y += x;
+            continue;
+        }
+
+        for(i=0; i<length; i++) {
+            for(j=0; j<8; j++) {
+                int replace = (buf[r+i] >> (7-j)) & 1;
+                if (replace) {
+                    int color = buf[d];
+                    s->frame.data[0][y*s->frame.linesize[0] + x] = color;
+                    if (half_horiz)
+                        s->frame.data[0][y*s->frame.linesize[0] + x + 1] = color;
+                    if (half_vert) {
+                        s->frame.data[0][(y+1)*s->frame.linesize[0] + x] = color;
+                        if (half_horiz)
+                            s->frame.data[0][(y+1)*s->frame.linesize[0] + x + 1] = color;
+                    }
+                    d++;
+                }
+                x += half_horiz ? 2 : 1;
+            }
+        }
+
+        r += length;
+        y += half_vert ? 2 : 1;
+    }
+}
+
+static int mm_decode_frame(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    MmContext *s = avctx->priv_data;
+    AVPaletteControl *palette_control = avctx->palctrl;
+    int type;
+
+    if (palette_control->palette_changed) {
+        memcpy(s->frame.data[1], palette_control->palette, AVPALETTE_SIZE);
+        palette_control->palette_changed = 0;
+    }
+
+    type = LE_16(&buf[0]);
+    buf += MM_PREAMBLE_SIZE;
+    buf_size -= MM_PREAMBLE_SIZE;
+
+    switch(type) {
+    case MM_TYPE_INTRA     : mm_decode_intra(s, 0, 0, buf, buf_size); break;
+    case MM_TYPE_INTRA_HH  : mm_decode_intra(s, 1, 0, buf, buf_size); break;
+    case MM_TYPE_INTRA_HHV : mm_decode_intra(s, 1, 1, buf, buf_size); break;
+    case MM_TYPE_INTER     : mm_decode_inter(s, 0, 0, buf, buf_size); break;
+    case MM_TYPE_INTER_HH  : mm_decode_inter(s, 1, 0, buf, buf_size); break;
+    case MM_TYPE_INTER_HHV : mm_decode_inter(s, 1, 1, buf, buf_size); break;
+    default :
+        return -1;
+    }
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->frame;
+
+    return buf_size;
+}
+
+static int mm_decode_end(AVCodecContext *avctx)
+{
+    MmContext *s = avctx->priv_data;
+
+    if(s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    return 0;
+}
+
+AVCodec mmvideo_decoder = {
+    "mmvideo",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MMVIDEO,
+    sizeof(MmContext),
+    mm_decode_init,
+    NULL,
+    mm_decode_end,
+    mm_decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/motion_est.c b/contrib/ffmpeg/libavcodec/motion_est.c
new file mode 100644
index 000000000..4dc17b99b
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/motion_est.c
@@ -0,0 +1,2114 @@
+/*
+ * Motion estimation
+ * Copyright (c) 2000,2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer
+ *
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
+ */
+
+/**
+ * @file motion_est.c
+ * Motion estimation.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <limits.h>
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+#define SQ(a) ((a)*(a))
+
+#define P_LEFT P[1]
+#define P_TOP P[2]
+#define P_TOPRIGHT P[3]
+#define P_MEDIAN P[4]
+#define P_MV1 P[9]
+
+static inline int sad_hpel_motion_search(MpegEncContext * s,
+                                  int *mx_ptr, int *my_ptr, int dmin,
+                                  int src_index, int ref_index,
+                                  int size, int h);
+
+static inline int update_map_generation(MotionEstContext *c)
+{
+    c->map_generation+= 1<<(ME_MAP_MV_BITS*2);
+    if(c->map_generation==0){
+        c->map_generation= 1<<(ME_MAP_MV_BITS*2);
+        memset(c->map, 0, sizeof(uint32_t)*ME_MAP_SIZE);
+    }
+    return c->map_generation;
+}
+
+/* shape adaptive search stuff */
+typedef struct Minima{
+    int height;
+    int x, y;
+    int checked;
+}Minima;
+
+static int minima_cmp(const void *a, const void *b){
+    const Minima *da = (const Minima *) a;
+    const Minima *db = (const Minima *) b;
+
+    return da->height - db->height;
+}
+
+#define FLAG_QPEL   1 //must be 1
+#define FLAG_CHROMA 2
+#define FLAG_DIRECT 4
+
+static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
+    const int offset[3]= {
+          y*c->  stride + x,
+        ((y*c->uvstride + x)>>1),
+        ((y*c->uvstride + x)>>1),
+    };
+    int i;
+    for(i=0; i<3; i++){
+        c->src[0][i]= src [i] + offset[i];
+        c->ref[0][i]= ref [i] + offset[i];
+    }
+    if(ref_index){
+        for(i=0; i<3; i++){
+            c->ref[ref_index][i]= ref2[i] + offset[i];
+        }
+    }
+}
+
+static int get_flags(MotionEstContext *c, int direct, int chroma){
+    return   ((c->avctx->flags&CODEC_FLAG_QPEL) ? FLAG_QPEL : 0)
+           + (direct ? FLAG_DIRECT : 0)
+           + (chroma ? FLAG_CHROMA : 0);
+}
+
+static always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
+                      const int size, const int h, int ref_index, int src_index,
+                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
+    MotionEstContext * const c= &s->me;
+    const int stride= c->stride;
+    const int uvstride= c->uvstride;
+    const int qpel= flags&FLAG_QPEL;
+    const int chroma= flags&FLAG_CHROMA;
+    const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
+    const int hx= subx + (x<<(1+qpel));
+    const int hy= suby + (y<<(1+qpel));
+    uint8_t * const * const ref= c->ref[ref_index];
+    uint8_t * const * const src= c->src[src_index];
+    int d;
+    //FIXME check chroma 4mv, (no crashes ...)
+    if(flags&FLAG_DIRECT){
+        if(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)){
+            const int time_pp= s->pp_time;
+            const int time_pb= s->pb_time;
+            const int mask= 2*qpel+1;
+            if(s->mv_type==MV_TYPE_8X8){
+                int i;
+                for(i=0; i<4; i++){
+                    int fx = c->direct_basis_mv[i][0] + hx;
+                    int fy = c->direct_basis_mv[i][1] + hy;
+                    int bx = hx ? fx - c->co_located_mv[i][0] : c->co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(qpel+4));
+                    int by = hy ? fy - c->co_located_mv[i][1] : c->co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(qpel+4));
+                    int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
+                    int bxy= (bx&mask) + ((by&mask)<<(qpel+1));
+
+                    uint8_t *dst= c->temp + 8*(i&1) + 8*stride*(i>>1);
+                    if(qpel){
+                        c->qpel_put[1][fxy](dst, ref[0] + (fx>>2) + (fy>>2)*stride, stride);
+                        c->qpel_avg[1][bxy](dst, ref[8] + (bx>>2) + (by>>2)*stride, stride);
+                    }else{
+                        c->hpel_put[1][fxy](dst, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 8);
+                        c->hpel_avg[1][bxy](dst, ref[8] + (bx>>1) + (by>>1)*stride, stride, 8);
+                    }
+                }
+            }else{
+                int fx = c->direct_basis_mv[0][0] + hx;
+                int fy = c->direct_basis_mv[0][1] + hy;
+                int bx = hx ? fx - c->co_located_mv[0][0] : (c->co_located_mv[0][0]*(time_pb - time_pp)/time_pp);
+                int by = hy ? fy - c->co_located_mv[0][1] : (c->co_located_mv[0][1]*(time_pb - time_pp)/time_pp);
+                int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
+                int bxy= (bx&mask) + ((by&mask)<<(qpel+1));
+
+                if(qpel){
+                    c->qpel_put[1][fxy](c->temp               , ref[0] + (fx>>2) + (fy>>2)*stride               , stride);
+                    c->qpel_put[1][fxy](c->temp + 8           , ref[0] + (fx>>2) + (fy>>2)*stride + 8           , stride);
+                    c->qpel_put[1][fxy](c->temp     + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride     + 8*stride, stride);
+                    c->qpel_put[1][fxy](c->temp + 8 + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride + 8 + 8*stride, stride);
+                    c->qpel_avg[1][bxy](c->temp               , ref[8] + (bx>>2) + (by>>2)*stride               , stride);
+                    c->qpel_avg[1][bxy](c->temp + 8           , ref[8] + (bx>>2) + (by>>2)*stride + 8           , stride);
+                    c->qpel_avg[1][bxy](c->temp     + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride     + 8*stride, stride);
+                    c->qpel_avg[1][bxy](c->temp + 8 + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride + 8 + 8*stride, stride);
+                }else{
+                    assert((fx>>1) + 16*s->mb_x >= -16);
+                    assert((fy>>1) + 16*s->mb_y >= -16);
+                    assert((fx>>1) + 16*s->mb_x <= s->width);
+                    assert((fy>>1) + 16*s->mb_y <= s->height);
+                    assert((bx>>1) + 16*s->mb_x >= -16);
+                    assert((by>>1) + 16*s->mb_y >= -16);
+                    assert((bx>>1) + 16*s->mb_x <= s->width);
+                    assert((by>>1) + 16*s->mb_y <= s->height);
+
+                    c->hpel_put[0][fxy](c->temp, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 16);
+                    c->hpel_avg[0][bxy](c->temp, ref[8] + (bx>>1) + (by>>1)*stride, stride, 16);
+                }
+            }
+            d = cmp_func(s, c->temp, src[0], stride, 16);
+        }else
+            d= 256*256*256*32;
+    }else{
+        int uvdxy;              /* no, it might not be used uninitialized */
+        if(dxy){
+            if(qpel){
+                c->qpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride); //FIXME prototype (add h)
+                if(chroma){
+                    int cx= hx/2;
+                    int cy= hy/2;
+                    cx= (cx>>1)|(cx&1);
+                    cy= (cy>>1)|(cy&1);
+                    uvdxy= (cx&1) + 2*(cy&1);
+                    //FIXME x/y wrong, but mpeg4 qpel is sick anyway, we should drop as much of it as possible in favor for h264
+                }
+            }else{
+                c->hpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride, h);
+                if(chroma)
+                    uvdxy= dxy | (x&1) | (2*(y&1));
+            }
+            d = cmp_func(s, c->temp, src[0], stride, h);
+        }else{
+            d = cmp_func(s, src[0], ref[0] + x + y*stride, stride, h);
+            if(chroma)
+                uvdxy= (x&1) + 2*(y&1);
+        }
+        if(chroma){
+            uint8_t * const uvtemp= c->temp + 16*stride;
+            c->hpel_put[size+1][uvdxy](uvtemp  , ref[1] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
+            c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
+            d += chroma_cmp_func(s, uvtemp  , src[1], uvstride, h>>1);
+            d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
+        }
+    }
+#if 0
+    if(full_pel){
+        const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);
+        score_map[index]= d;
+    }
+
+    d += (c->mv_penalty[hx - c->pred_x] + c->mv_penalty[hy - c->pred_y])*c->penalty_factor;
+#endif
+    return d;
+}
+
+#include "motion_est_template.c"
+
+static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
+    return 0;
+}
+
+static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h){
+}
+
+void ff_init_me(MpegEncContext *s){
+    MotionEstContext * const c= &s->me;
+    c->avctx= s->avctx;
+
+    ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, c->avctx->me_pre_cmp);
+    ff_set_cmp(&s->dsp, s->dsp.me_cmp, c->avctx->me_cmp);
+    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, c->avctx->me_sub_cmp);
+    ff_set_cmp(&s->dsp, s->dsp.mb_cmp, c->avctx->mb_cmp);
+
+    c->flags    = get_flags(c, 0, c->avctx->me_cmp    &FF_CMP_CHROMA);
+    c->sub_flags= get_flags(c, 0, c->avctx->me_sub_cmp&FF_CMP_CHROMA);
+    c->mb_flags = get_flags(c, 0, c->avctx->mb_cmp    &FF_CMP_CHROMA);
+
+/*FIXME s->no_rounding b_type*/
+    if(s->flags&CODEC_FLAG_QPEL){
+        c->sub_motion_search= qpel_motion_search;
+        c->qpel_avg= s->dsp.avg_qpel_pixels_tab;
+        if(s->no_rounding) c->qpel_put= s->dsp.put_no_rnd_qpel_pixels_tab;
+        else               c->qpel_put= s->dsp.put_qpel_pixels_tab;
+    }else{
+        if(c->avctx->me_sub_cmp&FF_CMP_CHROMA)
+            c->sub_motion_search= hpel_motion_search;
+        else if(   c->avctx->me_sub_cmp == FF_CMP_SAD
+                && c->avctx->    me_cmp == FF_CMP_SAD
+                && c->avctx->    mb_cmp == FF_CMP_SAD)
+            c->sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles
+        else
+            c->sub_motion_search= hpel_motion_search;
+    }
+    c->hpel_avg= s->dsp.avg_pixels_tab;
+    if(s->no_rounding) c->hpel_put= s->dsp.put_no_rnd_pixels_tab;
+    else               c->hpel_put= s->dsp.put_pixels_tab;
+
+    if(s->linesize){
+        c->stride  = s->linesize;
+        c->uvstride= s->uvlinesize;
+    }else{
+        c->stride  = 16*s->mb_width + 32;
+        c->uvstride=  8*s->mb_width + 16;
+    }
+
+    // 8x8 fullpel search would need a 4x4 chroma compare, which we dont have yet, and even if we had the motion estimation code doesnt expect it
+    if(s->codec_id != CODEC_ID_SNOW){
+        if((c->avctx->me_cmp&FF_CMP_CHROMA)/* && !s->dsp.me_cmp[2]*/){
+            s->dsp.me_cmp[2]= zero_cmp;
+        }
+        if((c->avctx->me_sub_cmp&FF_CMP_CHROMA) && !s->dsp.me_sub_cmp[2]){
+            s->dsp.me_sub_cmp[2]= zero_cmp;
+        }
+        c->hpel_put[2][0]= c->hpel_put[2][1]=
+        c->hpel_put[2][2]= c->hpel_put[2][3]= zero_hpel;
+    }
+
+    if(s->codec_id == CODEC_ID_H261){
+        c->sub_motion_search= no_sub_motion_search;
+    }
+
+    c->temp= c->scratchpad;
+}
+
+#if 0
+static int pix_dev(uint8_t * pix, int line_size, int mean)
+{
+    int s, i, j;
+
+    s = 0;
+    for (i = 0; i < 16; i++) {
+        for (j = 0; j < 16; j += 8) {
+            s += FFABS(pix[0]-mean);
+            s += FFABS(pix[1]-mean);
+            s += FFABS(pix[2]-mean);
+            s += FFABS(pix[3]-mean);
+            s += FFABS(pix[4]-mean);
+            s += FFABS(pix[5]-mean);
+            s += FFABS(pix[6]-mean);
+            s += FFABS(pix[7]-mean);
+            pix += 8;
+        }
+        pix += line_size - 16;
+    }
+    return s;
+}
+#endif
+
+static inline void no_motion_search(MpegEncContext * s,
+                                    int *mx_ptr, int *my_ptr)
+{
+    *mx_ptr = 16 * s->mb_x;
+    *my_ptr = 16 * s->mb_y;
+}
+
+#if 0  /* the use of these functions is inside #if 0 */
+static int full_motion_search(MpegEncContext * s,
+                              int *mx_ptr, int *my_ptr, int range,
+                              int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
+{
+    int x1, y1, x2, y2, xx, yy, x, y;
+    int mx, my, dmin, d;
+    uint8_t *pix;
+
+    xx = 16 * s->mb_x;
+    yy = 16 * s->mb_y;
+    x1 = xx - range + 1;        /* we loose one pixel to avoid boundary pb with half pixel pred */
+    if (x1 < xmin)
+        x1 = xmin;
+    x2 = xx + range - 1;
+    if (x2 > xmax)
+        x2 = xmax;
+    y1 = yy - range + 1;
+    if (y1 < ymin)
+        y1 = ymin;
+    y2 = yy + range - 1;
+    if (y2 > ymax)
+        y2 = ymax;
+    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
+    dmin = 0x7fffffff;
+    mx = 0;
+    my = 0;
+    for (y = y1; y <= y2; y++) {
+        for (x = x1; x <= x2; x++) {
+            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
+                             s->linesize, 16);
+            if (d < dmin ||
+                (d == dmin &&
+                 (abs(x - xx) + abs(y - yy)) <
+                 (abs(mx - xx) + abs(my - yy)))) {
+                dmin = d;
+                mx = x;
+                my = y;
+            }
+        }
+    }
+
+    *mx_ptr = mx;
+    *my_ptr = my;
+
+#if 0
+    if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) ||
+        *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
+        av_log(NULL, AV_LOG_ERROR, "error %d %d\n", *mx_ptr, *my_ptr);
+    }
+#endif
+    return dmin;
+}
+
+
+static int log_motion_search(MpegEncContext * s,
+                             int *mx_ptr, int *my_ptr, int range,
+                             int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
+{
+    int x1, y1, x2, y2, xx, yy, x, y;
+    int mx, my, dmin, d;
+    uint8_t *pix;
+
+    xx = s->mb_x << 4;
+    yy = s->mb_y << 4;
+
+    /* Left limit */
+    x1 = xx - range;
+    if (x1 < xmin)
+        x1 = xmin;
+
+    /* Right limit */
+    x2 = xx + range;
+    if (x2 > xmax)
+        x2 = xmax;
+
+    /* Upper limit */
+    y1 = yy - range;
+    if (y1 < ymin)
+        y1 = ymin;
+
+    /* Lower limit */
+    y2 = yy + range;
+    if (y2 > ymax)
+        y2 = ymax;
+
+    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
+    dmin = 0x7fffffff;
+    mx = 0;
+    my = 0;
+
+    do {
+        for (y = y1; y <= y2; y += range) {
+            for (x = x1; x <= x2; x += range) {
+                d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
+                if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
+                    dmin = d;
+                    mx = x;
+                    my = y;
+                }
+            }
+        }
+
+        range = range >> 1;
+
+        x1 = mx - range;
+        if (x1 < xmin)
+            x1 = xmin;
+
+        x2 = mx + range;
+        if (x2 > xmax)
+            x2 = xmax;
+
+        y1 = my - range;
+        if (y1 < ymin)
+            y1 = ymin;
+
+        y2 = my + range;
+        if (y2 > ymax)
+            y2 = ymax;
+
+    } while (range >= 1);
+
+#ifdef DEBUG
+    av_log(s->avctx, AV_LOG_DEBUG, "log       - MX: %d\tMY: %d\n", mx, my);
+#endif
+    *mx_ptr = mx;
+    *my_ptr = my;
+    return dmin;
+}
+
+static int phods_motion_search(MpegEncContext * s,
+                               int *mx_ptr, int *my_ptr, int range,
+                               int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
+{
+    int x1, y1, x2, y2, xx, yy, x, y, lastx, d;
+    int mx, my, dminx, dminy;
+    uint8_t *pix;
+
+    xx = s->mb_x << 4;
+    yy = s->mb_y << 4;
+
+    /* Left limit */
+    x1 = xx - range;
+    if (x1 < xmin)
+        x1 = xmin;
+
+    /* Right limit */
+    x2 = xx + range;
+    if (x2 > xmax)
+        x2 = xmax;
+
+    /* Upper limit */
+    y1 = yy - range;
+    if (y1 < ymin)
+        y1 = ymin;
+
+    /* Lower limit */
+    y2 = yy + range;
+    if (y2 > ymax)
+        y2 = ymax;
+
+    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
+    mx = 0;
+    my = 0;
+
+    x = xx;
+    y = yy;
+    do {
+        dminx = 0x7fffffff;
+        dminy = 0x7fffffff;
+
+        lastx = x;
+        for (x = x1; x <= x2; x += range) {
+            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
+            if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
+                dminx = d;
+                mx = x;
+            }
+        }
+
+        x = lastx;
+        for (y = y1; y <= y2; y += range) {
+            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
+            if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
+                dminy = d;
+                my = y;
+            }
+        }
+
+        range = range >> 1;
+
+        x = mx;
+        y = my;
+        x1 = mx - range;
+        if (x1 < xmin)
+            x1 = xmin;
+
+        x2 = mx + range;
+        if (x2 > xmax)
+            x2 = xmax;
+
+        y1 = my - range;
+        if (y1 < ymin)
+            y1 = ymin;
+
+        y2 = my + range;
+        if (y2 > ymax)
+            y2 = ymax;
+
+    } while (range >= 1);
+
+#ifdef DEBUG
+    av_log(s->avctx, AV_LOG_DEBUG, "phods     - MX: %d\tMY: %d\n", mx, my);
+#endif
+
+    /* half pixel search */
+    *mx_ptr = mx;
+    *my_ptr = my;
+    return dminy;
+}
+#endif /* 0 */
+
+#define Z_THRESHOLD 256
+
+#define CHECK_SAD_HALF_MV(suffix, x, y) \
+{\
+    d= s->dsp.pix_abs[size][(x?1:0)+(y?2:0)](NULL, pix, ptr+((x)>>1), stride, h);\
+    d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor;\
+    COPY3_IF_LT(dminh, d, dx, x, dy, y)\
+}
+
+static inline int sad_hpel_motion_search(MpegEncContext * s,
+                                  int *mx_ptr, int *my_ptr, int dmin,
+                                  int src_index, int ref_index,
+                                  int size, int h)
+{
+    MotionEstContext * const c= &s->me;
+    const int penalty_factor= c->sub_penalty_factor;
+    int mx, my, dminh;
+    uint8_t *pix, *ptr;
+    int stride= c->stride;
+    const int flags= c->sub_flags;
+    LOAD_COMMON
+
+    assert(flags == 0);
+
+    if(c->skip){
+//    printf("S");
+        *mx_ptr = 0;
+        *my_ptr = 0;
+        return dmin;
+    }
+//    printf("N");
+
+    pix = c->src[src_index][0];
+
+    mx = *mx_ptr;
+    my = *my_ptr;
+    ptr = c->ref[ref_index][0] + (my * stride) + mx;
+
+    dminh = dmin;
+
+    if (mx > xmin && mx < xmax &&
+        my > ymin && my < ymax) {
+        int dx=0, dy=0;
+        int d, pen_x, pen_y;
+        const int index= (my<<ME_MAP_SHIFT) + mx;
+        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
+        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)];
+        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)];
+        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
+        mx<<=1;
+        my<<=1;
+
+
+        pen_x= pred_x + mx;
+        pen_y= pred_y + my;
+
+        ptr-= stride;
+        if(t<=b){
+            CHECK_SAD_HALF_MV(y2 , 0, -1)
+            if(l<=r){
+                CHECK_SAD_HALF_MV(xy2, -1, -1)
+                if(t+r<=b+l){
+                    CHECK_SAD_HALF_MV(xy2, +1, -1)
+                    ptr+= stride;
+                }else{
+                    ptr+= stride;
+                    CHECK_SAD_HALF_MV(xy2, -1, +1)
+                }
+                CHECK_SAD_HALF_MV(x2 , -1,  0)
+            }else{
+                CHECK_SAD_HALF_MV(xy2, +1, -1)
+                if(t+l<=b+r){
+                    CHECK_SAD_HALF_MV(xy2, -1, -1)
+                    ptr+= stride;
+                }else{
+                    ptr+= stride;
+                    CHECK_SAD_HALF_MV(xy2, +1, +1)
+                }
+                CHECK_SAD_HALF_MV(x2 , +1,  0)
+            }
+        }else{
+            if(l<=r){
+                if(t+l<=b+r){
+                    CHECK_SAD_HALF_MV(xy2, -1, -1)
+                    ptr+= stride;
+                }else{
+                    ptr+= stride;
+                    CHECK_SAD_HALF_MV(xy2, +1, +1)
+                }
+                CHECK_SAD_HALF_MV(x2 , -1,  0)
+                CHECK_SAD_HALF_MV(xy2, -1, +1)
+            }else{
+                if(t+r<=b+l){
+                    CHECK_SAD_HALF_MV(xy2, +1, -1)
+                    ptr+= stride;
+                }else{
+                    ptr+= stride;
+                    CHECK_SAD_HALF_MV(xy2, -1, +1)
+                }
+                CHECK_SAD_HALF_MV(x2 , +1,  0)
+                CHECK_SAD_HALF_MV(xy2, +1, +1)
+            }
+            CHECK_SAD_HALF_MV(y2 ,  0, +1)
+        }
+        mx+=dx;
+        my+=dy;
+
+    }else{
+        mx<<=1;
+        my<<=1;
+    }
+
+    *mx_ptr = mx;
+    *my_ptr = my;
+    return dminh;
+}
+
+static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
+{
+    const int xy= s->mb_x + s->mb_y*s->mb_stride;
+
+    s->p_mv_table[xy][0] = mx;
+    s->p_mv_table[xy][1] = my;
+
+    /* has already been set to the 4 MV if 4MV is done */
+    if(mv4){
+        int mot_xy= s->block_index[0];
+
+        s->current_picture.motion_val[0][mot_xy  ][0]= mx;
+        s->current_picture.motion_val[0][mot_xy  ][1]= my;
+        s->current_picture.motion_val[0][mot_xy+1][0]= mx;
+        s->current_picture.motion_val[0][mot_xy+1][1]= my;
+
+        mot_xy += s->b8_stride;
+        s->current_picture.motion_val[0][mot_xy  ][0]= mx;
+        s->current_picture.motion_val[0][mot_xy  ][1]= my;
+        s->current_picture.motion_val[0][mot_xy+1][0]= mx;
+        s->current_picture.motion_val[0][mot_xy+1][1]= my;
+    }
+}
+
+/**
+ * get fullpel ME search limits.
+ */
+static inline void get_limits(MpegEncContext *s, int x, int y)
+{
+    MotionEstContext * const c= &s->me;
+/*
+    if(c->avctx->me_range) c->range= c->avctx->me_range >> 1;
+    else                   c->range= 16;
+*/
+    if (s->unrestricted_mv) {
+        c->xmin = - x - 16;
+        c->ymin = - y - 16;
+        c->xmax = - x + s->mb_width *16;
+        c->ymax = - y + s->mb_height*16;
+    } else if (s->out_format == FMT_H261){
+        // Search range of H261 is different from other codec standards
+        c->xmin = (x > 15) ? - 15 : 0;
+        c->ymin = (y > 15) ? - 15 : 0;
+        c->xmax = (x < s->mb_width * 16 - 16) ? 15 : 0;
+        c->ymax = (y < s->mb_height * 16 - 16) ? 15 : 0;
+    } else {
+        c->xmin = - x;
+        c->ymin = - y;
+        c->xmax = - x + s->mb_width *16 - 16;
+        c->ymax = - y + s->mb_height*16 - 16;
+    }
+}
+
+static inline void init_mv4_ref(MotionEstContext *c){
+    const int stride= c->stride;
+
+    c->ref[1][0] = c->ref[0][0] + 8;
+    c->ref[2][0] = c->ref[0][0] + 8*stride;
+    c->ref[3][0] = c->ref[2][0] + 8;
+    c->src[1][0] = c->src[0][0] + 8;
+    c->src[2][0] = c->src[0][0] + 8*stride;
+    c->src[3][0] = c->src[2][0] + 8;
+}
+
+static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
+{
+    MotionEstContext * const c= &s->me;
+    const int size= 1;
+    const int h=8;
+    int block;
+    int P[10][2];
+    int dmin_sum=0, mx4_sum=0, my4_sum=0;
+    int same=1;
+    const int stride= c->stride;
+    uint8_t *mv_penalty= c->current_mv_penalty;
+
+    init_mv4_ref(c);
+
+    for(block=0; block<4; block++){
+        int mx4, my4;
+        int pred_x4, pred_y4;
+        int dmin4;
+        static const int off[4]= {2, 1, 1, -1};
+        const int mot_stride = s->b8_stride;
+        const int mot_xy = s->block_index[block];
+
+        P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
+        P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];
+
+        if(P_LEFT[0]       > (c->xmax<<shift)) P_LEFT[0]       = (c->xmax<<shift);
+
+        /* special case for first line */
+        if (s->first_slice_line && block<2) {
+            c->pred_x= pred_x4= P_LEFT[0];
+            c->pred_y= pred_y4= P_LEFT[1];
+        } else {
+            P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][0];
+            P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][1];
+            P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0];
+            P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1];
+            if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
+            if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
+            if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift);
+            if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
+
+            P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
+            P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
+
+            c->pred_x= pred_x4 = P_MEDIAN[0];
+            c->pred_y= pred_y4 = P_MEDIAN[1];
+        }
+        P_MV1[0]= mx;
+        P_MV1[1]= my;
+
+        dmin4 = epzs_motion_search4(s, &mx4, &my4, P, block, block, s->p_mv_table, (1<<16)>>shift);
+
+        dmin4= c->sub_motion_search(s, &mx4, &my4, dmin4, block, block, size, h);
+
+        if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
+            int dxy;
+            const int offset= ((block&1) + (block>>1)*stride)*8;
+            uint8_t *dest_y = c->scratchpad + offset;
+            if(s->quarter_sample){
+                uint8_t *ref= c->ref[block][0] + (mx4>>2) + (my4>>2)*stride;
+                dxy = ((my4 & 3) << 2) | (mx4 & 3);
+
+                if(s->no_rounding)
+                    s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y   , ref    , stride);
+                else
+                    s->dsp.put_qpel_pixels_tab       [1][dxy](dest_y   , ref    , stride);
+            }else{
+                uint8_t *ref= c->ref[block][0] + (mx4>>1) + (my4>>1)*stride;
+                dxy = ((my4 & 1) << 1) | (mx4 & 1);
+
+                if(s->no_rounding)
+                    s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y    , ref    , stride, h);
+                else
+                    s->dsp.put_pixels_tab       [1][dxy](dest_y    , ref    , stride, h);
+            }
+            dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*c->mb_penalty_factor;
+        }else
+            dmin_sum+= dmin4;
+
+        if(s->quarter_sample){
+            mx4_sum+= mx4/2;
+            my4_sum+= my4/2;
+        }else{
+            mx4_sum+= mx4;
+            my4_sum+= my4;
+        }
+
+        s->current_picture.motion_val[0][ s->block_index[block] ][0]= mx4;
+        s->current_picture.motion_val[0][ s->block_index[block] ][1]= my4;
+
+        if(mx4 != mx || my4 != my) same=0;
+    }
+
+    if(same)
+        return INT_MAX;
+
+    if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
+        dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*stride, c->scratchpad, stride, 16);
+    }
+
+    if(c->avctx->mb_cmp&FF_CMP_CHROMA){
+        int dxy;
+        int mx, my;
+        int offset;
+
+        mx= ff_h263_round_chroma(mx4_sum);
+        my= ff_h263_round_chroma(my4_sum);
+        dxy = ((my & 1) << 1) | (mx & 1);
+
+        offset= (s->mb_x*8 + (mx>>1)) + (s->mb_y*8 + (my>>1))*s->uvlinesize;
+
+        if(s->no_rounding){
+            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad    , s->last_picture.data[1] + offset, s->uvlinesize, 8);
+            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
+        }else{
+            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad    , s->last_picture.data[1] + offset, s->uvlinesize, 8);
+            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
+        }
+
+        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad  , s->uvlinesize, 8);
+        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad+8, s->uvlinesize, 8);
+    }
+
+    c->pred_x= mx;
+    c->pred_y= my;
+
+    switch(c->avctx->mb_cmp&0xFF){
+    /*case FF_CMP_SSE:
+        return dmin_sum+ 32*s->qscale*s->qscale;*/
+    case FF_CMP_RD:
+        return dmin_sum;
+    default:
+        return dmin_sum+ 11*c->mb_penalty_factor;
+    }
+}
+
+static inline void init_interlaced_ref(MpegEncContext *s, int ref_index){
+    MotionEstContext * const c= &s->me;
+
+    c->ref[1+ref_index][0] = c->ref[0+ref_index][0] + s->linesize;
+    c->src[1][0] = c->src[0][0] + s->linesize;
+    if(c->flags & FLAG_CHROMA){
+        c->ref[1+ref_index][1] = c->ref[0+ref_index][1] + s->uvlinesize;
+        c->ref[1+ref_index][2] = c->ref[0+ref_index][2] + s->uvlinesize;
+        c->src[1][1] = c->src[0][1] + s->uvlinesize;
+        c->src[1][2] = c->src[0][2] + s->uvlinesize;
+    }
+}
+
+static int interlaced_search(MpegEncContext *s, int ref_index,
+                             int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int mx, int my, int user_field_select)
+{
+    MotionEstContext * const c= &s->me;
+    const int size=0;
+    const int h=8;
+    int block;
+    int P[10][2];
+    uint8_t * const mv_penalty= c->current_mv_penalty;
+    int same=1;
+    const int stride= 2*s->linesize;
+    int dmin_sum= 0;
+    const int mot_stride= s->mb_stride;
+    const int xy= s->mb_x + s->mb_y*mot_stride;
+
+    c->ymin>>=1;
+    c->ymax>>=1;
+    c->stride<<=1;
+    c->uvstride<<=1;
+    init_interlaced_ref(s, ref_index);
+
+    for(block=0; block<2; block++){
+        int field_select;
+        int best_dmin= INT_MAX;
+        int best_field= -1;
+
+        for(field_select=0; field_select<2; field_select++){
+            int dmin, mx_i, my_i;
+            int16_t (*mv_table)[2]= mv_tables[block][field_select];
+
+            if(user_field_select){
+                assert(field_select==0 || field_select==1);
+                assert(field_select_tables[block][xy]==0 || field_select_tables[block][xy]==1);
+                if(field_select_tables[block][xy] != field_select)
+                    continue;
+            }
+
+            P_LEFT[0] = mv_table[xy - 1][0];
+            P_LEFT[1] = mv_table[xy - 1][1];
+            if(P_LEFT[0]       > (c->xmax<<1)) P_LEFT[0]       = (c->xmax<<1);
+
+            c->pred_x= P_LEFT[0];
+            c->pred_y= P_LEFT[1];
+
+            if(!s->first_slice_line){
+                P_TOP[0]      = mv_table[xy - mot_stride][0];
+                P_TOP[1]      = mv_table[xy - mot_stride][1];
+                P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
+                P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1];
+                if(P_TOP[1]      > (c->ymax<<1)) P_TOP[1]     = (c->ymax<<1);
+                if(P_TOPRIGHT[0] < (c->xmin<<1)) P_TOPRIGHT[0]= (c->xmin<<1);
+                if(P_TOPRIGHT[0] > (c->xmax<<1)) P_TOPRIGHT[0]= (c->xmax<<1);
+                if(P_TOPRIGHT[1] > (c->ymax<<1)) P_TOPRIGHT[1]= (c->ymax<<1);
+
+                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
+                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
+            }
+            P_MV1[0]= mx; //FIXME not correct if block != field_select
+            P_MV1[1]= my / 2;
+
+            dmin = epzs_motion_search2(s, &mx_i, &my_i, P, block, field_select+ref_index, mv_table, (1<<16)>>1);
+
+            dmin= c->sub_motion_search(s, &mx_i, &my_i, dmin, block, field_select+ref_index, size, h);
+
+            mv_table[xy][0]= mx_i;
+            mv_table[xy][1]= my_i;
+
+            if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
+                int dxy;
+
+                //FIXME chroma ME
+                uint8_t *ref= c->ref[field_select+ref_index][0] + (mx_i>>1) + (my_i>>1)*stride;
+                dxy = ((my_i & 1) << 1) | (mx_i & 1);
+
+                if(s->no_rounding){
+                    s->dsp.put_no_rnd_pixels_tab[size][dxy](c->scratchpad, ref    , stride, h);
+                }else{
+                    s->dsp.put_pixels_tab       [size][dxy](c->scratchpad, ref    , stride, h);
+                }
+                dmin= s->dsp.mb_cmp[size](s, c->src[block][0], c->scratchpad, stride, h);
+                dmin+= (mv_penalty[mx_i-c->pred_x] + mv_penalty[my_i-c->pred_y] + 1)*c->mb_penalty_factor;
+            }else
+                dmin+= c->mb_penalty_factor; //field_select bits
+
+            dmin += field_select != block; //slightly prefer same field
+
+            if(dmin < best_dmin){
+                best_dmin= dmin;
+                best_field= field_select;
+            }
+        }
+        {
+            int16_t (*mv_table)[2]= mv_tables[block][best_field];
+
+            if(mv_table[xy][0] != mx) same=0; //FIXME check if these checks work and are any good at all
+            if(mv_table[xy][1]&1) same=0;
+            if(mv_table[xy][1]*2 != my) same=0;
+            if(best_field != block) same=0;
+        }
+
+        field_select_tables[block][xy]= best_field;
+        dmin_sum += best_dmin;
+    }
+
+    c->ymin<<=1;
+    c->ymax<<=1;
+    c->stride>>=1;
+    c->uvstride>>=1;
+
+    if(same)
+        return INT_MAX;
+
+    switch(c->avctx->mb_cmp&0xFF){
+    /*case FF_CMP_SSE:
+        return dmin_sum+ 32*s->qscale*s->qscale;*/
+    case FF_CMP_RD:
+        return dmin_sum;
+    default:
+        return dmin_sum+ 11*c->mb_penalty_factor;
+    }
+}
+
+static void clip_input_mv(MpegEncContext * s, int16_t *mv, int interlaced){
+    int ymax= s->me.ymax>>interlaced;
+    int ymin= s->me.ymin>>interlaced;
+
+    if(mv[0] < s->me.xmin) mv[0] = s->me.xmin;
+    if(mv[0] > s->me.xmax) mv[0] = s->me.xmax;
+    if(mv[1] <       ymin) mv[1] =       ymin;
+    if(mv[1] >       ymax) mv[1] =       ymax;
+}
+
+static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int p_type){
+    MotionEstContext * const c= &s->me;
+    Picture *p= s->current_picture_ptr;
+    int mb_xy= mb_x + mb_y*s->mb_stride;
+    int xy= 2*mb_x + 2*mb_y*s->b8_stride;
+    int mb_type= s->current_picture.mb_type[mb_xy];
+    int flags= c->flags;
+    int shift= (flags&FLAG_QPEL) + 1;
+    int mask= (1<<shift)-1;
+    int x, y, i;
+    int d=0;
+    me_cmp_func cmpf= s->dsp.sse[0];
+    me_cmp_func chroma_cmpf= s->dsp.sse[1];
+
+    if(p_type && USES_LIST(mb_type, 1)){
+        av_log(c->avctx, AV_LOG_ERROR, "backward motion vector in P frame\n");
+        return INT_MAX/2;
+    }
+    assert(IS_INTRA(mb_type) || USES_LIST(mb_type,0) || USES_LIST(mb_type,1));
+
+    for(i=0; i<4; i++){
+        int xy= s->block_index[i];
+        clip_input_mv(s, p->motion_val[0][xy], !!IS_INTERLACED(mb_type));
+        clip_input_mv(s, p->motion_val[1][xy], !!IS_INTERLACED(mb_type));
+    }
+
+    if(IS_INTERLACED(mb_type)){
+        int xy2= xy  + s->b8_stride;
+        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;
+        c->stride<<=1;
+        c->uvstride<<=1;
+
+        if(!(s->flags & CODEC_FLAG_INTERLACED_ME)){
+            av_log(c->avctx, AV_LOG_ERROR, "Interlaced macroblock selected but interlaced motion estimation disabled\n");
+            return INT_MAX/2;
+        }
+
+        if(USES_LIST(mb_type, 0)){
+            int field_select0= p->ref_index[0][xy ];
+            int field_select1= p->ref_index[0][xy2];
+            assert(field_select0==0 ||field_select0==1);
+            assert(field_select1==0 ||field_select1==1);
+            init_interlaced_ref(s, 0);
+
+            if(p_type){
+                s->p_field_select_table[0][mb_xy]= field_select0;
+                s->p_field_select_table[1][mb_xy]= field_select1;
+                *(uint32_t*)s->p_field_mv_table[0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[0][xy ];
+                *(uint32_t*)s->p_field_mv_table[1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[0][xy2];
+                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER_I;
+            }else{
+                s->b_field_select_table[0][0][mb_xy]= field_select0;
+                s->b_field_select_table[0][1][mb_xy]= field_select1;
+                *(uint32_t*)s->b_field_mv_table[0][0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[0][xy ];
+                *(uint32_t*)s->b_field_mv_table[0][1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[0][xy2];
+                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_FORWARD_I;
+            }
+
+            x= p->motion_val[0][xy ][0];
+            y= p->motion_val[0][xy ][1];
+            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0, 0, cmpf, chroma_cmpf, flags);
+            x= p->motion_val[0][xy2][0];
+            y= p->motion_val[0][xy2][1];
+            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1, 1, cmpf, chroma_cmpf, flags);
+        }
+        if(USES_LIST(mb_type, 1)){
+            int field_select0= p->ref_index[1][xy ];
+            int field_select1= p->ref_index[1][xy2];
+            assert(field_select0==0 ||field_select0==1);
+            assert(field_select1==0 ||field_select1==1);
+            init_interlaced_ref(s, 2);
+
+            s->b_field_select_table[1][0][mb_xy]= field_select0;
+            s->b_field_select_table[1][1][mb_xy]= field_select1;
+            *(uint32_t*)s->b_field_mv_table[1][0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[1][xy ];
+            *(uint32_t*)s->b_field_mv_table[1][1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[1][xy2];
+            if(USES_LIST(mb_type, 0)){
+                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BIDIR_I;
+            }else{
+                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BACKWARD_I;
+            }
+
+            x= p->motion_val[1][xy ][0];
+            y= p->motion_val[1][xy ][1];
+            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0+2, 0, cmpf, chroma_cmpf, flags);
+            x= p->motion_val[1][xy2][0];
+            y= p->motion_val[1][xy2][1];
+            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1+2, 1, cmpf, chroma_cmpf, flags);
+            //FIXME bidir scores
+        }
+        c->stride>>=1;
+        c->uvstride>>=1;
+    }else if(IS_8X8(mb_type)){
+        if(!(s->flags & CODEC_FLAG_4MV)){
+            av_log(c->avctx, AV_LOG_ERROR, "4MV macroblock selected but 4MV encoding disabled\n");
+            return INT_MAX/2;
+        }
+        cmpf= s->dsp.sse[1];
+        chroma_cmpf= s->dsp.sse[1];
+        init_mv4_ref(c);
+        for(i=0; i<4; i++){
+            xy= s->block_index[i];
+            x= p->motion_val[0][xy][0];
+            y= p->motion_val[0][xy][1];
+            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 1, 8, i, i, cmpf, chroma_cmpf, flags);
+        }
+        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER4V;
+    }else{
+        if(USES_LIST(mb_type, 0)){
+            if(p_type){
+                *(uint32_t*)s->p_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
+                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER;
+            }else if(USES_LIST(mb_type, 1)){
+                *(uint32_t*)s->b_bidir_forw_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
+                *(uint32_t*)s->b_bidir_back_mv_table[mb_xy]= *(uint32_t*)p->motion_val[1][xy];
+                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BIDIR;
+            }else{
+                *(uint32_t*)s->b_forw_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
+                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_FORWARD;
+            }
+            x= p->motion_val[0][xy][0];
+            y= p->motion_val[0][xy][1];
+            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 0, 0, cmpf, chroma_cmpf, flags);
+        }else if(USES_LIST(mb_type, 1)){
+            *(uint32_t*)s->b_back_mv_table[mb_xy]= *(uint32_t*)p->motion_val[1][xy];
+            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BACKWARD;
+
+            x= p->motion_val[1][xy][0];
+            y= p->motion_val[1][xy][1];
+            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 2, 0, cmpf, chroma_cmpf, flags);
+        }else
+            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;
+    }
+    return d;
+}
+
+void ff_estimate_p_frame_motion(MpegEncContext * s,
+                                int mb_x, int mb_y)
+{
+    MotionEstContext * const c= &s->me;
+    uint8_t *pix, *ppix;
+    int sum, varc, vard, mx, my, dmin;
+    int P[10][2];
+    const int shift= 1+s->quarter_sample;
+    int mb_type=0;
+    Picture * const pic= &s->current_picture;
+
+    init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);
+
+    assert(s->quarter_sample==0 || s->quarter_sample==1);
+    assert(s->linesize == c->stride);
+    assert(s->uvlinesize == c->uvstride);
+
+    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
+    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
+    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
+    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
+
+    get_limits(s, 16*mb_x, 16*mb_y);
+    c->skip=0;
+
+    /* intra / predictive decision */
+    pix = c->src[0][0];
+    sum = s->dsp.pix_sum(pix, s->linesize);
+    varc = s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500;
+
+    pic->mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
+    pic->mb_var [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
+    c->mb_var_sum_temp += (varc+128)>>8;
+
+    if(c->avctx->me_threshold){
+        vard= check_input_motion(s, mb_x, mb_y, 1);
+
+        if((vard+128)>>8 < c->avctx->me_threshold){
+            int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
+            int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
+            pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
+            c->mc_mb_var_sum_temp += (vard+128)>>8;
+            c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);
+            return;
+        }
+        if((vard+128)>>8 < c->avctx->mb_threshold)
+            mb_type= s->mb_type[mb_x + mb_y*s->mb_stride];
+    }
+
+    switch(s->me_method) {
+    case ME_ZERO:
+    default:
+        no_motion_search(s, &mx, &my);
+        mx-= mb_x*16;
+        my-= mb_y*16;
+        dmin = 0;
+        break;
+#if 0
+    case ME_FULL:
+        dmin = full_motion_search(s, &mx, &my, range, ref_picture);
+        mx-= mb_x*16;
+        my-= mb_y*16;
+        break;
+    case ME_LOG:
+        dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
+        mx-= mb_x*16;
+        my-= mb_y*16;
+        break;
+    case ME_PHODS:
+        dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
+        mx-= mb_x*16;
+        my-= mb_y*16;
+        break;
+#endif
+    case ME_X1:
+    case ME_EPZS:
+       {
+            const int mot_stride = s->b8_stride;
+            const int mot_xy = s->block_index[0];
+
+            P_LEFT[0]       = s->current_picture.motion_val[0][mot_xy - 1][0];
+            P_LEFT[1]       = s->current_picture.motion_val[0][mot_xy - 1][1];
+
+            if(P_LEFT[0]       > (c->xmax<<shift)) P_LEFT[0]       = (c->xmax<<shift);
+
+            if(!s->first_slice_line) {
+                P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][0];
+                P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][1];
+                P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
+                P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1];
+                if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
+                if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
+                if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
+
+                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
+                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
+
+                if(s->out_format == FMT_H263){
+                    c->pred_x = P_MEDIAN[0];
+                    c->pred_y = P_MEDIAN[1];
+                }else { /* mpeg1 at least */
+                    c->pred_x= P_LEFT[0];
+                    c->pred_y= P_LEFT[1];
+                }
+            }else{
+                c->pred_x= P_LEFT[0];
+                c->pred_y= P_LEFT[1];
+            }
+
+        }
+        dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);
+
+        break;
+    }
+
+    /* At this point (mx,my) are full-pell and the relative displacement */
+    ppix = c->ref[0][0] + (my * s->linesize) + mx;
+
+    vard = s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16);
+
+    pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
+//    pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
+    c->mc_mb_var_sum_temp += (vard+128)>>8;
+
+#if 0
+    printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
+           varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
+#endif
+    if(mb_type){
+        int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
+        int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
+        c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);
+
+        if(mb_type == CANDIDATE_MB_TYPE_INTER){
+            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
+            set_p_mv_tables(s, mx, my, 1);
+        }else{
+            mx <<=shift;
+            my <<=shift;
+        }
+        if(mb_type == CANDIDATE_MB_TYPE_INTER4V){
+            h263_mv4_search(s, mx, my, shift);
+
+            set_p_mv_tables(s, mx, my, 0);
+        }
+        if(mb_type == CANDIDATE_MB_TYPE_INTER_I){
+            interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 1);
+        }
+    }else if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
+        int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
+        int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
+        c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);
+
+        if (vard*2 + 200*256 > varc)
+            mb_type|= CANDIDATE_MB_TYPE_INTRA;
+        if (varc*2 + 200*256 > vard || s->qscale > 24){
+//        if (varc*2 + 200*256 + 50*(s->lambda2>>FF_LAMBDA_SHIFT) > vard){
+            mb_type|= CANDIDATE_MB_TYPE_INTER;
+            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
+            if(s->flags&CODEC_FLAG_MV0)
+                if(mx || my)
+                    mb_type |= CANDIDATE_MB_TYPE_SKIPPED; //FIXME check difference
+        }else{
+            mx <<=shift;
+            my <<=shift;
+        }
+        if((s->flags&CODEC_FLAG_4MV)
+           && !c->skip && varc>50<<8 && vard>10<<8){
+            if(h263_mv4_search(s, mx, my, shift) < INT_MAX)
+                mb_type|=CANDIDATE_MB_TYPE_INTER4V;
+
+            set_p_mv_tables(s, mx, my, 0);
+        }else
+            set_p_mv_tables(s, mx, my, 1);
+        if((s->flags&CODEC_FLAG_INTERLACED_ME)
+           && !c->skip){ //FIXME varc/d checks
+            if(interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0) < INT_MAX)
+                mb_type |= CANDIDATE_MB_TYPE_INTER_I;
+        }
+    }else{
+        int intra_score, i;
+        mb_type= CANDIDATE_MB_TYPE_INTER;
+
+        dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
+        if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
+            dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);
+
+        if((s->flags&CODEC_FLAG_4MV)
+           && !c->skip && varc>50<<8 && vard>10<<8){
+            int dmin4= h263_mv4_search(s, mx, my, shift);
+            if(dmin4 < dmin){
+                mb_type= CANDIDATE_MB_TYPE_INTER4V;
+                dmin=dmin4;
+            }
+        }
+        if((s->flags&CODEC_FLAG_INTERLACED_ME)
+           && !c->skip){ //FIXME varc/d checks
+            int dmin_i= interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0);
+            if(dmin_i < dmin){
+                mb_type = CANDIDATE_MB_TYPE_INTER_I;
+                dmin= dmin_i;
+            }
+        }
+
+//        pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
+        set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V);
+
+        /* get intra luma score */
+        if((c->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
+            intra_score= varc - 500;
+        }else{
+            int mean= (sum+128)>>8;
+            mean*= 0x01010101;
+
+            for(i=0; i<16; i++){
+                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 0]) = mean;
+                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 4]) = mean;
+                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 8]) = mean;
+                *(uint32_t*)(&c->scratchpad[i*s->linesize+12]) = mean;
+            }
+
+            intra_score= s->dsp.mb_cmp[0](s, c->scratchpad, pix, s->linesize, 16);
+        }
+#if 0 //FIXME
+        /* get chroma score */
+        if(c->avctx->mb_cmp&FF_CMP_CHROMA){
+            for(i=1; i<3; i++){
+                uint8_t *dest_c;
+                int mean;
+
+                if(s->out_format == FMT_H263){
+                    mean= (s->dc_val[i][mb_x + mb_y*s->b8_stride] + 4)>>3; //FIXME not exact but simple ;)
+                }else{
+                    mean= (s->last_dc[i] + 4)>>3;
+                }
+                dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
+
+                mean*= 0x01010101;
+                for(i=0; i<8; i++){
+                    *(uint32_t*)(&c->scratchpad[i*s->uvlinesize+ 0]) = mean;
+                    *(uint32_t*)(&c->scratchpad[i*s->uvlinesize+ 4]) = mean;
+                }
+
+                intra_score+= s->dsp.mb_cmp[1](s, c->scratchpad, dest_c, s->uvlinesize);
+            }
+        }
+#endif
+        intra_score += c->mb_penalty_factor*16;
+
+        if(intra_score < dmin){
+            mb_type= CANDIDATE_MB_TYPE_INTRA;
+            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup
+        }else
+            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= 0;
+
+        {
+            int p_score= FFMIN(vard, varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*100);
+            int i_score= varc-500+(s->lambda2>>FF_LAMBDA_SHIFT)*20;
+            c->scene_change_score+= ff_sqrt(p_score) - ff_sqrt(i_score);
+        }
+    }
+
+    s->mb_type[mb_y*s->mb_stride + mb_x]= mb_type;
+}
+
+int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
+                                    int mb_x, int mb_y)
+{
+    MotionEstContext * const c= &s->me;
+    int mx, my, dmin;
+    int P[10][2];
+    const int shift= 1+s->quarter_sample;
+    const int xy= mb_x + mb_y*s->mb_stride;
+    init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);
+
+    assert(s->quarter_sample==0 || s->quarter_sample==1);
+
+    c->pre_penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp);
+    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
+
+    get_limits(s, 16*mb_x, 16*mb_y);
+    c->skip=0;
+
+    P_LEFT[0]       = s->p_mv_table[xy + 1][0];
+    P_LEFT[1]       = s->p_mv_table[xy + 1][1];
+
+    if(P_LEFT[0]       < (c->xmin<<shift)) P_LEFT[0]       = (c->xmin<<shift);
+
+    /* special case for first line */
+    if (s->first_slice_line) {
+        c->pred_x= P_LEFT[0];
+        c->pred_y= P_LEFT[1];
+        P_TOP[0]= P_TOPRIGHT[0]= P_MEDIAN[0]=
+        P_TOP[1]= P_TOPRIGHT[1]= P_MEDIAN[1]= 0; //FIXME
+    } else {
+        P_TOP[0]      = s->p_mv_table[xy + s->mb_stride    ][0];
+        P_TOP[1]      = s->p_mv_table[xy + s->mb_stride    ][1];
+        P_TOPRIGHT[0] = s->p_mv_table[xy + s->mb_stride - 1][0];
+        P_TOPRIGHT[1] = s->p_mv_table[xy + s->mb_stride - 1][1];
+        if(P_TOP[1]      < (c->ymin<<shift)) P_TOP[1]     = (c->ymin<<shift);
+        if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift);
+        if(P_TOPRIGHT[1] < (c->ymin<<shift)) P_TOPRIGHT[1]= (c->ymin<<shift);
+
+        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
+        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
+
+        c->pred_x = P_MEDIAN[0];
+        c->pred_y = P_MEDIAN[1];
+    }
+
+    dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);
+
+    s->p_mv_table[xy][0] = mx<<shift;
+    s->p_mv_table[xy][1] = my<<shift;
+
+    return dmin;
+}
+
+static int ff_estimate_motion_b(MpegEncContext * s,
+                       int mb_x, int mb_y, int16_t (*mv_table)[2], int ref_index, int f_code)
+{
+    MotionEstContext * const c= &s->me;
+    int mx, my, dmin;
+    int P[10][2];
+    const int shift= 1+s->quarter_sample;
+    const int mot_stride = s->mb_stride;
+    const int mot_xy = mb_y*mot_stride + mb_x;
+    uint8_t * const mv_penalty= c->mv_penalty[f_code] + MAX_MV;
+    int mv_scale;
+
+    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
+    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
+    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
+    c->current_mv_penalty= mv_penalty;
+
+    get_limits(s, 16*mb_x, 16*mb_y);
+
+    switch(s->me_method) {
+    case ME_ZERO:
+    default:
+        no_motion_search(s, &mx, &my);
+        dmin = 0;
+        mx-= mb_x*16;
+        my-= mb_y*16;
+        break;
+#if 0
+    case ME_FULL:
+        dmin = full_motion_search(s, &mx, &my, range, ref_picture);
+        mx-= mb_x*16;
+        my-= mb_y*16;
+        break;
+    case ME_LOG:
+        dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
+        mx-= mb_x*16;
+        my-= mb_y*16;
+        break;
+    case ME_PHODS:
+        dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
+        mx-= mb_x*16;
+        my-= mb_y*16;
+        break;
+#endif
+    case ME_X1:
+    case ME_EPZS:
+       {
+            P_LEFT[0]        = mv_table[mot_xy - 1][0];
+            P_LEFT[1]        = mv_table[mot_xy - 1][1];
+
+            if(P_LEFT[0]       > (c->xmax<<shift)) P_LEFT[0]       = (c->xmax<<shift);
+
+            /* special case for first line */
+            if (!s->first_slice_line) {
+                P_TOP[0] = mv_table[mot_xy - mot_stride             ][0];
+                P_TOP[1] = mv_table[mot_xy - mot_stride             ][1];
+                P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1         ][0];
+                P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1         ][1];
+                if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1]= (c->ymax<<shift);
+                if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
+                if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
+
+                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
+                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
+            }
+            c->pred_x= P_LEFT[0];
+            c->pred_y= P_LEFT[1];
+        }
+
+        if(mv_table == s->b_forw_mv_table){
+            mv_scale= (s->pb_time<<16) / (s->pp_time<<shift);
+        }else{
+            mv_scale= ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift);
+        }
+
+        dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale, 0, 16);
+
+        break;
+    }
+
+    dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, ref_index, 0, 16);
+
+    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
+        dmin= ff_get_mb_score(s, mx, my, 0, ref_index, 0, 16, 1);
+
+//printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
+//    s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
+    mv_table[mot_xy][0]= mx;
+    mv_table[mot_xy][1]= my;
+
+    return dmin;
+}
+
+static inline int check_bidir_mv(MpegEncContext * s,
+                   int motion_fx, int motion_fy,
+                   int motion_bx, int motion_by,
+                   int pred_fx, int pred_fy,
+                   int pred_bx, int pred_by,
+                   int size, int h)
+{
+    //FIXME optimize?
+    //FIXME better f_code prediction (max mv & distance)
+    //FIXME pointers
+    MotionEstContext * const c= &s->me;
+    uint8_t * const mv_penalty_f= c->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
+    uint8_t * const mv_penalty_b= c->mv_penalty[s->b_code] + MAX_MV; // f_code of the prev frame
+    int stride= c->stride;
+    uint8_t *dest_y = c->scratchpad;
+    uint8_t *ptr;
+    int dxy;
+    int src_x, src_y;
+    int fbmin;
+    uint8_t **src_data= c->src[0];
+    uint8_t **ref_data= c->ref[0];
+    uint8_t **ref2_data= c->ref[2];
+
+    if(s->quarter_sample){
+        dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
+        src_x = motion_fx >> 2;
+        src_y = motion_fy >> 2;
+
+        ptr = ref_data[0] + (src_y * stride) + src_x;
+        s->dsp.put_qpel_pixels_tab[0][dxy](dest_y    , ptr    , stride);
+
+        dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
+        src_x = motion_bx >> 2;
+        src_y = motion_by >> 2;
+
+        ptr = ref2_data[0] + (src_y * stride) + src_x;
+        s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);
+    }else{
+        dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
+        src_x = motion_fx >> 1;
+        src_y = motion_fy >> 1;
+
+        ptr = ref_data[0] + (src_y * stride) + src_x;
+        s->dsp.put_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);
+
+        dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
+        src_x = motion_bx >> 1;
+        src_y = motion_by >> 1;
+
+        ptr = ref2_data[0] + (src_y * stride) + src_x;
+        s->dsp.avg_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);
+    }
+
+    fbmin = (mv_penalty_f[motion_fx-pred_fx] + mv_penalty_f[motion_fy-pred_fy])*c->mb_penalty_factor
+           +(mv_penalty_b[motion_bx-pred_bx] + mv_penalty_b[motion_by-pred_by])*c->mb_penalty_factor
+           + s->dsp.mb_cmp[size](s, src_data[0], dest_y, stride, h); //FIXME new_pic
+
+    if(c->avctx->mb_cmp&FF_CMP_CHROMA){
+    }
+    //FIXME CHROMA !!!
+
+    return fbmin;
+}
+
+/* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
+static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
+{
+    MotionEstContext * const c= &s->me;
+    const int mot_stride = s->mb_stride;
+    const int xy = mb_y *mot_stride + mb_x;
+    int fbmin;
+    int pred_fx= s->b_bidir_forw_mv_table[xy-1][0];
+    int pred_fy= s->b_bidir_forw_mv_table[xy-1][1];
+    int pred_bx= s->b_bidir_back_mv_table[xy-1][0];
+    int pred_by= s->b_bidir_back_mv_table[xy-1][1];
+    int motion_fx= s->b_bidir_forw_mv_table[xy][0]= s->b_forw_mv_table[xy][0];
+    int motion_fy= s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
+    int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
+    int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
+    const int flags= c->sub_flags;
+    const int qpel= flags&FLAG_QPEL;
+    const int shift= 1+qpel;
+    const int xmin= c->xmin<<shift;
+    const int ymin= c->ymin<<shift;
+    const int xmax= c->xmax<<shift;
+    const int ymax= c->ymax<<shift;
+    uint8_t map[8][8][8][8];
+
+    memset(map,0,sizeof(map));
+#define BIDIR_MAP(fx,fy,bx,by) \
+    map[(motion_fx+fx)&7][(motion_fy+fy)&7][(motion_bx+bx)&7][(motion_by+by)&7]
+    BIDIR_MAP(0,0,0,0) = 1;
+
+    fbmin= check_bidir_mv(s, motion_fx, motion_fy,
+                          motion_bx, motion_by,
+                          pred_fx, pred_fy,
+                          pred_bx, pred_by,
+                          0, 16);
+
+    if(s->avctx->bidir_refine){
+        int score, end;
+#define CHECK_BIDIR(fx,fy,bx,by)\
+    if( !BIDIR_MAP(fx,fy,bx,by)\
+       &&(fx<=0 || motion_fx+fx<=xmax) && (fy<=0 || motion_fy+fy<=ymax) && (bx<=0 || motion_bx+bx<=xmax) && (by<=0 || motion_by+by<=ymax)\
+       &&(fx>=0 || motion_fx+fx>=xmin) && (fy>=0 || motion_fy+fy>=ymin) && (bx>=0 || motion_bx+bx>=xmin) && (by>=0 || motion_by+by>=ymin)){\
+        BIDIR_MAP(fx,fy,bx,by) = 1;\
+        score= check_bidir_mv(s, motion_fx+fx, motion_fy+fy, motion_bx+bx, motion_by+by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);\
+        if(score < fbmin){\
+            fbmin= score;\
+            motion_fx+=fx;\
+            motion_fy+=fy;\
+            motion_bx+=bx;\
+            motion_by+=by;\
+            end=0;\
+        }\
+    }
+#define CHECK_BIDIR2(a,b,c,d)\
+CHECK_BIDIR(a,b,c,d)\
+CHECK_BIDIR(-(a),-(b),-(c),-(d))
+
+#define CHECK_BIDIRR(a,b,c,d)\
+CHECK_BIDIR2(a,b,c,d)\
+CHECK_BIDIR2(b,c,d,a)\
+CHECK_BIDIR2(c,d,a,b)\
+CHECK_BIDIR2(d,a,b,c)
+
+        do{
+            end=1;
+
+            CHECK_BIDIRR( 0, 0, 0, 1)
+            if(s->avctx->bidir_refine > 1){
+                CHECK_BIDIRR( 0, 0, 1, 1)
+                CHECK_BIDIR2( 0, 1, 0, 1)
+                CHECK_BIDIR2( 1, 0, 1, 0)
+                CHECK_BIDIRR( 0, 0,-1, 1)
+                CHECK_BIDIR2( 0,-1, 0, 1)
+                CHECK_BIDIR2(-1, 0, 1, 0)
+                if(s->avctx->bidir_refine > 2){
+                    CHECK_BIDIRR( 0, 1, 1, 1)
+                    CHECK_BIDIRR( 0,-1, 1, 1)
+                    CHECK_BIDIRR( 0, 1,-1, 1)
+                    CHECK_BIDIRR( 0, 1, 1,-1)
+                    if(s->avctx->bidir_refine > 3){
+                        CHECK_BIDIR2( 1, 1, 1, 1)
+                        CHECK_BIDIRR( 1, 1, 1,-1)
+                        CHECK_BIDIR2( 1, 1,-1,-1)
+                        CHECK_BIDIR2( 1,-1,-1, 1)
+                        CHECK_BIDIR2( 1,-1, 1,-1)
+                    }
+                }
+            }
+        }while(!end);
+    }
+
+    s->b_bidir_forw_mv_table[xy][0]= motion_fx;
+    s->b_bidir_forw_mv_table[xy][1]= motion_fy;
+    s->b_bidir_back_mv_table[xy][0]= motion_bx;
+    s->b_bidir_back_mv_table[xy][1]= motion_by;
+
+    return fbmin;
+}
+
+static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
+{
+    MotionEstContext * const c= &s->me;
+    int P[10][2];
+    const int mot_stride = s->mb_stride;
+    const int mot_xy = mb_y*mot_stride + mb_x;
+    const int shift= 1+s->quarter_sample;
+    int dmin, i;
+    const int time_pp= s->pp_time;
+    const int time_pb= s->pb_time;
+    int mx, my, xmin, xmax, ymin, ymax;
+    int16_t (*mv_table)[2]= s->b_direct_mv_table;
+
+    c->current_mv_penalty= c->mv_penalty[1] + MAX_MV;
+    ymin= xmin=(-32)>>shift;
+    ymax= xmax=   31>>shift;
+
+    if(IS_8X8(s->next_picture.mb_type[mot_xy])){
+        s->mv_type= MV_TYPE_8X8;
+    }else{
+        s->mv_type= MV_TYPE_16X16;
+    }
+
+    for(i=0; i<4; i++){
+        int index= s->block_index[i];
+        int min, max;
+
+        c->co_located_mv[i][0]= s->next_picture.motion_val[0][index][0];
+        c->co_located_mv[i][1]= s->next_picture.motion_val[0][index][1];
+        c->direct_basis_mv[i][0]= c->co_located_mv[i][0]*time_pb/time_pp + ((i& 1)<<(shift+3));
+        c->direct_basis_mv[i][1]= c->co_located_mv[i][1]*time_pb/time_pp + ((i>>1)<<(shift+3));
+//        c->direct_basis_mv[1][i][0]= c->co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(shift+3);
+//        c->direct_basis_mv[1][i][1]= c->co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(shift+3);
+
+        max= FFMAX(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
+        min= FFMIN(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
+        max+= 16*mb_x + 1; // +-1 is for the simpler rounding
+        min+= 16*mb_x - 1;
+        xmax= FFMIN(xmax, s->width - max);
+        xmin= FFMAX(xmin, - 16     - min);
+
+        max= FFMAX(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
+        min= FFMIN(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
+        max+= 16*mb_y + 1; // +-1 is for the simpler rounding
+        min+= 16*mb_y - 1;
+        ymax= FFMIN(ymax, s->height - max);
+        ymin= FFMAX(ymin, - 16      - min);
+
+        if(s->mv_type == MV_TYPE_16X16) break;
+    }
+
+    assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);
+
+    if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
+        s->b_direct_mv_table[mot_xy][0]= 0;
+        s->b_direct_mv_table[mot_xy][1]= 0;
+
+        return 256*256*256*64;
+    }
+
+    c->xmin= xmin;
+    c->ymin= ymin;
+    c->xmax= xmax;
+    c->ymax= ymax;
+    c->flags     |= FLAG_DIRECT;
+    c->sub_flags |= FLAG_DIRECT;
+    c->pred_x=0;
+    c->pred_y=0;
+
+    P_LEFT[0]        = clip(mv_table[mot_xy - 1][0], xmin<<shift, xmax<<shift);
+    P_LEFT[1]        = clip(mv_table[mot_xy - 1][1], ymin<<shift, ymax<<shift);
+
+    /* special case for first line */
+    if (!s->first_slice_line) { //FIXME maybe allow this over thread boundary as its cliped
+        P_TOP[0]      = clip(mv_table[mot_xy - mot_stride             ][0], xmin<<shift, xmax<<shift);
+        P_TOP[1]      = clip(mv_table[mot_xy - mot_stride             ][1], ymin<<shift, ymax<<shift);
+        P_TOPRIGHT[0] = clip(mv_table[mot_xy - mot_stride + 1         ][0], xmin<<shift, xmax<<shift);
+        P_TOPRIGHT[1] = clip(mv_table[mot_xy - mot_stride + 1         ][1], ymin<<shift, ymax<<shift);
+
+        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
+        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
+    }
+
+    dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift), 0, 16);
+    if(c->sub_flags&FLAG_QPEL)
+        dmin = qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
+    else
+        dmin = hpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
+
+    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
+        dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);
+
+    get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed
+
+    s->b_direct_mv_table[mot_xy][0]= mx;
+    s->b_direct_mv_table[mot_xy][1]= my;
+    c->flags     &= ~FLAG_DIRECT;
+    c->sub_flags &= ~FLAG_DIRECT;
+
+    return dmin;
+}
+
+void ff_estimate_b_frame_motion(MpegEncContext * s,
+                             int mb_x, int mb_y)
+{
+    MotionEstContext * const c= &s->me;
+    const int penalty_factor= c->mb_penalty_factor;
+    int fmin, bmin, dmin, fbmin, bimin, fimin;
+    int type=0;
+    const int xy = mb_y*s->mb_stride + mb_x;
+    init_ref(c, s->new_picture.data, s->last_picture.data, s->next_picture.data, 16*mb_x, 16*mb_y, 2);
+
+    get_limits(s, 16*mb_x, 16*mb_y);
+
+    c->skip=0;
+    if(c->avctx->me_threshold){
+        int vard= check_input_motion(s, mb_x, mb_y, 0);
+
+        if((vard+128)>>8 < c->avctx->me_threshold){
+//            pix = c->src[0][0];
+//            sum = s->dsp.pix_sum(pix, s->linesize);
+//            varc = s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500;
+
+//            pic->mb_var   [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
+             s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
+/*            pic->mb_mean  [s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
+            c->mb_var_sum_temp    += (varc+128)>>8;*/
+            c->mc_mb_var_sum_temp += (vard+128)>>8;
+/*            if (vard <= 64<<8 || vard < varc) {
+                c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
+            }else{
+                c->scene_change_score+= s->qscale * s->avctx->scenechange_factor;
+            }*/
+            return;
+        }
+        if((vard+128)>>8 < c->avctx->mb_threshold){
+            type= s->mb_type[mb_y*s->mb_stride + mb_x];
+            if(type == CANDIDATE_MB_TYPE_DIRECT){
+                direct_search(s, mb_x, mb_y);
+            }
+            if(type == CANDIDATE_MB_TYPE_FORWARD || type == CANDIDATE_MB_TYPE_BIDIR){
+                c->skip=0;
+                ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code);
+            }
+            if(type == CANDIDATE_MB_TYPE_BACKWARD || type == CANDIDATE_MB_TYPE_BIDIR){
+                c->skip=0;
+                ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code);
+            }
+            if(type == CANDIDATE_MB_TYPE_FORWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
+                c->skip=0;
+                c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
+                interlaced_search(s, 0,
+                                        s->b_field_mv_table[0], s->b_field_select_table[0],
+                                        s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 1);
+            }
+            if(type == CANDIDATE_MB_TYPE_BACKWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
+                c->skip=0;
+                c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
+                interlaced_search(s, 2,
+                                        s->b_field_mv_table[1], s->b_field_select_table[1],
+                                        s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 1);
+            }
+            return;
+        }
+    }
+
+    if (s->codec_id == CODEC_ID_MPEG4)
+        dmin= direct_search(s, mb_x, mb_y);
+    else
+        dmin= INT_MAX;
+//FIXME penalty stuff for non mpeg4
+    c->skip=0;
+    fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code) + 3*penalty_factor;
+
+    c->skip=0;
+    bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code) + 2*penalty_factor;
+//printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
+
+    c->skip=0;
+    fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;
+//printf("%d %d %d %d\n", dmin, fmin, bmin, fbmin);
+
+    if(s->flags & CODEC_FLAG_INTERLACED_ME){
+//FIXME mb type penalty
+        c->skip=0;
+        c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
+        fimin= interlaced_search(s, 0,
+                                 s->b_field_mv_table[0], s->b_field_select_table[0],
+                                 s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 0);
+        c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
+        bimin= interlaced_search(s, 2,
+                                 s->b_field_mv_table[1], s->b_field_select_table[1],
+                                 s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 0);
+    }else
+        fimin= bimin= INT_MAX;
+
+    {
+        int score= fmin;
+        type = CANDIDATE_MB_TYPE_FORWARD;
+
+        if (dmin <= score){
+            score = dmin;
+            type = CANDIDATE_MB_TYPE_DIRECT;
+        }
+        if(bmin<score){
+            score=bmin;
+            type= CANDIDATE_MB_TYPE_BACKWARD;
+        }
+        if(fbmin<score){
+            score=fbmin;
+            type= CANDIDATE_MB_TYPE_BIDIR;
+        }
+        if(fimin<score){
+            score=fimin;
+            type= CANDIDATE_MB_TYPE_FORWARD_I;
+        }
+        if(bimin<score){
+            score=bimin;
+            type= CANDIDATE_MB_TYPE_BACKWARD_I;
+        }
+
+        score= ((unsigned)(score*score + 128*256))>>16;
+        c->mc_mb_var_sum_temp += score;
+        s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
+    }
+
+    if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
+        type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT;
+        if(fimin < INT_MAX)
+            type |= CANDIDATE_MB_TYPE_FORWARD_I;
+        if(bimin < INT_MAX)
+            type |= CANDIDATE_MB_TYPE_BACKWARD_I;
+        if(fimin < INT_MAX && bimin < INT_MAX){
+            type |= CANDIDATE_MB_TYPE_BIDIR_I;
+        }
+         //FIXME something smarter
+        if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
+#if 0
+        if(s->out_format == FMT_MPEG1)
+            type |= CANDIDATE_MB_TYPE_INTRA;
+#endif
+    }
+
+    s->mb_type[mb_y*s->mb_stride + mb_x]= type;
+}
+
+/* find best f_code for ME which do unlimited searches */
+int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
+{
+    if(s->me_method>=ME_EPZS){
+        int score[8];
+        int i, y, range= s->avctx->me_range ? s->avctx->me_range : (INT_MAX/2);
+        uint8_t * fcode_tab= s->fcode_tab;
+        int best_fcode=-1;
+        int best_score=-10000000;
+
+        if(s->msmpeg4_version)
+            range= FFMIN(range, 16);
+        else if(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL)
+            range= FFMIN(range, 256);
+
+        for(i=0; i<8; i++) score[i]= s->mb_num*(8-i);
+
+        for(y=0; y<s->mb_height; y++){
+            int x;
+            int xy= y*s->mb_stride;
+            for(x=0; x<s->mb_width; x++){
+                if(s->mb_type[xy] & type){
+                    int mx= mv_table[xy][0];
+                    int my= mv_table[xy][1];
+                    int fcode= FFMAX(fcode_tab[mx + MAX_MV],
+                                     fcode_tab[my + MAX_MV]);
+                    int j;
+
+                        if(mx >= range || mx < -range ||
+                           my >= range || my < -range)
+                            continue;
+
+                    for(j=0; j<fcode && j<8; j++){
+                        if(s->pict_type==B_TYPE || s->current_picture.mc_mb_var[xy] < s->current_picture.mb_var[xy])
+                            score[j]-= 170;
+                    }
+                }
+                xy++;
+            }
+        }
+
+        for(i=1; i<8; i++){
+            if(score[i] > best_score){
+                best_score= score[i];
+                best_fcode= i;
+            }
+//            printf("%d %d\n", i, score[i]);
+        }
+
+//    printf("fcode: %d type: %d\n", i, s->pict_type);
+        return best_fcode;
+/*        for(i=0; i<=MAX_FCODE; i++){
+            printf("%d ", mv_num[i]);
+        }
+        printf("\n");*/
+    }else{
+        return 1;
+    }
+}
+
+void ff_fix_long_p_mvs(MpegEncContext * s)
+{
+    MotionEstContext * const c= &s->me;
+    const int f_code= s->f_code;
+    int y, range;
+    assert(s->pict_type==P_TYPE);
+
+    range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);
+
+    assert(range <= 16 || !s->msmpeg4_version);
+    assert(range <=256 || !(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL));
+
+    if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;
+
+//printf("%d no:%d %d//\n", clip, noclip, f_code);
+    if(s->flags&CODEC_FLAG_4MV){
+        const int wrap= s->b8_stride;
+
+        /* clip / convert to intra 8x8 type MVs */
+        for(y=0; y<s->mb_height; y++){
+            int xy= y*2*wrap;
+            int i= y*s->mb_stride;
+            int x;
+
+            for(x=0; x<s->mb_width; x++){
+                if(s->mb_type[i]&CANDIDATE_MB_TYPE_INTER4V){
+                    int block;
+                    for(block=0; block<4; block++){
+                        int off= (block& 1) + (block>>1)*wrap;
+                        int mx= s->current_picture.motion_val[0][ xy + off ][0];
+                        int my= s->current_picture.motion_val[0][ xy + off ][1];
+
+                        if(   mx >=range || mx <-range
+                           || my >=range || my <-range){
+                            s->mb_type[i] &= ~CANDIDATE_MB_TYPE_INTER4V;
+                            s->mb_type[i] |= CANDIDATE_MB_TYPE_INTRA;
+                            s->current_picture.mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
+                        }
+                    }
+                }
+                xy+=2;
+                i++;
+            }
+        }
+    }
+}
+
+/**
+ *
+ * @param truncate 1 for truncation, 0 for using intra
+ */
+void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
+                     int16_t (*mv_table)[2], int f_code, int type, int truncate)
+{
+    MotionEstContext * const c= &s->me;
+    int y, h_range, v_range;
+
+    // RAL: 8 in MPEG-1, 16 in MPEG-4
+    int range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);
+
+    if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;
+
+    h_range= range;
+    v_range= field_select_table ? range>>1 : range;
+
+    /* clip / convert to intra 16x16 type MVs */
+    for(y=0; y<s->mb_height; y++){
+        int x;
+        int xy= y*s->mb_stride;
+        for(x=0; x<s->mb_width; x++){
+            if (s->mb_type[xy] & type){    // RAL: "type" test added...
+                if(field_select_table==NULL || field_select_table[xy] == field_select){
+                    if(   mv_table[xy][0] >=h_range || mv_table[xy][0] <-h_range
+                       || mv_table[xy][1] >=v_range || mv_table[xy][1] <-v_range){
+
+                        if(truncate){
+                            if     (mv_table[xy][0] > h_range-1) mv_table[xy][0]=  h_range-1;
+                            else if(mv_table[xy][0] < -h_range ) mv_table[xy][0]= -h_range;
+                            if     (mv_table[xy][1] > v_range-1) mv_table[xy][1]=  v_range-1;
+                            else if(mv_table[xy][1] < -v_range ) mv_table[xy][1]= -v_range;
+                        }else{
+                            s->mb_type[xy] &= ~type;
+                            s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA;
+                            mv_table[xy][0]=
+                            mv_table[xy][1]= 0;
+                        }
+                    }
+                }
+            }
+            xy++;
+        }
+    }
+}
diff --git a/contrib/ffmpeg/libavcodec/motion_est_template.c b/contrib/ffmpeg/libavcodec/motion_est_template.c
new file mode 100644
index 000000000..193a8b24e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/motion_est_template.c
@@ -0,0 +1,1166 @@
+/*
+ * Motion estimation
+ * Copyright (c) 2002-2004 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file motion_est_template.c
+ * Motion estimation template.
+ */
+
+//lets hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
+#define LOAD_COMMON\
+    uint32_t attribute_unused * const score_map= c->score_map;\
+    const int attribute_unused xmin= c->xmin;\
+    const int attribute_unused ymin= c->ymin;\
+    const int attribute_unused xmax= c->xmax;\
+    const int attribute_unused ymax= c->ymax;\
+    uint8_t *mv_penalty= c->current_mv_penalty;\
+    const int pred_x= c->pred_x;\
+    const int pred_y= c->pred_y;\
+
+#define CHECK_HALF_MV(dx, dy, x, y)\
+{\
+    const int hx= 2*(x)+(dx);\
+    const int hy= 2*(y)+(dy);\
+    d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
+    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
+    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
+}
+
+#if 0
+static int hpel_motion_search)(MpegEncContext * s,
+                                  int *mx_ptr, int *my_ptr, int dmin,
+                                  uint8_t *ref_data[3],
+                                  int size)
+{
+    const int xx = 16 * s->mb_x + 8*(n&1);
+    const int yy = 16 * s->mb_y + 8*(n>>1);
+    const int mx = *mx_ptr;
+    const int my = *my_ptr;
+    const int penalty_factor= c->sub_penalty_factor;
+
+    LOAD_COMMON
+
+ //   INIT;
+ //FIXME factorize
+    me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;
+
+    if(s->no_rounding /*FIXME b_type*/){
+        hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
+        chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
+    }else{
+        hpel_put=& s->dsp.put_pixels_tab[size];
+        chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
+    }
+    cmpf= s->dsp.me_cmp[size];
+    chroma_cmpf= s->dsp.me_cmp[size+1];
+    cmp_sub= s->dsp.me_sub_cmp[size];
+    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
+
+    if(c->skip){ //FIXME somehow move up (benchmark)
+        *mx_ptr = 0;
+        *my_ptr = 0;
+        return dmin;
+    }
+
+    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
+        CMP_HPEL(dmin, 0, 0, mx, my, size);
+        if(mx || my)
+            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
+    }
+
+    if (mx > xmin && mx < xmax &&
+        my > ymin && my < ymax) {
+        int bx=2*mx, by=2*my;
+        int d= dmin;
+
+        CHECK_HALF_MV(1, 1, mx-1, my-1)
+        CHECK_HALF_MV(0, 1, mx  , my-1)
+        CHECK_HALF_MV(1, 1, mx  , my-1)
+        CHECK_HALF_MV(1, 0, mx-1, my  )
+        CHECK_HALF_MV(1, 0, mx  , my  )
+        CHECK_HALF_MV(1, 1, mx-1, my  )
+        CHECK_HALF_MV(0, 1, mx  , my  )
+        CHECK_HALF_MV(1, 1, mx  , my  )
+
+        assert(bx >= xmin*2 || bx <= xmax*2 || by >= ymin*2 || by <= ymax*2);
+
+        *mx_ptr = bx;
+        *my_ptr = by;
+    }else{
+        *mx_ptr =2*mx;
+        *my_ptr =2*my;
+    }
+
+    return dmin;
+}
+
+#else
+static int hpel_motion_search(MpegEncContext * s,
+                                  int *mx_ptr, int *my_ptr, int dmin,
+                                  int src_index, int ref_index,
+                                  int size, int h)
+{
+    MotionEstContext * const c= &s->me;
+    const int mx = *mx_ptr;
+    const int my = *my_ptr;
+    const int penalty_factor= c->sub_penalty_factor;
+    me_cmp_func cmp_sub, chroma_cmp_sub;
+    int bx=2*mx, by=2*my;
+
+    LOAD_COMMON
+    int flags= c->sub_flags;
+
+ //FIXME factorize
+
+    cmp_sub= s->dsp.me_sub_cmp[size];
+    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
+
+    if(c->skip){ //FIXME move out of hpel?
+        *mx_ptr = 0;
+        *my_ptr = 0;
+        return dmin;
+    }
+
+    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
+        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
+        if(mx || my || size>0)
+            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
+    }
+
+    if (mx > xmin && mx < xmax &&
+        my > ymin && my < ymax) {
+        int d= dmin;
+        const int index= (my<<ME_MAP_SHIFT) + mx;
+        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
+                     + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
+        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
+                     + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
+        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
+                     + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
+        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
+                     + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
+
+#if 1
+        int key;
+        int map_generation= c->map_generation;
+#ifndef NDEBUG
+        uint32_t *map= c->map;
+#endif
+        key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
+        assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
+        key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
+        assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
+        key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
+        assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
+        key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
+        assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
+#endif
+        if(t<=b){
+            CHECK_HALF_MV(0, 1, mx  ,my-1)
+            if(l<=r){
+                CHECK_HALF_MV(1, 1, mx-1, my-1)
+                if(t+r<=b+l){
+                    CHECK_HALF_MV(1, 1, mx  , my-1)
+                }else{
+                    CHECK_HALF_MV(1, 1, mx-1, my  )
+                }
+                CHECK_HALF_MV(1, 0, mx-1, my  )
+            }else{
+                CHECK_HALF_MV(1, 1, mx  , my-1)
+                if(t+l<=b+r){
+                    CHECK_HALF_MV(1, 1, mx-1, my-1)
+                }else{
+                    CHECK_HALF_MV(1, 1, mx  , my  )
+                }
+                CHECK_HALF_MV(1, 0, mx  , my  )
+            }
+        }else{
+            if(l<=r){
+                if(t+l<=b+r){
+                    CHECK_HALF_MV(1, 1, mx-1, my-1)
+                }else{
+                    CHECK_HALF_MV(1, 1, mx  , my  )
+                }
+                CHECK_HALF_MV(1, 0, mx-1, my)
+                CHECK_HALF_MV(1, 1, mx-1, my)
+            }else{
+                if(t+r<=b+l){
+                    CHECK_HALF_MV(1, 1, mx  , my-1)
+                }else{
+                    CHECK_HALF_MV(1, 1, mx-1, my)
+                }
+                CHECK_HALF_MV(1, 0, mx  , my)
+                CHECK_HALF_MV(1, 1, mx  , my)
+            }
+            CHECK_HALF_MV(0, 1, mx  , my)
+        }
+        assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
+    }
+
+    *mx_ptr = bx;
+    *my_ptr = by;
+
+    return dmin;
+}
+#endif
+
+static int no_sub_motion_search(MpegEncContext * s,
+          int *mx_ptr, int *my_ptr, int dmin,
+                                  int src_index, int ref_index,
+                                  int size, int h)
+{
+    (*mx_ptr)<<=1;
+    (*my_ptr)<<=1;
+    return dmin;
+}
+
+inline int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
+                               int ref_index, int size, int h, int add_rate)
+{
+//    const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
+    MotionEstContext * const c= &s->me;
+    const int penalty_factor= c->mb_penalty_factor;
+    const int flags= c->mb_flags;
+    const int qpel= flags & FLAG_QPEL;
+    const int mask= 1+2*qpel;
+    me_cmp_func cmp_sub, chroma_cmp_sub;
+    int d;
+
+    LOAD_COMMON
+
+ //FIXME factorize
+
+    cmp_sub= s->dsp.mb_cmp[size];
+    chroma_cmp_sub= s->dsp.mb_cmp[size+1];
+
+//    assert(!c->skip);
+//    assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp);
+
+    d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
+    //FIXME check cbp before adding penalty for (0,0) vector
+    if(add_rate && (mx || my || size>0))
+        d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
+
+    return d;
+}
+
+#define CHECK_QUARTER_MV(dx, dy, x, y)\
+{\
+    const int hx= 4*(x)+(dx);\
+    const int hy= 4*(y)+(dy);\
+    d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
+    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
+    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
+}
+
+static int qpel_motion_search(MpegEncContext * s,
+                                  int *mx_ptr, int *my_ptr, int dmin,
+                                  int src_index, int ref_index,
+                                  int size, int h)
+{
+    MotionEstContext * const c= &s->me;
+    const int mx = *mx_ptr;
+    const int my = *my_ptr;
+    const int penalty_factor= c->sub_penalty_factor;
+    const int map_generation= c->map_generation;
+    const int subpel_quality= c->avctx->me_subpel_quality;
+    uint32_t *map= c->map;
+    me_cmp_func cmpf, chroma_cmpf;
+    me_cmp_func cmp_sub, chroma_cmp_sub;
+
+    LOAD_COMMON
+    int flags= c->sub_flags;
+
+    cmpf= s->dsp.me_cmp[size];
+    chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
+ //FIXME factorize
+
+    cmp_sub= s->dsp.me_sub_cmp[size];
+    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
+
+    if(c->skip){ //FIXME somehow move up (benchmark)
+        *mx_ptr = 0;
+        *my_ptr = 0;
+        return dmin;
+    }
+
+    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
+        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
+        if(mx || my || size>0)
+            dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
+    }
+
+    if (mx > xmin && mx < xmax &&
+        my > ymin && my < ymax) {
+        int bx=4*mx, by=4*my;
+        int d= dmin;
+        int i, nx, ny;
+        const int index= (my<<ME_MAP_SHIFT) + mx;
+        const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
+        const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
+        const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
+        const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
+        const int c= score_map[(index                    )&(ME_MAP_SIZE-1)];
+        int best[8];
+        int best_pos[8][2];
+
+        memset(best, 64, sizeof(int)*8);
+#if 1
+        if(s->me.dia_size>=2){
+            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
+            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
+            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
+            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
+
+            for(ny= -3; ny <= 3; ny++){
+                for(nx= -3; nx <= 3; nx++){
+                    //FIXME this could overflow (unlikely though)
+                    const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
+                    const int64_t c2= nx*nx*( r +  l - 2*c) + 4*nx*( r- l) + 32*c;
+                    const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
+                    int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
+                    int i;
+
+                    if((nx&3)==0 && (ny&3)==0) continue;
+
+                    score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
+
+//                    if(nx&1) score-=1024*c->penalty_factor;
+//                    if(ny&1) score-=1024*c->penalty_factor;
+
+                    for(i=0; i<8; i++){
+                        if(score < best[i]){
+                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
+                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
+                            best[i]= score;
+                            best_pos[i][0]= nx + 4*mx;
+                            best_pos[i][1]= ny + 4*my;
+                            break;
+                        }
+                    }
+                }
+            }
+        }else{
+            int tl;
+            //FIXME this could overflow (unlikely though)
+            const int cx = 4*(r - l);
+            const int cx2= r + l - 2*c;
+            const int cy = 4*(b - t);
+            const int cy2= b + t - 2*c;
+            int cxy;
+
+            if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
+                tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
+            }else{
+                tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
+            }
+
+            cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
+
+            assert(16*cx2 + 4*cx + 32*c == 32*r);
+            assert(16*cx2 - 4*cx + 32*c == 32*l);
+            assert(16*cy2 + 4*cy + 32*c == 32*b);
+            assert(16*cy2 - 4*cy + 32*c == 32*t);
+            assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
+
+            for(ny= -3; ny <= 3; ny++){
+                for(nx= -3; nx <= 3; nx++){
+                    //FIXME this could overflow (unlikely though)
+                    int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
+                    int i;
+
+                    if((nx&3)==0 && (ny&3)==0) continue;
+
+                    score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
+//                    if(nx&1) score-=32*c->penalty_factor;
+  //                  if(ny&1) score-=32*c->penalty_factor;
+
+                    for(i=0; i<8; i++){
+                        if(score < best[i]){
+                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
+                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
+                            best[i]= score;
+                            best_pos[i][0]= nx + 4*mx;
+                            best_pos[i][1]= ny + 4*my;
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+        for(i=0; i<subpel_quality; i++){
+            nx= best_pos[i][0];
+            ny= best_pos[i][1];
+            CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
+        }
+
+#if 0
+            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
+            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
+            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
+            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
+//            if(l < r && l < t && l < b && l < tl && l < bl && l < tr && l < br && bl < tl){
+            if(tl<br){
+
+//            nx= FFMAX(4*mx - bx, bx - 4*mx);
+//            ny= FFMAX(4*my - by, by - 4*my);
+
+            static int stats[7][7], count;
+            count++;
+            stats[4*mx - bx + 3][4*my - by + 3]++;
+            if(256*256*256*64 % count ==0){
+                for(i=0; i<49; i++){
+                    if((i%7)==0) printf("\n");
+                    printf("%6d ", stats[0][i]);
+                }
+                printf("\n");
+            }
+            }
+#endif
+#else
+
+        CHECK_QUARTER_MV(2, 2, mx-1, my-1)
+        CHECK_QUARTER_MV(0, 2, mx  , my-1)
+        CHECK_QUARTER_MV(2, 2, mx  , my-1)
+        CHECK_QUARTER_MV(2, 0, mx  , my  )
+        CHECK_QUARTER_MV(2, 2, mx  , my  )
+        CHECK_QUARTER_MV(0, 2, mx  , my  )
+        CHECK_QUARTER_MV(2, 2, mx-1, my  )
+        CHECK_QUARTER_MV(2, 0, mx-1, my  )
+
+        nx= bx;
+        ny= by;
+
+        for(i=0; i<8; i++){
+            int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1};
+            int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1};
+            CHECK_QUARTER_MV((nx + ox[i])&3, (ny + oy[i])&3, (nx + ox[i])>>2, (ny + oy[i])>>2)
+        }
+#endif
+#if 0
+        //outer ring
+        CHECK_QUARTER_MV(1, 3, mx-1, my-1)
+        CHECK_QUARTER_MV(1, 2, mx-1, my-1)
+        CHECK_QUARTER_MV(1, 1, mx-1, my-1)
+        CHECK_QUARTER_MV(2, 1, mx-1, my-1)
+        CHECK_QUARTER_MV(3, 1, mx-1, my-1)
+        CHECK_QUARTER_MV(0, 1, mx  , my-1)
+        CHECK_QUARTER_MV(1, 1, mx  , my-1)
+        CHECK_QUARTER_MV(2, 1, mx  , my-1)
+        CHECK_QUARTER_MV(3, 1, mx  , my-1)
+        CHECK_QUARTER_MV(3, 2, mx  , my-1)
+        CHECK_QUARTER_MV(3, 3, mx  , my-1)
+        CHECK_QUARTER_MV(3, 0, mx  , my  )
+        CHECK_QUARTER_MV(3, 1, mx  , my  )
+        CHECK_QUARTER_MV(3, 2, mx  , my  )
+        CHECK_QUARTER_MV(3, 3, mx  , my  )
+        CHECK_QUARTER_MV(2, 3, mx  , my  )
+        CHECK_QUARTER_MV(1, 3, mx  , my  )
+        CHECK_QUARTER_MV(0, 3, mx  , my  )
+        CHECK_QUARTER_MV(3, 3, mx-1, my  )
+        CHECK_QUARTER_MV(2, 3, mx-1, my  )
+        CHECK_QUARTER_MV(1, 3, mx-1, my  )
+        CHECK_QUARTER_MV(1, 2, mx-1, my  )
+        CHECK_QUARTER_MV(1, 1, mx-1, my  )
+        CHECK_QUARTER_MV(1, 0, mx-1, my  )
+#endif
+        assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
+
+        *mx_ptr = bx;
+        *my_ptr = by;
+    }else{
+        *mx_ptr =4*mx;
+        *my_ptr =4*my;
+    }
+
+    return dmin;
+}
+
+
+#define CHECK_MV(x,y)\
+{\
+    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
+    const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
+    assert((x) >= xmin);\
+    assert((x) <= xmax);\
+    assert((y) >= ymin);\
+    assert((y) <= ymax);\
+/*printf("check_mv %d %d\n", x, y);*/\
+    if(map[index]!=key){\
+        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
+        map[index]= key;\
+        score_map[index]= d;\
+        d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
+/*printf("score:%d\n", d);*/\
+        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
+    }\
+}
+
+#define CHECK_CLIPED_MV(ax,ay)\
+{\
+    const int Lx= ax;\
+    const int Ly= ay;\
+    const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
+    const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
+    CHECK_MV(Lx2, Ly2)\
+}
+
+#define CHECK_MV_DIR(x,y,new_dir)\
+{\
+    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
+    const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
+/*printf("check_mv_dir %d %d %d\n", x, y, new_dir);*/\
+    if(map[index]!=key){\
+        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
+        map[index]= key;\
+        score_map[index]= d;\
+        d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
+/*printf("score:%d\n", d);*/\
+        if(d<dmin){\
+            best[0]=x;\
+            best[1]=y;\
+            dmin=d;\
+            next_dir= new_dir;\
+        }\
+    }\
+}
+
+#define check(x,y,S,v)\
+if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
+if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
+if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
+if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
+
+#define LOAD_COMMON2\
+    uint32_t *map= c->map;\
+    const int qpel= flags&FLAG_QPEL;\
+    const int shift= 1+qpel;\
+
+static always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
+                                       int src_index, int ref_index, int const penalty_factor,
+                                       int size, int h, int flags)
+{
+    MotionEstContext * const c= &s->me;
+    me_cmp_func cmpf, chroma_cmpf;
+    int next_dir=-1;
+    LOAD_COMMON
+    LOAD_COMMON2
+    int map_generation= c->map_generation;
+
+    cmpf= s->dsp.me_cmp[size];
+    chroma_cmpf= s->dsp.me_cmp[size+1];
+
+    { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
+        const int key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
+        const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
+        if(map[index]!=key){ //this will be executed only very rarey
+            score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
+            map[index]= key;
+        }
+    }
+
+    for(;;){
+        int d;
+        const int dir= next_dir;
+        const int x= best[0];
+        const int y= best[1];
+        next_dir=-1;
+
+//printf("%d", dir);
+        if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0)
+        if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
+        if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
+        if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)
+
+        if(next_dir==-1){
+            return dmin;
+        }
+    }
+}
+
+static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
+                                       int src_index, int ref_index, int const penalty_factor,
+                                       int size, int h, int flags)
+{
+    MotionEstContext * const c= &s->me;
+    me_cmp_func cmpf, chroma_cmpf;
+    int dia_size;
+    LOAD_COMMON
+    LOAD_COMMON2
+    int map_generation= c->map_generation;
+
+    cmpf= s->dsp.me_cmp[size];
+    chroma_cmpf= s->dsp.me_cmp[size+1];
+
+    for(dia_size=1; dia_size<=4; dia_size++){
+        int dir;
+        const int x= best[0];
+        const int y= best[1];
+
+        if(dia_size&(dia_size-1)) continue;
+
+        if(   x + dia_size > xmax
+           || x - dia_size < xmin
+           || y + dia_size > ymax
+           || y - dia_size < ymin)
+           continue;
+
+        for(dir= 0; dir<dia_size; dir+=2){
+            int d;
+
+            CHECK_MV(x + dir           , y + dia_size - dir);
+            CHECK_MV(x + dia_size - dir, y - dir           );
+            CHECK_MV(x - dir           , y - dia_size + dir);
+            CHECK_MV(x - dia_size + dir, y + dir           );
+        }
+
+        if(x!=best[0] || y!=best[1])
+            dia_size=0;
+#if 0
+{
+int dx, dy, i;
+static int stats[8*8];
+dx= FFABS(x-best[0]);
+dy= FFABS(y-best[1]);
+if(dy>dx){
+    dx^=dy; dy^=dx; dx^=dy;
+}
+stats[dy*8 + dx] ++;
+if(256*256*256*64 % (stats[0]+1)==0){
+    for(i=0; i<64; i++){
+        if((i&7)==0) printf("\n");
+        printf("%8d ", stats[i]);
+    }
+    printf("\n");
+}
+}
+#endif
+    }
+    return dmin;
+}
+
+static int umh_search(MpegEncContext * s, int *best, int dmin,
+                                       int src_index, int ref_index, int const penalty_factor,
+                                       int size, int h, int flags)
+{
+    MotionEstContext * const c= &s->me;
+    me_cmp_func cmpf, chroma_cmpf;
+    LOAD_COMMON
+    LOAD_COMMON2
+    int map_generation= c->map_generation;
+    int x,y,x2,y2, i, j, d;
+    static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
+                                 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
+                                 {-2, 3}, { 0, 4}, { 2, 3},
+                                 {-2,-3}, { 0,-4}, { 2,-3},};
+    static const int hex2[6][2]={{-2, 0}, { 2,0}, {-1,-2}, {1,-2}, {-1,2},{1,2}};
+
+    cmpf= s->dsp.me_cmp[size];
+    chroma_cmpf= s->dsp.me_cmp[size+1];
+
+    x= best[0];
+    y= best[1];
+    for(x2=FFMAX(x-15, xmin); x2<=FFMIN(x+15,xmax); x2+=2){
+        CHECK_MV(x2, y);
+    }
+    for(y2=FFMAX(y- 7, ymin); y2<=FFMIN(y+ 7,ymax); y2+=2){
+        CHECK_MV(x, y2);
+    }
+
+    x= best[0];
+    y= best[1];
+    for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
+        for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
+            CHECK_MV(x2, y2);
+        }
+    }
+
+//FIXME prevent the CLIP stuff
+
+    for(j=1; j<=4; j++){
+        for(i=0; i<16; i++){
+            CHECK_CLIPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
+        }
+    }
+
+    do{
+        x= best[0];
+        y= best[1];
+        for(i=0; i<6; i++){
+            CHECK_CLIPED_MV(x+hex2[i][0], y+hex2[i][1]);
+        }
+    }while(best[0] != x || best[1] != y);
+
+    do{
+        x= best[0];
+        y= best[1];
+        CHECK_CLIPED_MV(x+1, y);
+        CHECK_CLIPED_MV(x, y+1);
+        CHECK_CLIPED_MV(x-1, y);
+        CHECK_CLIPED_MV(x, y-1);
+    }while(best[0] != x || best[1] != y);
+
+    return dmin;
+}
+
+#define SAB_CHECK_MV(ax,ay)\
+{\
+    const int key= ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
+    const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
+/*printf("sab check %d %d\n", ax, ay);*/\
+    if(map[index]!=key){\
+        d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
+        map[index]= key;\
+        score_map[index]= d;\
+        d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
+/*printf("score: %d\n", d);*/\
+        if(d < minima[minima_count-1].height){\
+            int j=0;\
+            \
+            while(d >= minima[j].height) j++;\
+\
+            memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
+\
+            minima[j].checked= 0;\
+            minima[j].height= d;\
+            minima[j].x= ax;\
+            minima[j].y= ay;\
+            \
+            i=-1;\
+            continue;\
+        }\
+    }\
+}
+
+#define MAX_SAB_SIZE ME_MAP_SIZE
+static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
+                                       int src_index, int ref_index, int const penalty_factor,
+                                       int size, int h, int flags)
+{
+    MotionEstContext * const c= &s->me;
+    me_cmp_func cmpf, chroma_cmpf;
+    Minima minima[MAX_SAB_SIZE];
+    const int minima_count= FFABS(c->dia_size);
+    int i, j;
+    LOAD_COMMON
+    LOAD_COMMON2
+    int map_generation= c->map_generation;
+
+    cmpf= s->dsp.me_cmp[size];
+    chroma_cmpf= s->dsp.me_cmp[size+1];
+
+    for(j=i=0; i<ME_MAP_SIZE; i++){
+        uint32_t key= map[i];
+
+        key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
+
+        if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
+
+        assert(j<MAX_SAB_SIZE); //max j = number of predictors
+
+        minima[j].height= score_map[i];
+        minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
+        minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
+        minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
+        minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
+        minima[j].checked=0;
+        if(minima[j].x || minima[j].y)
+            minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
+
+        j++;
+    }
+
+    qsort(minima, j, sizeof(Minima), minima_cmp);
+
+    for(; j<minima_count; j++){
+        minima[j].height=256*256*256*64;
+        minima[j].checked=0;
+        minima[j].x= minima[j].y=0;
+    }
+
+    for(i=0; i<minima_count; i++){
+        const int x= minima[i].x;
+        const int y= minima[i].y;
+        int d;
+
+        if(minima[i].checked) continue;
+
+        if(   x >= xmax || x <= xmin
+           || y >= ymax || y <= ymin)
+           continue;
+
+        SAB_CHECK_MV(x-1, y)
+        SAB_CHECK_MV(x+1, y)
+        SAB_CHECK_MV(x  , y-1)
+        SAB_CHECK_MV(x  , y+1)
+
+        minima[i].checked= 1;
+    }
+
+    best[0]= minima[0].x;
+    best[1]= minima[0].y;
+    dmin= minima[0].height;
+
+    if(   best[0] < xmax && best[0] > xmin
+       && best[1] < ymax && best[1] > ymin){
+        int d;
+        //ensure that the refernece samples for hpel refinement are in the map
+        CHECK_MV(best[0]-1, best[1])
+        CHECK_MV(best[0]+1, best[1])
+        CHECK_MV(best[0], best[1]-1)
+        CHECK_MV(best[0], best[1]+1)
+    }
+    return dmin;
+}
+
+static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
+                                       int src_index, int ref_index, int const penalty_factor,
+                                       int size, int h, int flags)
+{
+    MotionEstContext * const c= &s->me;
+    me_cmp_func cmpf, chroma_cmpf;
+    int dia_size;
+    LOAD_COMMON
+    LOAD_COMMON2
+    int map_generation= c->map_generation;
+
+    cmpf= s->dsp.me_cmp[size];
+    chroma_cmpf= s->dsp.me_cmp[size+1];
+
+    for(dia_size=1; dia_size<=c->dia_size; dia_size++){
+        int dir, start, end;
+        const int x= best[0];
+        const int y= best[1];
+
+        start= FFMAX(0, y + dia_size - ymax);
+        end  = FFMIN(dia_size, xmax - x + 1);
+        for(dir= start; dir<end; dir++){
+            int d;
+
+//check(x + dir,y + dia_size - dir,0, a0)
+            CHECK_MV(x + dir           , y + dia_size - dir);
+        }
+
+        start= FFMAX(0, x + dia_size - xmax);
+        end  = FFMIN(dia_size, y - ymin + 1);
+        for(dir= start; dir<end; dir++){
+            int d;
+
+//check(x + dia_size - dir, y - dir,0, a1)
+            CHECK_MV(x + dia_size - dir, y - dir           );
+        }
+
+        start= FFMAX(0, -y + dia_size + ymin );
+        end  = FFMIN(dia_size, x - xmin + 1);
+        for(dir= start; dir<end; dir++){
+            int d;
+
+//check(x - dir,y - dia_size + dir,0, a2)
+            CHECK_MV(x - dir           , y - dia_size + dir);
+        }
+
+        start= FFMAX(0, -x + dia_size + xmin );
+        end  = FFMIN(dia_size, ymax - y + 1);
+        for(dir= start; dir<end; dir++){
+            int d;
+
+//check(x - dia_size + dir, y + dir,0, a3)
+            CHECK_MV(x - dia_size + dir, y + dir           );
+        }
+
+        if(x!=best[0] || y!=best[1])
+            dia_size=0;
+#if 0
+{
+int dx, dy, i;
+static int stats[8*8];
+dx= FFABS(x-best[0]);
+dy= FFABS(y-best[1]);
+stats[dy*8 + dx] ++;
+if(256*256*256*64 % (stats[0]+1)==0){
+    for(i=0; i<64; i++){
+        if((i&7)==0) printf("\n");
+        printf("%6d ", stats[i]);
+    }
+    printf("\n");
+}
+}
+#endif
+    }
+    return dmin;
+}
+
+static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
+                                       int src_index, int ref_index, int const penalty_factor,
+                                       int size, int h, int flags){
+    MotionEstContext * const c= &s->me;
+    if(c->dia_size==-1)
+        return           umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
+    else if(c->dia_size<-1)
+        return   sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
+    else if(c->dia_size<2)
+        return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
+    else
+        return   var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
+}
+
+static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
+                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
+                             int ref_mv_scale, int flags, int size, int h)
+{
+    MotionEstContext * const c= &s->me;
+    int best[2]={0, 0};
+    int d, dmin;
+    int map_generation;
+    int penalty_factor;
+    const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
+    const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
+    me_cmp_func cmpf, chroma_cmpf;
+
+    LOAD_COMMON
+    LOAD_COMMON2
+
+    if(c->pre_pass){
+        penalty_factor= c->pre_penalty_factor;
+        cmpf= s->dsp.me_pre_cmp[size];
+        chroma_cmpf= s->dsp.me_pre_cmp[size+1];
+    }else{
+        penalty_factor= c->penalty_factor;
+        cmpf= s->dsp.me_cmp[size];
+        chroma_cmpf= s->dsp.me_cmp[size+1];
+    }
+
+    map_generation= update_map_generation(c);
+
+    assert(cmpf);
+    dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
+    map[0]= map_generation;
+    score_map[0]= dmin;
+
+    /* first line */
+    if (s->first_slice_line) {
+        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+        CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
+                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
+    }else{
+        if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
+                    && ( P_LEFT[0]    |P_LEFT[1]
+                        |P_TOP[0]     |P_TOP[1]
+                        |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
+            *mx_ptr= 0;
+            *my_ptr= 0;
+            c->skip=1;
+            return dmin;
+        }
+        CHECK_MV(    P_MEDIAN[0] >>shift ,    P_MEDIAN[1] >>shift)
+        CHECK_CLIPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)-1)
+        CHECK_CLIPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)+1)
+        CHECK_CLIPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift)  )
+        CHECK_CLIPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift)  )
+            CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
+                            (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
+            CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
+            CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
+            CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
+    }
+    if(dmin>h*h*4){
+        if(c->pre_pass){
+            CHECK_CLIPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
+                            (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
+            if(!s->first_slice_line)
+                CHECK_CLIPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
+                                (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
+        }else{
+            CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
+                            (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
+            if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
+                CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
+                                (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
+        }
+    }
+
+    if(c->avctx->last_predictor_count){
+        const int count= c->avctx->last_predictor_count;
+        const int xstart= FFMAX(0, s->mb_x - count);
+        const int ystart= FFMAX(0, s->mb_y - count);
+        const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
+        const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
+        int mb_y;
+
+        for(mb_y=ystart; mb_y<yend; mb_y++){
+            int mb_x;
+            for(mb_x=xstart; mb_x<xend; mb_x++){
+                const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
+                int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
+                int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
+
+                if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
+                CHECK_MV(mx,my)
+            }
+        }
+    }
+
+//check(best[0],best[1],0, b0)
+    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
+
+//check(best[0],best[1],0, b1)
+    *mx_ptr= best[0];
+    *my_ptr= best[1];
+
+//    printf("%d %d %d \n", best[0], best[1], dmin);
+    return dmin;
+}
+
+//this function is dedicated to the braindamaged gcc
+inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
+                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
+                             int ref_mv_scale, int size, int h)
+{
+    MotionEstContext * const c= &s->me;
+//FIXME convert other functions in the same way if faster
+    if(c->flags==0 && h==16 && size==0){
+        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
+//    case FLAG_QPEL:
+//        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
+    }else{
+        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
+    }
+}
+
+static int epzs_motion_search4(MpegEncContext * s,
+                             int *mx_ptr, int *my_ptr, int P[10][2],
+                             int src_index, int ref_index, int16_t (*last_mv)[2],
+                             int ref_mv_scale)
+{
+    MotionEstContext * const c= &s->me;
+    int best[2]={0, 0};
+    int d, dmin;
+    int map_generation;
+    const int penalty_factor= c->penalty_factor;
+    const int size=1;
+    const int h=8;
+    const int ref_mv_stride= s->mb_stride;
+    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
+    me_cmp_func cmpf, chroma_cmpf;
+    LOAD_COMMON
+    int flags= c->flags;
+    LOAD_COMMON2
+
+    cmpf= s->dsp.me_cmp[size];
+    chroma_cmpf= s->dsp.me_cmp[size+1];
+
+    map_generation= update_map_generation(c);
+
+    dmin = 1000000;
+//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
+    /* first line */
+    if (s->first_slice_line) {
+        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+        CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
+                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
+        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
+    }else{
+        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
+        //FIXME try some early stop
+            CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
+            CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+            CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
+            CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
+            CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
+                            (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
+    }
+    if(dmin>64*4){
+        CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
+                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
+        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
+            CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
+                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
+    }
+
+    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
+
+    *mx_ptr= best[0];
+    *my_ptr= best[1];
+
+//    printf("%d %d %d \n", best[0], best[1], dmin);
+    return dmin;
+}
+
+//try to merge with above FIXME (needs PSNR test)
+static int epzs_motion_search2(MpegEncContext * s,
+                             int *mx_ptr, int *my_ptr, int P[10][2],
+                             int src_index, int ref_index, int16_t (*last_mv)[2],
+                             int ref_mv_scale)
+{
+    MotionEstContext * const c= &s->me;
+    int best[2]={0, 0};
+    int d, dmin;
+    int map_generation;
+    const int penalty_factor= c->penalty_factor;
+    const int size=0; //FIXME pass as arg
+    const int h=8;
+    const int ref_mv_stride= s->mb_stride;
+    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
+    me_cmp_func cmpf, chroma_cmpf;
+    LOAD_COMMON
+    int flags= c->flags;
+    LOAD_COMMON2
+
+    cmpf= s->dsp.me_cmp[size];
+    chroma_cmpf= s->dsp.me_cmp[size+1];
+
+    map_generation= update_map_generation(c);
+
+    dmin = 1000000;
+//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
+    /* first line */
+    if (s->first_slice_line) {
+        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+        CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
+                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
+        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
+    }else{
+        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
+        //FIXME try some early stop
+            CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
+            CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+            CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
+            CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
+            CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
+                            (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
+    }
+    if(dmin>64*4){
+        CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
+                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
+        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
+            CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
+                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
+    }
+
+    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
+
+    *mx_ptr= best[0];
+    *my_ptr= best[1];
+
+//    printf("%d %d %d \n", best[0], best[1], dmin);
+    return dmin;
+}
diff --git a/contrib/ffmpeg/libavcodec/motion_test.c b/contrib/ffmpeg/libavcodec/motion_test.c
new file mode 100644
index 000000000..8540b7483
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/motion_test.c
@@ -0,0 +1,159 @@
+/*
+ * (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file motion_test.c
+ * motion test.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "dsputil.h"
+
+#include "i386/mmx.h"
+
+#undef printf
+
+int pix_abs16x16_mmx(uint8_t *blk1, uint8_t *blk2, int lx);
+int pix_abs16x16_mmx1(uint8_t *blk1, uint8_t *blk2, int lx);
+int pix_abs16x16_c(uint8_t *blk1, uint8_t *blk2, int lx);
+int pix_abs16x16_x2_mmx(uint8_t *blk1, uint8_t *blk2, int lx);
+int pix_abs16x16_x2_mmx1(uint8_t *blk1, uint8_t *blk2, int lx);
+int pix_abs16x16_x2_c(uint8_t *blk1, uint8_t *blk2, int lx);
+int pix_abs16x16_y2_mmx(uint8_t *blk1, uint8_t *blk2, int lx);
+int pix_abs16x16_y2_mmx1(uint8_t *blk1, uint8_t *blk2, int lx);
+int pix_abs16x16_y2_c(uint8_t *blk1, uint8_t *blk2, int lx);
+int pix_abs16x16_xy2_mmx(uint8_t *blk1, uint8_t *blk2, int lx);
+int pix_abs16x16_xy2_mmx1(uint8_t *blk1, uint8_t *blk2, int lx);
+int pix_abs16x16_xy2_c(uint8_t *blk1, uint8_t *blk2, int lx);
+
+typedef int motion_func(uint8_t *blk1, uint8_t *blk2, int lx);
+
+#define WIDTH 64
+#define HEIGHT 64
+
+uint8_t img1[WIDTH * HEIGHT];
+uint8_t img2[WIDTH * HEIGHT];
+
+void fill_random(uint8_t *tab, int size)
+{
+    int i;
+    for(i=0;i<size;i++) {
+#if 1
+        tab[i] = random() % 256;
+#else
+        tab[i] = i;
+#endif
+    }
+}
+
+void help(void)
+{
+    printf("motion-test [-h]\n"
+           "test motion implementations\n");
+    exit(1);
+}
+
+int64_t gettime(void)
+{
+    struct timeval tv;
+    gettimeofday(&tv,NULL);
+    return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
+}
+
+#define NB_ITS 500
+
+int dummy;
+
+void test_motion(const char *name,
+                 motion_func *test_func, motion_func *ref_func)
+{
+    int x, y, d1, d2, it;
+    uint8_t *ptr;
+    int64_t ti;
+    printf("testing '%s'\n", name);
+
+    /* test correctness */
+    for(it=0;it<20;it++) {
+
+        fill_random(img1, WIDTH * HEIGHT);
+        fill_random(img2, WIDTH * HEIGHT);
+
+        for(y=0;y<HEIGHT-17;y++) {
+            for(x=0;x<WIDTH-17;x++) {
+                ptr = img2 + y * WIDTH + x;
+                d1 = test_func(img1, ptr, WIDTH);
+                d2 = ref_func(img1, ptr, WIDTH);
+                if (d1 != d2) {
+                    printf("error: mmx=%d c=%d\n", d1, d2);
+                }
+            }
+        }
+    }
+    emms();
+
+    /* speed test */
+    ti = gettime();
+    d1 = 0;
+    for(it=0;it<NB_ITS;it++) {
+        for(y=0;y<HEIGHT-17;y++) {
+            for(x=0;x<WIDTH-17;x++) {
+                ptr = img2 + y * WIDTH + x;
+                d1 += test_func(img1, ptr, WIDTH);
+            }
+        }
+    }
+    emms();
+    dummy = d1; /* avoid optimisation */
+    ti = gettime() - ti;
+
+    printf("  %0.0f kop/s\n",
+           (double)NB_ITS * (WIDTH - 16) * (HEIGHT - 16) /
+           (double)(ti / 1000.0));
+}
+
+
+int main(int argc, char **argv)
+{
+    int c;
+
+    for(;;) {
+        c = getopt(argc, argv, "h");
+        if (c == -1)
+            break;
+        switch(c) {
+        case 'h':
+            help();
+            break;
+        }
+    }
+
+    printf("ffmpeg motion test\n");
+
+    test_motion("mmx", pix_abs16x16_mmx, pix_abs16x16_c);
+    test_motion("mmx_x2", pix_abs16x16_x2_mmx, pix_abs16x16_x2_c);
+    test_motion("mmx_y2", pix_abs16x16_y2_mmx, pix_abs16x16_y2_c);
+    test_motion("mmx_xy2", pix_abs16x16_xy2_mmx, pix_abs16x16_xy2_c);
+    return 0;
+}
diff --git a/contrib/ffmpeg/libavcodec/mp3lameaudio.c b/contrib/ffmpeg/libavcodec/mp3lameaudio.c
new file mode 100644
index 000000000..d13350265
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mp3lameaudio.c
@@ -0,0 +1,221 @@
+/*
+ * Interface to libmp3lame for mp3 encoding
+ * Copyright (c) 2002 Lennert Buytenhek <buytenh@gnu.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file mp3lameaudio.c
+ * Interface to libmp3lame for mp3 encoding.
+ */
+
+#include "avcodec.h"
+#include "mpegaudio.h"
+#include <lame/lame.h>
+
+#define BUFFER_SIZE (2*MPA_FRAME_SIZE)
+typedef struct Mp3AudioContext {
+    lame_global_flags *gfp;
+    int stereo;
+    uint8_t buffer[BUFFER_SIZE];
+    int buffer_index;
+} Mp3AudioContext;
+
+static int MP3lame_encode_init(AVCodecContext *avctx)
+{
+    Mp3AudioContext *s = avctx->priv_data;
+
+    if (avctx->channels > 2)
+        return -1;
+
+    s->stereo = avctx->channels > 1 ? 1 : 0;
+
+    if ((s->gfp = lame_init()) == NULL)
+        goto err;
+    lame_set_in_samplerate(s->gfp, avctx->sample_rate);
+    lame_set_out_samplerate(s->gfp, avctx->sample_rate);
+    lame_set_num_channels(s->gfp, avctx->channels);
+    /* lame 3.91 dies on quality != 5 */
+    lame_set_quality(s->gfp, 5);
+    /* lame 3.91 doesn't work in mono */
+    lame_set_mode(s->gfp, JOINT_STEREO);
+    lame_set_brate(s->gfp, avctx->bit_rate/1000);
+    if(avctx->flags & CODEC_FLAG_QSCALE) {
+        lame_set_brate(s->gfp, 0);
+        lame_set_VBR(s->gfp, vbr_default);
+        lame_set_VBR_q(s->gfp, avctx->global_quality / (float)FF_QP2LAMBDA);
+    }
+    lame_set_bWriteVbrTag(s->gfp,0);
+    if (lame_init_params(s->gfp) < 0)
+        goto err_close;
+
+    avctx->frame_size = lame_get_framesize(s->gfp);
+
+    avctx->coded_frame= avcodec_alloc_frame();
+    avctx->coded_frame->key_frame= 1;
+
+    return 0;
+
+err_close:
+    lame_close(s->gfp);
+err:
+    return -1;
+}
+
+static const int sSampleRates[3] = {
+    44100, 48000,  32000
+};
+
+static const int sBitRates[2][3][15] = {
+    {   {  0, 32, 64, 96,128,160,192,224,256,288,320,352,384,416,448},
+        {  0, 32, 48, 56, 64, 80, 96,112,128,160,192,224,256,320,384},
+        {  0, 32, 40, 48, 56, 64, 80, 96,112,128,160,192,224,256,320}
+    },
+    {   {  0, 32, 48, 56, 64, 80, 96,112,128,144,160,176,192,224,256},
+        {  0,  8, 16, 24, 32, 40, 48, 56, 64, 80, 96,112,128,144,160},
+        {  0,  8, 16, 24, 32, 40, 48, 56, 64, 80, 96,112,128,144,160}
+    },
+};
+
+static const int sSamplesPerFrame[2][3] =
+{
+    {  384,     1152,    1152 },
+    {  384,     1152,     576 }
+};
+
+static const int sBitsPerSlot[3] = {
+    32,
+    8,
+    8
+};
+
+static int mp3len(void *data, int *samplesPerFrame, int *sampleRate)
+{
+    uint8_t *dataTmp = (uint8_t *)data;
+    uint32_t header = ( (uint32_t)dataTmp[0] << 24 ) | ( (uint32_t)dataTmp[1] << 16 ) | ( (uint32_t)dataTmp[2] << 8 ) | (uint32_t)dataTmp[3];
+    int layerID = 3 - ((header >> 17) & 0x03);
+    int bitRateID = ((header >> 12) & 0x0f);
+    int sampleRateID = ((header >> 10) & 0x03);
+    int bitsPerSlot = sBitsPerSlot[layerID];
+    int isPadded = ((header >> 9) & 0x01);
+    static int const mode_tab[4]= {2,3,1,0};
+    int mode= mode_tab[(header >> 19) & 0x03];
+    int mpeg_id= mode>0;
+    int temp0, temp1, bitRate;
+
+    if ( (( header >> 21 ) & 0x7ff) != 0x7ff || mode == 3 || layerID==3 || sampleRateID==3) {
+        return -1;
+    }
+
+    if(!samplesPerFrame) samplesPerFrame= &temp0;
+    if(!sampleRate     ) sampleRate     = &temp1;
+
+//    *isMono = ((header >>  6) & 0x03) == 0x03;
+
+    *sampleRate = sSampleRates[sampleRateID]>>mode;
+    bitRate = sBitRates[mpeg_id][layerID][bitRateID] * 1000;
+    *samplesPerFrame = sSamplesPerFrame[mpeg_id][layerID];
+//av_log(NULL, AV_LOG_DEBUG, "sr:%d br:%d spf:%d l:%d m:%d\n", *sampleRate, bitRate, *samplesPerFrame, layerID, mode);
+
+    return *samplesPerFrame * bitRate / (bitsPerSlot * *sampleRate) + isPadded;
+}
+
+static int MP3lame_encode_frame(AVCodecContext *avctx,
+                                unsigned char *frame, int buf_size, void *data)
+{
+    Mp3AudioContext *s = avctx->priv_data;
+    int len;
+    int lame_result;
+
+    /* lame 3.91 dies on '1-channel interleaved' data */
+
+    if(data){
+        if (s->stereo) {
+            lame_result = lame_encode_buffer_interleaved(
+                s->gfp,
+                data,
+                avctx->frame_size,
+                s->buffer + s->buffer_index,
+                BUFFER_SIZE - s->buffer_index
+                );
+        } else {
+            lame_result = lame_encode_buffer(
+                s->gfp,
+                data,
+                data,
+                avctx->frame_size,
+                s->buffer + s->buffer_index,
+                BUFFER_SIZE - s->buffer_index
+                );
+        }
+    }else{
+        lame_result= lame_encode_flush(
+                s->gfp,
+                s->buffer + s->buffer_index,
+                BUFFER_SIZE - s->buffer_index
+                );
+    }
+
+    if(lame_result==-1) {
+        /* output buffer too small */
+        av_log(avctx, AV_LOG_ERROR, "lame: output buffer too small (buffer index: %d, free bytes: %d)\n", s->buffer_index, BUFFER_SIZE - s->buffer_index);
+        return 0;
+    }
+
+    s->buffer_index += lame_result;
+
+    if(s->buffer_index<4)
+        return 0;
+
+        len= mp3len(s->buffer, NULL, NULL);
+//av_log(avctx, AV_LOG_DEBUG, "in:%d packet-len:%d index:%d\n", avctx->frame_size, len, s->buffer_index);
+        if(len <= s->buffer_index){
+            memcpy(frame, s->buffer, len);
+            s->buffer_index -= len;
+
+            memmove(s->buffer, s->buffer+len, s->buffer_index);
+            //FIXME fix the audio codec API, so we dont need the memcpy()
+/*for(i=0; i<len; i++){
+    av_log(avctx, AV_LOG_DEBUG, "%2X ", frame[i]);
+}*/
+            return len;
+        }else
+            return 0;
+}
+
+static int MP3lame_encode_close(AVCodecContext *avctx)
+{
+    Mp3AudioContext *s = avctx->priv_data;
+
+    av_freep(&avctx->coded_frame);
+
+    lame_close(s->gfp);
+    return 0;
+}
+
+
+AVCodec mp3lame_encoder = {
+    "mp3",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_MP3,
+    sizeof(Mp3AudioContext),
+    MP3lame_encode_init,
+    MP3lame_encode_frame,
+    MP3lame_encode_close,
+    .capabilities= CODEC_CAP_DELAY,
+};
diff --git a/contrib/ffmpeg/libavcodec/mpeg12.c b/contrib/ffmpeg/libavcodec/mpeg12.c
new file mode 100644
index 000000000..c392e17af
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mpeg12.c
@@ -0,0 +1,3493 @@
+/*
+ * MPEG1 codec / MPEG2 decoder
+ * Copyright (c) 2000,2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file mpeg12.c
+ * MPEG1/2 codec
+ */
+
+//#define DEBUG
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+#include "mpeg12data.h"
+
+//#undef NDEBUG
+//#include <assert.h>
+
+
+/* Start codes. */
+#define SEQ_END_CODE            0x000001b7
+#define SEQ_START_CODE          0x000001b3
+#define GOP_START_CODE          0x000001b8
+#define PICTURE_START_CODE      0x00000100
+#define SLICE_MIN_START_CODE    0x00000101
+#define SLICE_MAX_START_CODE    0x000001af
+#define EXT_START_CODE          0x000001b5
+#define USER_START_CODE         0x000001b2
+
+#define DC_VLC_BITS 9
+#define MV_VLC_BITS 9
+#define MBINCR_VLC_BITS 9
+#define MB_PAT_VLC_BITS 9
+#define MB_PTYPE_VLC_BITS 6
+#define MB_BTYPE_VLC_BITS 6
+#define TEX_VLC_BITS 9
+
+#ifdef CONFIG_ENCODERS
+static void mpeg1_encode_block(MpegEncContext *s,
+                         DCTELEM *block,
+                         int component);
+static void mpeg1_encode_motion(MpegEncContext *s, int val, int f_or_b_code);    // RAL: f_code parameter added
+#endif //CONFIG_ENCODERS
+static inline int mpeg1_decode_block_inter(MpegEncContext *s,
+                              DCTELEM *block,
+                              int n);
+static inline int mpeg1_decode_block_intra(MpegEncContext *s,
+                              DCTELEM *block,
+                              int n);
+static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n);
+static inline int mpeg2_decode_block_non_intra(MpegEncContext *s,
+                                        DCTELEM *block,
+                                        int n);
+static inline int mpeg2_decode_block_intra(MpegEncContext *s,
+                                    DCTELEM *block,
+                                    int n);
+static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s, DCTELEM *block, int n);
+static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n);
+static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred);
+static void exchange_uv(MpegEncContext *s);
+
+#ifdef HAVE_XVMC
+extern int XVMC_field_start(MpegEncContext *s, AVCodecContext *avctx);
+extern int XVMC_field_end(MpegEncContext *s);
+extern void XVMC_pack_pblocks(MpegEncContext *s,int cbp);
+extern void XVMC_init_block(MpegEncContext *s);//set s->block
+#endif
+
+static const enum PixelFormat pixfmt_yuv_420[]= {PIX_FMT_YUV420P,-1};
+static const enum PixelFormat pixfmt_yuv_422[]= {PIX_FMT_YUV422P,-1};
+static const enum PixelFormat pixfmt_yuv_444[]= {PIX_FMT_YUV444P,-1};
+static const enum PixelFormat pixfmt_xvmc_mpg2_420[] = {
+                                           PIX_FMT_XVMC_MPEG2_IDCT,
+                                           PIX_FMT_XVMC_MPEG2_MC,
+                                           -1};
+#ifdef CONFIG_ENCODERS
+static uint8_t (*mv_penalty)[MAX_MV*2+1]= NULL;
+static uint8_t fcode_tab[MAX_MV*2+1];
+
+static uint8_t  uni_mpeg1_ac_vlc_len [64*64*2];
+static uint8_t  uni_mpeg2_ac_vlc_len [64*64*2];
+
+/* simple include everything table for dc, first byte is bits number next 3 are code*/
+static uint32_t mpeg1_lum_dc_uni[512];
+static uint32_t mpeg1_chr_dc_uni[512];
+
+static uint8_t mpeg1_index_run[2][64];
+static int8_t mpeg1_max_level[2][64];
+#endif //CONFIG_ENCODERS
+
+static void init_2d_vlc_rl(RLTable *rl, int use_static)
+{
+    int i;
+
+    init_vlc(&rl->vlc, TEX_VLC_BITS, rl->n + 2,
+             &rl->table_vlc[0][1], 4, 2,
+             &rl->table_vlc[0][0], 4, 2, use_static);
+
+    if(use_static)
+        rl->rl_vlc[0]= av_mallocz_static(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
+    else
+        rl->rl_vlc[0]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
+
+    for(i=0; i<rl->vlc.table_size; i++){
+        int code= rl->vlc.table[i][0];
+        int len = rl->vlc.table[i][1];
+        int level, run;
+
+        if(len==0){ // illegal code
+            run= 65;
+            level= MAX_LEVEL;
+        }else if(len<0){ //more bits needed
+            run= 0;
+            level= code;
+        }else{
+            if(code==rl->n){ //esc
+                run= 65;
+                level= 0;
+            }else if(code==rl->n+1){ //eob
+                run= 0;
+                level= 127;
+            }else{
+                run=   rl->table_run  [code] + 1;
+                level= rl->table_level[code];
+            }
+        }
+        rl->rl_vlc[0][i].len= len;
+        rl->rl_vlc[0][i].level= level;
+        rl->rl_vlc[0][i].run= run;
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+static void init_uni_ac_vlc(RLTable *rl, uint8_t *uni_ac_vlc_len){
+    int i;
+
+    for(i=0; i<128; i++){
+        int level= i-64;
+        int run;
+        for(run=0; run<64; run++){
+            int len, bits, code;
+
+            int alevel= FFABS(level);
+            int sign= (level>>31)&1;
+
+            if (alevel > rl->max_level[0][run])
+                code= 111; /*rl->n*/
+            else
+                code= rl->index_run[0][run] + alevel - 1;
+
+            if (code < 111 /* rl->n */) {
+                /* store the vlc & sign at once */
+                len=   rl->table_vlc[code][1]+1;
+                bits= (rl->table_vlc[code][0]<<1) + sign;
+            } else {
+                len=  rl->table_vlc[111/*rl->n*/][1]+6;
+                bits= rl->table_vlc[111/*rl->n*/][0]<<6;
+
+                bits|= run;
+                if (alevel < 128) {
+                    bits<<=8; len+=8;
+                    bits|= level & 0xff;
+                } else {
+                    bits<<=16; len+=16;
+                    bits|= level & 0xff;
+                    if (level < 0) {
+                        bits|= 0x8001 + level + 255;
+                    } else {
+                        bits|= level & 0xffff;
+                    }
+                }
+            }
+
+            uni_ac_vlc_len [UNI_AC_ENC_INDEX(run, i)]= len;
+        }
+    }
+}
+
+
+static int find_frame_rate_index(MpegEncContext *s){
+    int i;
+    int64_t dmin= INT64_MAX;
+    int64_t d;
+
+    for(i=1;i<14;i++) {
+        int64_t n0= 1001LL/ff_frame_rate_tab[i].den*ff_frame_rate_tab[i].num*s->avctx->time_base.num;
+        int64_t n1= 1001LL*s->avctx->time_base.den;
+        if(s->avctx->strict_std_compliance > FF_COMPLIANCE_INOFFICIAL && i>=9) break;
+
+        d = FFABS(n0 - n1);
+        if(d < dmin){
+            dmin=d;
+            s->frame_rate_index= i;
+        }
+    }
+    if(dmin)
+        return -1;
+    else
+        return 0;
+}
+
+static int encode_init(AVCodecContext *avctx)
+{
+    MpegEncContext *s = avctx->priv_data;
+
+    if(MPV_encode_init(avctx) < 0)
+        return -1;
+
+    if(find_frame_rate_index(s) < 0){
+        if(s->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
+            av_log(avctx, AV_LOG_ERROR, "MPEG1/2 does not support %d/%d fps\n", avctx->time_base.den, avctx->time_base.num);
+            return -1;
+        }else{
+            av_log(avctx, AV_LOG_INFO, "MPEG1/2 does not support %d/%d fps, there may be AV sync issues\n", avctx->time_base.den, avctx->time_base.num);
+        }
+    }
+
+    if(avctx->profile == FF_PROFILE_UNKNOWN)
+        avctx->profile = s->chroma_format == CHROMA_420 ? 4 : 0;
+
+    if(avctx->level == FF_LEVEL_UNKNOWN)
+        avctx->level = s->chroma_format == CHROMA_420 ? 8 : 5;
+
+    if((avctx->flags2 & CODEC_FLAG2_DROP_FRAME_TIMECODE) && s->frame_rate_index != 4){
+        av_log(avctx, AV_LOG_ERROR, "Drop frame time code only allowed with 1001/30000 fps\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static void put_header(MpegEncContext *s, int header)
+{
+    align_put_bits(&s->pb);
+    put_bits(&s->pb, 16, header>>16);
+    put_bits(&s->pb, 16, header&0xFFFF);
+}
+
+/* put sequence header if needed */
+static void mpeg1_encode_sequence_header(MpegEncContext *s)
+{
+        unsigned int vbv_buffer_size;
+        unsigned int fps, v;
+        int i;
+        uint64_t time_code;
+        float best_aspect_error= 1E10;
+        float aspect_ratio= av_q2d(s->avctx->sample_aspect_ratio);
+        int constraint_parameter_flag;
+
+        if(aspect_ratio==0.0) aspect_ratio= 1.0; //pixel aspect 1:1 (VGA)
+
+        if (s->current_picture.key_frame) {
+            AVRational framerate= ff_frame_rate_tab[s->frame_rate_index];
+
+            /* mpeg1 header repeated every gop */
+            put_header(s, SEQ_START_CODE);
+
+            put_bits(&s->pb, 12, s->width);
+            put_bits(&s->pb, 12, s->height);
+
+            for(i=1; i<15; i++){
+                float error= aspect_ratio;
+                if(s->codec_id == CODEC_ID_MPEG1VIDEO || i <=1)
+                    error-= 1.0/mpeg1_aspect[i];
+                else
+                    error-= av_q2d(mpeg2_aspect[i])*s->height/s->width;
+
+                error= FFABS(error);
+
+                if(error < best_aspect_error){
+                    best_aspect_error= error;
+                    s->aspect_ratio_info= i;
+                }
+            }
+
+            put_bits(&s->pb, 4, s->aspect_ratio_info);
+            put_bits(&s->pb, 4, s->frame_rate_index);
+
+            if(s->avctx->rc_max_rate){
+                v = (s->avctx->rc_max_rate + 399) / 400;
+                if (v > 0x3ffff && s->codec_id == CODEC_ID_MPEG1VIDEO)
+                    v = 0x3ffff;
+            }else{
+                v= 0x3FFFF;
+            }
+
+            if(s->avctx->rc_buffer_size)
+                vbv_buffer_size = s->avctx->rc_buffer_size;
+            else
+                /* VBV calculation: Scaled so that a VCD has the proper VBV size of 40 kilobytes */
+                vbv_buffer_size = (( 20 * s->bit_rate) / (1151929 / 2)) * 8 * 1024;
+            vbv_buffer_size= (vbv_buffer_size + 16383) / 16384;
+
+            put_bits(&s->pb, 18, v & 0x3FFFF);
+            put_bits(&s->pb, 1, 1); /* marker */
+            put_bits(&s->pb, 10, vbv_buffer_size & 0x3FF);
+
+            constraint_parameter_flag=
+                s->width <= 768 && s->height <= 576 &&
+                s->mb_width * s->mb_height <= 396 &&
+                s->mb_width * s->mb_height * framerate.num <= framerate.den*396*25 &&
+                framerate.num <= framerate.den*30 &&
+                s->avctx->me_range && s->avctx->me_range < 128 &&
+                vbv_buffer_size <= 20 &&
+                v <= 1856000/400 &&
+                s->codec_id == CODEC_ID_MPEG1VIDEO;
+
+            put_bits(&s->pb, 1, constraint_parameter_flag);
+
+            ff_write_quant_matrix(&s->pb, s->avctx->intra_matrix);
+            ff_write_quant_matrix(&s->pb, s->avctx->inter_matrix);
+
+            if(s->codec_id == CODEC_ID_MPEG2VIDEO){
+                put_header(s, EXT_START_CODE);
+                put_bits(&s->pb, 4, 1); //seq ext
+
+                put_bits(&s->pb, 1, s->chroma_format == CHROMA_422); //escx
+
+                put_bits(&s->pb, 3, s->avctx->profile); //profile
+                put_bits(&s->pb, 4, s->avctx->level); //level
+
+                put_bits(&s->pb, 1, s->progressive_sequence);
+                put_bits(&s->pb, 2, s->chroma_format);
+                put_bits(&s->pb, 2, 0); //horizontal size ext
+                put_bits(&s->pb, 2, 0); //vertical size ext
+                put_bits(&s->pb, 12, v>>18); //bitrate ext
+                put_bits(&s->pb, 1, 1); //marker
+                put_bits(&s->pb, 8, vbv_buffer_size >>10); //vbv buffer ext
+                put_bits(&s->pb, 1, s->low_delay);
+                put_bits(&s->pb, 2, 0); // frame_rate_ext_n
+                put_bits(&s->pb, 5, 0); // frame_rate_ext_d
+            }
+
+            put_header(s, GOP_START_CODE);
+            put_bits(&s->pb, 1, !!(s->avctx->flags & CODEC_FLAG2_DROP_FRAME_TIMECODE)); /* drop frame flag */
+            /* time code : we must convert from the real frame rate to a
+               fake mpeg frame rate in case of low frame rate */
+            fps = (framerate.num + framerate.den/2)/ framerate.den;
+            time_code = s->current_picture_ptr->coded_picture_number + s->avctx->timecode_frame_start;
+
+            s->gop_picture_number = s->current_picture_ptr->coded_picture_number;
+            if (s->avctx->flags2 & CODEC_FLAG2_DROP_FRAME_TIMECODE) {
+                /* only works for NTSC 29.97 */
+                int d = time_code / 17982;
+                int m = time_code % 17982;
+                //if (m < 2) m += 2; /* not needed since -2,-1 / 1798 in C returns 0 */
+                time_code += 18 * d + 2 * ((m - 2) / 1798);
+            }
+            put_bits(&s->pb, 5, (uint32_t)((time_code / (fps * 3600)) % 24));
+            put_bits(&s->pb, 6, (uint32_t)((time_code / (fps * 60)) % 60));
+            put_bits(&s->pb, 1, 1);
+            put_bits(&s->pb, 6, (uint32_t)((time_code / fps) % 60));
+            put_bits(&s->pb, 6, (uint32_t)((time_code % fps)));
+            put_bits(&s->pb, 1, !!(s->flags & CODEC_FLAG_CLOSED_GOP));
+            put_bits(&s->pb, 1, 0); /* broken link */
+        }
+}
+
+static inline void encode_mb_skip_run(MpegEncContext *s, int run){
+    while (run >= 33) {
+        put_bits(&s->pb, 11, 0x008);
+        run -= 33;
+    }
+    put_bits(&s->pb, mbAddrIncrTable[run][1],
+             mbAddrIncrTable[run][0]);
+}
+#endif //CONFIG_ENCODERS
+
+static void common_init(MpegEncContext *s)
+{
+
+    s->y_dc_scale_table=
+    s->c_dc_scale_table= mpeg2_dc_scale_table[s->intra_dc_precision];
+
+}
+
+void ff_mpeg1_clean_buffers(MpegEncContext *s){
+    s->last_dc[0] = 1 << (7 + s->intra_dc_precision);
+    s->last_dc[1] = s->last_dc[0];
+    s->last_dc[2] = s->last_dc[0];
+    memset(s->last_mv, 0, sizeof(s->last_mv));
+}
+
+#ifdef CONFIG_ENCODERS
+
+void ff_mpeg1_encode_slice_header(MpegEncContext *s){
+    put_header(s, SLICE_MIN_START_CODE + s->mb_y);
+    put_bits(&s->pb, 5, s->qscale); /* quantizer scale */
+    put_bits(&s->pb, 1, 0); /* slice extra information */
+}
+
+void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number)
+{
+    mpeg1_encode_sequence_header(s);
+
+    /* mpeg1 picture header */
+    put_header(s, PICTURE_START_CODE);
+    /* temporal reference */
+
+    // RAL: s->picture_number instead of s->fake_picture_number
+    put_bits(&s->pb, 10, (s->picture_number -
+                          s->gop_picture_number) & 0x3ff);
+    put_bits(&s->pb, 3, s->pict_type);
+
+    s->vbv_delay_ptr= s->pb.buf + put_bits_count(&s->pb)/8;
+    put_bits(&s->pb, 16, 0xFFFF); /* vbv_delay */
+
+    // RAL: Forward f_code also needed for B frames
+    if (s->pict_type == P_TYPE || s->pict_type == B_TYPE) {
+        put_bits(&s->pb, 1, 0); /* half pel coordinates */
+        if(s->codec_id == CODEC_ID_MPEG1VIDEO)
+            put_bits(&s->pb, 3, s->f_code); /* forward_f_code */
+        else
+            put_bits(&s->pb, 3, 7); /* forward_f_code */
+    }
+
+    // RAL: Backward f_code necessary for B frames
+    if (s->pict_type == B_TYPE) {
+        put_bits(&s->pb, 1, 0); /* half pel coordinates */
+        if(s->codec_id == CODEC_ID_MPEG1VIDEO)
+            put_bits(&s->pb, 3, s->b_code); /* backward_f_code */
+        else
+            put_bits(&s->pb, 3, 7); /* backward_f_code */
+    }
+
+    put_bits(&s->pb, 1, 0); /* extra bit picture */
+
+    s->frame_pred_frame_dct = 1;
+    if(s->codec_id == CODEC_ID_MPEG2VIDEO){
+        put_header(s, EXT_START_CODE);
+        put_bits(&s->pb, 4, 8); //pic ext
+        if (s->pict_type == P_TYPE || s->pict_type == B_TYPE) {
+            put_bits(&s->pb, 4, s->f_code);
+            put_bits(&s->pb, 4, s->f_code);
+        }else{
+            put_bits(&s->pb, 8, 255);
+        }
+        if (s->pict_type == B_TYPE) {
+            put_bits(&s->pb, 4, s->b_code);
+            put_bits(&s->pb, 4, s->b_code);
+        }else{
+            put_bits(&s->pb, 8, 255);
+        }
+        put_bits(&s->pb, 2, s->intra_dc_precision);
+
+        assert(s->picture_structure == PICT_FRAME);
+        put_bits(&s->pb, 2, s->picture_structure);
+        if (s->progressive_sequence) {
+            put_bits(&s->pb, 1, 0); /* no repeat */
+        } else {
+            put_bits(&s->pb, 1, s->current_picture_ptr->top_field_first);
+        }
+        /* XXX: optimize the generation of this flag with entropy
+           measures */
+        s->frame_pred_frame_dct = s->progressive_sequence;
+
+        put_bits(&s->pb, 1, s->frame_pred_frame_dct);
+        put_bits(&s->pb, 1, s->concealment_motion_vectors);
+        put_bits(&s->pb, 1, s->q_scale_type);
+        put_bits(&s->pb, 1, s->intra_vlc_format);
+        put_bits(&s->pb, 1, s->alternate_scan);
+        put_bits(&s->pb, 1, s->repeat_first_field);
+        s->progressive_frame = s->progressive_sequence;
+        put_bits(&s->pb, 1, s->chroma_format == CHROMA_420 ? s->progressive_frame : 0); /* chroma_420_type */
+        put_bits(&s->pb, 1, s->progressive_frame);
+        put_bits(&s->pb, 1, 0); //composite_display_flag
+    }
+    if(s->flags & CODEC_FLAG_SVCD_SCAN_OFFSET){
+        int i;
+
+        put_header(s, USER_START_CODE);
+        for(i=0; i<sizeof(svcd_scan_offset_placeholder); i++){
+            put_bits(&s->pb, 8, svcd_scan_offset_placeholder[i]);
+        }
+    }
+
+    s->mb_y=0;
+    ff_mpeg1_encode_slice_header(s);
+}
+
+static inline void put_mb_modes(MpegEncContext *s, int n, int bits,
+                                int has_mv, int field_motion)
+{
+    put_bits(&s->pb, n, bits);
+    if (!s->frame_pred_frame_dct) {
+        if (has_mv)
+            put_bits(&s->pb, 2, 2 - field_motion); /* motion_type: frame/field */
+        put_bits(&s->pb, 1, s->interlaced_dct);
+    }
+}
+
+static always_inline void mpeg1_encode_mb_internal(MpegEncContext *s,
+                                                   DCTELEM block[6][64],
+                                                   int motion_x, int motion_y,
+                                                   int mb_block_count)
+{
+    int i, cbp;
+    const int mb_x = s->mb_x;
+    const int mb_y = s->mb_y;
+    const int first_mb= mb_x == s->resync_mb_x && mb_y == s->resync_mb_y;
+
+    /* compute cbp */
+    cbp = 0;
+    for(i=0;i<mb_block_count;i++) {
+        if (s->block_last_index[i] >= 0)
+            cbp |= 1 << (mb_block_count - 1 - i);
+    }
+
+    if (cbp == 0 && !first_mb && s->mv_type == MV_TYPE_16X16 &&
+        (mb_x != s->mb_width - 1 || (mb_y != s->mb_height - 1 && s->codec_id == CODEC_ID_MPEG1VIDEO)) &&
+        ((s->pict_type == P_TYPE && (motion_x | motion_y) == 0) ||
+        (s->pict_type == B_TYPE && s->mv_dir == s->last_mv_dir && (((s->mv_dir & MV_DIR_FORWARD) ? ((s->mv[0][0][0] - s->last_mv[0][0][0])|(s->mv[0][0][1] - s->last_mv[0][0][1])) : 0) |
+        ((s->mv_dir & MV_DIR_BACKWARD) ? ((s->mv[1][0][0] - s->last_mv[1][0][0])|(s->mv[1][0][1] - s->last_mv[1][0][1])) : 0)) == 0))) {
+        s->mb_skip_run++;
+        s->qscale -= s->dquant;
+        s->skip_count++;
+        s->misc_bits++;
+        s->last_bits++;
+        if(s->pict_type == P_TYPE){
+            s->last_mv[0][1][0]= s->last_mv[0][0][0]=
+            s->last_mv[0][1][1]= s->last_mv[0][0][1]= 0;
+        }
+    } else {
+        if(first_mb){
+            assert(s->mb_skip_run == 0);
+            encode_mb_skip_run(s, s->mb_x);
+        }else{
+            encode_mb_skip_run(s, s->mb_skip_run);
+        }
+
+        if (s->pict_type == I_TYPE) {
+            if(s->dquant && cbp){
+                put_mb_modes(s, 2, 1, 0, 0); /* macroblock_type : macroblock_quant = 1 */
+                put_bits(&s->pb, 5, s->qscale);
+            }else{
+                put_mb_modes(s, 1, 1, 0, 0); /* macroblock_type : macroblock_quant = 0 */
+                s->qscale -= s->dquant;
+            }
+            s->misc_bits+= get_bits_diff(s);
+            s->i_count++;
+        } else if (s->mb_intra) {
+            if(s->dquant && cbp){
+                put_mb_modes(s, 6, 0x01, 0, 0);
+                put_bits(&s->pb, 5, s->qscale);
+            }else{
+                put_mb_modes(s, 5, 0x03, 0, 0);
+                s->qscale -= s->dquant;
+            }
+            s->misc_bits+= get_bits_diff(s);
+            s->i_count++;
+            memset(s->last_mv, 0, sizeof(s->last_mv));
+        } else if (s->pict_type == P_TYPE) {
+            if(s->mv_type == MV_TYPE_16X16){
+                if (cbp != 0) {
+                    if ((motion_x|motion_y) == 0) {
+                        if(s->dquant){
+                            put_mb_modes(s, 5, 1, 0, 0); /* macroblock_pattern & quant */
+                            put_bits(&s->pb, 5, s->qscale);
+                        }else{
+                            put_mb_modes(s, 2, 1, 0, 0); /* macroblock_pattern only */
+                        }
+                        s->misc_bits+= get_bits_diff(s);
+                    } else {
+                        if(s->dquant){
+                            put_mb_modes(s, 5, 2, 1, 0); /* motion + cbp */
+                            put_bits(&s->pb, 5, s->qscale);
+                        }else{
+                            put_mb_modes(s, 1, 1, 1, 0); /* motion + cbp */
+                        }
+                        s->misc_bits+= get_bits_diff(s);
+                        mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code);    // RAL: f_code parameter added
+                        mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code);    // RAL: f_code parameter added
+                        s->mv_bits+= get_bits_diff(s);
+                    }
+                } else {
+                    put_bits(&s->pb, 3, 1); /* motion only */
+                    if (!s->frame_pred_frame_dct)
+                        put_bits(&s->pb, 2, 2); /* motion_type: frame */
+                    s->misc_bits+= get_bits_diff(s);
+                    mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code);    // RAL: f_code parameter added
+                    mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code);    // RAL: f_code parameter added
+                    s->qscale -= s->dquant;
+                    s->mv_bits+= get_bits_diff(s);
+                }
+                s->last_mv[0][1][0]= s->last_mv[0][0][0]= motion_x;
+                s->last_mv[0][1][1]= s->last_mv[0][0][1]= motion_y;
+            }else{
+                assert(!s->frame_pred_frame_dct && s->mv_type == MV_TYPE_FIELD);
+
+                if (cbp) {
+                    if(s->dquant){
+                        put_mb_modes(s, 5, 2, 1, 1); /* motion + cbp */
+                        put_bits(&s->pb, 5, s->qscale);
+                    }else{
+                        put_mb_modes(s, 1, 1, 1, 1); /* motion + cbp */
+                    }
+                } else {
+                    put_bits(&s->pb, 3, 1); /* motion only */
+                    put_bits(&s->pb, 2, 1); /* motion_type: field */
+                    s->qscale -= s->dquant;
+                }
+                s->misc_bits+= get_bits_diff(s);
+                for(i=0; i<2; i++){
+                    put_bits(&s->pb, 1, s->field_select[0][i]);
+                    mpeg1_encode_motion(s, s->mv[0][i][0] -  s->last_mv[0][i][0]    , s->f_code);
+                    mpeg1_encode_motion(s, s->mv[0][i][1] - (s->last_mv[0][i][1]>>1), s->f_code);
+                    s->last_mv[0][i][0]=   s->mv[0][i][0];
+                    s->last_mv[0][i][1]= 2*s->mv[0][i][1];
+                }
+                s->mv_bits+= get_bits_diff(s);
+            }
+            if(cbp) {
+                if (s->chroma_y_shift) {
+                    put_bits(&s->pb, mbPatTable[cbp][1], mbPatTable[cbp][0]);
+                } else {
+                    put_bits(&s->pb, mbPatTable[cbp>>2][1], mbPatTable[cbp>>2][0]);
+                    put_bits(&s->pb, 2, cbp & 3);
+                }
+            }
+            s->f_count++;
+        } else{
+            static const int mb_type_len[4]={0,3,4,2}; //bak,for,bi
+
+            if(s->mv_type == MV_TYPE_16X16){
+                if (cbp){    // With coded bloc pattern
+                    if (s->dquant) {
+                        if(s->mv_dir == MV_DIR_FORWARD)
+                            put_mb_modes(s, 6, 3, 1, 0);
+                        else
+                            put_mb_modes(s, mb_type_len[s->mv_dir]+3, 2, 1, 0);
+                        put_bits(&s->pb, 5, s->qscale);
+                    } else {
+                        put_mb_modes(s, mb_type_len[s->mv_dir], 3, 1, 0);
+                    }
+                }else{    // No coded bloc pattern
+                    put_bits(&s->pb, mb_type_len[s->mv_dir], 2);
+                    if (!s->frame_pred_frame_dct)
+                        put_bits(&s->pb, 2, 2); /* motion_type: frame */
+                    s->qscale -= s->dquant;
+                }
+                s->misc_bits += get_bits_diff(s);
+                if (s->mv_dir&MV_DIR_FORWARD){
+                    mpeg1_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
+                    mpeg1_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
+                    s->last_mv[0][0][0]=s->last_mv[0][1][0]= s->mv[0][0][0];
+                    s->last_mv[0][0][1]=s->last_mv[0][1][1]= s->mv[0][0][1];
+                    s->f_count++;
+                }
+                if (s->mv_dir&MV_DIR_BACKWARD){
+                    mpeg1_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code);
+                    mpeg1_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
+                    s->last_mv[1][0][0]=s->last_mv[1][1][0]= s->mv[1][0][0];
+                    s->last_mv[1][0][1]=s->last_mv[1][1][1]= s->mv[1][0][1];
+                    s->b_count++;
+                }
+            }else{
+                assert(s->mv_type == MV_TYPE_FIELD);
+                assert(!s->frame_pred_frame_dct);
+                if (cbp){    // With coded bloc pattern
+                    if (s->dquant) {
+                        if(s->mv_dir == MV_DIR_FORWARD)
+                            put_mb_modes(s, 6, 3, 1, 1);
+                        else
+                            put_mb_modes(s, mb_type_len[s->mv_dir]+3, 2, 1, 1);
+                        put_bits(&s->pb, 5, s->qscale);
+                    } else {
+                        put_mb_modes(s, mb_type_len[s->mv_dir], 3, 1, 1);
+                    }
+                }else{    // No coded bloc pattern
+                    put_bits(&s->pb, mb_type_len[s->mv_dir], 2);
+                    put_bits(&s->pb, 2, 1); /* motion_type: field */
+                    s->qscale -= s->dquant;
+                }
+                s->misc_bits += get_bits_diff(s);
+                if (s->mv_dir&MV_DIR_FORWARD){
+                    for(i=0; i<2; i++){
+                        put_bits(&s->pb, 1, s->field_select[0][i]);
+                        mpeg1_encode_motion(s, s->mv[0][i][0] -  s->last_mv[0][i][0]    , s->f_code);
+                        mpeg1_encode_motion(s, s->mv[0][i][1] - (s->last_mv[0][i][1]>>1), s->f_code);
+                        s->last_mv[0][i][0]=   s->mv[0][i][0];
+                        s->last_mv[0][i][1]= 2*s->mv[0][i][1];
+                    }
+                    s->f_count++;
+                }
+                if (s->mv_dir&MV_DIR_BACKWARD){
+                    for(i=0; i<2; i++){
+                        put_bits(&s->pb, 1, s->field_select[1][i]);
+                        mpeg1_encode_motion(s, s->mv[1][i][0] -  s->last_mv[1][i][0]    , s->b_code);
+                        mpeg1_encode_motion(s, s->mv[1][i][1] - (s->last_mv[1][i][1]>>1), s->b_code);
+                        s->last_mv[1][i][0]=   s->mv[1][i][0];
+                        s->last_mv[1][i][1]= 2*s->mv[1][i][1];
+                    }
+                    s->b_count++;
+                }
+            }
+            s->mv_bits += get_bits_diff(s);
+            if(cbp) {
+                if (s->chroma_y_shift) {
+                    put_bits(&s->pb, mbPatTable[cbp][1], mbPatTable[cbp][0]);
+                } else {
+                    put_bits(&s->pb, mbPatTable[cbp>>2][1], mbPatTable[cbp>>2][0]);
+                    put_bits(&s->pb, 2, cbp & 3);
+                }
+            }
+        }
+        for(i=0;i<mb_block_count;i++) {
+            if (cbp & (1 << (mb_block_count - 1 - i))) {
+                mpeg1_encode_block(s, block[i], i);
+            }
+        }
+        s->mb_skip_run = 0;
+        if(s->mb_intra)
+            s->i_tex_bits+= get_bits_diff(s);
+        else
+            s->p_tex_bits+= get_bits_diff(s);
+    }
+}
+
+void mpeg1_encode_mb(MpegEncContext *s, DCTELEM block[6][64], int motion_x, int motion_y)
+{
+    if (s->chroma_format == CHROMA_420) mpeg1_encode_mb_internal(s, block, motion_x, motion_y, 6);
+    else                                mpeg1_encode_mb_internal(s, block, motion_x, motion_y, 8);
+}
+
+// RAL: Parameter added: f_or_b_code
+static void mpeg1_encode_motion(MpegEncContext *s, int val, int f_or_b_code)
+{
+    int code, bit_size, l, bits, range, sign;
+
+    if (val == 0) {
+        /* zero vector */
+        code = 0;
+        put_bits(&s->pb,
+                 mbMotionVectorTable[0][1],
+                 mbMotionVectorTable[0][0]);
+    } else {
+        bit_size = f_or_b_code - 1;
+        range = 1 << bit_size;
+        /* modulo encoding */
+        l= INT_BIT - 5 - bit_size;
+        val= (val<<l)>>l;
+
+        if (val >= 0) {
+            val--;
+            code = (val >> bit_size) + 1;
+            bits = val & (range - 1);
+            sign = 0;
+        } else {
+            val = -val;
+            val--;
+            code = (val >> bit_size) + 1;
+            bits = val & (range - 1);
+            sign = 1;
+        }
+
+        assert(code > 0 && code <= 16);
+
+        put_bits(&s->pb,
+                 mbMotionVectorTable[code][1],
+                 mbMotionVectorTable[code][0]);
+
+        put_bits(&s->pb, 1, sign);
+        if (bit_size > 0) {
+            put_bits(&s->pb, bit_size, bits);
+        }
+    }
+}
+
+void ff_mpeg1_encode_init(MpegEncContext *s)
+{
+    static int done=0;
+
+    common_init(s);
+
+    if(!done){
+        int f_code;
+        int mv;
+        int i;
+
+        done=1;
+        init_rl(&rl_mpeg1, 1);
+        if(s->intra_vlc_format)
+            init_rl(&rl_mpeg2, 1);
+
+        for(i=0; i<64; i++)
+        {
+                mpeg1_max_level[0][i]= rl_mpeg1.max_level[0][i];
+                mpeg1_index_run[0][i]= rl_mpeg1.index_run[0][i];
+        }
+
+        init_uni_ac_vlc(&rl_mpeg1, uni_mpeg1_ac_vlc_len);
+        if(s->intra_vlc_format)
+            init_uni_ac_vlc(&rl_mpeg2, uni_mpeg2_ac_vlc_len);
+
+        /* build unified dc encoding tables */
+        for(i=-255; i<256; i++)
+        {
+                int adiff, index;
+                int bits, code;
+                int diff=i;
+
+                adiff = FFABS(diff);
+                if(diff<0) diff--;
+                index = av_log2(2*adiff);
+
+                bits= vlc_dc_lum_bits[index] + index;
+                code= (vlc_dc_lum_code[index]<<index) + (diff & ((1 << index) - 1));
+                mpeg1_lum_dc_uni[i+255]= bits + (code<<8);
+
+                bits= vlc_dc_chroma_bits[index] + index;
+                code= (vlc_dc_chroma_code[index]<<index) + (diff & ((1 << index) - 1));
+                mpeg1_chr_dc_uni[i+255]= bits + (code<<8);
+        }
+
+        mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
+
+        for(f_code=1; f_code<=MAX_FCODE; f_code++){
+            for(mv=-MAX_MV; mv<=MAX_MV; mv++){
+                int len;
+
+                if(mv==0) len= mbMotionVectorTable[0][1];
+                else{
+                    int val, bit_size, range, code;
+
+                    bit_size = f_code - 1;
+                    range = 1 << bit_size;
+
+                    val=mv;
+                    if (val < 0)
+                        val = -val;
+                    val--;
+                    code = (val >> bit_size) + 1;
+                    if(code<17){
+                        len= mbMotionVectorTable[code][1] + 1 + bit_size;
+                    }else{
+                        len= mbMotionVectorTable[16][1] + 2 + bit_size;
+                    }
+                }
+
+                mv_penalty[f_code][mv+MAX_MV]= len;
+            }
+        }
+
+
+        for(f_code=MAX_FCODE; f_code>0; f_code--){
+            for(mv=-(8<<f_code); mv<(8<<f_code); mv++){
+                fcode_tab[mv+MAX_MV]= f_code;
+            }
+        }
+    }
+    s->me.mv_penalty= mv_penalty;
+    s->fcode_tab= fcode_tab;
+    if(s->codec_id == CODEC_ID_MPEG1VIDEO){
+        s->min_qcoeff=-255;
+        s->max_qcoeff= 255;
+    }else{
+        s->min_qcoeff=-2047;
+        s->max_qcoeff= 2047;
+    }
+    if (s->intra_vlc_format) {
+        s->intra_ac_vlc_length=
+        s->intra_ac_vlc_last_length= uni_mpeg2_ac_vlc_len;
+    } else {
+        s->intra_ac_vlc_length=
+        s->intra_ac_vlc_last_length= uni_mpeg1_ac_vlc_len;
+    }
+    s->inter_ac_vlc_length=
+    s->inter_ac_vlc_last_length= uni_mpeg1_ac_vlc_len;
+}
+
+static inline void encode_dc(MpegEncContext *s, int diff, int component)
+{
+  if(((unsigned) (diff+255)) >= 511){
+        int index;
+
+        if(diff<0){
+            index= av_log2_16bit(-2*diff);
+            diff--;
+        }else{
+            index= av_log2_16bit(2*diff);
+        }
+        if (component == 0) {
+            put_bits(
+                &s->pb,
+                vlc_dc_lum_bits[index] + index,
+                (vlc_dc_lum_code[index]<<index) + (diff & ((1 << index) - 1)));
+        }else{
+            put_bits(
+                &s->pb,
+                vlc_dc_chroma_bits[index] + index,
+                (vlc_dc_chroma_code[index]<<index) + (diff & ((1 << index) - 1)));
+        }
+  }else{
+    if (component == 0) {
+        put_bits(
+            &s->pb,
+            mpeg1_lum_dc_uni[diff+255]&0xFF,
+            mpeg1_lum_dc_uni[diff+255]>>8);
+    } else {
+        put_bits(
+            &s->pb,
+            mpeg1_chr_dc_uni[diff+255]&0xFF,
+            mpeg1_chr_dc_uni[diff+255]>>8);
+    }
+  }
+}
+
+static void mpeg1_encode_block(MpegEncContext *s,
+                               DCTELEM *block,
+                               int n)
+{
+    int alevel, level, last_non_zero, dc, diff, i, j, run, last_index, sign;
+    int code, component;
+    const uint16_t (*table_vlc)[2] = rl_mpeg1.table_vlc;
+
+    last_index = s->block_last_index[n];
+
+    /* DC coef */
+    if (s->mb_intra) {
+        component = (n <= 3 ? 0 : (n&1) + 1);
+        dc = block[0]; /* overflow is impossible */
+        diff = dc - s->last_dc[component];
+        encode_dc(s, diff, component);
+        s->last_dc[component] = dc;
+        i = 1;
+        if (s->intra_vlc_format)
+            table_vlc = rl_mpeg2.table_vlc;
+    } else {
+        /* encode the first coefficient : needs to be done here because
+           it is handled slightly differently */
+        level = block[0];
+        if (abs(level) == 1) {
+                code = ((uint32_t)level >> 31); /* the sign bit */
+                put_bits(&s->pb, 2, code | 0x02);
+                i = 1;
+        } else {
+            i = 0;
+            last_non_zero = -1;
+            goto next_coef;
+        }
+    }
+
+    /* now quantify & encode AC coefs */
+    last_non_zero = i - 1;
+
+    for(;i<=last_index;i++) {
+        j = s->intra_scantable.permutated[i];
+        level = block[j];
+    next_coef:
+#if 0
+        if (level != 0)
+            dprintf("level[%d]=%d\n", i, level);
+#endif
+        /* encode using VLC */
+        if (level != 0) {
+            run = i - last_non_zero - 1;
+
+            alevel= level;
+            MASK_ABS(sign, alevel)
+            sign&=1;
+
+            if (alevel <= mpeg1_max_level[0][run]){
+                code= mpeg1_index_run[0][run] + alevel - 1;
+                /* store the vlc & sign at once */
+                put_bits(&s->pb, table_vlc[code][1]+1, (table_vlc[code][0]<<1) + sign);
+            } else {
+                /* escape seems to be pretty rare <5% so i dont optimize it */
+                put_bits(&s->pb, table_vlc[111][1], table_vlc[111][0]);
+                /* escape: only clip in this case */
+                put_bits(&s->pb, 6, run);
+                if(s->codec_id == CODEC_ID_MPEG1VIDEO){
+                    if (alevel < 128) {
+                        put_bits(&s->pb, 8, level & 0xff);
+                    } else {
+                        if (level < 0) {
+                            put_bits(&s->pb, 16, 0x8001 + level + 255);
+                        } else {
+                            put_bits(&s->pb, 16, level & 0xffff);
+                        }
+                    }
+                }else{
+                    put_bits(&s->pb, 12, level & 0xfff);
+                }
+            }
+            last_non_zero = i;
+        }
+    }
+    /* end of block */
+    put_bits(&s->pb, table_vlc[112][1], table_vlc[112][0]);
+}
+#endif //CONFIG_ENCODERS
+
+/******************************************/
+/* decoding */
+
+static VLC dc_lum_vlc;
+static VLC dc_chroma_vlc;
+static VLC mv_vlc;
+static VLC mbincr_vlc;
+static VLC mb_ptype_vlc;
+static VLC mb_btype_vlc;
+static VLC mb_pat_vlc;
+
+static void init_vlcs(void)
+{
+    static int done = 0;
+
+    if (!done) {
+        done = 1;
+
+        init_vlc(&dc_lum_vlc, DC_VLC_BITS, 12,
+                 vlc_dc_lum_bits, 1, 1,
+                 vlc_dc_lum_code, 2, 2, 1);
+        init_vlc(&dc_chroma_vlc,  DC_VLC_BITS, 12,
+                 vlc_dc_chroma_bits, 1, 1,
+                 vlc_dc_chroma_code, 2, 2, 1);
+        init_vlc(&mv_vlc, MV_VLC_BITS, 17,
+                 &mbMotionVectorTable[0][1], 2, 1,
+                 &mbMotionVectorTable[0][0], 2, 1, 1);
+        init_vlc(&mbincr_vlc, MBINCR_VLC_BITS, 36,
+                 &mbAddrIncrTable[0][1], 2, 1,
+                 &mbAddrIncrTable[0][0], 2, 1, 1);
+        init_vlc(&mb_pat_vlc, MB_PAT_VLC_BITS, 64,
+                 &mbPatTable[0][1], 2, 1,
+                 &mbPatTable[0][0], 2, 1, 1);
+
+        init_vlc(&mb_ptype_vlc, MB_PTYPE_VLC_BITS, 7,
+                 &table_mb_ptype[0][1], 2, 1,
+                 &table_mb_ptype[0][0], 2, 1, 1);
+        init_vlc(&mb_btype_vlc, MB_BTYPE_VLC_BITS, 11,
+                 &table_mb_btype[0][1], 2, 1,
+                 &table_mb_btype[0][0], 2, 1, 1);
+        init_rl(&rl_mpeg1, 1);
+        init_rl(&rl_mpeg2, 1);
+
+        init_2d_vlc_rl(&rl_mpeg1, 1);
+        init_2d_vlc_rl(&rl_mpeg2, 1);
+    }
+}
+
+static inline int get_dmv(MpegEncContext *s)
+{
+    if(get_bits1(&s->gb))
+        return 1 - (get_bits1(&s->gb) << 1);
+    else
+        return 0;
+}
+
+static inline int get_qscale(MpegEncContext *s)
+{
+    int qscale = get_bits(&s->gb, 5);
+    if (s->q_scale_type) {
+        return non_linear_qscale[qscale];
+    } else {
+        return qscale << 1;
+    }
+}
+
+/* motion type (for mpeg2) */
+#define MT_FIELD 1
+#define MT_FRAME 2
+#define MT_16X8  2
+#define MT_DMV   3
+
+static int mpeg_decode_mb(MpegEncContext *s,
+                          DCTELEM block[12][64])
+{
+    int i, j, k, cbp, val, mb_type, motion_type;
+    const int mb_block_count = 4 + (1<< s->chroma_format);
+
+    dprintf("decode_mb: x=%d y=%d\n", s->mb_x, s->mb_y);
+
+    assert(s->mb_skipped==0);
+
+    if (s->mb_skip_run-- != 0) {
+        if(s->pict_type == I_TYPE){
+            av_log(s->avctx, AV_LOG_ERROR, "skipped MB in I frame at %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+
+        /* skip mb */
+        s->mb_intra = 0;
+        for(i=0;i<12;i++)
+            s->block_last_index[i] = -1;
+        if(s->picture_structure == PICT_FRAME)
+            s->mv_type = MV_TYPE_16X16;
+        else
+            s->mv_type = MV_TYPE_FIELD;
+        if (s->pict_type == P_TYPE) {
+            /* if P type, zero motion vector is implied */
+            s->mv_dir = MV_DIR_FORWARD;
+            s->mv[0][0][0] = s->mv[0][0][1] = 0;
+            s->last_mv[0][0][0] = s->last_mv[0][0][1] = 0;
+            s->last_mv[0][1][0] = s->last_mv[0][1][1] = 0;
+            s->field_select[0][0]= s->picture_structure - 1;
+            s->mb_skipped = 1;
+            s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride ]= MB_TYPE_SKIP | MB_TYPE_L0 | MB_TYPE_16x16;
+        } else {
+            int mb_type;
+
+            if(s->mb_x)
+                mb_type= s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride - 1];
+            else
+                mb_type= s->current_picture.mb_type[ s->mb_width + (s->mb_y-1)*s->mb_stride - 1]; // FIXME not sure if this is allowed in mpeg at all,
+            if(IS_INTRA(mb_type))
+                return -1;
+
+            /* if B type, reuse previous vectors and directions */
+            s->mv[0][0][0] = s->last_mv[0][0][0];
+            s->mv[0][0][1] = s->last_mv[0][0][1];
+            s->mv[1][0][0] = s->last_mv[1][0][0];
+            s->mv[1][0][1] = s->last_mv[1][0][1];
+
+            s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride ]=
+                mb_type | MB_TYPE_SKIP;
+//            assert(s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride - 1]&(MB_TYPE_16x16|MB_TYPE_16x8));
+
+            if((s->mv[0][0][0]|s->mv[0][0][1]|s->mv[1][0][0]|s->mv[1][0][1])==0)
+                s->mb_skipped = 1;
+        }
+
+        return 0;
+    }
+
+    switch(s->pict_type) {
+    default:
+    case I_TYPE:
+        if (get_bits1(&s->gb) == 0) {
+            if (get_bits1(&s->gb) == 0){
+                av_log(s->avctx, AV_LOG_ERROR, "invalid mb type in I Frame at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+            mb_type = MB_TYPE_QUANT | MB_TYPE_INTRA;
+        } else {
+            mb_type = MB_TYPE_INTRA;
+        }
+        break;
+    case P_TYPE:
+        mb_type = get_vlc2(&s->gb, mb_ptype_vlc.table, MB_PTYPE_VLC_BITS, 1);
+        if (mb_type < 0){
+            av_log(s->avctx, AV_LOG_ERROR, "invalid mb type in P Frame at %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+        mb_type = ptype2mb_type[ mb_type ];
+        break;
+    case B_TYPE:
+        mb_type = get_vlc2(&s->gb, mb_btype_vlc.table, MB_BTYPE_VLC_BITS, 1);
+        if (mb_type < 0){
+            av_log(s->avctx, AV_LOG_ERROR, "invalid mb type in B Frame at %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+        mb_type = btype2mb_type[ mb_type ];
+        break;
+    }
+    dprintf("mb_type=%x\n", mb_type);
+//    motion_type = 0; /* avoid warning */
+    if (IS_INTRA(mb_type)) {
+        s->dsp.clear_blocks(s->block[0]);
+
+        if(!s->chroma_y_shift){
+            s->dsp.clear_blocks(s->block[6]);
+        }
+
+        /* compute dct type */
+        if (s->picture_structure == PICT_FRAME && //FIXME add a interlaced_dct coded var?
+            !s->frame_pred_frame_dct) {
+            s->interlaced_dct = get_bits1(&s->gb);
+        }
+
+        if (IS_QUANT(mb_type))
+            s->qscale = get_qscale(s);
+
+        if (s->concealment_motion_vectors) {
+            /* just parse them */
+            if (s->picture_structure != PICT_FRAME)
+                skip_bits1(&s->gb); /* field select */
+
+            s->mv[0][0][0]= s->last_mv[0][0][0]= s->last_mv[0][1][0] =
+                mpeg_decode_motion(s, s->mpeg_f_code[0][0], s->last_mv[0][0][0]);
+            s->mv[0][0][1]= s->last_mv[0][0][1]= s->last_mv[0][1][1] =
+                mpeg_decode_motion(s, s->mpeg_f_code[0][1], s->last_mv[0][0][1]);
+
+            skip_bits1(&s->gb); /* marker */
+        }else
+            memset(s->last_mv, 0, sizeof(s->last_mv)); /* reset mv prediction */
+        s->mb_intra = 1;
+#ifdef HAVE_XVMC
+        //one 1 we memcpy blocks in xvmcvideo
+        if(s->avctx->xvmc_acceleration > 1){
+            XVMC_pack_pblocks(s,-1);//inter are always full blocks
+            if(s->swap_uv){
+                exchange_uv(s);
+            }
+        }
+#endif
+
+        if (s->codec_id == CODEC_ID_MPEG2VIDEO) {
+            if(s->flags2 & CODEC_FLAG2_FAST){
+                for(i=0;i<6;i++) {
+                    mpeg2_fast_decode_block_intra(s, s->pblocks[i], i);
+                }
+            }else{
+                for(i=0;i<mb_block_count;i++) {
+                    if (mpeg2_decode_block_intra(s, s->pblocks[i], i) < 0)
+                        return -1;
+                }
+            }
+        } else {
+            for(i=0;i<6;i++) {
+                if (mpeg1_decode_block_intra(s, s->pblocks[i], i) < 0)
+                    return -1;
+            }
+        }
+    } else {
+        if (mb_type & MB_TYPE_ZERO_MV){
+            assert(mb_type & MB_TYPE_CBP);
+
+            /* compute dct type */
+            if (s->picture_structure == PICT_FRAME && //FIXME add a interlaced_dct coded var?
+                !s->frame_pred_frame_dct) {
+                s->interlaced_dct = get_bits1(&s->gb);
+            }
+
+            if (IS_QUANT(mb_type))
+                s->qscale = get_qscale(s);
+
+            s->mv_dir = MV_DIR_FORWARD;
+            if(s->picture_structure == PICT_FRAME)
+                s->mv_type = MV_TYPE_16X16;
+            else{
+                s->mv_type = MV_TYPE_FIELD;
+                mb_type |= MB_TYPE_INTERLACED;
+                s->field_select[0][0]= s->picture_structure - 1;
+            }
+            s->last_mv[0][0][0] = 0;
+            s->last_mv[0][0][1] = 0;
+            s->last_mv[0][1][0] = 0;
+            s->last_mv[0][1][1] = 0;
+            s->mv[0][0][0] = 0;
+            s->mv[0][0][1] = 0;
+        }else{
+            assert(mb_type & MB_TYPE_L0L1);
+//FIXME decide if MBs in field pictures are MB_TYPE_INTERLACED
+            /* get additionnal motion vector type */
+            if (s->frame_pred_frame_dct)
+                motion_type = MT_FRAME;
+            else{
+                motion_type = get_bits(&s->gb, 2);
+            }
+
+            /* compute dct type */
+            if (s->picture_structure == PICT_FRAME && //FIXME add a interlaced_dct coded var?
+                !s->frame_pred_frame_dct && HAS_CBP(mb_type)) {
+                s->interlaced_dct = get_bits1(&s->gb);
+            }
+
+            if (IS_QUANT(mb_type))
+                s->qscale = get_qscale(s);
+
+            /* motion vectors */
+            s->mv_dir = 0;
+            for(i=0;i<2;i++) {
+                if (USES_LIST(mb_type, i)) {
+                    s->mv_dir |= (MV_DIR_FORWARD >> i);
+                    dprintf("motion_type=%d\n", motion_type);
+                    switch(motion_type) {
+                    case MT_FRAME: /* or MT_16X8 */
+                        if (s->picture_structure == PICT_FRAME) {
+                            /* MT_FRAME */
+                            mb_type |= MB_TYPE_16x16;
+                            s->mv_type = MV_TYPE_16X16;
+                            s->mv[i][0][0]= s->last_mv[i][0][0]= s->last_mv[i][1][0] =
+                                mpeg_decode_motion(s, s->mpeg_f_code[i][0], s->last_mv[i][0][0]);
+                            s->mv[i][0][1]= s->last_mv[i][0][1]= s->last_mv[i][1][1] =
+                                mpeg_decode_motion(s, s->mpeg_f_code[i][1], s->last_mv[i][0][1]);
+                            /* full_pel: only for mpeg1 */
+                            if (s->full_pel[i]){
+                                s->mv[i][0][0] <<= 1;
+                                s->mv[i][0][1] <<= 1;
+                            }
+                        } else {
+                            /* MT_16X8 */
+                            mb_type |= MB_TYPE_16x8 | MB_TYPE_INTERLACED;
+                            s->mv_type = MV_TYPE_16X8;
+                            for(j=0;j<2;j++) {
+                                s->field_select[i][j] = get_bits1(&s->gb);
+                                for(k=0;k<2;k++) {
+                                    val = mpeg_decode_motion(s, s->mpeg_f_code[i][k],
+                                                             s->last_mv[i][j][k]);
+                                    s->last_mv[i][j][k] = val;
+                                    s->mv[i][j][k] = val;
+                                }
+                            }
+                        }
+                        break;
+                    case MT_FIELD:
+                        s->mv_type = MV_TYPE_FIELD;
+                        if (s->picture_structure == PICT_FRAME) {
+                            mb_type |= MB_TYPE_16x8 | MB_TYPE_INTERLACED;
+                            for(j=0;j<2;j++) {
+                                s->field_select[i][j] = get_bits1(&s->gb);
+                                val = mpeg_decode_motion(s, s->mpeg_f_code[i][0],
+                                                         s->last_mv[i][j][0]);
+                                s->last_mv[i][j][0] = val;
+                                s->mv[i][j][0] = val;
+                                dprintf("fmx=%d\n", val);
+                                val = mpeg_decode_motion(s, s->mpeg_f_code[i][1],
+                                                         s->last_mv[i][j][1] >> 1);
+                                s->last_mv[i][j][1] = val << 1;
+                                s->mv[i][j][1] = val;
+                                dprintf("fmy=%d\n", val);
+                            }
+                        } else {
+                            mb_type |= MB_TYPE_16x16 | MB_TYPE_INTERLACED;
+                            s->field_select[i][0] = get_bits1(&s->gb);
+                            for(k=0;k<2;k++) {
+                                val = mpeg_decode_motion(s, s->mpeg_f_code[i][k],
+                                                         s->last_mv[i][0][k]);
+                                s->last_mv[i][0][k] = val;
+                                s->last_mv[i][1][k] = val;
+                                s->mv[i][0][k] = val;
+                            }
+                        }
+                        break;
+                    case MT_DMV:
+                        {
+                            int dmx, dmy, mx, my, m;
+
+                            mx = mpeg_decode_motion(s, s->mpeg_f_code[i][0],
+                                                    s->last_mv[i][0][0]);
+                            s->last_mv[i][0][0] = mx;
+                            s->last_mv[i][1][0] = mx;
+                            dmx = get_dmv(s);
+                            my = mpeg_decode_motion(s, s->mpeg_f_code[i][1],
+                                                    s->last_mv[i][0][1] >> 1);
+                            dmy = get_dmv(s);
+                            s->mv_type = MV_TYPE_DMV;
+
+
+                            s->last_mv[i][0][1] = my<<1;
+                            s->last_mv[i][1][1] = my<<1;
+
+                            s->mv[i][0][0] = mx;
+                            s->mv[i][0][1] = my;
+                            s->mv[i][1][0] = mx;//not used
+                            s->mv[i][1][1] = my;//not used
+
+                            if (s->picture_structure == PICT_FRAME) {
+                                mb_type |= MB_TYPE_16x16 | MB_TYPE_INTERLACED;
+
+                                //m = 1 + 2 * s->top_field_first;
+                                m = s->top_field_first ? 1 : 3;
+
+                                /* top -> top pred */
+                                s->mv[i][2][0] = ((mx * m + (mx > 0)) >> 1) + dmx;
+                                s->mv[i][2][1] = ((my * m + (my > 0)) >> 1) + dmy - 1;
+                                m = 4 - m;
+                                s->mv[i][3][0] = ((mx * m + (mx > 0)) >> 1) + dmx;
+                                s->mv[i][3][1] = ((my * m + (my > 0)) >> 1) + dmy + 1;
+                            } else {
+                                mb_type |= MB_TYPE_16x16;
+
+                                s->mv[i][2][0] = ((mx + (mx > 0)) >> 1) + dmx;
+                                s->mv[i][2][1] = ((my + (my > 0)) >> 1) + dmy;
+                                if(s->picture_structure == PICT_TOP_FIELD)
+                                    s->mv[i][2][1]--;
+                                else
+                                    s->mv[i][2][1]++;
+                            }
+                        }
+                        break;
+                    default:
+                        av_log(s->avctx, AV_LOG_ERROR, "00 motion_type at %d %d\n", s->mb_x, s->mb_y);
+                        return -1;
+                    }
+                }
+            }
+        }
+
+        s->mb_intra = 0;
+        if (HAS_CBP(mb_type)) {
+            s->dsp.clear_blocks(s->block[0]);
+
+            if(!s->chroma_y_shift){
+                s->dsp.clear_blocks(s->block[6]);
+            }
+
+            cbp = get_vlc2(&s->gb, mb_pat_vlc.table, MB_PAT_VLC_BITS, 1);
+            if (cbp < 0 || ((cbp == 0) && (s->chroma_format < 2)) ){
+                av_log(s->avctx, AV_LOG_ERROR, "invalid cbp at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+            if(mb_block_count > 6){
+                 cbp<<= mb_block_count-6;
+                 cbp |= get_bits(&s->gb, mb_block_count-6);
+            }
+
+#ifdef HAVE_XVMC
+            //on 1 we memcpy blocks in xvmcvideo
+            if(s->avctx->xvmc_acceleration > 1){
+                XVMC_pack_pblocks(s,cbp);
+                if(s->swap_uv){
+                    exchange_uv(s);
+                }
+            }
+#endif
+
+            if (s->codec_id == CODEC_ID_MPEG2VIDEO) {
+                if(s->flags2 & CODEC_FLAG2_FAST){
+                    for(i=0;i<6;i++) {
+                        if(cbp & 32) {
+                            mpeg2_fast_decode_block_non_intra(s, s->pblocks[i], i);
+                        } else {
+                            s->block_last_index[i] = -1;
+                        }
+                        cbp+=cbp;
+                    }
+                }else{
+                    cbp<<= 12-mb_block_count;
+
+                    for(i=0;i<mb_block_count;i++) {
+                        if ( cbp & (1<<11) ) {
+                            if (mpeg2_decode_block_non_intra(s, s->pblocks[i], i) < 0)
+                                return -1;
+                        } else {
+                            s->block_last_index[i] = -1;
+                        }
+                        cbp+=cbp;
+                    }
+                }
+            } else {
+                if(s->flags2 & CODEC_FLAG2_FAST){
+                    for(i=0;i<6;i++) {
+                        if (cbp & 32) {
+                            mpeg1_fast_decode_block_inter(s, s->pblocks[i], i);
+                        } else {
+                            s->block_last_index[i] = -1;
+                        }
+                        cbp+=cbp;
+                    }
+                }else{
+                    for(i=0;i<6;i++) {
+                        if (cbp & 32) {
+                            if (mpeg1_decode_block_inter(s, s->pblocks[i], i) < 0)
+                                return -1;
+                        } else {
+                            s->block_last_index[i] = -1;
+                        }
+                        cbp+=cbp;
+                    }
+                }
+            }
+        }else{
+            for(i=0;i<12;i++)
+                s->block_last_index[i] = -1;
+        }
+    }
+
+    s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride ]= mb_type;
+
+    return 0;
+}
+
+/* as h263, but only 17 codes */
+static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred)
+{
+    int code, sign, val, l, shift;
+
+    code = get_vlc2(&s->gb, mv_vlc.table, MV_VLC_BITS, 2);
+    if (code == 0) {
+        return pred;
+    }
+    if (code < 0) {
+        return 0xffff;
+    }
+
+    sign = get_bits1(&s->gb);
+    shift = fcode - 1;
+    val = code;
+    if (shift) {
+        val = (val - 1) << shift;
+        val |= get_bits(&s->gb, shift);
+        val++;
+    }
+    if (sign)
+        val = -val;
+    val += pred;
+
+    /* modulo decoding */
+    l= INT_BIT - 5 - shift;
+    val = (val<<l)>>l;
+    return val;
+}
+
+static inline int decode_dc(GetBitContext *gb, int component)
+{
+    int code, diff;
+
+    if (component == 0) {
+        code = get_vlc2(gb, dc_lum_vlc.table, DC_VLC_BITS, 2);
+    } else {
+        code = get_vlc2(gb, dc_chroma_vlc.table, DC_VLC_BITS, 2);
+    }
+    if (code < 0){
+        av_log(NULL, AV_LOG_ERROR, "invalid dc code at\n");
+        return 0xffff;
+    }
+    if (code == 0) {
+        diff = 0;
+    } else {
+        diff = get_xbits(gb, code);
+    }
+    return diff;
+}
+
+static inline int mpeg1_decode_block_intra(MpegEncContext *s,
+                               DCTELEM *block,
+                               int n)
+{
+    int level, dc, diff, i, j, run;
+    int component;
+    RLTable *rl = &rl_mpeg1;
+    uint8_t * const scantable= s->intra_scantable.permutated;
+    const uint16_t *quant_matrix= s->intra_matrix;
+    const int qscale= s->qscale;
+
+    /* DC coef */
+    component = (n <= 3 ? 0 : n - 4 + 1);
+    diff = decode_dc(&s->gb, component);
+    if (diff >= 0xffff)
+        return -1;
+    dc = s->last_dc[component];
+    dc += diff;
+    s->last_dc[component] = dc;
+    block[0] = dc<<3;
+    dprintf("dc=%d diff=%d\n", dc, diff);
+    i = 0;
+    {
+        OPEN_READER(re, &s->gb);
+        /* now quantify & encode AC coefs */
+        for(;;) {
+            UPDATE_CACHE(re, &s->gb);
+            GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
+
+            if(level == 127){
+                break;
+            } else if(level != 0) {
+                i += run;
+                j = scantable[i];
+                level= (level*qscale*quant_matrix[j])>>4;
+                level= (level-1)|1;
+                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                LAST_SKIP_BITS(re, &s->gb, 1);
+            } else {
+                /* escape */
+                run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6);
+                UPDATE_CACHE(re, &s->gb);
+                level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8);
+                if (level == -128) {
+                    level = SHOW_UBITS(re, &s->gb, 8) - 256; LAST_SKIP_BITS(re, &s->gb, 8);
+                } else if (level == 0) {
+                    level = SHOW_UBITS(re, &s->gb, 8)      ; LAST_SKIP_BITS(re, &s->gb, 8);
+                }
+                i += run;
+                j = scantable[i];
+                if(level<0){
+                    level= -level;
+                    level= (level*qscale*quant_matrix[j])>>4;
+                    level= (level-1)|1;
+                    level= -level;
+                }else{
+                    level= (level*qscale*quant_matrix[j])>>4;
+                    level= (level-1)|1;
+                }
+            }
+            if (i > 63){
+                av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+
+            block[j] = level;
+        }
+        CLOSE_READER(re, &s->gb);
+    }
+    s->block_last_index[n] = i;
+   return 0;
+}
+
+static inline int mpeg1_decode_block_inter(MpegEncContext *s,
+                               DCTELEM *block,
+                               int n)
+{
+    int level, i, j, run;
+    RLTable *rl = &rl_mpeg1;
+    uint8_t * const scantable= s->intra_scantable.permutated;
+    const uint16_t *quant_matrix= s->inter_matrix;
+    const int qscale= s->qscale;
+
+    {
+        OPEN_READER(re, &s->gb);
+        i = -1;
+        /* special case for the first coef. no need to add a second vlc table */
+        UPDATE_CACHE(re, &s->gb);
+        if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
+            level= (3*qscale*quant_matrix[0])>>5;
+            level= (level-1)|1;
+            if(GET_CACHE(re, &s->gb)&0x40000000)
+                level= -level;
+            block[0] = level;
+            i++;
+            SKIP_BITS(re, &s->gb, 2);
+            if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
+                goto end;
+        }
+
+        /* now quantify & encode AC coefs */
+        for(;;) {
+            GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
+
+            if(level != 0) {
+                i += run;
+                j = scantable[i];
+                level= ((level*2+1)*qscale*quant_matrix[j])>>5;
+                level= (level-1)|1;
+                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                SKIP_BITS(re, &s->gb, 1);
+            } else {
+                /* escape */
+                run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6);
+                UPDATE_CACHE(re, &s->gb);
+                level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8);
+                if (level == -128) {
+                    level = SHOW_UBITS(re, &s->gb, 8) - 256; SKIP_BITS(re, &s->gb, 8);
+                } else if (level == 0) {
+                    level = SHOW_UBITS(re, &s->gb, 8)      ; SKIP_BITS(re, &s->gb, 8);
+                }
+                i += run;
+                j = scantable[i];
+                if(level<0){
+                    level= -level;
+                    level= ((level*2+1)*qscale*quant_matrix[j])>>5;
+                    level= (level-1)|1;
+                    level= -level;
+                }else{
+                    level= ((level*2+1)*qscale*quant_matrix[j])>>5;
+                    level= (level-1)|1;
+                }
+            }
+            if (i > 63){
+                av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+
+            block[j] = level;
+            if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
+                break;
+            UPDATE_CACHE(re, &s->gb);
+        }
+end:
+        LAST_SKIP_BITS(re, &s->gb, 2);
+        CLOSE_READER(re, &s->gb);
+    }
+    s->block_last_index[n] = i;
+    return 0;
+}
+
+static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n)
+{
+    int level, i, j, run;
+    RLTable *rl = &rl_mpeg1;
+    uint8_t * const scantable= s->intra_scantable.permutated;
+    const int qscale= s->qscale;
+
+    {
+        OPEN_READER(re, &s->gb);
+        i = -1;
+        /* special case for the first coef. no need to add a second vlc table */
+        UPDATE_CACHE(re, &s->gb);
+        if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
+            level= (3*qscale)>>1;
+            level= (level-1)|1;
+            if(GET_CACHE(re, &s->gb)&0x40000000)
+                level= -level;
+            block[0] = level;
+            i++;
+            SKIP_BITS(re, &s->gb, 2);
+            if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
+                goto end;
+        }
+
+        /* now quantify & encode AC coefs */
+        for(;;) {
+            GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
+
+            if(level != 0) {
+                i += run;
+                j = scantable[i];
+                level= ((level*2+1)*qscale)>>1;
+                level= (level-1)|1;
+                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                SKIP_BITS(re, &s->gb, 1);
+            } else {
+                /* escape */
+                run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6);
+                UPDATE_CACHE(re, &s->gb);
+                level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8);
+                if (level == -128) {
+                    level = SHOW_UBITS(re, &s->gb, 8) - 256; SKIP_BITS(re, &s->gb, 8);
+                } else if (level == 0) {
+                    level = SHOW_UBITS(re, &s->gb, 8)      ; SKIP_BITS(re, &s->gb, 8);
+                }
+                i += run;
+                j = scantable[i];
+                if(level<0){
+                    level= -level;
+                    level= ((level*2+1)*qscale)>>1;
+                    level= (level-1)|1;
+                    level= -level;
+                }else{
+                    level= ((level*2+1)*qscale)>>1;
+                    level= (level-1)|1;
+                }
+            }
+
+            block[j] = level;
+            if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
+                break;
+            UPDATE_CACHE(re, &s->gb);
+        }
+end:
+        LAST_SKIP_BITS(re, &s->gb, 2);
+        CLOSE_READER(re, &s->gb);
+    }
+    s->block_last_index[n] = i;
+    return 0;
+}
+
+
+static inline int mpeg2_decode_block_non_intra(MpegEncContext *s,
+                               DCTELEM *block,
+                               int n)
+{
+    int level, i, j, run;
+    RLTable *rl = &rl_mpeg1;
+    uint8_t * const scantable= s->intra_scantable.permutated;
+    const uint16_t *quant_matrix;
+    const int qscale= s->qscale;
+    int mismatch;
+
+    mismatch = 1;
+
+    {
+        OPEN_READER(re, &s->gb);
+        i = -1;
+        if (n < 4)
+            quant_matrix = s->inter_matrix;
+        else
+            quant_matrix = s->chroma_inter_matrix;
+
+        /* special case for the first coef. no need to add a second vlc table */
+        UPDATE_CACHE(re, &s->gb);
+        if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
+            level= (3*qscale*quant_matrix[0])>>5;
+            if(GET_CACHE(re, &s->gb)&0x40000000)
+                level= -level;
+            block[0] = level;
+            mismatch ^= level;
+            i++;
+            SKIP_BITS(re, &s->gb, 2);
+            if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
+                goto end;
+        }
+
+        /* now quantify & encode AC coefs */
+        for(;;) {
+            GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
+
+            if(level != 0) {
+                i += run;
+                j = scantable[i];
+                level= ((level*2+1)*qscale*quant_matrix[j])>>5;
+                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                SKIP_BITS(re, &s->gb, 1);
+            } else {
+                /* escape */
+                run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6);
+                UPDATE_CACHE(re, &s->gb);
+                level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12);
+
+                i += run;
+                j = scantable[i];
+                if(level<0){
+                    level= ((-level*2+1)*qscale*quant_matrix[j])>>5;
+                    level= -level;
+                }else{
+                    level= ((level*2+1)*qscale*quant_matrix[j])>>5;
+                }
+            }
+            if (i > 63){
+                av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+
+            mismatch ^= level;
+            block[j] = level;
+            if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
+                break;
+            UPDATE_CACHE(re, &s->gb);
+        }
+end:
+        LAST_SKIP_BITS(re, &s->gb, 2);
+        CLOSE_READER(re, &s->gb);
+    }
+    block[63] ^= (mismatch & 1);
+
+    s->block_last_index[n] = i;
+    return 0;
+}
+
+static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s,
+                               DCTELEM *block,
+                               int n)
+{
+    int level, i, j, run;
+    RLTable *rl = &rl_mpeg1;
+    uint8_t * const scantable= s->intra_scantable.permutated;
+    const int qscale= s->qscale;
+    OPEN_READER(re, &s->gb);
+    i = -1;
+
+    /* special case for the first coef. no need to add a second vlc table */
+    UPDATE_CACHE(re, &s->gb);
+    if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
+        level= (3*qscale)>>1;
+        if(GET_CACHE(re, &s->gb)&0x40000000)
+            level= -level;
+        block[0] = level;
+        i++;
+        SKIP_BITS(re, &s->gb, 2);
+        if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
+            goto end;
+    }
+
+    /* now quantify & encode AC coefs */
+    for(;;) {
+        GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
+
+        if(level != 0) {
+            i += run;
+            j = scantable[i];
+            level= ((level*2+1)*qscale)>>1;
+            level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+            SKIP_BITS(re, &s->gb, 1);
+        } else {
+            /* escape */
+            run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6);
+            UPDATE_CACHE(re, &s->gb);
+            level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12);
+
+            i += run;
+            j = scantable[i];
+            if(level<0){
+                level= ((-level*2+1)*qscale)>>1;
+                level= -level;
+            }else{
+                level= ((level*2+1)*qscale)>>1;
+            }
+        }
+
+        block[j] = level;
+        if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
+            break;
+        UPDATE_CACHE(re, &s->gb);
+    }
+end:
+    LAST_SKIP_BITS(re, &s->gb, 2);
+    CLOSE_READER(re, &s->gb);
+    s->block_last_index[n] = i;
+    return 0;
+}
+
+
+static inline int mpeg2_decode_block_intra(MpegEncContext *s,
+                               DCTELEM *block,
+                               int n)
+{
+    int level, dc, diff, i, j, run;
+    int component;
+    RLTable *rl;
+    uint8_t * const scantable= s->intra_scantable.permutated;
+    const uint16_t *quant_matrix;
+    const int qscale= s->qscale;
+    int mismatch;
+
+    /* DC coef */
+    if (n < 4){
+        quant_matrix = s->intra_matrix;
+        component = 0;
+    }else{
+        quant_matrix = s->chroma_intra_matrix;
+        component = (n&1) + 1;
+    }
+    diff = decode_dc(&s->gb, component);
+    if (diff >= 0xffff)
+        return -1;
+    dc = s->last_dc[component];
+    dc += diff;
+    s->last_dc[component] = dc;
+    block[0] = dc << (3 - s->intra_dc_precision);
+    dprintf("dc=%d\n", block[0]);
+    mismatch = block[0] ^ 1;
+    i = 0;
+    if (s->intra_vlc_format)
+        rl = &rl_mpeg2;
+    else
+        rl = &rl_mpeg1;
+
+    {
+        OPEN_READER(re, &s->gb);
+        /* now quantify & encode AC coefs */
+        for(;;) {
+            UPDATE_CACHE(re, &s->gb);
+            GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
+
+            if(level == 127){
+                break;
+            } else if(level != 0) {
+                i += run;
+                j = scantable[i];
+                level= (level*qscale*quant_matrix[j])>>4;
+                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                LAST_SKIP_BITS(re, &s->gb, 1);
+            } else {
+                /* escape */
+                run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6);
+                UPDATE_CACHE(re, &s->gb);
+                level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12);
+                i += run;
+                j = scantable[i];
+                if(level<0){
+                    level= (-level*qscale*quant_matrix[j])>>4;
+                    level= -level;
+                }else{
+                    level= (level*qscale*quant_matrix[j])>>4;
+                }
+            }
+            if (i > 63){
+                av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+
+            mismatch^= level;
+            block[j] = level;
+        }
+        CLOSE_READER(re, &s->gb);
+    }
+    block[63]^= mismatch&1;
+
+    s->block_last_index[n] = i;
+    return 0;
+}
+
+static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s,
+                               DCTELEM *block,
+                               int n)
+{
+    int level, dc, diff, j, run;
+    int component;
+    RLTable *rl;
+    uint8_t * scantable= s->intra_scantable.permutated;
+    const uint16_t *quant_matrix;
+    const int qscale= s->qscale;
+
+    /* DC coef */
+    if (n < 4){
+        quant_matrix = s->intra_matrix;
+        component = 0;
+    }else{
+        quant_matrix = s->chroma_intra_matrix;
+        component = (n&1) + 1;
+    }
+    diff = decode_dc(&s->gb, component);
+    if (diff >= 0xffff)
+        return -1;
+    dc = s->last_dc[component];
+    dc += diff;
+    s->last_dc[component] = dc;
+    block[0] = dc << (3 - s->intra_dc_precision);
+    if (s->intra_vlc_format)
+        rl = &rl_mpeg2;
+    else
+        rl = &rl_mpeg1;
+
+    {
+        OPEN_READER(re, &s->gb);
+        /* now quantify & encode AC coefs */
+        for(;;) {
+            UPDATE_CACHE(re, &s->gb);
+            GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
+
+            if(level == 127){
+                break;
+            } else if(level != 0) {
+                scantable += run;
+                j = *scantable;
+                level= (level*qscale*quant_matrix[j])>>4;
+                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                LAST_SKIP_BITS(re, &s->gb, 1);
+            } else {
+                /* escape */
+                run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6);
+                UPDATE_CACHE(re, &s->gb);
+                level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12);
+                scantable += run;
+                j = *scantable;
+                if(level<0){
+                    level= (-level*qscale*quant_matrix[j])>>4;
+                    level= -level;
+                }else{
+                    level= (level*qscale*quant_matrix[j])>>4;
+                }
+            }
+
+            block[j] = level;
+        }
+        CLOSE_READER(re, &s->gb);
+    }
+
+    s->block_last_index[n] = scantable - s->intra_scantable.permutated;
+    return 0;
+}
+
+typedef struct Mpeg1Context {
+    MpegEncContext mpeg_enc_ctx;
+    int mpeg_enc_ctx_allocated; /* true if decoding context allocated */
+    int repeat_field; /* true if we must repeat the field */
+    AVPanScan pan_scan; /** some temporary storage for the panscan */
+    int slice_count;
+    int swap_uv;//indicate VCR2
+    int save_aspect_info;
+    AVRational frame_rate_ext;       ///< MPEG-2 specific framerate modificator
+
+} Mpeg1Context;
+
+static int mpeg_decode_init(AVCodecContext *avctx)
+{
+    Mpeg1Context *s = avctx->priv_data;
+    MpegEncContext *s2 = &s->mpeg_enc_ctx;
+    int i;
+
+    //we need some parmutation to store
+    //matrixes, until MPV_common_init()
+    //set the real permutatuon
+    for(i=0;i<64;i++)
+       s2->dsp.idct_permutation[i]=i;
+
+    MPV_decode_defaults(s2);
+
+    s->mpeg_enc_ctx.avctx= avctx;
+    s->mpeg_enc_ctx.flags= avctx->flags;
+    s->mpeg_enc_ctx.flags2= avctx->flags2;
+    common_init(&s->mpeg_enc_ctx);
+    init_vlcs();
+
+    s->mpeg_enc_ctx_allocated = 0;
+    s->mpeg_enc_ctx.picture_number = 0;
+    s->repeat_field = 0;
+    s->mpeg_enc_ctx.codec_id= avctx->codec->id;
+    return 0;
+}
+
+static void quant_matrix_rebuild(uint16_t *matrix, const uint8_t *old_perm,
+                                     const uint8_t *new_perm){
+    uint16_t temp_matrix[64];
+    int i;
+
+    memcpy(temp_matrix,matrix,64*sizeof(uint16_t));
+
+    for(i=0;i<64;i++){
+        matrix[new_perm[i]] = temp_matrix[old_perm[i]];
+    }
+}
+
+//Call this function when we know all parameters
+//it may be called in different places for mpeg1 and mpeg2
+static int mpeg_decode_postinit(AVCodecContext *avctx){
+    Mpeg1Context *s1 = avctx->priv_data;
+    MpegEncContext *s = &s1->mpeg_enc_ctx;
+    uint8_t old_permutation[64];
+
+    if (
+        (s1->mpeg_enc_ctx_allocated == 0)||
+        avctx->coded_width  != s->width ||
+        avctx->coded_height != s->height||
+        s1->save_aspect_info != s->aspect_ratio_info||
+        0)
+    {
+
+        if (s1->mpeg_enc_ctx_allocated) {
+            ParseContext pc= s->parse_context;
+            s->parse_context.buffer=0;
+            MPV_common_end(s);
+            s->parse_context= pc;
+        }
+
+        if( (s->width == 0 )||(s->height == 0))
+            return -2;
+
+        avcodec_set_dimensions(avctx, s->width, s->height);
+        avctx->bit_rate = s->bit_rate;
+        s1->save_aspect_info = s->aspect_ratio_info;
+
+     //low_delay may be forced, in this case we will have B frames
+     //that behave like P frames
+        avctx->has_b_frames = !(s->low_delay);
+
+        if(avctx->sub_id==1){//s->codec_id==avctx->codec_id==CODEC_ID
+            //mpeg1 fps
+            avctx->time_base.den= ff_frame_rate_tab[s->frame_rate_index].num;
+            avctx->time_base.num= ff_frame_rate_tab[s->frame_rate_index].den;
+            //mpeg1 aspect
+            avctx->sample_aspect_ratio= av_d2q(
+                    1.0/mpeg1_aspect[s->aspect_ratio_info], 255);
+
+        }else{//mpeg2
+        //mpeg2 fps
+            av_reduce(
+                &s->avctx->time_base.den,
+                &s->avctx->time_base.num,
+                ff_frame_rate_tab[s->frame_rate_index].num * s1->frame_rate_ext.num,
+                ff_frame_rate_tab[s->frame_rate_index].den * s1->frame_rate_ext.den,
+                1<<30);
+        //mpeg2 aspect
+            if(s->aspect_ratio_info > 1){
+                if( (s1->pan_scan.width == 0 )||(s1->pan_scan.height == 0) ){
+                    s->avctx->sample_aspect_ratio=
+                        av_div_q(
+                         mpeg2_aspect[s->aspect_ratio_info],
+                         (AVRational){s->width, s->height}
+                         );
+                }else{
+                    s->avctx->sample_aspect_ratio=
+                        av_div_q(
+                         mpeg2_aspect[s->aspect_ratio_info],
+                         (AVRational){s1->pan_scan.width, s1->pan_scan.height}
+                        );
+                }
+            }else{
+                s->avctx->sample_aspect_ratio=
+                    mpeg2_aspect[s->aspect_ratio_info];
+            }
+        }//mpeg2
+
+        if(avctx->xvmc_acceleration){
+            avctx->pix_fmt = avctx->get_format(avctx,pixfmt_xvmc_mpg2_420);
+        }else{
+            if(s->chroma_format <  2){
+                avctx->pix_fmt = avctx->get_format(avctx,pixfmt_yuv_420);
+            }else
+            if(s->chroma_format == 2){
+                avctx->pix_fmt = avctx->get_format(avctx,pixfmt_yuv_422);
+            }else
+            if(s->chroma_format >  2){
+                avctx->pix_fmt = avctx->get_format(avctx,pixfmt_yuv_444);
+            }
+        }
+        //until then pix_fmt may be changed right after codec init
+        if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT )
+            if( avctx->idct_algo == FF_IDCT_AUTO )
+                avctx->idct_algo = FF_IDCT_SIMPLE;
+
+        //quantization matrixes may need reordering
+        //if dct permutation is changed
+        memcpy(old_permutation,s->dsp.idct_permutation,64*sizeof(uint8_t));
+
+        if (MPV_common_init(s) < 0)
+            return -2;
+
+        quant_matrix_rebuild(s->intra_matrix,       old_permutation,s->dsp.idct_permutation);
+        quant_matrix_rebuild(s->inter_matrix,       old_permutation,s->dsp.idct_permutation);
+        quant_matrix_rebuild(s->chroma_intra_matrix,old_permutation,s->dsp.idct_permutation);
+        quant_matrix_rebuild(s->chroma_inter_matrix,old_permutation,s->dsp.idct_permutation);
+
+        s1->mpeg_enc_ctx_allocated = 1;
+    }
+    return 0;
+}
+
+static int mpeg1_decode_picture(AVCodecContext *avctx,
+                                const uint8_t *buf, int buf_size)
+{
+    Mpeg1Context *s1 = avctx->priv_data;
+    MpegEncContext *s = &s1->mpeg_enc_ctx;
+    int ref, f_code, vbv_delay;
+
+    if(mpeg_decode_postinit(s->avctx) < 0)
+       return -2;
+
+    init_get_bits(&s->gb, buf, buf_size*8);
+
+    ref = get_bits(&s->gb, 10); /* temporal ref */
+    s->pict_type = get_bits(&s->gb, 3);
+    if(s->pict_type == 0 || s->pict_type > 3)
+        return -1;
+
+    vbv_delay= get_bits(&s->gb, 16);
+    if (s->pict_type == P_TYPE || s->pict_type == B_TYPE) {
+        s->full_pel[0] = get_bits1(&s->gb);
+        f_code = get_bits(&s->gb, 3);
+        if (f_code == 0 && avctx->error_resilience >= FF_ER_COMPLIANT)
+            return -1;
+        s->mpeg_f_code[0][0] = f_code;
+        s->mpeg_f_code[0][1] = f_code;
+    }
+    if (s->pict_type == B_TYPE) {
+        s->full_pel[1] = get_bits1(&s->gb);
+        f_code = get_bits(&s->gb, 3);
+        if (f_code == 0 && avctx->error_resilience >= FF_ER_COMPLIANT)
+            return -1;
+        s->mpeg_f_code[1][0] = f_code;
+        s->mpeg_f_code[1][1] = f_code;
+    }
+    s->current_picture.pict_type= s->pict_type;
+    s->current_picture.key_frame= s->pict_type == I_TYPE;
+
+    if(avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(avctx, AV_LOG_DEBUG, "vbv_delay %d, ref %d type:%d\n", vbv_delay, ref, s->pict_type);
+
+    s->y_dc_scale = 8;
+    s->c_dc_scale = 8;
+    s->first_slice = 1;
+    return 0;
+}
+
+static void mpeg_decode_sequence_extension(Mpeg1Context *s1)
+{
+    MpegEncContext *s= &s1->mpeg_enc_ctx;
+    int horiz_size_ext, vert_size_ext;
+    int bit_rate_ext;
+
+    skip_bits(&s->gb, 1); /* profil and level esc*/
+    s->avctx->profile= get_bits(&s->gb, 3);
+    s->avctx->level= get_bits(&s->gb, 4);
+    s->progressive_sequence = get_bits1(&s->gb); /* progressive_sequence */
+    s->chroma_format = get_bits(&s->gb, 2); /* chroma_format 1=420, 2=422, 3=444 */
+    horiz_size_ext = get_bits(&s->gb, 2);
+    vert_size_ext = get_bits(&s->gb, 2);
+    s->width |= (horiz_size_ext << 12);
+    s->height |= (vert_size_ext << 12);
+    bit_rate_ext = get_bits(&s->gb, 12);  /* XXX: handle it */
+    s->bit_rate += (bit_rate_ext << 18) * 400;
+    skip_bits1(&s->gb); /* marker */
+    s->avctx->rc_buffer_size += get_bits(&s->gb, 8)*1024*16<<10;
+
+    s->low_delay = get_bits1(&s->gb);
+    if(s->flags & CODEC_FLAG_LOW_DELAY) s->low_delay=1;
+
+    s1->frame_rate_ext.num = get_bits(&s->gb, 2)+1;
+    s1->frame_rate_ext.den = get_bits(&s->gb, 5)+1;
+
+    dprintf("sequence extension\n");
+    s->codec_id= s->avctx->codec_id= CODEC_ID_MPEG2VIDEO;
+    s->avctx->sub_id = 2; /* indicates mpeg2 found */
+
+    if(s->avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(s->avctx, AV_LOG_DEBUG, "profile: %d, level: %d vbv buffer: %d, bitrate:%d\n",
+               s->avctx->profile, s->avctx->level, s->avctx->rc_buffer_size, s->bit_rate);
+
+}
+
+static void mpeg_decode_sequence_display_extension(Mpeg1Context *s1)
+{
+    MpegEncContext *s= &s1->mpeg_enc_ctx;
+    int color_description, w, h;
+
+    skip_bits(&s->gb, 3); /* video format */
+    color_description= get_bits1(&s->gb);
+    if(color_description){
+        skip_bits(&s->gb, 8); /* color primaries */
+        skip_bits(&s->gb, 8); /* transfer_characteristics */
+        skip_bits(&s->gb, 8); /* matrix_coefficients */
+    }
+    w= get_bits(&s->gb, 14);
+    skip_bits(&s->gb, 1); //marker
+    h= get_bits(&s->gb, 14);
+    skip_bits(&s->gb, 1); //marker
+
+    s1->pan_scan.width= 16*w;
+    s1->pan_scan.height=16*h;
+
+    if(s->avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(s->avctx, AV_LOG_DEBUG, "sde w:%d, h:%d\n", w, h);
+}
+
+static void mpeg_decode_picture_display_extension(Mpeg1Context *s1)
+{
+    MpegEncContext *s= &s1->mpeg_enc_ctx;
+    int i,nofco;
+
+    nofco = 1;
+    if(s->progressive_sequence){
+        if(s->repeat_first_field){
+            nofco++;
+            if(s->top_field_first)
+                nofco++;
+        }
+    }else{
+        if(s->picture_structure == PICT_FRAME){
+            nofco++;
+            if(s->repeat_first_field)
+                nofco++;
+        }
+    }
+    for(i=0; i<nofco; i++){
+        s1->pan_scan.position[i][0]= get_sbits(&s->gb, 16);
+        skip_bits(&s->gb, 1); //marker
+        s1->pan_scan.position[i][1]= get_sbits(&s->gb, 16);
+        skip_bits(&s->gb, 1); //marker
+    }
+
+    if(s->avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(s->avctx, AV_LOG_DEBUG, "pde (%d,%d) (%d,%d) (%d,%d)\n",
+            s1->pan_scan.position[0][0], s1->pan_scan.position[0][1],
+            s1->pan_scan.position[1][0], s1->pan_scan.position[1][1],
+            s1->pan_scan.position[2][0], s1->pan_scan.position[2][1]
+        );
+}
+
+static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
+{
+    int i, v, j;
+
+    dprintf("matrix extension\n");
+
+    if (get_bits1(&s->gb)) {
+        for(i=0;i<64;i++) {
+            v = get_bits(&s->gb, 8);
+            j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
+            s->intra_matrix[j] = v;
+            s->chroma_intra_matrix[j] = v;
+        }
+    }
+    if (get_bits1(&s->gb)) {
+        for(i=0;i<64;i++) {
+            v = get_bits(&s->gb, 8);
+            j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
+            s->inter_matrix[j] = v;
+            s->chroma_inter_matrix[j] = v;
+        }
+    }
+    if (get_bits1(&s->gb)) {
+        for(i=0;i<64;i++) {
+            v = get_bits(&s->gb, 8);
+            j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
+            s->chroma_intra_matrix[j] = v;
+        }
+    }
+    if (get_bits1(&s->gb)) {
+        for(i=0;i<64;i++) {
+            v = get_bits(&s->gb, 8);
+            j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
+            s->chroma_inter_matrix[j] = v;
+        }
+    }
+}
+
+static void mpeg_decode_picture_coding_extension(MpegEncContext *s)
+{
+    s->full_pel[0] = s->full_pel[1] = 0;
+    s->mpeg_f_code[0][0] = get_bits(&s->gb, 4);
+    s->mpeg_f_code[0][1] = get_bits(&s->gb, 4);
+    s->mpeg_f_code[1][0] = get_bits(&s->gb, 4);
+    s->mpeg_f_code[1][1] = get_bits(&s->gb, 4);
+    s->intra_dc_precision = get_bits(&s->gb, 2);
+    s->picture_structure = get_bits(&s->gb, 2);
+    s->top_field_first = get_bits1(&s->gb);
+    s->frame_pred_frame_dct = get_bits1(&s->gb);
+    s->concealment_motion_vectors = get_bits1(&s->gb);
+    s->q_scale_type = get_bits1(&s->gb);
+    s->intra_vlc_format = get_bits1(&s->gb);
+    s->alternate_scan = get_bits1(&s->gb);
+    s->repeat_first_field = get_bits1(&s->gb);
+    s->chroma_420_type = get_bits1(&s->gb);
+    s->progressive_frame = get_bits1(&s->gb);
+
+    if(s->picture_structure == PICT_FRAME){
+        s->first_field=0;
+        s->v_edge_pos= 16*s->mb_height;
+    }else{
+        s->first_field ^= 1;
+        s->v_edge_pos=  8*s->mb_height;
+        memset(s->mbskip_table, 0, s->mb_stride*s->mb_height);
+    }
+
+    if(s->alternate_scan){
+        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
+        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
+    }else{
+        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
+        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
+    }
+
+    /* composite display not parsed */
+    dprintf("intra_dc_precision=%d\n", s->intra_dc_precision);
+    dprintf("picture_structure=%d\n", s->picture_structure);
+    dprintf("top field first=%d\n", s->top_field_first);
+    dprintf("repeat first field=%d\n", s->repeat_first_field);
+    dprintf("conceal=%d\n", s->concealment_motion_vectors);
+    dprintf("intra_vlc_format=%d\n", s->intra_vlc_format);
+    dprintf("alternate_scan=%d\n", s->alternate_scan);
+    dprintf("frame_pred_frame_dct=%d\n", s->frame_pred_frame_dct);
+    dprintf("progressive_frame=%d\n", s->progressive_frame);
+}
+
+static void mpeg_decode_extension(AVCodecContext *avctx,
+                                  const uint8_t *buf, int buf_size)
+{
+    Mpeg1Context *s1 = avctx->priv_data;
+    MpegEncContext *s = &s1->mpeg_enc_ctx;
+    int ext_type;
+
+    init_get_bits(&s->gb, buf, buf_size*8);
+
+    ext_type = get_bits(&s->gb, 4);
+    switch(ext_type) {
+    case 0x1:
+        mpeg_decode_sequence_extension(s1);
+        break;
+    case 0x2:
+        mpeg_decode_sequence_display_extension(s1);
+        break;
+    case 0x3:
+        mpeg_decode_quant_matrix_extension(s);
+        break;
+    case 0x7:
+        mpeg_decode_picture_display_extension(s1);
+        break;
+    case 0x8:
+        mpeg_decode_picture_coding_extension(s);
+        break;
+    }
+}
+
+static void exchange_uv(MpegEncContext *s){
+    short * tmp = s->pblocks[4];
+    s->pblocks[4] = s->pblocks[5];
+    s->pblocks[5] = tmp;
+}
+
+static int mpeg_field_start(MpegEncContext *s){
+    AVCodecContext *avctx= s->avctx;
+    Mpeg1Context *s1 = (Mpeg1Context*)s;
+
+    /* start frame decoding */
+    if(s->first_field || s->picture_structure==PICT_FRAME){
+        if(MPV_frame_start(s, avctx) < 0)
+            return -1;
+
+        ff_er_frame_start(s);
+
+        /* first check if we must repeat the frame */
+        s->current_picture_ptr->repeat_pict = 0;
+        if (s->repeat_first_field) {
+            if (s->progressive_sequence) {
+                if (s->top_field_first)
+                    s->current_picture_ptr->repeat_pict = 4;
+                else
+                    s->current_picture_ptr->repeat_pict = 2;
+            } else if (s->progressive_frame) {
+                s->current_picture_ptr->repeat_pict = 1;
+            }
+        }
+
+        *s->current_picture_ptr->pan_scan= s1->pan_scan;
+    }else{ //second field
+            int i;
+
+            if(!s->current_picture_ptr){
+                av_log(s->avctx, AV_LOG_ERROR, "first field missing\n");
+                return -1;
+            }
+
+            for(i=0; i<4; i++){
+                s->current_picture.data[i] = s->current_picture_ptr->data[i];
+                if(s->picture_structure == PICT_BOTTOM_FIELD){
+                    s->current_picture.data[i] += s->current_picture_ptr->linesize[i];
+                }
+            }
+    }
+#ifdef HAVE_XVMC
+// MPV_frame_start will call this function too,
+// but we need to call it on every field
+    if(s->avctx->xvmc_acceleration)
+         XVMC_field_start(s,avctx);
+#endif
+
+    return 0;
+}
+
+#define DECODE_SLICE_ERROR -1
+#define DECODE_SLICE_OK 0
+
+/**
+ * decodes a slice. MpegEncContext.mb_y must be set to the MB row from the startcode
+ * @return DECODE_SLICE_ERROR if the slice is damaged<br>
+ *         DECODE_SLICE_OK if this slice is ok<br>
+ */
+static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y,
+                             const uint8_t **buf, int buf_size)
+{
+    MpegEncContext *s = &s1->mpeg_enc_ctx;
+    AVCodecContext *avctx= s->avctx;
+    int ret;
+    const int field_pic= s->picture_structure != PICT_FRAME;
+    const int lowres= s->avctx->lowres;
+
+    s->resync_mb_x=
+    s->resync_mb_y= -1;
+
+    if (mb_y<<field_pic >= s->mb_height){
+        av_log(s->avctx, AV_LOG_ERROR, "slice below image (%d >= %d)\n", mb_y, s->mb_height);
+        return -1;
+    }
+
+    init_get_bits(&s->gb, *buf, buf_size*8);
+
+    ff_mpeg1_clean_buffers(s);
+    s->interlaced_dct = 0;
+
+    s->qscale = get_qscale(s);
+
+    if(s->qscale == 0){
+        av_log(s->avctx, AV_LOG_ERROR, "qscale == 0\n");
+        return -1;
+    }
+
+    /* extra slice info */
+    while (get_bits1(&s->gb) != 0) {
+        skip_bits(&s->gb, 8);
+    }
+
+    s->mb_x=0;
+
+    for(;;) {
+        int code = get_vlc2(&s->gb, mbincr_vlc.table, MBINCR_VLC_BITS, 2);
+        if (code < 0){
+            av_log(s->avctx, AV_LOG_ERROR, "first mb_incr damaged\n");
+            return -1;
+        }
+        if (code >= 33) {
+            if (code == 33) {
+                s->mb_x += 33;
+            }
+            /* otherwise, stuffing, nothing to do */
+        } else {
+            s->mb_x += code;
+            break;
+        }
+    }
+
+    s->resync_mb_x= s->mb_x;
+    s->resync_mb_y= s->mb_y= mb_y;
+    s->mb_skip_run= 0;
+    ff_init_block_index(s);
+
+    if (s->mb_y==0 && s->mb_x==0 && (s->first_field || s->picture_structure==PICT_FRAME)) {
+        if(s->avctx->debug&FF_DEBUG_PICT_INFO){
+             av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n",
+                 s->qscale, s->mpeg_f_code[0][0],s->mpeg_f_code[0][1],s->mpeg_f_code[1][0],s->mpeg_f_code[1][1],
+                 s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")),
+                 s->progressive_sequence ? "ps" :"", s->progressive_frame ? "pf" : "", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"",
+                 s->intra_dc_precision, s->picture_structure, s->frame_pred_frame_dct, s->concealment_motion_vectors,
+                 s->q_scale_type, s->intra_vlc_format, s->repeat_first_field, s->chroma_420_type ? "420" :"");
+        }
+    }
+
+    for(;;) {
+#ifdef HAVE_XVMC
+        //one 1 we memcpy blocks in xvmcvideo
+        if(s->avctx->xvmc_acceleration > 1)
+            XVMC_init_block(s);//set s->block
+#endif
+
+        ret = mpeg_decode_mb(s, s->block);
+        s->chroma_qscale= s->qscale;
+
+        dprintf("ret=%d\n", ret);
+        if (ret < 0)
+            return -1;
+
+        if(s->current_picture.motion_val[0] && !s->encoding){ //note motion_val is normally NULL unless we want to extract the MVs
+            const int wrap = field_pic ? 2*s->b8_stride : s->b8_stride;
+            int xy = s->mb_x*2 + s->mb_y*2*wrap;
+            int motion_x, motion_y, dir, i;
+            if(field_pic && !s->first_field)
+                xy += wrap/2;
+
+            for(i=0; i<2; i++){
+                for(dir=0; dir<2; dir++){
+                    if (s->mb_intra || (dir==1 && s->pict_type != B_TYPE)) {
+                        motion_x = motion_y = 0;
+                    }else if (s->mv_type == MV_TYPE_16X16 || (s->mv_type == MV_TYPE_FIELD && field_pic)){
+                        motion_x = s->mv[dir][0][0];
+                        motion_y = s->mv[dir][0][1];
+                    } else /*if ((s->mv_type == MV_TYPE_FIELD) || (s->mv_type == MV_TYPE_16X8))*/ {
+                        motion_x = s->mv[dir][i][0];
+                        motion_y = s->mv[dir][i][1];
+                    }
+
+                    s->current_picture.motion_val[dir][xy    ][0] = motion_x;
+                    s->current_picture.motion_val[dir][xy    ][1] = motion_y;
+                    s->current_picture.motion_val[dir][xy + 1][0] = motion_x;
+                    s->current_picture.motion_val[dir][xy + 1][1] = motion_y;
+                    s->current_picture.ref_index [dir][xy    ]=
+                    s->current_picture.ref_index [dir][xy + 1]= s->field_select[dir][i];
+                    assert(s->field_select[dir][i]==0 || s->field_select[dir][i]==1);
+                }
+                xy += wrap;
+            }
+        }
+
+        s->dest[0] += 16 >> lowres;
+        s->dest[1] += 16 >> (s->chroma_x_shift + lowres);
+        s->dest[2] += 16 >> (s->chroma_x_shift + lowres);
+
+        MPV_decode_mb(s, s->block);
+
+        if (++s->mb_x >= s->mb_width) {
+            const int mb_size= 16>>s->avctx->lowres;
+
+            ff_draw_horiz_band(s, mb_size*s->mb_y, mb_size);
+
+            s->mb_x = 0;
+            s->mb_y++;
+
+            if(s->mb_y<<field_pic >= s->mb_height){
+                int left= s->gb.size_in_bits - get_bits_count(&s->gb);
+                int is_d10= s->chroma_format==2 && s->pict_type==I_TYPE && avctx->profile==0 && avctx->level==5
+                            && s->intra_dc_precision == 2 && s->q_scale_type == 1 && s->alternate_scan == 0
+                            && s->progressive_frame == 0 /* vbv_delay == 0xBBB || 0xE10*/;
+
+                if(left < 0 || (left && show_bits(&s->gb, FFMIN(left, 23)) && !is_d10)
+                   || (avctx->error_resilience >= FF_ER_AGGRESSIVE && left>8)){
+                    av_log(avctx, AV_LOG_ERROR, "end mismatch left=%d %0X\n", left, show_bits(&s->gb, FFMIN(left, 23)));
+                    return -1;
+                }else
+                    goto eos;
+            }
+
+            ff_init_block_index(s);
+        }
+
+        /* skip mb handling */
+        if (s->mb_skip_run == -1) {
+            /* read again increment */
+            s->mb_skip_run = 0;
+            for(;;) {
+                int code = get_vlc2(&s->gb, mbincr_vlc.table, MBINCR_VLC_BITS, 2);
+                if (code < 0){
+                    av_log(s->avctx, AV_LOG_ERROR, "mb incr damaged\n");
+                    return -1;
+                }
+                if (code >= 33) {
+                    if (code == 33) {
+                        s->mb_skip_run += 33;
+                    }else if(code == 35){
+                        if(s->mb_skip_run != 0 || show_bits(&s->gb, 15) != 0){
+                            av_log(s->avctx, AV_LOG_ERROR, "slice mismatch\n");
+                            return -1;
+                        }
+                        goto eos; /* end of slice */
+                    }
+                    /* otherwise, stuffing, nothing to do */
+                } else {
+                    s->mb_skip_run += code;
+                    break;
+                }
+            }
+        }
+    }
+eos: // end of slice
+    *buf += get_bits_count(&s->gb)/8 - 1;
+//printf("y %d %d %d %d\n", s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y);
+    return 0;
+}
+
+static int slice_decode_thread(AVCodecContext *c, void *arg){
+    MpegEncContext *s= arg;
+    const uint8_t *buf= s->gb.buffer;
+    int mb_y= s->start_mb_y;
+
+    s->error_count= 3*(s->end_mb_y - s->start_mb_y)*s->mb_width;
+
+    for(;;){
+        uint32_t start_code;
+        int ret;
+
+        ret= mpeg_decode_slice((Mpeg1Context*)s, mb_y, &buf, s->gb.buffer_end - buf);
+        emms_c();
+//av_log(c, AV_LOG_DEBUG, "ret:%d resync:%d/%d mb:%d/%d ts:%d/%d ec:%d\n",
+//ret, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, s->start_mb_y, s->end_mb_y, s->error_count);
+        if(ret < 0){
+            if(s->resync_mb_x>=0 && s->resync_mb_y>=0)
+                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, AC_ERROR|DC_ERROR|MV_ERROR);
+        }else{
+            ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, AC_END|DC_END|MV_END);
+        }
+
+        if(s->mb_y == s->end_mb_y)
+            return 0;
+
+        start_code= -1;
+        buf = ff_find_start_code(buf, s->gb.buffer_end, &start_code);
+        mb_y= start_code - SLICE_MIN_START_CODE;
+        if(mb_y < 0 || mb_y >= s->end_mb_y)
+            return -1;
+    }
+
+    return 0; //not reached
+}
+
+/**
+ * handles slice ends.
+ * @return 1 if it seems to be the last slice of
+ */
+static int slice_end(AVCodecContext *avctx, AVFrame *pict)
+{
+    Mpeg1Context *s1 = avctx->priv_data;
+    MpegEncContext *s = &s1->mpeg_enc_ctx;
+
+    if (!s1->mpeg_enc_ctx_allocated || !s->current_picture_ptr)
+        return 0;
+
+#ifdef HAVE_XVMC
+    if(s->avctx->xvmc_acceleration)
+        XVMC_field_end(s);
+#endif
+    /* end of slice reached */
+    if (/*s->mb_y<<field_pic == s->mb_height &&*/ !s->first_field) {
+        /* end of image */
+
+        s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_MPEG2;
+
+        ff_er_frame_end(s);
+
+        MPV_frame_end(s);
+
+        if (s->pict_type == B_TYPE || s->low_delay) {
+            *pict= *(AVFrame*)s->current_picture_ptr;
+            ff_print_debug_info(s, pict);
+        } else {
+            s->picture_number++;
+            /* latency of 1 frame for I and P frames */
+            /* XXX: use another variable than picture_number */
+            if (s->last_picture_ptr != NULL) {
+                *pict= *(AVFrame*)s->last_picture_ptr;
+                 ff_print_debug_info(s, pict);
+            }
+        }
+
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+static int mpeg1_decode_sequence(AVCodecContext *avctx,
+                                 const uint8_t *buf, int buf_size)
+{
+    Mpeg1Context *s1 = avctx->priv_data;
+    MpegEncContext *s = &s1->mpeg_enc_ctx;
+    int width,height;
+    int i, v, j;
+
+    init_get_bits(&s->gb, buf, buf_size*8);
+
+    width = get_bits(&s->gb, 12);
+    height = get_bits(&s->gb, 12);
+    if (width <= 0 || height <= 0 ||
+        (width % 2) != 0 || (height % 2) != 0)
+        return -1;
+    s->aspect_ratio_info= get_bits(&s->gb, 4);
+    if (s->aspect_ratio_info == 0)
+        return -1;
+    s->frame_rate_index = get_bits(&s->gb, 4);
+    if (s->frame_rate_index == 0 || s->frame_rate_index > 13)
+        return -1;
+    s->bit_rate = get_bits(&s->gb, 18) * 400;
+    if (get_bits1(&s->gb) == 0) /* marker */
+        return -1;
+    s->width = width;
+    s->height = height;
+
+    s->avctx->rc_buffer_size= get_bits(&s->gb, 10) * 1024*16;
+    skip_bits(&s->gb, 1);
+
+    /* get matrix */
+    if (get_bits1(&s->gb)) {
+        for(i=0;i<64;i++) {
+            v = get_bits(&s->gb, 8);
+            if(v==0){
+                av_log(s->avctx, AV_LOG_ERROR, "intra matrix damaged\n");
+                return -1;
+            }
+            j = s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
+            s->intra_matrix[j] = v;
+            s->chroma_intra_matrix[j] = v;
+        }
+#ifdef DEBUG
+        dprintf("intra matrix present\n");
+        for(i=0;i<64;i++)
+            dprintf(" %d", s->intra_matrix[s->dsp.idct_permutation[i]]);
+        dprintf("\n");
+#endif
+    } else {
+        for(i=0;i<64;i++) {
+            j = s->dsp.idct_permutation[i];
+            v = ff_mpeg1_default_intra_matrix[i];
+            s->intra_matrix[j] = v;
+            s->chroma_intra_matrix[j] = v;
+        }
+    }
+    if (get_bits1(&s->gb)) {
+        for(i=0;i<64;i++) {
+            v = get_bits(&s->gb, 8);
+            if(v==0){
+                av_log(s->avctx, AV_LOG_ERROR, "inter matrix damaged\n");
+                return -1;
+            }
+            j = s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
+            s->inter_matrix[j] = v;
+            s->chroma_inter_matrix[j] = v;
+        }
+#ifdef DEBUG
+        dprintf("non intra matrix present\n");
+        for(i=0;i<64;i++)
+            dprintf(" %d", s->inter_matrix[s->dsp.idct_permutation[i]]);
+        dprintf("\n");
+#endif
+    } else {
+        for(i=0;i<64;i++) {
+            int j= s->dsp.idct_permutation[i];
+            v = ff_mpeg1_default_non_intra_matrix[i];
+            s->inter_matrix[j] = v;
+            s->chroma_inter_matrix[j] = v;
+        }
+    }
+
+    if(show_bits(&s->gb, 23) != 0){
+        av_log(s->avctx, AV_LOG_ERROR, "sequence header damaged\n");
+        return -1;
+    }
+
+    /* we set mpeg2 parameters so that it emulates mpeg1 */
+    s->progressive_sequence = 1;
+    s->progressive_frame = 1;
+    s->picture_structure = PICT_FRAME;
+    s->frame_pred_frame_dct = 1;
+    s->chroma_format = 1;
+    s->codec_id= s->avctx->codec_id= CODEC_ID_MPEG1VIDEO;
+    avctx->sub_id = 1; /* indicates mpeg1 */
+    s->out_format = FMT_MPEG1;
+    s->swap_uv = 0;//AFAIK VCR2 don't have SEQ_HEADER
+    if(s->flags & CODEC_FLAG_LOW_DELAY) s->low_delay=1;
+
+    if(s->avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(s->avctx, AV_LOG_DEBUG, "vbv buffer: %d, bitrate:%d\n",
+               s->avctx->rc_buffer_size, s->bit_rate);
+
+    return 0;
+}
+
+static int vcr2_init_sequence(AVCodecContext *avctx)
+{
+    Mpeg1Context *s1 = avctx->priv_data;
+    MpegEncContext *s = &s1->mpeg_enc_ctx;
+    int i, v;
+
+    /* start new mpeg1 context decoding */
+    s->out_format = FMT_MPEG1;
+    if (s1->mpeg_enc_ctx_allocated) {
+        MPV_common_end(s);
+    }
+    s->width  = avctx->coded_width;
+    s->height = avctx->coded_height;
+    avctx->has_b_frames= 0; //true?
+    s->low_delay= 1;
+
+    if(avctx->xvmc_acceleration){
+        avctx->pix_fmt = avctx->get_format(avctx,pixfmt_xvmc_mpg2_420);
+    }else{
+        avctx->pix_fmt = avctx->get_format(avctx,pixfmt_yuv_420);
+    }
+
+    if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT )
+        if( avctx->idct_algo == FF_IDCT_AUTO )
+            avctx->idct_algo = FF_IDCT_SIMPLE;
+
+    if (MPV_common_init(s) < 0)
+        return -1;
+    exchange_uv(s);//common init reset pblocks, so we swap them here
+    s->swap_uv = 1;// in case of xvmc we need to swap uv for each MB
+    s1->mpeg_enc_ctx_allocated = 1;
+
+    for(i=0;i<64;i++) {
+        int j= s->dsp.idct_permutation[i];
+        v = ff_mpeg1_default_intra_matrix[i];
+        s->intra_matrix[j] = v;
+        s->chroma_intra_matrix[j] = v;
+
+        v = ff_mpeg1_default_non_intra_matrix[i];
+        s->inter_matrix[j] = v;
+        s->chroma_inter_matrix[j] = v;
+    }
+
+    s->progressive_sequence = 1;
+    s->progressive_frame = 1;
+    s->picture_structure = PICT_FRAME;
+    s->frame_pred_frame_dct = 1;
+    s->chroma_format = 1;
+    s->codec_id= s->avctx->codec_id= CODEC_ID_MPEG2VIDEO;
+    avctx->sub_id = 2; /* indicates mpeg2 */
+    return 0;
+}
+
+
+static void mpeg_decode_user_data(AVCodecContext *avctx,
+                                  const uint8_t *buf, int buf_size)
+{
+    const uint8_t *p;
+    int len, flags;
+    p = buf;
+    len = buf_size;
+
+    /* we parse the DTG active format information */
+    if (len >= 5 &&
+        p[0] == 'D' && p[1] == 'T' && p[2] == 'G' && p[3] == '1') {
+        flags = p[4];
+        p += 5;
+        len -= 5;
+        if (flags & 0x80) {
+            /* skip event id */
+            if (len < 2)
+                return;
+            p += 2;
+            len -= 2;
+        }
+        if (flags & 0x40) {
+            if (len < 1)
+                return;
+            avctx->dtg_active_format = p[0] & 0x0f;
+        }
+    }
+}
+
+static void mpeg_decode_gop(AVCodecContext *avctx,
+                            const uint8_t *buf, int buf_size){
+    Mpeg1Context *s1 = avctx->priv_data;
+    MpegEncContext *s = &s1->mpeg_enc_ctx;
+
+    int drop_frame_flag;
+    int time_code_hours, time_code_minutes;
+    int time_code_seconds, time_code_pictures;
+    int broken_link;
+
+    init_get_bits(&s->gb, buf, buf_size*8);
+
+    drop_frame_flag = get_bits1(&s->gb);
+
+    time_code_hours=get_bits(&s->gb,5);
+    time_code_minutes = get_bits(&s->gb,6);
+    skip_bits1(&s->gb);//marker bit
+    time_code_seconds = get_bits(&s->gb,6);
+    time_code_pictures = get_bits(&s->gb,6);
+
+    /*broken_link indicate that after editing the
+      reference frames of the first B-Frames after GOP I-Frame
+      are missing (open gop)*/
+    broken_link = get_bits1(&s->gb);
+
+    if(s->avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(s->avctx, AV_LOG_DEBUG, "GOP (%2d:%02d:%02d.[%02d]) broken_link=%d\n",
+            time_code_hours, time_code_minutes, time_code_seconds,
+            time_code_pictures, broken_link);
+}
+/**
+ * finds the end of the current frame in the bitstream.
+ * @return the position of the first byte of the next frame, or -1
+ */
+static int mpeg1_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size)
+{
+    int i;
+    uint32_t state= pc->state;
+
+    i=0;
+    if(!pc->frame_start_found){
+        for(i=0; i<buf_size; i++){
+            i= ff_find_start_code(buf+i, buf+buf_size, &state) - buf - 1;
+            if(state >= SLICE_MIN_START_CODE && state <= SLICE_MAX_START_CODE){
+                i++;
+                pc->frame_start_found=1;
+                break;
+            }
+        }
+    }
+
+    if(pc->frame_start_found){
+        /* EOF considered as end of frame */
+        if (buf_size == 0)
+            return 0;
+        for(; i<buf_size; i++){
+            i= ff_find_start_code(buf+i, buf+buf_size, &state) - buf - 1;
+            if((state&0xFFFFFF00) == 0x100){
+                if(state < SLICE_MIN_START_CODE || state > SLICE_MAX_START_CODE){
+                    pc->frame_start_found=0;
+                    pc->state=-1;
+                    return i-3;
+                }
+            }
+        }
+    }
+    pc->state= state;
+    return END_NOT_FOUND;
+}
+
+/* handle buffering and image synchronisation */
+static int mpeg_decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             uint8_t *buf, int buf_size)
+{
+    Mpeg1Context *s = avctx->priv_data;
+    const uint8_t *buf_end;
+    const uint8_t *buf_ptr;
+    uint32_t start_code;
+    int ret, input_size;
+    AVFrame *picture = data;
+    MpegEncContext *s2 = &s->mpeg_enc_ctx;
+    dprintf("fill_buffer\n");
+
+    if (buf_size == 0) {
+        /* special case for last picture */
+        if (s2->low_delay==0 && s2->next_picture_ptr) {
+            *picture= *(AVFrame*)s2->next_picture_ptr;
+            s2->next_picture_ptr= NULL;
+
+            *data_size = sizeof(AVFrame);
+        }
+        return 0;
+    }
+
+    if(s2->flags&CODEC_FLAG_TRUNCATED){
+        int next= mpeg1_find_frame_end(&s2->parse_context, buf, buf_size);
+
+        if( ff_combine_frame(&s2->parse_context, next, &buf, &buf_size) < 0 )
+            return buf_size;
+    }
+
+    buf_ptr = buf;
+    buf_end = buf + buf_size;
+
+#if 0
+    if (s->repeat_field % 2 == 1) {
+        s->repeat_field++;
+        //fprintf(stderr,"\nRepeating last frame: %d -> %d! pict: %d %d", avctx->frame_number-1, avctx->frame_number,
+        //        s2->picture_number, s->repeat_field);
+        if (avctx->flags & CODEC_FLAG_REPEAT_FIELD) {
+            *data_size = sizeof(AVPicture);
+            goto the_end;
+        }
+    }
+#endif
+
+    if(s->mpeg_enc_ctx_allocated==0 && avctx->codec_tag == ff_get_fourcc("VCR2"))
+        vcr2_init_sequence(avctx);
+
+    s->slice_count= 0;
+
+    for(;;) {
+        /* find start next code */
+        start_code = -1;
+        buf_ptr = ff_find_start_code(buf_ptr,buf_end, &start_code);
+        if (start_code > 0x1ff){
+            if(s2->pict_type != B_TYPE || avctx->skip_frame <= AVDISCARD_DEFAULT){
+                if(avctx->thread_count > 1){
+                    int i;
+
+                    avctx->execute(avctx, slice_decode_thread,  (void**)&(s2->thread_context[0]), NULL, s->slice_count);
+                    for(i=0; i<s->slice_count; i++)
+                        s2->error_count += s2->thread_context[i]->error_count;
+                }
+                if (slice_end(avctx, picture)) {
+                    if(s2->last_picture_ptr || s2->low_delay) //FIXME merge with the stuff in mpeg_decode_slice
+                        *data_size = sizeof(AVPicture);
+                }
+            }
+            return FFMAX(0, buf_ptr - buf - s2->parse_context.last_index);
+        }
+
+        input_size = buf_end - buf_ptr;
+
+        if(avctx->debug & FF_DEBUG_STARTCODE){
+            av_log(avctx, AV_LOG_DEBUG, "%3X at %zd left %d\n", start_code, buf_ptr-buf, input_size);
+        }
+
+        /* prepare data for next start code */
+        switch(start_code) {
+        case SEQ_START_CODE:
+            mpeg1_decode_sequence(avctx, buf_ptr,
+                                    input_size);
+            break;
+
+        case PICTURE_START_CODE:
+            /* we have a complete image : we try to decompress it */
+            mpeg1_decode_picture(avctx,
+                                    buf_ptr, input_size);
+            break;
+        case EXT_START_CODE:
+            mpeg_decode_extension(avctx,
+                                    buf_ptr, input_size);
+            break;
+        case USER_START_CODE:
+            mpeg_decode_user_data(avctx,
+                                    buf_ptr, input_size);
+            break;
+        case GOP_START_CODE:
+            s2->first_field=0;
+            mpeg_decode_gop(avctx,
+                                    buf_ptr, input_size);
+            break;
+        default:
+            if (start_code >= SLICE_MIN_START_CODE &&
+                start_code <= SLICE_MAX_START_CODE) {
+                int mb_y= start_code - SLICE_MIN_START_CODE;
+
+                if(s2->last_picture_ptr==NULL){
+                /* skip b frames if we dont have reference frames */
+                    if(s2->pict_type==B_TYPE) break;
+                /* skip P frames if we dont have reference frame no valid header */
+//                    if(s2->pict_type==P_TYPE && s2->first_field && !s2->first_slice) break;
+                }
+                /* skip b frames if we are in a hurry */
+                if(avctx->hurry_up && s2->pict_type==B_TYPE) break;
+                if(  (avctx->skip_frame >= AVDISCARD_NONREF && s2->pict_type==B_TYPE)
+                    ||(avctx->skip_frame >= AVDISCARD_NONKEY && s2->pict_type!=I_TYPE)
+                    || avctx->skip_frame >= AVDISCARD_ALL)
+                    break;
+                /* skip everything if we are in a hurry>=5 */
+                if(avctx->hurry_up>=5) break;
+
+                if (!s->mpeg_enc_ctx_allocated) break;
+
+                if(s2->codec_id == CODEC_ID_MPEG2VIDEO){
+                    if(mb_y < avctx->skip_top || mb_y >= s2->mb_height - avctx->skip_bottom)
+                        break;
+                }
+
+                if(s2->first_slice){
+                    s2->first_slice=0;
+                            if(mpeg_field_start(s2) < 0)
+                        return -1;
+                    }
+
+                if(avctx->thread_count > 1){
+                    int threshold= (s2->mb_height*s->slice_count + avctx->thread_count/2) / avctx->thread_count;
+                    if(threshold <= mb_y){
+                        MpegEncContext *thread_context= s2->thread_context[s->slice_count];
+
+                        thread_context->start_mb_y= mb_y;
+                        thread_context->end_mb_y  = s2->mb_height;
+                        if(s->slice_count){
+                            s2->thread_context[s->slice_count-1]->end_mb_y= mb_y;
+                            ff_update_duplicate_context(thread_context, s2);
+                        }
+                        init_get_bits(&thread_context->gb, buf_ptr, input_size*8);
+                        s->slice_count++;
+                    }
+                    buf_ptr += 2; //FIXME add minimum num of bytes per slice
+                }else{
+                    ret = mpeg_decode_slice(s, mb_y, &buf_ptr, input_size);
+                    emms_c();
+
+                    if(ret < 0){
+                        if(s2->resync_mb_x>=0 && s2->resync_mb_y>=0)
+                            ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x, s2->mb_y, AC_ERROR|DC_ERROR|MV_ERROR);
+                    }else{
+                        ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x-1, s2->mb_y, AC_END|DC_END|MV_END);
+                    }
+                }
+            }
+            break;
+        }
+    }
+}
+
+static int mpeg_decode_end(AVCodecContext *avctx)
+{
+    Mpeg1Context *s = avctx->priv_data;
+
+    if (s->mpeg_enc_ctx_allocated)
+        MPV_common_end(&s->mpeg_enc_ctx);
+    return 0;
+}
+
+AVCodec mpeg1video_decoder = {
+    "mpeg1video",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MPEG1VIDEO,
+    sizeof(Mpeg1Context),
+    mpeg_decode_init,
+    NULL,
+    mpeg_decode_end,
+    mpeg_decode_frame,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
+    .flush= ff_mpeg_flush,
+};
+
+AVCodec mpeg2video_decoder = {
+    "mpeg2video",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MPEG2VIDEO,
+    sizeof(Mpeg1Context),
+    mpeg_decode_init,
+    NULL,
+    mpeg_decode_end,
+    mpeg_decode_frame,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
+    .flush= ff_mpeg_flush,
+};
+
+//legacy decoder
+AVCodec mpegvideo_decoder = {
+    "mpegvideo",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MPEG2VIDEO,
+    sizeof(Mpeg1Context),
+    mpeg_decode_init,
+    NULL,
+    mpeg_decode_end,
+    mpeg_decode_frame,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
+    .flush= ff_mpeg_flush,
+};
+
+#ifdef CONFIG_ENCODERS
+
+AVCodec mpeg1video_encoder = {
+    "mpeg1video",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MPEG1VIDEO,
+    sizeof(MpegEncContext),
+    encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .supported_framerates= ff_frame_rate_tab+1,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+    .capabilities= CODEC_CAP_DELAY,
+};
+
+AVCodec mpeg2video_encoder = {
+    "mpeg2video",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MPEG2VIDEO,
+    sizeof(MpegEncContext),
+    encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .supported_framerates= ff_frame_rate_tab+1,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_YUV422P, -1},
+    .capabilities= CODEC_CAP_DELAY,
+};
+#endif
+
+#ifdef HAVE_XVMC
+static int mpeg_mc_decode_init(AVCodecContext *avctx){
+    Mpeg1Context *s;
+
+    if( avctx->thread_count > 1)
+        return -1;
+    if( !(avctx->slice_flags & SLICE_FLAG_CODED_ORDER) )
+        return -1;
+    if( !(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD) ){
+        dprintf("mpeg12.c: XvMC decoder will work better if SLICE_FLAG_ALLOW_FIELD is set\n");
+    }
+    mpeg_decode_init(avctx);
+    s = avctx->priv_data;
+
+    avctx->pix_fmt = PIX_FMT_XVMC_MPEG2_IDCT;
+    avctx->xvmc_acceleration = 2;//2 - the blocks are packed!
+
+    return 0;
+}
+
+AVCodec mpeg_xvmc_decoder = {
+    "mpegvideo_xvmc",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MPEG2VIDEO_XVMC,
+    sizeof(Mpeg1Context),
+    mpeg_mc_decode_init,
+    NULL,
+    mpeg_decode_end,
+    mpeg_decode_frame,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED| CODEC_CAP_HWACCEL | CODEC_CAP_DELAY,
+    .flush= ff_mpeg_flush,
+};
+
+#endif
+
+#ifdef CONFIG_MPEGVIDEO_PARSER
+static void mpegvideo_extract_headers(AVCodecParserContext *s,
+                                      AVCodecContext *avctx,
+                                      const uint8_t *buf, int buf_size)
+{
+    ParseContext1 *pc = s->priv_data;
+    const uint8_t *buf_end;
+    uint32_t start_code;
+    int frame_rate_index, ext_type, bytes_left;
+    int frame_rate_ext_n, frame_rate_ext_d;
+    int picture_structure, top_field_first, repeat_first_field, progressive_frame;
+    int horiz_size_ext, vert_size_ext, bit_rate_ext;
+//FIXME replace the crap with get_bits()
+    s->repeat_pict = 0;
+    buf_end = buf + buf_size;
+    while (buf < buf_end) {
+        start_code= -1;
+        buf= ff_find_start_code(buf, buf_end, &start_code);
+        bytes_left = buf_end - buf;
+        switch(start_code) {
+        case PICTURE_START_CODE:
+            if (bytes_left >= 2) {
+                s->pict_type = (buf[1] >> 3) & 7;
+            }
+            break;
+        case SEQ_START_CODE:
+            if (bytes_left >= 7) {
+                pc->width  = (buf[0] << 4) | (buf[1] >> 4);
+                pc->height = ((buf[1] & 0x0f) << 8) | buf[2];
+                avcodec_set_dimensions(avctx, pc->width, pc->height);
+                frame_rate_index = buf[3] & 0xf;
+                pc->frame_rate.den = avctx->time_base.den = ff_frame_rate_tab[frame_rate_index].num;
+                pc->frame_rate.num = avctx->time_base.num = ff_frame_rate_tab[frame_rate_index].den;
+                avctx->bit_rate = ((buf[4]<<10) | (buf[5]<<2) | (buf[6]>>6))*400;
+                avctx->codec_id = CODEC_ID_MPEG1VIDEO;
+                avctx->sub_id = 1;
+            }
+            break;
+        case EXT_START_CODE:
+            if (bytes_left >= 1) {
+                ext_type = (buf[0] >> 4);
+                switch(ext_type) {
+                case 0x1: /* sequence extension */
+                    if (bytes_left >= 6) {
+                        horiz_size_ext = ((buf[1] & 1) << 1) | (buf[2] >> 7);
+                        vert_size_ext = (buf[2] >> 5) & 3;
+                        bit_rate_ext = ((buf[2] & 0x1F)<<7) | (buf[3]>>1);
+                        frame_rate_ext_n = (buf[5] >> 5) & 3;
+                        frame_rate_ext_d = (buf[5] & 0x1f);
+                        pc->progressive_sequence = buf[1] & (1 << 3);
+                        avctx->has_b_frames= !(buf[5] >> 7);
+
+                        pc->width  |=(horiz_size_ext << 12);
+                        pc->height |=( vert_size_ext << 12);
+                        avctx->bit_rate += (bit_rate_ext << 18) * 400;
+                        avcodec_set_dimensions(avctx, pc->width, pc->height);
+                        avctx->time_base.den = pc->frame_rate.den * (frame_rate_ext_n + 1);
+                        avctx->time_base.num = pc->frame_rate.num * (frame_rate_ext_d + 1);
+                        avctx->codec_id = CODEC_ID_MPEG2VIDEO;
+                        avctx->sub_id = 2; /* forces MPEG2 */
+                    }
+                    break;
+                case 0x8: /* picture coding extension */
+                    if (bytes_left >= 5) {
+                        picture_structure = buf[2]&3;
+                        top_field_first = buf[3] & (1 << 7);
+                        repeat_first_field = buf[3] & (1 << 1);
+                        progressive_frame = buf[4] & (1 << 7);
+
+                        /* check if we must repeat the frame */
+                        if (repeat_first_field) {
+                            if (pc->progressive_sequence) {
+                                if (top_field_first)
+                                    s->repeat_pict = 4;
+                                else
+                                    s->repeat_pict = 2;
+                            } else if (progressive_frame) {
+                                s->repeat_pict = 1;
+                            }
+                        }
+
+                        /* the packet only represents half a frame
+                           XXX,FIXME maybe find a different solution */
+                        if(picture_structure != 3)
+                            s->repeat_pict = -1;
+                    }
+                    break;
+                }
+            }
+            break;
+        case -1:
+            goto the_end;
+        default:
+            /* we stop parsing when we encounter a slice. It ensures
+               that this function takes a negligible amount of time */
+            if (start_code >= SLICE_MIN_START_CODE &&
+                start_code <= SLICE_MAX_START_CODE)
+                goto the_end;
+            break;
+        }
+    }
+ the_end: ;
+}
+
+static int mpegvideo_parse(AVCodecParserContext *s,
+                           AVCodecContext *avctx,
+                           uint8_t **poutbuf, int *poutbuf_size,
+                           const uint8_t *buf, int buf_size)
+{
+    ParseContext1 *pc1 = s->priv_data;
+    ParseContext *pc= &pc1->pc;
+    int next;
+
+    if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){
+        next= buf_size;
+    }else{
+        next= mpeg1_find_frame_end(pc, buf, buf_size);
+
+        if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
+            *poutbuf = NULL;
+            *poutbuf_size = 0;
+            return buf_size;
+        }
+
+    }
+    /* we have a full frame : we just parse the first few MPEG headers
+       to have the full timing information. The time take by this
+       function should be negligible for uncorrupted streams */
+    mpegvideo_extract_headers(s, avctx, buf, buf_size);
+#if 0
+    printf("pict_type=%d frame_rate=%0.3f repeat_pict=%d\n",
+           s->pict_type, (double)avctx->time_base.den / avctx->time_base.num, s->repeat_pict);
+#endif
+
+    *poutbuf = (uint8_t *)buf;
+    *poutbuf_size = buf_size;
+    return next;
+}
+
+static int mpegvideo_split(AVCodecContext *avctx,
+                           const uint8_t *buf, int buf_size)
+{
+    int i;
+    uint32_t state= -1;
+
+    for(i=0; i<buf_size; i++){
+        state= (state<<8) | buf[i];
+        if(state != 0x1B3 && state != 0x1B5 && state < 0x200 && state >= 0x100)
+            return i-3;
+    }
+    return 0;
+}
+
+AVCodecParser mpegvideo_parser = {
+    { CODEC_ID_MPEG1VIDEO, CODEC_ID_MPEG2VIDEO },
+    sizeof(ParseContext1),
+    NULL,
+    mpegvideo_parse,
+    ff_parse1_close,
+    mpegvideo_split,
+};
+#endif /* !CONFIG_MPEGVIDEO_PARSER */
+
+/* this is ugly i know, but the alternative is too make
+   hundreds of vars global and prefix them with ff_mpeg1_
+   which is far uglier. */
+#include "mdec.c"
diff --git a/contrib/ffmpeg/libavcodec/mpeg12data.h b/contrib/ffmpeg/libavcodec/mpeg12data.h
new file mode 100644
index 000000000..6c96a495b
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mpeg12data.h
@@ -0,0 +1,469 @@
+/*
+ * MPEG1 codec / MPEG2 decoder
+ * copyright (c) 2000,2001 Fabrice Bellard
+ * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file mpeg12data.h
+ * MPEG1/2 tables.
+ */
+
+const uint16_t ff_mpeg1_default_intra_matrix[64] = {
+        8, 16, 19, 22, 26, 27, 29, 34,
+        16, 16, 22, 24, 27, 29, 34, 37,
+        19, 22, 26, 27, 29, 34, 34, 38,
+        22, 22, 26, 27, 29, 34, 37, 40,
+        22, 26, 27, 29, 32, 35, 40, 48,
+        26, 27, 29, 32, 35, 40, 48, 58,
+        26, 27, 29, 34, 38, 46, 56, 69,
+        27, 29, 35, 38, 46, 56, 69, 83
+};
+
+const uint16_t ff_mpeg1_default_non_intra_matrix[64] = {
+    16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16,
+};
+
+static const uint16_t vlc_dc_lum_code[12] = {
+    0x4, 0x0, 0x1, 0x5, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe, 0x1ff,
+};
+static const unsigned char vlc_dc_lum_bits[12] = {
+    3, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 9,
+};
+
+static const uint16_t vlc_dc_chroma_code[12] = {
+    0x0, 0x1, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe, 0x3fe, 0x3ff,
+};
+static const unsigned char vlc_dc_chroma_bits[12] = {
+    2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10,
+};
+
+static const uint16_t mpeg1_vlc[113][2] = {
+ { 0x3, 2 }, { 0x4, 4 }, { 0x5, 5 }, { 0x6, 7 },
+ { 0x26, 8 }, { 0x21, 8 }, { 0xa, 10 }, { 0x1d, 12 },
+ { 0x18, 12 }, { 0x13, 12 }, { 0x10, 12 }, { 0x1a, 13 },
+ { 0x19, 13 }, { 0x18, 13 }, { 0x17, 13 }, { 0x1f, 14 },
+ { 0x1e, 14 }, { 0x1d, 14 }, { 0x1c, 14 }, { 0x1b, 14 },
+ { 0x1a, 14 }, { 0x19, 14 }, { 0x18, 14 }, { 0x17, 14 },
+ { 0x16, 14 }, { 0x15, 14 }, { 0x14, 14 }, { 0x13, 14 },
+ { 0x12, 14 }, { 0x11, 14 }, { 0x10, 14 }, { 0x18, 15 },
+ { 0x17, 15 }, { 0x16, 15 }, { 0x15, 15 }, { 0x14, 15 },
+ { 0x13, 15 }, { 0x12, 15 }, { 0x11, 15 }, { 0x10, 15 },
+ { 0x3, 3 }, { 0x6, 6 }, { 0x25, 8 }, { 0xc, 10 },
+ { 0x1b, 12 }, { 0x16, 13 }, { 0x15, 13 }, { 0x1f, 15 },
+ { 0x1e, 15 }, { 0x1d, 15 }, { 0x1c, 15 }, { 0x1b, 15 },
+ { 0x1a, 15 }, { 0x19, 15 }, { 0x13, 16 }, { 0x12, 16 },
+ { 0x11, 16 }, { 0x10, 16 }, { 0x5, 4 }, { 0x4, 7 },
+ { 0xb, 10 }, { 0x14, 12 }, { 0x14, 13 }, { 0x7, 5 },
+ { 0x24, 8 }, { 0x1c, 12 }, { 0x13, 13 }, { 0x6, 5 },
+ { 0xf, 10 }, { 0x12, 12 }, { 0x7, 6 }, { 0x9, 10 },
+ { 0x12, 13 }, { 0x5, 6 }, { 0x1e, 12 }, { 0x14, 16 },
+ { 0x4, 6 }, { 0x15, 12 }, { 0x7, 7 }, { 0x11, 12 },
+ { 0x5, 7 }, { 0x11, 13 }, { 0x27, 8 }, { 0x10, 13 },
+ { 0x23, 8 }, { 0x1a, 16 }, { 0x22, 8 }, { 0x19, 16 },
+ { 0x20, 8 }, { 0x18, 16 }, { 0xe, 10 }, { 0x17, 16 },
+ { 0xd, 10 }, { 0x16, 16 }, { 0x8, 10 }, { 0x15, 16 },
+ { 0x1f, 12 }, { 0x1a, 12 }, { 0x19, 12 }, { 0x17, 12 },
+ { 0x16, 12 }, { 0x1f, 13 }, { 0x1e, 13 }, { 0x1d, 13 },
+ { 0x1c, 13 }, { 0x1b, 13 }, { 0x1f, 16 }, { 0x1e, 16 },
+ { 0x1d, 16 }, { 0x1c, 16 }, { 0x1b, 16 },
+ { 0x1, 6 }, /* escape */
+ { 0x2, 2 }, /* EOB */
+};
+
+static const uint16_t mpeg2_vlc[113][2] = {
+  {0x02, 2}, {0x06, 3}, {0x07, 4}, {0x1c, 5},
+  {0x1d, 5}, {0x05, 6}, {0x04, 6}, {0x7b, 7},
+  {0x7c, 7}, {0x23, 8}, {0x22, 8}, {0xfa, 8},
+  {0xfb, 8}, {0xfe, 8}, {0xff, 8}, {0x1f,14},
+  {0x1e,14}, {0x1d,14}, {0x1c,14}, {0x1b,14},
+  {0x1a,14}, {0x19,14}, {0x18,14}, {0x17,14},
+  {0x16,14}, {0x15,14}, {0x14,14}, {0x13,14},
+  {0x12,14}, {0x11,14}, {0x10,14}, {0x18,15},
+  {0x17,15}, {0x16,15}, {0x15,15}, {0x14,15},
+  {0x13,15}, {0x12,15}, {0x11,15}, {0x10,15},
+  {0x02, 3}, {0x06, 5}, {0x79, 7}, {0x27, 8},
+  {0x20, 8}, {0x16,13}, {0x15,13}, {0x1f,15},
+  {0x1e,15}, {0x1d,15}, {0x1c,15}, {0x1b,15},
+  {0x1a,15}, {0x19,15}, {0x13,16}, {0x12,16},
+  {0x11,16}, {0x10,16}, {0x05, 5}, {0x07, 7},
+  {0xfc, 8}, {0x0c,10}, {0x14,13}, {0x07, 5},
+  {0x26, 8}, {0x1c,12}, {0x13,13}, {0x06, 6},
+  {0xfd, 8}, {0x12,12}, {0x07, 6}, {0x04, 9},
+  {0x12,13}, {0x06, 7}, {0x1e,12}, {0x14,16},
+  {0x04, 7}, {0x15,12}, {0x05, 7}, {0x11,12},
+  {0x78, 7}, {0x11,13}, {0x7a, 7}, {0x10,13},
+  {0x21, 8}, {0x1a,16}, {0x25, 8}, {0x19,16},
+  {0x24, 8}, {0x18,16}, {0x05, 9}, {0x17,16},
+  {0x07, 9}, {0x16,16}, {0x0d,10}, {0x15,16},
+  {0x1f,12}, {0x1a,12}, {0x19,12}, {0x17,12},
+  {0x16,12}, {0x1f,13}, {0x1e,13}, {0x1d,13},
+  {0x1c,13}, {0x1b,13}, {0x1f,16}, {0x1e,16},
+  {0x1d,16}, {0x1c,16}, {0x1b,16},
+  {0x01,6}, /* escape */
+  {0x06,4}, /* EOB */
+};
+
+static const int8_t mpeg1_level[111] = {
+  1,  2,  3,  4,  5,  6,  7,  8,
+  9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40,
+  1,  2,  3,  4,  5,  6,  7,  8,
+  9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18,  1,  2,  3,  4,  5,  1,
+  2,  3,  4,  1,  2,  3,  1,  2,
+  3,  1,  2,  3,  1,  2,  1,  2,
+  1,  2,  1,  2,  1,  2,  1,  2,
+  1,  2,  1,  2,  1,  2,  1,  2,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,
+};
+
+static const int8_t mpeg1_run[111] = {
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  2,  2,  2,  2,  2,  3,
+  3,  3,  3,  4,  4,  4,  5,  5,
+  5,  6,  6,  6,  7,  7,  8,  8,
+  9,  9, 10, 10, 11, 11, 12, 12,
+ 13, 13, 14, 14, 15, 15, 16, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31,
+};
+
+static RLTable rl_mpeg1 = {
+    111,
+    111,
+    mpeg1_vlc,
+    mpeg1_run,
+    mpeg1_level,
+};
+
+static RLTable rl_mpeg2 = {
+    111,
+    111,
+    mpeg2_vlc,
+    mpeg1_run,
+    mpeg1_level,
+};
+
+static const uint8_t mbAddrIncrTable[36][2] = {
+    {0x1, 1},
+    {0x3, 3},
+    {0x2, 3},
+    {0x3, 4},
+    {0x2, 4},
+    {0x3, 5},
+    {0x2, 5},
+    {0x7, 7},
+    {0x6, 7},
+    {0xb, 8},
+    {0xa, 8},
+    {0x9, 8},
+    {0x8, 8},
+    {0x7, 8},
+    {0x6, 8},
+    {0x17, 10},
+    {0x16, 10},
+    {0x15, 10},
+    {0x14, 10},
+    {0x13, 10},
+    {0x12, 10},
+    {0x23, 11},
+    {0x22, 11},
+    {0x21, 11},
+    {0x20, 11},
+    {0x1f, 11},
+    {0x1e, 11},
+    {0x1d, 11},
+    {0x1c, 11},
+    {0x1b, 11},
+    {0x1a, 11},
+    {0x19, 11},
+    {0x18, 11},
+    {0x8, 11}, /* escape */
+    {0xf, 11}, /* stuffing */
+    {0x0, 8}, /* end (and 15 more 0 bits should follow) */
+};
+
+static const uint8_t mbPatTable[64][2] = {
+    {0x1, 9},
+    {0xb, 5},
+    {0x9, 5},
+    {0xd, 6},
+    {0xd, 4},
+    {0x17, 7},
+    {0x13, 7},
+    {0x1f, 8},
+    {0xc, 4},
+    {0x16, 7},
+    {0x12, 7},
+    {0x1e, 8},
+    {0x13, 5},
+    {0x1b, 8},
+    {0x17, 8},
+    {0x13, 8},
+    {0xb, 4},
+    {0x15, 7},
+    {0x11, 7},
+    {0x1d, 8},
+    {0x11, 5},
+    {0x19, 8},
+    {0x15, 8},
+    {0x11, 8},
+    {0xf, 6},
+    {0xf, 8},
+    {0xd, 8},
+    {0x3, 9},
+    {0xf, 5},
+    {0xb, 8},
+    {0x7, 8},
+    {0x7, 9},
+    {0xa, 4},
+    {0x14, 7},
+    {0x10, 7},
+    {0x1c, 8},
+    {0xe, 6},
+    {0xe, 8},
+    {0xc, 8},
+    {0x2, 9},
+    {0x10, 5},
+    {0x18, 8},
+    {0x14, 8},
+    {0x10, 8},
+    {0xe, 5},
+    {0xa, 8},
+    {0x6, 8},
+    {0x6, 9},
+    {0x12, 5},
+    {0x1a, 8},
+    {0x16, 8},
+    {0x12, 8},
+    {0xd, 5},
+    {0x9, 8},
+    {0x5, 8},
+    {0x5, 9},
+    {0xc, 5},
+    {0x8, 8},
+    {0x4, 8},
+    {0x4, 9},
+    {0x7, 3},
+    {0xa, 5},
+    {0x8, 5},
+    {0xc, 6}
+};
+
+#define MB_TYPE_ZERO_MV   0x20000000
+#define IS_ZERO_MV(a)   ((a)&MB_TYPE_ZERO_MV)
+
+static const uint8_t table_mb_ptype[7][2] = {
+    { 3, 5 }, // 0x01 MB_INTRA
+    { 1, 2 }, // 0x02 MB_PAT
+    { 1, 3 }, // 0x08 MB_FOR
+    { 1, 1 }, // 0x0A MB_FOR|MB_PAT
+    { 1, 6 }, // 0x11 MB_QUANT|MB_INTRA
+    { 1, 5 }, // 0x12 MB_QUANT|MB_PAT
+    { 2, 5 }, // 0x1A MB_QUANT|MB_FOR|MB_PAT
+};
+
+static const uint32_t ptype2mb_type[7] = {
+                    MB_TYPE_INTRA,
+                    MB_TYPE_L0 | MB_TYPE_CBP | MB_TYPE_ZERO_MV | MB_TYPE_16x16,
+                    MB_TYPE_L0,
+                    MB_TYPE_L0 | MB_TYPE_CBP,
+    MB_TYPE_QUANT | MB_TYPE_INTRA,
+    MB_TYPE_QUANT | MB_TYPE_L0 | MB_TYPE_CBP | MB_TYPE_ZERO_MV | MB_TYPE_16x16,
+    MB_TYPE_QUANT | MB_TYPE_L0 | MB_TYPE_CBP,
+};
+
+static const uint8_t table_mb_btype[11][2] = {
+    { 3, 5 }, // 0x01 MB_INTRA
+    { 2, 3 }, // 0x04 MB_BACK
+    { 3, 3 }, // 0x06 MB_BACK|MB_PAT
+    { 2, 4 }, // 0x08 MB_FOR
+    { 3, 4 }, // 0x0A MB_FOR|MB_PAT
+    { 2, 2 }, // 0x0C MB_FOR|MB_BACK
+    { 3, 2 }, // 0x0E MB_FOR|MB_BACK|MB_PAT
+    { 1, 6 }, // 0x11 MB_QUANT|MB_INTRA
+    { 2, 6 }, // 0x16 MB_QUANT|MB_BACK|MB_PAT
+    { 3, 6 }, // 0x1A MB_QUANT|MB_FOR|MB_PAT
+    { 2, 5 }, // 0x1E MB_QUANT|MB_FOR|MB_BACK|MB_PAT
+};
+
+static const uint32_t btype2mb_type[11] = {
+                    MB_TYPE_INTRA,
+                    MB_TYPE_L1,
+                    MB_TYPE_L1   | MB_TYPE_CBP,
+                    MB_TYPE_L0,
+                    MB_TYPE_L0   | MB_TYPE_CBP,
+                    MB_TYPE_L0L1,
+                    MB_TYPE_L0L1 | MB_TYPE_CBP,
+    MB_TYPE_QUANT | MB_TYPE_INTRA,
+    MB_TYPE_QUANT | MB_TYPE_L1   | MB_TYPE_CBP,
+    MB_TYPE_QUANT | MB_TYPE_L0   | MB_TYPE_CBP,
+    MB_TYPE_QUANT | MB_TYPE_L0L1 | MB_TYPE_CBP,
+};
+
+static const uint8_t mbMotionVectorTable[17][2] = {
+{ 0x1, 1 },
+{ 0x1, 2 },
+{ 0x1, 3 },
+{ 0x1, 4 },
+{ 0x3, 6 },
+{ 0x5, 7 },
+{ 0x4, 7 },
+{ 0x3, 7 },
+{ 0xb, 9 },
+{ 0xa, 9 },
+{ 0x9, 9 },
+{ 0x11, 10 },
+{ 0x10, 10 },
+{ 0xf, 10 },
+{ 0xe, 10 },
+{ 0xd, 10 },
+{ 0xc, 10 },
+};
+
+const AVRational ff_frame_rate_tab[] = {
+    {    0,    0},
+    {24000, 1001},
+    {   24,    1},
+    {   25,    1},
+    {30000, 1001},
+    {   30,    1},
+    {   50,    1},
+    {60000, 1001},
+    {   60,    1},
+  // Xing's 15fps: (9)
+    {   15,    1},
+  // libmpeg3's "Unofficial economy rates": (10-13)
+    {    5,    1},
+    {   10,    1},
+    {   12,    1},
+    {   15,    1},
+    {    0,    0},
+};
+
+static const uint8_t non_linear_qscale[32] = {
+    0, 1, 2, 3, 4, 5, 6, 7,
+    8,10,12,14,16,18,20,22,
+    24,28,32,36,40,44,48,52,
+    56,64,72,80,88,96,104,112,
+};
+
+const uint8_t ff_mpeg1_dc_scale_table[128]={
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+};
+
+static const uint8_t mpeg2_dc_scale_table1[128]={
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+};
+
+static const uint8_t mpeg2_dc_scale_table2[128]={
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+};
+
+static const uint8_t mpeg2_dc_scale_table3[128]={
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+};
+
+static const uint8_t *mpeg2_dc_scale_table[4]={
+    ff_mpeg1_dc_scale_table,
+    mpeg2_dc_scale_table1,
+    mpeg2_dc_scale_table2,
+    mpeg2_dc_scale_table3,
+};
+
+static const float mpeg1_aspect[16]={
+    0.0000,
+    1.0000,
+    0.6735,
+    0.7031,
+
+    0.7615,
+    0.8055,
+    0.8437,
+    0.8935,
+
+    0.9157,
+    0.9815,
+    1.0255,
+    1.0695,
+
+    1.0950,
+    1.1575,
+    1.2015,
+};
+
+static const AVRational mpeg2_aspect[16]={
+    {0,1},
+    {1,1},
+    {4,3},
+    {16,9},
+    {221,100},
+    {0,1},
+    {0,1},
+    {0,1},
+    {0,1},
+    {0,1},
+    {0,1},
+    {0,1},
+    {0,1},
+    {0,1},
+    {0,1},
+    {0,1},
+};
+
+static const uint8_t svcd_scan_offset_placeholder[14]={
+    0x10, 0x0E,
+    0x00, 0x80, 0x81,
+    0x00, 0x80, 0x81,
+    0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff,
+};
diff --git a/contrib/ffmpeg/libavcodec/mpeg4data.h b/contrib/ffmpeg/libavcodec/mpeg4data.h
new file mode 100644
index 000000000..e199c6a14
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mpeg4data.h
@@ -0,0 +1,423 @@
+/*
+ * copyright (c) 2000,2001 Fabrice Bellard
+ * H263+ support
+ * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file mpeg4data.h
+ * mpeg4 tables.
+ */
+
+// shapes
+#define RECT_SHAPE       0
+#define BIN_SHAPE        1
+#define BIN_ONLY_SHAPE   2
+#define GRAY_SHAPE       3
+
+#define SIMPLE_VO_TYPE             1
+#define CORE_VO_TYPE               3
+#define MAIN_VO_TYPE               4
+#define NBIT_VO_TYPE               5
+#define ARTS_VO_TYPE               10
+#define ACE_VO_TYPE                12
+#define ADV_SIMPLE_VO_TYPE         17
+
+// aspect_ratio_info
+#define EXTENDED_PAR 15
+
+//vol_sprite_usage / sprite_enable
+#define STATIC_SPRITE 1
+#define GMC_SPRITE 2
+
+#define MOTION_MARKER 0x1F001
+#define DC_MARKER     0x6B001
+
+static const int mb_type_b_map[4]= {
+    MB_TYPE_DIRECT2 | MB_TYPE_L0L1,
+    MB_TYPE_L0L1 | MB_TYPE_16x16,
+    MB_TYPE_L1 | MB_TYPE_16x16,
+    MB_TYPE_L0 | MB_TYPE_16x16,
+};
+
+#define VOS_STARTCODE        0x1B0
+#define USER_DATA_STARTCODE  0x1B2
+#define GOP_STARTCODE        0x1B3
+#define VISUAL_OBJ_STARTCODE 0x1B5
+#define VOP_STARTCODE        0x1B6
+
+/* dc encoding for mpeg4 */
+const uint8_t DCtab_lum[13][2] =
+{
+    {3,3}, {3,2}, {2,2}, {2,3}, {1,3}, {1,4}, {1,5}, {1,6}, {1,7},
+    {1,8}, {1,9}, {1,10}, {1,11},
+};
+
+const uint8_t DCtab_chrom[13][2] =
+{
+    {3,2}, {2,2}, {1,2}, {1,3}, {1,4}, {1,5}, {1,6}, {1,7}, {1,8},
+    {1,9}, {1,10}, {1,11}, {1,12},
+};
+
+const uint16_t intra_vlc[103][2] = {
+{ 0x2, 2 },
+{ 0x6, 3 },{ 0xf, 4 },{ 0xd, 5 },{ 0xc, 5 },
+{ 0x15, 6 },{ 0x13, 6 },{ 0x12, 6 },{ 0x17, 7 },
+{ 0x1f, 8 },{ 0x1e, 8 },{ 0x1d, 8 },{ 0x25, 9 },
+{ 0x24, 9 },{ 0x23, 9 },{ 0x21, 9 },{ 0x21, 10 },
+{ 0x20, 10 },{ 0xf, 10 },{ 0xe, 10 },{ 0x7, 11 },
+{ 0x6, 11 },{ 0x20, 11 },{ 0x21, 11 },{ 0x50, 12 },
+{ 0x51, 12 },{ 0x52, 12 },{ 0xe, 4 },{ 0x14, 6 },
+{ 0x16, 7 },{ 0x1c, 8 },{ 0x20, 9 },{ 0x1f, 9 },
+{ 0xd, 10 },{ 0x22, 11 },{ 0x53, 12 },{ 0x55, 12 },
+{ 0xb, 5 },{ 0x15, 7 },{ 0x1e, 9 },{ 0xc, 10 },
+{ 0x56, 12 },{ 0x11, 6 },{ 0x1b, 8 },{ 0x1d, 9 },
+{ 0xb, 10 },{ 0x10, 6 },{ 0x22, 9 },{ 0xa, 10 },
+{ 0xd, 6 },{ 0x1c, 9 },{ 0x8, 10 },{ 0x12, 7 },
+{ 0x1b, 9 },{ 0x54, 12 },{ 0x14, 7 },{ 0x1a, 9 },
+{ 0x57, 12 },{ 0x19, 8 },{ 0x9, 10 },{ 0x18, 8 },
+{ 0x23, 11 },{ 0x17, 8 },{ 0x19, 9 },{ 0x18, 9 },
+{ 0x7, 10 },{ 0x58, 12 },{ 0x7, 4 },{ 0xc, 6 },
+{ 0x16, 8 },{ 0x17, 9 },{ 0x6, 10 },{ 0x5, 11 },
+{ 0x4, 11 },{ 0x59, 12 },{ 0xf, 6 },{ 0x16, 9 },
+{ 0x5, 10 },{ 0xe, 6 },{ 0x4, 10 },{ 0x11, 7 },
+{ 0x24, 11 },{ 0x10, 7 },{ 0x25, 11 },{ 0x13, 7 },
+{ 0x5a, 12 },{ 0x15, 8 },{ 0x5b, 12 },{ 0x14, 8 },
+{ 0x13, 8 },{ 0x1a, 8 },{ 0x15, 9 },{ 0x14, 9 },
+{ 0x13, 9 },{ 0x12, 9 },{ 0x11, 9 },{ 0x26, 11 },
+{ 0x27, 11 },{ 0x5c, 12 },{ 0x5d, 12 },{ 0x5e, 12 },
+{ 0x5f, 12 },{ 0x3, 7 },
+};
+
+const int8_t intra_level[102] = {
+  1,  2,  3,  4,  5,  6,  7,  8,
+  9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27,  1,  2,  3,  4,  5,
+  6,  7,  8,  9, 10,  1,  2,  3,
+  4,  5,  1,  2,  3,  4,  1,  2,
+  3,  1,  2,  3,  1,  2,  3,  1,
+  2,  3,  1,  2,  1,  2,  1,  1,
+  1,  1,  1,  1,  2,  3,  4,  5,
+  6,  7,  8,  1,  2,  3,  1,  2,
+  1,  2,  1,  2,  1,  2,  1,  2,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,
+};
+
+const int8_t intra_run[102] = {
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  2,  2,  2,
+  2,  2,  3,  3,  3,  3,  4,  4,
+  4,  5,  5,  5,  6,  6,  6,  7,
+  7,  7,  8,  8,  9,  9, 10, 11,
+ 12, 13, 14,  0,  0,  0,  0,  0,
+  0,  0,  0,  1,  1,  1,  2,  2,
+  3,  3,  4,  4,  5,  5,  6,  6,
+  7,  8,  9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20,
+};
+
+static RLTable rl_intra = {
+    102,
+    67,
+    intra_vlc,
+    intra_run,
+    intra_level,
+};
+
+static const uint16_t inter_rvlc[170][2]={ //note this is identical to the intra rvlc except that its reordered
+{0x0006,  3},{0x0001,  4},{0x0004,  5},{0x001C,  7},
+{0x003C,  8},{0x003D,  8},{0x007C,  9},{0x00FC, 10},
+{0x00FD, 10},{0x01FC, 11},{0x01FD, 11},{0x03FC, 12},
+{0x07FC, 13},{0x07FD, 13},{0x0BFC, 13},{0x0BFD, 13},
+{0x0FFC, 14},{0x0FFD, 14},{0x1FFC, 15},{0x0007,  3},
+{0x000C,  6},{0x005C,  8},{0x007D,  9},{0x017C, 10},
+{0x02FC, 11},{0x03FD, 12},{0x0DFC, 13},{0x17FC, 14},
+{0x17FD, 14},{0x000A,  4},{0x001D,  7},{0x00BC,  9},
+{0x02FD, 11},{0x05FC, 12},{0x1BFC, 14},{0x1BFD, 14},
+{0x0005,  5},{0x005D,  8},{0x017D, 10},{0x05FD, 12},
+{0x0DFD, 13},{0x1DFC, 14},{0x1FFD, 15},{0x0008,  5},
+{0x006C,  8},{0x037C, 11},{0x0EFC, 13},{0x2FFC, 15},
+{0x0009,  5},{0x00BD,  9},{0x037D, 11},{0x0EFD, 13},
+{0x000D,  6},{0x01BC, 10},{0x06FC, 12},{0x1DFD, 14},
+{0x0014,  6},{0x01BD, 10},{0x06FD, 12},{0x2FFD, 15},
+{0x0015,  6},{0x01DC, 10},{0x0F7C, 13},{0x002C,  7},
+{0x01DD, 10},{0x1EFC, 14},{0x002D,  7},{0x03BC, 11},
+{0x0034,  7},{0x077C, 12},{0x006D,  8},{0x0F7D, 13},
+{0x0074,  8},{0x1EFD, 14},{0x0075,  8},{0x1F7C, 14},
+{0x00DC,  9},{0x1F7D, 14},{0x00DD,  9},{0x1FBC, 14},
+{0x00EC,  9},{0x37FC, 15},{0x01EC, 10},{0x01ED, 10},
+{0x01F4, 10},{0x03BD, 11},{0x03DC, 11},{0x03DD, 11},
+{0x03EC, 11},{0x03ED, 11},{0x03F4, 11},{0x077D, 12},
+{0x07BC, 12},{0x07BD, 12},{0x0FBC, 13},{0x0FBD, 13},
+{0x0FDC, 13},{0x0FDD, 13},{0x1FBD, 14},{0x1FDC, 14},
+{0x1FDD, 14},{0x37FD, 15},{0x3BFC, 15},
+{0x000B,  4},{0x0078,  8},{0x03F5, 11},{0x0FEC, 13},
+{0x1FEC, 14},{0x0012,  5},{0x00ED,  9},{0x07DC, 12},
+{0x1FED, 14},{0x3BFD, 15},{0x0013,  5},{0x03F8, 11},
+{0x3DFC, 15},{0x0018,  6},{0x07DD, 12},{0x0019,  6},
+{0x07EC, 12},{0x0022,  6},{0x0FED, 13},{0x0023,  6},
+{0x0FF4, 13},{0x0035,  7},{0x0FF5, 13},{0x0038,  7},
+{0x0FF8, 13},{0x0039,  7},{0x0FF9, 13},{0x0042,  7},
+{0x1FF4, 14},{0x0043,  7},{0x1FF5, 14},{0x0079,  8},
+{0x1FF8, 14},{0x0082,  8},{0x3DFD, 15},{0x0083,  8},
+{0x00F4,  9},{0x00F5,  9},{0x00F8,  9},{0x00F9,  9},
+{0x0102,  9},{0x0103,  9},{0x01F5, 10},{0x01F8, 10},
+{0x01F9, 10},{0x0202, 10},{0x0203, 10},{0x03F9, 11},
+{0x0402, 11},{0x0403, 11},{0x07ED, 12},{0x07F4, 12},
+{0x07F5, 12},{0x07F8, 12},{0x07F9, 12},{0x0802, 12},
+{0x0803, 12},{0x1002, 13},{0x1003, 13},{0x1FF9, 14},
+{0x2002, 14},{0x2003, 14},{0x3EFC, 15},{0x3EFD, 15},
+{0x3F7C, 15},{0x3F7D, 15},{0x0000, 4}
+};
+
+static const int8_t inter_rvlc_run[169]={
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  2,  2,  2,
+ 2,  2,  2,  2,  3,  3,  3,  3,
+ 3,  3,  3,  4,  4,  4,  4,  4,
+ 5,  5,  5,  5,  6,  6,  6,  6,
+ 7,  7,  7,  7,  8,  8,  8,  9,
+ 9,  9, 10, 10, 11, 11, 12, 12,
+13, 13, 14, 14, 15, 15, 16, 16,
+17, 17, 18, 19, 20, 21, 22, 23,
+24, 25, 26, 27, 28, 29, 30, 31,
+32, 33, 34, 35, 36, 37, 38,
+ 0,  0,  0,  0,  0,  1,  1,  1,
+ 1,  1,  2,  2,  2,  3,  3,  4,
+ 4,  5,  5,  6,  6,  7,  7,  8,
+ 8,  9,  9, 10, 10, 11, 11, 12,
+12, 13, 13, 14, 15, 16, 17, 18,
+19, 20, 21, 22, 23, 24, 25, 26,
+27, 28, 29, 30, 31, 32, 33, 34,
+35, 36, 37, 38, 39, 40, 41, 42,
+43, 44,
+};
+
+static const int8_t inter_rvlc_level[169]={
+ 1,  2,  3,  4,  5,  6,  7,  8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+17, 18, 19,  1,  2,  3,  4,  5,
+ 6,  7,  8,  9, 10,  1,  2,  3,
+ 4,  5,  6,  7,  1,  2,  3,  4,
+ 5,  6,  7,  1,  2,  3,  4,  5,
+ 1,  2,  3,  4,  1,  2,  3,  4,
+ 1,  2,  3,  4,  1,  2,  3,  1,
+ 2,  3,  1,  2,  1,  2,  1,  2,
+ 1,  2,  1,  2,  1,  2,  1,  2,
+ 1,  2,  1,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,
+ 1,  2,  3,  4,  5,  1,  2,  3,
+ 4,  5,  1,  2,  3,  1,  2,  1,
+ 2,  1,  2,  1,  2,  1,  2,  1,
+ 2,  1,  2,  1,  2,  1,  2,  1,
+ 2,  1,  2,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 1,  1,
+};
+
+static RLTable rvlc_rl_inter = {
+    169,
+    103,
+    inter_rvlc,
+    inter_rvlc_run,
+    inter_rvlc_level,
+};
+
+static const uint16_t intra_rvlc[170][2]={
+{0x0006,  3},{0x0007,  3},{0x000A,  4},{0x0009,  5},
+{0x0014,  6},{0x0015,  6},{0x0034,  7},{0x0074,  8},
+{0x0075,  8},{0x00DD,  9},{0x00EC,  9},{0x01EC, 10},
+{0x01ED, 10},{0x01F4, 10},{0x03EC, 11},{0x03ED, 11},
+{0x03F4, 11},{0x077D, 12},{0x07BC, 12},{0x0FBD, 13},
+{0x0FDC, 13},{0x07BD, 12},{0x0FDD, 13},{0x1FBD, 14},
+{0x1FDC, 14},{0x1FDD, 14},{0x1FFC, 15},{0x0001,  4},
+{0x0008,  5},{0x002D,  7},{0x006C,  8},{0x006D,  8},
+{0x00DC,  9},{0x01DD, 10},{0x03DC, 11},{0x03DD, 11},
+{0x077C, 12},{0x0FBC, 13},{0x1F7D, 14},{0x1FBC, 14},
+{0x0004,  5},{0x002C,  7},{0x00BC,  9},{0x01DC, 10},
+{0x03BC, 11},{0x03BD, 11},{0x0EFD, 13},{0x0F7C, 13},
+{0x0F7D, 13},{0x1EFD, 14},{0x1F7C, 14},{0x0005,  5},
+{0x005C,  8},{0x00BD,  9},{0x037D, 11},{0x06FC, 12},
+{0x0EFC, 13},{0x1DFD, 14},{0x1EFC, 14},{0x1FFD, 15},
+{0x000C,  6},{0x005D,  8},{0x01BD, 10},{0x03FD, 12},
+{0x06FD, 12},{0x1BFD, 14},{0x000D,  6},{0x007D,  9},
+{0x02FC, 11},{0x05FC, 12},{0x1BFC, 14},{0x1DFC, 14},
+{0x001C,  7},{0x017C, 10},{0x02FD, 11},{0x05FD, 12},
+{0x2FFC, 15},{0x001D,  7},{0x017D, 10},{0x037C, 11},
+{0x0DFD, 13},{0x2FFD, 15},{0x003C,  8},{0x01BC, 10},
+{0x0BFD, 13},{0x17FD, 14},{0x003D,  8},{0x01FD, 11},
+{0x0DFC, 13},{0x37FC, 15},{0x007C,  9},{0x03FC, 12},
+{0x00FC, 10},{0x0BFC, 13},{0x00FD, 10},{0x37FD, 15},
+{0x01FC, 11},{0x07FC, 13},{0x07FD, 13},{0x0FFC, 14},
+{0x0FFD, 14},{0x17FC, 14},{0x3BFC, 15},
+{0x000B,  4},{0x0078,  8},{0x03F5, 11},{0x0FEC, 13},
+{0x1FEC, 14},{0x0012,  5},{0x00ED,  9},{0x07DC, 12},
+{0x1FED, 14},{0x3BFD, 15},{0x0013,  5},{0x03F8, 11},
+{0x3DFC, 15},{0x0018,  6},{0x07DD, 12},{0x0019,  6},
+{0x07EC, 12},{0x0022,  6},{0x0FED, 13},{0x0023,  6},
+{0x0FF4, 13},{0x0035,  7},{0x0FF5, 13},{0x0038,  7},
+{0x0FF8, 13},{0x0039,  7},{0x0FF9, 13},{0x0042,  7},
+{0x1FF4, 14},{0x0043,  7},{0x1FF5, 14},{0x0079,  8},
+{0x1FF8, 14},{0x0082,  8},{0x3DFD, 15},{0x0083,  8},
+{0x00F4,  9},{0x00F5,  9},{0x00F8,  9},{0x00F9,  9},
+{0x0102,  9},{0x0103,  9},{0x01F5, 10},{0x01F8, 10},
+{0x01F9, 10},{0x0202, 10},{0x0203, 10},{0x03F9, 11},
+{0x0402, 11},{0x0403, 11},{0x07ED, 12},{0x07F4, 12},
+{0x07F5, 12},{0x07F8, 12},{0x07F9, 12},{0x0802, 12},
+{0x0803, 12},{0x1002, 13},{0x1003, 13},{0x1FF9, 14},
+{0x2002, 14},{0x2003, 14},{0x3EFC, 15},{0x3EFD, 15},
+{0x3F7C, 15},{0x3F7D, 15},{0x0000,  4}
+};
+
+static const int8_t intra_rvlc_run[169]={
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 2,  2,  2,  2,  2,  2,  2,  2,
+ 2,  2,  2,  3,  3,  3,  3,  3,
+ 3,  3,  3,  3,  4,  4,  4,  4,
+ 4,  4,  5,  5,  5,  5,  5,  5,
+ 6,  6,  6,  6,  6,  7,  7,  7,
+ 7,  7,  8,  8,  8,  8,  9,  9,
+ 9,  9, 10, 10, 11, 11, 12, 12,
+13, 14, 15, 16, 17, 18, 19,
+ 0,  0,  0,  0,  0,  1,  1,  1,
+ 1,  1,  2,  2,  2,  3,  3,  4,
+ 4,  5,  5,  6,  6,  7,  7,  8,
+ 8,  9,  9, 10, 10, 11, 11, 12,
+12, 13, 13, 14, 15, 16, 17, 18,
+19, 20, 21, 22, 23, 24, 25, 26,
+27, 28, 29, 30, 31, 32, 33, 34,
+35, 36, 37, 38, 39, 40, 41, 42,
+43, 44,
+};
+
+static const int8_t intra_rvlc_level[169]={
+ 1,  2,  3,  4,  5,  6,  7,  8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+17, 18, 19, 20, 21, 22, 23, 24,
+25, 26, 27,  1,  2,  3,  4,  5,
+ 6,  7,  8,  9, 10, 11, 12, 13,
+ 1,  2,  3,  4,  5,  6,  7,  8,
+ 9, 10, 11,  1,  2,  3,  4,  5,
+ 6,  7,  8,  9,  1,  2,  3,  4,
+ 5,  6,  1,  2,  3,  4,  5,  6,
+ 1,  2,  3,  4,  5,  1,  2,  3,
+ 4,  5,  1,  2,  3,  4,  1,  2,
+ 3,  4,  1,  2,  1,  2,  1,  2,
+ 1,  1,  1,  1,  1,  1,  1,
+ 1,  2,  3,  4,  5,  1,  2,  3,
+ 4,  5,  1,  2,  3,  1,  2,  1,
+ 2,  1,  2,  1,  2,  1,  2,  1,
+ 2,  1,  2,  1,  2,  1,  2,  1,
+ 2,  1,  2,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 1,  1,
+};
+
+static RLTable rvlc_rl_intra = {
+    169,
+    103,
+    intra_rvlc,
+    intra_rvlc_run,
+    intra_rvlc_level,
+};
+
+static const uint16_t sprite_trajectory_tab[15][2] = {
+ {0x00, 2}, {0x02, 3},  {0x03, 3},  {0x04, 3}, {0x05, 3}, {0x06, 3},
+ {0x0E, 4}, {0x1E, 5},  {0x3E, 6},  {0x7E, 7}, {0xFE, 8},
+ {0x1FE, 9},{0x3FE, 10},{0x7FE, 11},{0xFFE, 12},
+};
+
+static const uint8_t mb_type_b_tab[4][2] = {
+ {1, 1}, {1, 2}, {1, 3}, {1, 4},
+};
+
+static const AVRational pixel_aspect[16]={
+ {0, 1},
+ {1, 1},
+ {12, 11},
+ {10, 11},
+ {16, 11},
+ {40, 33},
+ {0, 1},
+ {0, 1},
+ {0, 1},
+ {0, 1},
+ {0, 1},
+ {0, 1},
+ {0, 1},
+ {0, 1},
+ {0, 1},
+ {0, 1},
+};
+
+/* these matrixes will be permuted for the idct */
+const int16_t ff_mpeg4_default_intra_matrix[64] = {
+  8, 17, 18, 19, 21, 23, 25, 27,
+ 17, 18, 19, 21, 23, 25, 27, 28,
+ 20, 21, 22, 23, 24, 26, 28, 30,
+ 21, 22, 23, 24, 26, 28, 30, 32,
+ 22, 23, 24, 26, 28, 30, 32, 35,
+ 23, 24, 26, 28, 30, 32, 35, 38,
+ 25, 26, 28, 30, 32, 35, 38, 41,
+ 27, 28, 30, 32, 35, 38, 41, 45,
+};
+
+const int16_t ff_mpeg4_default_non_intra_matrix[64] = {
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 18, 19, 20, 21, 22, 23, 24, 25,
+ 19, 20, 21, 22, 23, 24, 26, 27,
+ 20, 21, 22, 23, 25, 26, 27, 28,
+ 21, 22, 23, 24, 26, 27, 28, 30,
+ 22, 23, 24, 26, 27, 28, 30, 31,
+ 23, 24, 25, 27, 28, 30, 31, 33,
+};
+
+const uint8_t ff_mpeg4_y_dc_scale_table[32]={
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    0, 8, 8, 8, 8,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,36,38,40,42,44,46
+};
+const uint8_t ff_mpeg4_c_dc_scale_table[32]={
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    0, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,20,21,22,23,24,25
+};
+
+const uint16_t ff_mpeg4_resync_prefix[8]={
+    0x7F00, 0x7E00, 0x7C00, 0x7800, 0x7000, 0x6000, 0x4000, 0x0000
+};
+
+static const uint8_t mpeg4_dc_threshold[8]={
+    99, 13, 15, 17, 19, 21, 23, 0
+};
diff --git a/contrib/ffmpeg/libavcodec/mpegaudio.c b/contrib/ffmpeg/libavcodec/mpegaudio.c
new file mode 100644
index 000000000..2e5e28a18
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mpegaudio.c
@@ -0,0 +1,801 @@
+/*
+ * The simplest mpeg audio layer 2 encoder
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file mpegaudio.c
+ * The simplest mpeg audio layer 2 encoder.
+ */
+
+#include "avcodec.h"
+#include "bitstream.h"
+#include "mpegaudio.h"
+
+/* currently, cannot change these constants (need to modify
+   quantization stage) */
+#define MUL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS)
+#define FIX(a)   ((int)((a) * (1 << FRAC_BITS)))
+
+#define SAMPLES_BUF_SIZE 4096
+
+typedef struct MpegAudioContext {
+    PutBitContext pb;
+    int nb_channels;
+    int freq, bit_rate;
+    int lsf;           /* 1 if mpeg2 low bitrate selected */
+    int bitrate_index; /* bit rate */
+    int freq_index;
+    int frame_size; /* frame size, in bits, without padding */
+    int64_t nb_samples; /* total number of samples encoded */
+    /* padding computation */
+    int frame_frac, frame_frac_incr, do_padding;
+    short samples_buf[MPA_MAX_CHANNELS][SAMPLES_BUF_SIZE]; /* buffer for filter */
+    int samples_offset[MPA_MAX_CHANNELS];       /* offset in samples_buf */
+    int sb_samples[MPA_MAX_CHANNELS][3][12][SBLIMIT];
+    unsigned char scale_factors[MPA_MAX_CHANNELS][SBLIMIT][3]; /* scale factors */
+    /* code to group 3 scale factors */
+    unsigned char scale_code[MPA_MAX_CHANNELS][SBLIMIT];
+    int sblimit; /* number of used subbands */
+    const unsigned char *alloc_table;
+} MpegAudioContext;
+
+/* define it to use floats in quantization (I don't like floats !) */
+//#define USE_FLOATS
+
+#include "mpegaudiotab.h"
+
+static int MPA_encode_init(AVCodecContext *avctx)
+{
+    MpegAudioContext *s = avctx->priv_data;
+    int freq = avctx->sample_rate;
+    int bitrate = avctx->bit_rate;
+    int channels = avctx->channels;
+    int i, v, table;
+    float a;
+
+    if (channels > 2)
+        return -1;
+    bitrate = bitrate / 1000;
+    s->nb_channels = channels;
+    s->freq = freq;
+    s->bit_rate = bitrate * 1000;
+    avctx->frame_size = MPA_FRAME_SIZE;
+
+    /* encoding freq */
+    s->lsf = 0;
+    for(i=0;i<3;i++) {
+        if (mpa_freq_tab[i] == freq)
+            break;
+        if ((mpa_freq_tab[i] / 2) == freq) {
+            s->lsf = 1;
+            break;
+        }
+    }
+    if (i == 3){
+        av_log(avctx, AV_LOG_ERROR, "Sampling rate %d is not allowed in mp2\n", freq);
+        return -1;
+    }
+    s->freq_index = i;
+
+    /* encoding bitrate & frequency */
+    for(i=0;i<15;i++) {
+        if (mpa_bitrate_tab[s->lsf][1][i] == bitrate)
+            break;
+    }
+    if (i == 15){
+        av_log(avctx, AV_LOG_ERROR, "bitrate %d is not allowed in mp2\n", bitrate);
+        return -1;
+    }
+    s->bitrate_index = i;
+
+    /* compute total header size & pad bit */
+
+    a = (float)(bitrate * 1000 * MPA_FRAME_SIZE) / (freq * 8.0);
+    s->frame_size = ((int)a) * 8;
+
+    /* frame fractional size to compute padding */
+    s->frame_frac = 0;
+    s->frame_frac_incr = (int)((a - floor(a)) * 65536.0);
+
+    /* select the right allocation table */
+    table = l2_select_table(bitrate, s->nb_channels, freq, s->lsf);
+
+    /* number of used subbands */
+    s->sblimit = sblimit_table[table];
+    s->alloc_table = alloc_tables[table];
+
+#ifdef DEBUG
+    av_log(avctx, AV_LOG_DEBUG, "%d kb/s, %d Hz, frame_size=%d bits, table=%d, padincr=%x\n",
+           bitrate, freq, s->frame_size, table, s->frame_frac_incr);
+#endif
+
+    for(i=0;i<s->nb_channels;i++)
+        s->samples_offset[i] = 0;
+
+    for(i=0;i<257;i++) {
+        int v;
+        v = mpa_enwindow[i];
+#if WFRAC_BITS != 16
+        v = (v + (1 << (16 - WFRAC_BITS - 1))) >> (16 - WFRAC_BITS);
+#endif
+        filter_bank[i] = v;
+        if ((i & 63) != 0)
+            v = -v;
+        if (i != 0)
+            filter_bank[512 - i] = v;
+    }
+
+    for(i=0;i<64;i++) {
+        v = (int)(pow(2.0, (3 - i) / 3.0) * (1 << 20));
+        if (v <= 0)
+            v = 1;
+        scale_factor_table[i] = v;
+#ifdef USE_FLOATS
+        scale_factor_inv_table[i] = pow(2.0, -(3 - i) / 3.0) / (float)(1 << 20);
+#else
+#define P 15
+        scale_factor_shift[i] = 21 - P - (i / 3);
+        scale_factor_mult[i] = (1 << P) * pow(2.0, (i % 3) / 3.0);
+#endif
+    }
+    for(i=0;i<128;i++) {
+        v = i - 64;
+        if (v <= -3)
+            v = 0;
+        else if (v < 0)
+            v = 1;
+        else if (v == 0)
+            v = 2;
+        else if (v < 3)
+            v = 3;
+        else
+            v = 4;
+        scale_diff_table[i] = v;
+    }
+
+    for(i=0;i<17;i++) {
+        v = quant_bits[i];
+        if (v < 0)
+            v = -v;
+        else
+            v = v * 3;
+        total_quant_bits[i] = 12 * v;
+    }
+
+    avctx->coded_frame= avcodec_alloc_frame();
+    avctx->coded_frame->key_frame= 1;
+
+    return 0;
+}
+
+/* 32 point floating point IDCT without 1/sqrt(2) coef zero scaling */
+static void idct32(int *out, int *tab)
+{
+    int i, j;
+    int *t, *t1, xr;
+    const int *xp = costab32;
+
+    for(j=31;j>=3;j-=2) tab[j] += tab[j - 2];
+
+    t = tab + 30;
+    t1 = tab + 2;
+    do {
+        t[0] += t[-4];
+        t[1] += t[1 - 4];
+        t -= 4;
+    } while (t != t1);
+
+    t = tab + 28;
+    t1 = tab + 4;
+    do {
+        t[0] += t[-8];
+        t[1] += t[1-8];
+        t[2] += t[2-8];
+        t[3] += t[3-8];
+        t -= 8;
+    } while (t != t1);
+
+    t = tab;
+    t1 = tab + 32;
+    do {
+        t[ 3] = -t[ 3];
+        t[ 6] = -t[ 6];
+
+        t[11] = -t[11];
+        t[12] = -t[12];
+        t[13] = -t[13];
+        t[15] = -t[15];
+        t += 16;
+    } while (t != t1);
+
+
+    t = tab;
+    t1 = tab + 8;
+    do {
+        int x1, x2, x3, x4;
+
+        x3 = MUL(t[16], FIX(SQRT2*0.5));
+        x4 = t[0] - x3;
+        x3 = t[0] + x3;
+
+        x2 = MUL(-(t[24] + t[8]), FIX(SQRT2*0.5));
+        x1 = MUL((t[8] - x2), xp[0]);
+        x2 = MUL((t[8] + x2), xp[1]);
+
+        t[ 0] = x3 + x1;
+        t[ 8] = x4 - x2;
+        t[16] = x4 + x2;
+        t[24] = x3 - x1;
+        t++;
+    } while (t != t1);
+
+    xp += 2;
+    t = tab;
+    t1 = tab + 4;
+    do {
+        xr = MUL(t[28],xp[0]);
+        t[28] = (t[0] - xr);
+        t[0] = (t[0] + xr);
+
+        xr = MUL(t[4],xp[1]);
+        t[ 4] = (t[24] - xr);
+        t[24] = (t[24] + xr);
+
+        xr = MUL(t[20],xp[2]);
+        t[20] = (t[8] - xr);
+        t[ 8] = (t[8] + xr);
+
+        xr = MUL(t[12],xp[3]);
+        t[12] = (t[16] - xr);
+        t[16] = (t[16] + xr);
+        t++;
+    } while (t != t1);
+    xp += 4;
+
+    for (i = 0; i < 4; i++) {
+        xr = MUL(tab[30-i*4],xp[0]);
+        tab[30-i*4] = (tab[i*4] - xr);
+        tab[   i*4] = (tab[i*4] + xr);
+
+        xr = MUL(tab[ 2+i*4],xp[1]);
+        tab[ 2+i*4] = (tab[28-i*4] - xr);
+        tab[28-i*4] = (tab[28-i*4] + xr);
+
+        xr = MUL(tab[31-i*4],xp[0]);
+        tab[31-i*4] = (tab[1+i*4] - xr);
+        tab[ 1+i*4] = (tab[1+i*4] + xr);
+
+        xr = MUL(tab[ 3+i*4],xp[1]);
+        tab[ 3+i*4] = (tab[29-i*4] - xr);
+        tab[29-i*4] = (tab[29-i*4] + xr);
+
+        xp += 2;
+    }
+
+    t = tab + 30;
+    t1 = tab + 1;
+    do {
+        xr = MUL(t1[0], *xp);
+        t1[0] = (t[0] - xr);
+        t[0] = (t[0] + xr);
+        t -= 2;
+        t1 += 2;
+        xp++;
+    } while (t >= tab);
+
+    for(i=0;i<32;i++) {
+        out[i] = tab[bitinv32[i]];
+    }
+}
+
+#define WSHIFT (WFRAC_BITS + 15 - FRAC_BITS)
+
+static void filter(MpegAudioContext *s, int ch, short *samples, int incr)
+{
+    short *p, *q;
+    int sum, offset, i, j;
+    int tmp[64];
+    int tmp1[32];
+    int *out;
+
+    //    print_pow1(samples, 1152);
+
+    offset = s->samples_offset[ch];
+    out = &s->sb_samples[ch][0][0][0];
+    for(j=0;j<36;j++) {
+        /* 32 samples at once */
+        for(i=0;i<32;i++) {
+            s->samples_buf[ch][offset + (31 - i)] = samples[0];
+            samples += incr;
+        }
+
+        /* filter */
+        p = s->samples_buf[ch] + offset;
+        q = filter_bank;
+        /* maxsum = 23169 */
+        for(i=0;i<64;i++) {
+            sum = p[0*64] * q[0*64];
+            sum += p[1*64] * q[1*64];
+            sum += p[2*64] * q[2*64];
+            sum += p[3*64] * q[3*64];
+            sum += p[4*64] * q[4*64];
+            sum += p[5*64] * q[5*64];
+            sum += p[6*64] * q[6*64];
+            sum += p[7*64] * q[7*64];
+            tmp[i] = sum;
+            p++;
+            q++;
+        }
+        tmp1[0] = tmp[16] >> WSHIFT;
+        for( i=1; i<=16; i++ ) tmp1[i] = (tmp[i+16]+tmp[16-i]) >> WSHIFT;
+        for( i=17; i<=31; i++ ) tmp1[i] = (tmp[i+16]-tmp[80-i]) >> WSHIFT;
+
+        idct32(out, tmp1);
+
+        /* advance of 32 samples */
+        offset -= 32;
+        out += 32;
+        /* handle the wrap around */
+        if (offset < 0) {
+            memmove(s->samples_buf[ch] + SAMPLES_BUF_SIZE - (512 - 32),
+                    s->samples_buf[ch], (512 - 32) * 2);
+            offset = SAMPLES_BUF_SIZE - 512;
+        }
+    }
+    s->samples_offset[ch] = offset;
+
+    //    print_pow(s->sb_samples, 1152);
+}
+
+static void compute_scale_factors(unsigned char scale_code[SBLIMIT],
+                                  unsigned char scale_factors[SBLIMIT][3],
+                                  int sb_samples[3][12][SBLIMIT],
+                                  int sblimit)
+{
+    int *p, vmax, v, n, i, j, k, code;
+    int index, d1, d2;
+    unsigned char *sf = &scale_factors[0][0];
+
+    for(j=0;j<sblimit;j++) {
+        for(i=0;i<3;i++) {
+            /* find the max absolute value */
+            p = &sb_samples[i][0][j];
+            vmax = abs(*p);
+            for(k=1;k<12;k++) {
+                p += SBLIMIT;
+                v = abs(*p);
+                if (v > vmax)
+                    vmax = v;
+            }
+            /* compute the scale factor index using log 2 computations */
+            if (vmax > 0) {
+                n = av_log2(vmax);
+                /* n is the position of the MSB of vmax. now
+                   use at most 2 compares to find the index */
+                index = (21 - n) * 3 - 3;
+                if (index >= 0) {
+                    while (vmax <= scale_factor_table[index+1])
+                        index++;
+                } else {
+                    index = 0; /* very unlikely case of overflow */
+                }
+            } else {
+                index = 62; /* value 63 is not allowed */
+            }
+
+#if 0
+            printf("%2d:%d in=%x %x %d\n",
+                   j, i, vmax, scale_factor_table[index], index);
+#endif
+            /* store the scale factor */
+            assert(index >=0 && index <= 63);
+            sf[i] = index;
+        }
+
+        /* compute the transmission factor : look if the scale factors
+           are close enough to each other */
+        d1 = scale_diff_table[sf[0] - sf[1] + 64];
+        d2 = scale_diff_table[sf[1] - sf[2] + 64];
+
+        /* handle the 25 cases */
+        switch(d1 * 5 + d2) {
+        case 0*5+0:
+        case 0*5+4:
+        case 3*5+4:
+        case 4*5+0:
+        case 4*5+4:
+            code = 0;
+            break;
+        case 0*5+1:
+        case 0*5+2:
+        case 4*5+1:
+        case 4*5+2:
+            code = 3;
+            sf[2] = sf[1];
+            break;
+        case 0*5+3:
+        case 4*5+3:
+            code = 3;
+            sf[1] = sf[2];
+            break;
+        case 1*5+0:
+        case 1*5+4:
+        case 2*5+4:
+            code = 1;
+            sf[1] = sf[0];
+            break;
+        case 1*5+1:
+        case 1*5+2:
+        case 2*5+0:
+        case 2*5+1:
+        case 2*5+2:
+            code = 2;
+            sf[1] = sf[2] = sf[0];
+            break;
+        case 2*5+3:
+        case 3*5+3:
+            code = 2;
+            sf[0] = sf[1] = sf[2];
+            break;
+        case 3*5+0:
+        case 3*5+1:
+        case 3*5+2:
+            code = 2;
+            sf[0] = sf[2] = sf[1];
+            break;
+        case 1*5+3:
+            code = 2;
+            if (sf[0] > sf[2])
+              sf[0] = sf[2];
+            sf[1] = sf[2] = sf[0];
+            break;
+        default:
+            assert(0); //cant happen
+            code = 0;           /* kill warning */
+        }
+
+#if 0
+        printf("%d: %2d %2d %2d %d %d -> %d\n", j,
+               sf[0], sf[1], sf[2], d1, d2, code);
+#endif
+        scale_code[j] = code;
+        sf += 3;
+    }
+}
+
+/* The most important function : psycho acoustic module. In this
+   encoder there is basically none, so this is the worst you can do,
+   but also this is the simpler. */
+static void psycho_acoustic_model(MpegAudioContext *s, short smr[SBLIMIT])
+{
+    int i;
+
+    for(i=0;i<s->sblimit;i++) {
+        smr[i] = (int)(fixed_smr[i] * 10);
+    }
+}
+
+
+#define SB_NOTALLOCATED  0
+#define SB_ALLOCATED     1
+#define SB_NOMORE        2
+
+/* Try to maximize the smr while using a number of bits inferior to
+   the frame size. I tried to make the code simpler, faster and
+   smaller than other encoders :-) */
+static void compute_bit_allocation(MpegAudioContext *s,
+                                   short smr1[MPA_MAX_CHANNELS][SBLIMIT],
+                                   unsigned char bit_alloc[MPA_MAX_CHANNELS][SBLIMIT],
+                                   int *padding)
+{
+    int i, ch, b, max_smr, max_ch, max_sb, current_frame_size, max_frame_size;
+    int incr;
+    short smr[MPA_MAX_CHANNELS][SBLIMIT];
+    unsigned char subband_status[MPA_MAX_CHANNELS][SBLIMIT];
+    const unsigned char *alloc;
+
+    memcpy(smr, smr1, s->nb_channels * sizeof(short) * SBLIMIT);
+    memset(subband_status, SB_NOTALLOCATED, s->nb_channels * SBLIMIT);
+    memset(bit_alloc, 0, s->nb_channels * SBLIMIT);
+
+    /* compute frame size and padding */
+    max_frame_size = s->frame_size;
+    s->frame_frac += s->frame_frac_incr;
+    if (s->frame_frac >= 65536) {
+        s->frame_frac -= 65536;
+        s->do_padding = 1;
+        max_frame_size += 8;
+    } else {
+        s->do_padding = 0;
+    }
+
+    /* compute the header + bit alloc size */
+    current_frame_size = 32;
+    alloc = s->alloc_table;
+    for(i=0;i<s->sblimit;i++) {
+        incr = alloc[0];
+        current_frame_size += incr * s->nb_channels;
+        alloc += 1 << incr;
+    }
+    for(;;) {
+        /* look for the subband with the largest signal to mask ratio */
+        max_sb = -1;
+        max_ch = -1;
+        max_smr = 0x80000000;
+        for(ch=0;ch<s->nb_channels;ch++) {
+            for(i=0;i<s->sblimit;i++) {
+                if (smr[ch][i] > max_smr && subband_status[ch][i] != SB_NOMORE) {
+                    max_smr = smr[ch][i];
+                    max_sb = i;
+                    max_ch = ch;
+                }
+            }
+        }
+#if 0
+        printf("current=%d max=%d max_sb=%d alloc=%d\n",
+               current_frame_size, max_frame_size, max_sb,
+               bit_alloc[max_sb]);
+#endif
+        if (max_sb < 0)
+            break;
+
+        /* find alloc table entry (XXX: not optimal, should use
+           pointer table) */
+        alloc = s->alloc_table;
+        for(i=0;i<max_sb;i++) {
+            alloc += 1 << alloc[0];
+        }
+
+        if (subband_status[max_ch][max_sb] == SB_NOTALLOCATED) {
+            /* nothing was coded for this band: add the necessary bits */
+            incr = 2 + nb_scale_factors[s->scale_code[max_ch][max_sb]] * 6;
+            incr += total_quant_bits[alloc[1]];
+        } else {
+            /* increments bit allocation */
+            b = bit_alloc[max_ch][max_sb];
+            incr = total_quant_bits[alloc[b + 1]] -
+                total_quant_bits[alloc[b]];
+        }
+
+        if (current_frame_size + incr <= max_frame_size) {
+            /* can increase size */
+            b = ++bit_alloc[max_ch][max_sb];
+            current_frame_size += incr;
+            /* decrease smr by the resolution we added */
+            smr[max_ch][max_sb] = smr1[max_ch][max_sb] - quant_snr[alloc[b]];
+            /* max allocation size reached ? */
+            if (b == ((1 << alloc[0]) - 1))
+                subband_status[max_ch][max_sb] = SB_NOMORE;
+            else
+                subband_status[max_ch][max_sb] = SB_ALLOCATED;
+        } else {
+            /* cannot increase the size of this subband */
+            subband_status[max_ch][max_sb] = SB_NOMORE;
+        }
+    }
+    *padding = max_frame_size - current_frame_size;
+    assert(*padding >= 0);
+
+#if 0
+    for(i=0;i<s->sblimit;i++) {
+        printf("%d ", bit_alloc[i]);
+    }
+    printf("\n");
+#endif
+}
+
+/*
+ * Output the mpeg audio layer 2 frame. Note how the code is small
+ * compared to other encoders :-)
+ */
+static void encode_frame(MpegAudioContext *s,
+                         unsigned char bit_alloc[MPA_MAX_CHANNELS][SBLIMIT],
+                         int padding)
+{
+    int i, j, k, l, bit_alloc_bits, b, ch;
+    unsigned char *sf;
+    int q[3];
+    PutBitContext *p = &s->pb;
+
+    /* header */
+
+    put_bits(p, 12, 0xfff);
+    put_bits(p, 1, 1 - s->lsf); /* 1 = mpeg1 ID, 0 = mpeg2 lsf ID */
+    put_bits(p, 2, 4-2);  /* layer 2 */
+    put_bits(p, 1, 1); /* no error protection */
+    put_bits(p, 4, s->bitrate_index);
+    put_bits(p, 2, s->freq_index);
+    put_bits(p, 1, s->do_padding); /* use padding */
+    put_bits(p, 1, 0);             /* private_bit */
+    put_bits(p, 2, s->nb_channels == 2 ? MPA_STEREO : MPA_MONO);
+    put_bits(p, 2, 0); /* mode_ext */
+    put_bits(p, 1, 0); /* no copyright */
+    put_bits(p, 1, 1); /* original */
+    put_bits(p, 2, 0); /* no emphasis */
+
+    /* bit allocation */
+    j = 0;
+    for(i=0;i<s->sblimit;i++) {
+        bit_alloc_bits = s->alloc_table[j];
+        for(ch=0;ch<s->nb_channels;ch++) {
+            put_bits(p, bit_alloc_bits, bit_alloc[ch][i]);
+        }
+        j += 1 << bit_alloc_bits;
+    }
+
+    /* scale codes */
+    for(i=0;i<s->sblimit;i++) {
+        for(ch=0;ch<s->nb_channels;ch++) {
+            if (bit_alloc[ch][i])
+                put_bits(p, 2, s->scale_code[ch][i]);
+        }
+    }
+
+    /* scale factors */
+    for(i=0;i<s->sblimit;i++) {
+        for(ch=0;ch<s->nb_channels;ch++) {
+            if (bit_alloc[ch][i]) {
+                sf = &s->scale_factors[ch][i][0];
+                switch(s->scale_code[ch][i]) {
+                case 0:
+                    put_bits(p, 6, sf[0]);
+                    put_bits(p, 6, sf[1]);
+                    put_bits(p, 6, sf[2]);
+                    break;
+                case 3:
+                case 1:
+                    put_bits(p, 6, sf[0]);
+                    put_bits(p, 6, sf[2]);
+                    break;
+                case 2:
+                    put_bits(p, 6, sf[0]);
+                    break;
+                }
+            }
+        }
+    }
+
+    /* quantization & write sub band samples */
+
+    for(k=0;k<3;k++) {
+        for(l=0;l<12;l+=3) {
+            j = 0;
+            for(i=0;i<s->sblimit;i++) {
+                bit_alloc_bits = s->alloc_table[j];
+                for(ch=0;ch<s->nb_channels;ch++) {
+                    b = bit_alloc[ch][i];
+                    if (b) {
+                        int qindex, steps, m, sample, bits;
+                        /* we encode 3 sub band samples of the same sub band at a time */
+                        qindex = s->alloc_table[j+b];
+                        steps = quant_steps[qindex];
+                        for(m=0;m<3;m++) {
+                            sample = s->sb_samples[ch][k][l + m][i];
+                            /* divide by scale factor */
+#ifdef USE_FLOATS
+                            {
+                                float a;
+                                a = (float)sample * scale_factor_inv_table[s->scale_factors[ch][i][k]];
+                                q[m] = (int)((a + 1.0) * steps * 0.5);
+                            }
+#else
+                            {
+                                int q1, e, shift, mult;
+                                e = s->scale_factors[ch][i][k];
+                                shift = scale_factor_shift[e];
+                                mult = scale_factor_mult[e];
+
+                                /* normalize to P bits */
+                                if (shift < 0)
+                                    q1 = sample << (-shift);
+                                else
+                                    q1 = sample >> shift;
+                                q1 = (q1 * mult) >> P;
+                                q[m] = ((q1 + (1 << P)) * steps) >> (P + 1);
+                            }
+#endif
+                            if (q[m] >= steps)
+                                q[m] = steps - 1;
+                            assert(q[m] >= 0 && q[m] < steps);
+                        }
+                        bits = quant_bits[qindex];
+                        if (bits < 0) {
+                            /* group the 3 values to save bits */
+                            put_bits(p, -bits,
+                                     q[0] + steps * (q[1] + steps * q[2]));
+#if 0
+                            printf("%d: gr1 %d\n",
+                                   i, q[0] + steps * (q[1] + steps * q[2]));
+#endif
+                        } else {
+#if 0
+                            printf("%d: gr3 %d %d %d\n",
+                                   i, q[0], q[1], q[2]);
+#endif
+                            put_bits(p, bits, q[0]);
+                            put_bits(p, bits, q[1]);
+                            put_bits(p, bits, q[2]);
+                        }
+                    }
+                }
+                /* next subband in alloc table */
+                j += 1 << bit_alloc_bits;
+            }
+        }
+    }
+
+    /* padding */
+    for(i=0;i<padding;i++)
+        put_bits(p, 1, 0);
+
+    /* flush */
+    flush_put_bits(p);
+}
+
+static int MPA_encode_frame(AVCodecContext *avctx,
+                            unsigned char *frame, int buf_size, void *data)
+{
+    MpegAudioContext *s = avctx->priv_data;
+    short *samples = data;
+    short smr[MPA_MAX_CHANNELS][SBLIMIT];
+    unsigned char bit_alloc[MPA_MAX_CHANNELS][SBLIMIT];
+    int padding, i;
+
+    for(i=0;i<s->nb_channels;i++) {
+        filter(s, i, samples + i, s->nb_channels);
+    }
+
+    for(i=0;i<s->nb_channels;i++) {
+        compute_scale_factors(s->scale_code[i], s->scale_factors[i],
+                              s->sb_samples[i], s->sblimit);
+    }
+    for(i=0;i<s->nb_channels;i++) {
+        psycho_acoustic_model(s, smr[i]);
+    }
+    compute_bit_allocation(s, smr, bit_alloc, &padding);
+
+    init_put_bits(&s->pb, frame, MPA_MAX_CODED_FRAME_SIZE);
+
+    encode_frame(s, bit_alloc, padding);
+
+    s->nb_samples += MPA_FRAME_SIZE;
+    return pbBufPtr(&s->pb) - s->pb.buf;
+}
+
+static int MPA_encode_close(AVCodecContext *avctx)
+{
+    av_freep(&avctx->coded_frame);
+    return 0;
+}
+
+#ifdef CONFIG_MP2_ENCODER
+AVCodec mp2_encoder = {
+    "mp2",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_MP2,
+    sizeof(MpegAudioContext),
+    MPA_encode_init,
+    MPA_encode_frame,
+    MPA_encode_close,
+    NULL,
+};
+#endif // CONFIG_MP2_ENCODER
+
+#undef FIX
diff --git a/contrib/ffmpeg/libavcodec/mpegaudio.h b/contrib/ffmpeg/libavcodec/mpegaudio.h
new file mode 100644
index 000000000..3eadf92a8
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mpegaudio.h
@@ -0,0 +1,106 @@
+/*
+ * copyright (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file mpegaudio.h
+ * mpeg audio declarations for both encoder and decoder.
+ */
+
+/* max frame size, in samples */
+#define MPA_FRAME_SIZE 1152
+
+/* max compressed frame size */
+#define MPA_MAX_CODED_FRAME_SIZE 1792
+
+#define MPA_MAX_CHANNELS 2
+
+#define SBLIMIT 32 /* number of subbands */
+
+#define MPA_STEREO  0
+#define MPA_JSTEREO 1
+#define MPA_DUAL    2
+#define MPA_MONO    3
+
+/* header + layer + bitrate + freq + lsf/mpeg25 */
+#define SAME_HEADER_MASK \
+   (0xffe00000 | (3 << 17) | (0xf << 12) | (3 << 10) | (3 << 19))
+
+/* define USE_HIGHPRECISION to have a bit exact (but slower) mpeg
+   audio decoder */
+
+#ifdef USE_HIGHPRECISION
+#define FRAC_BITS   23   /* fractional bits for sb_samples and dct */
+#define WFRAC_BITS  16   /* fractional bits for window */
+#else
+#define FRAC_BITS   15   /* fractional bits for sb_samples and dct */
+#define WFRAC_BITS  14   /* fractional bits for window */
+#endif
+
+#if defined(USE_HIGHPRECISION) && defined(CONFIG_AUDIO_NONSHORT)
+typedef int32_t OUT_INT;
+#define OUT_MAX INT32_MAX
+#define OUT_MIN INT32_MIN
+#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 31)
+#else
+typedef int16_t OUT_INT;
+#define OUT_MAX INT16_MAX
+#define OUT_MIN INT16_MIN
+#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
+#endif
+
+#if FRAC_BITS <= 15
+typedef int16_t MPA_INT;
+#else
+typedef int32_t MPA_INT;
+#endif
+
+int l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
+int mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate);
+void ff_mpa_synth_init(MPA_INT *window);
+void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
+                         MPA_INT *window, int *dither_state,
+                         OUT_INT *samples, int incr,
+                         int32_t sb_samples[SBLIMIT]);
+
+extern const uint16_t mpa_bitrate_tab[2][3][15];
+extern const uint16_t mpa_freq_tab[3];
+extern const unsigned char *alloc_tables[5];
+extern const double enwindow[512];
+extern const int sblimit_table[5];
+extern const int quant_steps[17];
+extern const int quant_bits[17];
+extern const int32_t mpa_enwindow[257];
+
+/* fast header check for resync */
+static inline int ff_mpa_check_header(uint32_t header){
+    /* header */
+    if ((header & 0xffe00000) != 0xffe00000)
+        return -1;
+    /* layer check */
+    if ((header & (3<<17)) == 0)
+        return -1;
+    /* bit rate */
+    if ((header & (0xf<<12)) == 0xf<<12)
+        return -1;
+    /* frequency */
+    if ((header & (3<<10)) == 3<<10)
+        return -1;
+    return 0;
+}
diff --git a/contrib/ffmpeg/libavcodec/mpegaudiodec.c b/contrib/ffmpeg/libavcodec/mpegaudiodec.c
new file mode 100644
index 000000000..54bcee3b0
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mpegaudiodec.c
@@ -0,0 +1,2879 @@
+/*
+ * MPEG Audio decoder
+ * Copyright (c) 2001, 2002 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file mpegaudiodec.c
+ * MPEG Audio decoder.
+ */
+
+//#define DEBUG
+#include "avcodec.h"
+#include "bitstream.h"
+#include "dsputil.h"
+
+/*
+ * TODO:
+ *  - in low precision mode, use more 16 bit multiplies in synth filter
+ *  - test lsf / mpeg25 extensively.
+ */
+
+/* define USE_HIGHPRECISION to have a bit exact (but slower) mpeg
+   audio decoder */
+#ifdef CONFIG_MPEGAUDIO_HP
+#   define USE_HIGHPRECISION
+#endif
+
+#include "mpegaudio.h"
+
+#include "mathops.h"
+
+#define FRAC_ONE    (1 << FRAC_BITS)
+
+#define FIX(a)   ((int)((a) * FRAC_ONE))
+/* WARNING: only correct for posititive numbers */
+#define FIXR(a)   ((int)((a) * FRAC_ONE + 0.5))
+#define FRAC_RND(a) (((a) + (FRAC_ONE/2)) >> FRAC_BITS)
+
+#define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5))
+
+/****************/
+
+#define HEADER_SIZE 4
+#define BACKSTEP_SIZE 512
+#define EXTRABYTES 24
+
+struct GranuleDef;
+
+typedef struct MPADecodeContext {
+    DECLARE_ALIGNED_8(uint8_t, last_buf[2*BACKSTEP_SIZE + EXTRABYTES]);
+    int last_buf_size;
+    int frame_size;
+    /* next header (used in free format parsing) */
+    uint32_t free_format_next_header;
+    int error_protection;
+    int layer;
+    int sample_rate;
+    int sample_rate_index; /* between 0 and 8 */
+    int bit_rate;
+    GetBitContext gb;
+    GetBitContext in_gb;
+    int nb_channels;
+    int mode;
+    int mode_ext;
+    int lsf;
+    MPA_INT synth_buf[MPA_MAX_CHANNELS][512 * 2] __attribute__((aligned(16)));
+    int synth_buf_offset[MPA_MAX_CHANNELS];
+    int32_t sb_samples[MPA_MAX_CHANNELS][36][SBLIMIT] __attribute__((aligned(16)));
+    int32_t mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */
+#ifdef DEBUG
+    int frame_count;
+#endif
+    void (*compute_antialias)(struct MPADecodeContext *s, struct GranuleDef *g);
+    int adu_mode; ///< 0 for standard mp3, 1 for adu formatted mp3
+    int dither_state;
+    int error_resilience;
+} MPADecodeContext;
+
+/**
+ * Context for MP3On4 decoder
+ */
+typedef struct MP3On4DecodeContext {
+    int frames;   ///< number of mp3 frames per block (number of mp3 decoder instances)
+    int chan_cfg; ///< channel config number
+    MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance
+} MP3On4DecodeContext;
+
+/* layer 3 "granule" */
+typedef struct GranuleDef {
+    uint8_t scfsi;
+    int part2_3_length;
+    int big_values;
+    int global_gain;
+    int scalefac_compress;
+    uint8_t block_type;
+    uint8_t switch_point;
+    int table_select[3];
+    int subblock_gain[3];
+    uint8_t scalefac_scale;
+    uint8_t count1table_select;
+    int region_size[3]; /* number of huffman codes in each region */
+    int preflag;
+    int short_start, long_end; /* long/short band indexes */
+    uint8_t scale_factors[40];
+    int32_t sb_hybrid[SBLIMIT * 18]; /* 576 samples */
+} GranuleDef;
+
+#define MODE_EXT_MS_STEREO 2
+#define MODE_EXT_I_STEREO  1
+
+/* layer 3 huffman tables */
+typedef struct HuffTable {
+    int xsize;
+    const uint8_t *bits;
+    const uint16_t *codes;
+} HuffTable;
+
+#include "mpegaudiodectab.h"
+
+static void compute_antialias_integer(MPADecodeContext *s, GranuleDef *g);
+static void compute_antialias_float(MPADecodeContext *s, GranuleDef *g);
+
+/* vlc structure for decoding layer 3 huffman tables */
+static VLC huff_vlc[16];
+static VLC huff_quad_vlc[2];
+/* computed from band_size_long */
+static uint16_t band_index_long[9][23];
+/* XXX: free when all decoders are closed */
+#define TABLE_4_3_SIZE (8191 + 16)*4
+static int8_t  *table_4_3_exp;
+static uint32_t *table_4_3_value;
+static uint32_t exp_table[512];
+static uint32_t expval_table[512][16];
+/* intensity stereo coef table */
+static int32_t is_table[2][16];
+static int32_t is_table_lsf[2][2][16];
+static int32_t csa_table[8][4];
+static float csa_table_float[8][4];
+static int32_t mdct_win[8][36];
+
+/* lower 2 bits: modulo 3, higher bits: shift */
+static uint16_t scale_factor_modshift[64];
+/* [i][j]:  2^(-j/3) * FRAC_ONE * 2^(i+2) / (2^(i+2) - 1) */
+static int32_t scale_factor_mult[15][3];
+/* mult table for layer 2 group quantization */
+
+#define SCALE_GEN(v) \
+{ FIXR(1.0 * (v)), FIXR(0.7937005259 * (v)), FIXR(0.6299605249 * (v)) }
+
+static const int32_t scale_factor_mult2[3][3] = {
+    SCALE_GEN(4.0 / 3.0), /* 3 steps */
+    SCALE_GEN(4.0 / 5.0), /* 5 steps */
+    SCALE_GEN(4.0 / 9.0), /* 9 steps */
+};
+
+static MPA_INT window[512] __attribute__((aligned(16)));
+
+/* layer 1 unscaling */
+/* n = number of bits of the mantissa minus 1 */
+static inline int l1_unscale(int n, int mant, int scale_factor)
+{
+    int shift, mod;
+    int64_t val;
+
+    shift = scale_factor_modshift[scale_factor];
+    mod = shift & 3;
+    shift >>= 2;
+    val = MUL64(mant + (-1 << n) + 1, scale_factor_mult[n-1][mod]);
+    shift += n;
+    /* NOTE: at this point, 1 <= shift >= 21 + 15 */
+    return (int)((val + (1LL << (shift - 1))) >> shift);
+}
+
+static inline int l2_unscale_group(int steps, int mant, int scale_factor)
+{
+    int shift, mod, val;
+
+    shift = scale_factor_modshift[scale_factor];
+    mod = shift & 3;
+    shift >>= 2;
+
+    val = (mant - (steps >> 1)) * scale_factor_mult2[steps >> 2][mod];
+    /* NOTE: at this point, 0 <= shift <= 21 */
+    if (shift > 0)
+        val = (val + (1 << (shift - 1))) >> shift;
+    return val;
+}
+
+/* compute value^(4/3) * 2^(exponent/4). It normalized to FRAC_BITS */
+static inline int l3_unscale(int value, int exponent)
+{
+    unsigned int m;
+    int e;
+
+    e = table_4_3_exp  [4*value + (exponent&3)];
+    m = table_4_3_value[4*value + (exponent&3)];
+    e -= (exponent >> 2);
+    assert(e>=1);
+    if (e > 31)
+        return 0;
+    m = (m + (1 << (e-1))) >> e;
+
+    return m;
+}
+
+/* all integer n^(4/3) computation code */
+#define DEV_ORDER 13
+
+#define POW_FRAC_BITS 24
+#define POW_FRAC_ONE    (1 << POW_FRAC_BITS)
+#define POW_FIX(a)   ((int)((a) * POW_FRAC_ONE))
+#define POW_MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> POW_FRAC_BITS)
+
+static int dev_4_3_coefs[DEV_ORDER];
+
+#if 0 /* unused */
+static int pow_mult3[3] = {
+    POW_FIX(1.0),
+    POW_FIX(1.25992104989487316476),
+    POW_FIX(1.58740105196819947474),
+};
+#endif
+
+static void int_pow_init(void)
+{
+    int i, a;
+
+    a = POW_FIX(1.0);
+    for(i=0;i<DEV_ORDER;i++) {
+        a = POW_MULL(a, POW_FIX(4.0 / 3.0) - i * POW_FIX(1.0)) / (i + 1);
+        dev_4_3_coefs[i] = a;
+    }
+}
+
+#if 0 /* unused, remove? */
+/* return the mantissa and the binary exponent */
+static int int_pow(int i, int *exp_ptr)
+{
+    int e, er, eq, j;
+    int a, a1;
+
+    /* renormalize */
+    a = i;
+    e = POW_FRAC_BITS;
+    while (a < (1 << (POW_FRAC_BITS - 1))) {
+        a = a << 1;
+        e--;
+    }
+    a -= (1 << POW_FRAC_BITS);
+    a1 = 0;
+    for(j = DEV_ORDER - 1; j >= 0; j--)
+        a1 = POW_MULL(a, dev_4_3_coefs[j] + a1);
+    a = (1 << POW_FRAC_BITS) + a1;
+    /* exponent compute (exact) */
+    e = e * 4;
+    er = e % 3;
+    eq = e / 3;
+    a = POW_MULL(a, pow_mult3[er]);
+    while (a >= 2 * POW_FRAC_ONE) {
+        a = a >> 1;
+        eq++;
+    }
+    /* convert to float */
+    while (a < POW_FRAC_ONE) {
+        a = a << 1;
+        eq--;
+    }
+    /* now POW_FRAC_ONE <= a < 2 * POW_FRAC_ONE */
+#if POW_FRAC_BITS > FRAC_BITS
+    a = (a + (1 << (POW_FRAC_BITS - FRAC_BITS - 1))) >> (POW_FRAC_BITS - FRAC_BITS);
+    /* correct overflow */
+    if (a >= 2 * (1 << FRAC_BITS)) {
+        a = a >> 1;
+        eq++;
+    }
+#endif
+    *exp_ptr = eq;
+    return a;
+}
+#endif
+
+static int decode_init(AVCodecContext * avctx)
+{
+    MPADecodeContext *s = avctx->priv_data;
+    static int init=0;
+    int i, j, k;
+
+#if defined(USE_HIGHPRECISION) && defined(CONFIG_AUDIO_NONSHORT)
+    avctx->sample_fmt= SAMPLE_FMT_S32;
+#else
+    avctx->sample_fmt= SAMPLE_FMT_S16;
+#endif
+    s->error_resilience= avctx->error_resilience;
+
+    if(avctx->antialias_algo != FF_AA_FLOAT)
+        s->compute_antialias= compute_antialias_integer;
+    else
+        s->compute_antialias= compute_antialias_float;
+
+    if (!init && !avctx->parse_only) {
+        /* scale factors table for layer 1/2 */
+        for(i=0;i<64;i++) {
+            int shift, mod;
+            /* 1.0 (i = 3) is normalized to 2 ^ FRAC_BITS */
+            shift = (i / 3);
+            mod = i % 3;
+            scale_factor_modshift[i] = mod | (shift << 2);
+        }
+
+        /* scale factor multiply for layer 1 */
+        for(i=0;i<15;i++) {
+            int n, norm;
+            n = i + 2;
+            norm = ((int64_t_C(1) << n) * FRAC_ONE) / ((1 << n) - 1);
+            scale_factor_mult[i][0] = MULL(FIXR(1.0 * 2.0), norm);
+            scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm);
+            scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm);
+            dprintf("%d: norm=%x s=%x %x %x\n",
+                    i, norm,
+                    scale_factor_mult[i][0],
+                    scale_factor_mult[i][1],
+                    scale_factor_mult[i][2]);
+        }
+
+        ff_mpa_synth_init(window);
+
+        /* huffman decode tables */
+        for(i=1;i<16;i++) {
+            const HuffTable *h = &mpa_huff_tables[i];
+            int xsize, x, y;
+            unsigned int n;
+            uint8_t  tmp_bits [512];
+            uint16_t tmp_codes[512];
+
+            memset(tmp_bits , 0, sizeof(tmp_bits ));
+            memset(tmp_codes, 0, sizeof(tmp_codes));
+
+            xsize = h->xsize;
+            n = xsize * xsize;
+
+            j = 0;
+            for(x=0;x<xsize;x++) {
+                for(y=0;y<xsize;y++){
+                    tmp_bits [(x << 5) | y | ((x&&y)<<4)]= h->bits [j  ];
+                    tmp_codes[(x << 5) | y | ((x&&y)<<4)]= h->codes[j++];
+                }
+            }
+
+            /* XXX: fail test */
+            init_vlc(&huff_vlc[i], 7, 512,
+                     tmp_bits, 1, 1, tmp_codes, 2, 2, 1);
+        }
+        for(i=0;i<2;i++) {
+            init_vlc(&huff_quad_vlc[i], i == 0 ? 7 : 4, 16,
+                     mpa_quad_bits[i], 1, 1, mpa_quad_codes[i], 1, 1, 1);
+        }
+
+        for(i=0;i<9;i++) {
+            k = 0;
+            for(j=0;j<22;j++) {
+                band_index_long[i][j] = k;
+                k += band_size_long[i][j];
+            }
+            band_index_long[i][22] = k;
+        }
+
+        /* compute n ^ (4/3) and store it in mantissa/exp format */
+        table_4_3_exp= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_exp[0]));
+        if(!table_4_3_exp)
+            return -1;
+        table_4_3_value= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_value[0]));
+        if(!table_4_3_value)
+            return -1;
+
+        int_pow_init();
+        for(i=1;i<TABLE_4_3_SIZE;i++) {
+            double f, fm;
+            int e, m;
+            f = pow((double)(i/4), 4.0 / 3.0) * pow(2, (i&3)*0.25);
+            fm = frexp(f, &e);
+            m = (uint32_t)(fm*(1LL<<31) + 0.5);
+            e+= FRAC_BITS - 31 + 5 - 100;
+
+            /* normalized to FRAC_BITS */
+            table_4_3_value[i] = m;
+//            av_log(NULL, AV_LOG_DEBUG, "%d %d %f\n", i, m, pow((double)i, 4.0 / 3.0));
+            table_4_3_exp[i] = -e;
+        }
+        for(i=0; i<512*16; i++){
+            int exponent= (i>>4);
+            double f= pow(i&15, 4.0 / 3.0) * pow(2, (exponent-400)*0.25 + FRAC_BITS + 5);
+            expval_table[exponent][i&15]= llrint(f);
+            if((i&15)==1)
+                exp_table[exponent]= llrint(f);
+        }
+
+        for(i=0;i<7;i++) {
+            float f;
+            int v;
+            if (i != 6) {
+                f = tan((double)i * M_PI / 12.0);
+                v = FIXR(f / (1.0 + f));
+            } else {
+                v = FIXR(1.0);
+            }
+            is_table[0][i] = v;
+            is_table[1][6 - i] = v;
+        }
+        /* invalid values */
+        for(i=7;i<16;i++)
+            is_table[0][i] = is_table[1][i] = 0.0;
+
+        for(i=0;i<16;i++) {
+            double f;
+            int e, k;
+
+            for(j=0;j<2;j++) {
+                e = -(j + 1) * ((i + 1) >> 1);
+                f = pow(2.0, e / 4.0);
+                k = i & 1;
+                is_table_lsf[j][k ^ 1][i] = FIXR(f);
+                is_table_lsf[j][k][i] = FIXR(1.0);
+                dprintf("is_table_lsf %d %d: %x %x\n",
+                        i, j, is_table_lsf[j][0][i], is_table_lsf[j][1][i]);
+            }
+        }
+
+        for(i=0;i<8;i++) {
+            float ci, cs, ca;
+            ci = ci_table[i];
+            cs = 1.0 / sqrt(1.0 + ci * ci);
+            ca = cs * ci;
+            csa_table[i][0] = FIXHR(cs/4);
+            csa_table[i][1] = FIXHR(ca/4);
+            csa_table[i][2] = FIXHR(ca/4) + FIXHR(cs/4);
+            csa_table[i][3] = FIXHR(ca/4) - FIXHR(cs/4);
+            csa_table_float[i][0] = cs;
+            csa_table_float[i][1] = ca;
+            csa_table_float[i][2] = ca + cs;
+            csa_table_float[i][3] = ca - cs;
+//            printf("%d %d %d %d\n", FIX(cs), FIX(cs-1), FIX(ca), FIX(cs)-FIX(ca));
+//            av_log(NULL, AV_LOG_DEBUG,"%f %f %f %f\n", cs, ca, ca+cs, ca-cs);
+        }
+
+        /* compute mdct windows */
+        for(i=0;i<36;i++) {
+            for(j=0; j<4; j++){
+                double d;
+
+                if(j==2 && i%3 != 1)
+                    continue;
+
+                d= sin(M_PI * (i + 0.5) / 36.0);
+                if(j==1){
+                    if     (i>=30) d= 0;
+                    else if(i>=24) d= sin(M_PI * (i - 18 + 0.5) / 12.0);
+                    else if(i>=18) d= 1;
+                }else if(j==3){
+                    if     (i<  6) d= 0;
+                    else if(i< 12) d= sin(M_PI * (i -  6 + 0.5) / 12.0);
+                    else if(i< 18) d= 1;
+                }
+                //merge last stage of imdct into the window coefficients
+                d*= 0.5 / cos(M_PI*(2*i + 19)/72);
+
+                if(j==2)
+                    mdct_win[j][i/3] = FIXHR((d / (1<<5)));
+                else
+                    mdct_win[j][i  ] = FIXHR((d / (1<<5)));
+//                av_log(NULL, AV_LOG_DEBUG, "%2d %d %f\n", i,j,d / (1<<5));
+            }
+        }
+
+        /* NOTE: we do frequency inversion adter the MDCT by changing
+           the sign of the right window coefs */
+        for(j=0;j<4;j++) {
+            for(i=0;i<36;i+=2) {
+                mdct_win[j + 4][i] = mdct_win[j][i];
+                mdct_win[j + 4][i + 1] = -mdct_win[j][i + 1];
+            }
+        }
+
+#if defined(DEBUG)
+        for(j=0;j<8;j++) {
+            av_log(avctx, AV_LOG_DEBUG, "win%d=\n", j);
+            for(i=0;i<36;i++)
+                av_log(avctx, AV_LOG_DEBUG, "%f, ", (double)mdct_win[j][i] / FRAC_ONE);
+            av_log(avctx, AV_LOG_DEBUG, "\n");
+        }
+#endif
+        init = 1;
+    }
+
+#ifdef DEBUG
+    s->frame_count = 0;
+#endif
+    if (avctx->codec_id == CODEC_ID_MP3ADU)
+        s->adu_mode = 1;
+    return 0;
+}
+
+/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */
+
+/* cos(i*pi/64) */
+
+#define COS0_0  FIXHR(0.50060299823519630134/2)
+#define COS0_1  FIXHR(0.50547095989754365998/2)
+#define COS0_2  FIXHR(0.51544730992262454697/2)
+#define COS0_3  FIXHR(0.53104259108978417447/2)
+#define COS0_4  FIXHR(0.55310389603444452782/2)
+#define COS0_5  FIXHR(0.58293496820613387367/2)
+#define COS0_6  FIXHR(0.62250412303566481615/2)
+#define COS0_7  FIXHR(0.67480834145500574602/2)
+#define COS0_8  FIXHR(0.74453627100229844977/2)
+#define COS0_9  FIXHR(0.83934964541552703873/2)
+#define COS0_10 FIXHR(0.97256823786196069369/2)
+#define COS0_11 FIXHR(1.16943993343288495515/4)
+#define COS0_12 FIXHR(1.48416461631416627724/4)
+#define COS0_13 FIXHR(2.05778100995341155085/8)
+#define COS0_14 FIXHR(3.40760841846871878570/8)
+#define COS0_15 FIXHR(10.19000812354805681150/32)
+
+#define COS1_0 FIXHR(0.50241928618815570551/2)
+#define COS1_1 FIXHR(0.52249861493968888062/2)
+#define COS1_2 FIXHR(0.56694403481635770368/2)
+#define COS1_3 FIXHR(0.64682178335999012954/2)
+#define COS1_4 FIXHR(0.78815462345125022473/2)
+#define COS1_5 FIXHR(1.06067768599034747134/4)
+#define COS1_6 FIXHR(1.72244709823833392782/4)
+#define COS1_7 FIXHR(5.10114861868916385802/16)
+
+#define COS2_0 FIXHR(0.50979557910415916894/2)
+#define COS2_1 FIXHR(0.60134488693504528054/2)
+#define COS2_2 FIXHR(0.89997622313641570463/2)
+#define COS2_3 FIXHR(2.56291544774150617881/8)
+
+#define COS3_0 FIXHR(0.54119610014619698439/2)
+#define COS3_1 FIXHR(1.30656296487637652785/4)
+
+#define COS4_0 FIXHR(0.70710678118654752439/2)
+
+/* butterfly operator */
+#define BF(a, b, c, s)\
+{\
+    tmp0 = tab[a] + tab[b];\
+    tmp1 = tab[a] - tab[b];\
+    tab[a] = tmp0;\
+    tab[b] = MULH(tmp1<<(s), c);\
+}
+
+#define BF1(a, b, c, d)\
+{\
+    BF(a, b, COS4_0, 1);\
+    BF(c, d,-COS4_0, 1);\
+    tab[c] += tab[d];\
+}
+
+#define BF2(a, b, c, d)\
+{\
+    BF(a, b, COS4_0, 1);\
+    BF(c, d,-COS4_0, 1);\
+    tab[c] += tab[d];\
+    tab[a] += tab[c];\
+    tab[c] += tab[b];\
+    tab[b] += tab[d];\
+}
+
+#define ADD(a, b) tab[a] += tab[b]
+
+/* DCT32 without 1/sqrt(2) coef zero scaling. */
+static void dct32(int32_t *out, int32_t *tab)
+{
+    int tmp0, tmp1;
+
+    /* pass 1 */
+    BF( 0, 31, COS0_0 , 1);
+    BF(15, 16, COS0_15, 5);
+    /* pass 2 */
+    BF( 0, 15, COS1_0 , 1);
+    BF(16, 31,-COS1_0 , 1);
+    /* pass 1 */
+    BF( 7, 24, COS0_7 , 1);
+    BF( 8, 23, COS0_8 , 1);
+    /* pass 2 */
+    BF( 7,  8, COS1_7 , 4);
+    BF(23, 24,-COS1_7 , 4);
+    /* pass 3 */
+    BF( 0,  7, COS2_0 , 1);
+    BF( 8, 15,-COS2_0 , 1);
+    BF(16, 23, COS2_0 , 1);
+    BF(24, 31,-COS2_0 , 1);
+    /* pass 1 */
+    BF( 3, 28, COS0_3 , 1);
+    BF(12, 19, COS0_12, 2);
+    /* pass 2 */
+    BF( 3, 12, COS1_3 , 1);
+    BF(19, 28,-COS1_3 , 1);
+    /* pass 1 */
+    BF( 4, 27, COS0_4 , 1);
+    BF(11, 20, COS0_11, 2);
+    /* pass 2 */
+    BF( 4, 11, COS1_4 , 1);
+    BF(20, 27,-COS1_4 , 1);
+    /* pass 3 */
+    BF( 3,  4, COS2_3 , 3);
+    BF(11, 12,-COS2_3 , 3);
+    BF(19, 20, COS2_3 , 3);
+    BF(27, 28,-COS2_3 , 3);
+    /* pass 4 */
+    BF( 0,  3, COS3_0 , 1);
+    BF( 4,  7,-COS3_0 , 1);
+    BF( 8, 11, COS3_0 , 1);
+    BF(12, 15,-COS3_0 , 1);
+    BF(16, 19, COS3_0 , 1);
+    BF(20, 23,-COS3_0 , 1);
+    BF(24, 27, COS3_0 , 1);
+    BF(28, 31,-COS3_0 , 1);
+
+
+
+    /* pass 1 */
+    BF( 1, 30, COS0_1 , 1);
+    BF(14, 17, COS0_14, 3);
+    /* pass 2 */
+    BF( 1, 14, COS1_1 , 1);
+    BF(17, 30,-COS1_1 , 1);
+    /* pass 1 */
+    BF( 6, 25, COS0_6 , 1);
+    BF( 9, 22, COS0_9 , 1);
+    /* pass 2 */
+    BF( 6,  9, COS1_6 , 2);
+    BF(22, 25,-COS1_6 , 2);
+    /* pass 3 */
+    BF( 1,  6, COS2_1 , 1);
+    BF( 9, 14,-COS2_1 , 1);
+    BF(17, 22, COS2_1 , 1);
+    BF(25, 30,-COS2_1 , 1);
+
+    /* pass 1 */
+    BF( 2, 29, COS0_2 , 1);
+    BF(13, 18, COS0_13, 3);
+    /* pass 2 */
+    BF( 2, 13, COS1_2 , 1);
+    BF(18, 29,-COS1_2 , 1);
+    /* pass 1 */
+    BF( 5, 26, COS0_5 , 1);
+    BF(10, 21, COS0_10, 1);
+    /* pass 2 */
+    BF( 5, 10, COS1_5 , 2);
+    BF(21, 26,-COS1_5 , 2);
+    /* pass 3 */
+    BF( 2,  5, COS2_2 , 1);
+    BF(10, 13,-COS2_2 , 1);
+    BF(18, 21, COS2_2 , 1);
+    BF(26, 29,-COS2_2 , 1);
+    /* pass 4 */
+    BF( 1,  2, COS3_1 , 2);
+    BF( 5,  6,-COS3_1 , 2);
+    BF( 9, 10, COS3_1 , 2);
+    BF(13, 14,-COS3_1 , 2);
+    BF(17, 18, COS3_1 , 2);
+    BF(21, 22,-COS3_1 , 2);
+    BF(25, 26, COS3_1 , 2);
+    BF(29, 30,-COS3_1 , 2);
+
+    /* pass 5 */
+    BF1( 0,  1,  2,  3);
+    BF2( 4,  5,  6,  7);
+    BF1( 8,  9, 10, 11);
+    BF2(12, 13, 14, 15);
+    BF1(16, 17, 18, 19);
+    BF2(20, 21, 22, 23);
+    BF1(24, 25, 26, 27);
+    BF2(28, 29, 30, 31);
+
+    /* pass 6 */
+
+    ADD( 8, 12);
+    ADD(12, 10);
+    ADD(10, 14);
+    ADD(14,  9);
+    ADD( 9, 13);
+    ADD(13, 11);
+    ADD(11, 15);
+
+    out[ 0] = tab[0];
+    out[16] = tab[1];
+    out[ 8] = tab[2];
+    out[24] = tab[3];
+    out[ 4] = tab[4];
+    out[20] = tab[5];
+    out[12] = tab[6];
+    out[28] = tab[7];
+    out[ 2] = tab[8];
+    out[18] = tab[9];
+    out[10] = tab[10];
+    out[26] = tab[11];
+    out[ 6] = tab[12];
+    out[22] = tab[13];
+    out[14] = tab[14];
+    out[30] = tab[15];
+
+    ADD(24, 28);
+    ADD(28, 26);
+    ADD(26, 30);
+    ADD(30, 25);
+    ADD(25, 29);
+    ADD(29, 27);
+    ADD(27, 31);
+
+    out[ 1] = tab[16] + tab[24];
+    out[17] = tab[17] + tab[25];
+    out[ 9] = tab[18] + tab[26];
+    out[25] = tab[19] + tab[27];
+    out[ 5] = tab[20] + tab[28];
+    out[21] = tab[21] + tab[29];
+    out[13] = tab[22] + tab[30];
+    out[29] = tab[23] + tab[31];
+    out[ 3] = tab[24] + tab[20];
+    out[19] = tab[25] + tab[21];
+    out[11] = tab[26] + tab[22];
+    out[27] = tab[27] + tab[23];
+    out[ 7] = tab[28] + tab[18];
+    out[23] = tab[29] + tab[19];
+    out[15] = tab[30] + tab[17];
+    out[31] = tab[31];
+}
+
+#if FRAC_BITS <= 15
+
+static inline int round_sample(int *sum)
+{
+    int sum1;
+    sum1 = (*sum) >> OUT_SHIFT;
+    *sum &= (1<<OUT_SHIFT)-1;
+    if (sum1 < OUT_MIN)
+        sum1 = OUT_MIN;
+    else if (sum1 > OUT_MAX)
+        sum1 = OUT_MAX;
+    return sum1;
+}
+
+/* signed 16x16 -> 32 multiply add accumulate */
+#define MACS(rt, ra, rb) MAC16(rt, ra, rb)
+
+/* signed 16x16 -> 32 multiply */
+#define MULS(ra, rb) MUL16(ra, rb)
+
+#else
+
+static inline int round_sample(int64_t *sum)
+{
+    int sum1;
+    sum1 = (int)((*sum) >> OUT_SHIFT);
+    *sum &= (1<<OUT_SHIFT)-1;
+    if (sum1 < OUT_MIN)
+        sum1 = OUT_MIN;
+    else if (sum1 > OUT_MAX)
+        sum1 = OUT_MAX;
+    return sum1;
+}
+
+#   define MULS(ra, rb) MUL64(ra, rb)
+#endif
+
+#define SUM8(sum, op, w, p) \
+{                                               \
+    sum op MULS((w)[0 * 64], p[0 * 64]);\
+    sum op MULS((w)[1 * 64], p[1 * 64]);\
+    sum op MULS((w)[2 * 64], p[2 * 64]);\
+    sum op MULS((w)[3 * 64], p[3 * 64]);\
+    sum op MULS((w)[4 * 64], p[4 * 64]);\
+    sum op MULS((w)[5 * 64], p[5 * 64]);\
+    sum op MULS((w)[6 * 64], p[6 * 64]);\
+    sum op MULS((w)[7 * 64], p[7 * 64]);\
+}
+
+#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
+{                                               \
+    int tmp;\
+    tmp = p[0 * 64];\
+    sum1 op1 MULS((w1)[0 * 64], tmp);\
+    sum2 op2 MULS((w2)[0 * 64], tmp);\
+    tmp = p[1 * 64];\
+    sum1 op1 MULS((w1)[1 * 64], tmp);\
+    sum2 op2 MULS((w2)[1 * 64], tmp);\
+    tmp = p[2 * 64];\
+    sum1 op1 MULS((w1)[2 * 64], tmp);\
+    sum2 op2 MULS((w2)[2 * 64], tmp);\
+    tmp = p[3 * 64];\
+    sum1 op1 MULS((w1)[3 * 64], tmp);\
+    sum2 op2 MULS((w2)[3 * 64], tmp);\
+    tmp = p[4 * 64];\
+    sum1 op1 MULS((w1)[4 * 64], tmp);\
+    sum2 op2 MULS((w2)[4 * 64], tmp);\
+    tmp = p[5 * 64];\
+    sum1 op1 MULS((w1)[5 * 64], tmp);\
+    sum2 op2 MULS((w2)[5 * 64], tmp);\
+    tmp = p[6 * 64];\
+    sum1 op1 MULS((w1)[6 * 64], tmp);\
+    sum2 op2 MULS((w2)[6 * 64], tmp);\
+    tmp = p[7 * 64];\
+    sum1 op1 MULS((w1)[7 * 64], tmp);\
+    sum2 op2 MULS((w2)[7 * 64], tmp);\
+}
+
+void ff_mpa_synth_init(MPA_INT *window)
+{
+    int i;
+
+    /* max = 18760, max sum over all 16 coefs : 44736 */
+    for(i=0;i<257;i++) {
+        int v;
+        v = mpa_enwindow[i];
+#if WFRAC_BITS < 16
+        v = (v + (1 << (16 - WFRAC_BITS - 1))) >> (16 - WFRAC_BITS);
+#endif
+        window[i] = v;
+        if ((i & 63) != 0)
+            v = -v;
+        if (i != 0)
+            window[512 - i] = v;
+    }
+}
+
+/* 32 sub band synthesis filter. Input: 32 sub band samples, Output:
+   32 samples. */
+/* XXX: optimize by avoiding ring buffer usage */
+void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
+                         MPA_INT *window, int *dither_state,
+                         OUT_INT *samples, int incr,
+                         int32_t sb_samples[SBLIMIT])
+{
+    int32_t tmp[32];
+    register MPA_INT *synth_buf;
+    register const MPA_INT *w, *w2, *p;
+    int j, offset, v;
+    OUT_INT *samples2;
+#if FRAC_BITS <= 15
+    int sum, sum2;
+#else
+    int64_t sum, sum2;
+#endif
+
+    dct32(tmp, sb_samples);
+
+    offset = *synth_buf_offset;
+    synth_buf = synth_buf_ptr + offset;
+
+    for(j=0;j<32;j++) {
+        v = tmp[j];
+#if FRAC_BITS <= 15
+        /* NOTE: can cause a loss in precision if very high amplitude
+           sound */
+        if (v > 32767)
+            v = 32767;
+        else if (v < -32768)
+            v = -32768;
+#endif
+        synth_buf[j] = v;
+    }
+    /* copy to avoid wrap */
+    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(MPA_INT));
+
+    samples2 = samples + 31 * incr;
+    w = window;
+    w2 = window + 31;
+
+    sum = *dither_state;
+    p = synth_buf + 16;
+    SUM8(sum, +=, w, p);
+    p = synth_buf + 48;
+    SUM8(sum, -=, w + 32, p);
+    *samples = round_sample(&sum);
+    samples += incr;
+    w++;
+
+    /* we calculate two samples at the same time to avoid one memory
+       access per two sample */
+    for(j=1;j<16;j++) {
+        sum2 = 0;
+        p = synth_buf + 16 + j;
+        SUM8P2(sum, +=, sum2, -=, w, w2, p);
+        p = synth_buf + 48 - j;
+        SUM8P2(sum, -=, sum2, -=, w + 32, w2 + 32, p);
+
+        *samples = round_sample(&sum);
+        samples += incr;
+        sum += sum2;
+        *samples2 = round_sample(&sum);
+        samples2 -= incr;
+        w++;
+        w2--;
+    }
+
+    p = synth_buf + 32;
+    SUM8(sum, -=, w + 32, p);
+    *samples = round_sample(&sum);
+    *dither_state= sum;
+
+    offset = (offset - 32) & 511;
+    *synth_buf_offset = offset;
+}
+
+#define C3 FIXHR(0.86602540378443864676/2)
+
+/* 0.5 / cos(pi*(2*i+1)/36) */
+static const int icos36[9] = {
+    FIXR(0.50190991877167369479),
+    FIXR(0.51763809020504152469), //0
+    FIXR(0.55168895948124587824),
+    FIXR(0.61038729438072803416),
+    FIXR(0.70710678118654752439), //1
+    FIXR(0.87172339781054900991),
+    FIXR(1.18310079157624925896),
+    FIXR(1.93185165257813657349), //2
+    FIXR(5.73685662283492756461),
+};
+
+/* 0.5 / cos(pi*(2*i+1)/36) */
+static const int icos36h[9] = {
+    FIXHR(0.50190991877167369479/2),
+    FIXHR(0.51763809020504152469/2), //0
+    FIXHR(0.55168895948124587824/2),
+    FIXHR(0.61038729438072803416/2),
+    FIXHR(0.70710678118654752439/2), //1
+    FIXHR(0.87172339781054900991/2),
+    FIXHR(1.18310079157624925896/4),
+    FIXHR(1.93185165257813657349/4), //2
+//    FIXHR(5.73685662283492756461),
+};
+
+/* 12 points IMDCT. We compute it "by hand" by factorizing obvious
+   cases. */
+static void imdct12(int *out, int *in)
+{
+    int in0, in1, in2, in3, in4, in5, t1, t2;
+
+    in0= in[0*3];
+    in1= in[1*3] + in[0*3];
+    in2= in[2*3] + in[1*3];
+    in3= in[3*3] + in[2*3];
+    in4= in[4*3] + in[3*3];
+    in5= in[5*3] + in[4*3];
+    in5 += in3;
+    in3 += in1;
+
+    in2= MULH(2*in2, C3);
+    in3= MULH(4*in3, C3);
+
+    t1 = in0 - in4;
+    t2 = MULH(2*(in1 - in5), icos36h[4]);
+
+    out[ 7]=
+    out[10]= t1 + t2;
+    out[ 1]=
+    out[ 4]= t1 - t2;
+
+    in0 += in4>>1;
+    in4 = in0 + in2;
+    in5 += 2*in1;
+    in1 = MULH(in5 + in3, icos36h[1]);
+    out[ 8]=
+    out[ 9]= in4 + in1;
+    out[ 2]=
+    out[ 3]= in4 - in1;
+
+    in0 -= in2;
+    in5 = MULH(2*(in5 - in3), icos36h[7]);
+    out[ 0]=
+    out[ 5]= in0 - in5;
+    out[ 6]=
+    out[11]= in0 + in5;
+}
+
+/* cos(pi*i/18) */
+#define C1 FIXHR(0.98480775301220805936/2)
+#define C2 FIXHR(0.93969262078590838405/2)
+#define C3 FIXHR(0.86602540378443864676/2)
+#define C4 FIXHR(0.76604444311897803520/2)
+#define C5 FIXHR(0.64278760968653932632/2)
+#define C6 FIXHR(0.5/2)
+#define C7 FIXHR(0.34202014332566873304/2)
+#define C8 FIXHR(0.17364817766693034885/2)
+
+
+/* using Lee like decomposition followed by hand coded 9 points DCT */
+static void imdct36(int *out, int *buf, int *in, int *win)
+{
+    int i, j, t0, t1, t2, t3, s0, s1, s2, s3;
+    int tmp[18], *tmp1, *in1;
+
+    for(i=17;i>=1;i--)
+        in[i] += in[i-1];
+    for(i=17;i>=3;i-=2)
+        in[i] += in[i-2];
+
+    for(j=0;j<2;j++) {
+        tmp1 = tmp + j;
+        in1 = in + j;
+#if 0
+//more accurate but slower
+        int64_t t0, t1, t2, t3;
+        t2 = in1[2*4] + in1[2*8] - in1[2*2];
+
+        t3 = (in1[2*0] + (int64_t)(in1[2*6]>>1))<<32;
+        t1 = in1[2*0] - in1[2*6];
+        tmp1[ 6] = t1 - (t2>>1);
+        tmp1[16] = t1 + t2;
+
+        t0 = MUL64(2*(in1[2*2] + in1[2*4]),    C2);
+        t1 = MUL64(   in1[2*4] - in1[2*8] , -2*C8);
+        t2 = MUL64(2*(in1[2*2] + in1[2*8]),   -C4);
+
+        tmp1[10] = (t3 - t0 - t2) >> 32;
+        tmp1[ 2] = (t3 + t0 + t1) >> 32;
+        tmp1[14] = (t3 + t2 - t1) >> 32;
+
+        tmp1[ 4] = MULH(2*(in1[2*5] + in1[2*7] - in1[2*1]), -C3);
+        t2 = MUL64(2*(in1[2*1] + in1[2*5]),    C1);
+        t3 = MUL64(   in1[2*5] - in1[2*7] , -2*C7);
+        t0 = MUL64(2*in1[2*3], C3);
+
+        t1 = MUL64(2*(in1[2*1] + in1[2*7]),   -C5);
+
+        tmp1[ 0] = (t2 + t3 + t0) >> 32;
+        tmp1[12] = (t2 + t1 - t0) >> 32;
+        tmp1[ 8] = (t3 - t1 - t0) >> 32;
+#else
+        t2 = in1[2*4] + in1[2*8] - in1[2*2];
+
+        t3 = in1[2*0] + (in1[2*6]>>1);
+        t1 = in1[2*0] - in1[2*6];
+        tmp1[ 6] = t1 - (t2>>1);
+        tmp1[16] = t1 + t2;
+
+        t0 = MULH(2*(in1[2*2] + in1[2*4]),    C2);
+        t1 = MULH(   in1[2*4] - in1[2*8] , -2*C8);
+        t2 = MULH(2*(in1[2*2] + in1[2*8]),   -C4);
+
+        tmp1[10] = t3 - t0 - t2;
+        tmp1[ 2] = t3 + t0 + t1;
+        tmp1[14] = t3 + t2 - t1;
+
+        tmp1[ 4] = MULH(2*(in1[2*5] + in1[2*7] - in1[2*1]), -C3);
+        t2 = MULH(2*(in1[2*1] + in1[2*5]),    C1);
+        t3 = MULH(   in1[2*5] - in1[2*7] , -2*C7);
+        t0 = MULH(2*in1[2*3], C3);
+
+        t1 = MULH(2*(in1[2*1] + in1[2*7]),   -C5);
+
+        tmp1[ 0] = t2 + t3 + t0;
+        tmp1[12] = t2 + t1 - t0;
+        tmp1[ 8] = t3 - t1 - t0;
+#endif
+    }
+
+    i = 0;
+    for(j=0;j<4;j++) {
+        t0 = tmp[i];
+        t1 = tmp[i + 2];
+        s0 = t1 + t0;
+        s2 = t1 - t0;
+
+        t2 = tmp[i + 1];
+        t3 = tmp[i + 3];
+        s1 = MULH(2*(t3 + t2), icos36h[j]);
+        s3 = MULL(t3 - t2, icos36[8 - j]);
+
+        t0 = s0 + s1;
+        t1 = s0 - s1;
+        out[(9 + j)*SBLIMIT] =  MULH(t1, win[9 + j]) + buf[9 + j];
+        out[(8 - j)*SBLIMIT] =  MULH(t1, win[8 - j]) + buf[8 - j];
+        buf[9 + j] = MULH(t0, win[18 + 9 + j]);
+        buf[8 - j] = MULH(t0, win[18 + 8 - j]);
+
+        t0 = s2 + s3;
+        t1 = s2 - s3;
+        out[(9 + 8 - j)*SBLIMIT] =  MULH(t1, win[9 + 8 - j]) + buf[9 + 8 - j];
+        out[(        j)*SBLIMIT] =  MULH(t1, win[        j]) + buf[        j];
+        buf[9 + 8 - j] = MULH(t0, win[18 + 9 + 8 - j]);
+        buf[      + j] = MULH(t0, win[18         + j]);
+        i += 4;
+    }
+
+    s0 = tmp[16];
+    s1 = MULH(2*tmp[17], icos36h[4]);
+    t0 = s0 + s1;
+    t1 = s0 - s1;
+    out[(9 + 4)*SBLIMIT] =  MULH(t1, win[9 + 4]) + buf[9 + 4];
+    out[(8 - 4)*SBLIMIT] =  MULH(t1, win[8 - 4]) + buf[8 - 4];
+    buf[9 + 4] = MULH(t0, win[18 + 9 + 4]);
+    buf[8 - 4] = MULH(t0, win[18 + 8 - 4]);
+}
+
+/* header decoding. MUST check the header before because no
+   consistency check is done there. Return 1 if free format found and
+   that the frame size must be computed externally */
+static int decode_header(MPADecodeContext *s, uint32_t header)
+{
+    int sample_rate, frame_size, mpeg25, padding;
+    int sample_rate_index, bitrate_index;
+    if (header & (1<<20)) {
+        s->lsf = (header & (1<<19)) ? 0 : 1;
+        mpeg25 = 0;
+    } else {
+        s->lsf = 1;
+        mpeg25 = 1;
+    }
+
+    s->layer = 4 - ((header >> 17) & 3);
+    /* extract frequency */
+    sample_rate_index = (header >> 10) & 3;
+    sample_rate = mpa_freq_tab[sample_rate_index] >> (s->lsf + mpeg25);
+    sample_rate_index += 3 * (s->lsf + mpeg25);
+    s->sample_rate_index = sample_rate_index;
+    s->error_protection = ((header >> 16) & 1) ^ 1;
+    s->sample_rate = sample_rate;
+
+    bitrate_index = (header >> 12) & 0xf;
+    padding = (header >> 9) & 1;
+    //extension = (header >> 8) & 1;
+    s->mode = (header >> 6) & 3;
+    s->mode_ext = (header >> 4) & 3;
+    //copyright = (header >> 3) & 1;
+    //original = (header >> 2) & 1;
+    //emphasis = header & 3;
+
+    if (s->mode == MPA_MONO)
+        s->nb_channels = 1;
+    else
+        s->nb_channels = 2;
+
+    if (bitrate_index != 0) {
+        frame_size = mpa_bitrate_tab[s->lsf][s->layer - 1][bitrate_index];
+        s->bit_rate = frame_size * 1000;
+        switch(s->layer) {
+        case 1:
+            frame_size = (frame_size * 12000) / sample_rate;
+            frame_size = (frame_size + padding) * 4;
+            break;
+        case 2:
+            frame_size = (frame_size * 144000) / sample_rate;
+            frame_size += padding;
+            break;
+        default:
+        case 3:
+            frame_size = (frame_size * 144000) / (sample_rate << s->lsf);
+            frame_size += padding;
+            break;
+        }
+        s->frame_size = frame_size;
+    } else {
+        /* if no frame size computed, signal it */
+        return 1;
+    }
+
+#if defined(DEBUG)
+    dprintf("layer%d, %d Hz, %d kbits/s, ",
+           s->layer, s->sample_rate, s->bit_rate);
+    if (s->nb_channels == 2) {
+        if (s->layer == 3) {
+            if (s->mode_ext & MODE_EXT_MS_STEREO)
+                dprintf("ms-");
+            if (s->mode_ext & MODE_EXT_I_STEREO)
+                dprintf("i-");
+        }
+        dprintf("stereo");
+    } else {
+        dprintf("mono");
+    }
+    dprintf("\n");
+#endif
+    return 0;
+}
+
+/* useful helper to get mpeg audio stream infos. Return -1 if error in
+   header, otherwise the coded frame size in bytes */
+int mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate)
+{
+    MPADecodeContext s1, *s = &s1;
+
+    if (ff_mpa_check_header(head) != 0)
+        return -1;
+
+    if (decode_header(s, head) != 0) {
+        return -1;
+    }
+
+    switch(s->layer) {
+    case 1:
+        avctx->frame_size = 384;
+        break;
+    case 2:
+        avctx->frame_size = 1152;
+        break;
+    default:
+    case 3:
+        if (s->lsf)
+            avctx->frame_size = 576;
+        else
+            avctx->frame_size = 1152;
+        break;
+    }
+
+    *sample_rate = s->sample_rate;
+    avctx->channels = s->nb_channels;
+    avctx->bit_rate = s->bit_rate;
+    avctx->sub_id = s->layer;
+    return s->frame_size;
+}
+
+/* return the number of decoded frames */
+static int mp_decode_layer1(MPADecodeContext *s)
+{
+    int bound, i, v, n, ch, j, mant;
+    uint8_t allocation[MPA_MAX_CHANNELS][SBLIMIT];
+    uint8_t scale_factors[MPA_MAX_CHANNELS][SBLIMIT];
+
+    if (s->mode == MPA_JSTEREO)
+        bound = (s->mode_ext + 1) * 4;
+    else
+        bound = SBLIMIT;
+
+    /* allocation bits */
+    for(i=0;i<bound;i++) {
+        for(ch=0;ch<s->nb_channels;ch++) {
+            allocation[ch][i] = get_bits(&s->gb, 4);
+        }
+    }
+    for(i=bound;i<SBLIMIT;i++) {
+        allocation[0][i] = get_bits(&s->gb, 4);
+    }
+
+    /* scale factors */
+    for(i=0;i<bound;i++) {
+        for(ch=0;ch<s->nb_channels;ch++) {
+            if (allocation[ch][i])
+                scale_factors[ch][i] = get_bits(&s->gb, 6);
+        }
+    }
+    for(i=bound;i<SBLIMIT;i++) {
+        if (allocation[0][i]) {
+            scale_factors[0][i] = get_bits(&s->gb, 6);
+            scale_factors[1][i] = get_bits(&s->gb, 6);
+        }
+    }
+
+    /* compute samples */
+    for(j=0;j<12;j++) {
+        for(i=0;i<bound;i++) {
+            for(ch=0;ch<s->nb_channels;ch++) {
+                n = allocation[ch][i];
+                if (n) {
+                    mant = get_bits(&s->gb, n + 1);
+                    v = l1_unscale(n, mant, scale_factors[ch][i]);
+                } else {
+                    v = 0;
+                }
+                s->sb_samples[ch][j][i] = v;
+            }
+        }
+        for(i=bound;i<SBLIMIT;i++) {
+            n = allocation[0][i];
+            if (n) {
+                mant = get_bits(&s->gb, n + 1);
+                v = l1_unscale(n, mant, scale_factors[0][i]);
+                s->sb_samples[0][j][i] = v;
+                v = l1_unscale(n, mant, scale_factors[1][i]);
+                s->sb_samples[1][j][i] = v;
+            } else {
+                s->sb_samples[0][j][i] = 0;
+                s->sb_samples[1][j][i] = 0;
+            }
+        }
+    }
+    return 12;
+}
+
+/* bitrate is in kb/s */
+int l2_select_table(int bitrate, int nb_channels, int freq, int lsf)
+{
+    int ch_bitrate, table;
+
+    ch_bitrate = bitrate / nb_channels;
+    if (!lsf) {
+        if ((freq == 48000 && ch_bitrate >= 56) ||
+            (ch_bitrate >= 56 && ch_bitrate <= 80))
+            table = 0;
+        else if (freq != 48000 && ch_bitrate >= 96)
+            table = 1;
+        else if (freq != 32000 && ch_bitrate <= 48)
+            table = 2;
+        else
+            table = 3;
+    } else {
+        table = 4;
+    }
+    return table;
+}
+
+static int mp_decode_layer2(MPADecodeContext *s)
+{
+    int sblimit; /* number of used subbands */
+    const unsigned char *alloc_table;
+    int table, bit_alloc_bits, i, j, ch, bound, v;
+    unsigned char bit_alloc[MPA_MAX_CHANNELS][SBLIMIT];
+    unsigned char scale_code[MPA_MAX_CHANNELS][SBLIMIT];
+    unsigned char scale_factors[MPA_MAX_CHANNELS][SBLIMIT][3], *sf;
+    int scale, qindex, bits, steps, k, l, m, b;
+
+    /* select decoding table */
+    table = l2_select_table(s->bit_rate / 1000, s->nb_channels,
+                            s->sample_rate, s->lsf);
+    sblimit = sblimit_table[table];
+    alloc_table = alloc_tables[table];
+
+    if (s->mode == MPA_JSTEREO)
+        bound = (s->mode_ext + 1) * 4;
+    else
+        bound = sblimit;
+
+    dprintf("bound=%d sblimit=%d\n", bound, sblimit);
+
+    /* sanity check */
+    if( bound > sblimit ) bound = sblimit;
+
+    /* parse bit allocation */
+    j = 0;
+    for(i=0;i<bound;i++) {
+        bit_alloc_bits = alloc_table[j];
+        for(ch=0;ch<s->nb_channels;ch++) {
+            bit_alloc[ch][i] = get_bits(&s->gb, bit_alloc_bits);
+        }
+        j += 1 << bit_alloc_bits;
+    }
+    for(i=bound;i<sblimit;i++) {
+        bit_alloc_bits = alloc_table[j];
+        v = get_bits(&s->gb, bit_alloc_bits);
+        bit_alloc[0][i] = v;
+        bit_alloc[1][i] = v;
+        j += 1 << bit_alloc_bits;
+    }
+
+#ifdef DEBUG
+    {
+        for(ch=0;ch<s->nb_channels;ch++) {
+            for(i=0;i<sblimit;i++)
+                dprintf(" %d", bit_alloc[ch][i]);
+            dprintf("\n");
+        }
+    }
+#endif
+
+    /* scale codes */
+    for(i=0;i<sblimit;i++) {
+        for(ch=0;ch<s->nb_channels;ch++) {
+            if (bit_alloc[ch][i])
+                scale_code[ch][i] = get_bits(&s->gb, 2);
+        }
+    }
+
+    /* scale factors */
+    for(i=0;i<sblimit;i++) {
+        for(ch=0;ch<s->nb_channels;ch++) {
+            if (bit_alloc[ch][i]) {
+                sf = scale_factors[ch][i];
+                switch(scale_code[ch][i]) {
+                default:
+                case 0:
+                    sf[0] = get_bits(&s->gb, 6);
+                    sf[1] = get_bits(&s->gb, 6);
+                    sf[2] = get_bits(&s->gb, 6);
+                    break;
+                case 2:
+                    sf[0] = get_bits(&s->gb, 6);
+                    sf[1] = sf[0];
+                    sf[2] = sf[0];
+                    break;
+                case 1:
+                    sf[0] = get_bits(&s->gb, 6);
+                    sf[2] = get_bits(&s->gb, 6);
+                    sf[1] = sf[0];
+                    break;
+                case 3:
+                    sf[0] = get_bits(&s->gb, 6);
+                    sf[2] = get_bits(&s->gb, 6);
+                    sf[1] = sf[2];
+                    break;
+                }
+            }
+        }
+    }
+
+#ifdef DEBUG
+    for(ch=0;ch<s->nb_channels;ch++) {
+        for(i=0;i<sblimit;i++) {
+            if (bit_alloc[ch][i]) {
+                sf = scale_factors[ch][i];
+                dprintf(" %d %d %d", sf[0], sf[1], sf[2]);
+            } else {
+                dprintf(" -");
+            }
+        }
+        dprintf("\n");
+    }
+#endif
+
+    /* samples */
+    for(k=0;k<3;k++) {
+        for(l=0;l<12;l+=3) {
+            j = 0;
+            for(i=0;i<bound;i++) {
+                bit_alloc_bits = alloc_table[j];
+                for(ch=0;ch<s->nb_channels;ch++) {
+                    b = bit_alloc[ch][i];
+                    if (b) {
+                        scale = scale_factors[ch][i][k];
+                        qindex = alloc_table[j+b];
+                        bits = quant_bits[qindex];
+                        if (bits < 0) {
+                            /* 3 values at the same time */
+                            v = get_bits(&s->gb, -bits);
+                            steps = quant_steps[qindex];
+                            s->sb_samples[ch][k * 12 + l + 0][i] =
+                                l2_unscale_group(steps, v % steps, scale);
+                            v = v / steps;
+                            s->sb_samples[ch][k * 12 + l + 1][i] =
+                                l2_unscale_group(steps, v % steps, scale);
+                            v = v / steps;
+                            s->sb_samples[ch][k * 12 + l + 2][i] =
+                                l2_unscale_group(steps, v, scale);
+                        } else {
+                            for(m=0;m<3;m++) {
+                                v = get_bits(&s->gb, bits);
+                                v = l1_unscale(bits - 1, v, scale);
+                                s->sb_samples[ch][k * 12 + l + m][i] = v;
+                            }
+                        }
+                    } else {
+                        s->sb_samples[ch][k * 12 + l + 0][i] = 0;
+                        s->sb_samples[ch][k * 12 + l + 1][i] = 0;
+                        s->sb_samples[ch][k * 12 + l + 2][i] = 0;
+                    }
+                }
+                /* next subband in alloc table */
+                j += 1 << bit_alloc_bits;
+            }
+            /* XXX: find a way to avoid this duplication of code */
+            for(i=bound;i<sblimit;i++) {
+                bit_alloc_bits = alloc_table[j];
+                b = bit_alloc[0][i];
+                if (b) {
+                    int mant, scale0, scale1;
+                    scale0 = scale_factors[0][i][k];
+                    scale1 = scale_factors[1][i][k];
+                    qindex = alloc_table[j+b];
+                    bits = quant_bits[qindex];
+                    if (bits < 0) {
+                        /* 3 values at the same time */
+                        v = get_bits(&s->gb, -bits);
+                        steps = quant_steps[qindex];
+                        mant = v % steps;
+                        v = v / steps;
+                        s->sb_samples[0][k * 12 + l + 0][i] =
+                            l2_unscale_group(steps, mant, scale0);
+                        s->sb_samples[1][k * 12 + l + 0][i] =
+                            l2_unscale_group(steps, mant, scale1);
+                        mant = v % steps;
+                        v = v / steps;
+                        s->sb_samples[0][k * 12 + l + 1][i] =
+                            l2_unscale_group(steps, mant, scale0);
+                        s->sb_samples[1][k * 12 + l + 1][i] =
+                            l2_unscale_group(steps, mant, scale1);
+                        s->sb_samples[0][k * 12 + l + 2][i] =
+                            l2_unscale_group(steps, v, scale0);
+                        s->sb_samples[1][k * 12 + l + 2][i] =
+                            l2_unscale_group(steps, v, scale1);
+                    } else {
+                        for(m=0;m<3;m++) {
+                            mant = get_bits(&s->gb, bits);
+                            s->sb_samples[0][k * 12 + l + m][i] =
+                                l1_unscale(bits - 1, mant, scale0);
+                            s->sb_samples[1][k * 12 + l + m][i] =
+                                l1_unscale(bits - 1, mant, scale1);
+                        }
+                    }
+                } else {
+                    s->sb_samples[0][k * 12 + l + 0][i] = 0;
+                    s->sb_samples[0][k * 12 + l + 1][i] = 0;
+                    s->sb_samples[0][k * 12 + l + 2][i] = 0;
+                    s->sb_samples[1][k * 12 + l + 0][i] = 0;
+                    s->sb_samples[1][k * 12 + l + 1][i] = 0;
+                    s->sb_samples[1][k * 12 + l + 2][i] = 0;
+                }
+                /* next subband in alloc table */
+                j += 1 << bit_alloc_bits;
+            }
+            /* fill remaining samples to zero */
+            for(i=sblimit;i<SBLIMIT;i++) {
+                for(ch=0;ch<s->nb_channels;ch++) {
+                    s->sb_samples[ch][k * 12 + l + 0][i] = 0;
+                    s->sb_samples[ch][k * 12 + l + 1][i] = 0;
+                    s->sb_samples[ch][k * 12 + l + 2][i] = 0;
+                }
+            }
+        }
+    }
+    return 3 * 12;
+}
+
+static inline void lsf_sf_expand(int *slen,
+                                 int sf, int n1, int n2, int n3)
+{
+    if (n3) {
+        slen[3] = sf % n3;
+        sf /= n3;
+    } else {
+        slen[3] = 0;
+    }
+    if (n2) {
+        slen[2] = sf % n2;
+        sf /= n2;
+    } else {
+        slen[2] = 0;
+    }
+    slen[1] = sf % n1;
+    sf /= n1;
+    slen[0] = sf;
+}
+
+static void exponents_from_scale_factors(MPADecodeContext *s,
+                                         GranuleDef *g,
+                                         int16_t *exponents)
+{
+    const uint8_t *bstab, *pretab;
+    int len, i, j, k, l, v0, shift, gain, gains[3];
+    int16_t *exp_ptr;
+
+    exp_ptr = exponents;
+    gain = g->global_gain - 210;
+    shift = g->scalefac_scale + 1;
+
+    bstab = band_size_long[s->sample_rate_index];
+    pretab = mpa_pretab[g->preflag];
+    for(i=0;i<g->long_end;i++) {
+        v0 = gain - ((g->scale_factors[i] + pretab[i]) << shift) + 400;
+        len = bstab[i];
+        for(j=len;j>0;j--)
+            *exp_ptr++ = v0;
+    }
+
+    if (g->short_start < 13) {
+        bstab = band_size_short[s->sample_rate_index];
+        gains[0] = gain - (g->subblock_gain[0] << 3);
+        gains[1] = gain - (g->subblock_gain[1] << 3);
+        gains[2] = gain - (g->subblock_gain[2] << 3);
+        k = g->long_end;
+        for(i=g->short_start;i<13;i++) {
+            len = bstab[i];
+            for(l=0;l<3;l++) {
+                v0 = gains[l] - (g->scale_factors[k++] << shift) + 400;
+                for(j=len;j>0;j--)
+                *exp_ptr++ = v0;
+            }
+        }
+    }
+}
+
+/* handle n = 0 too */
+static inline int get_bitsz(GetBitContext *s, int n)
+{
+    if (n == 0)
+        return 0;
+    else
+        return get_bits(s, n);
+}
+
+static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
+                          int16_t *exponents, int end_pos2)
+{
+    int s_index;
+    int i;
+    int last_pos, bits_left;
+    VLC *vlc;
+    int end_pos= FFMIN(end_pos2, s->gb.size_in_bits);
+
+    /* low frequencies (called big values) */
+    s_index = 0;
+    for(i=0;i<3;i++) {
+        int j, k, l, linbits;
+        j = g->region_size[i];
+        if (j == 0)
+            continue;
+        /* select vlc table */
+        k = g->table_select[i];
+        l = mpa_huff_data[k][0];
+        linbits = mpa_huff_data[k][1];
+        vlc = &huff_vlc[l];
+
+        if(!l){
+            memset(&g->sb_hybrid[s_index], 0, sizeof(*g->sb_hybrid)*2*j);
+            s_index += 2*j;
+            continue;
+        }
+
+        /* read huffcode and compute each couple */
+        for(;j>0;j--) {
+            int exponent, x, y, v;
+            int pos= get_bits_count(&s->gb);
+
+            if (pos >= end_pos){
+//                av_log(NULL, AV_LOG_ERROR, "pos: %d %d %d %d\n", pos, end_pos, end_pos2, s_index);
+                if(s->in_gb.buffer && pos >= s->gb.size_in_bits){
+                    s->gb= s->in_gb;
+                    s->in_gb.buffer=NULL;
+                    assert((get_bits_count(&s->gb) & 7) == 0);
+                    skip_bits_long(&s->gb, pos - end_pos);
+                    end_pos2=
+                    end_pos= end_pos2 + get_bits_count(&s->gb) - pos;
+                    pos= get_bits_count(&s->gb);
+                }
+//                av_log(NULL, AV_LOG_ERROR, "new pos: %d %d\n", pos, end_pos);
+                if(pos >= end_pos)
+                    break;
+            }
+            y = get_vlc2(&s->gb, vlc->table, 7, 3);
+
+            if(!y){
+                g->sb_hybrid[s_index  ] =
+                g->sb_hybrid[s_index+1] = 0;
+                s_index += 2;
+                continue;
+            }
+
+            exponent= exponents[s_index];
+
+            dprintf("region=%d n=%d x=%d y=%d exp=%d\n",
+                    i, g->region_size[i] - j, x, y, exponent);
+            if(y&16){
+                x = y >> 5;
+                y = y & 0x0f;
+                if (x < 15){
+                    v = expval_table[ exponent ][ x ];
+//                      v = expval_table[ (exponent&3) ][ x ] >> FFMIN(0 - (exponent>>2), 31);
+                }else{
+                    x += get_bitsz(&s->gb, linbits);
+                    v = l3_unscale(x, exponent);
+                }
+                if (get_bits1(&s->gb))
+                    v = -v;
+                g->sb_hybrid[s_index] = v;
+                if (y < 15){
+                    v = expval_table[ exponent ][ y ];
+                }else{
+                    y += get_bitsz(&s->gb, linbits);
+                    v = l3_unscale(y, exponent);
+                }
+                if (get_bits1(&s->gb))
+                    v = -v;
+                g->sb_hybrid[s_index+1] = v;
+            }else{
+                x = y >> 5;
+                y = y & 0x0f;
+                x += y;
+                if (x < 15){
+                    v = expval_table[ exponent ][ x ];
+                }else{
+                    x += get_bitsz(&s->gb, linbits);
+                    v = l3_unscale(x, exponent);
+                }
+                if (get_bits1(&s->gb))
+                    v = -v;
+                g->sb_hybrid[s_index+!!y] = v;
+                g->sb_hybrid[s_index+ !y] = 0;
+            }
+            s_index+=2;
+        }
+    }
+
+    /* high frequencies */
+    vlc = &huff_quad_vlc[g->count1table_select];
+    last_pos=0;
+    while (s_index <= 572) {
+        int pos, code;
+        pos = get_bits_count(&s->gb);
+        if (pos >= end_pos) {
+            if (pos > end_pos2 && last_pos){
+                /* some encoders generate an incorrect size for this
+                   part. We must go back into the data */
+                s_index -= 4;
+                skip_bits_long(&s->gb, last_pos - pos);
+                av_log(NULL, AV_LOG_INFO, "overread, skip %d enddists: %d %d\n", last_pos - pos, end_pos-pos, end_pos2-pos);
+                if(s->error_resilience >= FF_ER_COMPLIANT)
+                    s_index=0;
+                break;
+            }
+//                av_log(NULL, AV_LOG_ERROR, "pos2: %d %d %d %d\n", pos, end_pos, end_pos2, s_index);
+            if(s->in_gb.buffer && pos >= s->gb.size_in_bits){
+                s->gb= s->in_gb;
+                s->in_gb.buffer=NULL;
+                assert((get_bits_count(&s->gb) & 7) == 0);
+                skip_bits_long(&s->gb, pos - end_pos);
+                end_pos2=
+                end_pos= end_pos2 + get_bits_count(&s->gb) - pos;
+                pos= get_bits_count(&s->gb);
+            }
+//                av_log(NULL, AV_LOG_ERROR, "new pos2: %d %d %d\n", pos, end_pos, s_index);
+            if(pos >= end_pos)
+                break;
+        }
+        last_pos= pos;
+
+        code = get_vlc2(&s->gb, vlc->table, vlc->bits, 1);
+        dprintf("t=%d code=%d\n", g->count1table_select, code);
+        g->sb_hybrid[s_index+0]=
+        g->sb_hybrid[s_index+1]=
+        g->sb_hybrid[s_index+2]=
+        g->sb_hybrid[s_index+3]= 0;
+        while(code){
+            const static int idxtab[16]={3,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0};
+            int v;
+            int pos= s_index+idxtab[code];
+            code ^= 8>>idxtab[code];
+            v = exp_table[ exponents[pos] ];
+//            v = exp_table[ (exponents[pos]&3) ] >> FFMIN(0 - (exponents[pos]>>2), 31);
+            if(get_bits1(&s->gb))
+                v = -v;
+            g->sb_hybrid[pos] = v;
+        }
+        s_index+=4;
+    }
+    /* skip extension bits */
+    bits_left = end_pos - get_bits_count(&s->gb);
+//av_log(NULL, AV_LOG_ERROR, "left:%d buf:%p\n", bits_left, s->in_gb.buffer);
+    if (bits_left < 0 || bits_left > 16) {
+        av_log(NULL, AV_LOG_ERROR, "bits_left=%d\n", bits_left);
+        s_index=0;
+    }else if(bits_left > 0 && s->error_resilience >= FF_ER_AGGRESSIVE){
+        av_log(NULL, AV_LOG_ERROR, "bits_left=%d\n", bits_left);
+        s_index=0;
+    }
+    memset(&g->sb_hybrid[s_index], 0, sizeof(*g->sb_hybrid)*(576 - s_index));
+    skip_bits_long(&s->gb, bits_left);
+
+    return 0;
+}
+
+/* Reorder short blocks from bitstream order to interleaved order. It
+   would be faster to do it in parsing, but the code would be far more
+   complicated */
+static void reorder_block(MPADecodeContext *s, GranuleDef *g)
+{
+    int i, j, len;
+    int32_t *ptr, *dst, *ptr1;
+    int32_t tmp[576];
+
+    if (g->block_type != 2)
+        return;
+
+    if (g->switch_point) {
+        if (s->sample_rate_index != 8) {
+            ptr = g->sb_hybrid + 36;
+        } else {
+            ptr = g->sb_hybrid + 48;
+        }
+    } else {
+        ptr = g->sb_hybrid;
+    }
+
+    for(i=g->short_start;i<13;i++) {
+        len = band_size_short[s->sample_rate_index][i];
+        ptr1 = ptr;
+        dst = tmp;
+        for(j=len;j>0;j--) {
+            *dst++ = ptr[0*len];
+            *dst++ = ptr[1*len];
+            *dst++ = ptr[2*len];
+            ptr++;
+        }
+        ptr+=2*len;
+        memcpy(ptr1, tmp, len * 3 * sizeof(*ptr1));
+    }
+}
+
+#define ISQRT2 FIXR(0.70710678118654752440)
+
+static void compute_stereo(MPADecodeContext *s,
+                           GranuleDef *g0, GranuleDef *g1)
+{
+    int i, j, k, l;
+    int32_t v1, v2;
+    int sf_max, tmp0, tmp1, sf, len, non_zero_found;
+    int32_t (*is_tab)[16];
+    int32_t *tab0, *tab1;
+    int non_zero_found_short[3];
+
+    /* intensity stereo */
+    if (s->mode_ext & MODE_EXT_I_STEREO) {
+        if (!s->lsf) {
+            is_tab = is_table;
+            sf_max = 7;
+        } else {
+            is_tab = is_table_lsf[g1->scalefac_compress & 1];
+            sf_max = 16;
+        }
+
+        tab0 = g0->sb_hybrid + 576;
+        tab1 = g1->sb_hybrid + 576;
+
+        non_zero_found_short[0] = 0;
+        non_zero_found_short[1] = 0;
+        non_zero_found_short[2] = 0;
+        k = (13 - g1->short_start) * 3 + g1->long_end - 3;
+        for(i = 12;i >= g1->short_start;i--) {
+            /* for last band, use previous scale factor */
+            if (i != 11)
+                k -= 3;
+            len = band_size_short[s->sample_rate_index][i];
+            for(l=2;l>=0;l--) {
+                tab0 -= len;
+                tab1 -= len;
+                if (!non_zero_found_short[l]) {
+                    /* test if non zero band. if so, stop doing i-stereo */
+                    for(j=0;j<len;j++) {
+                        if (tab1[j] != 0) {
+                            non_zero_found_short[l] = 1;
+                            goto found1;
+                        }
+                    }
+                    sf = g1->scale_factors[k + l];
+                    if (sf >= sf_max)
+                        goto found1;
+
+                    v1 = is_tab[0][sf];
+                    v2 = is_tab[1][sf];
+                    for(j=0;j<len;j++) {
+                        tmp0 = tab0[j];
+                        tab0[j] = MULL(tmp0, v1);
+                        tab1[j] = MULL(tmp0, v2);
+                    }
+                } else {
+                found1:
+                    if (s->mode_ext & MODE_EXT_MS_STEREO) {
+                        /* lower part of the spectrum : do ms stereo
+                           if enabled */
+                        for(j=0;j<len;j++) {
+                            tmp0 = tab0[j];
+                            tmp1 = tab1[j];
+                            tab0[j] = MULL(tmp0 + tmp1, ISQRT2);
+                            tab1[j] = MULL(tmp0 - tmp1, ISQRT2);
+                        }
+                    }
+                }
+            }
+        }
+
+        non_zero_found = non_zero_found_short[0] |
+            non_zero_found_short[1] |
+            non_zero_found_short[2];
+
+        for(i = g1->long_end - 1;i >= 0;i--) {
+            len = band_size_long[s->sample_rate_index][i];
+            tab0 -= len;
+            tab1 -= len;
+            /* test if non zero band. if so, stop doing i-stereo */
+            if (!non_zero_found) {
+                for(j=0;j<len;j++) {
+                    if (tab1[j] != 0) {
+                        non_zero_found = 1;
+                        goto found2;
+                    }
+                }
+                /* for last band, use previous scale factor */
+                k = (i == 21) ? 20 : i;
+                sf = g1->scale_factors[k];
+                if (sf >= sf_max)
+                    goto found2;
+                v1 = is_tab[0][sf];
+                v2 = is_tab[1][sf];
+                for(j=0;j<len;j++) {
+                    tmp0 = tab0[j];
+                    tab0[j] = MULL(tmp0, v1);
+                    tab1[j] = MULL(tmp0, v2);
+                }
+            } else {
+            found2:
+                if (s->mode_ext & MODE_EXT_MS_STEREO) {
+                    /* lower part of the spectrum : do ms stereo
+                       if enabled */
+                    for(j=0;j<len;j++) {
+                        tmp0 = tab0[j];
+                        tmp1 = tab1[j];
+                        tab0[j] = MULL(tmp0 + tmp1, ISQRT2);
+                        tab1[j] = MULL(tmp0 - tmp1, ISQRT2);
+                    }
+                }
+            }
+        }
+    } else if (s->mode_ext & MODE_EXT_MS_STEREO) {
+        /* ms stereo ONLY */
+        /* NOTE: the 1/sqrt(2) normalization factor is included in the
+           global gain */
+        tab0 = g0->sb_hybrid;
+        tab1 = g1->sb_hybrid;
+        for(i=0;i<576;i++) {
+            tmp0 = tab0[i];
+            tmp1 = tab1[i];
+            tab0[i] = tmp0 + tmp1;
+            tab1[i] = tmp0 - tmp1;
+        }
+    }
+}
+
+static void compute_antialias_integer(MPADecodeContext *s,
+                              GranuleDef *g)
+{
+    int32_t *ptr, *csa;
+    int n, i;
+
+    /* we antialias only "long" bands */
+    if (g->block_type == 2) {
+        if (!g->switch_point)
+            return;
+        /* XXX: check this for 8000Hz case */
+        n = 1;
+    } else {
+        n = SBLIMIT - 1;
+    }
+
+    ptr = g->sb_hybrid + 18;
+    for(i = n;i > 0;i--) {
+        int tmp0, tmp1, tmp2;
+        csa = &csa_table[0][0];
+#define INT_AA(j) \
+            tmp0 = ptr[-1-j];\
+            tmp1 = ptr[   j];\
+            tmp2= MULH(tmp0 + tmp1, csa[0+4*j]);\
+            ptr[-1-j] = 4*(tmp2 - MULH(tmp1, csa[2+4*j]));\
+            ptr[   j] = 4*(tmp2 + MULH(tmp0, csa[3+4*j]));
+
+        INT_AA(0)
+        INT_AA(1)
+        INT_AA(2)
+        INT_AA(3)
+        INT_AA(4)
+        INT_AA(5)
+        INT_AA(6)
+        INT_AA(7)
+
+        ptr += 18;
+    }
+}
+
+static void compute_antialias_float(MPADecodeContext *s,
+                              GranuleDef *g)
+{
+    int32_t *ptr;
+    int n, i;
+
+    /* we antialias only "long" bands */
+    if (g->block_type == 2) {
+        if (!g->switch_point)
+            return;
+        /* XXX: check this for 8000Hz case */
+        n = 1;
+    } else {
+        n = SBLIMIT - 1;
+    }
+
+    ptr = g->sb_hybrid + 18;
+    for(i = n;i > 0;i--) {
+        float tmp0, tmp1;
+        float *csa = &csa_table_float[0][0];
+#define FLOAT_AA(j)\
+        tmp0= ptr[-1-j];\
+        tmp1= ptr[   j];\
+        ptr[-1-j] = lrintf(tmp0 * csa[0+4*j] - tmp1 * csa[1+4*j]);\
+        ptr[   j] = lrintf(tmp0 * csa[1+4*j] + tmp1 * csa[0+4*j]);
+
+        FLOAT_AA(0)
+        FLOAT_AA(1)
+        FLOAT_AA(2)
+        FLOAT_AA(3)
+        FLOAT_AA(4)
+        FLOAT_AA(5)
+        FLOAT_AA(6)
+        FLOAT_AA(7)
+
+        ptr += 18;
+    }
+}
+
+static void compute_imdct(MPADecodeContext *s,
+                          GranuleDef *g,
+                          int32_t *sb_samples,
+                          int32_t *mdct_buf)
+{
+    int32_t *ptr, *win, *win1, *buf, *out_ptr, *ptr1;
+    int32_t out2[12];
+    int i, j, mdct_long_end, v, sblimit;
+
+    /* find last non zero block */
+    ptr = g->sb_hybrid + 576;
+    ptr1 = g->sb_hybrid + 2 * 18;
+    while (ptr >= ptr1) {
+        ptr -= 6;
+        v = ptr[0] | ptr[1] | ptr[2] | ptr[3] | ptr[4] | ptr[5];
+        if (v != 0)
+            break;
+    }
+    sblimit = ((ptr - g->sb_hybrid) / 18) + 1;
+
+    if (g->block_type == 2) {
+        /* XXX: check for 8000 Hz */
+        if (g->switch_point)
+            mdct_long_end = 2;
+        else
+            mdct_long_end = 0;
+    } else {
+        mdct_long_end = sblimit;
+    }
+
+    buf = mdct_buf;
+    ptr = g->sb_hybrid;
+    for(j=0;j<mdct_long_end;j++) {
+        /* apply window & overlap with previous buffer */
+        out_ptr = sb_samples + j;
+        /* select window */
+        if (g->switch_point && j < 2)
+            win1 = mdct_win[0];
+        else
+            win1 = mdct_win[g->block_type];
+        /* select frequency inversion */
+        win = win1 + ((4 * 36) & -(j & 1));
+        imdct36(out_ptr, buf, ptr, win);
+        out_ptr += 18*SBLIMIT;
+        ptr += 18;
+        buf += 18;
+    }
+    for(j=mdct_long_end;j<sblimit;j++) {
+        /* select frequency inversion */
+        win = mdct_win[2] + ((4 * 36) & -(j & 1));
+        out_ptr = sb_samples + j;
+
+        for(i=0; i<6; i++){
+            *out_ptr = buf[i];
+            out_ptr += SBLIMIT;
+        }
+        imdct12(out2, ptr + 0);
+        for(i=0;i<6;i++) {
+            *out_ptr = MULH(out2[i], win[i]) + buf[i + 6*1];
+            buf[i + 6*2] = MULH(out2[i + 6], win[i + 6]);
+            out_ptr += SBLIMIT;
+        }
+        imdct12(out2, ptr + 1);
+        for(i=0;i<6;i++) {
+            *out_ptr = MULH(out2[i], win[i]) + buf[i + 6*2];
+            buf[i + 6*0] = MULH(out2[i + 6], win[i + 6]);
+            out_ptr += SBLIMIT;
+        }
+        imdct12(out2, ptr + 2);
+        for(i=0;i<6;i++) {
+            buf[i + 6*0] = MULH(out2[i], win[i]) + buf[i + 6*0];
+            buf[i + 6*1] = MULH(out2[i + 6], win[i + 6]);
+            buf[i + 6*2] = 0;
+        }
+        ptr += 18;
+        buf += 18;
+    }
+    /* zero bands */
+    for(j=sblimit;j<SBLIMIT;j++) {
+        /* overlap */
+        out_ptr = sb_samples + j;
+        for(i=0;i<18;i++) {
+            *out_ptr = buf[i];
+            buf[i] = 0;
+            out_ptr += SBLIMIT;
+        }
+        buf += 18;
+    }
+}
+
+#if defined(DEBUG)
+void sample_dump(int fnum, int32_t *tab, int n)
+{
+    static FILE *files[16], *f;
+    char buf[512];
+    int i;
+    int32_t v;
+
+    f = files[fnum];
+    if (!f) {
+        snprintf(buf, sizeof(buf), "/tmp/out%d.%s.pcm",
+                fnum,
+#ifdef USE_HIGHPRECISION
+                "hp"
+#else
+                "lp"
+#endif
+                );
+        f = fopen(buf, "w");
+        if (!f)
+            return;
+        files[fnum] = f;
+    }
+
+    if (fnum == 0) {
+        static int pos = 0;
+        av_log(NULL, AV_LOG_DEBUG, "pos=%d\n", pos);
+        for(i=0;i<n;i++) {
+            av_log(NULL, AV_LOG_DEBUG, " %0.4f", (double)tab[i] / FRAC_ONE);
+            if ((i % 18) == 17)
+                av_log(NULL, AV_LOG_DEBUG, "\n");
+        }
+        pos += n;
+    }
+    for(i=0;i<n;i++) {
+        /* normalize to 23 frac bits */
+        v = tab[i] << (23 - FRAC_BITS);
+        fwrite(&v, 1, sizeof(int32_t), f);
+    }
+}
+#endif
+
+
+/* main layer3 decoding function */
+static int mp_decode_layer3(MPADecodeContext *s)
+{
+    int nb_granules, main_data_begin, private_bits;
+    int gr, ch, blocksplit_flag, i, j, k, n, bits_pos;
+    GranuleDef granules[2][2], *g;
+    int16_t exponents[576];
+
+    /* read side info */
+    if (s->lsf) {
+        main_data_begin = get_bits(&s->gb, 8);
+        private_bits = get_bits(&s->gb, s->nb_channels);
+        nb_granules = 1;
+    } else {
+        main_data_begin = get_bits(&s->gb, 9);
+        if (s->nb_channels == 2)
+            private_bits = get_bits(&s->gb, 3);
+        else
+            private_bits = get_bits(&s->gb, 5);
+        nb_granules = 2;
+        for(ch=0;ch<s->nb_channels;ch++) {
+            granules[ch][0].scfsi = 0; /* all scale factors are transmitted */
+            granules[ch][1].scfsi = get_bits(&s->gb, 4);
+        }
+    }
+
+    for(gr=0;gr<nb_granules;gr++) {
+        for(ch=0;ch<s->nb_channels;ch++) {
+            dprintf("gr=%d ch=%d: side_info\n", gr, ch);
+            g = &granules[ch][gr];
+            g->part2_3_length = get_bits(&s->gb, 12);
+            g->big_values = get_bits(&s->gb, 9);
+            if(g->big_values > 288){
+                av_log(NULL, AV_LOG_ERROR, "big_values too big\n");
+                return -1;
+            }
+
+            g->global_gain = get_bits(&s->gb, 8);
+            /* if MS stereo only is selected, we precompute the
+               1/sqrt(2) renormalization factor */
+            if ((s->mode_ext & (MODE_EXT_MS_STEREO | MODE_EXT_I_STEREO)) ==
+                MODE_EXT_MS_STEREO)
+                g->global_gain -= 2;
+            if (s->lsf)
+                g->scalefac_compress = get_bits(&s->gb, 9);
+            else
+                g->scalefac_compress = get_bits(&s->gb, 4);
+            blocksplit_flag = get_bits(&s->gb, 1);
+            if (blocksplit_flag) {
+                g->block_type = get_bits(&s->gb, 2);
+                if (g->block_type == 0){
+                    av_log(NULL, AV_LOG_ERROR, "invalid block type\n");
+                    return -1;
+                }
+                g->switch_point = get_bits(&s->gb, 1);
+                for(i=0;i<2;i++)
+                    g->table_select[i] = get_bits(&s->gb, 5);
+                for(i=0;i<3;i++)
+                    g->subblock_gain[i] = get_bits(&s->gb, 3);
+                /* compute huffman coded region sizes */
+                if (g->block_type == 2)
+                    g->region_size[0] = (36 / 2);
+                else {
+                    if (s->sample_rate_index <= 2)
+                        g->region_size[0] = (36 / 2);
+                    else if (s->sample_rate_index != 8)
+                        g->region_size[0] = (54 / 2);
+                    else
+                        g->region_size[0] = (108 / 2);
+                }
+                g->region_size[1] = (576 / 2);
+            } else {
+                int region_address1, region_address2, l;
+                g->block_type = 0;
+                g->switch_point = 0;
+                for(i=0;i<3;i++)
+                    g->table_select[i] = get_bits(&s->gb, 5);
+                /* compute huffman coded region sizes */
+                region_address1 = get_bits(&s->gb, 4);
+                region_address2 = get_bits(&s->gb, 3);
+                dprintf("region1=%d region2=%d\n",
+                        region_address1, region_address2);
+                g->region_size[0] =
+                    band_index_long[s->sample_rate_index][region_address1 + 1] >> 1;
+                l = region_address1 + region_address2 + 2;
+                /* should not overflow */
+                if (l > 22)
+                    l = 22;
+                g->region_size[1] =
+                    band_index_long[s->sample_rate_index][l] >> 1;
+            }
+            /* convert region offsets to region sizes and truncate
+               size to big_values */
+            g->region_size[2] = (576 / 2);
+            j = 0;
+            for(i=0;i<3;i++) {
+                k = FFMIN(g->region_size[i], g->big_values);
+                g->region_size[i] = k - j;
+                j = k;
+            }
+
+            /* compute band indexes */
+            if (g->block_type == 2) {
+                if (g->switch_point) {
+                    /* if switched mode, we handle the 36 first samples as
+                       long blocks.  For 8000Hz, we handle the 48 first
+                       exponents as long blocks (XXX: check this!) */
+                    if (s->sample_rate_index <= 2)
+                        g->long_end = 8;
+                    else if (s->sample_rate_index != 8)
+                        g->long_end = 6;
+                    else
+                        g->long_end = 4; /* 8000 Hz */
+
+                    g->short_start = 2 + (s->sample_rate_index != 8);
+                } else {
+                    g->long_end = 0;
+                    g->short_start = 0;
+                }
+            } else {
+                g->short_start = 13;
+                g->long_end = 22;
+            }
+
+            g->preflag = 0;
+            if (!s->lsf)
+                g->preflag = get_bits(&s->gb, 1);
+            g->scalefac_scale = get_bits(&s->gb, 1);
+            g->count1table_select = get_bits(&s->gb, 1);
+            dprintf("block_type=%d switch_point=%d\n",
+                    g->block_type, g->switch_point);
+        }
+    }
+
+  if (!s->adu_mode) {
+    const uint8_t *ptr = s->gb.buffer + (get_bits_count(&s->gb)>>3);
+    assert((get_bits_count(&s->gb) & 7) == 0);
+    /* now we get bits from the main_data_begin offset */
+    dprintf("seekback: %d\n", main_data_begin);
+//av_log(NULL, AV_LOG_ERROR, "backstep:%d, lastbuf:%d\n", main_data_begin, s->last_buf_size);
+
+    memcpy(s->last_buf + s->last_buf_size, ptr, EXTRABYTES);
+    s->in_gb= s->gb;
+        init_get_bits(&s->gb, s->last_buf, s->last_buf_size*8);
+        skip_bits_long(&s->gb, 8*(s->last_buf_size - main_data_begin));
+  }
+
+    for(gr=0;gr<nb_granules;gr++) {
+        for(ch=0;ch<s->nb_channels;ch++) {
+            g = &granules[ch][gr];
+            if(get_bits_count(&s->gb)<0){
+                av_log(NULL, AV_LOG_ERROR, "mdb:%d, lastbuf:%d skiping granule %d\n",
+                                            main_data_begin, s->last_buf_size, gr);
+                skip_bits_long(&s->gb, g->part2_3_length);
+                memset(g->sb_hybrid, 0, sizeof(g->sb_hybrid));
+                if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->in_gb.buffer){
+                    skip_bits_long(&s->in_gb, get_bits_count(&s->gb) - s->gb.size_in_bits);
+                    s->gb= s->in_gb;
+                    s->in_gb.buffer=NULL;
+                }
+                continue;
+            }
+
+            bits_pos = get_bits_count(&s->gb);
+
+            if (!s->lsf) {
+                uint8_t *sc;
+                int slen, slen1, slen2;
+
+                /* MPEG1 scale factors */
+                slen1 = slen_table[0][g->scalefac_compress];
+                slen2 = slen_table[1][g->scalefac_compress];
+                dprintf("slen1=%d slen2=%d\n", slen1, slen2);
+                if (g->block_type == 2) {
+                    n = g->switch_point ? 17 : 18;
+                    j = 0;
+                    if(slen1){
+                        for(i=0;i<n;i++)
+                            g->scale_factors[j++] = get_bits(&s->gb, slen1);
+                    }else{
+                        for(i=0;i<n;i++)
+                            g->scale_factors[j++] = 0;
+                    }
+                    if(slen2){
+                        for(i=0;i<18;i++)
+                            g->scale_factors[j++] = get_bits(&s->gb, slen2);
+                        for(i=0;i<3;i++)
+                            g->scale_factors[j++] = 0;
+                    }else{
+                        for(i=0;i<21;i++)
+                            g->scale_factors[j++] = 0;
+                    }
+                } else {
+                    sc = granules[ch][0].scale_factors;
+                    j = 0;
+                    for(k=0;k<4;k++) {
+                        n = (k == 0 ? 6 : 5);
+                        if ((g->scfsi & (0x8 >> k)) == 0) {
+                            slen = (k < 2) ? slen1 : slen2;
+                            if(slen){
+                                for(i=0;i<n;i++)
+                                    g->scale_factors[j++] = get_bits(&s->gb, slen);
+                            }else{
+                                for(i=0;i<n;i++)
+                                    g->scale_factors[j++] = 0;
+                            }
+                        } else {
+                            /* simply copy from last granule */
+                            for(i=0;i<n;i++) {
+                                g->scale_factors[j] = sc[j];
+                                j++;
+                            }
+                        }
+                    }
+                    g->scale_factors[j++] = 0;
+                }
+#if defined(DEBUG)
+                {
+                    dprintf("scfsi=%x gr=%d ch=%d scale_factors:\n",
+                           g->scfsi, gr, ch);
+                    for(i=0;i<j;i++)
+                        dprintf(" %d", g->scale_factors[i]);
+                    dprintf("\n");
+                }
+#endif
+            } else {
+                int tindex, tindex2, slen[4], sl, sf;
+
+                /* LSF scale factors */
+                if (g->block_type == 2) {
+                    tindex = g->switch_point ? 2 : 1;
+                } else {
+                    tindex = 0;
+                }
+                sf = g->scalefac_compress;
+                if ((s->mode_ext & MODE_EXT_I_STEREO) && ch == 1) {
+                    /* intensity stereo case */
+                    sf >>= 1;
+                    if (sf < 180) {
+                        lsf_sf_expand(slen, sf, 6, 6, 0);
+                        tindex2 = 3;
+                    } else if (sf < 244) {
+                        lsf_sf_expand(slen, sf - 180, 4, 4, 0);
+                        tindex2 = 4;
+                    } else {
+                        lsf_sf_expand(slen, sf - 244, 3, 0, 0);
+                        tindex2 = 5;
+                    }
+                } else {
+                    /* normal case */
+                    if (sf < 400) {
+                        lsf_sf_expand(slen, sf, 5, 4, 4);
+                        tindex2 = 0;
+                    } else if (sf < 500) {
+                        lsf_sf_expand(slen, sf - 400, 5, 4, 0);
+                        tindex2 = 1;
+                    } else {
+                        lsf_sf_expand(slen, sf - 500, 3, 0, 0);
+                        tindex2 = 2;
+                        g->preflag = 1;
+                    }
+                }
+
+                j = 0;
+                for(k=0;k<4;k++) {
+                    n = lsf_nsf_table[tindex2][tindex][k];
+                    sl = slen[k];
+                    if(sl){
+                        for(i=0;i<n;i++)
+                            g->scale_factors[j++] = get_bits(&s->gb, sl);
+                    }else{
+                        for(i=0;i<n;i++)
+                            g->scale_factors[j++] = 0;
+                    }
+                }
+                /* XXX: should compute exact size */
+                for(;j<40;j++)
+                    g->scale_factors[j] = 0;
+#if defined(DEBUG)
+                {
+                    dprintf("gr=%d ch=%d scale_factors:\n",
+                           gr, ch);
+                    for(i=0;i<40;i++)
+                        dprintf(" %d", g->scale_factors[i]);
+                    dprintf("\n");
+                }
+#endif
+            }
+
+            exponents_from_scale_factors(s, g, exponents);
+
+            /* read Huffman coded residue */
+            huffman_decode(s, g, exponents, bits_pos + g->part2_3_length);
+#if defined(DEBUG)
+            sample_dump(0, g->sb_hybrid, 576);
+#endif
+        } /* ch */
+
+        if (s->nb_channels == 2)
+            compute_stereo(s, &granules[0][gr], &granules[1][gr]);
+
+        for(ch=0;ch<s->nb_channels;ch++) {
+            g = &granules[ch][gr];
+
+            reorder_block(s, g);
+#if defined(DEBUG)
+            sample_dump(0, g->sb_hybrid, 576);
+#endif
+            s->compute_antialias(s, g);
+#if defined(DEBUG)
+            sample_dump(1, g->sb_hybrid, 576);
+#endif
+            compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]);
+#if defined(DEBUG)
+            sample_dump(2, &s->sb_samples[ch][18 * gr][0], 576);
+#endif
+        }
+    } /* gr */
+    if(get_bits_count(&s->gb)<0)
+        skip_bits_long(&s->gb, -get_bits_count(&s->gb));
+    return nb_granules * 18;
+}
+
+static int mp_decode_frame(MPADecodeContext *s,
+                           OUT_INT *samples, const uint8_t *buf, int buf_size)
+{
+    int i, nb_frames, ch;
+    OUT_INT *samples_ptr;
+
+    init_get_bits(&s->gb, buf + HEADER_SIZE, (buf_size - HEADER_SIZE)*8);
+
+    /* skip error protection field */
+    if (s->error_protection)
+        get_bits(&s->gb, 16);
+
+    dprintf("frame %d:\n", s->frame_count);
+    switch(s->layer) {
+    case 1:
+        nb_frames = mp_decode_layer1(s);
+        break;
+    case 2:
+        nb_frames = mp_decode_layer2(s);
+        break;
+    case 3:
+    default:
+        nb_frames = mp_decode_layer3(s);
+
+        s->last_buf_size=0;
+        if(s->in_gb.buffer){
+            align_get_bits(&s->gb);
+            i= (s->gb.size_in_bits - get_bits_count(&s->gb))>>3;
+            if(i >= 0 && i <= BACKSTEP_SIZE){
+                memmove(s->last_buf, s->gb.buffer + (get_bits_count(&s->gb)>>3), i);
+                s->last_buf_size=i;
+            }else
+                av_log(NULL, AV_LOG_ERROR, "invalid old backstep %d\n", i);
+            s->gb= s->in_gb;
+            s->in_gb.buffer= NULL;
+        }
+
+        align_get_bits(&s->gb);
+        assert((get_bits_count(&s->gb) & 7) == 0);
+        i= (s->gb.size_in_bits - get_bits_count(&s->gb))>>3;
+
+        if(i<0 || i > BACKSTEP_SIZE || nb_frames<0){
+            av_log(NULL, AV_LOG_ERROR, "invalid new backstep %d\n", i);
+            i= FFMIN(BACKSTEP_SIZE, buf_size - HEADER_SIZE);
+        }
+        assert(i <= buf_size - HEADER_SIZE && i>= 0);
+        memcpy(s->last_buf + s->last_buf_size, s->gb.buffer + buf_size - HEADER_SIZE - i, i);
+        s->last_buf_size += i;
+
+        break;
+    }
+#if defined(DEBUG)
+    for(i=0;i<nb_frames;i++) {
+        for(ch=0;ch<s->nb_channels;ch++) {
+            int j;
+            dprintf("%d-%d:", i, ch);
+            for(j=0;j<SBLIMIT;j++)
+                dprintf(" %0.6f", (double)s->sb_samples[ch][i][j] / FRAC_ONE);
+            dprintf("\n");
+        }
+    }
+#endif
+    /* apply the synthesis filter */
+    for(ch=0;ch<s->nb_channels;ch++) {
+        samples_ptr = samples + ch;
+        for(i=0;i<nb_frames;i++) {
+            ff_mpa_synth_filter(s->synth_buf[ch], &(s->synth_buf_offset[ch]),
+                         window, &s->dither_state,
+                         samples_ptr, s->nb_channels,
+                         s->sb_samples[ch][i]);
+            samples_ptr += 32 * s->nb_channels;
+        }
+    }
+#ifdef DEBUG
+    s->frame_count++;
+#endif
+    return nb_frames * 32 * sizeof(OUT_INT) * s->nb_channels;
+}
+
+static int decode_frame(AVCodecContext * avctx,
+                        void *data, int *data_size,
+                        uint8_t * buf, int buf_size)
+{
+    MPADecodeContext *s = avctx->priv_data;
+    uint32_t header;
+    int out_size;
+    OUT_INT *out_samples = data;
+
+retry:
+    if(buf_size < HEADER_SIZE)
+        return -1;
+
+    header = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
+    if(ff_mpa_check_header(header) < 0){
+        buf++;
+//        buf_size--;
+        av_log(avctx, AV_LOG_ERROR, "Header missing skipping one byte.\n");
+        goto retry;
+    }
+
+    if (decode_header(s, header) == 1) {
+        /* free format: prepare to compute frame size */
+        s->frame_size = -1;
+        return -1;
+    }
+    /* update codec info */
+    avctx->channels = s->nb_channels;
+    avctx->bit_rate = s->bit_rate;
+    avctx->sub_id = s->layer;
+    switch(s->layer) {
+    case 1:
+        avctx->frame_size = 384;
+        break;
+    case 2:
+        avctx->frame_size = 1152;
+        break;
+    case 3:
+        if (s->lsf)
+            avctx->frame_size = 576;
+        else
+            avctx->frame_size = 1152;
+        break;
+    }
+
+    if(s->frame_size<=0 || s->frame_size > buf_size){
+        av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
+        return -1;
+    }else if(s->frame_size < buf_size){
+        av_log(avctx, AV_LOG_ERROR, "incorrect frame size\n");
+    }
+
+    out_size = mp_decode_frame(s, out_samples, buf, buf_size);
+    if(out_size>=0){
+        *data_size = out_size;
+        avctx->sample_rate = s->sample_rate;
+        //FIXME maybe move the other codec info stuff from above here too
+    }else
+        av_log(avctx, AV_LOG_DEBUG, "Error while decoding MPEG audio frame.\n"); //FIXME return -1 / but also return the number of bytes consumed
+    s->frame_size = 0;
+    return buf_size;
+}
+
+static void flush(AVCodecContext *avctx){
+    MPADecodeContext *s = avctx->priv_data;
+    s->last_buf_size= 0;
+}
+
+#ifdef CONFIG_MP3ADU_DECODER
+static int decode_frame_adu(AVCodecContext * avctx,
+                        void *data, int *data_size,
+                        uint8_t * buf, int buf_size)
+{
+    MPADecodeContext *s = avctx->priv_data;
+    uint32_t header;
+    int len, out_size;
+    OUT_INT *out_samples = data;
+
+    len = buf_size;
+
+    // Discard too short frames
+    if (buf_size < HEADER_SIZE) {
+        *data_size = 0;
+        return buf_size;
+    }
+
+
+    if (len > MPA_MAX_CODED_FRAME_SIZE)
+        len = MPA_MAX_CODED_FRAME_SIZE;
+
+    // Get header and restore sync word
+    header = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3] | 0xffe00000;
+
+    if (ff_mpa_check_header(header) < 0) { // Bad header, discard frame
+        *data_size = 0;
+        return buf_size;
+    }
+
+    decode_header(s, header);
+    /* update codec info */
+    avctx->sample_rate = s->sample_rate;
+    avctx->channels = s->nb_channels;
+    avctx->bit_rate = s->bit_rate;
+    avctx->sub_id = s->layer;
+
+    avctx->frame_size=s->frame_size = len;
+
+    if (avctx->parse_only) {
+        out_size = buf_size;
+    } else {
+        out_size = mp_decode_frame(s, out_samples, buf, buf_size);
+    }
+
+    *data_size = out_size;
+    return buf_size;
+}
+#endif /* CONFIG_MP3ADU_DECODER */
+
+#ifdef CONFIG_MP3ON4_DECODER
+/* Next 3 arrays are indexed by channel config number (passed via codecdata) */
+static int mp3Frames[16] = {0,1,1,2,3,3,4,5,2};   /* number of mp3 decoder instances */
+static int mp3Channels[16] = {0,1,2,3,4,5,6,8,4}; /* total output channels */
+/* offsets into output buffer, assume output order is FL FR BL BR C LFE */
+static int chan_offset[9][5] = {
+    {0},
+    {0},            // C
+    {0},            // FLR
+    {2,0},          // C FLR
+    {2,0,3},        // C FLR BS
+    {4,0,2},        // C FLR BLRS
+    {4,0,2,5},      // C FLR BLRS LFE
+    {4,0,2,6,5},    // C FLR BLRS BLR LFE
+    {0,2}           // FLR BLRS
+};
+
+
+static int decode_init_mp3on4(AVCodecContext * avctx)
+{
+    MP3On4DecodeContext *s = avctx->priv_data;
+    int i;
+
+    if ((avctx->extradata_size < 2) || (avctx->extradata == NULL)) {
+        av_log(avctx, AV_LOG_ERROR, "Codec extradata missing or too short.\n");
+        return -1;
+    }
+
+    s->chan_cfg = (((unsigned char *)avctx->extradata)[1] >> 3) & 0x0f;
+    s->frames = mp3Frames[s->chan_cfg];
+    if(!s->frames) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid channel config number.\n");
+        return -1;
+    }
+    avctx->channels = mp3Channels[s->chan_cfg];
+
+    /* Init the first mp3 decoder in standard way, so that all tables get builded
+     * We replace avctx->priv_data with the context of the first decoder so that
+     * decode_init() does not have to be changed.
+     * Other decoders will be inited here copying data from the first context
+     */
+    // Allocate zeroed memory for the first decoder context
+    s->mp3decctx[0] = av_mallocz(sizeof(MPADecodeContext));
+    // Put decoder context in place to make init_decode() happy
+    avctx->priv_data = s->mp3decctx[0];
+    decode_init(avctx);
+    // Restore mp3on4 context pointer
+    avctx->priv_data = s;
+    s->mp3decctx[0]->adu_mode = 1; // Set adu mode
+
+    /* Create a separate codec/context for each frame (first is already ok).
+     * Each frame is 1 or 2 channels - up to 5 frames allowed
+     */
+    for (i = 1; i < s->frames; i++) {
+        s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext));
+        s->mp3decctx[i]->compute_antialias = s->mp3decctx[0]->compute_antialias;
+        s->mp3decctx[i]->adu_mode = 1;
+    }
+
+    return 0;
+}
+
+
+static int decode_close_mp3on4(AVCodecContext * avctx)
+{
+    MP3On4DecodeContext *s = avctx->priv_data;
+    int i;
+
+    for (i = 0; i < s->frames; i++)
+        if (s->mp3decctx[i])
+            av_free(s->mp3decctx[i]);
+
+    return 0;
+}
+
+
+static int decode_frame_mp3on4(AVCodecContext * avctx,
+                        void *data, int *data_size,
+                        uint8_t * buf, int buf_size)
+{
+    MP3On4DecodeContext *s = avctx->priv_data;
+    MPADecodeContext *m;
+    int len, out_size = 0;
+    uint32_t header;
+    OUT_INT *out_samples = data;
+    OUT_INT decoded_buf[MPA_FRAME_SIZE * MPA_MAX_CHANNELS];
+    OUT_INT *outptr, *bp;
+    int fsize;
+    unsigned char *start2 = buf, *start;
+    int fr, i, j, n;
+    int off = avctx->channels;
+    int *coff = chan_offset[s->chan_cfg];
+
+    len = buf_size;
+
+    // Discard too short frames
+    if (buf_size < HEADER_SIZE) {
+        *data_size = 0;
+        return buf_size;
+    }
+
+    // If only one decoder interleave is not needed
+    outptr = s->frames == 1 ? out_samples : decoded_buf;
+
+    for (fr = 0; fr < s->frames; fr++) {
+        start = start2;
+        fsize = (start[0] << 4) | (start[1] >> 4);
+        start2 += fsize;
+        if (fsize > len)
+            fsize = len;
+        len -= fsize;
+        if (fsize > MPA_MAX_CODED_FRAME_SIZE)
+            fsize = MPA_MAX_CODED_FRAME_SIZE;
+        m = s->mp3decctx[fr];
+        assert (m != NULL);
+
+        // Get header
+        header = (start[0] << 24) | (start[1] << 16) | (start[2] << 8) | start[3] | 0xfff00000;
+
+        if (ff_mpa_check_header(header) < 0) { // Bad header, discard block
+            *data_size = 0;
+            return buf_size;
+        }
+
+        decode_header(m, header);
+        mp_decode_frame(m, decoded_buf, start, fsize);
+
+        n = MPA_FRAME_SIZE * m->nb_channels;
+        out_size += n * sizeof(OUT_INT);
+        if(s->frames > 1) {
+            /* interleave output data */
+            bp = out_samples + coff[fr];
+            if(m->nb_channels == 1) {
+                for(j = 0; j < n; j++) {
+                    *bp = decoded_buf[j];
+                    bp += off;
+                }
+            } else {
+                for(j = 0; j < n; j++) {
+                    bp[0] = decoded_buf[j++];
+                    bp[1] = decoded_buf[j];
+                    bp += off;
+                }
+            }
+        }
+    }
+
+    /* update codec info */
+    avctx->sample_rate = s->mp3decctx[0]->sample_rate;
+    avctx->frame_size= buf_size;
+    avctx->bit_rate = 0;
+    for (i = 0; i < s->frames; i++)
+        avctx->bit_rate += s->mp3decctx[i]->bit_rate;
+
+    *data_size = out_size;
+    return buf_size;
+}
+#endif /* CONFIG_MP3ON4_DECODER */
+
+#ifdef CONFIG_MP2_DECODER
+AVCodec mp2_decoder =
+{
+    "mp2",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_MP2,
+    sizeof(MPADecodeContext),
+    decode_init,
+    NULL,
+    NULL,
+    decode_frame,
+    CODEC_CAP_PARSE_ONLY,
+};
+#endif
+#ifdef CONFIG_MP3_DECODER
+AVCodec mp3_decoder =
+{
+    "mp3",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_MP3,
+    sizeof(MPADecodeContext),
+    decode_init,
+    NULL,
+    NULL,
+    decode_frame,
+    CODEC_CAP_PARSE_ONLY,
+    .flush= flush,
+};
+#endif
+#ifdef CONFIG_MP3ADU_DECODER
+AVCodec mp3adu_decoder =
+{
+    "mp3adu",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_MP3ADU,
+    sizeof(MPADecodeContext),
+    decode_init,
+    NULL,
+    NULL,
+    decode_frame_adu,
+    CODEC_CAP_PARSE_ONLY,
+    .flush= flush,
+};
+#endif
+#ifdef CONFIG_MP3ON4_DECODER
+AVCodec mp3on4_decoder =
+{
+    "mp3on4",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_MP3ON4,
+    sizeof(MP3On4DecodeContext),
+    decode_init_mp3on4,
+    NULL,
+    decode_close_mp3on4,
+    decode_frame_mp3on4,
+    .flush= flush,
+};
+#endif
diff --git a/contrib/ffmpeg/libavcodec/mpegaudiodectab.h b/contrib/ffmpeg/libavcodec/mpegaudiodectab.h
new file mode 100644
index 000000000..fdd1096fc
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mpegaudiodectab.h
@@ -0,0 +1,795 @@
+/*
+ * MPEG Audio decoder
+ * copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file mpegaudiodectab.h
+ * mpeg audio layer decoder tables.
+ */
+
+const uint16_t mpa_bitrate_tab[2][3][15] = {
+    { {0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448 },
+      {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384 },
+      {0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320 } },
+    { {0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256},
+      {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160},
+      {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160}
+    }
+};
+
+const uint16_t mpa_freq_tab[3] = { 44100, 48000, 32000 };
+
+/*******************************************************/
+/* half mpeg encoding window (full precision) */
+const int32_t mpa_enwindow[257] = {
+     0,    -1,    -1,    -1,    -1,    -1,    -1,    -2,
+    -2,    -2,    -2,    -3,    -3,    -4,    -4,    -5,
+    -5,    -6,    -7,    -7,    -8,    -9,   -10,   -11,
+   -13,   -14,   -16,   -17,   -19,   -21,   -24,   -26,
+   -29,   -31,   -35,   -38,   -41,   -45,   -49,   -53,
+   -58,   -63,   -68,   -73,   -79,   -85,   -91,   -97,
+  -104,  -111,  -117,  -125,  -132,  -139,  -147,  -154,
+  -161,  -169,  -176,  -183,  -190,  -196,  -202,  -208,
+   213,   218,   222,   225,   227,   228,   228,   227,
+   224,   221,   215,   208,   200,   189,   177,   163,
+   146,   127,   106,    83,    57,    29,    -2,   -36,
+   -72,  -111,  -153,  -197,  -244,  -294,  -347,  -401,
+  -459,  -519,  -581,  -645,  -711,  -779,  -848,  -919,
+  -991, -1064, -1137, -1210, -1283, -1356, -1428, -1498,
+ -1567, -1634, -1698, -1759, -1817, -1870, -1919, -1962,
+ -2001, -2032, -2057, -2075, -2085, -2087, -2080, -2063,
+  2037,  2000,  1952,  1893,  1822,  1739,  1644,  1535,
+  1414,  1280,  1131,   970,   794,   605,   402,   185,
+   -45,  -288,  -545,  -814, -1095, -1388, -1692, -2006,
+ -2330, -2663, -3004, -3351, -3705, -4063, -4425, -4788,
+ -5153, -5517, -5879, -6237, -6589, -6935, -7271, -7597,
+ -7910, -8209, -8491, -8755, -8998, -9219, -9416, -9585,
+ -9727, -9838, -9916, -9959, -9966, -9935, -9863, -9750,
+ -9592, -9389, -9139, -8840, -8492, -8092, -7640, -7134,
+  6574,  5959,  5288,  4561,  3776,  2935,  2037,  1082,
+    70,  -998, -2122, -3300, -4533, -5818, -7154, -8540,
+ -9975,-11455,-12980,-14548,-16155,-17799,-19478,-21189,
+-22929,-24694,-26482,-28289,-30112,-31947,-33791,-35640,
+-37489,-39336,-41176,-43006,-44821,-46617,-48390,-50137,
+-51853,-53534,-55178,-56778,-58333,-59838,-61289,-62684,
+-64019,-65290,-66494,-67629,-68692,-69679,-70590,-71420,
+-72169,-72835,-73415,-73908,-74313,-74630,-74856,-74992,
+ 75038,
+};
+
+/*******************************************************/
+/* layer 2 tables */
+
+const int sblimit_table[5] = { 27 , 30 , 8, 12 , 30 };
+
+const int quant_steps[17] = {
+    3,     5,    7,    9,    15,
+    31,    63,  127,  255,   511,
+    1023,  2047, 4095, 8191, 16383,
+    32767, 65535
+};
+
+/* we use a negative value if grouped */
+const int quant_bits[17] = {
+    -5,  -7,  3, -10, 4,
+     5,  6,  7,  8,  9,
+    10, 11, 12, 13, 14,
+    15, 16
+};
+
+/* encoding tables which give the quantization index. Note how it is
+   possible to store them efficiently ! */
+static const unsigned char alloc_table_0[] = {
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+};
+
+static const unsigned char alloc_table_1[] = {
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+};
+
+static const unsigned char alloc_table_2[] = {
+ 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+ 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+};
+
+static const unsigned char alloc_table_3[] = {
+ 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+ 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+};
+
+static const unsigned char alloc_table_4[] = {
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+};
+
+const unsigned char *alloc_tables[5] =
+{ alloc_table_0, alloc_table_1, alloc_table_2, alloc_table_3, alloc_table_4, };
+
+/*******************************************************/
+/* layer 3 tables */
+
+/* layer3 scale factor size */
+static const uint8_t slen_table[2][16] = {
+    { 0, 0, 0, 0, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
+    { 0, 1, 2, 3, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 3 },
+};
+
+/* number of lsf scale factors for a given size */
+static const uint8_t lsf_nsf_table[6][3][4] = {
+    { {  6,  5,  5, 5 }, {  9,  9,  9, 9 }, {  6,  9,  9, 9 } },
+    { {  6,  5,  7, 3 }, {  9,  9, 12, 6 }, {  6,  9, 12, 6 } },
+    { { 11, 10,  0, 0 }, { 18, 18,  0, 0 }, { 15, 18,  0, 0 } },
+    { {  7,  7,  7, 0 }, { 12, 12, 12, 0 }, {  6, 15, 12, 0 } },
+    { {  6,  6,  6, 3 }, { 12,  9,  9, 6 }, {  6, 12,  9, 6 } },
+    { {  8,  8,  5, 0 }, { 15, 12,  9, 0 }, {  6, 18,  9, 0 } },
+};
+
+/* mpegaudio layer 3 huffman tables */
+
+static const uint16_t mpa_huffcodes_1[4] = {
+ 0x0001, 0x0001, 0x0001, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_1[4] = {
+  1,  3,  2,  3,
+};
+
+static const uint16_t mpa_huffcodes_2[9] = {
+ 0x0001, 0x0002, 0x0001, 0x0003, 0x0001, 0x0001, 0x0003, 0x0002,
+ 0x0000,
+};
+
+static const uint8_t mpa_huffbits_2[9] = {
+  1,  3,  6,  3,  3,  5,  5,  5,
+  6,
+};
+
+static const uint16_t mpa_huffcodes_3[9] = {
+ 0x0003, 0x0002, 0x0001, 0x0001, 0x0001, 0x0001, 0x0003, 0x0002,
+ 0x0000,
+};
+
+static const uint8_t mpa_huffbits_3[9] = {
+  2,  2,  6,  3,  2,  5,  5,  5,
+  6,
+};
+
+static const uint16_t mpa_huffcodes_5[16] = {
+ 0x0001, 0x0002, 0x0006, 0x0005, 0x0003, 0x0001, 0x0004, 0x0004,
+ 0x0007, 0x0005, 0x0007, 0x0001, 0x0006, 0x0001, 0x0001, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_5[16] = {
+  1,  3,  6,  7,  3,  3,  6,  7,
+  6,  6,  7,  8,  7,  6,  7,  8,
+};
+
+static const uint16_t mpa_huffcodes_6[16] = {
+ 0x0007, 0x0003, 0x0005, 0x0001, 0x0006, 0x0002, 0x0003, 0x0002,
+ 0x0005, 0x0004, 0x0004, 0x0001, 0x0003, 0x0003, 0x0002, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_6[16] = {
+  3,  3,  5,  7,  3,  2,  4,  5,
+  4,  4,  5,  6,  6,  5,  6,  7,
+};
+
+static const uint16_t mpa_huffcodes_7[36] = {
+ 0x0001, 0x0002, 0x000a, 0x0013, 0x0010, 0x000a, 0x0003, 0x0003,
+ 0x0007, 0x000a, 0x0005, 0x0003, 0x000b, 0x0004, 0x000d, 0x0011,
+ 0x0008, 0x0004, 0x000c, 0x000b, 0x0012, 0x000f, 0x000b, 0x0002,
+ 0x0007, 0x0006, 0x0009, 0x000e, 0x0003, 0x0001, 0x0006, 0x0004,
+ 0x0005, 0x0003, 0x0002, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_7[36] = {
+  1,  3,  6,  8,  8,  9,  3,  4,
+  6,  7,  7,  8,  6,  5,  7,  8,
+  8,  9,  7,  7,  8,  9,  9,  9,
+  7,  7,  8,  9,  9, 10,  8,  8,
+  9, 10, 10, 10,
+};
+
+static const uint16_t mpa_huffcodes_8[36] = {
+ 0x0003, 0x0004, 0x0006, 0x0012, 0x000c, 0x0005, 0x0005, 0x0001,
+ 0x0002, 0x0010, 0x0009, 0x0003, 0x0007, 0x0003, 0x0005, 0x000e,
+ 0x0007, 0x0003, 0x0013, 0x0011, 0x000f, 0x000d, 0x000a, 0x0004,
+ 0x000d, 0x0005, 0x0008, 0x000b, 0x0005, 0x0001, 0x000c, 0x0004,
+ 0x0004, 0x0001, 0x0001, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_8[36] = {
+  2,  3,  6,  8,  8,  9,  3,  2,
+  4,  8,  8,  8,  6,  4,  6,  8,
+  8,  9,  8,  8,  8,  9,  9, 10,
+  8,  7,  8,  9, 10, 10,  9,  8,
+  9,  9, 11, 11,
+};
+
+static const uint16_t mpa_huffcodes_9[36] = {
+ 0x0007, 0x0005, 0x0009, 0x000e, 0x000f, 0x0007, 0x0006, 0x0004,
+ 0x0005, 0x0005, 0x0006, 0x0007, 0x0007, 0x0006, 0x0008, 0x0008,
+ 0x0008, 0x0005, 0x000f, 0x0006, 0x0009, 0x000a, 0x0005, 0x0001,
+ 0x000b, 0x0007, 0x0009, 0x0006, 0x0004, 0x0001, 0x000e, 0x0004,
+ 0x0006, 0x0002, 0x0006, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_9[36] = {
+  3,  3,  5,  6,  8,  9,  3,  3,
+  4,  5,  6,  8,  4,  4,  5,  6,
+  7,  8,  6,  5,  6,  7,  7,  8,
+  7,  6,  7,  7,  8,  9,  8,  7,
+  8,  8,  9,  9,
+};
+
+static const uint16_t mpa_huffcodes_10[64] = {
+ 0x0001, 0x0002, 0x000a, 0x0017, 0x0023, 0x001e, 0x000c, 0x0011,
+ 0x0003, 0x0003, 0x0008, 0x000c, 0x0012, 0x0015, 0x000c, 0x0007,
+ 0x000b, 0x0009, 0x000f, 0x0015, 0x0020, 0x0028, 0x0013, 0x0006,
+ 0x000e, 0x000d, 0x0016, 0x0022, 0x002e, 0x0017, 0x0012, 0x0007,
+ 0x0014, 0x0013, 0x0021, 0x002f, 0x001b, 0x0016, 0x0009, 0x0003,
+ 0x001f, 0x0016, 0x0029, 0x001a, 0x0015, 0x0014, 0x0005, 0x0003,
+ 0x000e, 0x000d, 0x000a, 0x000b, 0x0010, 0x0006, 0x0005, 0x0001,
+ 0x0009, 0x0008, 0x0007, 0x0008, 0x0004, 0x0004, 0x0002, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_10[64] = {
+  1,  3,  6,  8,  9,  9,  9, 10,
+  3,  4,  6,  7,  8,  9,  8,  8,
+  6,  6,  7,  8,  9, 10,  9,  9,
+  7,  7,  8,  9, 10, 10,  9, 10,
+  8,  8,  9, 10, 10, 10, 10, 10,
+  9,  9, 10, 10, 11, 11, 10, 11,
+  8,  8,  9, 10, 10, 10, 11, 11,
+  9,  8,  9, 10, 10, 11, 11, 11,
+};
+
+static const uint16_t mpa_huffcodes_11[64] = {
+ 0x0003, 0x0004, 0x000a, 0x0018, 0x0022, 0x0021, 0x0015, 0x000f,
+ 0x0005, 0x0003, 0x0004, 0x000a, 0x0020, 0x0011, 0x000b, 0x000a,
+ 0x000b, 0x0007, 0x000d, 0x0012, 0x001e, 0x001f, 0x0014, 0x0005,
+ 0x0019, 0x000b, 0x0013, 0x003b, 0x001b, 0x0012, 0x000c, 0x0005,
+ 0x0023, 0x0021, 0x001f, 0x003a, 0x001e, 0x0010, 0x0007, 0x0005,
+ 0x001c, 0x001a, 0x0020, 0x0013, 0x0011, 0x000f, 0x0008, 0x000e,
+ 0x000e, 0x000c, 0x0009, 0x000d, 0x000e, 0x0009, 0x0004, 0x0001,
+ 0x000b, 0x0004, 0x0006, 0x0006, 0x0006, 0x0003, 0x0002, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_11[64] = {
+  2,  3,  5,  7,  8,  9,  8,  9,
+  3,  3,  4,  6,  8,  8,  7,  8,
+  5,  5,  6,  7,  8,  9,  8,  8,
+  7,  6,  7,  9,  8, 10,  8,  9,
+  8,  8,  8,  9,  9, 10,  9, 10,
+  8,  8,  9, 10, 10, 11, 10, 11,
+  8,  7,  7,  8,  9, 10, 10, 10,
+  8,  7,  8,  9, 10, 10, 10, 10,
+};
+
+static const uint16_t mpa_huffcodes_12[64] = {
+ 0x0009, 0x0006, 0x0010, 0x0021, 0x0029, 0x0027, 0x0026, 0x001a,
+ 0x0007, 0x0005, 0x0006, 0x0009, 0x0017, 0x0010, 0x001a, 0x000b,
+ 0x0011, 0x0007, 0x000b, 0x000e, 0x0015, 0x001e, 0x000a, 0x0007,
+ 0x0011, 0x000a, 0x000f, 0x000c, 0x0012, 0x001c, 0x000e, 0x0005,
+ 0x0020, 0x000d, 0x0016, 0x0013, 0x0012, 0x0010, 0x0009, 0x0005,
+ 0x0028, 0x0011, 0x001f, 0x001d, 0x0011, 0x000d, 0x0004, 0x0002,
+ 0x001b, 0x000c, 0x000b, 0x000f, 0x000a, 0x0007, 0x0004, 0x0001,
+ 0x001b, 0x000c, 0x0008, 0x000c, 0x0006, 0x0003, 0x0001, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_12[64] = {
+  4,  3,  5,  7,  8,  9,  9,  9,
+  3,  3,  4,  5,  7,  7,  8,  8,
+  5,  4,  5,  6,  7,  8,  7,  8,
+  6,  5,  6,  6,  7,  8,  8,  8,
+  7,  6,  7,  7,  8,  8,  8,  9,
+  8,  7,  8,  8,  8,  9,  8,  9,
+  8,  7,  7,  8,  8,  9,  9, 10,
+  9,  8,  8,  9,  9,  9,  9, 10,
+};
+
+static const uint16_t mpa_huffcodes_13[256] = {
+ 0x0001, 0x0005, 0x000e, 0x0015, 0x0022, 0x0033, 0x002e, 0x0047,
+ 0x002a, 0x0034, 0x0044, 0x0034, 0x0043, 0x002c, 0x002b, 0x0013,
+ 0x0003, 0x0004, 0x000c, 0x0013, 0x001f, 0x001a, 0x002c, 0x0021,
+ 0x001f, 0x0018, 0x0020, 0x0018, 0x001f, 0x0023, 0x0016, 0x000e,
+ 0x000f, 0x000d, 0x0017, 0x0024, 0x003b, 0x0031, 0x004d, 0x0041,
+ 0x001d, 0x0028, 0x001e, 0x0028, 0x001b, 0x0021, 0x002a, 0x0010,
+ 0x0016, 0x0014, 0x0025, 0x003d, 0x0038, 0x004f, 0x0049, 0x0040,
+ 0x002b, 0x004c, 0x0038, 0x0025, 0x001a, 0x001f, 0x0019, 0x000e,
+ 0x0023, 0x0010, 0x003c, 0x0039, 0x0061, 0x004b, 0x0072, 0x005b,
+ 0x0036, 0x0049, 0x0037, 0x0029, 0x0030, 0x0035, 0x0017, 0x0018,
+ 0x003a, 0x001b, 0x0032, 0x0060, 0x004c, 0x0046, 0x005d, 0x0054,
+ 0x004d, 0x003a, 0x004f, 0x001d, 0x004a, 0x0031, 0x0029, 0x0011,
+ 0x002f, 0x002d, 0x004e, 0x004a, 0x0073, 0x005e, 0x005a, 0x004f,
+ 0x0045, 0x0053, 0x0047, 0x0032, 0x003b, 0x0026, 0x0024, 0x000f,
+ 0x0048, 0x0022, 0x0038, 0x005f, 0x005c, 0x0055, 0x005b, 0x005a,
+ 0x0056, 0x0049, 0x004d, 0x0041, 0x0033, 0x002c, 0x002b, 0x002a,
+ 0x002b, 0x0014, 0x001e, 0x002c, 0x0037, 0x004e, 0x0048, 0x0057,
+ 0x004e, 0x003d, 0x002e, 0x0036, 0x0025, 0x001e, 0x0014, 0x0010,
+ 0x0035, 0x0019, 0x0029, 0x0025, 0x002c, 0x003b, 0x0036, 0x0051,
+ 0x0042, 0x004c, 0x0039, 0x0036, 0x0025, 0x0012, 0x0027, 0x000b,
+ 0x0023, 0x0021, 0x001f, 0x0039, 0x002a, 0x0052, 0x0048, 0x0050,
+ 0x002f, 0x003a, 0x0037, 0x0015, 0x0016, 0x001a, 0x0026, 0x0016,
+ 0x0035, 0x0019, 0x0017, 0x0026, 0x0046, 0x003c, 0x0033, 0x0024,
+ 0x0037, 0x001a, 0x0022, 0x0017, 0x001b, 0x000e, 0x0009, 0x0007,
+ 0x0022, 0x0020, 0x001c, 0x0027, 0x0031, 0x004b, 0x001e, 0x0034,
+ 0x0030, 0x0028, 0x0034, 0x001c, 0x0012, 0x0011, 0x0009, 0x0005,
+ 0x002d, 0x0015, 0x0022, 0x0040, 0x0038, 0x0032, 0x0031, 0x002d,
+ 0x001f, 0x0013, 0x000c, 0x000f, 0x000a, 0x0007, 0x0006, 0x0003,
+ 0x0030, 0x0017, 0x0014, 0x0027, 0x0024, 0x0023, 0x0035, 0x0015,
+ 0x0010, 0x0017, 0x000d, 0x000a, 0x0006, 0x0001, 0x0004, 0x0002,
+ 0x0010, 0x000f, 0x0011, 0x001b, 0x0019, 0x0014, 0x001d, 0x000b,
+ 0x0011, 0x000c, 0x0010, 0x0008, 0x0001, 0x0001, 0x0000, 0x0001,
+};
+
+static const uint8_t mpa_huffbits_13[256] = {
+  1,  4,  6,  7,  8,  9,  9, 10,
+  9, 10, 11, 11, 12, 12, 13, 13,
+  3,  4,  6,  7,  8,  8,  9,  9,
+  9,  9, 10, 10, 11, 12, 12, 12,
+  6,  6,  7,  8,  9,  9, 10, 10,
+  9, 10, 10, 11, 11, 12, 13, 13,
+  7,  7,  8,  9,  9, 10, 10, 10,
+ 10, 11, 11, 11, 11, 12, 13, 13,
+  8,  7,  9,  9, 10, 10, 11, 11,
+ 10, 11, 11, 12, 12, 13, 13, 14,
+  9,  8,  9, 10, 10, 10, 11, 11,
+ 11, 11, 12, 11, 13, 13, 14, 14,
+  9,  9, 10, 10, 11, 11, 11, 11,
+ 11, 12, 12, 12, 13, 13, 14, 14,
+ 10,  9, 10, 11, 11, 11, 12, 12,
+ 12, 12, 13, 13, 13, 14, 16, 16,
+  9,  8,  9, 10, 10, 11, 11, 12,
+ 12, 12, 12, 13, 13, 14, 15, 15,
+ 10,  9, 10, 10, 11, 11, 11, 13,
+ 12, 13, 13, 14, 14, 14, 16, 15,
+ 10, 10, 10, 11, 11, 12, 12, 13,
+ 12, 13, 14, 13, 14, 15, 16, 17,
+ 11, 10, 10, 11, 12, 12, 12, 12,
+ 13, 13, 13, 14, 15, 15, 15, 16,
+ 11, 11, 11, 12, 12, 13, 12, 13,
+ 14, 14, 15, 15, 15, 16, 16, 16,
+ 12, 11, 12, 13, 13, 13, 14, 14,
+ 14, 14, 14, 15, 16, 15, 16, 16,
+ 13, 12, 12, 13, 13, 13, 15, 14,
+ 14, 17, 15, 15, 15, 17, 16, 16,
+ 12, 12, 13, 14, 14, 14, 15, 14,
+ 15, 15, 16, 16, 19, 18, 19, 16,
+};
+
+static const uint16_t mpa_huffcodes_15[256] = {
+ 0x0007, 0x000c, 0x0012, 0x0035, 0x002f, 0x004c, 0x007c, 0x006c,
+ 0x0059, 0x007b, 0x006c, 0x0077, 0x006b, 0x0051, 0x007a, 0x003f,
+ 0x000d, 0x0005, 0x0010, 0x001b, 0x002e, 0x0024, 0x003d, 0x0033,
+ 0x002a, 0x0046, 0x0034, 0x0053, 0x0041, 0x0029, 0x003b, 0x0024,
+ 0x0013, 0x0011, 0x000f, 0x0018, 0x0029, 0x0022, 0x003b, 0x0030,
+ 0x0028, 0x0040, 0x0032, 0x004e, 0x003e, 0x0050, 0x0038, 0x0021,
+ 0x001d, 0x001c, 0x0019, 0x002b, 0x0027, 0x003f, 0x0037, 0x005d,
+ 0x004c, 0x003b, 0x005d, 0x0048, 0x0036, 0x004b, 0x0032, 0x001d,
+ 0x0034, 0x0016, 0x002a, 0x0028, 0x0043, 0x0039, 0x005f, 0x004f,
+ 0x0048, 0x0039, 0x0059, 0x0045, 0x0031, 0x0042, 0x002e, 0x001b,
+ 0x004d, 0x0025, 0x0023, 0x0042, 0x003a, 0x0034, 0x005b, 0x004a,
+ 0x003e, 0x0030, 0x004f, 0x003f, 0x005a, 0x003e, 0x0028, 0x0026,
+ 0x007d, 0x0020, 0x003c, 0x0038, 0x0032, 0x005c, 0x004e, 0x0041,
+ 0x0037, 0x0057, 0x0047, 0x0033, 0x0049, 0x0033, 0x0046, 0x001e,
+ 0x006d, 0x0035, 0x0031, 0x005e, 0x0058, 0x004b, 0x0042, 0x007a,
+ 0x005b, 0x0049, 0x0038, 0x002a, 0x0040, 0x002c, 0x0015, 0x0019,
+ 0x005a, 0x002b, 0x0029, 0x004d, 0x0049, 0x003f, 0x0038, 0x005c,
+ 0x004d, 0x0042, 0x002f, 0x0043, 0x0030, 0x0035, 0x0024, 0x0014,
+ 0x0047, 0x0022, 0x0043, 0x003c, 0x003a, 0x0031, 0x0058, 0x004c,
+ 0x0043, 0x006a, 0x0047, 0x0036, 0x0026, 0x0027, 0x0017, 0x000f,
+ 0x006d, 0x0035, 0x0033, 0x002f, 0x005a, 0x0052, 0x003a, 0x0039,
+ 0x0030, 0x0048, 0x0039, 0x0029, 0x0017, 0x001b, 0x003e, 0x0009,
+ 0x0056, 0x002a, 0x0028, 0x0025, 0x0046, 0x0040, 0x0034, 0x002b,
+ 0x0046, 0x0037, 0x002a, 0x0019, 0x001d, 0x0012, 0x000b, 0x000b,
+ 0x0076, 0x0044, 0x001e, 0x0037, 0x0032, 0x002e, 0x004a, 0x0041,
+ 0x0031, 0x0027, 0x0018, 0x0010, 0x0016, 0x000d, 0x000e, 0x0007,
+ 0x005b, 0x002c, 0x0027, 0x0026, 0x0022, 0x003f, 0x0034, 0x002d,
+ 0x001f, 0x0034, 0x001c, 0x0013, 0x000e, 0x0008, 0x0009, 0x0003,
+ 0x007b, 0x003c, 0x003a, 0x0035, 0x002f, 0x002b, 0x0020, 0x0016,
+ 0x0025, 0x0018, 0x0011, 0x000c, 0x000f, 0x000a, 0x0002, 0x0001,
+ 0x0047, 0x0025, 0x0022, 0x001e, 0x001c, 0x0014, 0x0011, 0x001a,
+ 0x0015, 0x0010, 0x000a, 0x0006, 0x0008, 0x0006, 0x0002, 0x0000,
+};
+
+static const uint8_t mpa_huffbits_15[256] = {
+  3,  4,  5,  7,  7,  8,  9,  9,
+  9, 10, 10, 11, 11, 11, 12, 13,
+  4,  3,  5,  6,  7,  7,  8,  8,
+  8,  9,  9, 10, 10, 10, 11, 11,
+  5,  5,  5,  6,  7,  7,  8,  8,
+  8,  9,  9, 10, 10, 11, 11, 11,
+  6,  6,  6,  7,  7,  8,  8,  9,
+  9,  9, 10, 10, 10, 11, 11, 11,
+  7,  6,  7,  7,  8,  8,  9,  9,
+  9,  9, 10, 10, 10, 11, 11, 11,
+  8,  7,  7,  8,  8,  8,  9,  9,
+  9,  9, 10, 10, 11, 11, 11, 12,
+  9,  7,  8,  8,  8,  9,  9,  9,
+  9, 10, 10, 10, 11, 11, 12, 12,
+  9,  8,  8,  9,  9,  9,  9, 10,
+ 10, 10, 10, 10, 11, 11, 11, 12,
+  9,  8,  8,  9,  9,  9,  9, 10,
+ 10, 10, 10, 11, 11, 12, 12, 12,
+  9,  8,  9,  9,  9,  9, 10, 10,
+ 10, 11, 11, 11, 11, 12, 12, 12,
+ 10,  9,  9,  9, 10, 10, 10, 10,
+ 10, 11, 11, 11, 11, 12, 13, 12,
+ 10,  9,  9,  9, 10, 10, 10, 10,
+ 11, 11, 11, 11, 12, 12, 12, 13,
+ 11, 10,  9, 10, 10, 10, 11, 11,
+ 11, 11, 11, 11, 12, 12, 13, 13,
+ 11, 10, 10, 10, 10, 11, 11, 11,
+ 11, 12, 12, 12, 12, 12, 13, 13,
+ 12, 11, 11, 11, 11, 11, 11, 11,
+ 12, 12, 12, 12, 13, 13, 12, 13,
+ 12, 11, 11, 11, 11, 11, 11, 12,
+ 12, 12, 12, 12, 13, 13, 13, 13,
+};
+
+static const uint16_t mpa_huffcodes_16[256] = {
+ 0x0001, 0x0005, 0x000e, 0x002c, 0x004a, 0x003f, 0x006e, 0x005d,
+ 0x00ac, 0x0095, 0x008a, 0x00f2, 0x00e1, 0x00c3, 0x0178, 0x0011,
+ 0x0003, 0x0004, 0x000c, 0x0014, 0x0023, 0x003e, 0x0035, 0x002f,
+ 0x0053, 0x004b, 0x0044, 0x0077, 0x00c9, 0x006b, 0x00cf, 0x0009,
+ 0x000f, 0x000d, 0x0017, 0x0026, 0x0043, 0x003a, 0x0067, 0x005a,
+ 0x00a1, 0x0048, 0x007f, 0x0075, 0x006e, 0x00d1, 0x00ce, 0x0010,
+ 0x002d, 0x0015, 0x0027, 0x0045, 0x0040, 0x0072, 0x0063, 0x0057,
+ 0x009e, 0x008c, 0x00fc, 0x00d4, 0x00c7, 0x0183, 0x016d, 0x001a,
+ 0x004b, 0x0024, 0x0044, 0x0041, 0x0073, 0x0065, 0x00b3, 0x00a4,
+ 0x009b, 0x0108, 0x00f6, 0x00e2, 0x018b, 0x017e, 0x016a, 0x0009,
+ 0x0042, 0x001e, 0x003b, 0x0038, 0x0066, 0x00b9, 0x00ad, 0x0109,
+ 0x008e, 0x00fd, 0x00e8, 0x0190, 0x0184, 0x017a, 0x01bd, 0x0010,
+ 0x006f, 0x0036, 0x0034, 0x0064, 0x00b8, 0x00b2, 0x00a0, 0x0085,
+ 0x0101, 0x00f4, 0x00e4, 0x00d9, 0x0181, 0x016e, 0x02cb, 0x000a,
+ 0x0062, 0x0030, 0x005b, 0x0058, 0x00a5, 0x009d, 0x0094, 0x0105,
+ 0x00f8, 0x0197, 0x018d, 0x0174, 0x017c, 0x0379, 0x0374, 0x0008,
+ 0x0055, 0x0054, 0x0051, 0x009f, 0x009c, 0x008f, 0x0104, 0x00f9,
+ 0x01ab, 0x0191, 0x0188, 0x017f, 0x02d7, 0x02c9, 0x02c4, 0x0007,
+ 0x009a, 0x004c, 0x0049, 0x008d, 0x0083, 0x0100, 0x00f5, 0x01aa,
+ 0x0196, 0x018a, 0x0180, 0x02df, 0x0167, 0x02c6, 0x0160, 0x000b,
+ 0x008b, 0x0081, 0x0043, 0x007d, 0x00f7, 0x00e9, 0x00e5, 0x00db,
+ 0x0189, 0x02e7, 0x02e1, 0x02d0, 0x0375, 0x0372, 0x01b7, 0x0004,
+ 0x00f3, 0x0078, 0x0076, 0x0073, 0x00e3, 0x00df, 0x018c, 0x02ea,
+ 0x02e6, 0x02e0, 0x02d1, 0x02c8, 0x02c2, 0x00df, 0x01b4, 0x0006,
+ 0x00ca, 0x00e0, 0x00de, 0x00da, 0x00d8, 0x0185, 0x0182, 0x017d,
+ 0x016c, 0x0378, 0x01bb, 0x02c3, 0x01b8, 0x01b5, 0x06c0, 0x0004,
+ 0x02eb, 0x00d3, 0x00d2, 0x00d0, 0x0172, 0x017b, 0x02de, 0x02d3,
+ 0x02ca, 0x06c7, 0x0373, 0x036d, 0x036c, 0x0d83, 0x0361, 0x0002,
+ 0x0179, 0x0171, 0x0066, 0x00bb, 0x02d6, 0x02d2, 0x0166, 0x02c7,
+ 0x02c5, 0x0362, 0x06c6, 0x0367, 0x0d82, 0x0366, 0x01b2, 0x0000,
+ 0x000c, 0x000a, 0x0007, 0x000b, 0x000a, 0x0011, 0x000b, 0x0009,
+ 0x000d, 0x000c, 0x000a, 0x0007, 0x0005, 0x0003, 0x0001, 0x0003,
+};
+
+static const uint8_t mpa_huffbits_16[256] = {
+  1,  4,  6,  8,  9,  9, 10, 10,
+ 11, 11, 11, 12, 12, 12, 13,  9,
+  3,  4,  6,  7,  8,  9,  9,  9,
+ 10, 10, 10, 11, 12, 11, 12,  8,
+  6,  6,  7,  8,  9,  9, 10, 10,
+ 11, 10, 11, 11, 11, 12, 12,  9,
+  8,  7,  8,  9,  9, 10, 10, 10,
+ 11, 11, 12, 12, 12, 13, 13, 10,
+  9,  8,  9,  9, 10, 10, 11, 11,
+ 11, 12, 12, 12, 13, 13, 13,  9,
+  9,  8,  9,  9, 10, 11, 11, 12,
+ 11, 12, 12, 13, 13, 13, 14, 10,
+ 10,  9,  9, 10, 11, 11, 11, 11,
+ 12, 12, 12, 12, 13, 13, 14, 10,
+ 10,  9, 10, 10, 11, 11, 11, 12,
+ 12, 13, 13, 13, 13, 15, 15, 10,
+ 10, 10, 10, 11, 11, 11, 12, 12,
+ 13, 13, 13, 13, 14, 14, 14, 10,
+ 11, 10, 10, 11, 11, 12, 12, 13,
+ 13, 13, 13, 14, 13, 14, 13, 11,
+ 11, 11, 10, 11, 12, 12, 12, 12,
+ 13, 14, 14, 14, 15, 15, 14, 10,
+ 12, 11, 11, 11, 12, 12, 13, 14,
+ 14, 14, 14, 14, 14, 13, 14, 11,
+ 12, 12, 12, 12, 12, 13, 13, 13,
+ 13, 15, 14, 14, 14, 14, 16, 11,
+ 14, 12, 12, 12, 13, 13, 14, 14,
+ 14, 16, 15, 15, 15, 17, 15, 11,
+ 13, 13, 11, 12, 14, 14, 13, 14,
+ 14, 15, 16, 15, 17, 15, 14, 11,
+  9,  8,  8,  9,  9, 10, 10, 10,
+ 11, 11, 11, 11, 11, 11, 11,  8,
+};
+
+static const uint16_t mpa_huffcodes_24[256] = {
+ 0x000f, 0x000d, 0x002e, 0x0050, 0x0092, 0x0106, 0x00f8, 0x01b2,
+ 0x01aa, 0x029d, 0x028d, 0x0289, 0x026d, 0x0205, 0x0408, 0x0058,
+ 0x000e, 0x000c, 0x0015, 0x0026, 0x0047, 0x0082, 0x007a, 0x00d8,
+ 0x00d1, 0x00c6, 0x0147, 0x0159, 0x013f, 0x0129, 0x0117, 0x002a,
+ 0x002f, 0x0016, 0x0029, 0x004a, 0x0044, 0x0080, 0x0078, 0x00dd,
+ 0x00cf, 0x00c2, 0x00b6, 0x0154, 0x013b, 0x0127, 0x021d, 0x0012,
+ 0x0051, 0x0027, 0x004b, 0x0046, 0x0086, 0x007d, 0x0074, 0x00dc,
+ 0x00cc, 0x00be, 0x00b2, 0x0145, 0x0137, 0x0125, 0x010f, 0x0010,
+ 0x0093, 0x0048, 0x0045, 0x0087, 0x007f, 0x0076, 0x0070, 0x00d2,
+ 0x00c8, 0x00bc, 0x0160, 0x0143, 0x0132, 0x011d, 0x021c, 0x000e,
+ 0x0107, 0x0042, 0x0081, 0x007e, 0x0077, 0x0072, 0x00d6, 0x00ca,
+ 0x00c0, 0x00b4, 0x0155, 0x013d, 0x012d, 0x0119, 0x0106, 0x000c,
+ 0x00f9, 0x007b, 0x0079, 0x0075, 0x0071, 0x00d7, 0x00ce, 0x00c3,
+ 0x00b9, 0x015b, 0x014a, 0x0134, 0x0123, 0x0110, 0x0208, 0x000a,
+ 0x01b3, 0x0073, 0x006f, 0x006d, 0x00d3, 0x00cb, 0x00c4, 0x00bb,
+ 0x0161, 0x014c, 0x0139, 0x012a, 0x011b, 0x0213, 0x017d, 0x0011,
+ 0x01ab, 0x00d4, 0x00d0, 0x00cd, 0x00c9, 0x00c1, 0x00ba, 0x00b1,
+ 0x00a9, 0x0140, 0x012f, 0x011e, 0x010c, 0x0202, 0x0179, 0x0010,
+ 0x014f, 0x00c7, 0x00c5, 0x00bf, 0x00bd, 0x00b5, 0x00ae, 0x014d,
+ 0x0141, 0x0131, 0x0121, 0x0113, 0x0209, 0x017b, 0x0173, 0x000b,
+ 0x029c, 0x00b8, 0x00b7, 0x00b3, 0x00af, 0x0158, 0x014b, 0x013a,
+ 0x0130, 0x0122, 0x0115, 0x0212, 0x017f, 0x0175, 0x016e, 0x000a,
+ 0x028c, 0x015a, 0x00ab, 0x00a8, 0x00a4, 0x013e, 0x0135, 0x012b,
+ 0x011f, 0x0114, 0x0107, 0x0201, 0x0177, 0x0170, 0x016a, 0x0006,
+ 0x0288, 0x0142, 0x013c, 0x0138, 0x0133, 0x012e, 0x0124, 0x011c,
+ 0x010d, 0x0105, 0x0200, 0x0178, 0x0172, 0x016c, 0x0167, 0x0004,
+ 0x026c, 0x012c, 0x0128, 0x0126, 0x0120, 0x011a, 0x0111, 0x010a,
+ 0x0203, 0x017c, 0x0176, 0x0171, 0x016d, 0x0169, 0x0165, 0x0002,
+ 0x0409, 0x0118, 0x0116, 0x0112, 0x010b, 0x0108, 0x0103, 0x017e,
+ 0x017a, 0x0174, 0x016f, 0x016b, 0x0168, 0x0166, 0x0164, 0x0000,
+ 0x002b, 0x0014, 0x0013, 0x0011, 0x000f, 0x000d, 0x000b, 0x0009,
+ 0x0007, 0x0006, 0x0004, 0x0007, 0x0005, 0x0003, 0x0001, 0x0003,
+};
+
+static const uint8_t mpa_huffbits_24[256] = {
+  4,  4,  6,  7,  8,  9,  9, 10,
+ 10, 11, 11, 11, 11, 11, 12,  9,
+  4,  4,  5,  6,  7,  8,  8,  9,
+  9,  9, 10, 10, 10, 10, 10,  8,
+  6,  5,  6,  7,  7,  8,  8,  9,
+  9,  9,  9, 10, 10, 10, 11,  7,
+  7,  6,  7,  7,  8,  8,  8,  9,
+  9,  9,  9, 10, 10, 10, 10,  7,
+  8,  7,  7,  8,  8,  8,  8,  9,
+  9,  9, 10, 10, 10, 10, 11,  7,
+  9,  7,  8,  8,  8,  8,  9,  9,
+  9,  9, 10, 10, 10, 10, 10,  7,
+  9,  8,  8,  8,  8,  9,  9,  9,
+  9, 10, 10, 10, 10, 10, 11,  7,
+ 10,  8,  8,  8,  9,  9,  9,  9,
+ 10, 10, 10, 10, 10, 11, 11,  8,
+ 10,  9,  9,  9,  9,  9,  9,  9,
+  9, 10, 10, 10, 10, 11, 11,  8,
+ 10,  9,  9,  9,  9,  9,  9, 10,
+ 10, 10, 10, 10, 11, 11, 11,  8,
+ 11,  9,  9,  9,  9, 10, 10, 10,
+ 10, 10, 10, 11, 11, 11, 11,  8,
+ 11, 10,  9,  9,  9, 10, 10, 10,
+ 10, 10, 10, 11, 11, 11, 11,  8,
+ 11, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 11, 11, 11, 11, 11,  8,
+ 11, 10, 10, 10, 10, 10, 10, 10,
+ 11, 11, 11, 11, 11, 11, 11,  8,
+ 12, 10, 10, 10, 10, 10, 10, 11,
+ 11, 11, 11, 11, 11, 11, 11,  8,
+  8,  7,  7,  7,  7,  7,  7,  7,
+  7,  7,  7,  8,  8,  8,  8,  4,
+};
+
+static const HuffTable mpa_huff_tables[16] = {
+{ 1, NULL, NULL },
+{ 2, mpa_huffbits_1, mpa_huffcodes_1 },
+{ 3, mpa_huffbits_2, mpa_huffcodes_2 },
+{ 3, mpa_huffbits_3, mpa_huffcodes_3 },
+{ 4, mpa_huffbits_5, mpa_huffcodes_5 },
+{ 4, mpa_huffbits_6, mpa_huffcodes_6 },
+{ 6, mpa_huffbits_7, mpa_huffcodes_7 },
+{ 6, mpa_huffbits_8, mpa_huffcodes_8 },
+{ 6, mpa_huffbits_9, mpa_huffcodes_9 },
+{ 8, mpa_huffbits_10, mpa_huffcodes_10 },
+{ 8, mpa_huffbits_11, mpa_huffcodes_11 },
+{ 8, mpa_huffbits_12, mpa_huffcodes_12 },
+{ 16, mpa_huffbits_13, mpa_huffcodes_13 },
+{ 16, mpa_huffbits_15, mpa_huffcodes_15 },
+{ 16, mpa_huffbits_16, mpa_huffcodes_16 },
+{ 16, mpa_huffbits_24, mpa_huffcodes_24 },
+};
+
+static const uint8_t mpa_huff_data[32][2] = {
+{ 0, 0 },
+{ 1, 0 },
+{ 2, 0 },
+{ 3, 0 },
+{ 0, 0 },
+{ 4, 0 },
+{ 5, 0 },
+{ 6, 0 },
+{ 7, 0 },
+{ 8, 0 },
+{ 9, 0 },
+{ 10, 0 },
+{ 11, 0 },
+{ 12, 0 },
+{ 0, 0 },
+{ 13, 0 },
+{ 14, 1 },
+{ 14, 2 },
+{ 14, 3 },
+{ 14, 4 },
+{ 14, 6 },
+{ 14, 8 },
+{ 14, 10 },
+{ 14, 13 },
+{ 15, 4 },
+{ 15, 5 },
+{ 15, 6 },
+{ 15, 7 },
+{ 15, 8 },
+{ 15, 9 },
+{ 15, 11 },
+{ 15, 13 },
+};
+
+
+/* huffman tables for quadrules */
+static const uint8_t mpa_quad_codes[2][16] = {
+    {  1,  5,  4,  5,  6,  5,  4,  4, 7,  3,  6,  0,  7,  2,  3,  1, },
+    { 15, 14, 13, 12, 11, 10,  9,  8, 7,  6,  5,  4,  3,  2,  1,  0, },
+};
+
+static const uint8_t mpa_quad_bits[2][16] = {
+    { 1, 4, 4, 5, 4, 6, 5, 6, 4, 5, 5, 6, 5, 6, 6, 6, },
+    { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, },
+};
+
+/* band size tables */
+static const uint8_t band_size_long[9][22] = {
+{ 4, 4, 4, 4, 4, 4, 6, 6, 8, 8, 10,
+  12, 16, 20, 24, 28, 34, 42, 50, 54, 76, 158, }, /* 44100 */
+{ 4, 4, 4, 4, 4, 4, 6, 6, 6, 8, 10,
+  12, 16, 18, 22, 28, 34, 40, 46, 54, 54, 192, }, /* 48000 */
+{ 4, 4, 4, 4, 4, 4, 6, 6, 8, 10, 12,
+  16, 20, 24, 30, 38, 46, 56, 68, 84, 102, 26, }, /* 32000 */
+{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
+  20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 22050 */
+{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
+  18, 22, 26, 32, 38, 46, 52, 64, 70, 76, 36, }, /* 24000 */
+{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
+  20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 16000 */
+{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
+  20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 11025 */
+{ 6, 6, 6, 6, 6, 6, 8, 10, 12, 14, 16,
+  20, 24, 28, 32, 38, 46, 52, 60, 68, 58, 54, }, /* 12000 */
+{ 12, 12, 12, 12, 12, 12, 16, 20, 24, 28, 32,
+  40, 48, 56, 64, 76, 90, 2, 2, 2, 2, 2, }, /* 8000 */
+};
+
+static const uint8_t band_size_short[9][13] = {
+{ 4, 4, 4, 4, 6, 8, 10, 12, 14, 18, 22, 30, 56, }, /* 44100 */
+{ 4, 4, 4, 4, 6, 6, 10, 12, 14, 16, 20, 26, 66, }, /* 48000 */
+{ 4, 4, 4, 4, 6, 8, 12, 16, 20, 26, 34, 42, 12, }, /* 32000 */
+{ 4, 4, 4, 6, 6, 8, 10, 14, 18, 26, 32, 42, 18, }, /* 22050 */
+{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 32, 44, 12, }, /* 24000 */
+{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 30, 40, 18, }, /* 16000 */
+{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 30, 40, 18, }, /* 11025 */
+{ 4, 4, 4, 6, 8, 10, 12, 14, 18, 24, 30, 40, 18, }, /* 12000 */
+{ 8, 8, 8, 12, 16, 20, 24, 28, 36, 2, 2, 2, 26, }, /* 8000 */
+};
+
+static const uint8_t mpa_pretab[2][22] = {
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 2, 0 },
+};
+
+/* table for alias reduction (XXX: store it as integer !) */
+static const float ci_table[8] = {
+    -0.6, -0.535, -0.33, -0.185, -0.095, -0.041, -0.0142, -0.0037,
+};
diff --git a/contrib/ffmpeg/libavcodec/mpegaudiotab.h b/contrib/ffmpeg/libavcodec/mpegaudiotab.h
new file mode 100644
index 000000000..8fb37ddff
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mpegaudiotab.h
@@ -0,0 +1,111 @@
+/*
+ * mpeg audio layer 2 tables. Most of them come from the mpeg audio
+ * specification.
+ *
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file mpegaudiotab.h
+ * mpeg audio layer 2 tables.
+ * Most of them come from the mpeg audio specification.
+ */
+
+#define SQRT2 1.41421356237309514547
+
+static const int costab32[30] = {
+    FIX(0.54119610014619701222),
+    FIX(1.3065629648763763537),
+
+    FIX(0.50979557910415917998),
+    FIX(2.5629154477415054814),
+    FIX(0.89997622313641556513),
+    FIX(0.60134488693504528634),
+
+    FIX(0.5024192861881556782),
+    FIX(5.1011486186891552563),
+    FIX(0.78815462345125020249),
+    FIX(0.64682178335999007679),
+    FIX(0.56694403481635768927),
+    FIX(1.0606776859903470633),
+    FIX(1.7224470982383341955),
+    FIX(0.52249861493968885462),
+
+    FIX(10.19000812354803287),
+    FIX(0.674808341455005678),
+    FIX(1.1694399334328846596),
+    FIX(0.53104259108978413284),
+    FIX(2.0577810099534108446),
+    FIX(0.58293496820613388554),
+    FIX(0.83934964541552681272),
+    FIX(0.50547095989754364798),
+    FIX(3.4076084184687189804),
+    FIX(0.62250412303566482475),
+    FIX(0.97256823786196078263),
+    FIX(0.51544730992262455249),
+    FIX(1.4841646163141661852),
+    FIX(0.5531038960344445421),
+    FIX(0.74453627100229857749),
+    FIX(0.5006029982351962726),
+};
+
+static const int bitinv32[32] = {
+    0,  16,  8, 24,  4,  20,  12,  28,
+    2,  18, 10, 26,  6,  22,  14,  30,
+    1,  17,  9, 25,  5,  21,  13,  29,
+    3,  19, 11, 27,  7,  23,  15,  31
+};
+
+
+static int16_t filter_bank[512];
+
+static int scale_factor_table[64];
+#ifdef USE_FLOATS
+static float scale_factor_inv_table[64];
+#else
+static int8_t scale_factor_shift[64];
+static unsigned short scale_factor_mult[64];
+#endif
+static unsigned char scale_diff_table[128];
+
+/* total number of bits per allocation group */
+static unsigned short total_quant_bits[17];
+
+/* signal to noise ratio of each quantification step (could be
+   computed from quant_steps[]). The values are dB multiplied by 10
+*/
+static const unsigned short quant_snr[17] = {
+     70, 110, 160, 208,
+    253, 316, 378, 439,
+    499, 559, 620, 680,
+    740, 800, 861, 920,
+    980
+};
+
+/* fixed psycho acoustic model. Values of SNR taken from the 'toolame'
+   project */
+static const float fixed_smr[SBLIMIT] =  {
+    30, 17, 16, 10, 3, 12, 8, 2.5,
+    5, 5, 6, 6, 5, 6, 10, 6,
+    -4, -10, -21, -30, -42, -55, -68, -75,
+    -75, -75, -75, -75, -91, -107, -110, -108
+};
+
+static const unsigned char nb_scale_factors[4] = { 3, 2, 1, 2 };
+
diff --git a/contrib/ffmpeg/libavcodec/mpegvideo.c b/contrib/ffmpeg/libavcodec/mpegvideo.c
new file mode 100644
index 000000000..f0a04a402
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mpegvideo.c
@@ -0,0 +1,6903 @@
+/*
+ * The simplest mpeg encoder (well, it was the simplest!)
+ * Copyright (c) 2000,2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
+ */
+
+/**
+ * @file mpegvideo.c
+ * The simplest mpeg encoder (well, it was the simplest!).
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+#include "faandct.h"
+#include <limits.h>
+
+#ifdef USE_FASTMEMCPY
+#include "libvo/fastmemcpy.h"
+#endif
+
+//#undef NDEBUG
+//#include <assert.h>
+
+#ifdef CONFIG_ENCODERS
+static int encode_picture(MpegEncContext *s, int picture_number);
+#endif //CONFIG_ENCODERS
+static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
+                                   DCTELEM *block, int n, int qscale);
+static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
+                                   DCTELEM *block, int n, int qscale);
+static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
+                                   DCTELEM *block, int n, int qscale);
+static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
+                                   DCTELEM *block, int n, int qscale);
+static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
+                                   DCTELEM *block, int n, int qscale);
+static void dct_unquantize_h263_intra_c(MpegEncContext *s,
+                                  DCTELEM *block, int n, int qscale);
+static void dct_unquantize_h263_inter_c(MpegEncContext *s,
+                                  DCTELEM *block, int n, int qscale);
+static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
+#ifdef CONFIG_ENCODERS
+static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
+static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
+static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
+static int sse_mb(MpegEncContext *s);
+static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
+#endif //CONFIG_ENCODERS
+
+#ifdef HAVE_XVMC
+extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
+extern void XVMC_field_end(MpegEncContext *s);
+extern void XVMC_decode_mb(MpegEncContext *s);
+#endif
+
+void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
+
+
+/* enable all paranoid tests for rounding, overflows, etc... */
+//#define PARANOID
+
+//#define DEBUG
+
+
+/* for jpeg fast DCT */
+#define CONST_BITS 14
+
+static const uint16_t aanscales[64] = {
+    /* precomputed values scaled up by 14 bits */
+    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+    8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+    4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+};
+
+static const uint8_t h263_chroma_roundtab[16] = {
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
+    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
+};
+
+static const uint8_t ff_default_chroma_qscale_table[32]={
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+};
+
+#ifdef CONFIG_ENCODERS
+static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
+static uint8_t default_fcode_tab[MAX_MV*2+1];
+
+enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
+
+static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
+                           const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
+{
+    int qscale;
+    int shift=0;
+
+    for(qscale=qmin; qscale<=qmax; qscale++){
+        int i;
+        if (dsp->fdct == ff_jpeg_fdct_islow
+#ifdef FAAN_POSTSCALE
+            || dsp->fdct == ff_faandct
+#endif
+            ) {
+            for(i=0;i<64;i++) {
+                const int j= dsp->idct_permutation[i];
+                /* 16 <= qscale * quant_matrix[i] <= 7905 */
+                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
+                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
+                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
+
+                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
+                                (qscale * quant_matrix[j]));
+            }
+        } else if (dsp->fdct == fdct_ifast
+#ifndef FAAN_POSTSCALE
+                   || dsp->fdct == ff_faandct
+#endif
+                   ) {
+            for(i=0;i<64;i++) {
+                const int j= dsp->idct_permutation[i];
+                /* 16 <= qscale * quant_matrix[i] <= 7905 */
+                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
+                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
+                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
+
+                qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
+                                (aanscales[i] * qscale * quant_matrix[j]));
+            }
+        } else {
+            for(i=0;i<64;i++) {
+                const int j= dsp->idct_permutation[i];
+                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
+                   So 16           <= qscale * quant_matrix[i]             <= 7905
+                   so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
+                   so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
+                */
+                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
+//                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
+                qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
+
+                if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
+                qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
+            }
+        }
+
+        for(i=intra; i<64; i++){
+            int64_t max= 8191;
+            if (dsp->fdct == fdct_ifast
+#ifndef FAAN_POSTSCALE
+                   || dsp->fdct == ff_faandct
+#endif
+                   ) {
+                max= (8191LL*aanscales[i]) >> 14;
+            }
+            while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
+                shift++;
+            }
+        }
+    }
+    if(shift){
+        av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger then %d, overflows possible\n", QMAT_SHIFT - shift);
+    }
+}
+
+static inline void update_qscale(MpegEncContext *s){
+    s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
+    s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
+
+    s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
+}
+#endif //CONFIG_ENCODERS
+
+void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
+    int i;
+    int end;
+
+    st->scantable= src_scantable;
+
+    for(i=0; i<64; i++){
+        int j;
+        j = src_scantable[i];
+        st->permutated[i] = permutation[j];
+#ifdef ARCH_POWERPC
+        st->inverse[j] = i;
+#endif
+    }
+
+    end=-1;
+    for(i=0; i<64; i++){
+        int j;
+        j = st->permutated[i];
+        if(j>end) end=j;
+        st->raster_end[i]= end;
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
+    int i;
+
+    if(matrix){
+        put_bits(pb, 1, 1);
+        for(i=0;i<64;i++) {
+            put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
+        }
+    }else
+        put_bits(pb, 1, 0);
+}
+#endif //CONFIG_ENCODERS
+
+const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
+    int i;
+
+    assert(p<=end);
+    if(p>=end)
+        return end;
+
+    for(i=0; i<3; i++){
+        uint32_t tmp= *state << 8;
+        *state= tmp + *(p++);
+        if(tmp == 0x100 || p==end)
+            return p;
+    }
+
+    while(p<end){
+        if     (p[-1] > 1      ) p+= 3;
+        else if(p[-2]          ) p+= 2;
+        else if(p[-3]|(p[-1]-1)) p++;
+        else{
+            p++;
+            break;
+        }
+    }
+
+    p= FFMIN(p, end)-4;
+    *state=  be2me_32(unaligned32(p));
+
+    return p+4;
+}
+
+/* init common dct for both encoder and decoder */
+int DCT_common_init(MpegEncContext *s)
+{
+    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
+    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
+    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
+    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
+    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
+    if(s->flags & CODEC_FLAG_BITEXACT)
+        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
+    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
+
+#ifdef CONFIG_ENCODERS
+    s->dct_quantize= dct_quantize_c;
+    s->denoise_dct= denoise_dct_c;
+#endif //CONFIG_ENCODERS
+
+#ifdef HAVE_MMX
+    MPV_common_init_mmx(s);
+#endif
+#ifdef ARCH_ALPHA
+    MPV_common_init_axp(s);
+#endif
+#ifdef HAVE_MLIB
+    MPV_common_init_mlib(s);
+#endif
+#ifdef HAVE_MMI
+    MPV_common_init_mmi(s);
+#endif
+#ifdef ARCH_ARMV4L
+    MPV_common_init_armv4l(s);
+#endif
+#ifdef ARCH_POWERPC
+    MPV_common_init_ppc(s);
+#endif
+
+#ifdef CONFIG_ENCODERS
+    s->fast_dct_quantize= s->dct_quantize;
+
+    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
+        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
+    }
+
+#endif //CONFIG_ENCODERS
+
+    /* load & permutate scantables
+       note: only wmv uses different ones
+    */
+    if(s->alternate_scan){
+        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
+        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
+    }else{
+        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
+        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
+    }
+    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
+    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
+
+    return 0;
+}
+
+static void copy_picture(Picture *dst, Picture *src){
+    *dst = *src;
+    dst->type= FF_BUFFER_TYPE_COPY;
+}
+
+#ifdef CONFIG_ENCODERS
+static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
+    int i;
+
+    dst->pict_type              = src->pict_type;
+    dst->quality                = src->quality;
+    dst->coded_picture_number   = src->coded_picture_number;
+    dst->display_picture_number = src->display_picture_number;
+//    dst->reference              = src->reference;
+    dst->pts                    = src->pts;
+    dst->interlaced_frame       = src->interlaced_frame;
+    dst->top_field_first        = src->top_field_first;
+
+    if(s->avctx->me_threshold){
+        if(!src->motion_val[0])
+            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
+        if(!src->mb_type)
+            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
+        if(!src->ref_index[0])
+            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
+        if(src->motion_subsample_log2 != dst->motion_subsample_log2)
+            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
+            src->motion_subsample_log2, dst->motion_subsample_log2);
+
+        memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
+
+        for(i=0; i<2; i++){
+            int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
+            int height= ((16*s->mb_height)>>src->motion_subsample_log2);
+
+            if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
+                memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
+            }
+            if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
+                memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
+            }
+        }
+    }
+}
+#endif
+
+/**
+ * allocates a Picture
+ * The pixels are allocated/set by calling get_buffer() if shared=0
+ */
+static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
+    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
+    const int mb_array_size= s->mb_stride*s->mb_height;
+    const int b8_array_size= s->b8_stride*s->mb_height*2;
+    const int b4_array_size= s->b4_stride*s->mb_height*4;
+    int i;
+
+    if(shared){
+        assert(pic->data[0]);
+        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
+        pic->type= FF_BUFFER_TYPE_SHARED;
+    }else{
+        int r;
+
+        assert(!pic->data[0]);
+
+        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
+
+        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
+            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
+            return -1;
+        }
+
+        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
+            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
+            return -1;
+        }
+
+        if(pic->linesize[1] != pic->linesize[2]){
+            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
+            return -1;
+        }
+
+        s->linesize  = pic->linesize[0];
+        s->uvlinesize= pic->linesize[1];
+    }
+
+    if(pic->qscale_table==NULL){
+        if (s->encoding) {
+            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
+            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
+            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
+        }
+
+        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
+        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
+        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
+        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
+        if(s->out_format == FMT_H264){
+            for(i=0; i<2; i++){
+                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
+                pic->motion_val[i]= pic->motion_val_base[i]+4;
+                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
+            }
+            pic->motion_subsample_log2= 2;
+        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
+            for(i=0; i<2; i++){
+                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
+                pic->motion_val[i]= pic->motion_val_base[i]+4;
+                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
+            }
+            pic->motion_subsample_log2= 3;
+        }
+        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
+            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
+        }
+        pic->qstride= s->mb_stride;
+        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
+    }
+
+    //it might be nicer if the application would keep track of these but it would require a API change
+    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
+    s->prev_pict_types[0]= s->pict_type;
+    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
+        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway
+
+    return 0;
+fail: //for the CHECKED_ALLOCZ macro
+    return -1;
+}
+
+/**
+ * deallocates a picture
+ */
+static void free_picture(MpegEncContext *s, Picture *pic){
+    int i;
+
+    if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
+        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
+    }
+
+    av_freep(&pic->mb_var);
+    av_freep(&pic->mc_mb_var);
+    av_freep(&pic->mb_mean);
+    av_freep(&pic->mbskip_table);
+    av_freep(&pic->qscale_table);
+    av_freep(&pic->mb_type_base);
+    av_freep(&pic->dct_coeff);
+    av_freep(&pic->pan_scan);
+    pic->mb_type= NULL;
+    for(i=0; i<2; i++){
+        av_freep(&pic->motion_val_base[i]);
+        av_freep(&pic->ref_index[i]);
+    }
+
+    if(pic->type == FF_BUFFER_TYPE_SHARED){
+        for(i=0; i<4; i++){
+            pic->base[i]=
+            pic->data[i]= NULL;
+        }
+        pic->type= 0;
+    }
+}
+
+static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
+    int i;
+
+    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
+    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
+    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
+
+     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
+    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
+    s->rd_scratchpad=   s->me.scratchpad;
+    s->b_scratchpad=    s->me.scratchpad;
+    s->obmc_scratchpad= s->me.scratchpad + 16;
+    if (s->encoding) {
+        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
+        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
+        if(s->avctx->noise_reduction){
+            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
+        }
+    }
+    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
+    s->block= s->blocks[0];
+
+    for(i=0;i<12;i++){
+        s->pblocks[i] = (short *)(&s->block[i]);
+    }
+    return 0;
+fail:
+    return -1; //free() through MPV_common_end()
+}
+
+static void free_duplicate_context(MpegEncContext *s){
+    if(s==NULL) return;
+
+    av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
+    av_freep(&s->me.scratchpad);
+    s->rd_scratchpad=
+    s->b_scratchpad=
+    s->obmc_scratchpad= NULL;
+
+    av_freep(&s->dct_error_sum);
+    av_freep(&s->me.map);
+    av_freep(&s->me.score_map);
+    av_freep(&s->blocks);
+    s->block= NULL;
+}
+
+static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
+#define COPY(a) bak->a= src->a
+    COPY(allocated_edge_emu_buffer);
+    COPY(edge_emu_buffer);
+    COPY(me.scratchpad);
+    COPY(rd_scratchpad);
+    COPY(b_scratchpad);
+    COPY(obmc_scratchpad);
+    COPY(me.map);
+    COPY(me.score_map);
+    COPY(blocks);
+    COPY(block);
+    COPY(start_mb_y);
+    COPY(end_mb_y);
+    COPY(me.map_generation);
+    COPY(pb);
+    COPY(dct_error_sum);
+    COPY(dct_count[0]);
+    COPY(dct_count[1]);
+#undef COPY
+}
+
+void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
+    MpegEncContext bak;
+    int i;
+    //FIXME copy only needed parts
+//START_TIMER
+    backup_duplicate_context(&bak, dst);
+    memcpy(dst, src, sizeof(MpegEncContext));
+    backup_duplicate_context(dst, &bak);
+    for(i=0;i<12;i++){
+        dst->pblocks[i] = (short *)(&dst->block[i]);
+    }
+//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
+}
+
+#ifdef CONFIG_ENCODERS
+static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
+#define COPY(a) dst->a= src->a
+    COPY(pict_type);
+    COPY(current_picture);
+    COPY(f_code);
+    COPY(b_code);
+    COPY(qscale);
+    COPY(lambda);
+    COPY(lambda2);
+    COPY(picture_in_gop_number);
+    COPY(gop_picture_number);
+    COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
+    COPY(progressive_frame); //FIXME don't set in encode_header
+    COPY(partitioned_frame); //FIXME don't set in encode_header
+#undef COPY
+}
+#endif
+
+/**
+ * sets the given MpegEncContext to common defaults (same for encoding and decoding).
+ * the changed fields will not depend upon the prior state of the MpegEncContext.
+ */
+static void MPV_common_defaults(MpegEncContext *s){
+    s->y_dc_scale_table=
+    s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+    s->chroma_qscale_table= ff_default_chroma_qscale_table;
+    s->progressive_frame= 1;
+    s->progressive_sequence= 1;
+    s->picture_structure= PICT_FRAME;
+
+    s->coded_picture_number = 0;
+    s->picture_number = 0;
+    s->input_picture_number = 0;
+
+    s->picture_in_gop_number = 0;
+
+    s->f_code = 1;
+    s->b_code = 1;
+}
+
+/**
+ * sets the given MpegEncContext to defaults for decoding.
+ * the changed fields will not depend upon the prior state of the MpegEncContext.
+ */
+void MPV_decode_defaults(MpegEncContext *s){
+    MPV_common_defaults(s);
+}
+
+/**
+ * sets the given MpegEncContext to defaults for encoding.
+ * the changed fields will not depend upon the prior state of the MpegEncContext.
+ */
+
+#ifdef CONFIG_ENCODERS
+static void MPV_encode_defaults(MpegEncContext *s){
+    static int done=0;
+
+    MPV_common_defaults(s);
+
+    if(!done){
+        int i;
+        done=1;
+
+        default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
+        memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
+
+        for(i=-16; i<16; i++){
+            default_fcode_tab[i + MAX_MV]= 1;
+        }
+    }
+    s->me.mv_penalty= default_mv_penalty;
+    s->fcode_tab= default_fcode_tab;
+}
+#endif //CONFIG_ENCODERS
+
+/**
+ * init common structure for both encoder and decoder.
+ * this assumes that some variables like width/height are already set
+ */
+int MPV_common_init(MpegEncContext *s)
+{
+    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
+
+    s->mb_height = (s->height + 15) / 16;
+
+    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
+        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
+        return -1;
+    }
+
+    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
+        return -1;
+
+    dsputil_init(&s->dsp, s->avctx);
+    DCT_common_init(s);
+
+    s->flags= s->avctx->flags;
+    s->flags2= s->avctx->flags2;
+
+    s->mb_width  = (s->width  + 15) / 16;
+    s->mb_stride = s->mb_width + 1;
+    s->b8_stride = s->mb_width*2 + 1;
+    s->b4_stride = s->mb_width*4 + 1;
+    mb_array_size= s->mb_height * s->mb_stride;
+    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
+
+    /* set chroma shifts */
+    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
+                                                    &(s->chroma_y_shift) );
+
+    /* set default edge pos, will be overriden in decode_header if needed */
+    s->h_edge_pos= s->mb_width*16;
+    s->v_edge_pos= s->mb_height*16;
+
+    s->mb_num = s->mb_width * s->mb_height;
+
+    s->block_wrap[0]=
+    s->block_wrap[1]=
+    s->block_wrap[2]=
+    s->block_wrap[3]= s->b8_stride;
+    s->block_wrap[4]=
+    s->block_wrap[5]= s->mb_stride;
+
+    y_size = s->b8_stride * (2 * s->mb_height + 1);
+    c_size = s->mb_stride * (s->mb_height + 1);
+    yc_size = y_size + 2 * c_size;
+
+    /* convert fourcc to upper case */
+    s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
+                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
+                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
+                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
+
+    s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
+                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
+                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
+                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
+
+    s->avctx->coded_frame= (AVFrame*)&s->current_picture;
+
+    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
+    for(y=0; y<s->mb_height; y++){
+        for(x=0; x<s->mb_width; x++){
+            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
+        }
+    }
+    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
+
+    if (s->encoding) {
+        /* Allocate MV tables */
+        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
+        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
+        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
+        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
+        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
+        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
+        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
+
+        if(s->msmpeg4_version){
+            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
+        }
+        CHECKED_ALLOCZ(s->avctx->stats_out, 256);
+
+        /* Allocate MB type table */
+        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
+
+        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
+
+        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
+        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
+        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
+        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
+        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
+        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
+
+        if(s->avctx->noise_reduction){
+            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
+        }
+    }
+    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
+
+    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
+
+    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
+        /* interlaced direct mode decoding tables */
+            for(i=0; i<2; i++){
+                int j, k;
+                for(j=0; j<2; j++){
+                    for(k=0; k<2; k++){
+                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
+                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
+                    }
+                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
+                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
+                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
+                }
+                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
+            }
+    }
+    if (s->out_format == FMT_H263) {
+        /* ac values */
+        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
+        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
+        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
+        s->ac_val[2] = s->ac_val[1] + c_size;
+
+        /* cbp values */
+        CHECKED_ALLOCZ(s->coded_block_base, y_size);
+        s->coded_block= s->coded_block_base + s->b8_stride + 1;
+
+        /* cbp, ac_pred, pred_dir */
+        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
+        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
+    }
+
+    if (s->h263_pred || s->h263_plus || !s->encoding) {
+        /* dc values */
+        //MN: we need these for error resilience of intra-frames
+        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
+        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
+        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
+        s->dc_val[2] = s->dc_val[1] + c_size;
+        for(i=0;i<yc_size;i++)
+            s->dc_val_base[i] = 1024;
+    }
+
+    /* which mb is a intra block */
+    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
+    memset(s->mbintra_table, 1, mb_array_size);
+
+    /* init macroblock skip table */
+    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
+    //Note the +1 is for a quicker mpeg4 slice_end detection
+    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
+
+    s->parse_context.state= -1;
+    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
+       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
+       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
+       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
+    }
+
+    s->context_initialized = 1;
+
+    s->thread_context[0]= s;
+    for(i=1; i<s->avctx->thread_count; i++){
+        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
+        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
+    }
+
+    for(i=0; i<s->avctx->thread_count; i++){
+        if(init_duplicate_context(s->thread_context[i], s) < 0)
+           goto fail;
+        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
+        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
+    }
+
+    return 0;
+ fail:
+    MPV_common_end(s);
+    return -1;
+}
+
+/* init common structure for both encoder and decoder */
+void MPV_common_end(MpegEncContext *s)
+{
+    int i, j, k;
+
+    for(i=0; i<s->avctx->thread_count; i++){
+        free_duplicate_context(s->thread_context[i]);
+    }
+    for(i=1; i<s->avctx->thread_count; i++){
+        av_freep(&s->thread_context[i]);
+    }
+
+    av_freep(&s->parse_context.buffer);
+    s->parse_context.buffer_size=0;
+
+    av_freep(&s->mb_type);
+    av_freep(&s->p_mv_table_base);
+    av_freep(&s->b_forw_mv_table_base);
+    av_freep(&s->b_back_mv_table_base);
+    av_freep(&s->b_bidir_forw_mv_table_base);
+    av_freep(&s->b_bidir_back_mv_table_base);
+    av_freep(&s->b_direct_mv_table_base);
+    s->p_mv_table= NULL;
+    s->b_forw_mv_table= NULL;
+    s->b_back_mv_table= NULL;
+    s->b_bidir_forw_mv_table= NULL;
+    s->b_bidir_back_mv_table= NULL;
+    s->b_direct_mv_table= NULL;
+    for(i=0; i<2; i++){
+        for(j=0; j<2; j++){
+            for(k=0; k<2; k++){
+                av_freep(&s->b_field_mv_table_base[i][j][k]);
+                s->b_field_mv_table[i][j][k]=NULL;
+            }
+            av_freep(&s->b_field_select_table[i][j]);
+            av_freep(&s->p_field_mv_table_base[i][j]);
+            s->p_field_mv_table[i][j]=NULL;
+        }
+        av_freep(&s->p_field_select_table[i]);
+    }
+
+    av_freep(&s->dc_val_base);
+    av_freep(&s->ac_val_base);
+    av_freep(&s->coded_block_base);
+    av_freep(&s->mbintra_table);
+    av_freep(&s->cbp_table);
+    av_freep(&s->pred_dir_table);
+
+    av_freep(&s->mbskip_table);
+    av_freep(&s->prev_pict_types);
+    av_freep(&s->bitstream_buffer);
+    s->allocated_bitstream_buffer_size=0;
+
+    av_freep(&s->avctx->stats_out);
+    av_freep(&s->ac_stats);
+    av_freep(&s->error_status_table);
+    av_freep(&s->mb_index2xy);
+    av_freep(&s->lambda_table);
+    av_freep(&s->q_intra_matrix);
+    av_freep(&s->q_inter_matrix);
+    av_freep(&s->q_intra_matrix16);
+    av_freep(&s->q_inter_matrix16);
+    av_freep(&s->input_picture);
+    av_freep(&s->reordered_input_picture);
+    av_freep(&s->dct_offset);
+
+    if(s->picture){
+        for(i=0; i<MAX_PICTURE_COUNT; i++){
+            free_picture(s, &s->picture[i]);
+        }
+    }
+    av_freep(&s->picture);
+    s->context_initialized = 0;
+    s->last_picture_ptr=
+    s->next_picture_ptr=
+    s->current_picture_ptr= NULL;
+    s->linesize= s->uvlinesize= 0;
+
+    for(i=0; i<3; i++)
+        av_freep(&s->visualization_buffer[i]);
+
+    avcodec_default_free_buffers(s->avctx);
+}
+
+#ifdef CONFIG_ENCODERS
+
+/* init video encoder */
+int MPV_encode_init(AVCodecContext *avctx)
+{
+    MpegEncContext *s = avctx->priv_data;
+    int i;
+    int chroma_h_shift, chroma_v_shift;
+
+    MPV_encode_defaults(s);
+
+    switch (avctx->codec_id) {
+    case CODEC_ID_MPEG2VIDEO:
+        if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
+            av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
+            return -1;
+        }
+        break;
+    case CODEC_ID_LJPEG:
+    case CODEC_ID_MJPEG:
+        if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P &&
+           ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P) || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
+            av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
+            return -1;
+        }
+        break;
+    default:
+        if(avctx->pix_fmt != PIX_FMT_YUV420P){
+            av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
+            return -1;
+        }
+    }
+
+    switch (avctx->pix_fmt) {
+    case PIX_FMT_YUVJ422P:
+    case PIX_FMT_YUV422P:
+        s->chroma_format = CHROMA_422;
+        break;
+    case PIX_FMT_YUVJ420P:
+    case PIX_FMT_YUV420P:
+    default:
+        s->chroma_format = CHROMA_420;
+        break;
+    }
+
+    s->bit_rate = avctx->bit_rate;
+    s->width = avctx->width;
+    s->height = avctx->height;
+    if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
+        av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
+        avctx->gop_size=600;
+    }
+    s->gop_size = avctx->gop_size;
+    s->avctx = avctx;
+    s->flags= avctx->flags;
+    s->flags2= avctx->flags2;
+    s->max_b_frames= avctx->max_b_frames;
+    s->codec_id= avctx->codec->id;
+    s->luma_elim_threshold  = avctx->luma_elim_threshold;
+    s->chroma_elim_threshold= avctx->chroma_elim_threshold;
+    s->strict_std_compliance= avctx->strict_std_compliance;
+    s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
+    s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
+    s->mpeg_quant= avctx->mpeg_quant;
+    s->rtp_mode= !!avctx->rtp_payload_size;
+    s->intra_dc_precision= avctx->intra_dc_precision;
+    s->user_specified_pts = AV_NOPTS_VALUE;
+
+    if (s->gop_size <= 1) {
+        s->intra_only = 1;
+        s->gop_size = 12;
+    } else {
+        s->intra_only = 0;
+    }
+
+    s->me_method = avctx->me_method;
+
+    /* Fixed QSCALE */
+    s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
+
+    s->adaptive_quant= (   s->avctx->lumi_masking
+                        || s->avctx->dark_masking
+                        || s->avctx->temporal_cplx_masking
+                        || s->avctx->spatial_cplx_masking
+                        || s->avctx->p_masking
+                        || s->avctx->border_masking
+                        || (s->flags&CODEC_FLAG_QP_RD))
+                       && !s->fixed_qscale;
+
+    s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
+    s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
+    s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
+    s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
+
+    if(avctx->rc_max_rate && !avctx->rc_buffer_size){
+        av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
+        return -1;
+    }
+
+    if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
+        av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
+    }
+
+    if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
+        av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
+        return -1;
+    }
+
+    if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
+        av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
+        return -1;
+    }
+
+    if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
+       && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
+       && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
+
+        av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
+    }
+
+    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
+       && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
+        av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
+        return -1;
+    }
+
+    if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
+        av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
+        return -1;
+    }
+
+    if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
+        av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
+        return -1;
+    }
+
+    if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
+        av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
+        return -1;
+    }
+
+    if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
+        av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
+        return -1;
+    }
+
+    if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
+        av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
+        return -1;
+    }
+
+    if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
+       && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
+        av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
+        return -1;
+    }
+
+    if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
+        av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
+        return -1;
+    }
+
+    if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
+        av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
+        return -1;
+    }
+
+    if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
+        av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
+        return -1;
+    }
+
+    if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
+        av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
+        return -1;
+    }
+
+    if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
+        av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
+        return -1;
+    }
+
+    if(s->flags & CODEC_FLAG_LOW_DELAY){
+        if (s->codec_id != CODEC_ID_MPEG2VIDEO && s->codec_id != CODEC_ID_MPEG1VIDEO){
+            av_log(avctx, AV_LOG_ERROR, "low delay forcing is only available for mpeg1/2\n");
+            return -1;
+        }
+        if (s->max_b_frames != 0){
+            av_log(avctx, AV_LOG_ERROR, "b frames cannot be used with low delay\n");
+            return -1;
+        }
+    }
+
+    if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
+       && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
+       && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
+        av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
+        return -1;
+    }
+
+    if(s->avctx->thread_count > 1)
+        s->rtp_mode= 1;
+
+    if(!avctx->time_base.den || !avctx->time_base.num){
+        av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
+        return -1;
+    }
+
+    i= (INT_MAX/2+128)>>8;
+    if(avctx->me_threshold >= i){
+        av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
+        return -1;
+    }
+    if(avctx->mb_threshold >= i){
+        av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
+        return -1;
+    }
+
+    if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
+        av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
+        avctx->b_frame_strategy = 0;
+    }
+
+    i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
+    if(i > 1){
+        av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
+        avctx->time_base.den /= i;
+        avctx->time_base.num /= i;
+//        return -1;
+    }
+
+    if(s->codec_id==CODEC_ID_MJPEG){
+        s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
+        s->inter_quant_bias= 0;
+    }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
+        s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
+        s->inter_quant_bias= 0;
+    }else{
+        s->intra_quant_bias=0;
+        s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
+    }
+
+    if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
+        s->intra_quant_bias= avctx->intra_quant_bias;
+    if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
+        s->inter_quant_bias= avctx->inter_quant_bias;
+
+    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
+
+    if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
+        av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
+        return -1;
+    }
+    s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
+
+    switch(avctx->codec->id) {
+    case CODEC_ID_MPEG1VIDEO:
+        s->out_format = FMT_MPEG1;
+        s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
+        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
+        break;
+    case CODEC_ID_MPEG2VIDEO:
+        s->out_format = FMT_MPEG1;
+        s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
+        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
+        s->rtp_mode= 1;
+        break;
+    case CODEC_ID_LJPEG:
+    case CODEC_ID_JPEGLS:
+    case CODEC_ID_MJPEG:
+        s->out_format = FMT_MJPEG;
+        s->intra_only = 1; /* force intra only for jpeg */
+        s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
+        s->mjpeg_data_only_frames = 0; /* write all the needed headers */
+        s->mjpeg_vsample[0] = 2;
+        s->mjpeg_vsample[1] = 2>>chroma_v_shift;
+        s->mjpeg_vsample[2] = 2>>chroma_v_shift;
+        s->mjpeg_hsample[0] = 2;
+        s->mjpeg_hsample[1] = 2>>chroma_h_shift;
+        s->mjpeg_hsample[2] = 2>>chroma_h_shift;
+        if (mjpeg_init(s) < 0)
+            return -1;
+        avctx->delay=0;
+        s->low_delay=1;
+        break;
+#ifdef CONFIG_H261_ENCODER
+    case CODEC_ID_H261:
+        if (ff_h261_get_picture_format(s->width, s->height) < 0) {
+            av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.261 codec.\nValid sizes are 176x144, 352x288\n", s->width, s->height);
+            return -1;
+        }
+        s->out_format = FMT_H261;
+        avctx->delay=0;
+        s->low_delay=1;
+        break;
+#endif
+    case CODEC_ID_H263:
+        if (h263_get_picture_format(s->width, s->height) == 7) {
+            av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
+            return -1;
+        }
+        s->out_format = FMT_H263;
+        s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
+        avctx->delay=0;
+        s->low_delay=1;
+        break;
+    case CODEC_ID_H263P:
+        s->out_format = FMT_H263;
+        s->h263_plus = 1;
+        /* Fx */
+        s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
+        s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
+        s->modified_quant= s->h263_aic;
+        s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
+        s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
+        s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
+        s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
+        s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
+
+        /* /Fx */
+        /* These are just to be sure */
+        avctx->delay=0;
+        s->low_delay=1;
+        break;
+    case CODEC_ID_FLV1:
+        s->out_format = FMT_H263;
+        s->h263_flv = 2; /* format = 1; 11-bit codes */
+        s->unrestricted_mv = 1;
+        s->rtp_mode=0; /* don't allow GOB */
+        avctx->delay=0;
+        s->low_delay=1;
+        break;
+    case CODEC_ID_RV10:
+        s->out_format = FMT_H263;
+        avctx->delay=0;
+        s->low_delay=1;
+        break;
+    case CODEC_ID_RV20:
+        s->out_format = FMT_H263;
+        avctx->delay=0;
+        s->low_delay=1;
+        s->modified_quant=1;
+        s->h263_aic=1;
+        s->h263_plus=1;
+        s->loop_filter=1;
+        s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
+        break;
+    case CODEC_ID_MPEG4:
+        s->out_format = FMT_H263;
+        s->h263_pred = 1;
+        s->unrestricted_mv = 1;
+        s->low_delay= s->max_b_frames ? 0 : 1;
+        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
+        break;
+    case CODEC_ID_MSMPEG4V1:
+        s->out_format = FMT_H263;
+        s->h263_msmpeg4 = 1;
+        s->h263_pred = 1;
+        s->unrestricted_mv = 1;
+        s->msmpeg4_version= 1;
+        avctx->delay=0;
+        s->low_delay=1;
+        break;
+    case CODEC_ID_MSMPEG4V2:
+        s->out_format = FMT_H263;
+        s->h263_msmpeg4 = 1;
+        s->h263_pred = 1;
+        s->unrestricted_mv = 1;
+        s->msmpeg4_version= 2;
+        avctx->delay=0;
+        s->low_delay=1;
+        break;
+    case CODEC_ID_MSMPEG4V3:
+        s->out_format = FMT_H263;
+        s->h263_msmpeg4 = 1;
+        s->h263_pred = 1;
+        s->unrestricted_mv = 1;
+        s->msmpeg4_version= 3;
+        s->flipflop_rounding=1;
+        avctx->delay=0;
+        s->low_delay=1;
+        break;
+    case CODEC_ID_WMV1:
+        s->out_format = FMT_H263;
+        s->h263_msmpeg4 = 1;
+        s->h263_pred = 1;
+        s->unrestricted_mv = 1;
+        s->msmpeg4_version= 4;
+        s->flipflop_rounding=1;
+        avctx->delay=0;
+        s->low_delay=1;
+        break;
+    case CODEC_ID_WMV2:
+        s->out_format = FMT_H263;
+        s->h263_msmpeg4 = 1;
+        s->h263_pred = 1;
+        s->unrestricted_mv = 1;
+        s->msmpeg4_version= 5;
+        s->flipflop_rounding=1;
+        avctx->delay=0;
+        s->low_delay=1;
+        break;
+    default:
+        return -1;
+    }
+
+    avctx->has_b_frames= !s->low_delay;
+
+    s->encoding = 1;
+
+    /* init */
+    if (MPV_common_init(s) < 0)
+        return -1;
+
+    if(s->modified_quant)
+        s->chroma_qscale_table= ff_h263_chroma_qscale_table;
+    s->progressive_frame=
+    s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN));
+    s->quant_precision=5;
+
+    ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
+    ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
+
+#ifdef CONFIG_H261_ENCODER
+    if (s->out_format == FMT_H261)
+        ff_h261_encode_init(s);
+#endif
+    if (s->out_format == FMT_H263)
+        h263_encode_init(s);
+    if(s->msmpeg4_version)
+        ff_msmpeg4_encode_init(s);
+    if (s->out_format == FMT_MPEG1)
+        ff_mpeg1_encode_init(s);
+
+    /* init q matrix */
+    for(i=0;i<64;i++) {
+        int j= s->dsp.idct_permutation[i];
+        if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
+            s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
+            s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
+        }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
+            s->intra_matrix[j] =
+            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
+        }else
+        { /* mpeg1/2 */
+            s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
+            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
+        }
+        if(s->avctx->intra_matrix)
+            s->intra_matrix[j] = s->avctx->intra_matrix[i];
+        if(s->avctx->inter_matrix)
+            s->inter_matrix[j] = s->avctx->inter_matrix[i];
+    }
+
+    /* precompute matrix */
+    /* for mjpeg, we do include qscale in the matrix */
+    if (s->out_format != FMT_MJPEG) {
+        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
+                       s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
+        convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
+                       s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
+    }
+
+    if(ff_rate_control_init(s) < 0)
+        return -1;
+
+    return 0;
+}
+
+int MPV_encode_end(AVCodecContext *avctx)
+{
+    MpegEncContext *s = avctx->priv_data;
+
+    ff_rate_control_uninit(s);
+
+    MPV_common_end(s);
+    if (s->out_format == FMT_MJPEG)
+        mjpeg_close(s);
+
+    av_freep(&avctx->extradata);
+
+    return 0;
+}
+
+#endif //CONFIG_ENCODERS
+
+void init_rl(RLTable *rl, int use_static)
+{
+    int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
+    uint8_t index_run[MAX_RUN+1];
+    int last, run, level, start, end, i;
+
+    /* If table is static, we can quit if rl->max_level[0] is not NULL */
+    if(use_static && rl->max_level[0])
+        return;
+
+    /* compute max_level[], max_run[] and index_run[] */
+    for(last=0;last<2;last++) {
+        if (last == 0) {
+            start = 0;
+            end = rl->last;
+        } else {
+            start = rl->last;
+            end = rl->n;
+        }
+
+        memset(max_level, 0, MAX_RUN + 1);
+        memset(max_run, 0, MAX_LEVEL + 1);
+        memset(index_run, rl->n, MAX_RUN + 1);
+        for(i=start;i<end;i++) {
+            run = rl->table_run[i];
+            level = rl->table_level[i];
+            if (index_run[run] == rl->n)
+                index_run[run] = i;
+            if (level > max_level[run])
+                max_level[run] = level;
+            if (run > max_run[level])
+                max_run[level] = run;
+        }
+        if(use_static)
+            rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
+        else
+            rl->max_level[last] = av_malloc(MAX_RUN + 1);
+        memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
+        if(use_static)
+            rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
+        else
+            rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
+        memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
+        if(use_static)
+            rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
+        else
+            rl->index_run[last] = av_malloc(MAX_RUN + 1);
+        memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
+    }
+}
+
+/* draw the edges of width 'w' of an image of size width, height */
+//FIXME check that this is ok for mpeg4 interlaced
+static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
+{
+    uint8_t *ptr, *last_line;
+    int i;
+
+    last_line = buf + (height - 1) * wrap;
+    for(i=0;i<w;i++) {
+        /* top and bottom */
+        memcpy(buf - (i + 1) * wrap, buf, width);
+        memcpy(last_line + (i + 1) * wrap, last_line, width);
+    }
+    /* left and right */
+    ptr = buf;
+    for(i=0;i<height;i++) {
+        memset(ptr - w, ptr[0], w);
+        memset(ptr + width, ptr[width-1], w);
+        ptr += wrap;
+    }
+    /* corners */
+    for(i=0;i<w;i++) {
+        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
+        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
+        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left */
+        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */
+    }
+}
+
+int ff_find_unused_picture(MpegEncContext *s, int shared){
+    int i;
+
+    if(shared){
+        for(i=0; i<MAX_PICTURE_COUNT; i++){
+            if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
+        }
+    }else{
+        for(i=0; i<MAX_PICTURE_COUNT; i++){
+            if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
+        }
+        for(i=0; i<MAX_PICTURE_COUNT; i++){
+            if(s->picture[i].data[0]==NULL) return i;
+        }
+    }
+
+    assert(0);
+    return -1;
+}
+
+static void update_noise_reduction(MpegEncContext *s){
+    int intra, i;
+
+    for(intra=0; intra<2; intra++){
+        if(s->dct_count[intra] > (1<<16)){
+            for(i=0; i<64; i++){
+                s->dct_error_sum[intra][i] >>=1;
+            }
+            s->dct_count[intra] >>= 1;
+        }
+
+        for(i=0; i<64; i++){
+            s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
+        }
+    }
+}
+
+/**
+ * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
+ */
+int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
+{
+    int i;
+    AVFrame *pic;
+    s->mb_skipped = 0;
+
+    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
+
+    /* mark&release old frames */
+    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
+        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
+
+        /* release forgotten pictures */
+        /* if(mpeg124/h263) */
+        if(!s->encoding){
+            for(i=0; i<MAX_PICTURE_COUNT; i++){
+                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
+                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
+                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
+                }
+            }
+        }
+    }
+alloc:
+    if(!s->encoding){
+        /* release non reference frames */
+        for(i=0; i<MAX_PICTURE_COUNT; i++){
+            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
+                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
+            }
+        }
+
+        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
+            pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
+        else{
+            i= ff_find_unused_picture(s, 0);
+            pic= (AVFrame*)&s->picture[i];
+        }
+
+        pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
+                        && !s->dropable ? 3 : 0;
+
+        pic->coded_picture_number= s->coded_picture_number++;
+
+        if( alloc_picture(s, (Picture*)pic, 0) < 0)
+            return -1;
+
+        s->current_picture_ptr= (Picture*)pic;
+        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
+        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
+    }
+
+    s->current_picture_ptr->pict_type= s->pict_type;
+//    if(s->flags && CODEC_FLAG_QSCALE)
+  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
+    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
+
+    copy_picture(&s->current_picture, s->current_picture_ptr);
+
+  if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
+    if (s->pict_type != B_TYPE) {
+        s->last_picture_ptr= s->next_picture_ptr;
+        if(!s->dropable)
+            s->next_picture_ptr= s->current_picture_ptr;
+    }
+/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
+        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
+        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
+        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
+        s->pict_type, s->dropable);*/
+
+    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
+    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
+
+    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
+        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
+        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
+        goto alloc;
+    }
+
+    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
+
+    if(s->picture_structure!=PICT_FRAME){
+        int i;
+        for(i=0; i<4; i++){
+            if(s->picture_structure == PICT_BOTTOM_FIELD){
+                 s->current_picture.data[i] += s->current_picture.linesize[i];
+            }
+            s->current_picture.linesize[i] *= 2;
+            s->last_picture.linesize[i] *=2;
+            s->next_picture.linesize[i] *=2;
+        }
+    }
+  }
+
+    s->hurry_up= s->avctx->hurry_up;
+    s->error_resilience= avctx->error_resilience;
+
+    /* set dequantizer, we can't do it during init as it might change for mpeg4
+       and we can't do it in the header decode as init isnt called for mpeg4 there yet */
+    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
+        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
+        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
+    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
+        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
+        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
+    }else{
+        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
+        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
+    }
+
+    if(s->dct_error_sum){
+        assert(s->avctx->noise_reduction && s->encoding);
+
+        update_noise_reduction(s);
+    }
+
+#ifdef HAVE_XVMC
+    if(s->avctx->xvmc_acceleration)
+        return XVMC_field_start(s, avctx);
+#endif
+    return 0;
+}
+
+/* generic function for encode/decode called after a frame has been coded/decoded */
+void MPV_frame_end(MpegEncContext *s)
+{
+    int i;
+    /* draw edge for correct motion prediction if outside */
+#ifdef HAVE_XVMC
+//just to make sure that all data is rendered.
+    if(s->avctx->xvmc_acceleration){
+        XVMC_field_end(s);
+    }else
+#endif
+    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
+            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
+            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
+            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
+    }
+    emms_c();
+
+    s->last_pict_type    = s->pict_type;
+    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
+    if(s->pict_type!=B_TYPE){
+        s->last_non_b_pict_type= s->pict_type;
+    }
+#if 0
+        /* copy back current_picture variables */
+    for(i=0; i<MAX_PICTURE_COUNT; i++){
+        if(s->picture[i].data[0] == s->current_picture.data[0]){
+            s->picture[i]= s->current_picture;
+            break;
+        }
+    }
+    assert(i<MAX_PICTURE_COUNT);
+#endif
+
+    if(s->encoding){
+        /* release non-reference frames */
+        for(i=0; i<MAX_PICTURE_COUNT; i++){
+            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
+                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
+            }
+        }
+    }
+    // clear copies, to avoid confusion
+#if 0
+    memset(&s->last_picture, 0, sizeof(Picture));
+    memset(&s->next_picture, 0, sizeof(Picture));
+    memset(&s->current_picture, 0, sizeof(Picture));
+#endif
+    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
+}
+
+/**
+ * draws an line from (ex, ey) -> (sx, sy).
+ * @param w width of the image
+ * @param h height of the image
+ * @param stride stride/linesize of the image
+ * @param color color of the arrow
+ */
+static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
+    int x, y, fr, f;
+
+    sx= clip(sx, 0, w-1);
+    sy= clip(sy, 0, h-1);
+    ex= clip(ex, 0, w-1);
+    ey= clip(ey, 0, h-1);
+
+    buf[sy*stride + sx]+= color;
+
+    if(FFABS(ex - sx) > FFABS(ey - sy)){
+        if(sx > ex){
+            FFSWAP(int, sx, ex);
+            FFSWAP(int, sy, ey);
+        }
+        buf+= sx + sy*stride;
+        ex-= sx;
+        f= ((ey-sy)<<16)/ex;
+        for(x= 0; x <= ex; x++){
+            y = (x*f)>>16;
+            fr= (x*f)&0xFFFF;
+            buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
+            buf[(y+1)*stride + x]+= (color*         fr )>>16;
+        }
+    }else{
+        if(sy > ey){
+            FFSWAP(int, sx, ex);
+            FFSWAP(int, sy, ey);
+        }
+        buf+= sx + sy*stride;
+        ey-= sy;
+        if(ey) f= ((ex-sx)<<16)/ey;
+        else   f= 0;
+        for(y= 0; y <= ey; y++){
+            x = (y*f)>>16;
+            fr= (y*f)&0xFFFF;
+            buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
+            buf[y*stride + x+1]+= (color*         fr )>>16;;
+        }
+    }
+}
+
+/**
+ * draws an arrow from (ex, ey) -> (sx, sy).
+ * @param w width of the image
+ * @param h height of the image
+ * @param stride stride/linesize of the image
+ * @param color color of the arrow
+ */
+static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
+    int dx,dy;
+
+    sx= clip(sx, -100, w+100);
+    sy= clip(sy, -100, h+100);
+    ex= clip(ex, -100, w+100);
+    ey= clip(ey, -100, h+100);
+
+    dx= ex - sx;
+    dy= ey - sy;
+
+    if(dx*dx + dy*dy > 3*3){
+        int rx=  dx + dy;
+        int ry= -dx + dy;
+        int length= ff_sqrt((rx*rx + ry*ry)<<8);
+
+        //FIXME subpixel accuracy
+        rx= ROUNDED_DIV(rx*3<<4, length);
+        ry= ROUNDED_DIV(ry*3<<4, length);
+
+        draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
+        draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
+    }
+    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
+}
+
+/**
+ * prints debuging info for the given picture.
+ */
+void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
+
+    if(!pict || !pict->mb_type) return;
+
+    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
+        int x,y;
+
+        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
+        switch (pict->pict_type) {
+            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
+            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
+            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
+            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
+            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
+            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
+        }
+        for(y=0; y<s->mb_height; y++){
+            for(x=0; x<s->mb_width; x++){
+                if(s->avctx->debug&FF_DEBUG_SKIP){
+                    int count= s->mbskip_table[x + y*s->mb_stride];
+                    if(count>9) count=9;
+                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
+                }
+                if(s->avctx->debug&FF_DEBUG_QP){
+                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
+                }
+                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
+                    int mb_type= pict->mb_type[x + y*s->mb_stride];
+                    //Type & MV direction
+                    if(IS_PCM(mb_type))
+                        av_log(s->avctx, AV_LOG_DEBUG, "P");
+                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
+                        av_log(s->avctx, AV_LOG_DEBUG, "A");
+                    else if(IS_INTRA4x4(mb_type))
+                        av_log(s->avctx, AV_LOG_DEBUG, "i");
+                    else if(IS_INTRA16x16(mb_type))
+                        av_log(s->avctx, AV_LOG_DEBUG, "I");
+                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
+                        av_log(s->avctx, AV_LOG_DEBUG, "d");
+                    else if(IS_DIRECT(mb_type))
+                        av_log(s->avctx, AV_LOG_DEBUG, "D");
+                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
+                        av_log(s->avctx, AV_LOG_DEBUG, "g");
+                    else if(IS_GMC(mb_type))
+                        av_log(s->avctx, AV_LOG_DEBUG, "G");
+                    else if(IS_SKIP(mb_type))
+                        av_log(s->avctx, AV_LOG_DEBUG, "S");
+                    else if(!USES_LIST(mb_type, 1))
+                        av_log(s->avctx, AV_LOG_DEBUG, ">");
+                    else if(!USES_LIST(mb_type, 0))
+                        av_log(s->avctx, AV_LOG_DEBUG, "<");
+                    else{
+                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
+                        av_log(s->avctx, AV_LOG_DEBUG, "X");
+                    }
+
+                    //segmentation
+                    if(IS_8X8(mb_type))
+                        av_log(s->avctx, AV_LOG_DEBUG, "+");
+                    else if(IS_16X8(mb_type))
+                        av_log(s->avctx, AV_LOG_DEBUG, "-");
+                    else if(IS_8X16(mb_type))
+                        av_log(s->avctx, AV_LOG_DEBUG, "|");
+                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
+                        av_log(s->avctx, AV_LOG_DEBUG, " ");
+                    else
+                        av_log(s->avctx, AV_LOG_DEBUG, "?");
+
+
+                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
+                        av_log(s->avctx, AV_LOG_DEBUG, "=");
+                    else
+                        av_log(s->avctx, AV_LOG_DEBUG, " ");
+                }
+//                av_log(s->avctx, AV_LOG_DEBUG, " ");
+            }
+            av_log(s->avctx, AV_LOG_DEBUG, "\n");
+        }
+    }
+
+    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
+        const int shift= 1 + s->quarter_sample;
+        int mb_y;
+        uint8_t *ptr;
+        int i;
+        int h_chroma_shift, v_chroma_shift;
+        const int width = s->avctx->width;
+        const int height= s->avctx->height;
+        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
+        const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
+        s->low_delay=0; //needed to see the vectors without trashing the buffers
+
+        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
+        for(i=0; i<3; i++){
+            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
+            pict->data[i]= s->visualization_buffer[i];
+        }
+        pict->type= FF_BUFFER_TYPE_COPY;
+        ptr= pict->data[0];
+
+        for(mb_y=0; mb_y<s->mb_height; mb_y++){
+            int mb_x;
+            for(mb_x=0; mb_x<s->mb_width; mb_x++){
+                const int mb_index= mb_x + mb_y*s->mb_stride;
+                if((s->avctx->debug_mv) && pict->motion_val){
+                  int type;
+                  for(type=0; type<3; type++){
+                    int direction = 0;
+                    switch (type) {
+                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
+                                continue;
+                              direction = 0;
+                              break;
+                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
+                                continue;
+                              direction = 0;
+                              break;
+                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
+                                continue;
+                              direction = 1;
+                              break;
+                    }
+                    if(!USES_LIST(pict->mb_type[mb_index], direction))
+                        continue;
+
+                    if(IS_8X8(pict->mb_type[mb_index])){
+                      int i;
+                      for(i=0; i<4; i++){
+                        int sx= mb_x*16 + 4 + 8*(i&1);
+                        int sy= mb_y*16 + 4 + 8*(i>>1);
+                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
+                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
+                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
+                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
+                      }
+                    }else if(IS_16X8(pict->mb_type[mb_index])){
+                      int i;
+                      for(i=0; i<2; i++){
+                        int sx=mb_x*16 + 8;
+                        int sy=mb_y*16 + 4 + 8*i;
+                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
+                        int mx=(pict->motion_val[direction][xy][0]>>shift);
+                        int my=(pict->motion_val[direction][xy][1]>>shift);
+
+                        if(IS_INTERLACED(pict->mb_type[mb_index]))
+                            my*=2;
+
+                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
+                      }
+                    }else if(IS_8X16(pict->mb_type[mb_index])){
+                      int i;
+                      for(i=0; i<2; i++){
+                        int sx=mb_x*16 + 4 + 8*i;
+                        int sy=mb_y*16 + 8;
+                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
+                        int mx=(pict->motion_val[direction][xy][0]>>shift);
+                        int my=(pict->motion_val[direction][xy][1]>>shift);
+
+                        if(IS_INTERLACED(pict->mb_type[mb_index]))
+                            my*=2;
+
+                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
+                      }
+                    }else{
+                      int sx= mb_x*16 + 8;
+                      int sy= mb_y*16 + 8;
+                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
+                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
+                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
+                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
+                    }
+                  }
+                }
+                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
+                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
+                    int y;
+                    for(y=0; y<8; y++){
+                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
+                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
+                    }
+                }
+                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
+                    int mb_type= pict->mb_type[mb_index];
+                    uint64_t u,v;
+                    int y;
+#define COLOR(theta, r)\
+u= (int)(128 + r*cos(theta*3.141592/180));\
+v= (int)(128 + r*sin(theta*3.141592/180));
+
+
+                    u=v=128;
+                    if(IS_PCM(mb_type)){
+                        COLOR(120,48)
+                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
+                        COLOR(30,48)
+                    }else if(IS_INTRA4x4(mb_type)){
+                        COLOR(90,48)
+                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
+//                        COLOR(120,48)
+                    }else if(IS_DIRECT(mb_type)){
+                        COLOR(150,48)
+                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
+                        COLOR(170,48)
+                    }else if(IS_GMC(mb_type)){
+                        COLOR(190,48)
+                    }else if(IS_SKIP(mb_type)){
+//                        COLOR(180,48)
+                    }else if(!USES_LIST(mb_type, 1)){
+                        COLOR(240,48)
+                    }else if(!USES_LIST(mb_type, 0)){
+                        COLOR(0,48)
+                    }else{
+                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
+                        COLOR(300,48)
+                    }
+
+                    u*= 0x0101010101010101ULL;
+                    v*= 0x0101010101010101ULL;
+                    for(y=0; y<8; y++){
+                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
+                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
+                    }
+
+                    //segmentation
+                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
+                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
+                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
+                    }
+                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
+                        for(y=0; y<16; y++)
+                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
+                    }
+                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
+                        int dm= 1 << (mv_sample_log2-2);
+                        for(i=0; i<4; i++){
+                            int sx= mb_x*16 + 8*(i&1);
+                            int sy= mb_y*16 + 8*(i>>1);
+                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
+                            //FIXME bidir
+                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
+                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
+                                for(y=0; y<8; y++)
+                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
+                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
+                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
+                        }
+                    }
+
+                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
+                        // hmm
+                    }
+                }
+                s->mbskip_table[mb_index]=0;
+            }
+        }
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+
+static int get_sae(uint8_t *src, int ref, int stride){
+    int x,y;
+    int acc=0;
+
+    for(y=0; y<16; y++){
+        for(x=0; x<16; x++){
+            acc+= FFABS(src[x+y*stride] - ref);
+        }
+    }
+
+    return acc;
+}
+
+static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
+    int x, y, w, h;
+    int acc=0;
+
+    w= s->width &~15;
+    h= s->height&~15;
+
+    for(y=0; y<h; y+=16){
+        for(x=0; x<w; x+=16){
+            int offset= x + y*stride;
+            int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
+            int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
+            int sae = get_sae(src + offset, mean, stride);
+
+            acc+= sae + 500 < sad;
+        }
+    }
+    return acc;
+}
+
+
+static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
+    AVFrame *pic=NULL;
+    int64_t pts;
+    int i;
+    const int encoding_delay= s->max_b_frames;
+    int direct=1;
+
+    if(pic_arg){
+        pts= pic_arg->pts;
+        pic_arg->display_picture_number= s->input_picture_number++;
+
+        if(pts != AV_NOPTS_VALUE){
+            if(s->user_specified_pts != AV_NOPTS_VALUE){
+                int64_t time= pts;
+                int64_t last= s->user_specified_pts;
+
+                if(time <= last){
+                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
+                    return -1;
+                }
+            }
+            s->user_specified_pts= pts;
+        }else{
+            if(s->user_specified_pts != AV_NOPTS_VALUE){
+                s->user_specified_pts=
+                pts= s->user_specified_pts + 1;
+                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
+            }else{
+                pts= pic_arg->display_picture_number;
+            }
+        }
+    }
+
+  if(pic_arg){
+    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
+    if(pic_arg->linesize[0] != s->linesize) direct=0;
+    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
+    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
+
+//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
+
+    if(direct){
+        i= ff_find_unused_picture(s, 1);
+
+        pic= (AVFrame*)&s->picture[i];
+        pic->reference= 3;
+
+        for(i=0; i<4; i++){
+            pic->data[i]= pic_arg->data[i];
+            pic->linesize[i]= pic_arg->linesize[i];
+        }
+        alloc_picture(s, (Picture*)pic, 1);
+    }else{
+        i= ff_find_unused_picture(s, 0);
+
+        pic= (AVFrame*)&s->picture[i];
+        pic->reference= 3;
+
+        alloc_picture(s, (Picture*)pic, 0);
+
+        if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
+           && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
+           && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
+       // empty
+        }else{
+            int h_chroma_shift, v_chroma_shift;
+            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
+
+            for(i=0; i<3; i++){
+                int src_stride= pic_arg->linesize[i];
+                int dst_stride= i ? s->uvlinesize : s->linesize;
+                int h_shift= i ? h_chroma_shift : 0;
+                int v_shift= i ? v_chroma_shift : 0;
+                int w= s->width >>h_shift;
+                int h= s->height>>v_shift;
+                uint8_t *src= pic_arg->data[i];
+                uint8_t *dst= pic->data[i];
+
+                if(!s->avctx->rc_buffer_size)
+                    dst +=INPLACE_OFFSET;
+
+                if(src_stride==dst_stride)
+                    memcpy(dst, src, src_stride*h);
+                else{
+                    while(h--){
+                        memcpy(dst, src, w);
+                        dst += dst_stride;
+                        src += src_stride;
+                    }
+                }
+            }
+        }
+    }
+    copy_picture_attributes(s, pic, pic_arg);
+    pic->pts= pts; //we set this here to avoid modifiying pic_arg
+  }
+
+    /* shift buffer entries */
+    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
+        s->input_picture[i-1]= s->input_picture[i];
+
+    s->input_picture[encoding_delay]= (Picture*)pic;
+
+    return 0;
+}
+
+static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
+    int x, y, plane;
+    int score=0;
+    int64_t score64=0;
+
+    for(plane=0; plane<3; plane++){
+        const int stride= p->linesize[plane];
+        const int bw= plane ? 1 : 2;
+        for(y=0; y<s->mb_height*bw; y++){
+            for(x=0; x<s->mb_width*bw; x++){
+                int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
+                int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
+
+                switch(s->avctx->frame_skip_exp){
+                    case 0: score= FFMAX(score, v); break;
+                    case 1: score+= FFABS(v);break;
+                    case 2: score+= v*v;break;
+                    case 3: score64+= FFABS(v*v*(int64_t)v);break;
+                    case 4: score64+= v*v*(int64_t)(v*v);break;
+                }
+            }
+        }
+    }
+
+    if(score) score64= score;
+
+    if(score64 < s->avctx->frame_skip_threshold)
+        return 1;
+    if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
+        return 1;
+    return 0;
+}
+
+static int estimate_best_b_count(MpegEncContext *s){
+    AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
+    AVCodecContext *c= avcodec_alloc_context();
+    AVFrame input[FF_MAX_B_FRAMES+2];
+    const int scale= s->avctx->brd_scale;
+    int i, j, out_size, p_lambda, b_lambda, lambda2;
+    int outbuf_size= s->width * s->height; //FIXME
+    uint8_t *outbuf= av_malloc(outbuf_size);
+    int64_t best_rd= INT64_MAX;
+    int best_b_count= -1;
+
+    assert(scale>=0 && scale <=3);
+
+//    emms_c();
+    p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
+    b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
+    if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
+    lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
+
+    c->width = s->width >> scale;
+    c->height= s->height>> scale;
+    c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
+    c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
+    c->mb_decision= s->avctx->mb_decision;
+    c->me_cmp= s->avctx->me_cmp;
+    c->mb_cmp= s->avctx->mb_cmp;
+    c->me_sub_cmp= s->avctx->me_sub_cmp;
+    c->pix_fmt = PIX_FMT_YUV420P;
+    c->time_base= s->avctx->time_base;
+    c->max_b_frames= s->max_b_frames;
+
+    if (avcodec_open(c, codec) < 0)
+        return -1;
+
+    for(i=0; i<s->max_b_frames+2; i++){
+        int ysize= c->width*c->height;
+        int csize= (c->width/2)*(c->height/2);
+        Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
+
+        if(pre_input_ptr)
+            pre_input= *pre_input_ptr;
+
+        if(pre_input.type != FF_BUFFER_TYPE_SHARED && i){
+            pre_input.data[0]+=INPLACE_OFFSET;
+            pre_input.data[1]+=INPLACE_OFFSET;
+            pre_input.data[2]+=INPLACE_OFFSET;
+        }
+
+        avcodec_get_frame_defaults(&input[i]);
+        input[i].data[0]= av_malloc(ysize + 2*csize);
+        input[i].data[1]= input[i].data[0] + ysize;
+        input[i].data[2]= input[i].data[1] + csize;
+        input[i].linesize[0]= c->width;
+        input[i].linesize[1]=
+        input[i].linesize[2]= c->width/2;
+
+        if(!i || s->input_picture[i-1]){
+            s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
+            s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
+            s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
+        }
+    }
+
+    for(j=0; j<s->max_b_frames+1; j++){
+        int64_t rd=0;
+
+        if(!s->input_picture[j])
+            break;
+
+        c->error[0]= c->error[1]= c->error[2]= 0;
+
+        input[0].pict_type= I_TYPE;
+        input[0].quality= 1 * FF_QP2LAMBDA;
+        out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
+//        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
+
+        for(i=0; i<s->max_b_frames+1; i++){
+            int is_p= i % (j+1) == j || i==s->max_b_frames;
+
+            input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
+            input[i+1].quality= is_p ? p_lambda : b_lambda;
+            out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
+            rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
+        }
+
+        /* get the delayed frames */
+        while(out_size){
+            out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
+            rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
+        }
+
+        rd += c->error[0] + c->error[1] + c->error[2];
+
+        if(rd < best_rd){
+            best_rd= rd;
+            best_b_count= j;
+        }
+    }
+
+    av_freep(&outbuf);
+    avcodec_close(c);
+    av_freep(&c);
+
+    for(i=0; i<s->max_b_frames+2; i++){
+        av_freep(&input[i].data[0]);
+    }
+
+    return best_b_count;
+}
+
+static void select_input_picture(MpegEncContext *s){
+    int i;
+
+    for(i=1; i<MAX_PICTURE_COUNT; i++)
+        s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
+    s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
+
+    /* set next picture type & ordering */
+    if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
+        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
+            s->reordered_input_picture[0]= s->input_picture[0];
+            s->reordered_input_picture[0]->pict_type= I_TYPE;
+            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
+        }else{
+            int b_frames;
+
+            if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
+                if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
+                //FIXME check that te gop check above is +-1 correct
+//av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
+
+                    if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
+                        for(i=0; i<4; i++)
+                            s->input_picture[0]->data[i]= NULL;
+                        s->input_picture[0]->type= 0;
+                    }else{
+                        assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
+                               || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
+
+                        s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
+                    }
+
+                    emms_c();
+                    ff_vbv_update(s, 0);
+
+                    goto no_output_pic;
+                }
+            }
+
+            if(s->flags&CODEC_FLAG_PASS2){
+                for(i=0; i<s->max_b_frames+1; i++){
+                    int pict_num= s->input_picture[0]->display_picture_number + i;
+
+                    if(pict_num >= s->rc_context.num_entries)
+                        break;
+                    if(!s->input_picture[i]){
+                        s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
+                        break;
+                    }
+
+                    s->input_picture[i]->pict_type=
+                        s->rc_context.entry[pict_num].new_pict_type;
+                }
+            }
+
+            if(s->avctx->b_frame_strategy==0){
+                b_frames= s->max_b_frames;
+                while(b_frames && !s->input_picture[b_frames]) b_frames--;
+            }else if(s->avctx->b_frame_strategy==1){
+                for(i=1; i<s->max_b_frames+1; i++){
+                    if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
+                        s->input_picture[i]->b_frame_score=
+                            get_intra_count(s, s->input_picture[i  ]->data[0],
+                                               s->input_picture[i-1]->data[0], s->linesize) + 1;
+                    }
+                }
+                for(i=0; i<s->max_b_frames+1; i++){
+                    if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
+                }
+
+                b_frames= FFMAX(0, i-1);
+
+                /* reset scores */
+                for(i=0; i<b_frames+1; i++){
+                    s->input_picture[i]->b_frame_score=0;
+                }
+            }else if(s->avctx->b_frame_strategy==2){
+                b_frames= estimate_best_b_count(s);
+            }else{
+                av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
+                b_frames=0;
+            }
+
+            emms_c();
+//static int b_count=0;
+//b_count+= b_frames;
+//av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
+
+            for(i= b_frames - 1; i>=0; i--){
+                int type= s->input_picture[i]->pict_type;
+                if(type && type != B_TYPE)
+                    b_frames= i;
+            }
+            if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
+                av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
+            }
+
+            if(s->picture_in_gop_number + b_frames >= s->gop_size){
+              if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
+                    b_frames= s->gop_size - s->picture_in_gop_number - 1;
+              }else{
+                if(s->flags & CODEC_FLAG_CLOSED_GOP)
+                    b_frames=0;
+                s->input_picture[b_frames]->pict_type= I_TYPE;
+              }
+            }
+
+            if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
+               && b_frames
+               && s->input_picture[b_frames]->pict_type== I_TYPE)
+                b_frames--;
+
+            s->reordered_input_picture[0]= s->input_picture[b_frames];
+            if(s->reordered_input_picture[0]->pict_type != I_TYPE)
+                s->reordered_input_picture[0]->pict_type= P_TYPE;
+            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
+            for(i=0; i<b_frames; i++){
+                s->reordered_input_picture[i+1]= s->input_picture[i];
+                s->reordered_input_picture[i+1]->pict_type= B_TYPE;
+                s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
+            }
+        }
+    }
+no_output_pic:
+    if(s->reordered_input_picture[0]){
+        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
+
+        copy_picture(&s->new_picture, s->reordered_input_picture[0]);
+
+        if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED || s->avctx->rc_buffer_size){
+            // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
+
+            int i= ff_find_unused_picture(s, 0);
+            Picture *pic= &s->picture[i];
+
+            pic->reference              = s->reordered_input_picture[0]->reference;
+            alloc_picture(s, pic, 0);
+
+            /* mark us unused / free shared pic */
+            if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_INTERNAL)
+                s->avctx->release_buffer(s->avctx, (AVFrame*)s->reordered_input_picture[0]);
+            for(i=0; i<4; i++)
+                s->reordered_input_picture[0]->data[i]= NULL;
+            s->reordered_input_picture[0]->type= 0;
+
+            copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
+
+            s->current_picture_ptr= pic;
+        }else{
+            // input is not a shared pix -> reuse buffer for current_pix
+
+            assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
+                   || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
+
+            s->current_picture_ptr= s->reordered_input_picture[0];
+            for(i=0; i<4; i++){
+                s->new_picture.data[i]+= INPLACE_OFFSET;
+            }
+        }
+        copy_picture(&s->current_picture, s->current_picture_ptr);
+
+        s->picture_number= s->new_picture.display_picture_number;
+//printf("dpn:%d\n", s->picture_number);
+    }else{
+       memset(&s->new_picture, 0, sizeof(Picture));
+    }
+}
+
+int MPV_encode_picture(AVCodecContext *avctx,
+                       unsigned char *buf, int buf_size, void *data)
+{
+    MpegEncContext *s = avctx->priv_data;
+    AVFrame *pic_arg = data;
+    int i, stuffing_count;
+
+    for(i=0; i<avctx->thread_count; i++){
+        int start_y= s->thread_context[i]->start_mb_y;
+        int   end_y= s->thread_context[i]->  end_mb_y;
+        int h= s->mb_height;
+        uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
+        uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);
+
+        init_put_bits(&s->thread_context[i]->pb, start, end - start);
+    }
+
+    s->picture_in_gop_number++;
+
+    if(load_input_picture(s, pic_arg) < 0)
+        return -1;
+
+    select_input_picture(s);
+
+    /* output? */
+    if(s->new_picture.data[0]){
+        s->pict_type= s->new_picture.pict_type;
+//emms_c();
+//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
+        MPV_frame_start(s, avctx);
+vbv_retry:
+        if (encode_picture(s, s->picture_number) < 0)
+            return -1;
+
+        avctx->real_pict_num  = s->picture_number;
+        avctx->header_bits = s->header_bits;
+        avctx->mv_bits     = s->mv_bits;
+        avctx->misc_bits   = s->misc_bits;
+        avctx->i_tex_bits  = s->i_tex_bits;
+        avctx->p_tex_bits  = s->p_tex_bits;
+        avctx->i_count     = s->i_count;
+        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
+        avctx->skip_count  = s->skip_count;
+
+        MPV_frame_end(s);
+
+        if (s->out_format == FMT_MJPEG)
+            mjpeg_picture_trailer(s);
+
+        if(avctx->rc_buffer_size){
+            RateControlContext *rcc= &s->rc_context;
+            int max_size= rcc->buffer_index/3;
+
+            if(put_bits_count(&s->pb) > max_size && s->lambda < s->avctx->lmax){
+                s->next_lambda= FFMAX(s->lambda+1, s->lambda*(s->qscale+1) / s->qscale);
+                if(s->adaptive_quant){
+                    int i;
+                    for(i=0; i<s->mb_height*s->mb_stride; i++)
+                        s->lambda_table[i]= FFMAX(s->lambda_table[i]+1, s->lambda_table[i]*(s->qscale+1) / s->qscale);
+                }
+                s->mb_skipped = 0;        //done in MPV_frame_start()
+                if(s->pict_type==P_TYPE){ //done in encode_picture() so we must undo it
+                    if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
+                        s->no_rounding ^= 1;
+                }
+                if(s->pict_type!=B_TYPE){
+                    s->time_base= s->last_time_base;
+                    s->last_non_b_time= s->time - s->pp_time;
+                }
+//                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
+                for(i=0; i<avctx->thread_count; i++){
+                    PutBitContext *pb= &s->thread_context[i]->pb;
+                    init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
+                }
+                goto vbv_retry;
+            }
+
+            assert(s->avctx->rc_max_rate);
+        }
+
+        if(s->flags&CODEC_FLAG_PASS1)
+            ff_write_pass1_stats(s);
+
+        for(i=0; i<4; i++){
+            s->current_picture_ptr->error[i]= s->current_picture.error[i];
+            avctx->error[i] += s->current_picture_ptr->error[i];
+        }
+
+        if(s->flags&CODEC_FLAG_PASS1)
+            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
+        flush_put_bits(&s->pb);
+        s->frame_bits  = put_bits_count(&s->pb);
+
+        stuffing_count= ff_vbv_update(s, s->frame_bits);
+        if(stuffing_count){
+            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
+                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
+                return -1;
+            }
+
+            switch(s->codec_id){
+            case CODEC_ID_MPEG1VIDEO:
+            case CODEC_ID_MPEG2VIDEO:
+                while(stuffing_count--){
+                    put_bits(&s->pb, 8, 0);
+                }
+            break;
+            case CODEC_ID_MPEG4:
+                put_bits(&s->pb, 16, 0);
+                put_bits(&s->pb, 16, 0x1C3);
+                stuffing_count -= 4;
+                while(stuffing_count--){
+                    put_bits(&s->pb, 8, 0xFF);
+                }
+            break;
+            default:
+                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
+            }
+            flush_put_bits(&s->pb);
+            s->frame_bits  = put_bits_count(&s->pb);
+        }
+
+        /* update mpeg1/2 vbv_delay for CBR */
+        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
+           && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
+            int vbv_delay;
+
+            assert(s->repeat_first_field==0);
+
+            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
+            assert(vbv_delay < 0xFFFF);
+
+            s->vbv_delay_ptr[0] &= 0xF8;
+            s->vbv_delay_ptr[0] |= vbv_delay>>13;
+            s->vbv_delay_ptr[1]  = vbv_delay>>5;
+            s->vbv_delay_ptr[2] &= 0x07;
+            s->vbv_delay_ptr[2] |= vbv_delay<<3;
+        }
+        s->total_bits += s->frame_bits;
+        avctx->frame_bits  = s->frame_bits;
+    }else{
+        assert((pbBufPtr(&s->pb) == s->pb.buf));
+        s->frame_bits=0;
+    }
+    assert((s->frame_bits&7)==0);
+
+    return s->frame_bits/8;
+}
+
+#endif //CONFIG_ENCODERS
+
+static inline void gmc1_motion(MpegEncContext *s,
+                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                               uint8_t **ref_picture)
+{
+    uint8_t *ptr;
+    int offset, src_x, src_y, linesize, uvlinesize;
+    int motion_x, motion_y;
+    int emu=0;
+
+    motion_x= s->sprite_offset[0][0];
+    motion_y= s->sprite_offset[0][1];
+    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
+    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
+    motion_x<<=(3-s->sprite_warping_accuracy);
+    motion_y<<=(3-s->sprite_warping_accuracy);
+    src_x = clip(src_x, -16, s->width);
+    if (src_x == s->width)
+        motion_x =0;
+    src_y = clip(src_y, -16, s->height);
+    if (src_y == s->height)
+        motion_y =0;
+
+    linesize = s->linesize;
+    uvlinesize = s->uvlinesize;
+
+    ptr = ref_picture[0] + (src_y * linesize) + src_x;
+
+    if(s->flags&CODEC_FLAG_EMU_EDGE){
+        if(   (unsigned)src_x >= s->h_edge_pos - 17
+           || (unsigned)src_y >= s->v_edge_pos - 17){
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
+            ptr= s->edge_emu_buffer;
+        }
+    }
+
+    if((motion_x|motion_y)&7){
+        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
+        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
+    }else{
+        int dxy;
+
+        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
+        if (s->no_rounding){
+            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
+        }else{
+            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
+        }
+    }
+
+    if(s->flags&CODEC_FLAG_GRAY) return;
+
+    motion_x= s->sprite_offset[1][0];
+    motion_y= s->sprite_offset[1][1];
+    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
+    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
+    motion_x<<=(3-s->sprite_warping_accuracy);
+    motion_y<<=(3-s->sprite_warping_accuracy);
+    src_x = clip(src_x, -8, s->width>>1);
+    if (src_x == s->width>>1)
+        motion_x =0;
+    src_y = clip(src_y, -8, s->height>>1);
+    if (src_y == s->height>>1)
+        motion_y =0;
+
+    offset = (src_y * uvlinesize) + src_x;
+    ptr = ref_picture[1] + offset;
+    if(s->flags&CODEC_FLAG_EMU_EDGE){
+        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
+           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+            ptr= s->edge_emu_buffer;
+            emu=1;
+        }
+    }
+    s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
+
+    ptr = ref_picture[2] + offset;
+    if(emu){
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+        ptr= s->edge_emu_buffer;
+    }
+    s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
+
+    return;
+}
+
+static inline void gmc_motion(MpegEncContext *s,
+                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                               uint8_t **ref_picture)
+{
+    uint8_t *ptr;
+    int linesize, uvlinesize;
+    const int a= s->sprite_warping_accuracy;
+    int ox, oy;
+
+    linesize = s->linesize;
+    uvlinesize = s->uvlinesize;
+
+    ptr = ref_picture[0];
+
+    ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
+    oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
+
+    s->dsp.gmc(dest_y, ptr, linesize, 16,
+           ox,
+           oy,
+           s->sprite_delta[0][0], s->sprite_delta[0][1],
+           s->sprite_delta[1][0], s->sprite_delta[1][1],
+           a+1, (1<<(2*a+1)) - s->no_rounding,
+           s->h_edge_pos, s->v_edge_pos);
+    s->dsp.gmc(dest_y+8, ptr, linesize, 16,
+           ox + s->sprite_delta[0][0]*8,
+           oy + s->sprite_delta[1][0]*8,
+           s->sprite_delta[0][0], s->sprite_delta[0][1],
+           s->sprite_delta[1][0], s->sprite_delta[1][1],
+           a+1, (1<<(2*a+1)) - s->no_rounding,
+           s->h_edge_pos, s->v_edge_pos);
+
+    if(s->flags&CODEC_FLAG_GRAY) return;
+
+    ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
+    oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
+
+    ptr = ref_picture[1];
+    s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
+           ox,
+           oy,
+           s->sprite_delta[0][0], s->sprite_delta[0][1],
+           s->sprite_delta[1][0], s->sprite_delta[1][1],
+           a+1, (1<<(2*a+1)) - s->no_rounding,
+           s->h_edge_pos>>1, s->v_edge_pos>>1);
+
+    ptr = ref_picture[2];
+    s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
+           ox,
+           oy,
+           s->sprite_delta[0][0], s->sprite_delta[0][1],
+           s->sprite_delta[1][0], s->sprite_delta[1][1],
+           a+1, (1<<(2*a+1)) - s->no_rounding,
+           s->h_edge_pos>>1, s->v_edge_pos>>1);
+}
+
+/**
+ * Copies a rectangular area of samples to a temporary buffer and replicates the boarder samples.
+ * @param buf destination buffer
+ * @param src source buffer
+ * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
+ * @param block_w width of block
+ * @param block_h height of block
+ * @param src_x x coordinate of the top left sample of the block in the source buffer
+ * @param src_y y coordinate of the top left sample of the block in the source buffer
+ * @param w width of the source buffer
+ * @param h height of the source buffer
+ */
+void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
+                                    int src_x, int src_y, int w, int h){
+    int x, y;
+    int start_y, start_x, end_y, end_x;
+
+    if(src_y>= h){
+        src+= (h-1-src_y)*linesize;
+        src_y=h-1;
+    }else if(src_y<=-block_h){
+        src+= (1-block_h-src_y)*linesize;
+        src_y=1-block_h;
+    }
+    if(src_x>= w){
+        src+= (w-1-src_x);
+        src_x=w-1;
+    }else if(src_x<=-block_w){
+        src+= (1-block_w-src_x);
+        src_x=1-block_w;
+    }
+
+    start_y= FFMAX(0, -src_y);
+    start_x= FFMAX(0, -src_x);
+    end_y= FFMIN(block_h, h-src_y);
+    end_x= FFMIN(block_w, w-src_x);
+
+    // copy existing part
+    for(y=start_y; y<end_y; y++){
+        for(x=start_x; x<end_x; x++){
+            buf[x + y*linesize]= src[x + y*linesize];
+        }
+    }
+
+    //top
+    for(y=0; y<start_y; y++){
+        for(x=start_x; x<end_x; x++){
+            buf[x + y*linesize]= buf[x + start_y*linesize];
+        }
+    }
+
+    //bottom
+    for(y=end_y; y<block_h; y++){
+        for(x=start_x; x<end_x; x++){
+            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
+        }
+    }
+
+    for(y=0; y<block_h; y++){
+       //left
+        for(x=0; x<start_x; x++){
+            buf[x + y*linesize]= buf[start_x + y*linesize];
+        }
+
+       //right
+        for(x=end_x; x<block_w; x++){
+            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
+        }
+    }
+}
+
+static inline int hpel_motion(MpegEncContext *s,
+                                  uint8_t *dest, uint8_t *src,
+                                  int field_based, int field_select,
+                                  int src_x, int src_y,
+                                  int width, int height, int stride,
+                                  int h_edge_pos, int v_edge_pos,
+                                  int w, int h, op_pixels_func *pix_op,
+                                  int motion_x, int motion_y)
+{
+    int dxy;
+    int emu=0;
+
+    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
+    src_x += motion_x >> 1;
+    src_y += motion_y >> 1;
+
+    /* WARNING: do no forget half pels */
+    src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
+    if (src_x == width)
+        dxy &= ~1;
+    src_y = clip(src_y, -16, height);
+    if (src_y == height)
+        dxy &= ~2;
+    src += src_y * stride + src_x;
+
+    if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
+        if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
+           || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
+            ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
+                             src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
+            src= s->edge_emu_buffer;
+            emu=1;
+        }
+    }
+    if(field_select)
+        src += s->linesize;
+    pix_op[dxy](dest, src, stride, h);
+    return emu;
+}
+
+static inline int hpel_motion_lowres(MpegEncContext *s,
+                                  uint8_t *dest, uint8_t *src,
+                                  int field_based, int field_select,
+                                  int src_x, int src_y,
+                                  int width, int height, int stride,
+                                  int h_edge_pos, int v_edge_pos,
+                                  int w, int h, h264_chroma_mc_func *pix_op,
+                                  int motion_x, int motion_y)
+{
+    const int lowres= s->avctx->lowres;
+    const int s_mask= (2<<lowres)-1;
+    int emu=0;
+    int sx, sy;
+
+    if(s->quarter_sample){
+        motion_x/=2;
+        motion_y/=2;
+    }
+
+    sx= motion_x & s_mask;
+    sy= motion_y & s_mask;
+    src_x += motion_x >> (lowres+1);
+    src_y += motion_y >> (lowres+1);
+
+    src += src_y * stride + src_x;
+
+    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
+       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
+        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
+                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
+        src= s->edge_emu_buffer;
+        emu=1;
+    }
+
+    sx <<= 2 - lowres;
+    sy <<= 2 - lowres;
+    if(field_select)
+        src += s->linesize;
+    pix_op[lowres](dest, src, stride, h, sx, sy);
+    return emu;
+}
+
+/* apply one mpeg motion vector to the three components */
+static always_inline void mpeg_motion(MpegEncContext *s,
+                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                               int field_based, int bottom_field, int field_select,
+                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
+                               int motion_x, int motion_y, int h)
+{
+    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
+    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
+
+#if 0
+if(s->quarter_sample)
+{
+    motion_x>>=1;
+    motion_y>>=1;
+}
+#endif
+
+    v_edge_pos = s->v_edge_pos >> field_based;
+    linesize   = s->current_picture.linesize[0] << field_based;
+    uvlinesize = s->current_picture.linesize[1] << field_based;
+
+    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
+    src_x = s->mb_x* 16               + (motion_x >> 1);
+    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
+
+    if (s->out_format == FMT_H263) {
+        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
+            mx = (motion_x>>1)|(motion_x&1);
+            my = motion_y >>1;
+            uvdxy = ((my & 1) << 1) | (mx & 1);
+            uvsrc_x = s->mb_x* 8               + (mx >> 1);
+            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
+        }else{
+            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
+            uvsrc_x = src_x>>1;
+            uvsrc_y = src_y>>1;
+        }
+    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
+        mx = motion_x / 4;
+        my = motion_y / 4;
+        uvdxy = 0;
+        uvsrc_x = s->mb_x*8 + mx;
+        uvsrc_y = s->mb_y*8 + my;
+    } else {
+        if(s->chroma_y_shift){
+            mx = motion_x / 2;
+            my = motion_y / 2;
+            uvdxy = ((my & 1) << 1) | (mx & 1);
+            uvsrc_x = s->mb_x* 8               + (mx >> 1);
+            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
+        } else {
+            if(s->chroma_x_shift){
+            //Chroma422
+                mx = motion_x / 2;
+                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
+                uvsrc_x = s->mb_x* 8           + (mx >> 1);
+                uvsrc_y = src_y;
+            } else {
+            //Chroma444
+                uvdxy = dxy;
+                uvsrc_x = src_x;
+                uvsrc_y = src_y;
+            }
+        }
+    }
+
+    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
+    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
+    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
+
+    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
+       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
+            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
+               s->codec_id == CODEC_ID_MPEG1VIDEO){
+                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
+                return ;
+            }
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
+                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
+            ptr_y = s->edge_emu_buffer;
+            if(!(s->flags&CODEC_FLAG_GRAY)){
+                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
+                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
+                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
+                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
+                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
+                ptr_cb= uvbuf;
+                ptr_cr= uvbuf+16;
+            }
+    }
+
+    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
+        dest_y += s->linesize;
+        dest_cb+= s->uvlinesize;
+        dest_cr+= s->uvlinesize;
+    }
+
+    if(field_select){
+        ptr_y += s->linesize;
+        ptr_cb+= s->uvlinesize;
+        ptr_cr+= s->uvlinesize;
+    }
+
+    pix_op[0][dxy](dest_y, ptr_y, linesize, h);
+
+    if(!(s->flags&CODEC_FLAG_GRAY)){
+        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
+        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
+    }
+#if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
+    if(s->out_format == FMT_H261){
+        ff_h261_loop_filter(s);
+    }
+#endif
+}
+
+/* apply one mpeg motion vector to the three components */
+static always_inline void mpeg_motion_lowres(MpegEncContext *s,
+                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                               int field_based, int bottom_field, int field_select,
+                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
+                               int motion_x, int motion_y, int h)
+{
+    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
+    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
+    const int lowres= s->avctx->lowres;
+    const int block_s= 8>>lowres;
+    const int s_mask= (2<<lowres)-1;
+    const int h_edge_pos = s->h_edge_pos >> lowres;
+    const int v_edge_pos = s->v_edge_pos >> lowres;
+    linesize   = s->current_picture.linesize[0] << field_based;
+    uvlinesize = s->current_picture.linesize[1] << field_based;
+
+    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
+        motion_x/=2;
+        motion_y/=2;
+    }
+
+    if(field_based){
+        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
+    }
+
+    sx= motion_x & s_mask;
+    sy= motion_y & s_mask;
+    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
+    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
+
+    if (s->out_format == FMT_H263) {
+        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
+        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
+        uvsrc_x = src_x>>1;
+        uvsrc_y = src_y>>1;
+    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
+        mx = motion_x / 4;
+        my = motion_y / 4;
+        uvsx = (2*mx) & s_mask;
+        uvsy = (2*my) & s_mask;
+        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
+        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
+    } else {
+        mx = motion_x / 2;
+        my = motion_y / 2;
+        uvsx = mx & s_mask;
+        uvsy = my & s_mask;
+        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
+        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
+    }
+
+    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
+    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
+    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
+
+    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
+       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
+                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
+            ptr_y = s->edge_emu_buffer;
+            if(!(s->flags&CODEC_FLAG_GRAY)){
+                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
+                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
+                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
+                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
+                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
+                ptr_cb= uvbuf;
+                ptr_cr= uvbuf+16;
+            }
+    }
+
+    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
+        dest_y += s->linesize;
+        dest_cb+= s->uvlinesize;
+        dest_cr+= s->uvlinesize;
+    }
+
+    if(field_select){
+        ptr_y += s->linesize;
+        ptr_cb+= s->uvlinesize;
+        ptr_cr+= s->uvlinesize;
+    }
+
+    sx <<= 2 - lowres;
+    sy <<= 2 - lowres;
+    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
+
+    if(!(s->flags&CODEC_FLAG_GRAY)){
+        uvsx <<= 2 - lowres;
+        uvsy <<= 2 - lowres;
+        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
+        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
+    }
+    //FIXME h261 lowres loop filter
+}
+
+//FIXME move to dsputil, avg variant, 16x16 version
+static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
+    int x;
+    uint8_t * const top   = src[1];
+    uint8_t * const left  = src[2];
+    uint8_t * const mid   = src[0];
+    uint8_t * const right = src[3];
+    uint8_t * const bottom= src[4];
+#define OBMC_FILTER(x, t, l, m, r, b)\
+    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
+#define OBMC_FILTER4(x, t, l, m, r, b)\
+    OBMC_FILTER(x         , t, l, m, r, b);\
+    OBMC_FILTER(x+1       , t, l, m, r, b);\
+    OBMC_FILTER(x  +stride, t, l, m, r, b);\
+    OBMC_FILTER(x+1+stride, t, l, m, r, b);
+
+    x=0;
+    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
+    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
+    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
+    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
+    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
+    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
+    x+= stride;
+    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
+    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
+    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
+    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
+    x+= stride;
+    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
+    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
+    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
+    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
+    x+= 2*stride;
+    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
+    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
+    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
+    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
+    x+= 2*stride;
+    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
+    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
+    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
+    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
+    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
+    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
+    x+= stride;
+    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
+    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
+    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
+    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
+}
+
+/* obmc for 1 8x8 luma block */
+static inline void obmc_motion(MpegEncContext *s,
+                               uint8_t *dest, uint8_t *src,
+                               int src_x, int src_y,
+                               op_pixels_func *pix_op,
+                               int16_t mv[5][2]/* mid top left right bottom*/)
+#define MID    0
+{
+    int i;
+    uint8_t *ptr[5];
+
+    assert(s->quarter_sample==0);
+
+    for(i=0; i<5; i++){
+        if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
+            ptr[i]= ptr[MID];
+        }else{
+            ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
+            hpel_motion(s, ptr[i], src, 0, 0,
+                        src_x, src_y,
+                        s->width, s->height, s->linesize,
+                        s->h_edge_pos, s->v_edge_pos,
+                        8, 8, pix_op,
+                        mv[i][0], mv[i][1]);
+        }
+    }
+
+    put_obmc(dest, ptr, s->linesize);
+}
+
+static inline void qpel_motion(MpegEncContext *s,
+                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                               int field_based, int bottom_field, int field_select,
+                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
+                               qpel_mc_func (*qpix_op)[16],
+                               int motion_x, int motion_y, int h)
+{
+    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
+    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
+
+    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
+    src_x = s->mb_x *  16                 + (motion_x >> 2);
+    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
+
+    v_edge_pos = s->v_edge_pos >> field_based;
+    linesize = s->linesize << field_based;
+    uvlinesize = s->uvlinesize << field_based;
+
+    if(field_based){
+        mx= motion_x/2;
+        my= motion_y>>1;
+    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
+        static const int rtab[8]= {0,0,1,1,0,0,0,1};
+        mx= (motion_x>>1) + rtab[motion_x&7];
+        my= (motion_y>>1) + rtab[motion_y&7];
+    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
+        mx= (motion_x>>1)|(motion_x&1);
+        my= (motion_y>>1)|(motion_y&1);
+    }else{
+        mx= motion_x/2;
+        my= motion_y/2;
+    }
+    mx= (mx>>1)|(mx&1);
+    my= (my>>1)|(my&1);
+
+    uvdxy= (mx&1) | ((my&1)<<1);
+    mx>>=1;
+    my>>=1;
+
+    uvsrc_x = s->mb_x *  8                 + mx;
+    uvsrc_y = s->mb_y * (8 >> field_based) + my;
+
+    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
+    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
+    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
+
+    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
+       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
+                         src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
+        ptr_y= s->edge_emu_buffer;
+        if(!(s->flags&CODEC_FLAG_GRAY)){
+            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
+            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
+                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
+            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
+                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
+            ptr_cb= uvbuf;
+            ptr_cr= uvbuf + 16;
+        }
+    }
+
+    if(!field_based)
+        qpix_op[0][dxy](dest_y, ptr_y, linesize);
+    else{
+        if(bottom_field){
+            dest_y += s->linesize;
+            dest_cb+= s->uvlinesize;
+            dest_cr+= s->uvlinesize;
+        }
+
+        if(field_select){
+            ptr_y  += s->linesize;
+            ptr_cb += s->uvlinesize;
+            ptr_cr += s->uvlinesize;
+        }
+        //damn interlaced mode
+        //FIXME boundary mirroring is not exactly correct here
+        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
+        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
+    }
+    if(!(s->flags&CODEC_FLAG_GRAY)){
+        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
+        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
+    }
+}
+
+inline int ff_h263_round_chroma(int x){
+    if (x >= 0)
+        return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
+    else {
+        x = -x;
+        return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
+    }
+}
+
+/**
+ * h263 chorma 4mv motion compensation.
+ */
+static inline void chroma_4mv_motion(MpegEncContext *s,
+                                     uint8_t *dest_cb, uint8_t *dest_cr,
+                                     uint8_t **ref_picture,
+                                     op_pixels_func *pix_op,
+                                     int mx, int my){
+    int dxy, emu=0, src_x, src_y, offset;
+    uint8_t *ptr;
+
+    /* In case of 8X8, we construct a single chroma motion vector
+       with a special rounding */
+    mx= ff_h263_round_chroma(mx);
+    my= ff_h263_round_chroma(my);
+
+    dxy = ((my & 1) << 1) | (mx & 1);
+    mx >>= 1;
+    my >>= 1;
+
+    src_x = s->mb_x * 8 + mx;
+    src_y = s->mb_y * 8 + my;
+    src_x = clip(src_x, -8, s->width/2);
+    if (src_x == s->width/2)
+        dxy &= ~1;
+    src_y = clip(src_y, -8, s->height/2);
+    if (src_y == s->height/2)
+        dxy &= ~2;
+
+    offset = (src_y * (s->uvlinesize)) + src_x;
+    ptr = ref_picture[1] + offset;
+    if(s->flags&CODEC_FLAG_EMU_EDGE){
+        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
+           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+            ptr= s->edge_emu_buffer;
+            emu=1;
+        }
+    }
+    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
+
+    ptr = ref_picture[2] + offset;
+    if(emu){
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+        ptr= s->edge_emu_buffer;
+    }
+    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
+}
+
+static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
+                                     uint8_t *dest_cb, uint8_t *dest_cr,
+                                     uint8_t **ref_picture,
+                                     h264_chroma_mc_func *pix_op,
+                                     int mx, int my){
+    const int lowres= s->avctx->lowres;
+    const int block_s= 8>>lowres;
+    const int s_mask= (2<<lowres)-1;
+    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
+    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
+    int emu=0, src_x, src_y, offset, sx, sy;
+    uint8_t *ptr;
+
+    if(s->quarter_sample){
+        mx/=2;
+        my/=2;
+    }
+
+    /* In case of 8X8, we construct a single chroma motion vector
+       with a special rounding */
+    mx= ff_h263_round_chroma(mx);
+    my= ff_h263_round_chroma(my);
+
+    sx= mx & s_mask;
+    sy= my & s_mask;
+    src_x = s->mb_x*block_s + (mx >> (lowres+1));
+    src_y = s->mb_y*block_s + (my >> (lowres+1));
+
+    offset = src_y * s->uvlinesize + src_x;
+    ptr = ref_picture[1] + offset;
+    if(s->flags&CODEC_FLAG_EMU_EDGE){
+        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
+           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
+            ptr= s->edge_emu_buffer;
+            emu=1;
+        }
+    }
+    sx <<= 2 - lowres;
+    sy <<= 2 - lowres;
+    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
+
+    ptr = ref_picture[2] + offset;
+    if(emu){
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
+        ptr= s->edge_emu_buffer;
+    }
+    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
+}
+
+static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
+    /* fetch pixels for estimated mv 4 macroblocks ahead
+     * optimized for 64byte cache lines */
+    const int shift = s->quarter_sample ? 2 : 1;
+    const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
+    const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
+    int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
+    s->dsp.prefetch(pix[0]+off, s->linesize, 4);
+    off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
+    s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
+}
+
+/**
+ * motion compensation of a single macroblock
+ * @param s context
+ * @param dest_y luma destination pointer
+ * @param dest_cb chroma cb/u destination pointer
+ * @param dest_cr chroma cr/v destination pointer
+ * @param dir direction (0->forward, 1->backward)
+ * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
+ * @param pic_op halfpel motion compensation function (average or put normally)
+ * @param pic_op qpel motion compensation function (average or put normally)
+ * the motion vectors are taken from s->mv and the MV type from s->mv_type
+ */
+static inline void MPV_motion(MpegEncContext *s,
+                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                              int dir, uint8_t **ref_picture,
+                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
+{
+    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
+    int mb_x, mb_y, i;
+    uint8_t *ptr, *dest;
+
+    mb_x = s->mb_x;
+    mb_y = s->mb_y;
+
+    prefetch_motion(s, ref_picture, dir);
+
+    if(s->obmc && s->pict_type != B_TYPE){
+        int16_t mv_cache[4][4][2];
+        const int xy= s->mb_x + s->mb_y*s->mb_stride;
+        const int mot_stride= s->b8_stride;
+        const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
+
+        assert(!s->mb_skipped);
+
+        memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
+        memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
+        memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
+
+        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
+            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
+        }else{
+            memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
+        }
+
+        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
+            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
+            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
+        }else{
+            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
+            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
+        }
+
+        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
+            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
+            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
+        }else{
+            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
+            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
+        }
+
+        mx = 0;
+        my = 0;
+        for(i=0;i<4;i++) {
+            const int x= (i&1)+1;
+            const int y= (i>>1)+1;
+            int16_t mv[5][2]= {
+                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
+                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
+                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
+                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
+                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
+            //FIXME cleanup
+            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
+                        ref_picture[0],
+                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
+                        pix_op[1],
+                        mv);
+
+            mx += mv[0][0];
+            my += mv[0][1];
+        }
+        if(!(s->flags&CODEC_FLAG_GRAY))
+            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
+
+        return;
+    }
+
+    switch(s->mv_type) {
+    case MV_TYPE_16X16:
+        if(s->mcsel){
+            if(s->real_sprite_warping_points==1){
+                gmc1_motion(s, dest_y, dest_cb, dest_cr,
+                            ref_picture);
+            }else{
+                gmc_motion(s, dest_y, dest_cb, dest_cr,
+                            ref_picture);
+            }
+        }else if(s->quarter_sample){
+            qpel_motion(s, dest_y, dest_cb, dest_cr,
+                        0, 0, 0,
+                        ref_picture, pix_op, qpix_op,
+                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
+        }else if(s->mspel){
+            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
+                        ref_picture, pix_op,
+                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
+        }else
+        {
+            mpeg_motion(s, dest_y, dest_cb, dest_cr,
+                        0, 0, 0,
+                        ref_picture, pix_op,
+                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
+        }
+        break;
+    case MV_TYPE_8X8:
+        mx = 0;
+        my = 0;
+        if(s->quarter_sample){
+            for(i=0;i<4;i++) {
+                motion_x = s->mv[dir][i][0];
+                motion_y = s->mv[dir][i][1];
+
+                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
+                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
+                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
+
+                /* WARNING: do no forget half pels */
+                src_x = clip(src_x, -16, s->width);
+                if (src_x == s->width)
+                    dxy &= ~3;
+                src_y = clip(src_y, -16, s->height);
+                if (src_y == s->height)
+                    dxy &= ~12;
+
+                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
+                if(s->flags&CODEC_FLAG_EMU_EDGE){
+                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
+                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
+                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
+                        ptr= s->edge_emu_buffer;
+                    }
+                }
+                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
+                qpix_op[1][dxy](dest, ptr, s->linesize);
+
+                mx += s->mv[dir][i][0]/2;
+                my += s->mv[dir][i][1]/2;
+            }
+        }else{
+            for(i=0;i<4;i++) {
+                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
+                            ref_picture[0], 0, 0,
+                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
+                            s->width, s->height, s->linesize,
+                            s->h_edge_pos, s->v_edge_pos,
+                            8, 8, pix_op[1],
+                            s->mv[dir][i][0], s->mv[dir][i][1]);
+
+                mx += s->mv[dir][i][0];
+                my += s->mv[dir][i][1];
+            }
+        }
+
+        if(!(s->flags&CODEC_FLAG_GRAY))
+            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
+        break;
+    case MV_TYPE_FIELD:
+        if (s->picture_structure == PICT_FRAME) {
+            if(s->quarter_sample){
+                for(i=0; i<2; i++){
+                    qpel_motion(s, dest_y, dest_cb, dest_cr,
+                                1, i, s->field_select[dir][i],
+                                ref_picture, pix_op, qpix_op,
+                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
+                }
+            }else{
+                /* top field */
+                mpeg_motion(s, dest_y, dest_cb, dest_cr,
+                            1, 0, s->field_select[dir][0],
+                            ref_picture, pix_op,
+                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
+                /* bottom field */
+                mpeg_motion(s, dest_y, dest_cb, dest_cr,
+                            1, 1, s->field_select[dir][1],
+                            ref_picture, pix_op,
+                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
+            }
+        } else {
+            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
+                ref_picture= s->current_picture_ptr->data;
+            }
+
+            mpeg_motion(s, dest_y, dest_cb, dest_cr,
+                        0, 0, s->field_select[dir][0],
+                        ref_picture, pix_op,
+                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
+        }
+        break;
+    case MV_TYPE_16X8:
+        for(i=0; i<2; i++){
+            uint8_t ** ref2picture;
+
+            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
+                ref2picture= ref_picture;
+            }else{
+                ref2picture= s->current_picture_ptr->data;
+            }
+
+            mpeg_motion(s, dest_y, dest_cb, dest_cr,
+                        0, 0, s->field_select[dir][i],
+                        ref2picture, pix_op,
+                        s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
+
+            dest_y += 16*s->linesize;
+            dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
+            dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
+        }
+        break;
+    case MV_TYPE_DMV:
+        if(s->picture_structure == PICT_FRAME){
+            for(i=0; i<2; i++){
+                int j;
+                for(j=0; j<2; j++){
+                    mpeg_motion(s, dest_y, dest_cb, dest_cr,
+                                1, j, j^i,
+                                ref_picture, pix_op,
+                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
+                }
+                pix_op = s->dsp.avg_pixels_tab;
+            }
+        }else{
+            for(i=0; i<2; i++){
+                mpeg_motion(s, dest_y, dest_cb, dest_cr,
+                            0, 0, s->picture_structure != i+1,
+                            ref_picture, pix_op,
+                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
+
+                // after put we make avg of the same block
+                pix_op=s->dsp.avg_pixels_tab;
+
+                //opposite parity is always in the same frame if this is second field
+                if(!s->first_field){
+                    ref_picture = s->current_picture_ptr->data;
+                }
+            }
+        }
+    break;
+    default: assert(0);
+    }
+}
+
+/**
+ * motion compensation of a single macroblock
+ * @param s context
+ * @param dest_y luma destination pointer
+ * @param dest_cb chroma cb/u destination pointer
+ * @param dest_cr chroma cr/v destination pointer
+ * @param dir direction (0->forward, 1->backward)
+ * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
+ * @param pic_op halfpel motion compensation function (average or put normally)
+ * the motion vectors are taken from s->mv and the MV type from s->mv_type
+ */
+static inline void MPV_motion_lowres(MpegEncContext *s,
+                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                              int dir, uint8_t **ref_picture,
+                              h264_chroma_mc_func *pix_op)
+{
+    int mx, my;
+    int mb_x, mb_y, i;
+    const int lowres= s->avctx->lowres;
+    const int block_s= 8>>lowres;
+
+    mb_x = s->mb_x;
+    mb_y = s->mb_y;
+
+    switch(s->mv_type) {
+    case MV_TYPE_16X16:
+        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
+                    0, 0, 0,
+                    ref_picture, pix_op,
+                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
+        break;
+    case MV_TYPE_8X8:
+        mx = 0;
+        my = 0;
+            for(i=0;i<4;i++) {
+                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
+                            ref_picture[0], 0, 0,
+                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
+                            s->width, s->height, s->linesize,
+                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
+                            block_s, block_s, pix_op,
+                            s->mv[dir][i][0], s->mv[dir][i][1]);
+
+                mx += s->mv[dir][i][0];
+                my += s->mv[dir][i][1];
+            }
+
+        if(!(s->flags&CODEC_FLAG_GRAY))
+            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
+        break;
+    case MV_TYPE_FIELD:
+        if (s->picture_structure == PICT_FRAME) {
+            /* top field */
+            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
+                        1, 0, s->field_select[dir][0],
+                        ref_picture, pix_op,
+                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
+            /* bottom field */
+            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
+                        1, 1, s->field_select[dir][1],
+                        ref_picture, pix_op,
+                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
+        } else {
+            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
+                ref_picture= s->current_picture_ptr->data;
+            }
+
+            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
+                        0, 0, s->field_select[dir][0],
+                        ref_picture, pix_op,
+                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
+        }
+        break;
+    case MV_TYPE_16X8:
+        for(i=0; i<2; i++){
+            uint8_t ** ref2picture;
+
+            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
+                ref2picture= ref_picture;
+            }else{
+                ref2picture= s->current_picture_ptr->data;
+            }
+
+            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
+                        0, 0, s->field_select[dir][i],
+                        ref2picture, pix_op,
+                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
+
+            dest_y += 2*block_s*s->linesize;
+            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
+            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
+        }
+        break;
+    case MV_TYPE_DMV:
+        if(s->picture_structure == PICT_FRAME){
+            for(i=0; i<2; i++){
+                int j;
+                for(j=0; j<2; j++){
+                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
+                                1, j, j^i,
+                                ref_picture, pix_op,
+                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
+                }
+                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
+            }
+        }else{
+            for(i=0; i<2; i++){
+                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
+                            0, 0, s->picture_structure != i+1,
+                            ref_picture, pix_op,
+                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
+
+                // after put we make avg of the same block
+                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
+
+                //opposite parity is always in the same frame if this is second field
+                if(!s->first_field){
+                    ref_picture = s->current_picture_ptr->data;
+                }
+            }
+        }
+    break;
+    default: assert(0);
+    }
+}
+
+/* put block[] to dest[] */
+static inline void put_dct(MpegEncContext *s,
+                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
+{
+    s->dct_unquantize_intra(s, block, i, qscale);
+    s->dsp.idct_put (dest, line_size, block);
+}
+
+/* add block[] to dest[] */
+static inline void add_dct(MpegEncContext *s,
+                           DCTELEM *block, int i, uint8_t *dest, int line_size)
+{
+    if (s->block_last_index[i] >= 0) {
+        s->dsp.idct_add (dest, line_size, block);
+    }
+}
+
+static inline void add_dequant_dct(MpegEncContext *s,
+                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
+{
+    if (s->block_last_index[i] >= 0) {
+        s->dct_unquantize_inter(s, block, i, qscale);
+
+        s->dsp.idct_add (dest, line_size, block);
+    }
+}
+
+/**
+ * cleans dc, ac, coded_block for the current non intra MB
+ */
+void ff_clean_intra_table_entries(MpegEncContext *s)
+{
+    int wrap = s->b8_stride;
+    int xy = s->block_index[0];
+
+    s->dc_val[0][xy           ] =
+    s->dc_val[0][xy + 1       ] =
+    s->dc_val[0][xy     + wrap] =
+    s->dc_val[0][xy + 1 + wrap] = 1024;
+    /* ac pred */
+    memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
+    memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
+    if (s->msmpeg4_version>=3) {
+        s->coded_block[xy           ] =
+        s->coded_block[xy + 1       ] =
+        s->coded_block[xy     + wrap] =
+        s->coded_block[xy + 1 + wrap] = 0;
+    }
+    /* chroma */
+    wrap = s->mb_stride;
+    xy = s->mb_x + s->mb_y * wrap;
+    s->dc_val[1][xy] =
+    s->dc_val[2][xy] = 1024;
+    /* ac pred */
+    memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
+    memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
+
+    s->mbintra_table[xy]= 0;
+}
+
+/* generic function called after a macroblock has been parsed by the
+   decoder or after it has been encoded by the encoder.
+
+   Important variables used:
+   s->mb_intra : true if intra macroblock
+   s->mv_dir   : motion vector direction
+   s->mv_type  : motion vector type
+   s->mv       : motion vector
+   s->interlaced_dct : true if interlaced dct used (mpeg2)
+ */
+static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
+{
+    int mb_x, mb_y;
+    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
+#ifdef HAVE_XVMC
+    if(s->avctx->xvmc_acceleration){
+        XVMC_decode_mb(s);//xvmc uses pblocks
+        return;
+    }
+#endif
+
+    mb_x = s->mb_x;
+    mb_y = s->mb_y;
+
+    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
+       /* save DCT coefficients */
+       int i,j;
+       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
+       for(i=0; i<6; i++)
+           for(j=0; j<64; j++)
+               *dct++ = block[i][s->dsp.idct_permutation[j]];
+    }
+
+    s->current_picture.qscale_table[mb_xy]= s->qscale;
+
+    /* update DC predictors for P macroblocks */
+    if (!s->mb_intra) {
+        if (s->h263_pred || s->h263_aic) {
+            if(s->mbintra_table[mb_xy])
+                ff_clean_intra_table_entries(s);
+        } else {
+            s->last_dc[0] =
+            s->last_dc[1] =
+            s->last_dc[2] = 128 << s->intra_dc_precision;
+        }
+    }
+    else if (s->h263_pred || s->h263_aic)
+        s->mbintra_table[mb_xy]=1;
+
+    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
+        uint8_t *dest_y, *dest_cb, *dest_cr;
+        int dct_linesize, dct_offset;
+        op_pixels_func (*op_pix)[4];
+        qpel_mc_func (*op_qpix)[16];
+        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
+        const int uvlinesize= s->current_picture.linesize[1];
+        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
+        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
+
+        /* avoid copy if macroblock skipped in last frame too */
+        /* skip only during decoding as we might trash the buffers during encoding a bit */
+        if(!s->encoding){
+            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
+            const int age= s->current_picture.age;
+
+            assert(age);
+
+            if (s->mb_skipped) {
+                s->mb_skipped= 0;
+                assert(s->pict_type!=I_TYPE);
+
+                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
+                if(*mbskip_ptr >99) *mbskip_ptr= 99;
+
+                /* if previous was skipped too, then nothing to do !  */
+                if (*mbskip_ptr >= age && s->current_picture.reference){
+                    return;
+                }
+            } else if(!s->current_picture.reference){
+                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
+                if(*mbskip_ptr >99) *mbskip_ptr= 99;
+            } else{
+                *mbskip_ptr = 0; /* not skipped */
+            }
+        }
+
+        dct_linesize = linesize << s->interlaced_dct;
+        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
+
+        if(readable){
+            dest_y=  s->dest[0];
+            dest_cb= s->dest[1];
+            dest_cr= s->dest[2];
+        }else{
+            dest_y = s->b_scratchpad;
+            dest_cb= s->b_scratchpad+16*linesize;
+            dest_cr= s->b_scratchpad+32*linesize;
+        }
+
+        if (!s->mb_intra) {
+            /* motion handling */
+            /* decoding or more than one mb_type (MC was already done otherwise) */
+            if(!s->encoding){
+                if(lowres_flag){
+                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
+
+                    if (s->mv_dir & MV_DIR_FORWARD) {
+                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
+                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
+                    }
+                    if (s->mv_dir & MV_DIR_BACKWARD) {
+                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
+                    }
+                }else{
+                    op_qpix= s->me.qpel_put;
+                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
+                        op_pix = s->dsp.put_pixels_tab;
+                    }else{
+                        op_pix = s->dsp.put_no_rnd_pixels_tab;
+                    }
+                    if (s->mv_dir & MV_DIR_FORWARD) {
+                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
+                        op_pix = s->dsp.avg_pixels_tab;
+                        op_qpix= s->me.qpel_avg;
+                    }
+                    if (s->mv_dir & MV_DIR_BACKWARD) {
+                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
+                    }
+                }
+            }
+
+            /* skip dequant / idct if we are really late ;) */
+            if(s->hurry_up>1) goto skip_idct;
+            if(s->avctx->skip_idct){
+                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
+                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
+                   || s->avctx->skip_idct >= AVDISCARD_ALL)
+                    goto skip_idct;
+            }
+
+            /* add dct residue */
+            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
+                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
+                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
+                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
+                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
+                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
+
+                if(!(s->flags&CODEC_FLAG_GRAY)){
+                    if (s->chroma_y_shift){
+                        add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
+                        add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
+                    }else{
+                        dct_linesize >>= 1;
+                        dct_offset >>=1;
+                        add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
+                        add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
+                        add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
+                        add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
+                    }
+                }
+            } else if(s->codec_id != CODEC_ID_WMV2){
+                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
+                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
+                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
+                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
+
+                if(!(s->flags&CODEC_FLAG_GRAY)){
+                    if(s->chroma_y_shift){//Chroma420
+                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
+                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
+                    }else{
+                        //chroma422
+                        dct_linesize = uvlinesize << s->interlaced_dct;
+                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
+
+                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
+                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
+                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
+                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
+                        if(!s->chroma_x_shift){//Chroma444
+                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
+                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
+                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
+                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
+                        }
+                    }
+                }//fi gray
+            }
+            else{
+                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
+            }
+        } else {
+            /* dct only in intra block */
+            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
+                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
+                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
+                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
+                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
+
+                if(!(s->flags&CODEC_FLAG_GRAY)){
+                    if(s->chroma_y_shift){
+                        put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
+                        put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
+                    }else{
+                        dct_offset >>=1;
+                        dct_linesize >>=1;
+                        put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
+                        put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
+                        put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
+                        put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
+                    }
+                }
+            }else{
+                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
+                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
+                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
+                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
+
+                if(!(s->flags&CODEC_FLAG_GRAY)){
+                    if(s->chroma_y_shift){
+                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
+                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
+                    }else{
+
+                        dct_linesize = uvlinesize << s->interlaced_dct;
+                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
+
+                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
+                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
+                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
+                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
+                        if(!s->chroma_x_shift){//Chroma444
+                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
+                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
+                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
+                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
+                        }
+                    }
+                }//gray
+            }
+        }
+skip_idct:
+        if(!readable){
+            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
+            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
+            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
+        }
+    }
+}
+
+void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
+    if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
+    else                  MPV_decode_mb_internal(s, block, 0);
+}
+
+#ifdef CONFIG_ENCODERS
+
+static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
+{
+    static const char tab[64]=
+        {3,2,2,1,1,1,1,1,
+         1,1,1,1,1,1,1,1,
+         1,1,1,1,1,1,1,1,
+         0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0};
+    int score=0;
+    int run=0;
+    int i;
+    DCTELEM *block= s->block[n];
+    const int last_index= s->block_last_index[n];
+    int skip_dc;
+
+    if(threshold<0){
+        skip_dc=0;
+        threshold= -threshold;
+    }else
+        skip_dc=1;
+
+    /* are all which we could set to zero are allready zero? */
+    if(last_index<=skip_dc - 1) return;
+
+    for(i=0; i<=last_index; i++){
+        const int j = s->intra_scantable.permutated[i];
+        const int level = FFABS(block[j]);
+        if(level==1){
+            if(skip_dc && i==0) continue;
+            score+= tab[run];
+            run=0;
+        }else if(level>1){
+            return;
+        }else{
+            run++;
+        }
+    }
+    if(score >= threshold) return;
+    for(i=skip_dc; i<=last_index; i++){
+        const int j = s->intra_scantable.permutated[i];
+        block[j]=0;
+    }
+    if(block[0]) s->block_last_index[n]= 0;
+    else         s->block_last_index[n]= -1;
+}
+
+static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
+{
+    int i;
+    const int maxlevel= s->max_qcoeff;
+    const int minlevel= s->min_qcoeff;
+    int overflow=0;
+
+    if(s->mb_intra){
+        i=1; //skip clipping of intra dc
+    }else
+        i=0;
+
+    for(;i<=last_index; i++){
+        const int j= s->intra_scantable.permutated[i];
+        int level = block[j];
+
+        if     (level>maxlevel){
+            level=maxlevel;
+            overflow++;
+        }else if(level<minlevel){
+            level=minlevel;
+            overflow++;
+        }
+
+        block[j]= level;
+    }
+
+    if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
+        av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
+}
+
+#endif //CONFIG_ENCODERS
+
+/**
+ *
+ * @param h is the normal height, this will be reduced automatically if needed for the last row
+ */
+void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
+    if (s->avctx->draw_horiz_band) {
+        AVFrame *src;
+        int offset[4];
+
+        if(s->picture_structure != PICT_FRAME){
+            h <<= 1;
+            y <<= 1;
+            if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
+        }
+
+        h= FFMIN(h, s->avctx->height - y);
+
+        if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
+            src= (AVFrame*)s->current_picture_ptr;
+        else if(s->last_picture_ptr)
+            src= (AVFrame*)s->last_picture_ptr;
+        else
+            return;
+
+        if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
+            offset[0]=
+            offset[1]=
+            offset[2]=
+            offset[3]= 0;
+        }else{
+            offset[0]= y * s->linesize;;
+            offset[1]=
+            offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
+            offset[3]= 0;
+        }
+
+        emms_c();
+
+        s->avctx->draw_horiz_band(s->avctx, src, offset,
+                                  y, s->picture_structure, h);
+    }
+}
+
+void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
+    const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
+    const int uvlinesize= s->current_picture.linesize[1];
+    const int mb_size= 4 - s->avctx->lowres;
+
+    s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
+    s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
+    s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
+    s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
+    s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
+    s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
+    //block_index is not used by mpeg2, so it is not affected by chroma_format
+
+    s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
+    s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
+    s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
+
+    if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
+    {
+        s->dest[0] += s->mb_y *   linesize << mb_size;
+        s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
+        s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+
+static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
+    int x, y;
+//FIXME optimize
+    for(y=0; y<8; y++){
+        for(x=0; x<8; x++){
+            int x2, y2;
+            int sum=0;
+            int sqr=0;
+            int count=0;
+
+            for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
+                for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
+                    int v= ptr[x2 + y2*stride];
+                    sum += v;
+                    sqr += v*v;
+                    count++;
+                }
+            }
+            weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
+        }
+    }
+}
+
+static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
+{
+    int16_t weight[8][64];
+    DCTELEM orig[8][64];
+    const int mb_x= s->mb_x;
+    const int mb_y= s->mb_y;
+    int i;
+    int skip_dct[8];
+    int dct_offset   = s->linesize*8; //default for progressive frames
+    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
+    int wrap_y, wrap_c;
+
+    for(i=0; i<mb_block_count; i++) skip_dct[i]=0;
+
+    if(s->adaptive_quant){
+        const int last_qp= s->qscale;
+        const int mb_xy= mb_x + mb_y*s->mb_stride;
+
+        s->lambda= s->lambda_table[mb_xy];
+        update_qscale(s);
+
+        if(!(s->flags&CODEC_FLAG_QP_RD)){
+            s->dquant= s->qscale - last_qp;
+
+            if(s->out_format==FMT_H263){
+                s->dquant= clip(s->dquant, -2, 2); //FIXME RD
+
+                if(s->codec_id==CODEC_ID_MPEG4){
+                    if(!s->mb_intra){
+                        if(s->pict_type == B_TYPE){
+                            if(s->dquant&1)
+                                s->dquant= (s->dquant/2)*2;
+                            if(s->mv_dir&MV_DIRECT)
+                                s->dquant= 0;
+                        }
+                        if(s->mv_type==MV_TYPE_8X8)
+                            s->dquant=0;
+                    }
+                }
+            }
+        }
+        ff_set_qscale(s, last_qp + s->dquant);
+    }else if(s->flags&CODEC_FLAG_QP_RD)
+        ff_set_qscale(s, s->qscale + s->dquant);
+
+    wrap_y = s->linesize;
+    wrap_c = s->uvlinesize;
+    ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
+    ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
+    ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
+
+    if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
+        uint8_t *ebuf= s->edge_emu_buffer + 32;
+        ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
+        ptr_y= ebuf;
+        ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+        ptr_cb= ebuf+18*wrap_y;
+        ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+        ptr_cr= ebuf+18*wrap_y+8;
+    }
+
+    if (s->mb_intra) {
+        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
+            int progressive_score, interlaced_score;
+
+            s->interlaced_dct=0;
+            progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
+                              +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
+
+            if(progressive_score > 0){
+                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
+                                  +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
+                if(progressive_score > interlaced_score){
+                    s->interlaced_dct=1;
+
+                    dct_offset= wrap_y;
+                    wrap_y<<=1;
+                    if (s->chroma_format == CHROMA_422)
+                        wrap_c<<=1;
+                }
+            }
+        }
+
+        s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
+        s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
+        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
+        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
+
+        if(s->flags&CODEC_FLAG_GRAY){
+            skip_dct[4]= 1;
+            skip_dct[5]= 1;
+        }else{
+            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
+            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
+            if(!s->chroma_y_shift){ /* 422 */
+                s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
+                s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
+            }
+        }
+    }else{
+        op_pixels_func (*op_pix)[4];
+        qpel_mc_func (*op_qpix)[16];
+        uint8_t *dest_y, *dest_cb, *dest_cr;
+
+        dest_y  = s->dest[0];
+        dest_cb = s->dest[1];
+        dest_cr = s->dest[2];
+
+        if ((!s->no_rounding) || s->pict_type==B_TYPE){
+            op_pix = s->dsp.put_pixels_tab;
+            op_qpix= s->dsp.put_qpel_pixels_tab;
+        }else{
+            op_pix = s->dsp.put_no_rnd_pixels_tab;
+            op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
+        }
+
+        if (s->mv_dir & MV_DIR_FORWARD) {
+            MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
+            op_pix = s->dsp.avg_pixels_tab;
+            op_qpix= s->dsp.avg_qpel_pixels_tab;
+        }
+        if (s->mv_dir & MV_DIR_BACKWARD) {
+            MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
+        }
+
+        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
+            int progressive_score, interlaced_score;
+
+            s->interlaced_dct=0;
+            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
+                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
+
+            if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
+
+            if(progressive_score>0){
+                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
+                                  +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
+
+                if(progressive_score > interlaced_score){
+                    s->interlaced_dct=1;
+
+                    dct_offset= wrap_y;
+                    wrap_y<<=1;
+                    if (s->chroma_format == CHROMA_422)
+                        wrap_c<<=1;
+                }
+            }
+        }
+
+        s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
+        s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
+        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
+        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
+
+        if(s->flags&CODEC_FLAG_GRAY){
+            skip_dct[4]= 1;
+            skip_dct[5]= 1;
+        }else{
+            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
+            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
+            if(!s->chroma_y_shift){ /* 422 */
+                s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
+                s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
+            }
+        }
+        /* pre quantization */
+        if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
+            //FIXME optimize
+            if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
+            if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
+            if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
+            if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
+            if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
+            if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
+            if(!s->chroma_y_shift){ /* 422 */
+                if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
+                if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
+            }
+        }
+    }
+
+    if(s->avctx->quantizer_noise_shaping){
+        if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
+        if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
+        if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
+        if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
+        if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
+        if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
+        if(!s->chroma_y_shift){ /* 422 */
+            if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
+            if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
+        }
+        memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
+    }
+
+    /* DCT & quantize */
+    assert(s->out_format!=FMT_MJPEG || s->qscale==8);
+    {
+        for(i=0;i<mb_block_count;i++) {
+            if(!skip_dct[i]){
+                int overflow;
+                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
+            // FIXME we could decide to change to quantizer instead of clipping
+            // JS: I don't think that would be a good idea it could lower quality instead
+            //     of improve it. Just INTRADC clipping deserves changes in quantizer
+                if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
+            }else
+                s->block_last_index[i]= -1;
+        }
+        if(s->avctx->quantizer_noise_shaping){
+            for(i=0;i<mb_block_count;i++) {
+                if(!skip_dct[i]){
+                    s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
+                }
+            }
+        }
+
+        if(s->luma_elim_threshold && !s->mb_intra)
+            for(i=0; i<4; i++)
+                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
+        if(s->chroma_elim_threshold && !s->mb_intra)
+            for(i=4; i<mb_block_count; i++)
+                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
+
+        if(s->flags & CODEC_FLAG_CBP_RD){
+            for(i=0;i<mb_block_count;i++) {
+                if(s->block_last_index[i] == -1)
+                    s->coded_score[i]= INT_MAX/256;
+            }
+        }
+    }
+
+    if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
+        s->block_last_index[4]=
+        s->block_last_index[5]= 0;
+        s->block[4][0]=
+        s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
+    }
+
+    //non c quantize code returns incorrect block_last_index FIXME
+    if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
+        for(i=0; i<mb_block_count; i++){
+            int j;
+            if(s->block_last_index[i]>0){
+                for(j=63; j>0; j--){
+                    if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
+                }
+                s->block_last_index[i]= j;
+            }
+        }
+    }
+
+    /* huffman encode */
+    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
+    case CODEC_ID_MPEG1VIDEO:
+    case CODEC_ID_MPEG2VIDEO:
+        mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
+    case CODEC_ID_MPEG4:
+        mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
+    case CODEC_ID_MSMPEG4V2:
+    case CODEC_ID_MSMPEG4V3:
+    case CODEC_ID_WMV1:
+        msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
+    case CODEC_ID_WMV2:
+         ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
+#ifdef CONFIG_H261_ENCODER
+    case CODEC_ID_H261:
+        ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
+#endif
+    case CODEC_ID_H263:
+    case CODEC_ID_H263P:
+    case CODEC_ID_FLV1:
+    case CODEC_ID_RV10:
+    case CODEC_ID_RV20:
+        h263_encode_mb(s, s->block, motion_x, motion_y); break;
+    case CODEC_ID_MJPEG:
+        mjpeg_encode_mb(s, s->block); break;
+    default:
+        assert(0);
+    }
+}
+
+static always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
+{
+    if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
+    else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
+}
+
+#endif //CONFIG_ENCODERS
+
+void ff_mpeg_flush(AVCodecContext *avctx){
+    int i;
+    MpegEncContext *s = avctx->priv_data;
+
+    if(s==NULL || s->picture==NULL)
+        return;
+
+    for(i=0; i<MAX_PICTURE_COUNT; i++){
+       if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
+                                    || s->picture[i].type == FF_BUFFER_TYPE_USER))
+        avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
+    }
+    s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
+
+    s->mb_x= s->mb_y= 0;
+
+    s->parse_context.state= -1;
+    s->parse_context.frame_start_found= 0;
+    s->parse_context.overread= 0;
+    s->parse_context.overread_index= 0;
+    s->parse_context.index= 0;
+    s->parse_context.last_index= 0;
+    s->bitstream_buffer_size=0;
+}
+
+#ifdef CONFIG_ENCODERS
+void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
+{
+    const uint16_t *srcw= (uint16_t*)src;
+    int words= length>>4;
+    int bits= length&15;
+    int i;
+
+    if(length==0) return;
+
+    if(words < 16){
+        for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
+    }else if(put_bits_count(pb)&7){
+        for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
+    }else{
+        for(i=0; put_bits_count(pb)&31; i++)
+            put_bits(pb, 8, src[i]);
+        flush_put_bits(pb);
+        memcpy(pbBufPtr(pb), src+i, 2*words-i);
+        skip_put_bytes(pb, 2*words-i);
+    }
+
+    put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
+}
+
+static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
+    int i;
+
+    memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
+
+    /* mpeg1 */
+    d->mb_skip_run= s->mb_skip_run;
+    for(i=0; i<3; i++)
+        d->last_dc[i]= s->last_dc[i];
+
+    /* statistics */
+    d->mv_bits= s->mv_bits;
+    d->i_tex_bits= s->i_tex_bits;
+    d->p_tex_bits= s->p_tex_bits;
+    d->i_count= s->i_count;
+    d->f_count= s->f_count;
+    d->b_count= s->b_count;
+    d->skip_count= s->skip_count;
+    d->misc_bits= s->misc_bits;
+    d->last_bits= 0;
+
+    d->mb_skipped= 0;
+    d->qscale= s->qscale;
+    d->dquant= s->dquant;
+}
+
+static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
+    int i;
+
+    memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
+    memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
+
+    /* mpeg1 */
+    d->mb_skip_run= s->mb_skip_run;
+    for(i=0; i<3; i++)
+        d->last_dc[i]= s->last_dc[i];
+
+    /* statistics */
+    d->mv_bits= s->mv_bits;
+    d->i_tex_bits= s->i_tex_bits;
+    d->p_tex_bits= s->p_tex_bits;
+    d->i_count= s->i_count;
+    d->f_count= s->f_count;
+    d->b_count= s->b_count;
+    d->skip_count= s->skip_count;
+    d->misc_bits= s->misc_bits;
+
+    d->mb_intra= s->mb_intra;
+    d->mb_skipped= s->mb_skipped;
+    d->mv_type= s->mv_type;
+    d->mv_dir= s->mv_dir;
+    d->pb= s->pb;
+    if(s->data_partitioning){
+        d->pb2= s->pb2;
+        d->tex_pb= s->tex_pb;
+    }
+    d->block= s->block;
+    for(i=0; i<8; i++)
+        d->block_last_index[i]= s->block_last_index[i];
+    d->interlaced_dct= s->interlaced_dct;
+    d->qscale= s->qscale;
+}
+
+static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
+                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
+                           int *dmin, int *next_block, int motion_x, int motion_y)
+{
+    int score;
+    uint8_t *dest_backup[3];
+
+    copy_context_before_encode(s, backup, type);
+
+    s->block= s->blocks[*next_block];
+    s->pb= pb[*next_block];
+    if(s->data_partitioning){
+        s->pb2   = pb2   [*next_block];
+        s->tex_pb= tex_pb[*next_block];
+    }
+
+    if(*next_block){
+        memcpy(dest_backup, s->dest, sizeof(s->dest));
+        s->dest[0] = s->rd_scratchpad;
+        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
+        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
+        assert(s->linesize >= 32); //FIXME
+    }
+
+    encode_mb(s, motion_x, motion_y);
+
+    score= put_bits_count(&s->pb);
+    if(s->data_partitioning){
+        score+= put_bits_count(&s->pb2);
+        score+= put_bits_count(&s->tex_pb);
+    }
+
+    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
+        MPV_decode_mb(s, s->block);
+
+        score *= s->lambda2;
+        score += sse_mb(s) << FF_LAMBDA_SHIFT;
+    }
+
+    if(*next_block){
+        memcpy(s->dest, dest_backup, sizeof(s->dest));
+    }
+
+    if(score<*dmin){
+        *dmin= score;
+        *next_block^=1;
+
+        copy_context_after_encode(best, s, type);
+    }
+}
+
+static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
+    uint32_t *sq = ff_squareTbl + 256;
+    int acc=0;
+    int x,y;
+
+    if(w==16 && h==16)
+        return s->dsp.sse[0](NULL, src1, src2, stride, 16);
+    else if(w==8 && h==8)
+        return s->dsp.sse[1](NULL, src1, src2, stride, 8);
+
+    for(y=0; y<h; y++){
+        for(x=0; x<w; x++){
+            acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
+        }
+    }
+
+    assert(acc>=0);
+
+    return acc;
+}
+
+static int sse_mb(MpegEncContext *s){
+    int w= 16;
+    int h= 16;
+
+    if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
+    if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
+
+    if(w==16 && h==16)
+      if(s->avctx->mb_cmp == FF_CMP_NSSE){
+        return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
+               +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
+               +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
+      }else{
+        return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
+               +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
+               +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
+      }
+    else
+        return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
+               +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
+               +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
+}
+
+static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
+    MpegEncContext *s= arg;
+
+
+    s->me.pre_pass=1;
+    s->me.dia_size= s->avctx->pre_dia_size;
+    s->first_slice_line=1;
+    for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
+        for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
+            ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
+        }
+        s->first_slice_line=0;
+    }
+
+    s->me.pre_pass=0;
+
+    return 0;
+}
+
+static int estimate_motion_thread(AVCodecContext *c, void *arg){
+    MpegEncContext *s= arg;
+
+    s->me.dia_size= s->avctx->dia_size;
+    s->first_slice_line=1;
+    for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
+        s->mb_x=0; //for block init below
+        ff_init_block_index(s);
+        for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
+            s->block_index[0]+=2;
+            s->block_index[1]+=2;
+            s->block_index[2]+=2;
+            s->block_index[3]+=2;
+
+            /* compute motion vector & mb_type and store in context */
+            if(s->pict_type==B_TYPE)
+                ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
+            else
+                ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
+        }
+        s->first_slice_line=0;
+    }
+    return 0;
+}
+
+static int mb_var_thread(AVCodecContext *c, void *arg){
+    MpegEncContext *s= arg;
+    int mb_x, mb_y;
+
+    for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
+        for(mb_x=0; mb_x < s->mb_width; mb_x++) {
+            int xx = mb_x * 16;
+            int yy = mb_y * 16;
+            uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
+            int varc;
+            int sum = s->dsp.pix_sum(pix, s->linesize);
+
+            varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
+
+            s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
+            s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
+            s->me.mb_var_sum_temp    += varc;
+        }
+    }
+    return 0;
+}
+
+static void write_slice_end(MpegEncContext *s){
+    if(s->codec_id==CODEC_ID_MPEG4){
+        if(s->partitioned_frame){
+            ff_mpeg4_merge_partitions(s);
+        }
+
+        ff_mpeg4_stuffing(&s->pb);
+    }else if(s->out_format == FMT_MJPEG){
+        ff_mjpeg_stuffing(&s->pb);
+    }
+
+    align_put_bits(&s->pb);
+    flush_put_bits(&s->pb);
+
+    if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
+        s->misc_bits+= get_bits_diff(s);
+}
+
+static int encode_thread(AVCodecContext *c, void *arg){
+    MpegEncContext *s= arg;
+    int mb_x, mb_y, pdif = 0;
+    int i, j;
+    MpegEncContext best_s, backup_s;
+    uint8_t bit_buf[2][MAX_MB_BYTES];
+    uint8_t bit_buf2[2][MAX_MB_BYTES];
+    uint8_t bit_buf_tex[2][MAX_MB_BYTES];
+    PutBitContext pb[2], pb2[2], tex_pb[2];
+//printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
+
+    for(i=0; i<2; i++){
+        init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
+        init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
+        init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
+    }
+
+    s->last_bits= put_bits_count(&s->pb);
+    s->mv_bits=0;
+    s->misc_bits=0;
+    s->i_tex_bits=0;
+    s->p_tex_bits=0;
+    s->i_count=0;
+    s->f_count=0;
+    s->b_count=0;
+    s->skip_count=0;
+
+    for(i=0; i<3; i++){
+        /* init last dc values */
+        /* note: quant matrix value (8) is implied here */
+        s->last_dc[i] = 128 << s->intra_dc_precision;
+
+        s->current_picture.error[i] = 0;
+    }
+    s->mb_skip_run = 0;
+    memset(s->last_mv, 0, sizeof(s->last_mv));
+
+    s->last_mv_dir = 0;
+
+    switch(s->codec_id){
+    case CODEC_ID_H263:
+    case CODEC_ID_H263P:
+    case CODEC_ID_FLV1:
+        s->gob_index = ff_h263_get_gob_height(s);
+        break;
+    case CODEC_ID_MPEG4:
+        if(s->partitioned_frame)
+            ff_mpeg4_init_partitions(s);
+        break;
+    }
+
+    s->resync_mb_x=0;
+    s->resync_mb_y=0;
+    s->first_slice_line = 1;
+    s->ptr_lastgob = s->pb.buf;
+    for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
+//    printf("row %d at %X\n", s->mb_y, (int)s);
+        s->mb_x=0;
+        s->mb_y= mb_y;
+
+        ff_set_qscale(s, s->qscale);
+        ff_init_block_index(s);
+
+        for(mb_x=0; mb_x < s->mb_width; mb_x++) {
+            int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
+            int mb_type= s->mb_type[xy];
+//            int d;
+            int dmin= INT_MAX;
+            int dir;
+
+            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
+                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+                return -1;
+            }
+            if(s->data_partitioning){
+                if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
+                   || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
+                    av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+                    return -1;
+                }
+            }
+
+            s->mb_x = mb_x;
+            s->mb_y = mb_y;  // moved into loop, can get changed by H.261
+            ff_update_block_index(s);
+
+#ifdef CONFIG_H261_ENCODER
+            if(s->codec_id == CODEC_ID_H261){
+                ff_h261_reorder_mb_index(s);
+                xy= s->mb_y*s->mb_stride + s->mb_x;
+                mb_type= s->mb_type[xy];
+            }
+#endif
+
+            /* write gob / video packet header  */
+            if(s->rtp_mode){
+                int current_packet_size, is_gob_start;
+
+                current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
+
+                is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
+
+                if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
+
+                switch(s->codec_id){
+                case CODEC_ID_H263:
+                case CODEC_ID_H263P:
+                    if(!s->h263_slice_structured)
+                        if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
+                    break;
+                case CODEC_ID_MPEG2VIDEO:
+                    if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
+                case CODEC_ID_MPEG1VIDEO:
+                    if(s->mb_skip_run) is_gob_start=0;
+                    break;
+                }
+
+                if(is_gob_start){
+                    if(s->start_mb_y != mb_y || mb_x!=0){
+                        write_slice_end(s);
+
+                        if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
+                            ff_mpeg4_init_partitions(s);
+                        }
+                    }
+
+                    assert((put_bits_count(&s->pb)&7) == 0);
+                    current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
+
+                    if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
+                        int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
+                        int d= 100 / s->avctx->error_rate;
+                        if(r % d == 0){
+                            current_packet_size=0;
+#ifndef ALT_BITSTREAM_WRITER
+                            s->pb.buf_ptr= s->ptr_lastgob;
+#endif
+                            assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
+                        }
+                    }
+
+                    if (s->avctx->rtp_callback){
+                        int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
+                        s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
+                    }
+
+                    switch(s->codec_id){
+                    case CODEC_ID_MPEG4:
+                        ff_mpeg4_encode_video_packet_header(s);
+                        ff_mpeg4_clean_buffers(s);
+                    break;
+                    case CODEC_ID_MPEG1VIDEO:
+                    case CODEC_ID_MPEG2VIDEO:
+                        ff_mpeg1_encode_slice_header(s);
+                        ff_mpeg1_clean_buffers(s);
+                    break;
+                    case CODEC_ID_H263:
+                    case CODEC_ID_H263P:
+                        h263_encode_gob_header(s, mb_y);
+                    break;
+                    }
+
+                    if(s->flags&CODEC_FLAG_PASS1){
+                        int bits= put_bits_count(&s->pb);
+                        s->misc_bits+= bits - s->last_bits;
+                        s->last_bits= bits;
+                    }
+
+                    s->ptr_lastgob += current_packet_size;
+                    s->first_slice_line=1;
+                    s->resync_mb_x=mb_x;
+                    s->resync_mb_y=mb_y;
+                }
+            }
+
+            if(  (s->resync_mb_x   == s->mb_x)
+               && s->resync_mb_y+1 == s->mb_y){
+                s->first_slice_line=0;
+            }
+
+            s->mb_skipped=0;
+            s->dquant=0; //only for QP_RD
+
+            if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
+                int next_block=0;
+                int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
+
+                copy_context_before_encode(&backup_s, s, -1);
+                backup_s.pb= s->pb;
+                best_s.data_partitioning= s->data_partitioning;
+                best_s.partitioned_frame= s->partitioned_frame;
+                if(s->data_partitioning){
+                    backup_s.pb2= s->pb2;
+                    backup_s.tex_pb= s->tex_pb;
+                }
+
+                if(mb_type&CANDIDATE_MB_TYPE_INTER){
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mv_type = MV_TYPE_16X16;
+                    s->mb_intra= 0;
+                    s->mv[0][0][0] = s->p_mv_table[xy][0];
+                    s->mv[0][0][1] = s->p_mv_table[xy][1];
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
+                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
+                }
+                if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(i=0; i<2; i++){
+                        j= s->field_select[0][i] = s->p_field_select_table[i][xy];
+                        s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
+                        s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
+                    }
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
+                                 &dmin, &next_block, 0, 0);
+                }
+                if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mv_type = MV_TYPE_16X16;
+                    s->mb_intra= 0;
+                    s->mv[0][0][0] = 0;
+                    s->mv[0][0][1] = 0;
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
+                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
+                }
+                if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mv_type = MV_TYPE_8X8;
+                    s->mb_intra= 0;
+                    for(i=0; i<4; i++){
+                        s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
+                        s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
+                    }
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
+                                 &dmin, &next_block, 0, 0);
+                }
+                if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mv_type = MV_TYPE_16X16;
+                    s->mb_intra= 0;
+                    s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
+                    s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
+                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
+                }
+                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
+                    s->mv_dir = MV_DIR_BACKWARD;
+                    s->mv_type = MV_TYPE_16X16;
+                    s->mb_intra= 0;
+                    s->mv[1][0][0] = s->b_back_mv_table[xy][0];
+                    s->mv[1][0][1] = s->b_back_mv_table[xy][1];
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
+                                 &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
+                }
+                if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
+                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
+                    s->mv_type = MV_TYPE_16X16;
+                    s->mb_intra= 0;
+                    s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
+                    s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
+                    s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
+                    s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
+                                 &dmin, &next_block, 0, 0);
+                }
+                if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
+                    int mx= s->b_direct_mv_table[xy][0];
+                    int my= s->b_direct_mv_table[xy][1];
+
+                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+                    s->mb_intra= 0;
+                    ff_mpeg4_set_direct_mv(s, mx, my);
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
+                                 &dmin, &next_block, mx, my);
+                }
+                if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(i=0; i<2; i++){
+                        j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
+                        s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
+                        s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
+                    }
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
+                                 &dmin, &next_block, 0, 0);
+                }
+                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
+                    s->mv_dir = MV_DIR_BACKWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(i=0; i<2; i++){
+                        j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
+                        s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
+                        s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
+                    }
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
+                                 &dmin, &next_block, 0, 0);
+                }
+                if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
+                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(dir=0; dir<2; dir++){
+                        for(i=0; i<2; i++){
+                            j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
+                            s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
+                            s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
+                        }
+                    }
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
+                                 &dmin, &next_block, 0, 0);
+                }
+                if(mb_type&CANDIDATE_MB_TYPE_INTRA){
+                    s->mv_dir = 0;
+                    s->mv_type = MV_TYPE_16X16;
+                    s->mb_intra= 1;
+                    s->mv[0][0][0] = 0;
+                    s->mv[0][0][1] = 0;
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
+                                 &dmin, &next_block, 0, 0);
+                    if(s->h263_pred || s->h263_aic){
+                        if(best_s.mb_intra)
+                            s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
+                        else
+                            ff_clean_intra_table_entries(s); //old mode?
+                    }
+                }
+
+                if(s->flags & CODEC_FLAG_QP_RD){
+                    if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
+                        const int last_qp= backup_s.qscale;
+                        int qpi, qp, dc[6];
+                        DCTELEM ac[6][16];
+                        const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
+                        static const int dquant_tab[4]={-1,1,-2,2};
+
+                        assert(backup_s.dquant == 0);
+
+                        //FIXME intra
+                        s->mv_dir= best_s.mv_dir;
+                        s->mv_type = MV_TYPE_16X16;
+                        s->mb_intra= best_s.mb_intra;
+                        s->mv[0][0][0] = best_s.mv[0][0][0];
+                        s->mv[0][0][1] = best_s.mv[0][0][1];
+                        s->mv[1][0][0] = best_s.mv[1][0][0];
+                        s->mv[1][0][1] = best_s.mv[1][0][1];
+
+                        qpi = s->pict_type == B_TYPE ? 2 : 0;
+                        for(; qpi<4; qpi++){
+                            int dquant= dquant_tab[qpi];
+                            qp= last_qp + dquant;
+                            if(qp < s->avctx->qmin || qp > s->avctx->qmax)
+                                continue;
+                            backup_s.dquant= dquant;
+                            if(s->mb_intra && s->dc_val[0]){
+                                for(i=0; i<6; i++){
+                                    dc[i]= s->dc_val[0][ s->block_index[i] ];
+                                    memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
+                                }
+                            }
+
+                            encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
+                                         &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
+                            if(best_s.qscale != qp){
+                                if(s->mb_intra && s->dc_val[0]){
+                                    for(i=0; i<6; i++){
+                                        s->dc_val[0][ s->block_index[i] ]= dc[i];
+                                        memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
+                                    }
+                                }
+                            }
+                        }
+                        qp= best_s.qscale;
+                        s->current_picture.qscale_table[xy]= qp;
+                    }
+                }
+
+                copy_context_after_encode(s, &best_s, -1);
+
+                pb_bits_count= put_bits_count(&s->pb);
+                flush_put_bits(&s->pb);
+                ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
+                s->pb= backup_s.pb;
+
+                if(s->data_partitioning){
+                    pb2_bits_count= put_bits_count(&s->pb2);
+                    flush_put_bits(&s->pb2);
+                    ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
+                    s->pb2= backup_s.pb2;
+
+                    tex_pb_bits_count= put_bits_count(&s->tex_pb);
+                    flush_put_bits(&s->tex_pb);
+                    ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
+                    s->tex_pb= backup_s.tex_pb;
+                }
+                s->last_bits= put_bits_count(&s->pb);
+
+                if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
+                    ff_h263_update_motion_val(s);
+
+                if(next_block==0){ //FIXME 16 vs linesize16
+                    s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
+                    s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
+                    s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
+                }
+
+                if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
+                    MPV_decode_mb(s, s->block);
+            } else {
+                int motion_x, motion_y;
+                s->mv_type=MV_TYPE_16X16;
+                // only one MB-Type possible
+
+                switch(mb_type){
+                case CANDIDATE_MB_TYPE_INTRA:
+                    s->mv_dir = 0;
+                    s->mb_intra= 1;
+                    motion_x= s->mv[0][0][0] = 0;
+                    motion_y= s->mv[0][0][1] = 0;
+                    break;
+                case CANDIDATE_MB_TYPE_INTER:
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mb_intra= 0;
+                    motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
+                    motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
+                    break;
+                case CANDIDATE_MB_TYPE_INTER_I:
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(i=0; i<2; i++){
+                        j= s->field_select[0][i] = s->p_field_select_table[i][xy];
+                        s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
+                        s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
+                    }
+                    motion_x = motion_y = 0;
+                    break;
+                case CANDIDATE_MB_TYPE_INTER4V:
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mv_type = MV_TYPE_8X8;
+                    s->mb_intra= 0;
+                    for(i=0; i<4; i++){
+                        s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
+                        s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
+                    }
+                    motion_x= motion_y= 0;
+                    break;
+                case CANDIDATE_MB_TYPE_DIRECT:
+                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+                    s->mb_intra= 0;
+                    motion_x=s->b_direct_mv_table[xy][0];
+                    motion_y=s->b_direct_mv_table[xy][1];
+                    ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
+                    break;
+                case CANDIDATE_MB_TYPE_BIDIR:
+                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
+                    s->mb_intra= 0;
+                    motion_x=0;
+                    motion_y=0;
+                    s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
+                    s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
+                    s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
+                    s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
+                    break;
+                case CANDIDATE_MB_TYPE_BACKWARD:
+                    s->mv_dir = MV_DIR_BACKWARD;
+                    s->mb_intra= 0;
+                    motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
+                    motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
+                    break;
+                case CANDIDATE_MB_TYPE_FORWARD:
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mb_intra= 0;
+                    motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
+                    motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
+//                    printf(" %d %d ", motion_x, motion_y);
+                    break;
+                case CANDIDATE_MB_TYPE_FORWARD_I:
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(i=0; i<2; i++){
+                        j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
+                        s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
+                        s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
+                    }
+                    motion_x=motion_y=0;
+                    break;
+                case CANDIDATE_MB_TYPE_BACKWARD_I:
+                    s->mv_dir = MV_DIR_BACKWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(i=0; i<2; i++){
+                        j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
+                        s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
+                        s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
+                    }
+                    motion_x=motion_y=0;
+                    break;
+                case CANDIDATE_MB_TYPE_BIDIR_I:
+                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(dir=0; dir<2; dir++){
+                        for(i=0; i<2; i++){
+                            j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
+                            s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
+                            s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
+                        }
+                    }
+                    motion_x=motion_y=0;
+                    break;
+                default:
+                    motion_x=motion_y=0; //gcc warning fix
+                    av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
+                }
+
+                encode_mb(s, motion_x, motion_y);
+
+                // RAL: Update last macroblock type
+                s->last_mv_dir = s->mv_dir;
+
+                if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
+                    ff_h263_update_motion_val(s);
+
+                MPV_decode_mb(s, s->block);
+            }
+
+            /* clean the MV table in IPS frames for direct mode in B frames */
+            if(s->mb_intra /* && I,P,S_TYPE */){
+                s->p_mv_table[xy][0]=0;
+                s->p_mv_table[xy][1]=0;
+            }
+
+            if(s->flags&CODEC_FLAG_PSNR){
+                int w= 16;
+                int h= 16;
+
+                if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
+                if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
+
+                s->current_picture.error[0] += sse(
+                    s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
+                    s->dest[0], w, h, s->linesize);
+                s->current_picture.error[1] += sse(
+                    s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
+                    s->dest[1], w>>1, h>>1, s->uvlinesize);
+                s->current_picture.error[2] += sse(
+                    s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
+                    s->dest[2], w>>1, h>>1, s->uvlinesize);
+            }
+            if(s->loop_filter){
+                if(s->out_format == FMT_H263)
+                    ff_h263_loop_filter(s);
+            }
+//printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
+        }
+    }
+
+    //not beautiful here but we must write it before flushing so it has to be here
+    if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
+        msmpeg4_encode_ext_header(s);
+
+    write_slice_end(s);
+
+    /* Send the last GOB if RTP */
+    if (s->avctx->rtp_callback) {
+        int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
+        pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
+        /* Call the RTP callback to send the last GOB */
+        emms_c();
+        s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
+    }
+
+    return 0;
+}
+
+#define MERGE(field) dst->field += src->field; src->field=0
+static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
+    MERGE(me.scene_change_score);
+    MERGE(me.mc_mb_var_sum_temp);
+    MERGE(me.mb_var_sum_temp);
+}
+
+static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
+    int i;
+
+    MERGE(dct_count[0]); //note, the other dct vars are not part of the context
+    MERGE(dct_count[1]);
+    MERGE(mv_bits);
+    MERGE(i_tex_bits);
+    MERGE(p_tex_bits);
+    MERGE(i_count);
+    MERGE(f_count);
+    MERGE(b_count);
+    MERGE(skip_count);
+    MERGE(misc_bits);
+    MERGE(error_count);
+    MERGE(padding_bug_score);
+    MERGE(current_picture.error[0]);
+    MERGE(current_picture.error[1]);
+    MERGE(current_picture.error[2]);
+
+    if(dst->avctx->noise_reduction){
+        for(i=0; i<64; i++){
+            MERGE(dct_error_sum[0][i]);
+            MERGE(dct_error_sum[1][i]);
+        }
+    }
+
+    assert(put_bits_count(&src->pb) % 8 ==0);
+    assert(put_bits_count(&dst->pb) % 8 ==0);
+    ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
+    flush_put_bits(&dst->pb);
+}
+
+static int estimate_qp(MpegEncContext *s, int dry_run){
+    if (s->next_lambda){
+        s->current_picture_ptr->quality=
+        s->current_picture.quality = s->next_lambda;
+        if(!dry_run) s->next_lambda= 0;
+    } else if (!s->fixed_qscale) {
+        s->current_picture_ptr->quality=
+        s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
+        if (s->current_picture.quality < 0)
+            return -1;
+    }
+
+    if(s->adaptive_quant){
+        switch(s->codec_id){
+        case CODEC_ID_MPEG4:
+            ff_clean_mpeg4_qscales(s);
+            break;
+        case CODEC_ID_H263:
+        case CODEC_ID_H263P:
+        case CODEC_ID_FLV1:
+            ff_clean_h263_qscales(s);
+            break;
+        }
+
+        s->lambda= s->lambda_table[0];
+        //FIXME broken
+    }else
+        s->lambda= s->current_picture.quality;
+//printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
+    update_qscale(s);
+    return 0;
+}
+
+static int encode_picture(MpegEncContext *s, int picture_number)
+{
+    int i;
+    int bits;
+
+    s->picture_number = picture_number;
+
+    /* Reset the average MB variance */
+    s->me.mb_var_sum_temp    =
+    s->me.mc_mb_var_sum_temp = 0;
+
+    /* we need to initialize some time vars before we can encode b-frames */
+    // RAL: Condition added for MPEG1VIDEO
+    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
+        ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
+
+    s->me.scene_change_score=0;
+
+//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration
+
+    if(s->pict_type==I_TYPE){
+        if(s->msmpeg4_version >= 3) s->no_rounding=1;
+        else                        s->no_rounding=0;
+    }else if(s->pict_type!=B_TYPE){
+        if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
+            s->no_rounding ^= 1;
+    }
+
+    if(s->flags & CODEC_FLAG_PASS2){
+        if (estimate_qp(s,1) < 0)
+            return -1;
+        ff_get_2pass_fcode(s);
+    }else if(!(s->flags & CODEC_FLAG_QSCALE)){
+        if(s->pict_type==B_TYPE)
+            s->lambda= s->last_lambda_for[s->pict_type];
+        else
+            s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
+        update_qscale(s);
+    }
+
+    s->mb_intra=0; //for the rate distortion & bit compare functions
+    for(i=1; i<s->avctx->thread_count; i++){
+        ff_update_duplicate_context(s->thread_context[i], s);
+    }
+
+    ff_init_me(s);
+
+    /* Estimate motion for every MB */
+    if(s->pict_type != I_TYPE){
+        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
+        s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
+        if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
+            if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
+                s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
+            }
+        }
+
+        s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
+    }else /* if(s->pict_type == I_TYPE) */{
+        /* I-Frame */
+        for(i=0; i<s->mb_stride*s->mb_height; i++)
+            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
+
+        if(!s->fixed_qscale){
+            /* finding spatial complexity for I-frame rate control */
+            s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
+        }
+    }
+    for(i=1; i<s->avctx->thread_count; i++){
+        merge_context_after_me(s, s->thread_context[i]);
+    }
+    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
+    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
+    emms_c();
+
+    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
+        s->pict_type= I_TYPE;
+        for(i=0; i<s->mb_stride*s->mb_height; i++)
+            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
+//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
+    }
+
+    if(!s->umvplus){
+        if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
+            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
+
+            if(s->flags & CODEC_FLAG_INTERLACED_ME){
+                int a,b;
+                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
+                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
+                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
+            }
+
+            ff_fix_long_p_mvs(s);
+            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
+            if(s->flags & CODEC_FLAG_INTERLACED_ME){
+                int j;
+                for(i=0; i<2; i++){
+                    for(j=0; j<2; j++)
+                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
+                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
+                }
+            }
+        }
+
+        if(s->pict_type==B_TYPE){
+            int a, b;
+
+            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
+            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
+            s->f_code = FFMAX(a, b);
+
+            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
+            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
+            s->b_code = FFMAX(a, b);
+
+            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
+            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
+            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
+            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
+            if(s->flags & CODEC_FLAG_INTERLACED_ME){
+                int dir, j;
+                for(dir=0; dir<2; dir++){
+                    for(i=0; i<2; i++){
+                        for(j=0; j<2; j++){
+                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
+                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
+                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
+                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    if (estimate_qp(s, 0) < 0)
+        return -1;
+
+    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
+        s->qscale= 3; //reduce clipping problems
+
+    if (s->out_format == FMT_MJPEG) {
+        /* for mjpeg, we do include qscale in the matrix */
+        s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
+        for(i=1;i<64;i++){
+            int j= s->dsp.idct_permutation[i];
+
+            s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
+        }
+        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
+                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
+        s->qscale= 8;
+    }
+
+    //FIXME var duplication
+    s->current_picture_ptr->key_frame=
+    s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
+    s->current_picture_ptr->pict_type=
+    s->current_picture.pict_type= s->pict_type;
+
+    if(s->current_picture.key_frame)
+        s->picture_in_gop_number=0;
+
+    s->last_bits= put_bits_count(&s->pb);
+    switch(s->out_format) {
+    case FMT_MJPEG:
+        mjpeg_picture_header(s);
+        break;
+#ifdef CONFIG_H261_ENCODER
+    case FMT_H261:
+        ff_h261_encode_picture_header(s, picture_number);
+        break;
+#endif
+    case FMT_H263:
+        if (s->codec_id == CODEC_ID_WMV2)
+            ff_wmv2_encode_picture_header(s, picture_number);
+        else if (s->h263_msmpeg4)
+            msmpeg4_encode_picture_header(s, picture_number);
+        else if (s->h263_pred)
+            mpeg4_encode_picture_header(s, picture_number);
+#ifdef CONFIG_RV10_ENCODER
+        else if (s->codec_id == CODEC_ID_RV10)
+            rv10_encode_picture_header(s, picture_number);
+#endif
+#ifdef CONFIG_RV20_ENCODER
+        else if (s->codec_id == CODEC_ID_RV20)
+            rv20_encode_picture_header(s, picture_number);
+#endif
+        else if (s->codec_id == CODEC_ID_FLV1)
+            ff_flv_encode_picture_header(s, picture_number);
+        else
+            h263_encode_picture_header(s, picture_number);
+        break;
+    case FMT_MPEG1:
+        mpeg1_encode_picture_header(s, picture_number);
+        break;
+    case FMT_H264:
+        break;
+    default:
+        assert(0);
+    }
+    bits= put_bits_count(&s->pb);
+    s->header_bits= bits - s->last_bits;
+
+    for(i=1; i<s->avctx->thread_count; i++){
+        update_duplicate_context_after_me(s->thread_context[i], s);
+    }
+    s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
+    for(i=1; i<s->avctx->thread_count; i++){
+        merge_context_after_encode(s, s->thread_context[i]);
+    }
+    emms_c();
+    return 0;
+}
+
+static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
+    const int intra= s->mb_intra;
+    int i;
+
+    s->dct_count[intra]++;
+
+    for(i=0; i<64; i++){
+        int level= block[i];
+
+        if(level){
+            if(level>0){
+                s->dct_error_sum[intra][i] += level;
+                level -= s->dct_offset[intra][i];
+                if(level<0) level=0;
+            }else{
+                s->dct_error_sum[intra][i] -= level;
+                level += s->dct_offset[intra][i];
+                if(level>0) level=0;
+            }
+            block[i]= level;
+        }
+    }
+}
+
+static int dct_quantize_trellis_c(MpegEncContext *s,
+                        DCTELEM *block, int n,
+                        int qscale, int *overflow){
+    const int *qmat;
+    const uint8_t *scantable= s->intra_scantable.scantable;
+    const uint8_t *perm_scantable= s->intra_scantable.permutated;
+    int max=0;
+    unsigned int threshold1, threshold2;
+    int bias=0;
+    int run_tab[65];
+    int level_tab[65];
+    int score_tab[65];
+    int survivor[65];
+    int survivor_count;
+    int last_run=0;
+    int last_level=0;
+    int last_score= 0;
+    int last_i;
+    int coeff[2][64];
+    int coeff_count[64];
+    int qmul, qadd, start_i, last_non_zero, i, dc;
+    const int esc_length= s->ac_esc_length;
+    uint8_t * length;
+    uint8_t * last_length;
+    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
+
+    s->dsp.fdct (block);
+
+    if(s->dct_error_sum)
+        s->denoise_dct(s, block);
+    qmul= qscale*16;
+    qadd= ((qscale-1)|1)*8;
+
+    if (s->mb_intra) {
+        int q;
+        if (!s->h263_aic) {
+            if (n < 4)
+                q = s->y_dc_scale;
+            else
+                q = s->c_dc_scale;
+            q = q << 3;
+        } else{
+            /* For AIC we skip quant/dequant of INTRADC */
+            q = 1 << 3;
+            qadd=0;
+        }
+
+        /* note: block[0] is assumed to be positive */
+        block[0] = (block[0] + (q >> 1)) / q;
+        start_i = 1;
+        last_non_zero = 0;
+        qmat = s->q_intra_matrix[qscale];
+        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
+            bias= 1<<(QMAT_SHIFT-1);
+        length     = s->intra_ac_vlc_length;
+        last_length= s->intra_ac_vlc_last_length;
+    } else {
+        start_i = 0;
+        last_non_zero = -1;
+        qmat = s->q_inter_matrix[qscale];
+        length     = s->inter_ac_vlc_length;
+        last_length= s->inter_ac_vlc_last_length;
+    }
+    last_i= start_i;
+
+    threshold1= (1<<QMAT_SHIFT) - bias - 1;
+    threshold2= (threshold1<<1);
+
+    for(i=63; i>=start_i; i--) {
+        const int j = scantable[i];
+        int level = block[j] * qmat[j];
+
+        if(((unsigned)(level+threshold1))>threshold2){
+            last_non_zero = i;
+            break;
+        }
+    }
+
+    for(i=start_i; i<=last_non_zero; i++) {
+        const int j = scantable[i];
+        int level = block[j] * qmat[j];
+
+//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
+//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
+        if(((unsigned)(level+threshold1))>threshold2){
+            if(level>0){
+                level= (bias + level)>>QMAT_SHIFT;
+                coeff[0][i]= level;
+                coeff[1][i]= level-1;
+//                coeff[2][k]= level-2;
+            }else{
+                level= (bias - level)>>QMAT_SHIFT;
+                coeff[0][i]= -level;
+                coeff[1][i]= -level+1;
+//                coeff[2][k]= -level+2;
+            }
+            coeff_count[i]= FFMIN(level, 2);
+            assert(coeff_count[i]);
+            max |=level;
+        }else{
+            coeff[0][i]= (level>>31)|1;
+            coeff_count[i]= 1;
+        }
+    }
+
+    *overflow= s->max_qcoeff < max; //overflow might have happened
+
+    if(last_non_zero < start_i){
+        memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
+        return last_non_zero;
+    }
+
+    score_tab[start_i]= 0;
+    survivor[0]= start_i;
+    survivor_count= 1;
+
+    for(i=start_i; i<=last_non_zero; i++){
+        int level_index, j;
+        const int dct_coeff= FFABS(block[ scantable[i] ]);
+        const int zero_distoration= dct_coeff*dct_coeff;
+        int best_score=256*256*256*120;
+        for(level_index=0; level_index < coeff_count[i]; level_index++){
+            int distoration;
+            int level= coeff[level_index][i];
+            const int alevel= FFABS(level);
+            int unquant_coeff;
+
+            assert(level);
+
+            if(s->out_format == FMT_H263){
+                unquant_coeff= alevel*qmul + qadd;
+            }else{ //MPEG1
+                j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
+                if(s->mb_intra){
+                        unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
+                        unquant_coeff =   (unquant_coeff - 1) | 1;
+                }else{
+                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
+                        unquant_coeff =   (unquant_coeff - 1) | 1;
+                }
+                unquant_coeff<<= 3;
+            }
+
+            distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
+            level+=64;
+            if((level&(~127)) == 0){
+                for(j=survivor_count-1; j>=0; j--){
+                    int run= i - survivor[j];
+                    int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
+                    score += score_tab[i-run];
+
+                    if(score < best_score){
+                        best_score= score;
+                        run_tab[i+1]= run;
+                        level_tab[i+1]= level-64;
+                    }
+                }
+
+                if(s->out_format == FMT_H263){
+                    for(j=survivor_count-1; j>=0; j--){
+                        int run= i - survivor[j];
+                        int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
+                        score += score_tab[i-run];
+                        if(score < last_score){
+                            last_score= score;
+                            last_run= run;
+                            last_level= level-64;
+                            last_i= i+1;
+                        }
+                    }
+                }
+            }else{
+                distoration += esc_length*lambda;
+                for(j=survivor_count-1; j>=0; j--){
+                    int run= i - survivor[j];
+                    int score= distoration + score_tab[i-run];
+
+                    if(score < best_score){
+                        best_score= score;
+                        run_tab[i+1]= run;
+                        level_tab[i+1]= level-64;
+                    }
+                }
+
+                if(s->out_format == FMT_H263){
+                  for(j=survivor_count-1; j>=0; j--){
+                        int run= i - survivor[j];
+                        int score= distoration + score_tab[i-run];
+                        if(score < last_score){
+                            last_score= score;
+                            last_run= run;
+                            last_level= level-64;
+                            last_i= i+1;
+                        }
+                    }
+                }
+            }
+        }
+
+        score_tab[i+1]= best_score;
+
+        //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
+        if(last_non_zero <= 27){
+            for(; survivor_count; survivor_count--){
+                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
+                    break;
+            }
+        }else{
+            for(; survivor_count; survivor_count--){
+                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
+                    break;
+            }
+        }
+
+        survivor[ survivor_count++ ]= i+1;
+    }
+
+    if(s->out_format != FMT_H263){
+        last_score= 256*256*256*120;
+        for(i= survivor[0]; i<=last_non_zero + 1; i++){
+            int score= score_tab[i];
+            if(i) score += lambda*2; //FIXME exacter?
+
+            if(score < last_score){
+                last_score= score;
+                last_i= i;
+                last_level= level_tab[i];
+                last_run= run_tab[i];
+            }
+        }
+    }
+
+    s->coded_score[n] = last_score;
+
+    dc= FFABS(block[0]);
+    last_non_zero= last_i - 1;
+    memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
+
+    if(last_non_zero < start_i)
+        return last_non_zero;
+
+    if(last_non_zero == 0 && start_i == 0){
+        int best_level= 0;
+        int best_score= dc * dc;
+
+        for(i=0; i<coeff_count[0]; i++){
+            int level= coeff[i][0];
+            int alevel= FFABS(level);
+            int unquant_coeff, score, distortion;
+
+            if(s->out_format == FMT_H263){
+                    unquant_coeff= (alevel*qmul + qadd)>>3;
+            }else{ //MPEG1
+                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
+                    unquant_coeff =   (unquant_coeff - 1) | 1;
+            }
+            unquant_coeff = (unquant_coeff + 4) >> 3;
+            unquant_coeff<<= 3 + 3;
+
+            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
+            level+=64;
+            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
+            else                    score= distortion + esc_length*lambda;
+
+            if(score < best_score){
+                best_score= score;
+                best_level= level - 64;
+            }
+        }
+        block[0]= best_level;
+        s->coded_score[n] = best_score - dc*dc;
+        if(best_level == 0) return -1;
+        else                return last_non_zero;
+    }
+
+    i= last_i;
+    assert(last_level);
+
+    block[ perm_scantable[last_non_zero] ]= last_level;
+    i -= last_run + 1;
+
+    for(; i>start_i; i -= run_tab[i] + 1){
+        block[ perm_scantable[i-1] ]= level_tab[i];
+    }
+
+    return last_non_zero;
+}
+
+//#define REFINE_STATS 1
+static int16_t basis[64][64];
+
+static void build_basis(uint8_t *perm){
+    int i, j, x, y;
+    emms_c();
+    for(i=0; i<8; i++){
+        for(j=0; j<8; j++){
+            for(y=0; y<8; y++){
+                for(x=0; x<8; x++){
+                    double s= 0.25*(1<<BASIS_SHIFT);
+                    int index= 8*i + j;
+                    int perm_index= perm[index];
+                    if(i==0) s*= sqrt(0.5);
+                    if(j==0) s*= sqrt(0.5);
+                    basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
+                }
+            }
+        }
+    }
+}
+
+static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
+                        DCTELEM *block, int16_t *weight, DCTELEM *orig,
+                        int n, int qscale){
+    int16_t rem[64];
+    DECLARE_ALIGNED_16(DCTELEM, d1[64]);
+    const int *qmat;
+    const uint8_t *scantable= s->intra_scantable.scantable;
+    const uint8_t *perm_scantable= s->intra_scantable.permutated;
+//    unsigned int threshold1, threshold2;
+//    int bias=0;
+    int run_tab[65];
+    int prev_run=0;
+    int prev_level=0;
+    int qmul, qadd, start_i, last_non_zero, i, dc;
+    uint8_t * length;
+    uint8_t * last_length;
+    int lambda;
+    int rle_index, run, q, sum;
+#ifdef REFINE_STATS
+static int count=0;
+static int after_last=0;
+static int to_zero=0;
+static int from_zero=0;
+static int raise=0;
+static int lower=0;
+static int messed_sign=0;
+#endif
+
+    if(basis[0][0] == 0)
+        build_basis(s->dsp.idct_permutation);
+
+    qmul= qscale*2;
+    qadd= (qscale-1)|1;
+    if (s->mb_intra) {
+        if (!s->h263_aic) {
+            if (n < 4)
+                q = s->y_dc_scale;
+            else
+                q = s->c_dc_scale;
+        } else{
+            /* For AIC we skip quant/dequant of INTRADC */
+            q = 1;
+            qadd=0;
+        }
+        q <<= RECON_SHIFT-3;
+        /* note: block[0] is assumed to be positive */
+        dc= block[0]*q;
+//        block[0] = (block[0] + (q >> 1)) / q;
+        start_i = 1;
+        qmat = s->q_intra_matrix[qscale];
+//        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
+//            bias= 1<<(QMAT_SHIFT-1);
+        length     = s->intra_ac_vlc_length;
+        last_length= s->intra_ac_vlc_last_length;
+    } else {
+        dc= 0;
+        start_i = 0;
+        qmat = s->q_inter_matrix[qscale];
+        length     = s->inter_ac_vlc_length;
+        last_length= s->inter_ac_vlc_last_length;
+    }
+    last_non_zero = s->block_last_index[n];
+
+#ifdef REFINE_STATS
+{START_TIMER
+#endif
+    dc += (1<<(RECON_SHIFT-1));
+    for(i=0; i<64; i++){
+        rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
+    }
+#ifdef REFINE_STATS
+STOP_TIMER("memset rem[]")}
+#endif
+    sum=0;
+    for(i=0; i<64; i++){
+        int one= 36;
+        int qns=4;
+        int w;
+
+        w= FFABS(weight[i]) + qns*one;
+        w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
+
+        weight[i] = w;
+//        w=weight[i] = (63*qns + (w/2)) / w;
+
+        assert(w>0);
+        assert(w<(1<<6));
+        sum += w*w;
+    }
+    lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
+#ifdef REFINE_STATS
+{START_TIMER
+#endif
+    run=0;
+    rle_index=0;
+    for(i=start_i; i<=last_non_zero; i++){
+        int j= perm_scantable[i];
+        const int level= block[j];
+        int coeff;
+
+        if(level){
+            if(level<0) coeff= qmul*level - qadd;
+            else        coeff= qmul*level + qadd;
+            run_tab[rle_index++]=run;
+            run=0;
+
+            s->dsp.add_8x8basis(rem, basis[j], coeff);
+        }else{
+            run++;
+        }
+    }
+#ifdef REFINE_STATS
+if(last_non_zero>0){
+STOP_TIMER("init rem[]")
+}
+}
+
+{START_TIMER
+#endif
+    for(;;){
+        int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
+        int best_coeff=0;
+        int best_change=0;
+        int run2, best_unquant_change=0, analyze_gradient;
+#ifdef REFINE_STATS
+{START_TIMER
+#endif
+        analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
+
+        if(analyze_gradient){
+#ifdef REFINE_STATS
+{START_TIMER
+#endif
+            for(i=0; i<64; i++){
+                int w= weight[i];
+
+                d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
+            }
+#ifdef REFINE_STATS
+STOP_TIMER("rem*w*w")}
+{START_TIMER
+#endif
+            s->dsp.fdct(d1);
+#ifdef REFINE_STATS
+STOP_TIMER("dct")}
+#endif
+        }
+
+        if(start_i){
+            const int level= block[0];
+            int change, old_coeff;
+
+            assert(s->mb_intra);
+
+            old_coeff= q*level;
+
+            for(change=-1; change<=1; change+=2){
+                int new_level= level + change;
+                int score, new_coeff;
+
+                new_coeff= q*new_level;
+                if(new_coeff >= 2048 || new_coeff < 0)
+                    continue;
+
+                score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
+                if(score<best_score){
+                    best_score= score;
+                    best_coeff= 0;
+                    best_change= change;
+                    best_unquant_change= new_coeff - old_coeff;
+                }
+            }
+        }
+
+        run=0;
+        rle_index=0;
+        run2= run_tab[rle_index++];
+        prev_level=0;
+        prev_run=0;
+
+        for(i=start_i; i<64; i++){
+            int j= perm_scantable[i];
+            const int level= block[j];
+            int change, old_coeff;
+
+            if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
+                break;
+
+            if(level){
+                if(level<0) old_coeff= qmul*level - qadd;
+                else        old_coeff= qmul*level + qadd;
+                run2= run_tab[rle_index++]; //FIXME ! maybe after last
+            }else{
+                old_coeff=0;
+                run2--;
+                assert(run2>=0 || i >= last_non_zero );
+            }
+
+            for(change=-1; change<=1; change+=2){
+                int new_level= level + change;
+                int score, new_coeff, unquant_change;
+
+                score=0;
+                if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
+                   continue;
+
+                if(new_level){
+                    if(new_level<0) new_coeff= qmul*new_level - qadd;
+                    else            new_coeff= qmul*new_level + qadd;
+                    if(new_coeff >= 2048 || new_coeff <= -2048)
+                        continue;
+                    //FIXME check for overflow
+
+                    if(level){
+                        if(level < 63 && level > -63){
+                            if(i < last_non_zero)
+                                score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
+                                         - length[UNI_AC_ENC_INDEX(run, level+64)];
+                            else
+                                score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
+                                         - last_length[UNI_AC_ENC_INDEX(run, level+64)];
+                        }
+                    }else{
+                        assert(FFABS(new_level)==1);
+
+                        if(analyze_gradient){
+                            int g= d1[ scantable[i] ];
+                            if(g && (g^new_level) >= 0)
+                                continue;
+                        }
+
+                        if(i < last_non_zero){
+                            int next_i= i + run2 + 1;
+                            int next_level= block[ perm_scantable[next_i] ] + 64;
+
+                            if(next_level&(~127))
+                                next_level= 0;
+
+                            if(next_i < last_non_zero)
+                                score +=   length[UNI_AC_ENC_INDEX(run, 65)]
+                                         + length[UNI_AC_ENC_INDEX(run2, next_level)]
+                                         - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
+                            else
+                                score +=  length[UNI_AC_ENC_INDEX(run, 65)]
+                                        + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
+                                        - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
+                        }else{
+                            score += last_length[UNI_AC_ENC_INDEX(run, 65)];
+                            if(prev_level){
+                                score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
+                                        - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
+                            }
+                        }
+                    }
+                }else{
+                    new_coeff=0;
+                    assert(FFABS(level)==1);
+
+                    if(i < last_non_zero){
+                        int next_i= i + run2 + 1;
+                        int next_level= block[ perm_scantable[next_i] ] + 64;
+
+                        if(next_level&(~127))
+                            next_level= 0;
+
+                        if(next_i < last_non_zero)
+                            score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
+                                     - length[UNI_AC_ENC_INDEX(run2, next_level)]
+                                     - length[UNI_AC_ENC_INDEX(run, 65)];
+                        else
+                            score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
+                                     - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
+                                     - length[UNI_AC_ENC_INDEX(run, 65)];
+                    }else{
+                        score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
+                        if(prev_level){
+                            score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
+                                    - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
+                        }
+                    }
+                }
+
+                score *= lambda;
+
+                unquant_change= new_coeff - old_coeff;
+                assert((score < 100*lambda && score > -100*lambda) || lambda==0);
+
+                score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
+                if(score<best_score){
+                    best_score= score;
+                    best_coeff= i;
+                    best_change= change;
+                    best_unquant_change= unquant_change;
+                }
+            }
+            if(level){
+                prev_level= level + 64;
+                if(prev_level&(~127))
+                    prev_level= 0;
+                prev_run= run;
+                run=0;
+            }else{
+                run++;
+            }
+        }
+#ifdef REFINE_STATS
+STOP_TIMER("iterative step")}
+#endif
+
+        if(best_change){
+            int j= perm_scantable[ best_coeff ];
+
+            block[j] += best_change;
+
+            if(best_coeff > last_non_zero){
+                last_non_zero= best_coeff;
+                assert(block[j]);
+#ifdef REFINE_STATS
+after_last++;
+#endif
+            }else{
+#ifdef REFINE_STATS
+if(block[j]){
+    if(block[j] - best_change){
+        if(FFABS(block[j]) > FFABS(block[j] - best_change)){
+            raise++;
+        }else{
+            lower++;
+        }
+    }else{
+        from_zero++;
+    }
+}else{
+    to_zero++;
+}
+#endif
+                for(; last_non_zero>=start_i; last_non_zero--){
+                    if(block[perm_scantable[last_non_zero]])
+                        break;
+                }
+            }
+#ifdef REFINE_STATS
+count++;
+if(256*256*256*64 % count == 0){
+    printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
+}
+#endif
+            run=0;
+            rle_index=0;
+            for(i=start_i; i<=last_non_zero; i++){
+                int j= perm_scantable[i];
+                const int level= block[j];
+
+                 if(level){
+                     run_tab[rle_index++]=run;
+                     run=0;
+                 }else{
+                     run++;
+                 }
+            }
+
+            s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
+        }else{
+            break;
+        }
+    }
+#ifdef REFINE_STATS
+if(last_non_zero>0){
+STOP_TIMER("iterative search")
+}
+}
+#endif
+
+    return last_non_zero;
+}
+
+static int dct_quantize_c(MpegEncContext *s,
+                        DCTELEM *block, int n,
+                        int qscale, int *overflow)
+{
+    int i, j, level, last_non_zero, q, start_i;
+    const int *qmat;
+    const uint8_t *scantable= s->intra_scantable.scantable;
+    int bias;
+    int max=0;
+    unsigned int threshold1, threshold2;
+
+    s->dsp.fdct (block);
+
+    if(s->dct_error_sum)
+        s->denoise_dct(s, block);
+
+    if (s->mb_intra) {
+        if (!s->h263_aic) {
+            if (n < 4)
+                q = s->y_dc_scale;
+            else
+                q = s->c_dc_scale;
+            q = q << 3;
+        } else
+            /* For AIC we skip quant/dequant of INTRADC */
+            q = 1 << 3;
+
+        /* note: block[0] is assumed to be positive */
+        block[0] = (block[0] + (q >> 1)) / q;
+        start_i = 1;
+        last_non_zero = 0;
+        qmat = s->q_intra_matrix[qscale];
+        bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
+    } else {
+        start_i = 0;
+        last_non_zero = -1;
+        qmat = s->q_inter_matrix[qscale];
+        bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
+    }
+    threshold1= (1<<QMAT_SHIFT) - bias - 1;
+    threshold2= (threshold1<<1);
+    for(i=63;i>=start_i;i--) {
+        j = scantable[i];
+        level = block[j] * qmat[j];
+
+        if(((unsigned)(level+threshold1))>threshold2){
+            last_non_zero = i;
+            break;
+        }else{
+            block[j]=0;
+        }
+    }
+    for(i=start_i; i<=last_non_zero; i++) {
+        j = scantable[i];
+        level = block[j] * qmat[j];
+
+//        if(   bias+level >= (1<<QMAT_SHIFT)
+//           || bias-level >= (1<<QMAT_SHIFT)){
+        if(((unsigned)(level+threshold1))>threshold2){
+            if(level>0){
+                level= (bias + level)>>QMAT_SHIFT;
+                block[j]= level;
+            }else{
+                level= (bias - level)>>QMAT_SHIFT;
+                block[j]= -level;
+            }
+            max |=level;
+        }else{
+            block[j]=0;
+        }
+    }
+    *overflow= s->max_qcoeff < max; //overflow might have happened
+
+    /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
+    if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
+        ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
+
+    return last_non_zero;
+}
+
+#endif //CONFIG_ENCODERS
+
+static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
+                                   DCTELEM *block, int n, int qscale)
+{
+    int i, level, nCoeffs;
+    const uint16_t *quant_matrix;
+
+    nCoeffs= s->block_last_index[n];
+
+    if (n < 4)
+        block[0] = block[0] * s->y_dc_scale;
+    else
+        block[0] = block[0] * s->c_dc_scale;
+    /* XXX: only mpeg1 */
+    quant_matrix = s->intra_matrix;
+    for(i=1;i<=nCoeffs;i++) {
+        int j= s->intra_scantable.permutated[i];
+        level = block[j];
+        if (level) {
+            if (level < 0) {
+                level = -level;
+                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+                level = (level - 1) | 1;
+                level = -level;
+            } else {
+                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+                level = (level - 1) | 1;
+            }
+            block[j] = level;
+        }
+    }
+}
+
+static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
+                                   DCTELEM *block, int n, int qscale)
+{
+    int i, level, nCoeffs;
+    const uint16_t *quant_matrix;
+
+    nCoeffs= s->block_last_index[n];
+
+    quant_matrix = s->inter_matrix;
+    for(i=0; i<=nCoeffs; i++) {
+        int j= s->intra_scantable.permutated[i];
+        level = block[j];
+        if (level) {
+            if (level < 0) {
+                level = -level;
+                level = (((level << 1) + 1) * qscale *
+                         ((int) (quant_matrix[j]))) >> 4;
+                level = (level - 1) | 1;
+                level = -level;
+            } else {
+                level = (((level << 1) + 1) * qscale *
+                         ((int) (quant_matrix[j]))) >> 4;
+                level = (level - 1) | 1;
+            }
+            block[j] = level;
+        }
+    }
+}
+
+static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
+                                   DCTELEM *block, int n, int qscale)
+{
+    int i, level, nCoeffs;
+    const uint16_t *quant_matrix;
+
+    if(s->alternate_scan) nCoeffs= 63;
+    else nCoeffs= s->block_last_index[n];
+
+    if (n < 4)
+        block[0] = block[0] * s->y_dc_scale;
+    else
+        block[0] = block[0] * s->c_dc_scale;
+    quant_matrix = s->intra_matrix;
+    for(i=1;i<=nCoeffs;i++) {
+        int j= s->intra_scantable.permutated[i];
+        level = block[j];
+        if (level) {
+            if (level < 0) {
+                level = -level;
+                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+                level = -level;
+            } else {
+                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+            }
+            block[j] = level;
+        }
+    }
+}
+
+static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
+                                   DCTELEM *block, int n, int qscale)
+{
+    int i, level, nCoeffs;
+    const uint16_t *quant_matrix;
+    int sum=-1;
+
+    if(s->alternate_scan) nCoeffs= 63;
+    else nCoeffs= s->block_last_index[n];
+
+    if (n < 4)
+        block[0] = block[0] * s->y_dc_scale;
+    else
+        block[0] = block[0] * s->c_dc_scale;
+    quant_matrix = s->intra_matrix;
+    for(i=1;i<=nCoeffs;i++) {
+        int j= s->intra_scantable.permutated[i];
+        level = block[j];
+        if (level) {
+            if (level < 0) {
+                level = -level;
+                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+                level = -level;
+            } else {
+                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+            }
+            block[j] = level;
+            sum+=level;
+        }
+    }
+    block[63]^=sum&1;
+}
+
+static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
+                                   DCTELEM *block, int n, int qscale)
+{
+    int i, level, nCoeffs;
+    const uint16_t *quant_matrix;
+    int sum=-1;
+
+    if(s->alternate_scan) nCoeffs= 63;
+    else nCoeffs= s->block_last_index[n];
+
+    quant_matrix = s->inter_matrix;
+    for(i=0; i<=nCoeffs; i++) {
+        int j= s->intra_scantable.permutated[i];
+        level = block[j];
+        if (level) {
+            if (level < 0) {
+                level = -level;
+                level = (((level << 1) + 1) * qscale *
+                         ((int) (quant_matrix[j]))) >> 4;
+                level = -level;
+            } else {
+                level = (((level << 1) + 1) * qscale *
+                         ((int) (quant_matrix[j]))) >> 4;
+            }
+            block[j] = level;
+            sum+=level;
+        }
+    }
+    block[63]^=sum&1;
+}
+
+static void dct_unquantize_h263_intra_c(MpegEncContext *s,
+                                  DCTELEM *block, int n, int qscale)
+{
+    int i, level, qmul, qadd;
+    int nCoeffs;
+
+    assert(s->block_last_index[n]>=0);
+
+    qmul = qscale << 1;
+
+    if (!s->h263_aic) {
+        if (n < 4)
+            block[0] = block[0] * s->y_dc_scale;
+        else
+            block[0] = block[0] * s->c_dc_scale;
+        qadd = (qscale - 1) | 1;
+    }else{
+        qadd = 0;
+    }
+    if(s->ac_pred)
+        nCoeffs=63;
+    else
+        nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+    for(i=1; i<=nCoeffs; i++) {
+        level = block[i];
+        if (level) {
+            if (level < 0) {
+                level = level * qmul - qadd;
+            } else {
+                level = level * qmul + qadd;
+            }
+            block[i] = level;
+        }
+    }
+}
+
+static void dct_unquantize_h263_inter_c(MpegEncContext *s,
+                                  DCTELEM *block, int n, int qscale)
+{
+    int i, level, qmul, qadd;
+    int nCoeffs;
+
+    assert(s->block_last_index[n]>=0);
+
+    qadd = (qscale - 1) | 1;
+    qmul = qscale << 1;
+
+    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+    for(i=0; i<=nCoeffs; i++) {
+        level = block[i];
+        if (level) {
+            if (level < 0) {
+                level = level * qmul - qadd;
+            } else {
+                level = level * qmul + qadd;
+            }
+            block[i] = level;
+        }
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+AVCodec h263_encoder = {
+    "h263",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_H263,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+};
+
+AVCodec h263p_encoder = {
+    "h263p",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_H263P,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+};
+
+AVCodec flv_encoder = {
+    "flv",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_FLV1,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+};
+
+AVCodec rv10_encoder = {
+    "rv10",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_RV10,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+};
+
+AVCodec rv20_encoder = {
+    "rv20",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_RV20,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+};
+
+AVCodec mpeg4_encoder = {
+    "mpeg4",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MPEG4,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+    .capabilities= CODEC_CAP_DELAY,
+};
+
+AVCodec msmpeg4v1_encoder = {
+    "msmpeg4v1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MSMPEG4V1,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+};
+
+AVCodec msmpeg4v2_encoder = {
+    "msmpeg4v2",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MSMPEG4V2,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+};
+
+AVCodec msmpeg4v3_encoder = {
+    "msmpeg4",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MSMPEG4V3,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+};
+
+AVCodec wmv1_encoder = {
+    "wmv1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_WMV1,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+};
+
+AVCodec mjpeg_encoder = {
+    "mjpeg",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MJPEG,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, PIX_FMT_YUVJ422P, -1},
+};
+
+#endif //CONFIG_ENCODERS
diff --git a/contrib/ffmpeg/libavcodec/mpegvideo.h b/contrib/ffmpeg/libavcodec/mpegvideo.h
new file mode 100644
index 000000000..011678a42
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/mpegvideo.h
@@ -0,0 +1,928 @@
+/*
+ * Generic DCT based hybrid video encoder
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file mpegvideo.h
+ * mpegvideo header.
+ */
+
+#ifndef AVCODEC_MPEGVIDEO_H
+#define AVCODEC_MPEGVIDEO_H
+
+#include "dsputil.h"
+#include "bitstream.h"
+#include "ratecontrol.h"
+#include "parser.h"
+
+#define FRAME_SKIPPED 100 ///< return value for header parsers if frame is not coded
+
+enum OutputFormat {
+    FMT_MPEG1,
+    FMT_H261,
+    FMT_H263,
+    FMT_MJPEG,
+    FMT_H264,
+};
+
+#define EDGE_WIDTH 16
+
+#define MPEG_BUF_SIZE (16 * 1024)
+
+#define QMAT_SHIFT_MMX 16
+#define QMAT_SHIFT 22
+
+#define MAX_FCODE 7
+#define MAX_MV 2048
+
+#define MAX_THREADS 8
+
+#define MAX_PICTURE_COUNT 32
+
+#define ME_MAP_SIZE 64
+#define ME_MAP_SHIFT 3
+#define ME_MAP_MV_BITS 11
+
+/* run length table */
+#define MAX_RUN    64
+#define MAX_LEVEL  64
+
+#define I_TYPE FF_I_TYPE  ///< Intra
+#define P_TYPE FF_P_TYPE  ///< Predicted
+#define B_TYPE FF_B_TYPE  ///< Bi-dir predicted
+#define S_TYPE FF_S_TYPE  ///< S(GMC)-VOP MPEG4
+#define SI_TYPE FF_SI_TYPE  ///< Switching Intra
+#define SP_TYPE FF_SP_TYPE  ///< Switching Predicted
+
+#define MAX_MB_BYTES (30*16*16*3/8 + 120)
+
+#define INPLACE_OFFSET 16
+
+/**
+ * Scantable.
+ */
+typedef struct ScanTable{
+    const uint8_t *scantable;
+    uint8_t permutated[64];
+    uint8_t raster_end[64];
+#ifdef ARCH_POWERPC
+                /** Used by dct_quantise_alitvec to find last-non-zero */
+    DECLARE_ALIGNED_8(uint8_t, inverse[64]);
+#endif
+} ScanTable;
+
+/**
+ * Picture.
+ */
+typedef struct Picture{
+    FF_COMMON_FRAME
+
+    /**
+     * halfpel luma planes.
+     */
+    uint8_t *interpolated[3];
+    int16_t (*motion_val_base[2])[2];
+    uint32_t *mb_type_base;
+#define MB_TYPE_INTRA MB_TYPE_INTRA4x4 //default mb_type if theres just one type
+#define IS_INTRA4x4(a)   ((a)&MB_TYPE_INTRA4x4)
+#define IS_INTRA16x16(a) ((a)&MB_TYPE_INTRA16x16)
+#define IS_PCM(a)        ((a)&MB_TYPE_INTRA_PCM)
+#define IS_INTRA(a)      ((a)&7)
+#define IS_INTER(a)      ((a)&(MB_TYPE_16x16|MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8))
+#define IS_SKIP(a)       ((a)&MB_TYPE_SKIP)
+#define IS_INTRA_PCM(a)  ((a)&MB_TYPE_INTRA_PCM)
+#define IS_INTERLACED(a) ((a)&MB_TYPE_INTERLACED)
+#define IS_DIRECT(a)     ((a)&MB_TYPE_DIRECT2)
+#define IS_GMC(a)        ((a)&MB_TYPE_GMC)
+#define IS_16X16(a)      ((a)&MB_TYPE_16x16)
+#define IS_16X8(a)       ((a)&MB_TYPE_16x8)
+#define IS_8X16(a)       ((a)&MB_TYPE_8x16)
+#define IS_8X8(a)        ((a)&MB_TYPE_8x8)
+#define IS_SUB_8X8(a)    ((a)&MB_TYPE_16x16) //note reused
+#define IS_SUB_8X4(a)    ((a)&MB_TYPE_16x8)  //note reused
+#define IS_SUB_4X8(a)    ((a)&MB_TYPE_8x16)  //note reused
+#define IS_SUB_4X4(a)    ((a)&MB_TYPE_8x8)   //note reused
+#define IS_ACPRED(a)     ((a)&MB_TYPE_ACPRED)
+#define IS_QUANT(a)      ((a)&MB_TYPE_QUANT)
+#define IS_DIR(a, part, list) ((a) & (MB_TYPE_P0L0<<((part)+2*(list))))
+#define USES_LIST(a, list) ((a) & ((MB_TYPE_P0L0|MB_TYPE_P1L0)<<(2*(list)))) ///< does this mb use listX, note doesnt work if subMBs
+#define HAS_CBP(a)        ((a)&MB_TYPE_CBP)
+
+    int field_poc[2];           ///< h264 top/bottom POC
+    int poc;                    ///< h264 frame POC
+    int frame_num;              ///< h264 frame_num
+    int pic_id;                 ///< h264 pic_num or long_term_pic_idx
+    int long_ref;               ///< 1->long term reference 0->short term reference
+    int ref_poc[2][16];         ///< h264 POCs of the frames used as reference
+    int ref_count[2];           ///< number of entries in ref_poc
+
+    int mb_var_sum;             ///< sum of MB variance for current frame
+    int mc_mb_var_sum;          ///< motion compensated MB variance for current frame
+    uint16_t *mb_var;           ///< Table for MB variances
+    uint16_t *mc_mb_var;        ///< Table for motion compensated MB variances
+    uint8_t *mb_mean;           ///< Table for MB luminance
+    int32_t *mb_cmp_score;      ///< Table for MB cmp scores, for mb decision FIXME remove
+    int b_frame_score;          /* */
+} Picture;
+
+struct MpegEncContext;
+
+/**
+ * Motion estimation context.
+ */
+typedef struct MotionEstContext{
+    AVCodecContext *avctx;
+    int skip;                          ///< set if ME is skipped for the current MB
+    int co_located_mv[4][2];           ///< mv from last p frame for direct mode ME
+    int direct_basis_mv[4][2];
+    uint8_t *scratchpad;               ///< data area for the me algo, so that the ME doesnt need to malloc/free
+    uint8_t *best_mb;
+    uint8_t *temp_mb[2];
+    uint8_t *temp;
+    int best_bits;
+    uint32_t *map;                     ///< map to avoid duplicate evaluations
+    uint32_t *score_map;               ///< map to store the scores
+    int map_generation;
+    int pre_penalty_factor;
+    int penalty_factor;
+    int sub_penalty_factor;
+    int mb_penalty_factor;
+    int flags;
+    int sub_flags;
+    int mb_flags;
+    int pre_pass;                      ///< = 1 for the pre pass
+    int dia_size;
+    int xmin;
+    int xmax;
+    int ymin;
+    int ymax;
+    int pred_x;
+    int pred_y;
+    uint8_t *src[4][4];
+    uint8_t *ref[4][4];
+    int stride;
+    int uvstride;
+    /* temp variables for picture complexity calculation */
+    int mc_mb_var_sum_temp;
+    int mb_var_sum_temp;
+    int scene_change_score;
+/*    cmp, chroma_cmp;*/
+    op_pixels_func (*hpel_put)[4];
+    op_pixels_func (*hpel_avg)[4];
+    qpel_mc_func (*qpel_put)[16];
+    qpel_mc_func (*qpel_avg)[16];
+    uint8_t (*mv_penalty)[MAX_MV*2+1];  ///< amount of bits needed to encode a MV
+    uint8_t *current_mv_penalty;
+    int (*sub_motion_search)(struct MpegEncContext * s,
+                                  int *mx_ptr, int *my_ptr, int dmin,
+                                  int src_index, int ref_index,
+                                  int size, int h);
+}MotionEstContext;
+
+/**
+ * MpegEncContext.
+ */
+typedef struct MpegEncContext {
+    struct AVCodecContext *avctx;
+    /* the following parameters must be initialized before encoding */
+    int width, height;///< picture size. must be a multiple of 16
+    int gop_size;
+    int intra_only;   ///< if true, only intra pictures are generated
+    int bit_rate;     ///< wanted bit rate
+    enum OutputFormat out_format; ///< output format
+    int h263_pred;    ///< use mpeg4/h263 ac/dc predictions
+
+/* the following codec id fields are deprecated in favor of codec_id */
+    int h263_plus;    ///< h263 plus headers
+    int h263_msmpeg4; ///< generate MSMPEG4 compatible stream (deprecated, use msmpeg4_version instead)
+    int h263_flv;     ///< use flv h263 header
+
+    enum CodecID codec_id;     /* see CODEC_ID_xxx */
+    int fixed_qscale; ///< fixed qscale if non zero
+    int encoding;     ///< true if we are encoding (vs decoding)
+    int flags;        ///< AVCodecContext.flags (HQ, MV4, ...)
+    int flags2;       ///< AVCodecContext.flags2
+    int max_b_frames; ///< max number of b-frames for encoding
+    int luma_elim_threshold;
+    int chroma_elim_threshold;
+    int strict_std_compliance; ///< strictly follow the std (MPEG4, ...)
+    int workaround_bugs;       ///< workaround bugs in encoders which cannot be detected automatically
+    int codec_tag;             ///< internal codec_tag upper case converted from avctx codec_tag
+    int stream_codec_tag;      ///< internal stream_codec_tag upper case converted from avctx stream_codec_tag
+    /* the following fields are managed internally by the encoder */
+
+    /** bit output */
+    PutBitContext pb;
+
+    /* sequence parameters */
+    int context_initialized;
+    int input_picture_number;  ///< used to set pic->display_picture_number, shouldnt be used for/by anything else
+    int coded_picture_number;  ///< used to set pic->coded_picture_number, shouldnt be used for/by anything else
+    int picture_number;       //FIXME remove, unclear definition
+    int picture_in_gop_number; ///< 0-> first pic in gop, ...
+    int b_frames_since_non_b;  ///< used for encoding, relative to not yet reordered input
+    int64_t user_specified_pts;///< last non zero pts from AVFrame which was passed into avcodec_encode_video()
+    int mb_width, mb_height;   ///< number of MBs horizontally & vertically
+    int mb_stride;             ///< mb_width+1 used for some arrays to allow simple addressing of left & top MBs without sig11
+    int b8_stride;             ///< 2*mb_width+1 used for some 8x8 block arrays to allow simple addressing
+    int b4_stride;             ///< 4*mb_width+1 used for some 4x4 block arrays to allow simple addressing
+    int h_edge_pos, v_edge_pos;///< horizontal / vertical position of the right/bottom edge (pixel replication)
+    int mb_num;                ///< number of MBs of a picture
+    int linesize;              ///< line size, in bytes, may be different from width
+    int uvlinesize;            ///< line size, for chroma in bytes, may be different from width
+    Picture *picture;          ///< main picture buffer
+    Picture **input_picture;   ///< next pictures on display order for encoding
+    Picture **reordered_input_picture; ///< pointer to the next pictures in codedorder for encoding
+
+    int start_mb_y;            ///< start mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y)
+    int end_mb_y;              ///< end   mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y)
+    struct MpegEncContext *thread_context[MAX_THREADS];
+
+    /**
+     * copy of the previous picture structure.
+     * note, linesize & data, might not match the previous picture (for field pictures)
+     */
+    Picture last_picture;
+
+    /**
+     * copy of the next picture structure.
+     * note, linesize & data, might not match the next picture (for field pictures)
+     */
+    Picture next_picture;
+
+    /**
+     * copy of the source picture structure for encoding.
+     * note, linesize & data, might not match the source picture (for field pictures)
+     */
+    Picture new_picture;
+
+    /**
+     * copy of the current picture structure.
+     * note, linesize & data, might not match the current picture (for field pictures)
+     */
+    Picture current_picture;    ///< buffer to store the decompressed current picture
+
+    Picture *last_picture_ptr;     ///< pointer to the previous picture.
+    Picture *next_picture_ptr;     ///< pointer to the next picture (for bidir pred)
+    Picture *current_picture_ptr;  ///< pointer to the current picture
+    uint8_t *visualization_buffer[3]; //< temporary buffer vor MV visualization
+    int last_dc[3];                ///< last DC values for MPEG1
+    int16_t *dc_val_base;
+    int16_t *dc_val[3];            ///< used for mpeg4 DC prediction, all 3 arrays must be continuous
+    int16_t dc_cache[4*5];
+    int y_dc_scale, c_dc_scale;
+    const uint8_t *y_dc_scale_table;     ///< qscale -> y_dc_scale table
+    const uint8_t *c_dc_scale_table;     ///< qscale -> c_dc_scale table
+    const uint8_t *chroma_qscale_table;  ///< qscale -> chroma_qscale (h263)
+    uint8_t *coded_block_base;
+    uint8_t *coded_block;          ///< used for coded block pattern prediction (msmpeg4v3, wmv1)
+    int16_t (*ac_val_base)[16];
+    int16_t (*ac_val[3])[16];      ///< used for for mpeg4 AC prediction, all 3 arrays must be continuous
+    int ac_pred;
+    uint8_t *prev_pict_types;     ///< previous picture types in bitstream order, used for mb skip
+#define PREV_PICT_TYPES_BUFFER_SIZE 256
+    int mb_skipped;                ///< MUST BE SET only during DECODING
+    uint8_t *mbskip_table;        /**< used to avoid copy if macroblock skipped (for black regions for example)
+                                   and used for b-frame encoding & decoding (contains skip table of next P Frame) */
+    uint8_t *mbintra_table;       ///< used to avoid setting {ac, dc, cbp}-pred stuff to zero on inter MB decoding
+    uint8_t *cbp_table;           ///< used to store cbp, ac_pred for partitioned decoding
+    uint8_t *pred_dir_table;      ///< used to store pred_dir for partitioned decoding
+    uint8_t *allocated_edge_emu_buffer;
+    uint8_t *edge_emu_buffer;     ///< points into the middle of allocated_edge_emu_buffer
+    uint8_t *rd_scratchpad;       ///< scratchpad for rate distortion mb decision
+    uint8_t *obmc_scratchpad;
+    uint8_t *b_scratchpad;        ///< scratchpad used for writing into write only buffers
+
+    int qscale;                 ///< QP
+    int chroma_qscale;          ///< chroma QP
+    unsigned int lambda;        ///< lagrange multipler used in rate distortion
+    unsigned int lambda2;       ///< (lambda*lambda) >> FF_LAMBDA_SHIFT
+    int *lambda_table;
+    int adaptive_quant;         ///< use adaptive quantization
+    int dquant;                 ///< qscale difference to prev qscale
+    int pict_type;              ///< I_TYPE, P_TYPE, B_TYPE, ...
+    int last_pict_type; //FIXME removes
+    int last_non_b_pict_type;   ///< used for mpeg4 gmc b-frames & ratecontrol
+    int dropable;
+    int frame_rate_index;
+    int last_lambda_for[5];     ///< last lambda for a specific pict type
+
+    /* motion compensation */
+    int unrestricted_mv;        ///< mv can point outside of the coded picture
+    int h263_long_vectors;      ///< use horrible h263v1 long vector mode
+    int decode;                 ///< if 0 then decoding will be skipped (for encoding b frames for example)
+
+    DSPContext dsp;             ///< pointers for accelerated dsp functions
+    int f_code;                 ///< forward MV resolution
+    int b_code;                 ///< backward MV resolution for B Frames (mpeg4)
+    int16_t (*p_mv_table_base)[2];
+    int16_t (*b_forw_mv_table_base)[2];
+    int16_t (*b_back_mv_table_base)[2];
+    int16_t (*b_bidir_forw_mv_table_base)[2];
+    int16_t (*b_bidir_back_mv_table_base)[2];
+    int16_t (*b_direct_mv_table_base)[2];
+    int16_t (*p_field_mv_table_base[2][2])[2];
+    int16_t (*b_field_mv_table_base[2][2][2])[2];
+    int16_t (*p_mv_table)[2];            ///< MV table (1MV per MB) p-frame encoding
+    int16_t (*b_forw_mv_table)[2];       ///< MV table (1MV per MB) forward mode b-frame encoding
+    int16_t (*b_back_mv_table)[2];       ///< MV table (1MV per MB) backward mode b-frame encoding
+    int16_t (*b_bidir_forw_mv_table)[2]; ///< MV table (1MV per MB) bidir mode b-frame encoding
+    int16_t (*b_bidir_back_mv_table)[2]; ///< MV table (1MV per MB) bidir mode b-frame encoding
+    int16_t (*b_direct_mv_table)[2];     ///< MV table (1MV per MB) direct mode b-frame encoding
+    int16_t (*p_field_mv_table[2][2])[2];   ///< MV table (2MV per MB) interlaced p-frame encoding
+    int16_t (*b_field_mv_table[2][2][2])[2];///< MV table (4MV per MB) interlaced b-frame encoding
+    uint8_t (*p_field_select_table[2]);
+    uint8_t (*b_field_select_table[2][2]);
+    int me_method;                       ///< ME algorithm
+    int mv_dir;
+#define MV_DIR_BACKWARD  1
+#define MV_DIR_FORWARD   2
+#define MV_DIRECT        4 ///< bidirectional mode where the difference equals the MV of the last P/S/I-Frame (mpeg4)
+    int mv_type;
+#define MV_TYPE_16X16       0   ///< 1 vector for the whole mb
+#define MV_TYPE_8X8         1   ///< 4 vectors (h263, mpeg4 4MV)
+#define MV_TYPE_16X8        2   ///< 2 vectors, one per 16x8 block
+#define MV_TYPE_FIELD       3   ///< 2 vectors, one per field
+#define MV_TYPE_DMV         4   ///< 2 vectors, special mpeg2 Dual Prime Vectors
+    /**motion vectors for a macroblock
+       first coordinate : 0 = forward 1 = backward
+       second "         : depend on type
+       third  "         : 0 = x, 1 = y
+    */
+    int mv[2][4][2];
+    int field_select[2][2];
+    int last_mv[2][2][2];             ///< last MV, used for MV prediction in MPEG1 & B-frame MPEG4
+    uint8_t *fcode_tab;               ///< smallest fcode needed for each MV
+    int16_t direct_scale_mv[2][64];   ///< precomputed to avoid divisions in ff_mpeg4_set_direct_mv
+
+    MotionEstContext me;
+
+    int no_rounding;  /**< apply no rounding to motion compensation (MPEG4, msmpeg4, ...)
+                        for b-frames rounding mode is allways 0 */
+
+    int hurry_up;     /**< when set to 1 during decoding, b frames will be skipped
+                         when set to 2 idct/dequant will be skipped too */
+
+    /* macroblock layer */
+    int mb_x, mb_y;
+    int mb_skip_run;
+    int mb_intra;
+    uint16_t *mb_type;           ///< Table for candidate MB types for encoding
+#define CANDIDATE_MB_TYPE_INTRA    0x01
+#define CANDIDATE_MB_TYPE_INTER    0x02
+#define CANDIDATE_MB_TYPE_INTER4V  0x04
+#define CANDIDATE_MB_TYPE_SKIPPED   0x08
+//#define MB_TYPE_GMC      0x10
+
+#define CANDIDATE_MB_TYPE_DIRECT   0x10
+#define CANDIDATE_MB_TYPE_FORWARD  0x20
+#define CANDIDATE_MB_TYPE_BACKWARD 0x40
+#define CANDIDATE_MB_TYPE_BIDIR    0x80
+
+#define CANDIDATE_MB_TYPE_INTER_I    0x100
+#define CANDIDATE_MB_TYPE_FORWARD_I  0x200
+#define CANDIDATE_MB_TYPE_BACKWARD_I 0x400
+#define CANDIDATE_MB_TYPE_BIDIR_I    0x800
+
+    int block_index[6]; ///< index to current MB in block based arrays with edges
+    int block_wrap[6];
+    uint8_t *dest[3];
+
+    int *mb_index2xy;        ///< mb_index -> mb_x + mb_y*mb_stride
+
+    /** matrix transmitted in the bitstream */
+    uint16_t intra_matrix[64];
+    uint16_t chroma_intra_matrix[64];
+    uint16_t inter_matrix[64];
+    uint16_t chroma_inter_matrix[64];
+#define QUANT_BIAS_SHIFT 8
+    int intra_quant_bias;    ///< bias for the quantizer
+    int inter_quant_bias;    ///< bias for the quantizer
+    int min_qcoeff;          ///< minimum encodable coefficient
+    int max_qcoeff;          ///< maximum encodable coefficient
+    int ac_esc_length;       ///< num of bits needed to encode the longest esc
+    uint8_t *intra_ac_vlc_length;
+    uint8_t *intra_ac_vlc_last_length;
+    uint8_t *inter_ac_vlc_length;
+    uint8_t *inter_ac_vlc_last_length;
+    uint8_t *luma_dc_vlc_length;
+    uint8_t *chroma_dc_vlc_length;
+#define UNI_AC_ENC_INDEX(run,level) ((run)*128 + (level))
+
+    int coded_score[8];
+
+    /** precomputed matrix (combine qscale and DCT renorm) */
+    int (*q_intra_matrix)[64];
+    int (*q_inter_matrix)[64];
+    /** identical to the above but for MMX & these are not permutated, second 64 entries are bias*/
+    uint16_t (*q_intra_matrix16)[2][64];
+    uint16_t (*q_inter_matrix16)[2][64];
+    int block_last_index[12];  ///< last non zero coefficient in block
+    /* scantables */
+    DECLARE_ALIGNED_8(ScanTable, intra_scantable);
+    ScanTable intra_h_scantable;
+    ScanTable intra_v_scantable;
+    ScanTable inter_scantable; ///< if inter == intra then intra should be used to reduce tha cache usage
+
+    /* noise reduction */
+    int (*dct_error_sum)[64];
+    int dct_count[2];
+    uint16_t (*dct_offset)[64];
+
+    void *opaque;              ///< private data for the user
+
+    /* bit rate control */
+    int64_t wanted_bits;
+    int64_t total_bits;
+    int frame_bits;                ///< bits used for the current frame
+    int next_lambda;               ///< next lambda used for retrying to encode a frame
+    RateControlContext rc_context; ///< contains stuff only accessed in ratecontrol.c
+
+    /* statistics, used for 2-pass encoding */
+    int mv_bits;
+    int header_bits;
+    int i_tex_bits;
+    int p_tex_bits;
+    int i_count;
+    int f_count;
+    int b_count;
+    int skip_count;
+    int misc_bits; ///< cbp, mb_type
+    int last_bits; ///< temp var used for calculating the above vars
+
+    /* error concealment / resync */
+    int error_count;
+    uint8_t *error_status_table;       ///< table of the error status of each MB
+#define VP_START            1          ///< current MB is the first after a resync marker
+#define AC_ERROR            2
+#define DC_ERROR            4
+#define MV_ERROR            8
+#define AC_END              16
+#define DC_END              32
+#define MV_END              64
+//FIXME some prefix?
+
+    int resync_mb_x;                 ///< x position of last resync marker
+    int resync_mb_y;                 ///< y position of last resync marker
+    GetBitContext last_resync_gb;    ///< used to search for the next resync marker
+    int mb_num_left;                 ///< number of MBs left in this video packet (for partitioned Slices only)
+    int next_p_frame_damaged;        ///< set if the next p frame is damaged, to avoid showing trashed b frames
+    int error_resilience;
+
+    ParseContext parse_context;
+
+    /* H.263 specific */
+    int gob_index;
+    int obmc;                       ///< overlapped block motion compensation
+
+    /* H.263+ specific */
+    int umvplus;                    ///< == H263+ && unrestricted_mv
+    int h263_aic;                   ///< Advanded INTRA Coding (AIC)
+    int h263_aic_dir;               ///< AIC direction: 0 = left, 1 = top
+    int h263_slice_structured;
+    int alt_inter_vlc;              ///< alternative inter vlc
+    int modified_quant;
+    int loop_filter;
+    int custom_pcf;
+
+    /* mpeg4 specific */
+    int time_increment_bits;        ///< number of bits to represent the fractional part of time
+    int last_time_base;
+    int time_base;                  ///< time in seconds of last I,P,S Frame
+    int64_t time;                   ///< time of current frame
+    int64_t last_non_b_time;
+    uint16_t pp_time;               ///< time distance between the last 2 p,s,i frames
+    uint16_t pb_time;               ///< time distance between the last b and p,s,i frame
+    uint16_t pp_field_time;
+    uint16_t pb_field_time;         ///< like above, just for interlaced
+    int shape;
+    int vol_sprite_usage;
+    int sprite_width;
+    int sprite_height;
+    int sprite_left;
+    int sprite_top;
+    int sprite_brightness_change;
+    int num_sprite_warping_points;
+    int real_sprite_warping_points;
+    int sprite_offset[2][2];         ///< sprite offset[isChroma][isMVY]
+    int sprite_delta[2][2];          ///< sprite_delta [isY][isMVY]
+    int sprite_shift[2];             ///< sprite shift [isChroma]
+    int mcsel;
+    int quant_precision;
+    int quarter_sample;              ///< 1->qpel, 0->half pel ME/MC
+    int scalability;
+    int hierachy_type;
+    int enhancement_type;
+    int new_pred;
+    int reduced_res_vop;
+    int aspect_ratio_info; //FIXME remove
+    int sprite_warping_accuracy;
+    int low_latency_sprite;
+    int data_partitioning;           ///< data partitioning flag from header
+    int partitioned_frame;           ///< is current frame partitioned
+    int rvlc;                        ///< reversible vlc
+    int resync_marker;               ///< could this stream contain resync markers
+    int low_delay;                   ///< no reordering needed / has no b-frames
+    int vo_type;
+    int vol_control_parameters;      ///< does the stream contain the low_delay flag, used to workaround buggy encoders
+    int intra_dc_threshold;          ///< QP above whch the ac VLC should be used for intra dc
+    int use_intra_dc_vlc;
+    PutBitContext tex_pb;            ///< used for data partitioned VOPs
+    PutBitContext pb2;               ///< used for data partitioned VOPs
+    int mpeg_quant;
+    int t_frame;                       ///< time distance of first I -> B, used for interlaced b frames
+    int padding_bug_score;             ///< used to detect the VERY common padding bug in MPEG4
+
+    /* divx specific, used to workaround (many) bugs in divx5 */
+    int divx_version;
+    int divx_build;
+    int divx_packed;
+    uint8_t *bitstream_buffer; //Divx 5.01 puts several frames in a single one, this is used to reorder them
+    int bitstream_buffer_size;
+    unsigned int allocated_bitstream_buffer_size;
+
+    int xvid_build;
+
+    /* lavc specific stuff, used to workaround bugs in libavcodec */
+    int lavc_build;
+
+    /* RV10 specific */
+    int rv10_version; ///< RV10 version: 0 or 3
+    int rv10_first_dc_coded[3];
+
+    /* MJPEG specific */
+    struct MJpegContext *mjpeg_ctx;
+    int mjpeg_vsample[3];       ///< vertical sampling factors, default = {2, 1, 1}
+    int mjpeg_hsample[3];       ///< horizontal sampling factors, default = {2, 1, 1}
+    int mjpeg_write_tables;     ///< do we want to have quantisation- and huffmantables in the jpeg file ?
+    int mjpeg_data_only_frames; ///< frames only with SOI, SOS and EOI markers
+
+    /* MSMPEG4 specific */
+    int mv_table_index;
+    int rl_table_index;
+    int rl_chroma_table_index;
+    int dc_table_index;
+    int use_skip_mb_code;
+    int slice_height;      ///< in macroblocks
+    int first_slice_line;  ///< used in mpeg4 too to handle resync markers
+    int flipflop_rounding;
+    int msmpeg4_version;   ///< 0=not msmpeg4, 1=mp41, 2=mp42, 3=mp43/divx3 4=wmv1/7 5=wmv2/8
+    int per_mb_rl_table;
+    int esc3_level_length;
+    int esc3_run_length;
+    /** [mb_intra][isChroma][level][run][last] */
+    int (*ac_stats)[2][MAX_LEVEL+1][MAX_RUN+1][2];
+    int inter_intra_pred;
+    int mspel;
+
+    /* decompression specific */
+    GetBitContext gb;
+
+    /* Mpeg1 specific */
+    int gop_picture_number;  ///< index of the first picture of a GOP based on fake_pic_num & mpeg1 specific
+    int last_mv_dir;         ///< last mv_dir, used for b frame encoding
+    int broken_link;         ///< no_output_of_prior_pics_flag
+    uint8_t *vbv_delay_ptr;  ///< pointer to vbv_delay in the bitstream
+
+    /* MPEG2 specific - I wish I had not to support this mess. */
+    int progressive_sequence;
+    int mpeg_f_code[2][2];
+    int picture_structure;
+/* picture type */
+#define PICT_TOP_FIELD     1
+#define PICT_BOTTOM_FIELD  2
+#define PICT_FRAME         3
+
+    int intra_dc_precision;
+    int frame_pred_frame_dct;
+    int top_field_first;
+    int concealment_motion_vectors;
+    int q_scale_type;
+    int intra_vlc_format;
+    int alternate_scan;
+    int repeat_first_field;
+    int chroma_420_type;
+    int chroma_format;
+#define CHROMA_420 1
+#define CHROMA_422 2
+#define CHROMA_444 3
+    int chroma_x_shift;//depend on pix_format, that depend on chroma_format
+    int chroma_y_shift;
+
+    int progressive_frame;
+    int full_pel[2];
+    int interlaced_dct;
+    int first_slice;
+    int first_field;         ///< is 1 for the first field of a field picture 0 otherwise
+
+    /* RTP specific */
+    int rtp_mode;
+
+    uint8_t *ptr_lastgob;
+    int swap_uv;//vcr2 codec is mpeg2 varint with UV swaped
+    short * pblocks[12];
+
+    DCTELEM (*block)[64]; ///< points to one of the following blocks
+    DCTELEM (*blocks)[8][64]; // for HQ mode we need to keep the best block
+    int (*decode_mb)(struct MpegEncContext *s, DCTELEM block[6][64]); // used by some codecs to avoid a switch()
+#define SLICE_OK         0
+#define SLICE_ERROR     -1
+#define SLICE_END       -2 ///<end marker found
+#define SLICE_NOEND     -3 ///<no end marker or error found but mb count exceeded
+
+    void (*dct_unquantize_mpeg1_intra)(struct MpegEncContext *s,
+                           DCTELEM *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_mpeg1_inter)(struct MpegEncContext *s,
+                           DCTELEM *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_mpeg2_intra)(struct MpegEncContext *s,
+                           DCTELEM *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_mpeg2_inter)(struct MpegEncContext *s,
+                           DCTELEM *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_h263_intra)(struct MpegEncContext *s,
+                           DCTELEM *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_h263_inter)(struct MpegEncContext *s,
+                           DCTELEM *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_h261_intra)(struct MpegEncContext *s,
+                           DCTELEM *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_h261_inter)(struct MpegEncContext *s,
+                           DCTELEM *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_intra)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both)
+                           DCTELEM *block/*align 16*/, int n, int qscale);
+    void (*dct_unquantize_inter)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both)
+                           DCTELEM *block/*align 16*/, int n, int qscale);
+    int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
+    int (*fast_dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
+    void (*denoise_dct)(struct MpegEncContext *s, DCTELEM *block);
+} MpegEncContext;
+
+
+int DCT_common_init(MpegEncContext *s);
+void MPV_decode_defaults(MpegEncContext *s);
+int MPV_common_init(MpegEncContext *s);
+void MPV_common_end(MpegEncContext *s);
+void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]);
+int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx);
+void MPV_frame_end(MpegEncContext *s);
+int MPV_encode_init(AVCodecContext *avctx);
+int MPV_encode_end(AVCodecContext *avctx);
+int MPV_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data);
+#ifdef HAVE_MMX
+void MPV_common_init_mmx(MpegEncContext *s);
+#endif
+#ifdef ARCH_ALPHA
+void MPV_common_init_axp(MpegEncContext *s);
+#endif
+#ifdef HAVE_MLIB
+void MPV_common_init_mlib(MpegEncContext *s);
+#endif
+#ifdef HAVE_MMI
+void MPV_common_init_mmi(MpegEncContext *s);
+#endif
+#ifdef ARCH_ARMV4L
+void MPV_common_init_armv4l(MpegEncContext *s);
+#endif
+#ifdef ARCH_POWERPC
+void MPV_common_init_ppc(MpegEncContext *s);
+#endif
+extern void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w);
+void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length);
+void ff_clean_intra_table_entries(MpegEncContext *s);
+void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
+void ff_draw_horiz_band(MpegEncContext *s, int y, int h);
+void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
+                                    int src_x, int src_y, int w, int h);
+void ff_mpeg_flush(AVCodecContext *avctx);
+void ff_print_debug_info(MpegEncContext *s, AVFrame *pict);
+void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix);
+int ff_find_unused_picture(MpegEncContext *s, int shared);
+void ff_denoise_dct(MpegEncContext *s, DCTELEM *block);
+void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src);
+const uint8_t *ff_find_start_code(const uint8_t *p, const uint8_t *end, uint32_t *state);
+
+void ff_er_frame_start(MpegEncContext *s);
+void ff_er_frame_end(MpegEncContext *s);
+void ff_er_add_slice(MpegEncContext *s, int startx, int starty, int endx, int endy, int status);
+
+
+extern enum PixelFormat ff_yuv420p_list[2];
+
+void ff_init_block_index(MpegEncContext *s);
+
+static inline void ff_update_block_index(MpegEncContext *s){
+    const int block_size= 8>>s->avctx->lowres;
+
+    s->block_index[0]+=2;
+    s->block_index[1]+=2;
+    s->block_index[2]+=2;
+    s->block_index[3]+=2;
+    s->block_index[4]++;
+    s->block_index[5]++;
+    s->dest[0]+= 2*block_size;
+    s->dest[1]+= block_size;
+    s->dest[2]+= block_size;
+}
+
+static inline int get_bits_diff(MpegEncContext *s){
+    const int bits= put_bits_count(&s->pb);
+    const int last= s->last_bits;
+
+    s->last_bits = bits;
+
+    return bits - last;
+}
+
+/* motion_est.c */
+void ff_estimate_p_frame_motion(MpegEncContext * s,
+                             int mb_x, int mb_y);
+void ff_estimate_b_frame_motion(MpegEncContext * s,
+                             int mb_x, int mb_y);
+int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type);
+void ff_fix_long_p_mvs(MpegEncContext * s);
+void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
+                     int16_t (*mv_table)[2], int f_code, int type, int truncate);
+void ff_init_me(MpegEncContext *s);
+int ff_pre_estimate_p_frame_motion(MpegEncContext * s, int mb_x, int mb_y);
+inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
+                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
+                             int ref_mv_scale, int size, int h);
+inline int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
+                               int ref_index, int size, int h, int add_rate);
+
+/* mpeg12.c */
+extern const uint16_t ff_mpeg1_default_intra_matrix[64];
+extern const uint16_t ff_mpeg1_default_non_intra_matrix[64];
+extern const uint8_t ff_mpeg1_dc_scale_table[128];
+
+void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number);
+void mpeg1_encode_mb(MpegEncContext *s,
+                     DCTELEM block[6][64],
+                     int motion_x, int motion_y);
+void ff_mpeg1_encode_init(MpegEncContext *s);
+void ff_mpeg1_encode_slice_header(MpegEncContext *s);
+void ff_mpeg1_clean_buffers(MpegEncContext *s);
+
+
+/** RLTable. */
+typedef struct RLTable {
+    int n;                         ///< number of entries of table_vlc minus 1
+    int last;                      ///< number of values for last = 0
+    const uint16_t (*table_vlc)[2];
+    const int8_t *table_run;
+    const int8_t *table_level;
+    uint8_t *index_run[2];         ///< encoding only
+    int8_t *max_level[2];          ///< encoding & decoding
+    int8_t *max_run[2];            ///< encoding & decoding
+    VLC vlc;                       ///< decoding only deprected FIXME remove
+    RL_VLC_ELEM *rl_vlc[32];       ///< decoding only
+} RLTable;
+
+void init_rl(RLTable *rl, int use_static);
+void init_vlc_rl(RLTable *rl, int use_static);
+
+static inline int get_rl_index(const RLTable *rl, int last, int run, int level)
+{
+    int index;
+    index = rl->index_run[last][run];
+    if (index >= rl->n)
+        return rl->n;
+    if (level > rl->max_level[last][run])
+        return rl->n;
+    return index + level - 1;
+}
+
+extern const uint8_t ff_mpeg4_y_dc_scale_table[32];
+extern const uint8_t ff_mpeg4_c_dc_scale_table[32];
+extern const uint8_t ff_aic_dc_scale_table[32];
+extern const int16_t ff_mpeg4_default_intra_matrix[64];
+extern const int16_t ff_mpeg4_default_non_intra_matrix[64];
+extern const uint8_t ff_h263_chroma_qscale_table[32];
+extern const uint8_t ff_h263_loop_filter_strength[32];
+
+/* h261.c */
+void ff_h261_loop_filter(MpegEncContext *s);
+void ff_h261_reorder_mb_index(MpegEncContext* s);
+void ff_h261_encode_mb(MpegEncContext *s,
+                    DCTELEM block[6][64],
+                    int motion_x, int motion_y);
+void ff_h261_encode_picture_header(MpegEncContext * s, int picture_number);
+void ff_h261_encode_init(MpegEncContext *s);
+int ff_h261_get_picture_format(int width, int height);
+
+
+/* h263.c, h263dec.c */
+int ff_h263_decode_init(AVCodecContext *avctx);
+int ff_h263_decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             uint8_t *buf, int buf_size);
+int ff_h263_decode_end(AVCodecContext *avctx);
+void h263_encode_mb(MpegEncContext *s,
+                    DCTELEM block[6][64],
+                    int motion_x, int motion_y);
+void mpeg4_encode_mb(MpegEncContext *s,
+                    DCTELEM block[6][64],
+                    int motion_x, int motion_y);
+void h263_encode_picture_header(MpegEncContext *s, int picture_number);
+void ff_flv_encode_picture_header(MpegEncContext *s, int picture_number);
+void h263_encode_gob_header(MpegEncContext * s, int mb_line);
+int16_t *h263_pred_motion(MpegEncContext * s, int block, int dir,
+                        int *px, int *py);
+void mpeg4_pred_ac(MpegEncContext * s, DCTELEM *block, int n,
+                   int dir);
+void ff_set_mpeg4_time(MpegEncContext * s, int picture_number);
+void mpeg4_encode_picture_header(MpegEncContext *s, int picture_number);
+#ifdef CONFIG_ENCODERS
+void h263_encode_init(MpegEncContext *s);
+#else
+static void h263_encode_init(MpegEncContext *s) {assert(0);}
+#endif
+void h263_decode_init_vlc(MpegEncContext *s);
+int h263_decode_picture_header(MpegEncContext *s);
+int ff_h263_decode_gob_header(MpegEncContext *s);
+int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb);
+void ff_h263_update_motion_val(MpegEncContext * s);
+void ff_h263_loop_filter(MpegEncContext * s);
+void ff_set_qscale(MpegEncContext * s, int qscale);
+int ff_h263_decode_mba(MpegEncContext *s);
+void ff_h263_encode_mba(MpegEncContext *s);
+
+int intel_h263_decode_picture_header(MpegEncContext *s);
+int flv_h263_decode_picture_header(MpegEncContext *s);
+int ff_h263_decode_mb(MpegEncContext *s,
+                      DCTELEM block[6][64]);
+int ff_mpeg4_decode_mb(MpegEncContext *s,
+                      DCTELEM block[6][64]);
+int h263_get_picture_format(int width, int height);
+void ff_mpeg4_encode_video_packet_header(MpegEncContext *s);
+void ff_mpeg4_clean_buffers(MpegEncContext *s);
+void ff_mpeg4_stuffing(PutBitContext * pbc);
+void ff_mpeg4_init_partitions(MpegEncContext *s);
+void ff_mpeg4_merge_partitions(MpegEncContext *s);
+void ff_clean_mpeg4_qscales(MpegEncContext *s);
+void ff_clean_h263_qscales(MpegEncContext *s);
+int ff_mpeg4_decode_partitions(MpegEncContext *s);
+int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s);
+int ff_h263_resync(MpegEncContext *s);
+int ff_h263_get_gob_height(MpegEncContext *s);
+void ff_mpeg4_init_direct_mv(MpegEncContext *s);
+int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my);
+int ff_h263_round_chroma(int x);
+void ff_h263_encode_motion(MpegEncContext * s, int val, int f_code);
+
+
+/* rv10.c */
+void rv10_encode_picture_header(MpegEncContext *s, int picture_number);
+int rv_decode_dc(MpegEncContext *s, int n);
+void rv20_encode_picture_header(MpegEncContext *s, int picture_number);
+
+
+/* msmpeg4.c */
+void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number);
+void msmpeg4_encode_ext_header(MpegEncContext * s);
+void msmpeg4_encode_mb(MpegEncContext * s,
+                       DCTELEM block[6][64],
+                       int motion_x, int motion_y);
+int msmpeg4_decode_picture_header(MpegEncContext * s);
+int msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size);
+int ff_msmpeg4_decode_init(MpegEncContext *s);
+void ff_msmpeg4_encode_init(MpegEncContext *s);
+int ff_wmv2_decode_picture_header(MpegEncContext * s);
+int ff_wmv2_decode_secondary_picture_header(MpegEncContext * s);
+void ff_wmv2_add_mb(MpegEncContext *s, DCTELEM block[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr);
+void ff_mspel_motion(MpegEncContext *s,
+                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
+                               int motion_x, int motion_y, int h);
+int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number);
+void ff_wmv2_encode_mb(MpegEncContext * s,
+                       DCTELEM block[6][64],
+                       int motion_x, int motion_y);
+
+/* mjpeg.c */
+int mjpeg_init(MpegEncContext *s);
+void mjpeg_close(MpegEncContext *s);
+void mjpeg_encode_mb(MpegEncContext *s,
+                     DCTELEM block[6][64]);
+void mjpeg_picture_header(MpegEncContext *s);
+void mjpeg_picture_trailer(MpegEncContext *s);
+void ff_mjpeg_stuffing(PutBitContext * pbc);
+
+#endif /* AVCODEC_MPEGVIDEO_H */
+
diff --git a/contrib/ffmpeg/libavcodec/msmpeg4.c b/contrib/ffmpeg/libavcodec/msmpeg4.c
new file mode 100644
index 000000000..a8124172b
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/msmpeg4.c
@@ -0,0 +1,1938 @@
+/*
+ * MSMPEG4 backend for ffmpeg encoder and decoder
+ * Copyright (c) 2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * msmpeg4v1 & v2 stuff by Michael Niedermayer <michaelni@gmx.at>
+ */
+
+/**
+ * @file msmpeg4.c
+ * MSMPEG4 backend for ffmpeg encoder and decoder.
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+/*
+ * You can also call this codec : MPEG4 with a twist !
+ *
+ * TODO:
+ *        - (encoding) select best mv table (two choices)
+ *        - (encoding) select best vlc/dc table
+ */
+//#define DEBUG
+
+#define DC_VLC_BITS 9
+#define CBPY_VLC_BITS 6
+#define INTER_INTRA_VLC_BITS 3
+#define V1_INTRA_CBPC_VLC_BITS 6
+#define V1_INTER_CBPC_VLC_BITS 6
+#define V2_INTRA_CBPC_VLC_BITS 3
+#define V2_MB_TYPE_VLC_BITS 7
+#define MV_VLC_BITS 9
+#define V2_MV_VLC_BITS 9
+#define TEX_VLC_BITS 9
+#define MB_NON_INTRA_VLC_BITS 9
+#define MB_INTRA_VLC_BITS 9
+
+#define II_BITRATE 128*1024
+#define MBAC_BITRATE 50*1024
+
+#define DEFAULT_INTER_INDEX 3
+
+static uint32_t v2_dc_lum_table[512][2];
+static uint32_t v2_dc_chroma_table[512][2];
+
+static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n);
+static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
+                                       int n, int coded, const uint8_t *scantable);
+static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
+static int msmpeg4_decode_motion(MpegEncContext * s,
+                                 int *mx_ptr, int *my_ptr);
+static void init_h263_dc_for_msmpeg4(void);
+static inline void msmpeg4_memsetw(short *tab, int val, int n);
+#ifdef CONFIG_ENCODERS
+static void msmpeg4v2_encode_motion(MpegEncContext * s, int val);
+static int get_size_of_code(MpegEncContext * s, RLTable *rl, int last, int run, int level, int intra);
+#endif //CONFIG_ENCODERS
+static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
+static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
+static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
+
+/* vc1 externs */
+extern uint8_t wmv3_dc_scale_table[32];
+
+#ifdef DEBUG
+int intra_count = 0;
+int frame_count = 0;
+#endif
+
+#include "msmpeg4data.h"
+
+#ifdef CONFIG_ENCODERS //strangely gcc includes this even if its not references
+static uint8_t rl_length[NB_RL_TABLES][MAX_LEVEL+1][MAX_RUN+1][2];
+#endif //CONFIG_ENCODERS
+
+static void common_init(MpegEncContext * s)
+{
+    static int inited=0;
+
+    switch(s->msmpeg4_version){
+    case 1:
+    case 2:
+        s->y_dc_scale_table=
+        s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+        break;
+    case 3:
+        if(s->workaround_bugs){
+            s->y_dc_scale_table= old_ff_y_dc_scale_table;
+            s->c_dc_scale_table= old_ff_c_dc_scale_table;
+        } else{
+            s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table;
+            s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table;
+        }
+        break;
+    case 4:
+    case 5:
+        s->y_dc_scale_table= wmv1_y_dc_scale_table;
+        s->c_dc_scale_table= wmv1_c_dc_scale_table;
+        break;
+#if defined(CONFIG_WMV3_DECODER)||defined(CONFIG_VC1_DECODER)
+    case 6:
+        s->y_dc_scale_table= wmv3_dc_scale_table;
+        s->c_dc_scale_table= wmv3_dc_scale_table;
+        break;
+#endif
+
+    }
+
+
+    if(s->msmpeg4_version>=4){
+        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , wmv1_scantable[1]);
+        ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, wmv1_scantable[2]);
+        ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, wmv1_scantable[3]);
+        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , wmv1_scantable[0]);
+    }
+    //Note the default tables are set in common_init in mpegvideo.c
+
+    if(!inited){
+        inited=1;
+
+        init_h263_dc_for_msmpeg4();
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+
+/* build the table which associate a (x,y) motion vector to a vlc */
+static void init_mv_table(MVTable *tab)
+{
+    int i, x, y;
+
+    tab->table_mv_index = av_malloc(sizeof(uint16_t) * 4096);
+    /* mark all entries as not used */
+    for(i=0;i<4096;i++)
+        tab->table_mv_index[i] = tab->n;
+
+    for(i=0;i<tab->n;i++) {
+        x = tab->table_mvx[i];
+        y = tab->table_mvy[i];
+        tab->table_mv_index[(x << 6) | y] = i;
+    }
+}
+
+static void code012(PutBitContext *pb, int n)
+{
+    if (n == 0) {
+        put_bits(pb, 1, 0);
+    } else {
+        put_bits(pb, 1, 1);
+        put_bits(pb, 1, (n >= 2));
+    }
+}
+
+void ff_msmpeg4_encode_init(MpegEncContext *s)
+{
+    static int init_done=0;
+    int i;
+
+    common_init(s);
+    if(s->msmpeg4_version>=4){
+        s->min_qcoeff= -255;
+        s->max_qcoeff=  255;
+    }
+
+    if (!init_done) {
+        /* init various encoding tables */
+        init_done = 1;
+        init_mv_table(&mv_tables[0]);
+        init_mv_table(&mv_tables[1]);
+        for(i=0;i<NB_RL_TABLES;i++)
+            init_rl(&rl_table[i], 1);
+
+        for(i=0; i<NB_RL_TABLES; i++){
+            int level;
+            for(level=0; level<=MAX_LEVEL; level++){
+                int run;
+                for(run=0; run<=MAX_RUN; run++){
+                    int last;
+                    for(last=0; last<2; last++){
+                        rl_length[i][level][run][last]= get_size_of_code(s, &rl_table[  i], last, run, level, 0);
+                    }
+                }
+            }
+        }
+    }
+}
+
+static int get_size_of_code(MpegEncContext * s, RLTable *rl, int last, int run, int level, int intra){
+    int size=0;
+    int code;
+    int run_diff= intra ? 0 : 1;
+
+    code = get_rl_index(rl, last, run, level);
+    size+= rl->table_vlc[code][1];
+    if (code == rl->n) {
+        int level1, run1;
+
+        level1 = level - rl->max_level[last][run];
+        if (level1 < 1)
+            goto esc2;
+        code = get_rl_index(rl, last, run, level1);
+        if (code == rl->n) {
+            esc2:
+            size++;
+            if (level > MAX_LEVEL)
+                goto esc3;
+            run1 = run - rl->max_run[last][level] - run_diff;
+            if (run1 < 0)
+                goto esc3;
+            code = get_rl_index(rl, last, run1, level);
+            if (code == rl->n) {
+            esc3:
+                /* third escape */
+                size+=1+1+6+8;
+            } else {
+                /* second escape */
+                size+= 1+1+ rl->table_vlc[code][1];
+            }
+        } else {
+            /* first escape */
+            size+= 1+1+ rl->table_vlc[code][1];
+        }
+    } else {
+        size++;
+    }
+    return size;
+}
+
+static void find_best_tables(MpegEncContext * s)
+{
+    int i;
+    int best       =-1, best_size       =9999999;
+    int chroma_best=-1, best_chroma_size=9999999;
+
+    for(i=0; i<3; i++){
+        int level;
+        int chroma_size=0;
+        int size=0;
+
+        if(i>0){// ;)
+            size++;
+            chroma_size++;
+        }
+        for(level=0; level<=MAX_LEVEL; level++){
+            int run;
+            for(run=0; run<=MAX_RUN; run++){
+                int last;
+                const int last_size= size + chroma_size;
+                for(last=0; last<2; last++){
+                    int inter_count       = s->ac_stats[0][0][level][run][last] + s->ac_stats[0][1][level][run][last];
+                    int intra_luma_count  = s->ac_stats[1][0][level][run][last];
+                    int intra_chroma_count= s->ac_stats[1][1][level][run][last];
+
+                    if(s->pict_type==I_TYPE){
+                        size       += intra_luma_count  *rl_length[i  ][level][run][last];
+                        chroma_size+= intra_chroma_count*rl_length[i+3][level][run][last];
+                    }else{
+                        size+=        intra_luma_count  *rl_length[i  ][level][run][last]
+                                     +intra_chroma_count*rl_length[i+3][level][run][last]
+                                     +inter_count       *rl_length[i+3][level][run][last];
+                    }
+                }
+                if(last_size == size+chroma_size) break;
+            }
+        }
+        if(size<best_size){
+            best_size= size;
+            best= i;
+        }
+        if(chroma_size<best_chroma_size){
+            best_chroma_size= chroma_size;
+            chroma_best= i;
+        }
+    }
+
+//    printf("type:%d, best:%d, qp:%d, var:%d, mcvar:%d, size:%d //\n",
+//           s->pict_type, best, s->qscale, s->mb_var_sum, s->mc_mb_var_sum, best_size);
+
+    if(s->pict_type==P_TYPE) chroma_best= best;
+
+    memset(s->ac_stats, 0, sizeof(int)*(MAX_LEVEL+1)*(MAX_RUN+1)*2*2*2);
+
+    s->rl_table_index       =        best;
+    s->rl_chroma_table_index= chroma_best;
+
+    if(s->pict_type != s->last_non_b_pict_type){
+        s->rl_table_index= 2;
+        if(s->pict_type==I_TYPE)
+            s->rl_chroma_table_index= 1;
+        else
+            s->rl_chroma_table_index= 2;
+    }
+
+}
+
+/* write MSMPEG4 compatible frame header */
+void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
+{
+    find_best_tables(s);
+
+    align_put_bits(&s->pb);
+    put_bits(&s->pb, 2, s->pict_type - 1);
+
+    put_bits(&s->pb, 5, s->qscale);
+    if(s->msmpeg4_version<=2){
+        s->rl_table_index = 2;
+        s->rl_chroma_table_index = 2;
+    }
+
+    s->dc_table_index = 1;
+    s->mv_table_index = 1; /* only if P frame */
+    s->use_skip_mb_code = 1; /* only if P frame */
+    s->per_mb_rl_table = 0;
+    if(s->msmpeg4_version==4)
+        s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE && s->pict_type==P_TYPE);
+//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
+
+    if (s->pict_type == I_TYPE) {
+        s->slice_height= s->mb_height/1;
+        put_bits(&s->pb, 5, 0x16 + s->mb_height/s->slice_height);
+
+        if(s->msmpeg4_version==4){
+            msmpeg4_encode_ext_header(s);
+            if(s->bit_rate>MBAC_BITRATE)
+                put_bits(&s->pb, 1, s->per_mb_rl_table);
+        }
+
+        if(s->msmpeg4_version>2){
+            if(!s->per_mb_rl_table){
+                code012(&s->pb, s->rl_chroma_table_index);
+                code012(&s->pb, s->rl_table_index);
+            }
+
+            put_bits(&s->pb, 1, s->dc_table_index);
+        }
+    } else {
+        put_bits(&s->pb, 1, s->use_skip_mb_code);
+
+        if(s->msmpeg4_version==4 && s->bit_rate>MBAC_BITRATE)
+            put_bits(&s->pb, 1, s->per_mb_rl_table);
+
+        if(s->msmpeg4_version>2){
+            if(!s->per_mb_rl_table)
+                code012(&s->pb, s->rl_table_index);
+
+            put_bits(&s->pb, 1, s->dc_table_index);
+
+            put_bits(&s->pb, 1, s->mv_table_index);
+        }
+    }
+
+    s->esc3_level_length= 0;
+    s->esc3_run_length= 0;
+
+#ifdef DEBUG
+    intra_count = 0;
+    av_log(s->avctx, AV_LOG_DEBUG, "*****frame %d:\n", frame_count++);
+#endif
+}
+
+void msmpeg4_encode_ext_header(MpegEncContext * s)
+{
+        put_bits(&s->pb, 5, s->avctx->time_base.den / s->avctx->time_base.num); //yes 29.97 -> 29
+
+        put_bits(&s->pb, 11, FFMIN(s->bit_rate/1024, 2047));
+
+        if(s->msmpeg4_version>=3)
+            put_bits(&s->pb, 1, s->flipflop_rounding);
+        else
+            assert(s->flipflop_rounding==0);
+}
+
+#endif //CONFIG_ENCODERS
+
+/* predict coded block */
+static inline int coded_block_pred(MpegEncContext * s, int n, uint8_t **coded_block_ptr)
+{
+    int xy, wrap, pred, a, b, c;
+
+    xy = s->block_index[n];
+    wrap = s->b8_stride;
+
+    /* B C
+     * A X
+     */
+    a = s->coded_block[xy - 1       ];
+    b = s->coded_block[xy - 1 - wrap];
+    c = s->coded_block[xy     - wrap];
+
+    if (b == c) {
+        pred = a;
+    } else {
+        pred = c;
+    }
+
+    /* store value */
+    *coded_block_ptr = &s->coded_block[xy];
+
+    return pred;
+}
+
+#ifdef CONFIG_ENCODERS
+
+static void msmpeg4_encode_motion(MpegEncContext * s,
+                                  int mx, int my)
+{
+    int code;
+    MVTable *mv;
+
+    /* modulo encoding */
+    /* WARNING : you cannot reach all the MVs even with the modulo
+       encoding. This is a somewhat strange compromise they took !!!  */
+    if (mx <= -64)
+        mx += 64;
+    else if (mx >= 64)
+        mx -= 64;
+    if (my <= -64)
+        my += 64;
+    else if (my >= 64)
+        my -= 64;
+
+    mx += 32;
+    my += 32;
+#if 0
+    if ((unsigned)mx >= 64 ||
+        (unsigned)my >= 64)
+        av_log(s->avctx, AV_LOG_ERROR, "error mx=%d my=%d\n", mx, my);
+#endif
+    mv = &mv_tables[s->mv_table_index];
+
+    code = mv->table_mv_index[(mx << 6) | my];
+    put_bits(&s->pb,
+             mv->table_mv_bits[code],
+             mv->table_mv_code[code]);
+    if (code == mv->n) {
+        /* escape : code litterally */
+        put_bits(&s->pb, 6, mx);
+        put_bits(&s->pb, 6, my);
+    }
+}
+
+static inline void handle_slices(MpegEncContext *s){
+    if (s->mb_x == 0) {
+        if (s->slice_height && (s->mb_y % s->slice_height) == 0) {
+            if(s->msmpeg4_version < 4){
+                ff_mpeg4_clean_buffers(s);
+            }
+            s->first_slice_line = 1;
+        } else {
+            s->first_slice_line = 0;
+        }
+    }
+}
+
+void msmpeg4_encode_mb(MpegEncContext * s,
+                       DCTELEM block[6][64],
+                       int motion_x, int motion_y)
+{
+    int cbp, coded_cbp, i;
+    int pred_x, pred_y;
+    uint8_t *coded_block;
+
+    handle_slices(s);
+
+    if (!s->mb_intra) {
+        /* compute cbp */
+        cbp = 0;
+        for (i = 0; i < 6; i++) {
+            if (s->block_last_index[i] >= 0)
+                cbp |= 1 << (5 - i);
+        }
+        if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) {
+            /* skip macroblock */
+            put_bits(&s->pb, 1, 1);
+            s->last_bits++;
+            s->misc_bits++;
+            s->skip_count++;
+
+            return;
+        }
+        if (s->use_skip_mb_code)
+            put_bits(&s->pb, 1, 0);     /* mb coded */
+
+        if(s->msmpeg4_version<=2){
+            put_bits(&s->pb,
+                     v2_mb_type[cbp&3][1],
+                     v2_mb_type[cbp&3][0]);
+            if((cbp&3) != 3) coded_cbp= cbp ^ 0x3C;
+            else             coded_cbp= cbp;
+
+            put_bits(&s->pb,
+                     cbpy_tab[coded_cbp>>2][1],
+                     cbpy_tab[coded_cbp>>2][0]);
+
+            s->misc_bits += get_bits_diff(s);
+
+            h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
+            msmpeg4v2_encode_motion(s, motion_x - pred_x);
+            msmpeg4v2_encode_motion(s, motion_y - pred_y);
+        }else{
+            put_bits(&s->pb,
+                     table_mb_non_intra[cbp + 64][1],
+                     table_mb_non_intra[cbp + 64][0]);
+
+            s->misc_bits += get_bits_diff(s);
+
+            /* motion vector */
+            h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
+            msmpeg4_encode_motion(s, motion_x - pred_x,
+                                  motion_y - pred_y);
+        }
+
+        s->mv_bits += get_bits_diff(s);
+
+        for (i = 0; i < 6; i++) {
+            msmpeg4_encode_block(s, block[i], i);
+        }
+        s->p_tex_bits += get_bits_diff(s);
+    } else {
+        /* compute cbp */
+        cbp = 0;
+        coded_cbp = 0;
+        for (i = 0; i < 6; i++) {
+            int val, pred;
+            val = (s->block_last_index[i] >= 1);
+            cbp |= val << (5 - i);
+            if (i < 4) {
+                /* predict value for close blocks only for luma */
+                pred = coded_block_pred(s, i, &coded_block);
+                *coded_block = val;
+                val = val ^ pred;
+            }
+            coded_cbp |= val << (5 - i);
+        }
+#if 0
+        if (coded_cbp)
+            printf("cbp=%x %x\n", cbp, coded_cbp);
+#endif
+
+        if(s->msmpeg4_version<=2){
+            if (s->pict_type == I_TYPE) {
+                put_bits(&s->pb,
+                         v2_intra_cbpc[cbp&3][1], v2_intra_cbpc[cbp&3][0]);
+            } else {
+                if (s->use_skip_mb_code)
+                    put_bits(&s->pb, 1, 0);     /* mb coded */
+                put_bits(&s->pb,
+                         v2_mb_type[(cbp&3) + 4][1],
+                         v2_mb_type[(cbp&3) + 4][0]);
+            }
+            put_bits(&s->pb, 1, 0);             /* no AC prediction yet */
+            put_bits(&s->pb,
+                     cbpy_tab[cbp>>2][1],
+                     cbpy_tab[cbp>>2][0]);
+        }else{
+            if (s->pict_type == I_TYPE) {
+                put_bits(&s->pb,
+                         ff_msmp4_mb_i_table[coded_cbp][1], ff_msmp4_mb_i_table[coded_cbp][0]);
+            } else {
+                if (s->use_skip_mb_code)
+                    put_bits(&s->pb, 1, 0);     /* mb coded */
+                put_bits(&s->pb,
+                         table_mb_non_intra[cbp][1],
+                         table_mb_non_intra[cbp][0]);
+            }
+            put_bits(&s->pb, 1, 0);             /* no AC prediction yet */
+            if(s->inter_intra_pred){
+                s->h263_aic_dir=0;
+                put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]);
+            }
+        }
+        s->misc_bits += get_bits_diff(s);
+
+        for (i = 0; i < 6; i++) {
+            msmpeg4_encode_block(s, block[i], i);
+        }
+        s->i_tex_bits += get_bits_diff(s);
+        s->i_count++;
+    }
+}
+
+#endif //CONFIG_ENCODERS
+
+static inline int msmpeg4v1_pred_dc(MpegEncContext * s, int n,
+                                    int32_t **dc_val_ptr)
+{
+    int i;
+
+    if (n < 4) {
+        i= 0;
+    } else {
+        i= n-3;
+    }
+
+    *dc_val_ptr= &s->last_dc[i];
+    return s->last_dc[i];
+}
+
+static int get_dc(uint8_t *src, int stride, int scale)
+{
+    int y;
+    int sum=0;
+    for(y=0; y<8; y++){
+        int x;
+        for(x=0; x<8; x++){
+            sum+=src[x + y*stride];
+        }
+    }
+    return FASTDIV((sum + (scale>>1)), scale);
+}
+
+/* dir = 0: left, dir = 1: top prediction */
+static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
+                             int16_t **dc_val_ptr, int *dir_ptr)
+{
+    int a, b, c, wrap, pred, scale;
+    int16_t *dc_val;
+
+    /* find prediction */
+    if (n < 4) {
+        scale = s->y_dc_scale;
+    } else {
+        scale = s->c_dc_scale;
+    }
+
+    wrap = s->block_wrap[n];
+    dc_val= s->dc_val[0] + s->block_index[n];
+
+    /* B C
+     * A X
+     */
+    a = dc_val[ - 1];
+    b = dc_val[ - 1 - wrap];
+    c = dc_val[ - wrap];
+
+    if(s->first_slice_line && (n&2)==0 && s->msmpeg4_version<4){
+        b=c=1024;
+    }
+
+    /* XXX: the following solution consumes divisions, but it does not
+       necessitate to modify mpegvideo.c. The problem comes from the
+       fact they decided to store the quantized DC (which would lead
+       to problems if Q could vary !) */
+#if (defined(ARCH_X86)) && !defined PIC
+    asm volatile(
+        "movl %3, %%eax         \n\t"
+        "shrl $1, %%eax         \n\t"
+        "addl %%eax, %2         \n\t"
+        "addl %%eax, %1         \n\t"
+        "addl %0, %%eax         \n\t"
+        "mull %4                \n\t"
+        "movl %%edx, %0         \n\t"
+        "movl %1, %%eax         \n\t"
+        "mull %4                \n\t"
+        "movl %%edx, %1         \n\t"
+        "movl %2, %%eax         \n\t"
+        "mull %4                \n\t"
+        "movl %%edx, %2         \n\t"
+        : "+b" (a), "+c" (b), "+D" (c)
+        : "g" (scale), "S" (ff_inverse[scale])
+        : "%eax", "%edx"
+    );
+#else
+    /* #elif defined (ARCH_ALPHA) */
+    /* Divisions are extremely costly on Alpha; optimize the most
+       common case. But they are costly everywhere...
+     */
+    if (scale == 8) {
+        a = (a + (8 >> 1)) / 8;
+        b = (b + (8 >> 1)) / 8;
+        c = (c + (8 >> 1)) / 8;
+    } else {
+        a = FASTDIV((a + (scale >> 1)), scale);
+        b = FASTDIV((b + (scale >> 1)), scale);
+        c = FASTDIV((c + (scale >> 1)), scale);
+    }
+#endif
+    /* XXX: WARNING: they did not choose the same test as MPEG4. This
+       is very important ! */
+    if(s->msmpeg4_version>3){
+        if(s->inter_intra_pred){
+            uint8_t *dest;
+            int wrap;
+
+            if(n==1){
+                pred=a;
+                *dir_ptr = 0;
+            }else if(n==2){
+                pred=c;
+                *dir_ptr = 1;
+            }else if(n==3){
+                if (abs(a - b) < abs(b - c)) {
+                    pred = c;
+                    *dir_ptr = 1;
+                } else {
+                    pred = a;
+                    *dir_ptr = 0;
+                }
+            }else{
+                if(n<4){
+                    wrap= s->linesize;
+                    dest= s->current_picture.data[0] + (((n>>1) + 2*s->mb_y) * 8*  wrap ) + ((n&1) + 2*s->mb_x) * 8;
+                }else{
+                    wrap= s->uvlinesize;
+                    dest= s->current_picture.data[n-3] + (s->mb_y * 8 * wrap) + s->mb_x * 8;
+                }
+                if(s->mb_x==0) a= (1024 + (scale>>1))/scale;
+                else           a= get_dc(dest-8, wrap, scale*8);
+                if(s->mb_y==0) c= (1024 + (scale>>1))/scale;
+                else           c= get_dc(dest-8*wrap, wrap, scale*8);
+
+                if (s->h263_aic_dir==0) {
+                    pred= a;
+                    *dir_ptr = 0;
+                }else if (s->h263_aic_dir==1) {
+                    if(n==0){
+                        pred= c;
+                        *dir_ptr = 1;
+                    }else{
+                        pred= a;
+                        *dir_ptr = 0;
+                    }
+                }else if (s->h263_aic_dir==2) {
+                    if(n==0){
+                        pred= a;
+                        *dir_ptr = 0;
+                    }else{
+                        pred= c;
+                        *dir_ptr = 1;
+                    }
+                } else {
+                    pred= c;
+                    *dir_ptr = 1;
+                }
+            }
+        }else{
+            if (abs(a - b) < abs(b - c)) {
+                pred = c;
+                *dir_ptr = 1;
+            } else {
+                pred = a;
+                *dir_ptr = 0;
+            }
+        }
+    }else{
+        if (abs(a - b) <= abs(b - c)) {
+            pred = c;
+            *dir_ptr = 1;
+        } else {
+            pred = a;
+            *dir_ptr = 0;
+        }
+    }
+
+    /* update predictor */
+    *dc_val_ptr = &dc_val[0];
+    return pred;
+}
+
+#define DC_MAX 119
+
+static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr)
+{
+    int sign, code;
+    int pred;
+
+    if(s->msmpeg4_version==1){
+        int32_t *dc_val;
+        pred = msmpeg4v1_pred_dc(s, n, &dc_val);
+
+        /* update predictor */
+        *dc_val= level;
+    }else{
+        int16_t *dc_val;
+        pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr);
+
+        /* update predictor */
+        if (n < 4) {
+            *dc_val = level * s->y_dc_scale;
+        } else {
+            *dc_val = level * s->c_dc_scale;
+        }
+    }
+
+    /* do the prediction */
+    level -= pred;
+
+    if(s->msmpeg4_version<=2){
+        if (n < 4) {
+            put_bits(&s->pb,
+                     v2_dc_lum_table[level+256][1],
+                     v2_dc_lum_table[level+256][0]);
+        }else{
+            put_bits(&s->pb,
+                     v2_dc_chroma_table[level+256][1],
+                     v2_dc_chroma_table[level+256][0]);
+        }
+    }else{
+        sign = 0;
+        if (level < 0) {
+            level = -level;
+            sign = 1;
+        }
+        code = level;
+        if (code > DC_MAX)
+            code = DC_MAX;
+
+        if (s->dc_table_index == 0) {
+            if (n < 4) {
+                put_bits(&s->pb, ff_table0_dc_lum[code][1], ff_table0_dc_lum[code][0]);
+            } else {
+                put_bits(&s->pb, ff_table0_dc_chroma[code][1], ff_table0_dc_chroma[code][0]);
+            }
+        } else {
+            if (n < 4) {
+                put_bits(&s->pb, ff_table1_dc_lum[code][1], ff_table1_dc_lum[code][0]);
+            } else {
+                put_bits(&s->pb, ff_table1_dc_chroma[code][1], ff_table1_dc_chroma[code][0]);
+            }
+        }
+
+        if (code == DC_MAX)
+            put_bits(&s->pb, 8, level);
+
+        if (level != 0) {
+            put_bits(&s->pb, 1, sign);
+        }
+    }
+}
+
+/* Encoding of a block. Very similar to MPEG4 except for a different
+   escape coding (same as H263) and more vlc tables.
+ */
+static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n)
+{
+    int level, run, last, i, j, last_index;
+    int last_non_zero, sign, slevel;
+    int code, run_diff, dc_pred_dir;
+    const RLTable *rl;
+    const uint8_t *scantable;
+
+    if (s->mb_intra) {
+        msmpeg4_encode_dc(s, block[0], n, &dc_pred_dir);
+        i = 1;
+        if (n < 4) {
+            rl = &rl_table[s->rl_table_index];
+        } else {
+            rl = &rl_table[3 + s->rl_chroma_table_index];
+        }
+        run_diff = 0;
+        scantable= s->intra_scantable.permutated;
+    } else {
+        i = 0;
+        rl = &rl_table[3 + s->rl_table_index];
+        if(s->msmpeg4_version<=2)
+            run_diff = 0;
+        else
+            run_diff = 1;
+        scantable= s->inter_scantable.permutated;
+    }
+
+    /* recalculate block_last_index for M$ wmv1 */
+    if(s->msmpeg4_version>=4 && s->block_last_index[n]>0){
+        for(last_index=63; last_index>=0; last_index--){
+            if(block[scantable[last_index]]) break;
+        }
+        s->block_last_index[n]= last_index;
+    }else
+        last_index = s->block_last_index[n];
+    /* AC coefs */
+    last_non_zero = i - 1;
+    for (; i <= last_index; i++) {
+        j = scantable[i];
+        level = block[j];
+        if (level) {
+            run = i - last_non_zero - 1;
+            last = (i == last_index);
+            sign = 0;
+            slevel = level;
+            if (level < 0) {
+                sign = 1;
+                level = -level;
+            }
+
+            if(level<=MAX_LEVEL && run<=MAX_RUN){
+                s->ac_stats[s->mb_intra][n>3][level][run][last]++;
+            }
+#if 0
+else
+    s->ac_stats[s->mb_intra][n>3][40][63][0]++; //esc3 like
+#endif
+            code = get_rl_index(rl, last, run, level);
+            put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
+            if (code == rl->n) {
+                int level1, run1;
+
+                level1 = level - rl->max_level[last][run];
+                if (level1 < 1)
+                    goto esc2;
+                code = get_rl_index(rl, last, run, level1);
+                if (code == rl->n) {
+                esc2:
+                    put_bits(&s->pb, 1, 0);
+                    if (level > MAX_LEVEL)
+                        goto esc3;
+                    run1 = run - rl->max_run[last][level] - run_diff;
+                    if (run1 < 0)
+                        goto esc3;
+                    code = get_rl_index(rl, last, run1, level);
+                    if (code == rl->n) {
+                    esc3:
+                        /* third escape */
+                        put_bits(&s->pb, 1, 0);
+                        put_bits(&s->pb, 1, last);
+                        if(s->msmpeg4_version>=4){
+                            if(s->esc3_level_length==0){
+                                s->esc3_level_length=8;
+                                s->esc3_run_length= 6;
+                                if(s->qscale<8)
+                                    put_bits(&s->pb, 6, 3);
+                                else
+                                    put_bits(&s->pb, 8, 3);
+                            }
+                            put_bits(&s->pb, s->esc3_run_length, run);
+                            put_bits(&s->pb, 1, sign);
+                            put_bits(&s->pb, s->esc3_level_length, level);
+                        }else{
+                            put_bits(&s->pb, 6, run);
+                            put_bits(&s->pb, 8, slevel & 0xff);
+                        }
+                    } else {
+                        /* second escape */
+                        put_bits(&s->pb, 1, 1);
+                        put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
+                        put_bits(&s->pb, 1, sign);
+                    }
+                } else {
+                    /* first escape */
+                    put_bits(&s->pb, 1, 1);
+                    put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
+                    put_bits(&s->pb, 1, sign);
+                }
+            } else {
+                put_bits(&s->pb, 1, sign);
+            }
+            last_non_zero = i;
+        }
+    }
+}
+
+/****************************************/
+/* decoding stuff */
+
+static VLC mb_non_intra_vlc[4];
+VLC ff_msmp4_mb_i_vlc;
+VLC ff_msmp4_dc_luma_vlc[2];
+VLC ff_msmp4_dc_chroma_vlc[2];
+static VLC v2_dc_lum_vlc;
+static VLC v2_dc_chroma_vlc;
+static VLC cbpy_vlc;
+static VLC v2_intra_cbpc_vlc;
+static VLC v2_mb_type_vlc;
+static VLC v2_mv_vlc;
+static VLC v1_intra_cbpc_vlc;
+static VLC v1_inter_cbpc_vlc;
+static VLC inter_intra_vlc;
+
+/* this table is practically identical to the one from h263 except that its inverted */
+static void init_h263_dc_for_msmpeg4(void)
+{
+        int level, uni_code, uni_len;
+
+        for(level=-256; level<256; level++){
+            int size, v, l;
+            /* find number of bits */
+            size = 0;
+            v = abs(level);
+            while (v) {
+                v >>= 1;
+                    size++;
+            }
+
+            if (level < 0)
+                l= (-level) ^ ((1 << size) - 1);
+            else
+                l= level;
+
+            /* luminance h263 */
+            uni_code= DCtab_lum[size][0];
+            uni_len = DCtab_lum[size][1];
+            uni_code ^= (1<<uni_len)-1; //M$ doesnt like compatibility
+
+            if (size > 0) {
+                uni_code<<=size; uni_code|=l;
+                uni_len+=size;
+                if (size > 8){
+                    uni_code<<=1; uni_code|=1;
+                    uni_len++;
+                }
+            }
+            v2_dc_lum_table[level+256][0]= uni_code;
+            v2_dc_lum_table[level+256][1]= uni_len;
+
+            /* chrominance h263 */
+            uni_code= DCtab_chrom[size][0];
+            uni_len = DCtab_chrom[size][1];
+            uni_code ^= (1<<uni_len)-1; //M$ doesnt like compatibility
+
+            if (size > 0) {
+                uni_code<<=size; uni_code|=l;
+                uni_len+=size;
+                if (size > 8){
+                    uni_code<<=1; uni_code|=1;
+                    uni_len++;
+                }
+            }
+            v2_dc_chroma_table[level+256][0]= uni_code;
+            v2_dc_chroma_table[level+256][1]= uni_len;
+
+        }
+}
+
+/* init all vlc decoding tables */
+int ff_msmpeg4_decode_init(MpegEncContext *s)
+{
+    static int done = 0;
+    int i;
+    MVTable *mv;
+
+    common_init(s);
+
+    if (!done) {
+        done = 1;
+
+        for(i=0;i<NB_RL_TABLES;i++) {
+            init_rl(&rl_table[i], 1);
+            init_vlc_rl(&rl_table[i], 1);
+        }
+        for(i=0;i<2;i++) {
+            mv = &mv_tables[i];
+            init_vlc(&mv->vlc, MV_VLC_BITS, mv->n + 1,
+                     mv->table_mv_bits, 1, 1,
+                     mv->table_mv_code, 2, 2, 1);
+        }
+
+        init_vlc(&ff_msmp4_dc_luma_vlc[0], DC_VLC_BITS, 120,
+                 &ff_table0_dc_lum[0][1], 8, 4,
+                 &ff_table0_dc_lum[0][0], 8, 4, 1);
+        init_vlc(&ff_msmp4_dc_chroma_vlc[0], DC_VLC_BITS, 120,
+                 &ff_table0_dc_chroma[0][1], 8, 4,
+                 &ff_table0_dc_chroma[0][0], 8, 4, 1);
+        init_vlc(&ff_msmp4_dc_luma_vlc[1], DC_VLC_BITS, 120,
+                 &ff_table1_dc_lum[0][1], 8, 4,
+                 &ff_table1_dc_lum[0][0], 8, 4, 1);
+        init_vlc(&ff_msmp4_dc_chroma_vlc[1], DC_VLC_BITS, 120,
+                 &ff_table1_dc_chroma[0][1], 8, 4,
+                 &ff_table1_dc_chroma[0][0], 8, 4, 1);
+
+        init_vlc(&v2_dc_lum_vlc, DC_VLC_BITS, 512,
+                 &v2_dc_lum_table[0][1], 8, 4,
+                 &v2_dc_lum_table[0][0], 8, 4, 1);
+        init_vlc(&v2_dc_chroma_vlc, DC_VLC_BITS, 512,
+                 &v2_dc_chroma_table[0][1], 8, 4,
+                 &v2_dc_chroma_table[0][0], 8, 4, 1);
+
+        init_vlc(&cbpy_vlc, CBPY_VLC_BITS, 16,
+                 &cbpy_tab[0][1], 2, 1,
+                 &cbpy_tab[0][0], 2, 1, 1);
+        init_vlc(&v2_intra_cbpc_vlc, V2_INTRA_CBPC_VLC_BITS, 4,
+                 &v2_intra_cbpc[0][1], 2, 1,
+                 &v2_intra_cbpc[0][0], 2, 1, 1);
+        init_vlc(&v2_mb_type_vlc, V2_MB_TYPE_VLC_BITS, 8,
+                 &v2_mb_type[0][1], 2, 1,
+                 &v2_mb_type[0][0], 2, 1, 1);
+        init_vlc(&v2_mv_vlc, V2_MV_VLC_BITS, 33,
+                 &mvtab[0][1], 2, 1,
+                 &mvtab[0][0], 2, 1, 1);
+
+        for(i=0; i<4; i++){
+            init_vlc(&mb_non_intra_vlc[i], MB_NON_INTRA_VLC_BITS, 128,
+                     &wmv2_inter_table[i][0][1], 8, 4,
+                     &wmv2_inter_table[i][0][0], 8, 4, 1); //FIXME name?
+        }
+
+        init_vlc(&ff_msmp4_mb_i_vlc, MB_INTRA_VLC_BITS, 64,
+                 &ff_msmp4_mb_i_table[0][1], 4, 2,
+                 &ff_msmp4_mb_i_table[0][0], 4, 2, 1);
+
+        init_vlc(&v1_intra_cbpc_vlc, V1_INTRA_CBPC_VLC_BITS, 8,
+                 intra_MCBPC_bits, 1, 1,
+                 intra_MCBPC_code, 1, 1, 1);
+        init_vlc(&v1_inter_cbpc_vlc, V1_INTER_CBPC_VLC_BITS, 25,
+                 inter_MCBPC_bits, 1, 1,
+                 inter_MCBPC_code, 1, 1, 1);
+
+        init_vlc(&inter_intra_vlc, INTER_INTRA_VLC_BITS, 4,
+                 &table_inter_intra[0][1], 2, 1,
+                 &table_inter_intra[0][0], 2, 1, 1);
+    }
+
+    switch(s->msmpeg4_version){
+    case 1:
+    case 2:
+        s->decode_mb= msmpeg4v12_decode_mb;
+        break;
+    case 3:
+    case 4:
+        s->decode_mb= msmpeg4v34_decode_mb;
+        break;
+    case 5:
+        s->decode_mb= wmv2_decode_mb;
+    case 6:
+        //FIXME + TODO VC1 decode mb
+        break;
+    }
+
+    s->slice_height= s->mb_height; //to avoid 1/0 if the first frame isnt a keyframe
+
+    return 0;
+}
+
+int msmpeg4_decode_picture_header(MpegEncContext * s)
+{
+    int code;
+
+#if 0
+{
+int i;
+for(i=0; i<s->gb.size_in_bits; i++)
+    av_log(s->avctx, AV_LOG_DEBUG, "%d", get_bits1(&s->gb));
+//    get_bits1(&s->gb);
+av_log(s->avctx, AV_LOG_DEBUG, "END\n");
+return -1;
+}
+#endif
+
+    if(s->msmpeg4_version==1){
+        int start_code, num;
+        start_code = (get_bits(&s->gb, 16)<<16) | get_bits(&s->gb, 16);
+        if(start_code!=0x00000100){
+            av_log(s->avctx, AV_LOG_ERROR, "invalid startcode\n");
+            return -1;
+        }
+
+        num= get_bits(&s->gb, 5); // frame number */
+    }
+
+    s->pict_type = get_bits(&s->gb, 2) + 1;
+    if (s->pict_type != I_TYPE &&
+        s->pict_type != P_TYPE){
+        av_log(s->avctx, AV_LOG_ERROR, "invalid picture type\n");
+        return -1;
+    }
+#if 0
+{
+    static int had_i=0;
+    if(s->pict_type == I_TYPE) had_i=1;
+    if(!had_i) return -1;
+}
+#endif
+    s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
+    if(s->qscale==0){
+        av_log(s->avctx, AV_LOG_ERROR, "invalid qscale\n");
+        return -1;
+    }
+
+    if (s->pict_type == I_TYPE) {
+        code = get_bits(&s->gb, 5);
+        if(s->msmpeg4_version==1){
+            if(code==0 || code>s->mb_height){
+                av_log(s->avctx, AV_LOG_ERROR, "invalid slice height %d\n", code);
+                return -1;
+            }
+
+            s->slice_height = code;
+        }else{
+            /* 0x17: one slice, 0x18: two slices, ... */
+            if (code < 0x17){
+                av_log(s->avctx, AV_LOG_ERROR, "error, slice code was %X\n", code);
+                return -1;
+            }
+
+            s->slice_height = s->mb_height / (code - 0x16);
+        }
+
+        switch(s->msmpeg4_version){
+        case 1:
+        case 2:
+            s->rl_chroma_table_index = 2;
+            s->rl_table_index = 2;
+
+            s->dc_table_index = 0; //not used
+            break;
+        case 3:
+            s->rl_chroma_table_index = decode012(&s->gb);
+            s->rl_table_index = decode012(&s->gb);
+
+            s->dc_table_index = get_bits1(&s->gb);
+            break;
+        case 4:
+            msmpeg4_decode_ext_header(s, (2+5+5+17+7)/8);
+
+            if(s->bit_rate > MBAC_BITRATE) s->per_mb_rl_table= get_bits1(&s->gb);
+            else                           s->per_mb_rl_table= 0;
+
+            if(!s->per_mb_rl_table){
+                s->rl_chroma_table_index = decode012(&s->gb);
+                s->rl_table_index = decode012(&s->gb);
+            }
+
+            s->dc_table_index = get_bits1(&s->gb);
+            s->inter_intra_pred= 0;
+            break;
+        }
+        s->no_rounding = 1;
+        if(s->avctx->debug&FF_DEBUG_PICT_INFO)
+            av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d slice:%d   \n",
+                s->qscale,
+                s->rl_chroma_table_index,
+                s->rl_table_index,
+                s->dc_table_index,
+                s->per_mb_rl_table,
+                s->slice_height);
+    } else {
+        switch(s->msmpeg4_version){
+        case 1:
+        case 2:
+            if(s->msmpeg4_version==1)
+                s->use_skip_mb_code = 1;
+            else
+                s->use_skip_mb_code = get_bits1(&s->gb);
+            s->rl_table_index = 2;
+            s->rl_chroma_table_index = s->rl_table_index;
+            s->dc_table_index = 0; //not used
+            s->mv_table_index = 0;
+            break;
+        case 3:
+            s->use_skip_mb_code = get_bits1(&s->gb);
+            s->rl_table_index = decode012(&s->gb);
+            s->rl_chroma_table_index = s->rl_table_index;
+
+            s->dc_table_index = get_bits1(&s->gb);
+
+            s->mv_table_index = get_bits1(&s->gb);
+            break;
+        case 4:
+            s->use_skip_mb_code = get_bits1(&s->gb);
+
+            if(s->bit_rate > MBAC_BITRATE) s->per_mb_rl_table= get_bits1(&s->gb);
+            else                           s->per_mb_rl_table= 0;
+
+            if(!s->per_mb_rl_table){
+                s->rl_table_index = decode012(&s->gb);
+                s->rl_chroma_table_index = s->rl_table_index;
+            }
+
+            s->dc_table_index = get_bits1(&s->gb);
+
+            s->mv_table_index = get_bits1(&s->gb);
+            s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE);
+            break;
+        }
+
+        if(s->avctx->debug&FF_DEBUG_PICT_INFO)
+            av_log(s->avctx, AV_LOG_DEBUG, "skip:%d rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d   \n",
+                s->use_skip_mb_code,
+                s->rl_table_index,
+                s->rl_chroma_table_index,
+                s->dc_table_index,
+                s->mv_table_index,
+                s->per_mb_rl_table,
+                s->qscale);
+
+        if(s->flipflop_rounding){
+            s->no_rounding ^= 1;
+        }else{
+            s->no_rounding = 0;
+        }
+    }
+//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
+
+    s->esc3_level_length= 0;
+    s->esc3_run_length= 0;
+
+#ifdef DEBUG
+    av_log(s->avctx, AV_LOG_DEBUG, "*****frame %d:\n", frame_count++);
+#endif
+    return 0;
+}
+
+int msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size)
+{
+    int left= buf_size*8 - get_bits_count(&s->gb);
+    int length= s->msmpeg4_version>=3 ? 17 : 16;
+    /* the alt_bitstream reader could read over the end so we need to check it */
+    if(left>=length && left<length+8)
+    {
+        int fps;
+
+        fps= get_bits(&s->gb, 5);
+        s->bit_rate= get_bits(&s->gb, 11)*1024;
+        if(s->msmpeg4_version>=3)
+            s->flipflop_rounding= get_bits1(&s->gb);
+        else
+            s->flipflop_rounding= 0;
+
+//        printf("fps:%2d bps:%2d roundingType:%1d\n", fps, s->bit_rate/1024, s->flipflop_rounding);
+    }
+    else if(left<length+8)
+    {
+        s->flipflop_rounding= 0;
+        if(s->msmpeg4_version != 2)
+            av_log(s->avctx, AV_LOG_ERROR, "ext header missing, %d left\n", left);
+    }
+    else
+    {
+        av_log(s->avctx, AV_LOG_ERROR, "I frame too long, ignoring ext header\n");
+    }
+
+    return 0;
+}
+
+static inline void msmpeg4_memsetw(short *tab, int val, int n)
+{
+    int i;
+    for(i=0;i<n;i++)
+        tab[i] = val;
+}
+
+#ifdef CONFIG_ENCODERS
+static void msmpeg4v2_encode_motion(MpegEncContext * s, int val)
+{
+    int range, bit_size, sign, code, bits;
+
+    if (val == 0) {
+        /* zero vector */
+        code = 0;
+        put_bits(&s->pb, mvtab[code][1], mvtab[code][0]);
+    } else {
+        bit_size = s->f_code - 1;
+        range = 1 << bit_size;
+        if (val <= -64)
+            val += 64;
+        else if (val >= 64)
+            val -= 64;
+
+        if (val >= 0) {
+            sign = 0;
+        } else {
+            val = -val;
+            sign = 1;
+        }
+        val--;
+        code = (val >> bit_size) + 1;
+        bits = val & (range - 1);
+
+        put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign);
+        if (bit_size > 0) {
+            put_bits(&s->pb, bit_size, bits);
+        }
+    }
+}
+#endif
+
+/* this is identical to h263 except that its range is multiplied by 2 */
+static int msmpeg4v2_decode_motion(MpegEncContext * s, int pred, int f_code)
+{
+    int code, val, sign, shift;
+
+    code = get_vlc2(&s->gb, v2_mv_vlc.table, V2_MV_VLC_BITS, 2);
+//     printf("MV code %d at %d %d pred: %d\n", code, s->mb_x,s->mb_y, pred);
+    if (code < 0)
+        return 0xffff;
+
+    if (code == 0)
+        return pred;
+    sign = get_bits1(&s->gb);
+    shift = f_code - 1;
+    val = code;
+    if (shift) {
+        val = (val - 1) << shift;
+        val |= get_bits(&s->gb, shift);
+        val++;
+    }
+    if (sign)
+        val = -val;
+
+    val += pred;
+    if (val <= -64)
+        val += 64;
+    else if (val >= 64)
+        val -= 64;
+
+    return val;
+}
+
+static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
+{
+    int cbp, code, i;
+
+    if (s->pict_type == P_TYPE) {
+        if (s->use_skip_mb_code) {
+            if (get_bits1(&s->gb)) {
+                /* skip mb */
+                s->mb_intra = 0;
+                for(i=0;i<6;i++)
+                    s->block_last_index[i] = -1;
+                s->mv_dir = MV_DIR_FORWARD;
+                s->mv_type = MV_TYPE_16X16;
+                s->mv[0][0][0] = 0;
+                s->mv[0][0][1] = 0;
+                s->mb_skipped = 1;
+                return 0;
+            }
+        }
+
+        if(s->msmpeg4_version==2)
+            code = get_vlc2(&s->gb, v2_mb_type_vlc.table, V2_MB_TYPE_VLC_BITS, 1);
+        else
+            code = get_vlc2(&s->gb, v1_inter_cbpc_vlc.table, V1_INTER_CBPC_VLC_BITS, 3);
+        if(code<0 || code>7){
+            av_log(s->avctx, AV_LOG_ERROR, "cbpc %d invalid at %d %d\n", code, s->mb_x, s->mb_y);
+            return -1;
+        }
+
+        s->mb_intra = code >>2;
+
+        cbp = code & 0x3;
+    } else {
+        s->mb_intra = 1;
+        if(s->msmpeg4_version==2)
+            cbp= get_vlc2(&s->gb, v2_intra_cbpc_vlc.table, V2_INTRA_CBPC_VLC_BITS, 1);
+        else
+            cbp= get_vlc2(&s->gb, v1_intra_cbpc_vlc.table, V1_INTRA_CBPC_VLC_BITS, 1);
+        if(cbp<0 || cbp>3){
+            av_log(s->avctx, AV_LOG_ERROR, "cbpc %d invalid at %d %d\n", cbp, s->mb_x, s->mb_y);
+            return -1;
+        }
+    }
+
+    if (!s->mb_intra) {
+        int mx, my, cbpy;
+
+        cbpy= get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
+        if(cbpy<0){
+            av_log(s->avctx, AV_LOG_ERROR, "cbpy %d invalid at %d %d\n", cbp, s->mb_x, s->mb_y);
+            return -1;
+        }
+
+        cbp|= cbpy<<2;
+        if(s->msmpeg4_version==1 || (cbp&3) != 3) cbp^= 0x3C;
+
+        h263_pred_motion(s, 0, 0, &mx, &my);
+        mx= msmpeg4v2_decode_motion(s, mx, 1);
+        my= msmpeg4v2_decode_motion(s, my, 1);
+
+        s->mv_dir = MV_DIR_FORWARD;
+        s->mv_type = MV_TYPE_16X16;
+        s->mv[0][0][0] = mx;
+        s->mv[0][0][1] = my;
+    } else {
+        if(s->msmpeg4_version==2){
+            s->ac_pred = get_bits1(&s->gb);
+            cbp|= get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1)<<2; //FIXME check errors
+        } else{
+            s->ac_pred = 0;
+            cbp|= get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1)<<2; //FIXME check errors
+            if(s->pict_type==P_TYPE) cbp^=0x3C;
+        }
+    }
+
+    s->dsp.clear_blocks(s->block[0]);
+    for (i = 0; i < 6; i++) {
+        if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
+        {
+             av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
+             return -1;
+        }
+    }
+    return 0;
+}
+
+static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
+{
+    int cbp, code, i;
+    uint8_t *coded_val;
+    uint32_t * const mb_type_ptr= &s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride ];
+
+    if (s->pict_type == P_TYPE) {
+        if (s->use_skip_mb_code) {
+            if (get_bits1(&s->gb)) {
+                /* skip mb */
+                s->mb_intra = 0;
+                for(i=0;i<6;i++)
+                    s->block_last_index[i] = -1;
+                s->mv_dir = MV_DIR_FORWARD;
+                s->mv_type = MV_TYPE_16X16;
+                s->mv[0][0][0] = 0;
+                s->mv[0][0][1] = 0;
+                s->mb_skipped = 1;
+                *mb_type_ptr = MB_TYPE_SKIP | MB_TYPE_L0 | MB_TYPE_16x16;
+
+                return 0;
+            }
+        }
+
+        code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3);
+        if (code < 0)
+            return -1;
+        //s->mb_intra = (code & 0x40) ? 0 : 1;
+        s->mb_intra = (~code & 0x40) >> 6;
+
+        cbp = code & 0x3f;
+    } else {
+        s->mb_intra = 1;
+        code = get_vlc2(&s->gb, ff_msmp4_mb_i_vlc.table, MB_INTRA_VLC_BITS, 2);
+        if (code < 0)
+            return -1;
+        /* predict coded block pattern */
+        cbp = 0;
+        for(i=0;i<6;i++) {
+            int val = ((code >> (5 - i)) & 1);
+            if (i < 4) {
+                int pred = coded_block_pred(s, i, &coded_val);
+                val = val ^ pred;
+                *coded_val = val;
+            }
+            cbp |= val << (5 - i);
+        }
+    }
+
+    if (!s->mb_intra) {
+        int mx, my;
+//printf("P at %d %d\n", s->mb_x, s->mb_y);
+        if(s->per_mb_rl_table && cbp){
+            s->rl_table_index = decode012(&s->gb);
+            s->rl_chroma_table_index = s->rl_table_index;
+        }
+        h263_pred_motion(s, 0, 0, &mx, &my);
+        if (msmpeg4_decode_motion(s, &mx, &my) < 0)
+            return -1;
+        s->mv_dir = MV_DIR_FORWARD;
+        s->mv_type = MV_TYPE_16X16;
+        s->mv[0][0][0] = mx;
+        s->mv[0][0][1] = my;
+        *mb_type_ptr = MB_TYPE_L0 | MB_TYPE_16x16;
+    } else {
+//printf("I at %d %d %d %06X\n", s->mb_x, s->mb_y, ((cbp&3)? 1 : 0) +((cbp&0x3C)? 2 : 0), show_bits(&s->gb, 24));
+        s->ac_pred = get_bits1(&s->gb);
+        *mb_type_ptr = MB_TYPE_INTRA;
+        if(s->inter_intra_pred){
+            s->h263_aic_dir= get_vlc2(&s->gb, inter_intra_vlc.table, INTER_INTRA_VLC_BITS, 1);
+//            printf("%d%d %d %d/", s->ac_pred, s->h263_aic_dir, s->mb_x, s->mb_y);
+        }
+        if(s->per_mb_rl_table && cbp){
+            s->rl_table_index = decode012(&s->gb);
+            s->rl_chroma_table_index = s->rl_table_index;
+        }
+    }
+
+    s->dsp.clear_blocks(s->block[0]);
+    for (i = 0; i < 6; i++) {
+        if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
+        {
+            av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
+            return -1;
+        }
+    }
+
+    return 0;
+}
+//#define ERROR_DETAILS
+static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
+                              int n, int coded, const uint8_t *scan_table)
+{
+    int level, i, last, run, run_diff;
+    int dc_pred_dir;
+    RLTable *rl;
+    RL_VLC_ELEM *rl_vlc;
+    int qmul, qadd;
+
+    if (s->mb_intra) {
+        qmul=1;
+        qadd=0;
+
+        /* DC coef */
+        level = msmpeg4_decode_dc(s, n, &dc_pred_dir);
+
+        if (level < 0){
+            av_log(s->avctx, AV_LOG_ERROR, "dc overflow- block: %d qscale: %d//\n", n, s->qscale);
+            if(s->inter_intra_pred) level=0;
+            else                    return -1;
+        }
+        if (n < 4) {
+            rl = &rl_table[s->rl_table_index];
+            if(level > 256*s->y_dc_scale){
+                av_log(s->avctx, AV_LOG_ERROR, "dc overflow+ L qscale: %d//\n", s->qscale);
+                if(!s->inter_intra_pred) return -1;
+            }
+        } else {
+            rl = &rl_table[3 + s->rl_chroma_table_index];
+            if(level > 256*s->c_dc_scale){
+                av_log(s->avctx, AV_LOG_ERROR, "dc overflow+ C qscale: %d//\n", s->qscale);
+                if(!s->inter_intra_pred) return -1;
+            }
+        }
+        block[0] = level;
+
+        run_diff = 0;
+        i = 0;
+        if (!coded) {
+            goto not_coded;
+        }
+        if (s->ac_pred) {
+            if (dc_pred_dir == 0)
+                scan_table = s->intra_v_scantable.permutated; /* left */
+            else
+                scan_table = s->intra_h_scantable.permutated; /* top */
+        } else {
+            scan_table = s->intra_scantable.permutated;
+        }
+        rl_vlc= rl->rl_vlc[0];
+    } else {
+        qmul = s->qscale << 1;
+        qadd = (s->qscale - 1) | 1;
+        i = -1;
+        rl = &rl_table[3 + s->rl_table_index];
+
+        if(s->msmpeg4_version==2)
+            run_diff = 0;
+        else
+            run_diff = 1;
+
+        if (!coded) {
+            s->block_last_index[n] = i;
+            return 0;
+        }
+        if(!scan_table)
+            scan_table = s->inter_scantable.permutated;
+        rl_vlc= rl->rl_vlc[s->qscale];
+    }
+  {
+    OPEN_READER(re, &s->gb);
+    for(;;) {
+        UPDATE_CACHE(re, &s->gb);
+        GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 0);
+        if (level==0) {
+            int cache;
+            cache= GET_CACHE(re, &s->gb);
+            /* escape */
+            if (s->msmpeg4_version==1 || (cache&0x80000000)==0) {
+                if (s->msmpeg4_version==1 || (cache&0x40000000)==0) {
+                    /* third escape */
+                    if(s->msmpeg4_version!=1) LAST_SKIP_BITS(re, &s->gb, 2);
+                    UPDATE_CACHE(re, &s->gb);
+                    if(s->msmpeg4_version<=3){
+                        last=  SHOW_UBITS(re, &s->gb, 1); SKIP_CACHE(re, &s->gb, 1);
+                        run=   SHOW_UBITS(re, &s->gb, 6); SKIP_CACHE(re, &s->gb, 6);
+                        level= SHOW_SBITS(re, &s->gb, 8); LAST_SKIP_CACHE(re, &s->gb, 8);
+                        SKIP_COUNTER(re, &s->gb, 1+6+8);
+                    }else{
+                        int sign;
+                        last=  SHOW_UBITS(re, &s->gb, 1); SKIP_BITS(re, &s->gb, 1);
+                        if(!s->esc3_level_length){
+                            int ll;
+                            //printf("ESC-3 %X at %d %d\n", show_bits(&s->gb, 24), s->mb_x, s->mb_y);
+                            if(s->qscale<8){
+                                ll= SHOW_UBITS(re, &s->gb, 3); SKIP_BITS(re, &s->gb, 3);
+                                if(ll==0){
+                                    if(SHOW_UBITS(re, &s->gb, 1)) av_log(s->avctx, AV_LOG_ERROR, "cool a new vlc code ,contact the ffmpeg developers and upload the file\n");
+                                    SKIP_BITS(re, &s->gb, 1);
+                                    ll=8;
+                                }
+                            }else{
+                                ll=2;
+                                while(ll<8 && SHOW_UBITS(re, &s->gb, 1)==0){
+                                    ll++;
+                                    SKIP_BITS(re, &s->gb, 1);
+                                }
+                                if(ll<8) SKIP_BITS(re, &s->gb, 1);
+                            }
+
+                            s->esc3_level_length= ll;
+                            s->esc3_run_length= SHOW_UBITS(re, &s->gb, 2) + 3; SKIP_BITS(re, &s->gb, 2);
+//printf("level length:%d, run length: %d\n", ll, s->esc3_run_length);
+                            UPDATE_CACHE(re, &s->gb);
+                        }
+                        run=   SHOW_UBITS(re, &s->gb, s->esc3_run_length);
+                        SKIP_BITS(re, &s->gb, s->esc3_run_length);
+
+                        sign=  SHOW_UBITS(re, &s->gb, 1);
+                        SKIP_BITS(re, &s->gb, 1);
+
+                        level= SHOW_UBITS(re, &s->gb, s->esc3_level_length);
+                        SKIP_BITS(re, &s->gb, s->esc3_level_length);
+                        if(sign) level= -level;
+                    }
+//printf("level: %d, run: %d at %d %d\n", level, run, s->mb_x, s->mb_y);
+#if 0 // waste of time / this will detect very few errors
+                    {
+                        const int abs_level= FFABS(level);
+                        const int run1= run - rl->max_run[last][abs_level] - run_diff;
+                        if(abs_level<=MAX_LEVEL && run<=MAX_RUN){
+                            if(abs_level <= rl->max_level[last][run]){
+                                av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, vlc encoding possible\n");
+                                return DECODING_AC_LOST;
+                            }
+                            if(abs_level <= rl->max_level[last][run]*2){
+                                av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 1 encoding possible\n");
+                                return DECODING_AC_LOST;
+                            }
+                            if(run1>=0 && abs_level <= rl->max_level[last][run1]){
+                                av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 2 encoding possible\n");
+                                return DECODING_AC_LOST;
+                            }
+                        }
+                    }
+#endif
+                    //level = level * qmul + (level>0) * qadd - (level<=0) * qadd ;
+                    if (level>0) level= level * qmul + qadd;
+                    else         level= level * qmul - qadd;
+#if 0 // waste of time too :(
+                    if(level>2048 || level<-2048){
+                        av_log(s->avctx, AV_LOG_ERROR, "|level| overflow in 3. esc\n");
+                        return DECODING_AC_LOST;
+                    }
+#endif
+                    i+= run + 1;
+                    if(last) i+=192;
+#ifdef ERROR_DETAILS
+                if(run==66)
+                    av_log(s->avctx, AV_LOG_ERROR, "illegal vlc code in ESC3 level=%d\n", level);
+                else if((i>62 && i<192) || i>192+63)
+                    av_log(s->avctx, AV_LOG_ERROR, "run overflow in ESC3 i=%d run=%d level=%d\n", i, run, level);
+#endif
+                } else {
+                    /* second escape */
+#if MIN_CACHE_BITS < 23
+                    LAST_SKIP_BITS(re, &s->gb, 2);
+                    UPDATE_CACHE(re, &s->gb);
+#else
+                    SKIP_BITS(re, &s->gb, 2);
+#endif
+                    GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 1);
+                    i+= run + rl->max_run[run>>7][level/qmul] + run_diff; //FIXME opt indexing
+                    level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                    LAST_SKIP_BITS(re, &s->gb, 1);
+#ifdef ERROR_DETAILS
+                if(run==66)
+                    av_log(s->avctx, AV_LOG_ERROR, "illegal vlc code in ESC2 level=%d\n", level);
+                else if((i>62 && i<192) || i>192+63)
+                    av_log(s->avctx, AV_LOG_ERROR, "run overflow in ESC2 i=%d run=%d level=%d\n", i, run, level);
+#endif
+                }
+            } else {
+                /* first escape */
+#if MIN_CACHE_BITS < 22
+                LAST_SKIP_BITS(re, &s->gb, 1);
+                UPDATE_CACHE(re, &s->gb);
+#else
+                SKIP_BITS(re, &s->gb, 1);
+#endif
+                GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 1);
+                i+= run;
+                level = level + rl->max_level[run>>7][(run-1)&63] * qmul;//FIXME opt indexing
+                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                LAST_SKIP_BITS(re, &s->gb, 1);
+#ifdef ERROR_DETAILS
+                if(run==66)
+                    av_log(s->avctx, AV_LOG_ERROR, "illegal vlc code in ESC1 level=%d\n", level);
+                else if((i>62 && i<192) || i>192+63)
+                    av_log(s->avctx, AV_LOG_ERROR, "run overflow in ESC1 i=%d run=%d level=%d\n", i, run, level);
+#endif
+            }
+        } else {
+            i+= run;
+            level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+            LAST_SKIP_BITS(re, &s->gb, 1);
+#ifdef ERROR_DETAILS
+                if(run==66)
+                    av_log(s->avctx, AV_LOG_ERROR, "illegal vlc code level=%d\n", level);
+                else if((i>62 && i<192) || i>192+63)
+                    av_log(s->avctx, AV_LOG_ERROR, "run overflow i=%d run=%d level=%d\n", i, run, level);
+#endif
+        }
+        if (i > 62){
+            i-= 192;
+            if(i&(~63)){
+                const int left= s->gb.size_in_bits - get_bits_count(&s->gb);
+                if(((i+192 == 64 && level/qmul==-1) || s->error_resilience<=1) && left>=0){
+                    av_log(s->avctx, AV_LOG_ERROR, "ignoring overflow at %d %d\n", s->mb_x, s->mb_y);
+                    break;
+                }else{
+                    av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+                    return -1;
+                }
+            }
+
+            block[scan_table[i]] = level;
+            break;
+        }
+
+        block[scan_table[i]] = level;
+    }
+    CLOSE_READER(re, &s->gb);
+  }
+ not_coded:
+    if (s->mb_intra) {
+        mpeg4_pred_ac(s, block, n, dc_pred_dir);
+        if (s->ac_pred) {
+            i = 63; /* XXX: not optimal */
+        }
+    }
+    if(s->msmpeg4_version>=4 && i>0) i=63; //FIXME/XXX optimize
+    s->block_last_index[n] = i;
+
+    return 0;
+}
+
+static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
+{
+    int level, pred;
+
+    if(s->msmpeg4_version<=2){
+        if (n < 4) {
+            level = get_vlc2(&s->gb, v2_dc_lum_vlc.table, DC_VLC_BITS, 3);
+        } else {
+            level = get_vlc2(&s->gb, v2_dc_chroma_vlc.table, DC_VLC_BITS, 3);
+        }
+        if (level < 0)
+            return -1;
+        level-=256;
+    }else{  //FIXME optimize use unified tables & index
+        if (n < 4) {
+            level = get_vlc2(&s->gb, ff_msmp4_dc_luma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3);
+        } else {
+            level = get_vlc2(&s->gb, ff_msmp4_dc_chroma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3);
+        }
+        if (level < 0){
+            av_log(s->avctx, AV_LOG_ERROR, "illegal dc vlc\n");
+            return -1;
+        }
+
+        if (level == DC_MAX) {
+            level = get_bits(&s->gb, 8);
+            if (get_bits1(&s->gb))
+                level = -level;
+        } else if (level != 0) {
+            if (get_bits1(&s->gb))
+                level = -level;
+        }
+    }
+
+    if(s->msmpeg4_version==1){
+        int32_t *dc_val;
+        pred = msmpeg4v1_pred_dc(s, n, &dc_val);
+        level += pred;
+
+        /* update predictor */
+        *dc_val= level;
+    }else{
+        int16_t *dc_val;
+        pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr);
+        level += pred;
+
+        /* update predictor */
+        if (n < 4) {
+            *dc_val = level * s->y_dc_scale;
+        } else {
+            *dc_val = level * s->c_dc_scale;
+        }
+    }
+
+    return level;
+}
+
+static int msmpeg4_decode_motion(MpegEncContext * s,
+                                 int *mx_ptr, int *my_ptr)
+{
+    MVTable *mv;
+    int code, mx, my;
+
+    mv = &mv_tables[s->mv_table_index];
+
+    code = get_vlc2(&s->gb, mv->vlc.table, MV_VLC_BITS, 2);
+    if (code < 0){
+        av_log(s->avctx, AV_LOG_ERROR, "illegal MV code at %d %d\n", s->mb_x, s->mb_y);
+        return -1;
+    }
+    if (code == mv->n) {
+//printf("MV ESC %X at %d %d\n", show_bits(&s->gb, 24), s->mb_x, s->mb_y);
+        mx = get_bits(&s->gb, 6);
+        my = get_bits(&s->gb, 6);
+    } else {
+        mx = mv->table_mvx[code];
+        my = mv->table_mvy[code];
+    }
+
+    mx += *mx_ptr - 32;
+    my += *my_ptr - 32;
+    /* WARNING : they do not do exactly modulo encoding */
+    if (mx <= -64)
+        mx += 64;
+    else if (mx >= 64)
+        mx -= 64;
+
+    if (my <= -64)
+        my += 64;
+    else if (my >= 64)
+        my -= 64;
+    *mx_ptr = mx;
+    *my_ptr = my;
+    return 0;
+}
+
+/* cleanest way to support it
+ * there is too much shared between versions so that we cant have 1 file per version & 1 common
+ * as allmost everything would be in the common file
+ */
+#include "wmv2.c"
diff --git a/contrib/ffmpeg/libavcodec/msmpeg4data.h b/contrib/ffmpeg/libavcodec/msmpeg4data.h
new file mode 100644
index 000000000..d1ff70371
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/msmpeg4data.h
@@ -0,0 +1,2028 @@
+/*
+ * MSMPEG4 backend for ffmpeg encoder and decoder
+ * copyright (c) 2001 Fabrice Bellard
+ * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * msmpeg4v1 & v2 stuff by Michael Niedermayer <michaelni@gmx.at>
+ */
+
+/**
+ * @file msmpeg4data.h
+ * MSMPEG4 data tables.
+ */
+
+/* intra picture macro block coded block pattern */
+const uint16_t ff_msmp4_mb_i_table[64][2] = {
+{ 0x1, 1 },{ 0x17, 6 },{ 0x9, 5 },{ 0x5, 5 },
+{ 0x6, 5 },{ 0x47, 9 },{ 0x20, 7 },{ 0x10, 7 },
+{ 0x2, 5 },{ 0x7c, 9 },{ 0x3a, 7 },{ 0x1d, 7 },
+{ 0x2, 6 },{ 0xec, 9 },{ 0x77, 8 },{ 0x0, 8 },
+{ 0x3, 5 },{ 0xb7, 9 },{ 0x2c, 7 },{ 0x13, 7 },
+{ 0x1, 6 },{ 0x168, 10 },{ 0x46, 8 },{ 0x3f, 8 },
+{ 0x1e, 6 },{ 0x712, 13 },{ 0xb5, 9 },{ 0x42, 8 },
+{ 0x22, 7 },{ 0x1c5, 11 },{ 0x11e, 10 },{ 0x87, 9 },
+{ 0x6, 4 },{ 0x3, 9 },{ 0x1e, 7 },{ 0x1c, 6 },
+{ 0x12, 7 },{ 0x388, 12 },{ 0x44, 9 },{ 0x70, 9 },
+{ 0x1f, 6 },{ 0x23e, 11 },{ 0x39, 8 },{ 0x8e, 9 },
+{ 0x1, 7 },{ 0x1c6, 11 },{ 0xb6, 9 },{ 0x45, 9 },
+{ 0x14, 6 },{ 0x23f, 11 },{ 0x7d, 9 },{ 0x18, 9 },
+{ 0x7, 7 },{ 0x1c7, 11 },{ 0x86, 9 },{ 0x19, 9 },
+{ 0x15, 6 },{ 0x1db, 10 },{ 0x2, 9 },{ 0x46, 9 },
+{ 0xd, 8 },{ 0x713, 13 },{ 0x1da, 10 },{ 0x169, 10 },
+};
+
+/* non intra picture macro block coded block pattern + mb type */
+static const uint32_t table_mb_non_intra[128][2] = {
+{ 0x40, 7 },{ 0x13c9, 13 },{ 0x9fd, 12 },{ 0x1fc, 15 },
+{ 0x9fc, 12 },{ 0xa83, 18 },{ 0x12d34, 17 },{ 0x83bc, 16 },
+{ 0x83a, 12 },{ 0x7f8, 17 },{ 0x3fd, 16 },{ 0x3ff, 16 },
+{ 0x79, 13 },{ 0xa82, 18 },{ 0x969d, 16 },{ 0x2a4, 16 },
+{ 0x978, 12 },{ 0x543, 17 },{ 0x41df, 15 },{ 0x7f9, 17 },
+{ 0x12f3, 13 },{ 0x25a6b, 18 },{ 0x25ef9, 18 },{ 0x3fa, 16 },
+{ 0x20ee, 14 },{ 0x969ab, 20 },{ 0x969c, 16 },{ 0x25ef8, 18 },
+{ 0x12d2, 13 },{ 0xa85, 18 },{ 0x969e, 16 },{ 0x4bc8, 15 },
+{ 0x3d, 12 },{ 0x12f7f, 17 },{ 0x2a2, 16 },{ 0x969f, 16 },
+{ 0x25ee, 14 },{ 0x12d355, 21 },{ 0x12f7d, 17 },{ 0x12f7e, 17 },
+{ 0x9e5, 12 },{ 0xa81, 18 },{ 0x4b4d4, 19 },{ 0x83bd, 16 },
+{ 0x78, 13 },{ 0x969b, 16 },{ 0x3fe, 16 },{ 0x2a5, 16 },
+{ 0x7e, 13 },{ 0xa80, 18 },{ 0x2a3, 16 },{ 0x3fb, 16 },
+{ 0x1076, 13 },{ 0xa84, 18 },{ 0x153, 15 },{ 0x4bc9, 15 },
+{ 0x55, 13 },{ 0x12d354, 21 },{ 0x4bde, 15 },{ 0x25e5, 14 },
+{ 0x25b, 10 },{ 0x4b4c, 15 },{ 0x96b, 12 },{ 0x96a, 12 },
+{ 0x1, 2 },{ 0x0, 7 },{ 0x26, 6 },{ 0x12b, 9 },
+{ 0x7, 3 },{ 0x20f, 10 },{ 0x4, 9 },{ 0x28, 12 },
+{ 0x6, 3 },{ 0x20a, 10 },{ 0x128, 9 },{ 0x2b, 12 },
+{ 0x11, 5 },{ 0x1b, 11 },{ 0x13a, 9 },{ 0x4ff, 11 },
+{ 0x3, 4 },{ 0x277, 10 },{ 0x106, 9 },{ 0x839, 12 },
+{ 0xb, 4 },{ 0x27b, 10 },{ 0x12c, 9 },{ 0x4bf, 11 },
+{ 0x9, 6 },{ 0x35, 12 },{ 0x27e, 10 },{ 0x13c8, 13 },
+{ 0x1, 6 },{ 0x4aa, 11 },{ 0x208, 10 },{ 0x29, 12 },
+{ 0x1, 4 },{ 0x254, 10 },{ 0x12e, 9 },{ 0x838, 12 },
+{ 0x24, 6 },{ 0x4f3, 11 },{ 0x276, 10 },{ 0x12f6, 13 },
+{ 0x1, 5 },{ 0x27a, 10 },{ 0x13e, 9 },{ 0x3e, 12 },
+{ 0x8, 6 },{ 0x413, 11 },{ 0xc, 10 },{ 0x4be, 11 },
+{ 0x14, 5 },{ 0x412, 11 },{ 0x253, 10 },{ 0x97a, 12 },
+{ 0x21, 6 },{ 0x4ab, 11 },{ 0x20b, 10 },{ 0x34, 12 },
+{ 0x15, 5 },{ 0x278, 10 },{ 0x252, 10 },{ 0x968, 12 },
+{ 0x5, 5 },{ 0xb, 10 },{ 0x9c, 8 },{ 0xe, 10 },
+};
+
+/* dc table 0 */
+
+const uint32_t ff_table0_dc_lum[120][2] = {
+{ 0x1, 1 },{ 0x1, 2 },{ 0x1, 4 },{ 0x1, 5 },
+{ 0x5, 5 },{ 0x7, 5 },{ 0x8, 6 },{ 0xc, 6 },
+{ 0x0, 7 },{ 0x2, 7 },{ 0x12, 7 },{ 0x1a, 7 },
+{ 0x3, 8 },{ 0x7, 8 },{ 0x27, 8 },{ 0x37, 8 },
+{ 0x5, 9 },{ 0x4c, 9 },{ 0x6c, 9 },{ 0x6d, 9 },
+{ 0x8, 10 },{ 0x19, 10 },{ 0x9b, 10 },{ 0x1b, 10 },
+{ 0x9a, 10 },{ 0x13, 11 },{ 0x34, 11 },{ 0x35, 11 },
+{ 0x61, 12 },{ 0x48, 13 },{ 0xc4, 13 },{ 0x4a, 13 },
+{ 0xc6, 13 },{ 0xc7, 13 },{ 0x92, 14 },{ 0x18b, 14 },
+{ 0x93, 14 },{ 0x183, 14 },{ 0x182, 14 },{ 0x96, 14 },
+{ 0x97, 14 },{ 0x180, 14 },{ 0x314, 15 },{ 0x315, 15 },
+{ 0x605, 16 },{ 0x604, 16 },{ 0x606, 16 },{ 0xc0e, 17 },
+{ 0x303cd, 23 },{ 0x303c9, 23 },{ 0x303c8, 23 },{ 0x303ca, 23 },
+{ 0x303cb, 23 },{ 0x303cc, 23 },{ 0x303ce, 23 },{ 0x303cf, 23 },
+{ 0x303d0, 23 },{ 0x303d1, 23 },{ 0x303d2, 23 },{ 0x303d3, 23 },
+{ 0x303d4, 23 },{ 0x303d5, 23 },{ 0x303d6, 23 },{ 0x303d7, 23 },
+{ 0x303d8, 23 },{ 0x303d9, 23 },{ 0x303da, 23 },{ 0x303db, 23 },
+{ 0x303dc, 23 },{ 0x303dd, 23 },{ 0x303de, 23 },{ 0x303df, 23 },
+{ 0x303e0, 23 },{ 0x303e1, 23 },{ 0x303e2, 23 },{ 0x303e3, 23 },
+{ 0x303e4, 23 },{ 0x303e5, 23 },{ 0x303e6, 23 },{ 0x303e7, 23 },
+{ 0x303e8, 23 },{ 0x303e9, 23 },{ 0x303ea, 23 },{ 0x303eb, 23 },
+{ 0x303ec, 23 },{ 0x303ed, 23 },{ 0x303ee, 23 },{ 0x303ef, 23 },
+{ 0x303f0, 23 },{ 0x303f1, 23 },{ 0x303f2, 23 },{ 0x303f3, 23 },
+{ 0x303f4, 23 },{ 0x303f5, 23 },{ 0x303f6, 23 },{ 0x303f7, 23 },
+{ 0x303f8, 23 },{ 0x303f9, 23 },{ 0x303fa, 23 },{ 0x303fb, 23 },
+{ 0x303fc, 23 },{ 0x303fd, 23 },{ 0x303fe, 23 },{ 0x303ff, 23 },
+{ 0x60780, 24 },{ 0x60781, 24 },{ 0x60782, 24 },{ 0x60783, 24 },
+{ 0x60784, 24 },{ 0x60785, 24 },{ 0x60786, 24 },{ 0x60787, 24 },
+{ 0x60788, 24 },{ 0x60789, 24 },{ 0x6078a, 24 },{ 0x6078b, 24 },
+{ 0x6078c, 24 },{ 0x6078d, 24 },{ 0x6078e, 24 },{ 0x6078f, 24 },
+};
+
+const uint32_t ff_table0_dc_chroma[120][2] = {
+{ 0x0, 2 },{ 0x1, 2 },{ 0x5, 3 },{ 0x9, 4 },
+{ 0xd, 4 },{ 0x11, 5 },{ 0x1d, 5 },{ 0x1f, 5 },
+{ 0x21, 6 },{ 0x31, 6 },{ 0x38, 6 },{ 0x33, 6 },
+{ 0x39, 6 },{ 0x3d, 6 },{ 0x61, 7 },{ 0x79, 7 },
+{ 0x80, 8 },{ 0xc8, 8 },{ 0xca, 8 },{ 0xf0, 8 },
+{ 0x81, 8 },{ 0xc0, 8 },{ 0xc9, 8 },{ 0x107, 9 },
+{ 0x106, 9 },{ 0x196, 9 },{ 0x183, 9 },{ 0x1e3, 9 },
+{ 0x1e2, 9 },{ 0x20a, 10 },{ 0x20b, 10 },{ 0x609, 11 },
+{ 0x412, 11 },{ 0x413, 11 },{ 0x60b, 11 },{ 0x411, 11 },
+{ 0x60a, 11 },{ 0x65f, 11 },{ 0x410, 11 },{ 0x65d, 11 },
+{ 0x65e, 11 },{ 0xcb8, 12 },{ 0xc10, 12 },{ 0xcb9, 12 },
+{ 0x1823, 13 },{ 0x3045, 14 },{ 0x6089, 15 },{ 0xc110, 16 },
+{ 0x304448, 22 },{ 0x304449, 22 },{ 0x30444a, 22 },{ 0x30444b, 22 },
+{ 0x30444c, 22 },{ 0x30444d, 22 },{ 0x30444e, 22 },{ 0x30444f, 22 },
+{ 0x304450, 22 },{ 0x304451, 22 },{ 0x304452, 22 },{ 0x304453, 22 },
+{ 0x304454, 22 },{ 0x304455, 22 },{ 0x304456, 22 },{ 0x304457, 22 },
+{ 0x304458, 22 },{ 0x304459, 22 },{ 0x30445a, 22 },{ 0x30445b, 22 },
+{ 0x30445c, 22 },{ 0x30445d, 22 },{ 0x30445e, 22 },{ 0x30445f, 22 },
+{ 0x304460, 22 },{ 0x304461, 22 },{ 0x304462, 22 },{ 0x304463, 22 },
+{ 0x304464, 22 },{ 0x304465, 22 },{ 0x304466, 22 },{ 0x304467, 22 },
+{ 0x304468, 22 },{ 0x304469, 22 },{ 0x30446a, 22 },{ 0x30446b, 22 },
+{ 0x30446c, 22 },{ 0x30446d, 22 },{ 0x30446e, 22 },{ 0x30446f, 22 },
+{ 0x304470, 22 },{ 0x304471, 22 },{ 0x304472, 22 },{ 0x304473, 22 },
+{ 0x304474, 22 },{ 0x304475, 22 },{ 0x304476, 22 },{ 0x304477, 22 },
+{ 0x304478, 22 },{ 0x304479, 22 },{ 0x30447a, 22 },{ 0x30447b, 22 },
+{ 0x30447c, 22 },{ 0x30447d, 22 },{ 0x30447e, 22 },{ 0x30447f, 22 },
+{ 0x608880, 23 },{ 0x608881, 23 },{ 0x608882, 23 },{ 0x608883, 23 },
+{ 0x608884, 23 },{ 0x608885, 23 },{ 0x608886, 23 },{ 0x608887, 23 },
+{ 0x608888, 23 },{ 0x608889, 23 },{ 0x60888a, 23 },{ 0x60888b, 23 },
+{ 0x60888c, 23 },{ 0x60888d, 23 },{ 0x60888e, 23 },{ 0x60888f, 23 },
+};
+
+/* dc table 1 */
+
+const uint32_t ff_table1_dc_lum[120][2] = {
+{ 0x2, 2 },{ 0x3, 2 },{ 0x3, 3 },{ 0x2, 4 },
+{ 0x5, 4 },{ 0x1, 5 },{ 0x3, 5 },{ 0x8, 5 },
+{ 0x0, 6 },{ 0x5, 6 },{ 0xd, 6 },{ 0xf, 6 },
+{ 0x13, 6 },{ 0x8, 7 },{ 0x18, 7 },{ 0x1c, 7 },
+{ 0x24, 7 },{ 0x4, 8 },{ 0x6, 8 },{ 0x12, 8 },
+{ 0x32, 8 },{ 0x3b, 8 },{ 0x4a, 8 },{ 0x4b, 8 },
+{ 0xb, 9 },{ 0x26, 9 },{ 0x27, 9 },{ 0x66, 9 },
+{ 0x74, 9 },{ 0x75, 9 },{ 0x14, 10 },{ 0x1c, 10 },
+{ 0x1f, 10 },{ 0x1d, 10 },{ 0x2b, 11 },{ 0x3d, 11 },
+{ 0x19d, 11 },{ 0x19f, 11 },{ 0x54, 12 },{ 0x339, 12 },
+{ 0x338, 12 },{ 0x33d, 12 },{ 0xab, 13 },{ 0xf1, 13 },
+{ 0x678, 13 },{ 0xf2, 13 },{ 0x1e0, 14 },{ 0x1e1, 14 },
+{ 0x154, 14 },{ 0xcf2, 14 },{ 0x3cc, 15 },{ 0x2ab, 15 },
+{ 0x19e7, 15 },{ 0x3ce, 15 },{ 0x19e6, 15 },{ 0x554, 16 },
+{ 0x79f, 16 },{ 0x555, 16 },{ 0xf3d, 17 },{ 0xf37, 17 },
+{ 0xf3c, 17 },{ 0xf35, 17 },{ 0x1e6d, 18 },{ 0x1e68, 18 },
+{ 0x3cd8, 19 },{ 0x3cd3, 19 },{ 0x3cd9, 19 },{ 0x79a4, 20 },
+{ 0xf34ba, 25 },{ 0xf34b4, 25 },{ 0xf34b5, 25 },{ 0xf34b6, 25 },
+{ 0xf34b7, 25 },{ 0xf34b8, 25 },{ 0xf34b9, 25 },{ 0xf34bb, 25 },
+{ 0xf34bc, 25 },{ 0xf34bd, 25 },{ 0xf34be, 25 },{ 0xf34bf, 25 },
+{ 0x1e6940, 26 },{ 0x1e6941, 26 },{ 0x1e6942, 26 },{ 0x1e6943, 26 },
+{ 0x1e6944, 26 },{ 0x1e6945, 26 },{ 0x1e6946, 26 },{ 0x1e6947, 26 },
+{ 0x1e6948, 26 },{ 0x1e6949, 26 },{ 0x1e694a, 26 },{ 0x1e694b, 26 },
+{ 0x1e694c, 26 },{ 0x1e694d, 26 },{ 0x1e694e, 26 },{ 0x1e694f, 26 },
+{ 0x1e6950, 26 },{ 0x1e6951, 26 },{ 0x1e6952, 26 },{ 0x1e6953, 26 },
+{ 0x1e6954, 26 },{ 0x1e6955, 26 },{ 0x1e6956, 26 },{ 0x1e6957, 26 },
+{ 0x1e6958, 26 },{ 0x1e6959, 26 },{ 0x1e695a, 26 },{ 0x1e695b, 26 },
+{ 0x1e695c, 26 },{ 0x1e695d, 26 },{ 0x1e695e, 26 },{ 0x1e695f, 26 },
+{ 0x1e6960, 26 },{ 0x1e6961, 26 },{ 0x1e6962, 26 },{ 0x1e6963, 26 },
+{ 0x1e6964, 26 },{ 0x1e6965, 26 },{ 0x1e6966, 26 },{ 0x1e6967, 26 },
+};
+
+const uint32_t ff_table1_dc_chroma[120][2] = {
+{ 0x0, 2 },{ 0x1, 2 },{ 0x4, 3 },{ 0x7, 3 },
+{ 0xb, 4 },{ 0xd, 4 },{ 0x15, 5 },{ 0x28, 6 },
+{ 0x30, 6 },{ 0x32, 6 },{ 0x52, 7 },{ 0x62, 7 },
+{ 0x66, 7 },{ 0xa6, 8 },{ 0xc6, 8 },{ 0xcf, 8 },
+{ 0x14f, 9 },{ 0x18e, 9 },{ 0x19c, 9 },{ 0x29d, 10 },
+{ 0x33a, 10 },{ 0x538, 11 },{ 0x63c, 11 },{ 0x63e, 11 },
+{ 0x63f, 11 },{ 0x676, 11 },{ 0xa73, 12 },{ 0xc7a, 12 },
+{ 0xcef, 12 },{ 0x14e5, 13 },{ 0x19dd, 13 },{ 0x29c8, 14 },
+{ 0x29c9, 14 },{ 0x63dd, 15 },{ 0x33b8, 14 },{ 0x33b9, 14 },
+{ 0xc7b6, 16 },{ 0x63d8, 15 },{ 0x63df, 15 },{ 0xc7b3, 16 },
+{ 0xc7b4, 16 },{ 0xc7b5, 16 },{ 0x63de, 15 },{ 0xc7b7, 16 },
+{ 0xc7b8, 16 },{ 0xc7b9, 16 },{ 0x18f65, 17 },{ 0x31ec8, 18 },
+{ 0xc7b248, 24 },{ 0xc7b249, 24 },{ 0xc7b24a, 24 },{ 0xc7b24b, 24 },
+{ 0xc7b24c, 24 },{ 0xc7b24d, 24 },{ 0xc7b24e, 24 },{ 0xc7b24f, 24 },
+{ 0xc7b250, 24 },{ 0xc7b251, 24 },{ 0xc7b252, 24 },{ 0xc7b253, 24 },
+{ 0xc7b254, 24 },{ 0xc7b255, 24 },{ 0xc7b256, 24 },{ 0xc7b257, 24 },
+{ 0xc7b258, 24 },{ 0xc7b259, 24 },{ 0xc7b25a, 24 },{ 0xc7b25b, 24 },
+{ 0xc7b25c, 24 },{ 0xc7b25d, 24 },{ 0xc7b25e, 24 },{ 0xc7b25f, 24 },
+{ 0xc7b260, 24 },{ 0xc7b261, 24 },{ 0xc7b262, 24 },{ 0xc7b263, 24 },
+{ 0xc7b264, 24 },{ 0xc7b265, 24 },{ 0xc7b266, 24 },{ 0xc7b267, 24 },
+{ 0xc7b268, 24 },{ 0xc7b269, 24 },{ 0xc7b26a, 24 },{ 0xc7b26b, 24 },
+{ 0xc7b26c, 24 },{ 0xc7b26d, 24 },{ 0xc7b26e, 24 },{ 0xc7b26f, 24 },
+{ 0xc7b270, 24 },{ 0xc7b271, 24 },{ 0xc7b272, 24 },{ 0xc7b273, 24 },
+{ 0xc7b274, 24 },{ 0xc7b275, 24 },{ 0xc7b276, 24 },{ 0xc7b277, 24 },
+{ 0xc7b278, 24 },{ 0xc7b279, 24 },{ 0xc7b27a, 24 },{ 0xc7b27b, 24 },
+{ 0xc7b27c, 24 },{ 0xc7b27d, 24 },{ 0xc7b27e, 24 },{ 0xc7b27f, 24 },
+{ 0x18f6480, 25 },{ 0x18f6481, 25 },{ 0x18f6482, 25 },{ 0x18f6483, 25 },
+{ 0x18f6484, 25 },{ 0x18f6485, 25 },{ 0x18f6486, 25 },{ 0x18f6487, 25 },
+{ 0x18f6488, 25 },{ 0x18f6489, 25 },{ 0x18f648a, 25 },{ 0x18f648b, 25 },
+{ 0x18f648c, 25 },{ 0x18f648d, 25 },{ 0x18f648e, 25 },{ 0x18f648f, 25 },
+};
+
+/* vlc table 0, for intra luma */
+
+static const uint16_t table0_vlc[133][2] = {
+{ 0x1, 2 },{ 0x6, 3 },{ 0xf, 4 },{ 0x16, 5 },
+{ 0x20, 6 },{ 0x18, 7 },{ 0x8, 8 },{ 0x9a, 8 },
+{ 0x56, 9 },{ 0x13e, 9 },{ 0xf0, 10 },{ 0x3a5, 10 },
+{ 0x77, 11 },{ 0x1ef, 11 },{ 0x9a, 12 },{ 0x5d, 13 },
+{ 0x1, 4 },{ 0x11, 5 },{ 0x2, 7 },{ 0xb, 8 },
+{ 0x12, 9 },{ 0x1d6, 9 },{ 0x27e, 10 },{ 0x191, 11 },
+{ 0xea, 12 },{ 0x3dc, 12 },{ 0x13b, 13 },{ 0x4, 5 },
+{ 0x14, 7 },{ 0x9e, 8 },{ 0x9, 10 },{ 0x1ac, 11 },
+{ 0x1e2, 11 },{ 0x3ca, 12 },{ 0x5f, 13 },{ 0x17, 5 },
+{ 0x4e, 7 },{ 0x5e, 9 },{ 0xf3, 10 },{ 0x1ad, 11 },
+{ 0xec, 12 },{ 0x5f0, 13 },{ 0xe, 6 },{ 0xe1, 8 },
+{ 0x3a4, 10 },{ 0x9c, 12 },{ 0x13d, 13 },{ 0x3b, 6 },
+{ 0x1c, 9 },{ 0x14, 11 },{ 0x9be, 12 },{ 0x6, 7 },
+{ 0x7a, 9 },{ 0x190, 11 },{ 0x137, 13 },{ 0x1b, 7 },
+{ 0x8, 10 },{ 0x75c, 11 },{ 0x71, 7 },{ 0xd7, 10 },
+{ 0x9bf, 12 },{ 0x7, 8 },{ 0xaf, 10 },{ 0x4cc, 11 },
+{ 0x34, 8 },{ 0x265, 10 },{ 0x9f, 12 },{ 0xe0, 8 },
+{ 0x16, 11 },{ 0x327, 12 },{ 0x15, 9 },{ 0x17d, 11 },
+{ 0xebb, 12 },{ 0x14, 9 },{ 0xf6, 10 },{ 0x1e4, 11 },
+{ 0xcb, 10 },{ 0x99d, 12 },{ 0xca, 10 },{ 0x2fc, 12 },
+{ 0x17f, 11 },{ 0x4cd, 11 },{ 0x2fd, 12 },{ 0x4fe, 11 },
+{ 0x13a, 13 },{ 0xa, 4 },{ 0x42, 7 },{ 0x1d3, 9 },
+{ 0x4dd, 11 },{ 0x12, 5 },{ 0xe8, 8 },{ 0x4c, 11 },
+{ 0x136, 13 },{ 0x39, 6 },{ 0x264, 10 },{ 0xeba, 12 },
+{ 0x0, 7 },{ 0xae, 10 },{ 0x99c, 12 },{ 0x1f, 7 },
+{ 0x4de, 11 },{ 0x43, 7 },{ 0x4dc, 11 },{ 0x3, 8 },
+{ 0x3cb, 12 },{ 0x6, 8 },{ 0x99e, 12 },{ 0x2a, 8 },
+{ 0x5f1, 13 },{ 0xf, 8 },{ 0x9fe, 12 },{ 0x33, 8 },
+{ 0x9ff, 12 },{ 0x98, 8 },{ 0x99f, 12 },{ 0xea, 8 },
+{ 0x13c, 13 },{ 0x2e, 8 },{ 0x192, 11 },{ 0x136, 9 },
+{ 0x6a, 9 },{ 0x15, 11 },{ 0x3af, 10 },{ 0x1e3, 11 },
+{ 0x74, 11 },{ 0xeb, 12 },{ 0x2f9, 12 },{ 0x5c, 13 },
+{ 0xed, 12 },{ 0x3dd, 12 },{ 0x326, 12 },{ 0x5e, 13 },
+{ 0x16, 7 },
+};
+
+static const int8_t table0_level[132] = {
+  1,  2,  3,  4,  5,  6,  7,  8,
+  9, 10, 11, 12, 13, 14, 15, 16,
+  1,  2,  3,  4,  5,  6,  7,  8,
+  9, 10, 11,  1,  2,  3,  4,  5,
+  6,  7,  8,  1,  2,  3,  4,  5,
+  6,  7,  1,  2,  3,  4,  5,  1,
+  2,  3,  4,  1,  2,  3,  4,  1,
+  2,  3,  1,  2,  3,  1,  2,  3,
+  1,  2,  3,  1,  2,  3,  1,  2,
+  3,  1,  2,  3,  1,  2,  1,  2,
+  1,  1,  1,  1,  1,  1,  2,  3,
+  4,  1,  2,  3,  4,  1,  2,  3,
+  1,  2,  3,  1,  2,  1,  2,  1,
+  2,  1,  2,  1,  2,  1,  2,  1,
+  2,  1,  2,  1,  2,  1,  2,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,
+};
+
+static const int8_t table0_run[132] = {
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  2,  2,  2,  2,  2,
+  2,  2,  2,  3,  3,  3,  3,  3,
+  3,  3,  4,  4,  4,  4,  4,  5,
+  5,  5,  5,  6,  6,  6,  6,  7,
+  7,  7,  8,  8,  8,  9,  9,  9,
+ 10, 10, 10, 11, 11, 11, 12, 12,
+ 12, 13, 13, 13, 14, 14, 15, 15,
+ 16, 17, 18, 19, 20,  0,  0,  0,
+  0,  1,  1,  1,  1,  2,  2,  2,
+  3,  3,  3,  4,  4,  5,  5,  6,
+  6,  7,  7,  8,  8,  9,  9, 10,
+ 10, 11, 11, 12, 12, 13, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22,
+ 23, 24, 25, 26,
+};
+
+/* vlc table 1, for intra chroma and P macro blocks */
+
+static const uint16_t table1_vlc[149][2] = {
+{ 0x4, 3 },{ 0x14, 5 },{ 0x17, 7 },{ 0x7f, 8 },
+{ 0x154, 9 },{ 0x1f2, 10 },{ 0xbf, 11 },{ 0x65, 12 },
+{ 0xaaa, 12 },{ 0x630, 13 },{ 0x1597, 13 },{ 0x3b7, 14 },
+{ 0x2b22, 14 },{ 0xbe6, 15 },{ 0xb, 4 },{ 0x37, 7 },
+{ 0x62, 9 },{ 0x7, 11 },{ 0x166, 12 },{ 0xce, 13 },
+{ 0x1590, 13 },{ 0x5f6, 14 },{ 0xbe7, 15 },{ 0x7, 5 },
+{ 0x6d, 8 },{ 0x3, 11 },{ 0x31f, 12 },{ 0x5f2, 14 },
+{ 0x2, 6 },{ 0x61, 9 },{ 0x55, 12 },{ 0x1df, 14 },
+{ 0x1a, 6 },{ 0x1e, 10 },{ 0xac9, 12 },{ 0x2b23, 14 },
+{ 0x1e, 6 },{ 0x1f, 10 },{ 0xac3, 12 },{ 0x2b2b, 14 },
+{ 0x6, 7 },{ 0x4, 11 },{ 0x2f8, 13 },{ 0x19, 7 },
+{ 0x6, 11 },{ 0x63d, 13 },{ 0x57, 7 },{ 0x182, 11 },
+{ 0x2aa2, 14 },{ 0x4, 8 },{ 0x180, 11 },{ 0x59c, 14 },
+{ 0x7d, 8 },{ 0x164, 12 },{ 0x76d, 15 },{ 0x2, 9 },
+{ 0x18d, 11 },{ 0x1581, 13 },{ 0xad, 8 },{ 0x60, 12 },
+{ 0xc67, 14 },{ 0x1c, 9 },{ 0xee, 13 },{ 0x3, 9 },
+{ 0x2cf, 13 },{ 0xd9, 9 },{ 0x1580, 13 },{ 0x2, 11 },
+{ 0x183, 11 },{ 0x57, 12 },{ 0x61, 12 },{ 0x31, 11 },
+{ 0x66, 12 },{ 0x631, 13 },{ 0x632, 13 },{ 0xac, 13 },
+{ 0x31d, 12 },{ 0x76, 12 },{ 0x3a, 11 },{ 0x165, 12 },
+{ 0xc66, 14 },{ 0x3, 2 },{ 0x54, 7 },{ 0x2ab, 10 },
+{ 0x16, 13 },{ 0x5f7, 14 },{ 0x5, 4 },{ 0xf8, 9 },
+{ 0xaa9, 12 },{ 0x5f, 15 },{ 0x4, 4 },{ 0x1c, 10 },
+{ 0x1550, 13 },{ 0x4, 5 },{ 0x77, 11 },{ 0x76c, 15 },
+{ 0xe, 5 },{ 0xa, 12 },{ 0xc, 5 },{ 0x562, 11 },
+{ 0x4, 6 },{ 0x31c, 12 },{ 0x6, 6 },{ 0xc8, 13 },
+{ 0xd, 6 },{ 0x1da, 13 },{ 0x7, 6 },{ 0xc9, 13 },
+{ 0x1, 7 },{ 0x2e, 14 },{ 0x14, 7 },{ 0x1596, 13 },
+{ 0xa, 7 },{ 0xac2, 12 },{ 0x16, 7 },{ 0x15b, 14 },
+{ 0x15, 7 },{ 0x15a, 14 },{ 0xf, 8 },{ 0x5e, 15 },
+{ 0x7e, 8 },{ 0xab, 8 },{ 0x2d, 9 },{ 0xd8, 9 },
+{ 0xb, 9 },{ 0x14, 10 },{ 0x2b3, 10 },{ 0x1f3, 10 },
+{ 0x3a, 10 },{ 0x0, 10 },{ 0x58, 10 },{ 0x2e, 9 },
+{ 0x5e, 10 },{ 0x563, 11 },{ 0xec, 12 },{ 0x54, 12 },
+{ 0xac1, 12 },{ 0x1556, 13 },{ 0x2fa, 13 },{ 0x181, 11 },
+{ 0x1557, 13 },{ 0x59d, 14 },{ 0x2aa3, 14 },{ 0x2b2a, 14 },
+{ 0x1de, 14 },{ 0x63c, 13 },{ 0xcf, 13 },{ 0x1594, 13 },
+{ 0xd, 9 },
+};
+
+static const int8_t table1_level[148] = {
+  1,  2,  3,  4,  5,  6,  7,  8,
+  9, 10, 11, 12, 13, 14,  1,  2,
+  3,  4,  5,  6,  7,  8,  9,  1,
+  2,  3,  4,  5,  1,  2,  3,  4,
+  1,  2,  3,  4,  1,  2,  3,  4,
+  1,  2,  3,  1,  2,  3,  1,  2,
+  3,  1,  2,  3,  1,  2,  3,  1,
+  2,  3,  1,  2,  3,  1,  2,  1,
+  2,  1,  2,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  2,  3,  4,  5,  1,  2,
+  3,  4,  1,  2,  3,  1,  2,  3,
+  1,  2,  1,  2,  1,  2,  1,  2,
+  1,  2,  1,  2,  1,  2,  1,  2,
+  1,  2,  1,  2,  1,  2,  1,  2,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,
+};
+
+static const int8_t table1_run[148] = {
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  2,
+  2,  2,  2,  2,  3,  3,  3,  3,
+  4,  4,  4,  4,  5,  5,  5,  5,
+  6,  6,  6,  7,  7,  7,  8,  8,
+  8,  9,  9,  9, 10, 10, 10, 11,
+ 11, 11, 12, 12, 12, 13, 13, 14,
+ 14, 15, 15, 16, 17, 18, 19, 20,
+ 21, 22, 23, 24, 25, 26, 27, 28,
+ 29,  0,  0,  0,  0,  0,  1,  1,
+  1,  1,  2,  2,  2,  3,  3,  3,
+  4,  4,  5,  5,  6,  6,  7,  7,
+  8,  8,  9,  9, 10, 10, 11, 11,
+ 12, 12, 13, 13, 14, 14, 15, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43,
+};
+
+/* third vlc table */
+
+static const uint16_t table2_vlc[186][2] = {
+{ 0x1, 2 },{ 0x5, 3 },{ 0xd, 4 },{ 0x12, 5 },
+{ 0xe, 6 },{ 0x15, 7 },{ 0x13, 8 },{ 0x3f, 8 },
+{ 0x4b, 9 },{ 0x11f, 9 },{ 0xb8, 10 },{ 0x3e3, 10 },
+{ 0x172, 11 },{ 0x24d, 12 },{ 0x3da, 12 },{ 0x2dd, 13 },
+{ 0x1f55, 13 },{ 0x5b9, 14 },{ 0x3eae, 14 },{ 0x0, 4 },
+{ 0x10, 5 },{ 0x8, 7 },{ 0x20, 8 },{ 0x29, 9 },
+{ 0x1f4, 9 },{ 0x233, 10 },{ 0x1e0, 11 },{ 0x12a, 12 },
+{ 0x3dd, 12 },{ 0x50a, 13 },{ 0x1f29, 13 },{ 0xa42, 14 },
+{ 0x1272, 15 },{ 0x1737, 15 },{ 0x3, 5 },{ 0x11, 7 },
+{ 0xc4, 8 },{ 0x4b, 10 },{ 0xb4, 11 },{ 0x7d4, 11 },
+{ 0x345, 12 },{ 0x2d7, 13 },{ 0x7bf, 13 },{ 0x938, 14 },
+{ 0xbbb, 14 },{ 0x95e, 15 },{ 0x13, 5 },{ 0x78, 7 },
+{ 0x69, 9 },{ 0x232, 10 },{ 0x461, 11 },{ 0x3ec, 12 },
+{ 0x520, 13 },{ 0x1f2a, 13 },{ 0x3e50, 14 },{ 0x3e51, 14 },
+{ 0x1486, 15 },{ 0xc, 6 },{ 0x24, 9 },{ 0x94, 11 },
+{ 0x8c0, 12 },{ 0xf09, 14 },{ 0x1ef0, 15 },{ 0x3d, 6 },
+{ 0x53, 9 },{ 0x1a0, 11 },{ 0x2d6, 13 },{ 0xf08, 14 },
+{ 0x13, 7 },{ 0x7c, 9 },{ 0x7c1, 11 },{ 0x4ac, 14 },
+{ 0x1b, 7 },{ 0xa0, 10 },{ 0x344, 12 },{ 0xf79, 14 },
+{ 0x79, 7 },{ 0x3e1, 10 },{ 0x2d4, 13 },{ 0x2306, 14 },
+{ 0x21, 8 },{ 0x23c, 10 },{ 0xfae, 12 },{ 0x23de, 14 },
+{ 0x35, 8 },{ 0x175, 11 },{ 0x7b3, 13 },{ 0xc5, 8 },
+{ 0x174, 11 },{ 0x785, 13 },{ 0x48, 9 },{ 0x1a3, 11 },
+{ 0x49e, 13 },{ 0x2c, 9 },{ 0xfa, 10 },{ 0x7d6, 11 },
+{ 0x92, 10 },{ 0x5cc, 13 },{ 0x1ef1, 15 },{ 0xa3, 10 },
+{ 0x3ed, 12 },{ 0x93e, 14 },{ 0x1e2, 11 },{ 0x1273, 15 },
+{ 0x7c4, 11 },{ 0x1487, 15 },{ 0x291, 12 },{ 0x293, 12 },
+{ 0xf8a, 12 },{ 0x509, 13 },{ 0x508, 13 },{ 0x78d, 13 },
+{ 0x7be, 13 },{ 0x78c, 13 },{ 0x4ae, 14 },{ 0xbba, 14 },
+{ 0x2307, 14 },{ 0xb9a, 14 },{ 0x1736, 15 },{ 0xe, 4 },
+{ 0x45, 7 },{ 0x1f3, 9 },{ 0x47a, 11 },{ 0x5dc, 13 },
+{ 0x23df, 14 },{ 0x19, 5 },{ 0x28, 9 },{ 0x176, 11 },
+{ 0x49d, 13 },{ 0x23dd, 14 },{ 0x30, 6 },{ 0xa2, 10 },
+{ 0x2ef, 12 },{ 0x5b8, 14 },{ 0x3f, 6 },{ 0xa5, 10 },
+{ 0x3db, 12 },{ 0x93f, 14 },{ 0x44, 7 },{ 0x7cb, 11 },
+{ 0x95f, 15 },{ 0x63, 7 },{ 0x3c3, 12 },{ 0x15, 8 },
+{ 0x8f6, 12 },{ 0x17, 8 },{ 0x498, 13 },{ 0x2c, 8 },
+{ 0x7b2, 13 },{ 0x2f, 8 },{ 0x1f54, 13 },{ 0x8d, 8 },
+{ 0x7bd, 13 },{ 0x8e, 8 },{ 0x1182, 13 },{ 0xfb, 8 },
+{ 0x50b, 13 },{ 0x2d, 8 },{ 0x7c0, 11 },{ 0x79, 9 },
+{ 0x1f5f, 13 },{ 0x7a, 9 },{ 0x1f56, 13 },{ 0x231, 10 },
+{ 0x3e4, 10 },{ 0x1a1, 11 },{ 0x143, 11 },{ 0x1f7, 11 },
+{ 0x16f, 12 },{ 0x292, 12 },{ 0x2e7, 12 },{ 0x16c, 12 },
+{ 0x16d, 12 },{ 0x3dc, 12 },{ 0xf8b, 12 },{ 0x499, 13 },
+{ 0x3d8, 12 },{ 0x78e, 13 },{ 0x2d5, 13 },{ 0x1f5e, 13 },
+{ 0x1f2b, 13 },{ 0x78f, 13 },{ 0x4ad, 14 },{ 0x3eaf, 14 },
+{ 0x23dc, 14 },{ 0x4a, 9 },
+};
+
+static const int8_t table2_level[185] = {
+  1,  2,  3,  4,  5,  6,  7,  8,
+  9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19,  1,  2,  3,  4,  5,
+  6,  7,  8,  9, 10, 11, 12, 13,
+ 14, 15,  1,  2,  3,  4,  5,  6,
+  7,  8,  9, 10, 11, 12,  1,  2,
+  3,  4,  5,  6,  7,  8,  9, 10,
+ 11,  1,  2,  3,  4,  5,  6,  1,
+  2,  3,  4,  5,  1,  2,  3,  4,
+  1,  2,  3,  4,  1,  2,  3,  4,
+  1,  2,  3,  4,  1,  2,  3,  1,
+  2,  3,  1,  2,  3,  1,  2,  3,
+  1,  2,  3,  1,  2,  3,  1,  2,
+  1,  2,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  2,  3,  4,  5,  6,  1,  2,  3,
+  4,  5,  1,  2,  3,  4,  1,  2,
+  3,  4,  1,  2,  3,  1,  2,  1,
+  2,  1,  2,  1,  2,  1,  2,  1,
+  2,  1,  2,  1,  2,  1,  2,  1,
+  2,  1,  2,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,
+};
+
+static const int8_t table2_run[185] = {
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  4,  4,  4,  4,  4,  4,  5,
+  5,  5,  5,  5,  6,  6,  6,  6,
+  7,  7,  7,  7,  8,  8,  8,  8,
+  9,  9,  9,  9, 10, 10, 10, 11,
+ 11, 11, 12, 12, 12, 13, 13, 13,
+ 14, 14, 14, 15, 15, 15, 16, 16,
+ 17, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30,  0,
+  0,  0,  0,  0,  0,  1,  1,  1,
+  1,  1,  2,  2,  2,  2,  3,  3,
+  3,  3,  4,  4,  4,  5,  5,  6,
+  6,  7,  7,  8,  8,  9,  9, 10,
+ 10, 11, 11, 12, 12, 13, 13, 14,
+ 14, 15, 15, 16, 17, 18, 19, 20,
+ 21, 22, 23, 24, 25, 26, 27, 28,
+ 29, 30, 31, 32, 33, 34, 35, 36,
+ 37,
+};
+
+/* second non intra vlc table */
+static const uint16_t table4_vlc[169][2] = {
+{ 0x0, 3 },{ 0x3, 4 },{ 0xb, 5 },{ 0x14, 6 },
+{ 0x3f, 6 },{ 0x5d, 7 },{ 0xa2, 8 },{ 0xac, 9 },
+{ 0x16e, 9 },{ 0x20a, 10 },{ 0x2e2, 10 },{ 0x432, 11 },
+{ 0x5c9, 11 },{ 0x827, 12 },{ 0xb54, 12 },{ 0x4e6, 13 },
+{ 0x105f, 13 },{ 0x172a, 13 },{ 0x20b2, 14 },{ 0x2d4e, 14 },
+{ 0x39f0, 14 },{ 0x4175, 15 },{ 0x5a9e, 15 },{ 0x4, 4 },
+{ 0x1e, 5 },{ 0x42, 7 },{ 0xb6, 8 },{ 0x173, 9 },
+{ 0x395, 10 },{ 0x72e, 11 },{ 0xb94, 12 },{ 0x16a4, 13 },
+{ 0x20b3, 14 },{ 0x2e45, 14 },{ 0x5, 5 },{ 0x40, 7 },
+{ 0x49, 9 },{ 0x28f, 10 },{ 0x5cb, 11 },{ 0x48a, 13 },
+{ 0x9dd, 14 },{ 0x73e2, 15 },{ 0x18, 5 },{ 0x25, 8 },
+{ 0x8a, 10 },{ 0x51b, 11 },{ 0xe5f, 12 },{ 0x9c9, 14 },
+{ 0x139c, 15 },{ 0x29, 6 },{ 0x4f, 9 },{ 0x412, 11 },
+{ 0x48d, 13 },{ 0x2e41, 14 },{ 0x38, 6 },{ 0x10e, 9 },
+{ 0x5a8, 11 },{ 0x105c, 13 },{ 0x39f2, 14 },{ 0x58, 7 },
+{ 0x21f, 10 },{ 0xe7e, 12 },{ 0x39ff, 14 },{ 0x23, 8 },
+{ 0x2e3, 10 },{ 0x4e5, 13 },{ 0x2e40, 14 },{ 0xa1, 8 },
+{ 0x5be, 11 },{ 0x9c8, 14 },{ 0x83, 8 },{ 0x13a, 11 },
+{ 0x1721, 13 },{ 0x44, 9 },{ 0x276, 12 },{ 0x39f6, 14 },
+{ 0x8b, 10 },{ 0x4ef, 13 },{ 0x5a9b, 15 },{ 0x208, 10 },
+{ 0x1cfe, 13 },{ 0x399, 10 },{ 0x1cb4, 13 },{ 0x39e, 10 },
+{ 0x39f3, 14 },{ 0x5ab, 11 },{ 0x73e3, 15 },{ 0x737, 11 },
+{ 0x5a9f, 15 },{ 0x82d, 12 },{ 0xe69, 12 },{ 0xe68, 12 },
+{ 0x433, 11 },{ 0xb7b, 12 },{ 0x2df8, 14 },{ 0x2e56, 14 },
+{ 0x2e57, 14 },{ 0x39f7, 14 },{ 0x51a5, 15 },{ 0x3, 3 },
+{ 0x2a, 6 },{ 0xe4, 8 },{ 0x28e, 10 },{ 0x735, 11 },
+{ 0x1058, 13 },{ 0x1cfa, 13 },{ 0x2df9, 14 },{ 0x4174, 15 },
+{ 0x9, 4 },{ 0x54, 8 },{ 0x398, 10 },{ 0x48b, 13 },
+{ 0x139d, 15 },{ 0xd, 4 },{ 0xad, 9 },{ 0x826, 12 },
+{ 0x2d4c, 14 },{ 0x11, 5 },{ 0x16b, 9 },{ 0xb7f, 12 },
+{ 0x51a4, 15 },{ 0x19, 5 },{ 0x21b, 10 },{ 0x16fd, 13 },
+{ 0x1d, 5 },{ 0x394, 10 },{ 0x28d3, 14 },{ 0x2b, 6 },
+{ 0x5bc, 11 },{ 0x5a9a, 15 },{ 0x2f, 6 },{ 0x247, 12 },
+{ 0x10, 7 },{ 0xa35, 12 },{ 0x3e, 6 },{ 0xb7a, 12 },
+{ 0x59, 7 },{ 0x105e, 13 },{ 0x26, 8 },{ 0x9cf, 14 },
+{ 0x55, 8 },{ 0x1cb5, 13 },{ 0x57, 8 },{ 0xe5b, 12 },
+{ 0xa0, 8 },{ 0x1468, 13 },{ 0x170, 9 },{ 0x90, 10 },
+{ 0x1ce, 9 },{ 0x21a, 10 },{ 0x218, 10 },{ 0x168, 9 },
+{ 0x21e, 10 },{ 0x244, 12 },{ 0x736, 11 },{ 0x138, 11 },
+{ 0x519, 11 },{ 0xe5e, 12 },{ 0x72c, 11 },{ 0xb55, 12 },
+{ 0x9dc, 14 },{ 0x20bb, 14 },{ 0x48c, 13 },{ 0x1723, 13 },
+{ 0x2e44, 14 },{ 0x16a5, 13 },{ 0x518, 11 },{ 0x39fe, 14 },
+{ 0x169, 9 },
+};
+
+static const int8_t table4_level[168] = {
+  1,  2,  3,  4,  5,  6,  7,  8,
+  9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23,  1,
+  2,  3,  4,  5,  6,  7,  8,  9,
+ 10, 11,  1,  2,  3,  4,  5,  6,
+  7,  8,  1,  2,  3,  4,  5,  6,
+  7,  1,  2,  3,  4,  5,  1,  2,
+  3,  4,  5,  1,  2,  3,  4,  1,
+  2,  3,  4,  1,  2,  3,  1,  2,
+  3,  1,  2,  3,  1,  2,  3,  1,
+  2,  1,  2,  1,  2,  1,  2,  1,
+  2,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  2,  3,  4,  5,
+  6,  7,  8,  9,  1,  2,  3,  4,
+  5,  1,  2,  3,  4,  1,  2,  3,
+  4,  1,  2,  3,  1,  2,  3,  1,
+  2,  3,  1,  2,  1,  2,  1,  2,
+  1,  2,  1,  2,  1,  2,  1,  2,
+  1,  2,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+};
+
+static const int8_t table4_run[168] = {
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  2,  2,  2,  2,  2,  2,
+  2,  2,  3,  3,  3,  3,  3,  3,
+  3,  4,  4,  4,  4,  4,  5,  5,
+  5,  5,  5,  6,  6,  6,  6,  7,
+  7,  7,  7,  8,  8,  8,  9,  9,
+  9, 10, 10, 10, 11, 11, 11, 12,
+ 12, 13, 13, 14, 14, 15, 15, 16,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  1,  1,  1,  1,
+  1,  2,  2,  2,  2,  3,  3,  3,
+  3,  4,  4,  4,  5,  5,  5,  6,
+  6,  6,  7,  7,  8,  8,  9,  9,
+ 10, 10, 11, 11, 12, 12, 13, 13,
+ 14, 14, 15, 16, 17, 18, 19, 20,
+ 21, 22, 23, 24, 25, 26, 27, 28,
+ 29, 30, 31, 32, 33, 34, 35, 36,
+};
+
+extern const uint16_t inter_vlc[103][2];
+extern const int8_t inter_level[102];
+extern const int8_t inter_run[102];
+
+extern const uint16_t intra_vlc[103][2];
+extern const int8_t intra_level[102];
+extern const int8_t intra_run[102];
+
+extern const uint8_t DCtab_lum[13][2];
+extern const uint8_t DCtab_chrom[13][2];
+
+extern const uint8_t cbpy_tab[16][2];
+extern const uint8_t mvtab[33][2];
+
+extern const uint8_t intra_MCBPC_code[9];
+extern const uint8_t intra_MCBPC_bits[9];
+
+extern const uint8_t inter_MCBPC_code[28];
+extern const uint8_t inter_MCBPC_bits[28];
+
+#define NB_RL_TABLES  6
+
+static RLTable rl_table[NB_RL_TABLES] = {
+    /* intra luminance tables */
+    {
+        132,
+        85,
+        table0_vlc,
+        table0_run,
+        table0_level,
+    },
+    {
+        185,
+        119,
+        table2_vlc,
+        table2_run,
+        table2_level,
+    },
+    {
+        102,
+        67,
+        intra_vlc,
+        intra_run,
+        intra_level,
+    },
+    /* intra chrominance / non intra tables */
+    {
+        148,
+        81,
+        table1_vlc,
+        table1_run,
+        table1_level,
+    },
+    {
+        168,
+        99,
+        table4_vlc,
+        table4_run,
+        table4_level,
+    },
+    {
+        102,
+        58,
+        inter_vlc,
+        inter_run,
+        inter_level,
+    },
+};
+
+/* motion vector table 0 */
+
+static const uint16_t table0_mv_code[1100] = {
+ 0x0001, 0x0003, 0x0005, 0x0007, 0x0003, 0x0008, 0x000c, 0x0001,
+ 0x0002, 0x001b, 0x0006, 0x000b, 0x0015, 0x0002, 0x000e, 0x000f,
+ 0x0014, 0x0020, 0x0022, 0x0025, 0x0027, 0x0029, 0x002d, 0x004b,
+ 0x004d, 0x0003, 0x0022, 0x0023, 0x0025, 0x0027, 0x0042, 0x0048,
+ 0x0049, 0x0050, 0x005c, 0x0091, 0x009f, 0x000e, 0x0043, 0x004c,
+ 0x0054, 0x0056, 0x008c, 0x0098, 0x009a, 0x009b, 0x00b1, 0x00b2,
+ 0x0120, 0x0121, 0x0126, 0x0133, 0x0139, 0x01a1, 0x01a4, 0x01a5,
+ 0x01a6, 0x01a7, 0x01ae, 0x01af, 0x000b, 0x0019, 0x0085, 0x0090,
+ 0x009b, 0x00aa, 0x00af, 0x010c, 0x010e, 0x011c, 0x011e, 0x0133,
+ 0x0144, 0x0160, 0x0174, 0x0175, 0x0177, 0x0178, 0x0249, 0x024b,
+ 0x0252, 0x0261, 0x0265, 0x0270, 0x0352, 0x0353, 0x0355, 0x0359,
+ 0x0010, 0x0011, 0x0013, 0x0034, 0x0035, 0x0036, 0x0037, 0x003d,
+ 0x003e, 0x0109, 0x0126, 0x0156, 0x021a, 0x021e, 0x023a, 0x023e,
+ 0x028e, 0x028f, 0x02cf, 0x0491, 0x0494, 0x049f, 0x04a0, 0x04a3,
+ 0x04a6, 0x04a7, 0x04ad, 0x04ae, 0x04c0, 0x04c4, 0x04c6, 0x04c8,
+ 0x04c9, 0x04f5, 0x04f6, 0x04f7, 0x0680, 0x0682, 0x0683, 0x0688,
+ 0x0689, 0x068d, 0x068e, 0x068f, 0x06a2, 0x06a3, 0x06a9, 0x06b0,
+ 0x06b1, 0x06b4, 0x06b5, 0x0024, 0x0060, 0x0063, 0x0078, 0x0079,
+ 0x0211, 0x0244, 0x0245, 0x0247, 0x0248, 0x0249, 0x024a, 0x024b,
+ 0x026b, 0x02af, 0x02b8, 0x02bb, 0x0436, 0x0476, 0x0477, 0x047e,
+ 0x04c8, 0x04c9, 0x04ca, 0x0514, 0x0586, 0x0587, 0x0598, 0x059d,
+ 0x05d9, 0x05da, 0x0920, 0x0921, 0x093b, 0x093c, 0x093d, 0x0942,
+ 0x0943, 0x0944, 0x0945, 0x0959, 0x095e, 0x095f, 0x0982, 0x0983,
+ 0x098e, 0x098f, 0x09c4, 0x09e7, 0x09e8, 0x09e9, 0x0d02, 0x0d17,
+ 0x0d18, 0x0d19, 0x0d41, 0x0d42, 0x0d43, 0x0d50, 0x0d5f, 0x0d6d,
+ 0x0d6e, 0x0d6f, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+ 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x041e, 0x041f, 0x0420, 0x0421,
+ 0x048c, 0x048d, 0x04d3, 0x04d4, 0x04d5, 0x055c, 0x055d, 0x0572,
+ 0x0573, 0x0574, 0x0575, 0x08de, 0x08df, 0x08fe, 0x08ff, 0x0996,
+ 0x0a36, 0x0a37, 0x0b08, 0x0b09, 0x0b0a, 0x0b0b, 0x0b32, 0x0b33,
+ 0x0b34, 0x0b35, 0x0b36, 0x0b37, 0x0b38, 0x0b39, 0x0bb0, 0x0bf7,
+ 0x0bf8, 0x0bf9, 0x0bfa, 0x0bfb, 0x0bfc, 0x0bfd, 0x0bfe, 0x0bff,
+ 0x1254, 0x1255, 0x1256, 0x1257, 0x1270, 0x1271, 0x1272, 0x1273,
+ 0x1274, 0x1275, 0x12ab, 0x12ac, 0x12ad, 0x12ae, 0x12af, 0x12b0,
+ 0x12b1, 0x1315, 0x1316, 0x1317, 0x13bf, 0x13c0, 0x13c1, 0x13c2,
+ 0x13c3, 0x13c4, 0x13c5, 0x13c6, 0x13c7, 0x13c8, 0x13c9, 0x13ca,
+ 0x13cb, 0x13cc, 0x13cd, 0x1a06, 0x1a07, 0x1a28, 0x1a29, 0x1a2a,
+ 0x1a2b, 0x1a2c, 0x1a2d, 0x1a80, 0x1abb, 0x1abc, 0x1abd, 0x1ad8,
+ 0x1ad9, 0x0094, 0x0095, 0x0096, 0x0097, 0x00a0, 0x00a1, 0x00a2,
+ 0x00a3, 0x0831, 0x0832, 0x0833, 0x0834, 0x0835, 0x0836, 0x0837,
+ 0x0838, 0x0839, 0x083a, 0x083b, 0x0939, 0x093a, 0x093b, 0x093c,
+ 0x093d, 0x093e, 0x093f, 0x09a0, 0x09a1, 0x09a2, 0x09a3, 0x09a4,
+ 0x09a5, 0x11ac, 0x11ad, 0x11ae, 0x11af, 0x11b0, 0x11b1, 0x11b2,
+ 0x11b3, 0x11b4, 0x11b5, 0x11b6, 0x11b7, 0x11b8, 0x11b9, 0x11ba,
+ 0x11bb, 0x132f, 0x1454, 0x1455, 0x1456, 0x1457, 0x1458, 0x1459,
+ 0x145a, 0x145b, 0x145c, 0x145d, 0x145e, 0x145f, 0x1460, 0x1461,
+ 0x1462, 0x1463, 0x1464, 0x1465, 0x1466, 0x1467, 0x1468, 0x1469,
+ 0x146a, 0x146b, 0x17de, 0x17df, 0x17e0, 0x17e1, 0x17e2, 0x17e3,
+ 0x17e4, 0x17e5, 0x17e6, 0x17e7, 0x17e8, 0x17e9, 0x17ea, 0x17eb,
+ 0x17ec, 0x17ed, 0x2540, 0x2541, 0x2542, 0x2543, 0x2544, 0x2545,
+ 0x2546, 0x2547, 0x2548, 0x2549, 0x254a, 0x254b, 0x254c, 0x254d,
+ 0x254e, 0x254f, 0x2550, 0x2551, 0x2552, 0x2553, 0x2554, 0x2555,
+ 0x2628, 0x2766, 0x2767, 0x2768, 0x2769, 0x276a, 0x276b, 0x276c,
+ 0x276d, 0x276e, 0x276f, 0x2770, 0x2771, 0x2772, 0x2773, 0x2774,
+ 0x2775, 0x2776, 0x2777, 0x2778, 0x2779, 0x277a, 0x277b, 0x277c,
+ 0x277d, 0x3503, 0x3544, 0x3545, 0x3546, 0x3547, 0x3560, 0x3561,
+ 0x3562, 0x3563, 0x3564, 0x3565, 0x3566, 0x3567, 0x3568, 0x3569,
+ 0x356a, 0x356b, 0x356c, 0x356d, 0x356e, 0x356f, 0x3570, 0x3571,
+ 0x3572, 0x3573, 0x3574, 0x3575, 0x03f0, 0x103d, 0x103e, 0x103f,
+ 0x1040, 0x1041, 0x1042, 0x1043, 0x1044, 0x1045, 0x1046, 0x1047,
+ 0x1048, 0x1049, 0x104a, 0x104b, 0x104c, 0x104d, 0x104e, 0x104f,
+ 0x1050, 0x1051, 0x1052, 0x1053, 0x1054, 0x1055, 0x1056, 0x1057,
+ 0x1058, 0x1059, 0x105a, 0x105b, 0x105c, 0x105d, 0x105e, 0x105f,
+ 0x1060, 0x1061, 0x1270, 0x1271, 0x21b8, 0x21b9, 0x21ba, 0x21bb,
+ 0x21bc, 0x21bd, 0x21be, 0x21bf, 0x21f0, 0x21f1, 0x21f2, 0x21f3,
+ 0x21f4, 0x21f5, 0x21f6, 0x21f7, 0x21f8, 0x21f9, 0x21fa, 0x21fb,
+ 0x21fc, 0x21fd, 0x21fe, 0x21ff, 0x2340, 0x2341, 0x2342, 0x2343,
+ 0x2344, 0x2345, 0x2346, 0x2347, 0x2348, 0x2349, 0x234a, 0x234b,
+ 0x234c, 0x234d, 0x234e, 0x234f, 0x2350, 0x2351, 0x2352, 0x2353,
+ 0x2354, 0x2355, 0x2356, 0x2357, 0x265c, 0x2f88, 0x2f89, 0x2f8a,
+ 0x2f8b, 0x2f8c, 0x2f8d, 0x2f8e, 0x2f8f, 0x2f90, 0x2f91, 0x2f92,
+ 0x2f93, 0x2f94, 0x2f95, 0x2f96, 0x2f97, 0x2f98, 0x2f99, 0x2f9a,
+ 0x2f9b, 0x2f9c, 0x2f9d, 0x2f9e, 0x2f9f, 0x2fa0, 0x2fa1, 0x2fa2,
+ 0x2fa3, 0x2fa4, 0x2fa5, 0x2fa6, 0x2fa7, 0x2fa8, 0x2fa9, 0x2faa,
+ 0x2fab, 0x2fac, 0x2fad, 0x2fae, 0x2faf, 0x2fb0, 0x2fb1, 0x2fb2,
+ 0x2fb3, 0x2fb4, 0x2fb5, 0x2fb6, 0x2fb7, 0x2fb8, 0x2fb9, 0x2fba,
+ 0x2fbb, 0x4c52, 0x4c53, 0x4e28, 0x4e29, 0x4e2a, 0x4e2b, 0x4e2c,
+ 0x4e2d, 0x4e2e, 0x4e2f, 0x4e30, 0x4e31, 0x4e32, 0x4e33, 0x4e34,
+ 0x4e35, 0x4e36, 0x4e37, 0x4e38, 0x4e39, 0x4e3a, 0x4e3b, 0x4e3c,
+ 0x4e3d, 0x4e3e, 0x4e3f, 0x4e80, 0x4e81, 0x4e82, 0x4e83, 0x4e84,
+ 0x4e85, 0x4e86, 0x4e87, 0x4e88, 0x4e89, 0x4e8a, 0x4e8b, 0x4e8c,
+ 0x4e8d, 0x4e8e, 0x4e8f, 0x4e90, 0x4e91, 0x4e92, 0x4e93, 0x4e94,
+ 0x4e95, 0x4e96, 0x4e97, 0x4e98, 0x4e99, 0x4e9a, 0x4e9b, 0x4e9c,
+ 0x4e9d, 0x4e9e, 0x4e9f, 0x4ea0, 0x4ea1, 0x4ea2, 0x4ea3, 0x4ea4,
+ 0x4ea5, 0x4ea6, 0x4ea7, 0x4ea8, 0x4ea9, 0x4eaa, 0x4eab, 0x4eac,
+ 0x4ead, 0x4eae, 0x4eaf, 0x4eb0, 0x4eb1, 0x4eb2, 0x4eb3, 0x4eb4,
+ 0x4eb5, 0x4eb6, 0x4eb7, 0x4eb8, 0x4eb9, 0x4eba, 0x4ebb, 0x4ebc,
+ 0x4ebd, 0x4ebe, 0x4ebf, 0x4ec0, 0x4ec1, 0x4ec2, 0x4ec3, 0x4ec4,
+ 0x4ec5, 0x4ec6, 0x4ec7, 0x4ec8, 0x4ec9, 0x4eca, 0x4ecb, 0x6a04,
+ 0x6a05, 0x07e2, 0x07e3, 0x07e4, 0x07e5, 0x07e6, 0x07e7, 0x07e8,
+ 0x07e9, 0x07ea, 0x07eb, 0x07ec, 0x07ed, 0x07ee, 0x07ef, 0x07f0,
+ 0x07f1, 0x07f2, 0x07f3, 0x07f4, 0x07f5, 0x07f6, 0x07f7, 0x07f8,
+ 0x07f9, 0x07fa, 0x07fb, 0x07fc, 0x07fd, 0x07fe, 0x07ff, 0x2000,
+ 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008,
+ 0x2009, 0x200a, 0x200b, 0x200c, 0x200d, 0x200e, 0x200f, 0x2010,
+ 0x2011, 0x2012, 0x2013, 0x2014, 0x2015, 0x2016, 0x2017, 0x2018,
+ 0x2019, 0x201a, 0x201b, 0x201c, 0x201d, 0x201e, 0x201f, 0x2020,
+ 0x2021, 0x2022, 0x2023, 0x2024, 0x2025, 0x2026, 0x2027, 0x2028,
+ 0x2029, 0x202a, 0x202b, 0x202c, 0x202d, 0x202e, 0x202f, 0x2030,
+ 0x2031, 0x2032, 0x2033, 0x2034, 0x2035, 0x2036, 0x2037, 0x2038,
+ 0x2039, 0x203a, 0x203b, 0x203c, 0x203d, 0x203e, 0x203f, 0x2040,
+ 0x2041, 0x2042, 0x2043, 0x2044, 0x2045, 0x2046, 0x2047, 0x2048,
+ 0x2049, 0x204a, 0x204b, 0x204c, 0x204d, 0x204e, 0x204f, 0x2050,
+ 0x2051, 0x2052, 0x2053, 0x2054, 0x2055, 0x2056, 0x2057, 0x2058,
+ 0x2059, 0x205a, 0x205b, 0x205c, 0x205d, 0x205e, 0x205f, 0x2060,
+ 0x2061, 0x2062, 0x2063, 0x2064, 0x2065, 0x2066, 0x2067, 0x2068,
+ 0x2069, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0x2070,
+ 0x2071, 0x2072, 0x2073, 0x2074, 0x2075, 0x2076, 0x2077, 0x2078,
+ 0x2079, 0x4cba, 0x4cbb, 0x5d88, 0x5d89, 0x5d8a, 0x5d8b, 0x5d8c,
+ 0x5d8d, 0x5d8e, 0x5d8f, 0x5db0, 0x5db1, 0x5db2, 0x5db3, 0x5db4,
+ 0x5db5, 0x5db6, 0x5db7, 0x5db8, 0x5db9, 0x5dba, 0x5dbb, 0x5dbc,
+ 0x5dbd, 0x5dbe, 0x5dbf, 0x5e40, 0x5e41, 0x5e42, 0x5e43, 0x5e44,
+ 0x5e45, 0x5e46, 0x5e47, 0x5e48, 0x5e49, 0x5e4a, 0x5e4b, 0x5e4c,
+ 0x5e4d, 0x5e4e, 0x5e4f, 0x5e50, 0x5e51, 0x5e52, 0x5e53, 0x5e54,
+ 0x5e55, 0x5e56, 0x5e57, 0x5e58, 0x5e59, 0x5e5a, 0x5e5b, 0x5e5c,
+ 0x5e5d, 0x5e5e, 0x5e5f, 0x5e60, 0x5e61, 0x5e62, 0x5e63, 0x5e64,
+ 0x5e65, 0x5e66, 0x5e67, 0x5e68, 0x5e69, 0x5e6a, 0x5e6b, 0x5e6c,
+ 0x5e6d, 0x5e6e, 0x5e6f, 0x5e70, 0x5e71, 0x5e72, 0x5e73, 0x5e74,
+ 0x5e75, 0x5e76, 0x5e77, 0x5e78, 0x5e79, 0x5e7a, 0x5e7b, 0x5e7c,
+ 0x5e7d, 0x5e7e, 0x5e7f, 0x5e80, 0x5e81, 0x5e82, 0x5e83, 0x5e84,
+ 0x5e85, 0x5e86, 0x5e87, 0x5e88, 0x5e89, 0x5e8a, 0x5e8b, 0x5e8c,
+ 0x5e8d, 0x5e8e, 0x5e8f, 0x5e90, 0x5e91, 0x5e92, 0x5e93, 0x5e94,
+ 0x5e95, 0x5e96, 0x5e97, 0x5e98, 0x5e99, 0x5e9a, 0x5e9b, 0x5e9c,
+ 0x5e9d, 0x5e9e, 0x5e9f, 0x5ea0, 0x5ea1, 0x5ea2, 0x5ea3, 0x5ea4,
+ 0x5ea5, 0x5ea6, 0x5ea7, 0x5ea8, 0x5ea9, 0x5eaa, 0x5eab, 0x5eac,
+ 0x5ead, 0x5eae, 0x5eaf, 0x5eb0, 0x5eb1, 0x5eb2, 0x5eb3, 0x5eb4,
+ 0x5eb5, 0x5eb6, 0x5eb7, 0x5eb8, 0x5eb9, 0x5eba, 0x5ebb, 0x5ebc,
+ 0x5ebd, 0x5ebe, 0x5ebf, 0x5ec0, 0x5ec1, 0x5ec2, 0x5ec3, 0x5ec4,
+ 0x5ec5, 0x5ec6, 0x5ec7, 0x5ec8, 0x5ec9, 0x5eca, 0x5ecb, 0x5ecc,
+ 0x5ecd, 0x5ece, 0x5ecf, 0x5ed0, 0x5ed1, 0x5ed2, 0x5ed3, 0x5ed4,
+ 0x5ed5, 0x5ed6, 0x5ed7, 0x5ed8, 0x5ed9, 0x5eda, 0x5edb, 0x5edc,
+ 0x5edd, 0x5ede, 0x5edf, 0x5ee0, 0x5ee1, 0x5ee2, 0x5ee3, 0x5ee4,
+ 0x5ee5, 0x5ee6, 0x5ee7, 0x5ee8, 0x5ee9, 0x5eea, 0x5eeb, 0x5eec,
+ 0x5eed, 0x5eee, 0x5eef, 0x5ef0, 0x5ef1, 0x5ef2, 0x5ef3, 0x5ef4,
+ 0x5ef5, 0x5ef6, 0x5ef7, 0x5ef8, 0x5ef9, 0x5efa, 0x5efb, 0x5efc,
+ 0x5efd, 0x5efe, 0x5eff, 0x5f00, 0x5f01, 0x5f02, 0x5f03, 0x5f04,
+ 0x5f05, 0x5f06, 0x5f07, 0x5f08, 0x5f09, 0x5f0a, 0x5f0b, 0x5f0c,
+ 0x5f0d, 0x5f0e, 0x5f0f, 0x0000,
+};
+
+static const uint8_t table0_mv_bits[1100] = {
+  1,  4,  4,  4,  5,  5,  5,  6,
+  6,  6,  7,  7,  7,  8,  8,  8,
+  8,  8,  8,  8,  8,  8,  8,  8,
+  8,  9,  9,  9,  9,  9,  9,  9,
+  9,  9,  9,  9,  9, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17,  8,
+};
+
+static const uint8_t table0_mvx[1099] = {
+ 32, 32, 31, 32, 33, 31, 33, 31,
+ 33, 32, 34, 32, 30, 32, 31, 34,
+ 35, 32, 34, 33, 29, 33, 30, 30,
+ 31, 31, 35, 29, 33, 35, 33, 34,
+ 31, 29, 30, 34, 30, 36, 28, 32,
+ 34, 37, 30, 27, 32, 25, 39, 32,
+ 34, 32, 35, 35, 35, 31, 35, 29,
+ 32, 29, 30, 29, 37, 27, 36, 38,
+ 37, 33, 32, 31, 29, 31, 28, 36,
+ 33, 30, 34, 33, 33, 28, 27, 25,
+ 31, 26, 39, 32, 32, 31, 33, 39,
+ 31, 38, 28, 36, 21, 23, 43, 36,
+ 34, 41, 30, 25, 28, 31, 30, 34,
+ 38, 35, 61, 34, 28, 30, 37, 37,
+ 35, 27, 36,  3, 59, 38, 37, 32,
+ 31, 29, 26, 33, 37, 33, 27, 27,
+ 35, 34, 34, 40, 42, 33, 32, 29,
+  4,  5, 28, 24, 25, 35, 39, 38,
+ 32, 23, 27, 32, 30, 35, 26, 34,
+ 60, 36, 29, 22, 26, 41,  7, 30,
+ 38, 30, 36, 29, 30, 41, 26, 25,
+ 32, 34, 24, 39,  1, 25, 39, 32,
+ 28, 29, 32, 38, 26, 36, 28, 63,
+ 28, 39, 23, 21, 26, 35, 31, 35,
+ 57, 31, 29, 29, 28, 30, 27, 35,
+  2, 38, 40, 34, 37, 29, 38, 43,
+ 26, 32, 33, 42, 24, 40, 28, 32,
+ 32, 32, 36, 32, 43, 25, 21, 31,
+ 30, 31, 41, 29, 33, 37, 26, 37,
+ 27, 59, 23, 33, 35, 31, 31, 37,
+ 38, 39, 32, 23, 32, 27, 37, 36,
+ 31, 40, 25, 27, 38, 31, 36, 28,
+ 31, 36, 25, 45,  3, 34, 38, 39,
+ 40, 38, 30, 32, 19, 24, 25, 26,
+ 45, 20, 24, 33, 33, 31, 41, 34,
+ 39, 47, 40, 58, 59, 41, 33,  3,
+ 17, 61, 42, 30, 26, 29, 36, 61,
+ 33, 37, 62, 28, 25, 38, 25, 38,
+ 17, 23, 34, 33, 21, 33, 49, 27,
+ 32, 23, 27, 22, 24, 22, 39, 43,
+ 27, 37,  6, 42, 47, 26, 30, 31,
+ 41, 39, 33, 22, 45, 36, 32, 45,
+ 19, 22, 30,  5,  5, 17, 29, 22,
+ 31, 31, 43, 37, 27, 32, 32, 32,
+ 33, 34, 43, 35, 29, 26, 22, 32,
+ 19, 32, 25, 31, 41, 49, 28, 34,
+ 28, 39, 34, 19, 37, 38, 29, 21,
+ 36, 42, 24, 48, 16, 28, 49, 22,
+ 34, 31, 38, 39, 44, 11, 35, 30,
+ 33, 33, 23, 28, 33, 46, 15, 13,
+ 24, 41, 24, 34, 34, 30, 26, 24,
+ 14, 60, 21, 29, 39, 23, 35, 37,
+ 63, 45, 33, 34, 47, 41, 22, 42,
+ 35, 35, 23, 32, 35, 43, 32,  7,
+ 31, 41, 20, 31, 16, 13, 63, 25,
+ 30, 32, 35, 30, 30, 31, 42, 47,
+ 39, 38, 40, 40, 51, 55, 56, 18,
+ 21, 39, 39, 33, 17, 41, 23, 24,
+ 43, 25, 31, 20, 19, 45,  1, 34,
+ 31, 22, 35, 15, 46, 46, 35, 31,
+ 28, 29, 29, 23, 41, 27, 14, 53,
+ 53, 27, 24, 32, 57, 32, 17, 42,
+ 37, 29, 33,  1, 25, 32, 32, 63,
+ 26, 40, 44, 36, 31, 39, 20, 20,
+ 44, 23, 33, 34, 35, 33, 33, 28,
+ 41, 23, 41, 41, 29, 25, 26, 49,
+ 29, 24, 37, 49, 50, 51, 51, 26,
+ 39, 25, 26, 15, 39, 18, 42, 17,
+  4, 31, 32, 32, 60,  1, 42, 32,
+  0, 12, 19, 35, 21, 41, 17, 26,
+ 20, 45, 46, 32, 37, 22, 47, 29,
+ 31, 27, 29, 30, 21, 33, 35, 18,
+ 25, 33, 50, 51, 42,  2, 15, 51,
+ 53, 33, 25, 29, 55, 37, 38, 33,
+ 38, 59, 38, 33, 39, 13, 32, 40,
+ 61, 61, 32,  9, 44,  3, 31, 29,
+ 25, 31, 27, 23,  9, 25,  9, 29,
+ 20, 30, 30, 42, 18, 28, 25, 28,
+ 28, 21, 29, 43, 29, 43, 26, 44,
+ 44, 21, 38, 21, 24, 45, 45, 35,
+ 39, 22, 35, 36, 34, 34, 45, 34,
+ 29, 31, 46, 25, 46, 16, 17, 31,
+ 20, 32, 47, 47, 47, 32, 49, 49,
+ 49, 31,  1, 27, 28, 39, 39, 21,
+ 36, 23, 51,  2, 40, 51, 32, 53,
+ 24, 30, 24, 30, 21, 40, 57, 57,
+ 31, 41, 58, 32, 12,  4, 32, 34,
+ 59, 31, 32, 13,  9, 35, 26, 35,
+ 37, 61, 37, 63, 26, 29, 41, 38,
+ 23, 20, 41, 26, 41, 42, 42, 42,
+ 26, 26, 26, 26,  1, 26, 37, 37,
+ 37, 23, 34, 42, 27, 43, 34, 27,
+ 31, 24, 33, 16,  3, 31, 24, 33,
+ 24,  4, 44, 44, 11, 44, 31, 13,
+ 13, 44, 45, 13, 25, 22, 38, 26,
+ 38, 38, 39, 32, 30, 39, 30, 22,
+ 32, 26, 30, 47, 47, 47, 19, 47,
+ 30, 31, 35,  8, 23, 47, 47, 27,
+ 35, 47, 31, 48, 35, 19, 36, 49,
+ 49, 33, 31, 39, 27, 39, 49, 49,
+ 50, 50, 50, 39, 31, 51, 51, 39,
+ 28, 33, 33, 21, 40, 31, 52, 53,
+ 40, 53,  9, 33, 31, 53, 54, 54,
+ 54, 55, 55, 34, 15, 56, 25, 56,
+ 21, 21, 40, 40, 25, 40, 58, 36,
+  5, 41, 41, 12, 60, 41, 41, 37,
+ 22, 61, 18, 29, 29, 30, 61, 30,
+ 61, 62, 62, 30, 30, 63, 18, 13,
+ 30, 23, 19, 20, 20, 41, 13,  2,
+  5,  5,  1,  5, 32,  6, 32, 35,
+ 20, 35, 27, 35, 35, 36, 36, 13,
+ 36, 41, 41, 41,  3, 30, 42, 27,
+ 20, 30, 27, 28, 30, 21, 33, 33,
+ 14, 24, 30, 42, 24, 33, 25, 42,
+ 43, 14, 43, 43, 14, 43,  7, 36,
+ 37, 37, 37, 37,  7, 14, 25, 43,
+ 43, 44, 15, 37,  7,  7,  3,  1,
+  8, 15, 15,  8, 44, 44, 44, 45,
+ 45, 45, 45,  8,  8, 45, 21, 45,
+ 28, 28, 28, 21, 28, 28, 22, 37,
+ 46, 46, 37,  8, 29, 37, 29, 22,
+ 46, 37, 22, 29, 47, 47, 38, 38,
+ 16, 38, 38, 33, 38, 22, 47, 47,
+ 29, 25, 16,  0, 48,  1, 34, 48,
+ 48, 34, 25, 26, 26, 49, 49, 26,
+  1, 49,  4, 26,  4, 49,  1,  9,
+ 49, 49, 49, 10, 49, 17, 38, 17,
+ 17, 50, 38, 50, 50, 22, 38, 51,
+ 38, 38, 51, 39, 39, 18, 22, 39,
+ 51, 22, 52, 52, 52, 39, 53, 53,
+ 10, 23, 18, 29, 10, 53, 29, 54,
+ 11, 54, 11, 11, 55,  1, 18, 55,
+ 55, 55, 55, 55, 55, 29, 34, 18,
+ 29, 56, 56, 34, 57, 34, 34, 29,
+ 29, 57, 57, 35, 35, 35, 35, 35,
+ 39, 35, 59, 59, 18, 59, 39, 30,
+ 18, 40, 60, 60, 61, 30, 18, 61,
+ 61, 19, 19,
+};
+
+static const uint8_t table0_mvy[1099] = {
+ 32, 31, 32, 33, 32, 31, 31, 33,
+ 33, 34, 32, 30, 32, 35, 34, 31,
+ 32, 29, 33, 30, 32, 34, 33, 31,
+ 30, 35, 31, 31, 29, 33, 35, 30,
+ 29, 33, 34, 34, 30, 32, 32, 36,
+ 29, 32, 35, 32, 28, 32, 32, 27,
+ 35, 37, 34, 29, 30, 36, 35, 34,
+ 25, 30, 29, 35, 33, 31, 31, 32,
+ 31, 28, 39, 28, 29, 37, 31, 33,
+ 27, 36, 28, 36, 37, 33, 33, 31,
+ 27, 32, 31, 38, 26, 25, 25, 33,
+ 39, 31, 34, 30, 32, 32, 32, 34,
+ 36, 32, 28, 33, 30, 38, 37, 27,
+ 33, 28, 32, 37, 35, 38, 29, 34,
+ 27, 29, 29, 32, 32, 34, 35,  3,
+ 26, 36, 31, 38, 30, 26, 35, 34,
+ 37, 26, 25, 32, 32, 39, 23, 37,
+ 32, 32, 29, 32, 29, 36, 29, 30,
+ 41, 31, 30, 21, 39, 25, 34, 38,
+ 32, 35, 39, 32, 33, 33, 32, 27,
+ 29, 25, 28, 27, 26, 31, 30, 35,
+ 24, 24, 31, 34, 32, 30, 35, 40,
+ 28, 38,  5, 35, 29, 36, 36, 32,
+ 38, 30, 33, 31, 35, 26, 23, 38,
+ 32, 41, 28, 25, 37, 40, 37, 39,
+ 32, 36, 33, 39, 25, 26, 28, 31,
+ 28, 42, 23, 31, 33, 31, 39,  1,
+ 59, 22, 27,  4, 33, 34, 33, 24,
+ 41,  3, 35, 41, 41, 28, 36, 36,
+ 28, 33, 35, 21, 23, 21, 22, 37,
+ 27, 27, 43, 29, 60, 39, 27, 25,
+ 59, 34, 27, 27, 26, 40, 37, 27,
+ 61, 26, 39, 33, 31, 22, 37, 25,
+ 30, 25, 24, 61, 31, 34, 25, 38,
+ 32, 32, 30,  3, 61, 43, 29, 23,
+ 28, 32, 28, 32, 31, 34,  5, 33,
+ 32, 33, 33, 42, 37, 23, 38, 31,
+ 40, 26, 32, 26, 37, 38, 36, 24,
+ 29, 30, 20, 22, 29, 24, 32, 41,
+  2, 34, 25, 33, 29, 31, 39, 35,
+ 36, 24, 32, 30, 33, 27, 44, 60,
+ 30, 36, 19, 34, 31, 24, 16, 35,
+ 32, 38, 21, 33, 31, 31, 21, 35,
+  5, 17, 29, 38, 38, 18, 58, 19,
+ 43, 41, 30, 41, 43, 39, 29,  7,
+ 29, 17, 28, 19, 28, 31, 25, 19,
+ 40, 26, 21, 33, 39, 23, 40, 30,
+ 39, 34, 35, 32, 32, 24, 33, 30,
+ 40, 47, 39, 37, 32, 33, 24, 23,
+ 45, 47, 27, 23, 42, 32, 32, 33,
+ 36, 37, 37, 17, 18, 22, 40, 38,
+ 32, 31, 35, 24, 17, 25, 17, 23,
+ 33, 34, 51, 42, 31, 36, 36, 29,
+ 21, 22, 37, 44, 43, 25, 47, 33,
+ 45, 27, 31, 58, 31, 32, 31, 38,
+ 43, 20, 47, 45, 54,  1, 26, 34,
+ 38, 14, 22, 24, 33, 34, 32, 32,
+ 37, 21, 23, 49, 35, 23, 28, 39,
+ 39, 23, 55, 33, 30, 30, 63, 16,
+ 42, 28, 13, 33, 33, 35, 19, 46,
+ 43, 17, 19, 36, 39, 24, 31, 32,
+ 33, 26, 28, 62, 33, 63, 33, 39,
+ 19, 49, 17, 31, 43, 13, 15, 29,
+ 25, 35, 33, 23, 49, 41, 28, 29,
+ 34, 38,  7, 61, 11, 50, 13, 41,
+ 19, 47, 25, 26, 15, 42, 41, 29,
+ 45, 27, 17, 35, 32, 29, 32, 24,
+ 13, 26, 26, 31, 24, 33, 28, 30,
+ 31, 11, 45, 46, 33, 33, 35, 57,
+ 32, 32, 35, 45, 34, 11, 37, 42,
+ 39, 37, 31, 49, 21, 27, 29, 47,
+ 53, 40, 51, 16, 26,  1, 40, 30,
+ 41, 44, 34, 25, 27, 31, 35, 35,
+ 31, 15, 49,  1, 35, 40,  5, 58,
+ 21, 29, 22, 59, 45, 31,  9, 26,
+  9, 29, 11, 32, 30,  3, 13, 20,
+ 18, 20, 11,  3, 29, 40, 31, 53,
+ 30, 17, 20, 37, 31, 42, 47, 47,
+ 54, 38,  9, 34, 13, 37, 21, 25,
+ 27, 43, 42, 45, 40, 25, 27, 46,
+ 22, 25, 53, 20,  2, 14, 39, 15,
+ 22, 44, 34, 21, 38, 33, 27, 48,
+ 34, 52, 35, 47, 49, 54,  2, 13,
+ 23, 52, 29, 45, 22, 49, 54, 21,
+ 40, 42, 31, 30, 29, 34,  0, 25,
+ 23, 51, 24, 59, 28, 38, 29, 31,
+  2, 13, 31,  8, 31, 33, 12, 45,
+ 41,  7, 14, 30, 25, 18, 43, 20,
+ 43, 35, 44,  1, 49, 42, 42, 18,
+ 41, 38, 41, 44, 53, 11, 20, 25,
+ 45, 46, 47, 48, 39, 52, 46, 49,
+ 63, 55, 44, 38, 13, 13, 57, 22,
+ 51, 16, 12, 28, 35, 57, 25, 20,
+ 26, 28, 28, 29, 32, 31, 62, 34,
+ 35, 35, 19, 49, 48, 39, 40, 18,
+ 43, 46, 11,  6, 48, 19, 49, 41,
+ 10, 23, 58, 17, 21, 23, 34, 30,
+ 60,  0, 44, 34, 26, 37, 46, 43,
+ 49, 59,  4, 34, 59, 37, 22, 25,
+ 28, 46,  6, 40, 59, 42, 36, 61,
+ 28, 30, 31, 43, 10, 22, 23, 47,
+ 20, 52, 55, 36, 25, 16,  1, 11,
+ 27, 29,  5, 63, 18, 41, 31, 34,
+ 38,  1,  5, 13, 28, 31, 17, 38,
+ 39, 41, 36, 37, 22, 39, 33, 43,
+ 43, 15, 17, 49, 30, 21, 22, 20,
+ 10, 17, 25, 54, 57,  3, 34,  8,
+ 36, 25, 31, 14, 15, 19, 29, 25,
+ 18, 39, 53, 22, 27, 20, 29, 33,
+ 41, 42, 35, 62, 50, 29, 53, 50,
+ 35, 55, 42, 61, 63,  4,  7, 42,
+ 21, 46, 47, 49, 27, 46, 17, 55,
+ 41, 50, 63,  4, 56, 18,  8, 10,
+ 18, 51, 63, 36, 55, 18,  5, 55,
+  9, 29, 17, 21, 30, 27,  1, 59,
+  7, 11, 12, 15,  5, 42, 24, 41,
+ 43,  7, 27, 22, 25, 31, 30, 37,
+ 22, 39, 53, 29, 36, 37, 48,  0,
+  5, 13, 17, 31, 32, 26, 46, 28,
+ 44, 45, 46, 53, 49, 51,  3, 41,
+  3, 22, 42, 33,  5, 45,  7, 22,
+ 40, 53, 24, 14, 25, 27, 10, 12,
+ 34, 16, 17, 53, 20, 26, 39, 45,
+ 18, 45, 35, 33, 31, 49,  4, 39,
+ 42, 11, 51,  5, 13, 26, 27, 17,
+ 52, 30,  0, 22, 12, 34, 62, 36,
+ 38, 41, 47, 30, 63, 38, 41, 43,
+ 59, 33, 45, 37, 38, 40, 47, 24,
+ 48, 49, 30,  1, 10, 22, 49, 15,
+ 39, 59, 31, 32, 33, 18, 13, 15,
+ 31, 21, 27, 44, 42, 39, 46, 17,
+ 26, 32, 30, 31,  0, 30, 34,  9,
+ 12, 13, 25, 31, 32, 55, 43, 35,
+ 61, 33, 35, 46, 25, 47, 48, 62,
+ 63, 38, 61,  1,  2,  5,  7,  9,
+ 46, 10, 34, 35, 36, 55, 51,  7,
+ 40, 23, 34, 37,  5, 13, 42, 18,
+ 25, 27, 28,
+};
+
+/* motion vector table 1 */
+static const uint16_t table1_mv_code[1100] = {
+ 0x0000, 0x0007, 0x0009, 0x000f, 0x000a, 0x0011, 0x001a, 0x001c,
+ 0x0011, 0x0031, 0x0025, 0x002d, 0x002f, 0x006f, 0x0075, 0x0041,
+ 0x004c, 0x004e, 0x005c, 0x0060, 0x0062, 0x0066, 0x0068, 0x0069,
+ 0x006b, 0x00a6, 0x00c1, 0x00cb, 0x00cc, 0x00ce, 0x00da, 0x00e8,
+ 0x00ee, 0x0087, 0x0090, 0x009e, 0x009f, 0x00ba, 0x00ca, 0x00d8,
+ 0x00db, 0x00df, 0x0104, 0x0109, 0x010c, 0x0143, 0x0145, 0x014a,
+ 0x0156, 0x015c, 0x01b3, 0x01d3, 0x01da, 0x0103, 0x0109, 0x010b,
+ 0x0122, 0x0127, 0x0134, 0x0161, 0x0164, 0x0176, 0x0184, 0x018d,
+ 0x018e, 0x018f, 0x0190, 0x0193, 0x0196, 0x019d, 0x019e, 0x019f,
+ 0x01a9, 0x01b2, 0x01b4, 0x01ba, 0x01bb, 0x01bc, 0x0201, 0x0202,
+ 0x0205, 0x0207, 0x020d, 0x0210, 0x0211, 0x0215, 0x021b, 0x021f,
+ 0x0281, 0x0285, 0x0290, 0x029c, 0x029d, 0x02a2, 0x02a7, 0x02a8,
+ 0x02aa, 0x02b0, 0x02b1, 0x02b4, 0x02bc, 0x02bf, 0x0320, 0x0326,
+ 0x0327, 0x0329, 0x032a, 0x0336, 0x0360, 0x0362, 0x0363, 0x0372,
+ 0x03b2, 0x03bc, 0x03bd, 0x0203, 0x0205, 0x021a, 0x0249, 0x024a,
+ 0x024c, 0x02c7, 0x02ca, 0x02ce, 0x02ef, 0x030d, 0x0322, 0x0325,
+ 0x0338, 0x0373, 0x037a, 0x0409, 0x0415, 0x0416, 0x0418, 0x0428,
+ 0x042d, 0x042f, 0x0434, 0x0508, 0x0509, 0x0510, 0x0511, 0x051c,
+ 0x051e, 0x0524, 0x0541, 0x0543, 0x0546, 0x0547, 0x054d, 0x0557,
+ 0x055f, 0x056a, 0x056c, 0x056d, 0x056f, 0x0576, 0x0577, 0x057a,
+ 0x057b, 0x057c, 0x057d, 0x0600, 0x0601, 0x0603, 0x0614, 0x0616,
+ 0x0617, 0x061c, 0x061f, 0x0642, 0x0648, 0x0649, 0x064a, 0x064b,
+ 0x0657, 0x0668, 0x0669, 0x066b, 0x066e, 0x067f, 0x06c2, 0x06c8,
+ 0x06cb, 0x06de, 0x06df, 0x06e2, 0x06e3, 0x06ef, 0x0748, 0x074b,
+ 0x076e, 0x076f, 0x077c, 0x0409, 0x0423, 0x0428, 0x0429, 0x042a,
+ 0x042b, 0x0432, 0x0433, 0x0496, 0x049a, 0x04d5, 0x04db, 0x0581,
+ 0x0582, 0x058b, 0x058c, 0x058d, 0x0598, 0x0599, 0x059a, 0x059e,
+ 0x05dd, 0x0619, 0x0632, 0x0633, 0x0648, 0x0672, 0x06a1, 0x06a2,
+ 0x06a3, 0x06af, 0x06e2, 0x06e3, 0x06e4, 0x0800, 0x0801, 0x0802,
+ 0x0803, 0x081a, 0x081b, 0x0829, 0x082f, 0x0832, 0x083e, 0x083f,
+ 0x0852, 0x0853, 0x0858, 0x086b, 0x0877, 0x0878, 0x0879, 0x087a,
+ 0x087b, 0x0a00, 0x0a01, 0x0a0d, 0x0a0e, 0x0a0f, 0x0a24, 0x0a37,
+ 0x0a3a, 0x0a3b, 0x0a3e, 0x0a46, 0x0a47, 0x0a4a, 0x0a4b, 0x0a5f,
+ 0x0a79, 0x0a7a, 0x0a7b, 0x0a80, 0x0a81, 0x0a84, 0x0a85, 0x0a99,
+ 0x0aa5, 0x0aa6, 0x0ab8, 0x0aba, 0x0abb, 0x0abc, 0x0abd, 0x0ac8,
+ 0x0ace, 0x0acf, 0x0ad7, 0x0adc, 0x0aeb, 0x0c04, 0x0c25, 0x0c26,
+ 0x0c27, 0x0c2a, 0x0c2b, 0x0c3a, 0x0c3b, 0x0c3c, 0x0c3d, 0x0ca0,
+ 0x0cad, 0x0cd4, 0x0cd5, 0x0cfc, 0x0cfd, 0x0d86, 0x0d92, 0x0d93,
+ 0x0d94, 0x0d95, 0x0db0, 0x0db8, 0x0db9, 0x0dba, 0x0dbb, 0x0dc0,
+ 0x0dc2, 0x0dc3, 0x0dda, 0x0ddb, 0x0ddc, 0x0ddd, 0x0e92, 0x0e93,
+ 0x0e94, 0x0e95, 0x0ec7, 0x0ecc, 0x0ece, 0x0ecf, 0x0ed8, 0x0ed9,
+ 0x0eda, 0x0edb, 0x0808, 0x0809, 0x080a, 0x0810, 0x0811, 0x0844,
+ 0x0845, 0x0861, 0x0862, 0x0863, 0x086c, 0x0922, 0x0923, 0x092e,
+ 0x092f, 0x0936, 0x0937, 0x09b1, 0x09b2, 0x09b3, 0x09b4, 0x09b5,
+ 0x09b8, 0x09b9, 0x09ba, 0x09bb, 0x09bc, 0x09bd, 0x09be, 0x09bf,
+ 0x0b00, 0x0b15, 0x0b2c, 0x0b2d, 0x0b2e, 0x0b2f, 0x0b36, 0x0bb9,
+ 0x0c28, 0x0c2a, 0x0c2b, 0x0c2c, 0x0c2d, 0x0c2e, 0x0c2f, 0x0c30,
+ 0x0c31, 0x0c38, 0x0c60, 0x0c61, 0x0c62, 0x0c63, 0x0c8d, 0x0c8e,
+ 0x0c8f, 0x0c92, 0x0cbe, 0x0cbf, 0x0ce6, 0x0ce7, 0x0d40, 0x0d41,
+ 0x0d57, 0x0d58, 0x0d59, 0x0d5a, 0x0d5b, 0x0d5c, 0x0d5d, 0x0d98,
+ 0x0d99, 0x0d9a, 0x0d9b, 0x0d9c, 0x0d9d, 0x0dad, 0x0dae, 0x0daf,
+ 0x0dc0, 0x0dc1, 0x0dc2, 0x0dc3, 0x0dca, 0x0dcb, 0x0dec, 0x0ded,
+ 0x0dee, 0x0def, 0x1018, 0x1022, 0x1023, 0x1030, 0x1031, 0x1032,
+ 0x1033, 0x1050, 0x1051, 0x105c, 0x1074, 0x1075, 0x1076, 0x1077,
+ 0x1078, 0x1079, 0x107a, 0x107b, 0x10b2, 0x10b3, 0x10b8, 0x10b9,
+ 0x10ba, 0x10bb, 0x10d4, 0x10ea, 0x10eb, 0x10ec, 0x10ed, 0x1404,
+ 0x1405, 0x1406, 0x1407, 0x1410, 0x1411, 0x1412, 0x1413, 0x1414,
+ 0x1415, 0x1416, 0x1417, 0x1418, 0x1419, 0x1466, 0x1467, 0x1468,
+ 0x1469, 0x146a, 0x146b, 0x146c, 0x146d, 0x147e, 0x147f, 0x1488,
+ 0x1489, 0x148a, 0x148b, 0x14b6, 0x14b7, 0x14b8, 0x14b9, 0x14ba,
+ 0x14bb, 0x14bc, 0x14bd, 0x14f0, 0x14f1, 0x14f8, 0x14f9, 0x14fa,
+ 0x14fb, 0x14fc, 0x14fd, 0x14fe, 0x14ff, 0x152a, 0x152b, 0x152c,
+ 0x152d, 0x152e, 0x152f, 0x1530, 0x1531, 0x1548, 0x1549, 0x154e,
+ 0x154f, 0x1558, 0x1559, 0x155a, 0x155b, 0x1572, 0x159a, 0x159b,
+ 0x15ac, 0x15ba, 0x15bb, 0x15d0, 0x15d1, 0x15d2, 0x15d3, 0x15d4,
+ 0x15d5, 0x181d, 0x181e, 0x181f, 0x1840, 0x1841, 0x1842, 0x1843,
+ 0x1844, 0x1845, 0x1846, 0x1847, 0x1848, 0x1849, 0x1861, 0x1862,
+ 0x1863, 0x1864, 0x1865, 0x1866, 0x1867, 0x1868, 0x1869, 0x186a,
+ 0x186b, 0x186c, 0x186d, 0x186e, 0x191b, 0x191c, 0x191d, 0x191e,
+ 0x191f, 0x1942, 0x1943, 0x1944, 0x1945, 0x1946, 0x1947, 0x1958,
+ 0x1959, 0x19ed, 0x19ee, 0x19ef, 0x19f0, 0x19f1, 0x19f2, 0x19f3,
+ 0x19f4, 0x19f5, 0x19f6, 0x19f7, 0x1b0e, 0x1b0f, 0x1b62, 0x1b63,
+ 0x1b64, 0x1b65, 0x1b66, 0x1b67, 0x1b68, 0x1b69, 0x1b6a, 0x1b6b,
+ 0x1b6c, 0x1b6d, 0x1b6e, 0x1b6f, 0x1b82, 0x1ba8, 0x1ba9, 0x1baa,
+ 0x1bab, 0x1bac, 0x1bad, 0x1bae, 0x1baf, 0x1bb0, 0x1bb1, 0x1bb2,
+ 0x1bb3, 0x1d80, 0x1d81, 0x1d82, 0x1d83, 0x1d84, 0x1d85, 0x1d86,
+ 0x1d87, 0x1d88, 0x1d89, 0x1d8a, 0x1d8b, 0x1d8c, 0x1d8d, 0x1007,
+ 0x1008, 0x1009, 0x100a, 0x100b, 0x100c, 0x100d, 0x100e, 0x100f,
+ 0x1016, 0x1080, 0x1081, 0x1082, 0x1083, 0x1084, 0x1085, 0x1086,
+ 0x1087, 0x10c0, 0x123a, 0x123b, 0x123c, 0x123d, 0x123e, 0x123f,
+ 0x1240, 0x1241, 0x1242, 0x1243, 0x1350, 0x1352, 0x1353, 0x1358,
+ 0x1359, 0x135a, 0x135b, 0x135c, 0x135d, 0x135e, 0x135f, 0x1360,
+ 0x1361, 0x1602, 0x1603, 0x160c, 0x160d, 0x160e, 0x160f, 0x1620,
+ 0x1621, 0x1622, 0x1623, 0x1624, 0x1625, 0x1626, 0x1627, 0x1628,
+ 0x1629, 0x166e, 0x166f, 0x167c, 0x167d, 0x167e, 0x167f, 0x1770,
+ 0x1771, 0x1852, 0x1853, 0x1872, 0x1873, 0x1874, 0x1875, 0x1876,
+ 0x1877, 0x1878, 0x1879, 0x187a, 0x187b, 0x187c, 0x187d, 0x187e,
+ 0x187f, 0x1918, 0x1919, 0x1926, 0x1927, 0x1970, 0x1971, 0x1972,
+ 0x1973, 0x1974, 0x1975, 0x1976, 0x1977, 0x1978, 0x1979, 0x197a,
+ 0x197b, 0x1aa0, 0x1aa1, 0x1aa2, 0x1aa3, 0x1aa4, 0x1aa5, 0x1aa6,
+ 0x1aa7, 0x1aa8, 0x1aa9, 0x1aaa, 0x1aab, 0x1aac, 0x1aad, 0x1b3c,
+ 0x1b3d, 0x1b3e, 0x1b3f, 0x1b50, 0x1b51, 0x1b52, 0x1b53, 0x1b54,
+ 0x1b55, 0x1b56, 0x1b57, 0x1b58, 0x1b59, 0x2032, 0x2033, 0x2034,
+ 0x2035, 0x2036, 0x2037, 0x2038, 0x2039, 0x203a, 0x203b, 0x203c,
+ 0x203d, 0x203e, 0x203f, 0x2040, 0x2041, 0x2042, 0x2043, 0x20ba,
+ 0x20bb, 0x20cc, 0x20cd, 0x20ce, 0x20cf, 0x20e0, 0x20e1, 0x20e2,
+ 0x20e3, 0x20e4, 0x20e5, 0x20e6, 0x20e7, 0x21aa, 0x21ab, 0x21c0,
+ 0x21c1, 0x21c2, 0x21c3, 0x21c4, 0x21c5, 0x21c6, 0x21c7, 0x21c8,
+ 0x21c9, 0x21ca, 0x21cb, 0x21cc, 0x21cd, 0x21ce, 0x21cf, 0x21d0,
+ 0x21d1, 0x21d2, 0x21d3, 0x2894, 0x2895, 0x2896, 0x2897, 0x2898,
+ 0x2899, 0x289a, 0x289b, 0x289c, 0x289d, 0x289e, 0x289f, 0x28c0,
+ 0x28c1, 0x28c2, 0x28c3, 0x28c4, 0x28c5, 0x28c6, 0x28c7, 0x28c8,
+ 0x28c9, 0x28ca, 0x28cb, 0x2930, 0x2931, 0x2932, 0x2933, 0x2934,
+ 0x2935, 0x2936, 0x2937, 0x2938, 0x2939, 0x293a, 0x293b, 0x293c,
+ 0x293d, 0x293e, 0x293f, 0x2960, 0x2961, 0x2962, 0x2963, 0x2964,
+ 0x2965, 0x2966, 0x2967, 0x2968, 0x2969, 0x296a, 0x296b, 0x2a40,
+ 0x2a41, 0x2a42, 0x2a43, 0x2a44, 0x2a45, 0x2a46, 0x2a47, 0x2a48,
+ 0x2a49, 0x2a4a, 0x2a4b, 0x2a4c, 0x2a4d, 0x2a4e, 0x2a4f, 0x2a50,
+ 0x2a51, 0x2a52, 0x2a53, 0x2ae6, 0x2ae7, 0x2b24, 0x2b25, 0x2b26,
+ 0x2b27, 0x2b28, 0x2b29, 0x2b2a, 0x2b2b, 0x2b2c, 0x2b2d, 0x2b2e,
+ 0x2b2f, 0x2b30, 0x2b31, 0x2b32, 0x2b33, 0x2b5a, 0x2b5b, 0x3014,
+ 0x3015, 0x3016, 0x3017, 0x3020, 0x3021, 0x3022, 0x3023, 0x3024,
+ 0x3025, 0x3026, 0x3027, 0x3028, 0x3029, 0x302a, 0x302b, 0x302c,
+ 0x302d, 0x302e, 0x302f, 0x3030, 0x3031, 0x3032, 0x3033, 0x3034,
+ 0x3035, 0x3036, 0x3037, 0x3038, 0x3039, 0x30c0, 0x30c1, 0x30de,
+ 0x30df, 0x3218, 0x3219, 0x321a, 0x321b, 0x321c, 0x321d, 0x321e,
+ 0x321f, 0x3220, 0x3221, 0x3222, 0x3223, 0x3224, 0x3225, 0x3226,
+ 0x3227, 0x3228, 0x3229, 0x322a, 0x322b, 0x322c, 0x322d, 0x322e,
+ 0x322f, 0x3230, 0x3231, 0x3232, 0x3233, 0x3234, 0x3235, 0x3378,
+ 0x3379, 0x337a, 0x337b, 0x337c, 0x337d, 0x337e, 0x337f, 0x33c0,
+ 0x33c1, 0x33c2, 0x33c3, 0x33c4, 0x33c5, 0x33c6, 0x33c7, 0x33c8,
+ 0x33c9, 0x33ca, 0x33cb, 0x33cc, 0x33cd, 0x33ce, 0x33cf, 0x33d0,
+ 0x33d1, 0x33d2, 0x33d3, 0x33d4, 0x33d5, 0x33d6, 0x33d7, 0x33d8,
+ 0x33d9, 0x3706, 0x3707, 0x3730, 0x3731, 0x3732, 0x3733, 0x3734,
+ 0x3735, 0x3736, 0x3737, 0x3738, 0x3739, 0x373a, 0x373b, 0x373c,
+ 0x373d, 0x373e, 0x373f, 0x3740, 0x3741, 0x3742, 0x3743, 0x3744,
+ 0x3745, 0x3746, 0x3747, 0x3748, 0x3749, 0x374a, 0x374b, 0x374c,
+ 0x374d, 0x374e, 0x374f, 0x3b34, 0x3b35, 0x3b36, 0x3b37, 0x3be8,
+ 0x3be9, 0x3bea, 0x3beb, 0x3bec, 0x3bed, 0x3bee, 0x3bef, 0x3bf0,
+ 0x3bf1, 0x3bf2, 0x3bf3, 0x3bf4, 0x3bf5, 0x3bf6, 0x3bf7, 0x3bf8,
+ 0x3bf9, 0x3bfa, 0x3bfb, 0x3bfc, 0x3bfd, 0x3bfe, 0x3bff, 0x2000,
+ 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008,
+ 0x2009, 0x200a, 0x200b, 0x200c, 0x200d, 0x202e, 0x202f, 0x2182,
+ 0x2183, 0x21b4, 0x21b5, 0x21b6, 0x21b7, 0x21b8, 0x21b9, 0x21ba,
+ 0x21bb, 0x21bc, 0x21bd, 0x21be, 0x21bf, 0x2460, 0x2461, 0x2462,
+ 0x2463, 0x2464, 0x2465, 0x2466, 0x2467, 0x2468, 0x2469, 0x246a,
+ 0x246b, 0x246c, 0x246d, 0x246e, 0x246f, 0x2470, 0x2471, 0x2472,
+ 0x2473, 0x26a2, 0x26a3, 0x000b,
+};
+
+static const uint8_t table1_mv_bits[1100] = {
+  2,  4,  4,  4,  5,  5,  5,  5,
+  6,  6,  7,  7,  7,  7,  7,  8,
+  8,  8,  8,  8,  8,  8,  8,  8,
+  8,  8,  8,  8,  8,  8,  8,  8,
+  8,  9,  9,  9,  9,  9,  9,  9,
+  9,  9,  9,  9,  9,  9,  9,  9,
+  9,  9,  9,  9,  9, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15,  4,
+};
+
+static const uint8_t table1_mvx[1099] = {
+ 32, 31, 32, 31, 33, 32, 33, 33,
+ 31, 34, 30, 32, 32, 34, 35, 32,
+ 34, 33, 29, 30, 30, 32, 31, 31,
+ 33, 35, 35, 33, 31, 29, 29, 33,
+ 34, 30, 31, 28, 36, 30, 34, 32,
+ 32, 37, 32, 32, 25, 27, 39, 32,
+ 32, 32, 38, 35, 36, 32, 37, 61,
+ 26, 32, 34, 35,  3, 35, 27, 28,
+ 29, 34, 28, 37, 31, 36, 32, 27,
+ 31, 30, 29, 39, 33, 29, 33, 35,
+ 25, 25, 29, 33, 31, 31, 31, 33,
+ 32, 30, 32, 32, 41, 39, 33, 36,
+ 32, 28, 34, 36, 38, 24, 60, 31,
+ 23, 28, 32, 33, 59, 32, 40, 30,
+  5, 34, 32, 38, 32, 30, 43,  4,
+ 32, 32, 42, 31, 31, 32, 26, 38,
+ 26, 22, 21, 37, 61, 63, 37, 31,
+ 32, 33,  2,  1, 23, 33, 41, 27,
+ 35, 30, 38, 23, 33,  3, 28, 34,
+ 34, 27, 41, 29, 39, 35, 36, 29,
+ 32, 27, 30, 32, 24, 61, 37, 26,
+ 59, 25, 35, 27, 36, 37, 30, 31,
+ 34, 40,  3, 28, 34, 39, 32, 31,
+ 32, 30, 24, 28, 35, 36, 26, 32,
+ 31, 33, 29, 33, 39, 25, 30, 24,
+ 35, 59, 29, 34, 25, 30, 21, 35,
+ 43, 40, 32, 29,  5, 28, 31, 62,
+ 33, 33, 25, 31, 21, 31, 43, 31,
+ 34, 33, 20, 40, 39, 31, 31, 57,
+ 38, 32, 42, 33, 32, 31, 32, 29,
+ 30, 44,  5, 31, 22, 34, 36, 17,
+ 38, 58, 38, 35, 32, 60, 35, 24,
+ 32, 38, 16, 45, 42, 32, 31, 29,
+  4, 30, 17, 40, 46, 48, 63, 32,
+ 42, 19, 41, 22, 28, 36, 45, 33,
+ 33, 32, 29,  7, 41, 42, 18, 33,
+ 33, 32, 22, 37,  1, 26, 22, 23,
+ 49, 28, 26, 27, 32, 33, 27, 23,
+ 28, 36, 15,  6, 34, 27, 31, 26,
+ 23,  2, 33, 32, 34, 41, 28, 32,
+ 41,  0, 36, 38, 34, 31, 47, 32,
+ 17, 31, 39, 33, 37, 51, 30, 47,
+ 32, 50, 32, 19, 63, 30, 25, 27,
+ 33, 62, 24, 31, 27, 30, 37, 31,
+ 45, 32, 39, 20, 46, 47, 35, 19,
+ 34,  1, 49, 21, 21, 14, 51, 26,
+ 23, 31, 36, 35, 58, 29, 29, 21,
+ 20, 42, 13, 28, 12, 40, 31, 33,
+ 39, 60, 32, 44, 33, 31, 28, 37,
+ 29, 32, 30, 49, 43, 28, 39, 25,
+ 32, 48,  2, 15, 20, 25, 31, 28,
+ 21, 24, 25, 15, 31, 17, 37, 43,
+ 18, 32, 33, 24, 33, 36, 13, 33,
+ 31, 39, 11, 31, 33, 32, 39, 37,
+ 32, 32, 29, 17, 44, 46, 36, 35,
+ 26, 37, 58, 32, 34, 38,  8, 38,
+ 38, 22, 29, 25, 16, 35, 32, 35,
+ 33, 43, 18, 46, 38, 50, 33, 18,
+ 53, 60, 13, 32, 36, 33, 51, 36,
+ 43, 45, 27, 42, 29, 24, 30, 25,
+ 31, 52, 31, 35, 38,  9, 22, 34,
+  4, 17, 28, 55, 42, 25, 17, 20,
+ 47, 34, 33, 16, 40, 25, 16, 30,
+ 53, 29, 10, 11, 14, 26, 33,  4,
+ 35, 44, 26, 16, 31, 26, 34, 38,
+ 29, 31, 30, 24, 22, 61, 32,  9,
+ 45, 34, 31, 19,  9, 31, 46, 31,
+ 35, 54, 29, 57, 30, 50,  3, 31,
+ 63, 34, 47, 41, 51, 18, 31, 14,
+ 37, 38, 31, 24, 32, 31, 50, 33,
+ 31, 54, 27,  9, 33, 23, 19, 32,
+ 29, 29, 33, 28, 47, 49, 30, 47,
+ 33, 27, 25, 54, 44, 45, 50, 58,
+ 51, 48, 33, 59, 33, 34, 57, 13,
+ 26, 33, 13, 48, 30, 11,  7, 56,
+ 34, 55, 26,  0, 26, 35,  1, 51,
+ 33, 53, 31, 45, 12, 29, 29, 51,
+ 31, 48,  2,  6, 34, 30, 28, 33,
+ 60, 40, 27, 46, 31,  9, 35, 29,
+ 31, 39, 55, 46, 19, 37, 62, 34,
+ 30, 16, 19, 49, 41, 41, 39, 37,
+ 14,  5, 13, 35, 55, 30, 40, 40,
+ 42,  8, 20, 25, 45, 35, 33, 36,
+ 54, 38, 27, 37, 62, 40, 15, 59,
+ 49, 31, 29, 34, 34, 39, 24, 29,
+ 25, 29, 21, 29, 10, 61, 33, 49,
+ 35, 34,  3, 38, 39, 29,  7, 41,
+  1, 35,  4, 23, 15, 23, 11, 37,
+ 28, 35, 30, 30, 24,  1, 43, 56,
+  8, 34, 42, 24, 45, 30, 20, 23,
+  8, 38, 22, 33, 17, 52, 34, 22,
+ 53, 43, 44,  1, 27, 31, 41, 43,
+ 41, 30, 31, 36, 30,  5, 55, 31,
+ 33, 30, 40, 23, 15, 29, 34, 34,
+ 59, 34, 30, 11, 13, 38,  5,  0,
+ 30, 42,  5, 30, 29, 34, 10, 44,
+ 30, 63, 35, 12,  3, 26, 15, 17,
+ 25, 34, 43, 39, 34, 56, 29, 23,
+ 30, 12, 30, 10, 35,  9, 24, 58,
+ 10, 12, 54, 33, 37, 20, 41, 35,
+ 29, 18, 61, 30, 40, 24, 39, 53,
+ 62, 26, 29, 33, 34, 53, 49, 21,
+ 27, 11, 63, 20, 26, 23,  7, 13,
+  6, 47, 29, 30,  9, 51, 22, 34,
+ 21, 25, 33, 56, 57, 30, 38, 51,
+ 51, 38, 63, 28, 40, 35, 33, 18,
+ 33, 33, 24, 58, 58, 34, 49, 29,
+ 43,  4,  1,  4, 42, 35, 35, 30,
+ 17,  5, 56, 61, 25, 37, 36, 55,
+ 28, 35, 29, 50, 48, 52,  2, 42,
+ 34, 40, 46, 46, 43, 35, 29, 48,
+ 20, 29, 31, 41,  7, 30, 35, 19,
+ 14, 21,  8, 39, 39, 40, 46, 55,
+ 34,  6, 30, 34, 37, 25, 37, 33,
+ 22, 44, 52, 17, 35, 29, 36, 35,
+ 40, 37, 28, 30, 50, 14, 28, 55,
+  6, 23, 19, 14, 30,  3, 30, 28,
+ 28, 61, 61, 47, 45, 48, 40, 40,
+ 34, 34, 25, 30, 29, 35,  4, 26,
+ 53, 50, 26, 41, 27, 59, 27, 38,
+ 39,  3, 50, 43, 47, 23, 33, 55,
+ 35, 21, 23, 35, 61, 33, 46, 52,
+ 35, 34, 24, 30, 43, 16, 37, 21,
+  2, 24, 45, 34, 30, 55, 55,  1,
+ 29, 29, 26, 28, 25, 31, 36, 22,
+ 17, 30, 52,  2, 44, 44, 57, 26,
+ 62, 41, 39, 57, 26, 46, 49, 11,
+ 16, 19,  5, 59, 38, 39, 58, 38,
+ 25, 49, 50, 22, 28, 59,  9, 59,
+  7, 28, 55, 17,  4, 35, 50, 21,
+ 29, 44, 47, 18, 24, 19, 25, 42,
+ 35,  3, 51, 35, 16, 35, 30, 63,
+ 57, 39, 39, 25, 35, 38,  9, 16,
+ 36, 45, 31, 60, 14, 34, 42, 24,
+  0, 37, 18, 61, 57, 37, 28, 53,
+ 20, 46, 14, 47, 38, 38, 38,  9,
+ 34, 39, 43, 17, 39, 59,  5, 27,
+  0, 12, 27,
+};
+
+static const uint8_t table1_mvy[1099] = {
+ 32, 32, 31, 31, 32, 33, 31, 33,
+ 33, 32, 32, 30, 34, 31, 32, 29,
+ 33, 30, 32, 33, 31, 35, 34, 30,
+ 34, 31, 33, 29, 29, 31, 33, 35,
+ 30, 30, 35, 32, 32, 34, 34, 28,
+ 25, 32, 36, 27, 32, 32, 32, 37,
+ 39,  3, 32, 30, 31, 26, 31, 32,
+ 32, 38, 29, 29, 32, 34, 31, 31,
+ 34, 35, 33, 33, 28, 33,  1, 33,
+ 27, 29, 30, 31, 28, 29, 37, 35,
+ 31, 33, 35, 27, 36, 37, 25, 25,
+ 61, 35,  4,  5, 32, 33, 36, 30,
+ 23, 30, 28, 34, 31, 32, 32, 39,
+ 32, 34, 21, 39, 32, 59, 32, 28,
+ 32, 36, 60, 33, 24, 36, 32, 32,
+ 41,  2, 32, 38, 26, 22, 33, 30,
+ 31, 32, 32, 30, 31, 32, 29,  3,
+ 40, 38, 32, 32, 33, 26, 31, 34,
+ 28, 38, 34, 31,  3, 31, 35, 38,
+ 27, 35, 33, 28, 29, 27, 29, 27,
+ 43, 29, 37, 63, 31, 33, 34, 30,
+ 31, 30, 37, 30, 35, 35, 26, 41,
+ 37, 31, 33, 28, 26, 30, 42, 24,
+  7, 27, 33, 29, 36, 28, 34, 57,
+ 23, 41, 36, 23, 35, 34, 25, 30,
+ 25, 33, 25, 25, 29, 24, 33, 39,
+ 33, 33,  0, 37, 31, 36, 21, 32,
+ 61, 24, 35, 61, 31,  5, 31, 59,
+ 39, 21, 32, 30, 34, 22, 40, 32,
+ 29, 16, 31,  5, 62,  2, 20, 39,
+ 39, 32, 33,  1, 31, 24, 36, 32,
+ 36, 32, 28, 26,  6, 31, 38, 34,
+ 58, 35, 32, 33, 33, 17, 43, 26,
+ 31, 40, 31, 34, 32, 32, 31, 19,
+ 30, 32, 29, 33, 38, 38, 32, 59,
+ 40, 18, 38, 32, 35, 34, 32, 17,
+  1, 15, 30, 28, 31, 28, 34, 29,
+ 32, 27, 35, 27, 49, 22, 37, 34,
+ 37, 26, 32, 32, 22, 28, 45, 29,
+ 30, 31, 43, 46, 41, 30, 26, 13,
+ 34, 32, 27, 38, 42, 42, 33, 47,
+ 33, 60, 27, 42, 25, 32, 22, 32,
+ 48, 32, 45, 33, 33, 41, 27, 25,
+ 19, 31, 35, 19, 36, 42, 27, 17,
+ 31, 44, 28, 33, 33, 31, 23, 31,
+ 40, 33, 31, 34, 30, 32, 33, 36,
+ 35, 47, 37, 41, 31, 23, 41, 29,
+ 30, 35, 32, 25, 32, 28, 58,  2,
+ 37, 33, 14, 33, 49, 20, 39, 36,
+ 21,  9, 23, 33, 35, 24, 39, 37,
+ 11, 33, 30, 31, 31, 28, 51, 40,
+ 35, 29, 25, 33, 46, 35, 37, 30,
+ 30,  8, 63, 28, 15, 40, 33, 45,
+ 49, 25, 32,  4, 47, 51, 36, 39,
+ 53, 10, 24, 29, 30, 31, 25, 40,
+ 38, 38, 33, 56, 23, 27, 32, 37,
+ 26, 29, 43, 36, 33, 24, 55, 43,
+  9, 29, 34, 34, 24, 33, 18, 33,
+ 33, 30, 31, 50, 24, 60, 30, 39,
+ 34, 30, 39, 28, 22, 38,  2, 26,
+ 63, 32, 57, 21, 39, 33, 28, 18,
+ 30, 34, 22, 33, 29, 41, 30, 34,
+ 35, 21, 13, 34, 35, 39, 30, 46,
+ 32, 42, 32, 31, 33, 26, 11, 33,
+ 22, 31, 25, 31, 53, 27, 43, 25,
+ 40, 50, 21, 36, 38, 30, 12, 31,
+ 34, 20, 15, 29, 32, 62, 30, 13,
+ 17, 32, 19, 31, 20, 31, 30,  7,
+  1, 17, 34, 37, 31, 31, 44, 34,
+ 26, 40, 16, 37, 52, 48, 30, 20,
+ 18, 33, 38, 29,  7, 25, 30, 54,
+ 45, 47, 46, 41, 29, 29, 16, 30,
+ 14, 26, 38, 34, 34, 29, 34, 30,
+ 29, 30, 57, 30,  4, 46, 33, 29,
+ 39, 44, 30, 31, 50, 33, 31, 32,
+ 19, 32, 40, 31, 37, 47,  1, 35,
+ 16, 31,  0, 35, 33,  1, 17, 34,
+  9, 34, 33, 31, 49, 43, 42, 51,
+ 34, 29, 23, 29, 14, 30, 45, 49,
+ 11, 24, 31, 28, 35, 41, 30, 44,
+ 18, 29, 34, 35, 36, 25, 26, 21,
+ 31, 30, 34, 19, 34, 44, 36, 38,
+ 25, 31, 28, 23, 37,  3, 55, 41,
+ 30, 22, 41, 24, 33, 26, 35, 35,
+ 30, 55, 51, 47, 48, 38, 24, 15,
+ 21, 50, 25, 46, 30, 29, 10, 34,
+ 42, 45, 29, 42, 22,  3, 33, 27,
+ 34,  1, 34, 28, 34, 36, 35, 23,
+ 23, 13, 58,  3, 26, 63, 25, 31,
+ 34, 61, 38, 39, 25, 61, 29, 37,
+ 30, 41, 26, 48, 28, 33, 50, 35,
+ 30, 37, 29, 29, 40,  6, 39, 28,
+ 28, 19,  8, 22, 45, 34, 35, 10,
+ 58, 17, 37, 39, 30, 18, 54, 14,
+ 29, 16, 59, 30, 35, 23, 35, 30,
+ 47, 36, 29, 55, 20, 12, 31, 35,
+ 14, 29, 18, 34, 34, 24, 29, 26,
+ 22,  2, 27, 23,  8, 30, 55, 38,
+ 60, 31,  4, 34, 49, 34, 27, 34,
+ 33, 30, 31, 54, 42, 35, 38, 46,
+ 44, 26, 27,  9, 39, 25, 21, 29,
+ 28, 42, 13,  0,  5, 34, 37, 28,
+ 24, 29, 63, 26, 22, 27, 29, 25,
+ 33, 25, 61,  0, 35, 25, 36, 15,
+ 27, 40, 53, 33,  3, 10, 16, 37,
+ 38, 18, 30, 46, 27,  9,  6, 29,
+ 62,  8, 42, 28, 29,  3, 25, 16,
+ 26, 29, 35, 28, 27, 51, 61, 48,
+ 37,  9, 34,  7, 49, 45, 20, 29,
+ 21,  5,  5, 29, 28, 34, 29, 24,
+ 10, 24, 35, 36, 38, 55, 11, 36,
+ 38, 53, 54, 26, 30, 49, 20, 27,
+ 30, 39, 33, 41, 49, 22, 38, 38,
+  4, 30,  8,  9,  3, 24, 22, 50,
+ 37, 36, 31, 27,  2,  9, 42, 63,
+ 25, 19, 44,  1, 28, 28, 48, 30,
+ 34, 41, 41, 38, 12, 27, 15,  0,
+ 16, 34, 35, 38, 28, 29, 40, 42,
+ 51, 52, 45, 54, 59, 59, 42, 44,
+ 37, 26, 46, 24, 15, 39, 22, 46,
+ 19, 35, 38, 17, 37, 23, 52, 55,
+ 50, 37, 26, 11, 37, 12, 24, 30,
+ 16, 13, 22, 13, 36, 35, 40, 41,
+ 34, 41, 26, 53, 51,  5, 21, 30,
+  2, 63, 41, 20,  1, 56, 21, 24,
+ 25,  5, 28, 35, 26, 28, 30, 18,
+ 29, 23, 40, 34, 20, 42, 39, 34,
+ 28, 61, 38, 27, 62,  9, 36, 17,
+  9, 49, 24, 25, 54, 34, 39, 37,
+  3,  1, 25, 38, 38, 44, 35, 36,
+ 12, 60, 36, 38, 40, 25, 43, 39,
+ 53, 28, 39, 57, 46, 10, 52, 27,
+ 35, 42, 45, 59, 15, 60, 38, 24,
+ 23, 39, 12, 29, 24,  0, 20, 16,
+ 28, 43, 35, 28,  1, 49,  4, 21,
+ 42, 39, 29,  3, 44, 21, 53, 55,
+ 11,  5,  3, 39, 53, 28, 25, 19,
+ 34, 28, 21,
+};
+
+/* motion vector table */
+typedef struct MVTable {
+    int n;
+    const uint16_t *table_mv_code;
+    const uint8_t *table_mv_bits;
+    const uint8_t *table_mvx;
+    const uint8_t *table_mvy;
+    uint16_t *table_mv_index; /* encoding: convert mv to index in table_mv */
+    VLC vlc;                /* decoding: vlc */
+} MVTable;
+
+static MVTable mv_tables[2] = {
+    {
+        1099,
+        table0_mv_code,
+        table0_mv_bits,
+        table0_mvx,
+        table0_mvy,
+    },
+    {
+        1099,
+        table1_mv_code,
+        table1_mv_bits,
+        table1_mvx,
+        table1_mvy,
+    }
+};
+
+static const uint8_t v2_mb_type[8][2] = {
+ {1, 1}, {0   , 2}, {3   , 3}, {9   , 5},
+ {5, 4}, {0x21, 7}, {0x20, 7}, {0x11, 6},
+};
+
+static const uint8_t v2_intra_cbpc[4][2] = {
+ {1, 1}, {0, 3}, {1, 3}, {1, 2},
+};
+
+static const uint8_t wmv1_y_dc_scale_table[32]={
+//  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+    0, 8, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21
+};
+static const uint8_t wmv1_c_dc_scale_table[32]={
+//  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+    0, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21,22
+};
+
+static const uint8_t old_ff_y_dc_scale_table[32]={
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    0, 8, 8, 8, 8,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
+};
+static const uint8_t old_ff_c_dc_scale_table[32]={
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    0, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21,22
+};
+
+
+#define WMV1_SCANTABLE_COUNT 4
+
+static const uint8_t wmv1_scantable00[64]= {
+0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11,
+0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28,
+0x30, 0x38, 0x29, 0x21, 0x1A, 0x13, 0x0C, 0x05,
+0x06, 0x0D, 0x14, 0x1B, 0x22, 0x31, 0x39, 0x3A,
+0x32, 0x2A, 0x23, 0x1C, 0x15, 0x0E, 0x07, 0x0F,
+0x16, 0x1D, 0x24, 0x2B, 0x33, 0x3B, 0x3C, 0x34,
+0x2C, 0x25, 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x35,
+0x3D, 0x3E, 0x36, 0x2E, 0x27, 0x2F, 0x37, 0x3F,
+};
+static const uint8_t wmv1_scantable01[64]= {
+0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11,
+0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28,
+0x21, 0x30, 0x1A, 0x13, 0x0C, 0x05, 0x06, 0x0D,
+0x14, 0x1B, 0x22, 0x29, 0x38, 0x31, 0x39, 0x2A,
+0x23, 0x1C, 0x15, 0x0E, 0x07, 0x0F, 0x16, 0x1D,
+0x24, 0x2B, 0x32, 0x3A, 0x33, 0x3B, 0x2C, 0x25,
+0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3C, 0x35,
+0x3D, 0x2E, 0x27, 0x2F, 0x36, 0x3E, 0x37, 0x3F,
+};
+static const uint8_t wmv1_scantable02[64]= {
+0x00, 0x01, 0x08, 0x02, 0x03, 0x09, 0x10, 0x18,
+0x11, 0x0A, 0x04, 0x05, 0x0B, 0x12, 0x19, 0x20,
+0x28, 0x30, 0x21, 0x1A, 0x13, 0x0C, 0x06, 0x07,
+0x0D, 0x14, 0x1B, 0x22, 0x29, 0x38, 0x31, 0x39,
+0x2A, 0x23, 0x1C, 0x15, 0x0E, 0x0F, 0x16, 0x1D,
+0x24, 0x2B, 0x32, 0x3A, 0x33, 0x2C, 0x25, 0x1E,
+0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3B, 0x3C, 0x35,
+0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F,
+};
+static const uint8_t wmv1_scantable03[64]= {
+0x00, 0x08, 0x10, 0x01, 0x18, 0x20, 0x28, 0x09,
+0x02, 0x03, 0x0A, 0x11, 0x19, 0x30, 0x38, 0x29,
+0x21, 0x1A, 0x12, 0x0B, 0x04, 0x05, 0x0C, 0x13,
+0x1B, 0x22, 0x31, 0x39, 0x32, 0x2A, 0x23, 0x1C,
+0x14, 0x0D, 0x06, 0x07, 0x0E, 0x15, 0x1D, 0x24,
+0x2B, 0x33, 0x3A, 0x3B, 0x34, 0x2C, 0x25, 0x1E,
+0x16, 0x0F, 0x17, 0x1F, 0x26, 0x2D, 0x3C, 0x35,
+0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F,
+};
+
+static const uint8_t *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={
+    wmv1_scantable00,
+    wmv1_scantable01,
+    wmv1_scantable02,
+    wmv1_scantable03,
+};
+
+static const uint8_t table_inter_intra[4][2]={
+    {0,1} /*Luma-Left Chroma-Left*/,
+    {2,2} /*Luma-Top  Chroma-Left*/,
+    {6,3} /*luma-Left Chroma-Top */,
+    {7,3} /*luma-Top  Chroma-Top */
+};
+
+#define WMV2_INTER_CBP_TABLE_COUNT 4
+
+static const uint32_t table_mb_non_intra2[128][2] = {
+{0x0000A7, 14}, {0x01B2B8, 18}, {0x01B28E, 18}, {0x036575, 19},
+{0x006CAC, 16}, {0x000A69, 18}, {0x002934, 20}, {0x00526B, 21},
+{0x006CA1, 16}, {0x01B2B9, 18}, {0x0029AD, 20}, {0x029353, 24},
+{0x006CA7, 16}, {0x006CAB, 16}, {0x01B2BB, 18}, {0x00029B, 16},
+{0x00D944, 17}, {0x000A6A, 18}, {0x0149A8, 23}, {0x03651F, 19},
+{0x006CAF, 16}, {0x000A4C, 18}, {0x03651E, 19}, {0x000A48, 18},
+{0x00299C, 20}, {0x00299F, 20}, {0x029352, 24}, {0x0029AC, 20},
+{0x000296, 16}, {0x00D946, 17}, {0x000A68, 18}, {0x000298, 16},
+{0x000527, 17}, {0x00D94D, 17}, {0x0014D7, 19}, {0x036574, 19},
+{0x000A5C, 18}, {0x01B299, 18}, {0x00299D, 20}, {0x00299E, 20},
+{0x000525, 17}, {0x000A66, 18}, {0x00A4D5, 22}, {0x00149B, 19},
+{0x000295, 16}, {0x006CAD, 16}, {0x000A49, 18}, {0x000521, 17},
+{0x006CAA, 16}, {0x00D945, 17}, {0x01B298, 18}, {0x00052F, 17},
+{0x003654, 15}, {0x006CA0, 16}, {0x000532, 17}, {0x000291, 16},
+{0x003652, 15}, {0x000520, 17}, {0x000A5D, 18}, {0x000294, 16},
+{0x00009B, 11}, {0x0006E2, 12}, {0x000028, 12}, {0x0001B0, 10},
+{0x000001,  3}, {0x000010,  8}, {0x00002F,  6}, {0x00004C, 10},
+{0x00000D,  4}, {0x000000, 10}, {0x000006,  9}, {0x000134, 12},
+{0x00000C,  4}, {0x000007, 10}, {0x000007,  9}, {0x0006E1, 12},
+{0x00000E,  5}, {0x0000DA,  9}, {0x000022,  9}, {0x000364, 11},
+{0x00000F,  4}, {0x000006, 10}, {0x00000F,  9}, {0x000135, 12},
+{0x000014,  5}, {0x0000DD,  9}, {0x000004,  9}, {0x000015, 11},
+{0x00001A,  6}, {0x0001B3, 10}, {0x000005, 10}, {0x0006E3, 12},
+{0x00000C,  5}, {0x0000B9,  8}, {0x000004,  8}, {0x0000DB,  9},
+{0x00000E,  4}, {0x00000B, 10}, {0x000023,  9}, {0x0006CB, 12},
+{0x000005,  6}, {0x0001B1, 10}, {0x000001, 10}, {0x0006E0, 12},
+{0x000011,  5}, {0x0000DF,  9}, {0x00000E,  9}, {0x000373, 11},
+{0x000003,  5}, {0x0000B8,  8}, {0x000006,  8}, {0x000175,  9},
+{0x000015,  5}, {0x000174,  9}, {0x000027,  9}, {0x000372, 11},
+{0x000010,  5}, {0x0000BB,  8}, {0x000005,  8}, {0x0000DE,  9},
+{0x00000F,  5}, {0x000001,  9}, {0x000012,  8}, {0x000004, 10},
+{0x000002,  3}, {0x000016,  5}, {0x000009,  4}, {0x000001,  5},
+};
+
+static const uint32_t table_mb_non_intra3[128][2] = {
+{0x0002A1, 10}, {0x005740, 15}, {0x01A0BF, 18}, {0x015D19, 17},
+{0x001514, 13}, {0x00461E, 15}, {0x015176, 17}, {0x015177, 17},
+{0x0011AD, 13}, {0x00682E, 16}, {0x0682F9, 20}, {0x03417D, 19},
+{0x001A36, 14}, {0x002A2D, 14}, {0x00D05E, 17}, {0x006824, 16},
+{0x001515, 13}, {0x00545C, 15}, {0x0230E9, 18}, {0x011AFA, 17},
+{0x0015D7, 13}, {0x005747, 15}, {0x008D79, 16}, {0x006825, 16},
+{0x002BA2, 14}, {0x00A8BA, 16}, {0x0235F6, 18}, {0x015D18, 17},
+{0x0011AE, 13}, {0x00346F, 15}, {0x008C3B, 16}, {0x00346E, 15},
+{0x000D1A, 13}, {0x00461F, 15}, {0x0682F8, 20}, {0x011875, 17},
+{0x002BA1, 14}, {0x008D61, 16}, {0x0235F7, 18}, {0x0230E8, 18},
+{0x001513, 13}, {0x008D7B, 16}, {0x011AF4, 17}, {0x011AF5, 17},
+{0x001185, 13}, {0x0046BF, 15}, {0x008D60, 16}, {0x008D7C, 16},
+{0x001512, 13}, {0x00461C, 15}, {0x00AE8D, 16}, {0x008D78, 16},
+{0x000D0E, 13}, {0x003413, 15}, {0x0046B1, 15}, {0x003416, 15},
+{0x000AEA, 12}, {0x002A2C, 14}, {0x005741, 15}, {0x002A2F, 14},
+{0x000158,  9}, {0x0008D2, 12}, {0x00054C, 11}, {0x000686, 12},
+{0x000000,  2}, {0x000069,  8}, {0x00006B,  8}, {0x00068C, 12},
+{0x000007,  3}, {0x00015E,  9}, {0x0002A3, 10}, {0x000AE9, 12},
+{0x000006,  3}, {0x000231, 10}, {0x0002B8, 10}, {0x001A08, 14},
+{0x000010,  5}, {0x0001A9, 10}, {0x000342, 11}, {0x000A88, 12},
+{0x000004,  4}, {0x0001A2, 10}, {0x0002A4, 10}, {0x001184, 13},
+{0x000012,  5}, {0x000232, 10}, {0x0002B2, 10}, {0x000680, 12},
+{0x00001B,  6}, {0x00046A, 11}, {0x00068E, 12}, {0x002359, 14},
+{0x000016,  5}, {0x00015F,  9}, {0x0002A0, 10}, {0x00054D, 11},
+{0x000005,  4}, {0x000233, 10}, {0x0002B9, 10}, {0x0015D6, 13},
+{0x000022,  6}, {0x000468, 11}, {0x000683, 12}, {0x001A0A, 14},
+{0x000013,  5}, {0x000236, 10}, {0x0002BB, 10}, {0x001186, 13},
+{0x000017,  5}, {0x0001AB, 10}, {0x0002A7, 10}, {0x0008D3, 12},
+{0x000014,  5}, {0x000237, 10}, {0x000460, 11}, {0x000D0F, 13},
+{0x000019,  6}, {0x0001AA, 10}, {0x0002B3, 10}, {0x000681, 12},
+{0x000018,  6}, {0x0001A8, 10}, {0x0002A5, 10}, {0x00068F, 12},
+{0x000007,  4}, {0x000055,  7}, {0x000047,  7}, {0x0000AD,  8},
+};
+
+static const uint32_t table_mb_non_intra4[128][2] = {
+{0x0000D4,  8}, {0x0021C5, 14}, {0x00F18A, 16}, {0x00D5BC, 16},
+{0x000879, 12}, {0x00354D, 14}, {0x010E3F, 17}, {0x010F54, 17},
+{0x000866, 12}, {0x00356E, 14}, {0x010F55, 17}, {0x010E3E, 17},
+{0x0010CE, 13}, {0x003C84, 14}, {0x00D5BD, 16}, {0x00F18B, 16},
+{0x000868, 12}, {0x00438C, 15}, {0x0087AB, 16}, {0x00790B, 15},
+{0x000F10, 12}, {0x00433D, 15}, {0x006AD3, 15}, {0x00790A, 15},
+{0x001AA7, 13}, {0x0043D4, 15}, {0x00871E, 16}, {0x006ADF, 15},
+{0x000D7C, 12}, {0x003C94, 14}, {0x00438D, 15}, {0x006AD2, 15},
+{0x0006BC, 11}, {0x0021E9, 14}, {0x006ADA, 15}, {0x006A99, 15},
+{0x0010F7, 13}, {0x004389, 15}, {0x006ADB, 15}, {0x0078C4, 15},
+{0x000D56, 12}, {0x0035F7, 14}, {0x00438E, 15}, {0x006A98, 15},
+{0x000D52, 12}, {0x003C95, 14}, {0x004388, 15}, {0x00433C, 15},
+{0x000D54, 12}, {0x001E4B, 13}, {0x003C63, 14}, {0x003C83, 14},
+{0x000861, 12}, {0x0021EB, 14}, {0x00356C, 14}, {0x0035F6, 14},
+{0x000863, 12}, {0x00219F, 14}, {0x003568, 14}, {0x003C82, 14},
+{0x0001AE,  9}, {0x0010C0, 13}, {0x000F11, 12}, {0x001AFA, 13},
+{0x000000,  1}, {0x0000F0,  8}, {0x0001AD,  9}, {0x0010C1, 13},
+{0x00000A,  4}, {0x0003C5, 10}, {0x000789, 11}, {0x001AB5, 13},
+{0x000009,  4}, {0x000435, 11}, {0x000793, 11}, {0x001E40, 13},
+{0x00001D,  5}, {0x0003CB, 10}, {0x000878, 12}, {0x001AAF, 13},
+{0x00000B,  4}, {0x0003C7, 10}, {0x000791, 11}, {0x001AAB, 13},
+{0x00001F,  5}, {0x000436, 11}, {0x0006BF, 11}, {0x000F19, 12},
+{0x00003D,  6}, {0x000D51, 12}, {0x0010C4, 13}, {0x0021E8, 14},
+{0x000036,  6}, {0x000437, 11}, {0x0006AF, 11}, {0x0010C5, 13},
+{0x00000C,  4}, {0x000432, 11}, {0x000794, 11}, {0x001E30, 13},
+{0x000042,  7}, {0x000870, 12}, {0x000F24, 12}, {0x001E43, 13},
+{0x000020,  6}, {0x00043E, 11}, {0x000795, 11}, {0x001AAA, 13},
+{0x000037,  6}, {0x0006AC, 11}, {0x0006AE, 11}, {0x0010F6, 13},
+{0x000034,  6}, {0x00043A, 11}, {0x000D50, 12}, {0x001AAE, 13},
+{0x000039,  6}, {0x00043F, 11}, {0x00078D, 11}, {0x0010D2, 13},
+{0x000038,  6}, {0x00043B, 11}, {0x0006BD, 11}, {0x0010D3, 13},
+{0x000011,  5}, {0x0001AC,  9}, {0x0000F3,  8}, {0x000439, 11},
+};
+
+static const uint32_t (*wmv2_inter_table[WMV2_INTER_CBP_TABLE_COUNT])[2]={
+    table_mb_non_intra2,
+    table_mb_non_intra3,
+    table_mb_non_intra4,
+    table_mb_non_intra,
+};
+
+static const uint8_t wmv2_scantableA[64]={
+0x00, 0x01, 0x02, 0x08, 0x03, 0x09, 0x0A, 0x10,
+0x04, 0x0B, 0x11, 0x18, 0x12, 0x0C, 0x05, 0x13,
+0x19, 0x0D, 0x14, 0x1A, 0x1B, 0x06, 0x15, 0x1C,
+0x0E, 0x16, 0x1D, 0x07, 0x1E, 0x0F, 0x17, 0x1F,
+};
+
+static const uint8_t wmv2_scantableB[64]={
+0x00, 0x08, 0x01, 0x10, 0x09, 0x18, 0x11, 0x02,
+0x20, 0x0A, 0x19, 0x28, 0x12, 0x30, 0x21, 0x1A,
+0x38, 0x29, 0x22, 0x03, 0x31, 0x39, 0x0B, 0x2A,
+0x13, 0x32, 0x1B, 0x3A, 0x23, 0x2B, 0x33, 0x3B,
+};
diff --git a/contrib/ffmpeg/libavcodec/msrle.c b/contrib/ffmpeg/libavcodec/msrle.c
new file mode 100644
index 000000000..fae5616e5
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/msrle.c
@@ -0,0 +1,309 @@
+/*
+ * Micrsoft RLE Video Decoder
+ * Copyright (C) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file msrle.c
+ * MS RLE Video Decoder by Mike Melanson (melanson@pcisys.net)
+ * For more information about the MS RLE format, visit:
+ *   http://www.pcisys.net/~melanson/codecs/
+ *
+ * The MS RLE decoder outputs PAL8 colorspace data.
+ *
+ * Note that this decoder expects the palette colors from the end of the
+ * BITMAPINFO header passed through palctrl.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+
+typedef struct MsrleContext {
+    AVCodecContext *avctx;
+    AVFrame frame;
+
+    unsigned char *buf;
+    int size;
+
+} MsrleContext;
+
+#define FETCH_NEXT_STREAM_BYTE() \
+    if (stream_ptr >= s->size) \
+    { \
+      av_log(s->avctx, AV_LOG_ERROR, " MS RLE: stream ptr just went out of bounds (1)\n"); \
+      return; \
+    } \
+    stream_byte = s->buf[stream_ptr++];
+
+static void msrle_decode_pal4(MsrleContext *s)
+{
+    int stream_ptr = 0;
+    unsigned char rle_code;
+    unsigned char extra_byte, odd_pixel;
+    unsigned char stream_byte;
+    int pixel_ptr = 0;
+    int row_dec = s->frame.linesize[0];
+    int row_ptr = (s->avctx->height - 1) * row_dec;
+    int frame_size = row_dec * s->avctx->height;
+    int i;
+
+    /* make the palette available */
+    memcpy(s->frame.data[1], s->avctx->palctrl->palette, AVPALETTE_SIZE);
+    if (s->avctx->palctrl->palette_changed) {
+        s->frame.palette_has_changed = 1;
+        s->avctx->palctrl->palette_changed = 0;
+    }
+
+    while (row_ptr >= 0) {
+        FETCH_NEXT_STREAM_BYTE();
+        rle_code = stream_byte;
+        if (rle_code == 0) {
+            /* fetch the next byte to see how to handle escape code */
+            FETCH_NEXT_STREAM_BYTE();
+            if (stream_byte == 0) {
+                /* line is done, goto the next one */
+                row_ptr -= row_dec;
+                pixel_ptr = 0;
+            } else if (stream_byte == 1) {
+                /* decode is done */
+                return;
+            } else if (stream_byte == 2) {
+                /* reposition frame decode coordinates */
+                FETCH_NEXT_STREAM_BYTE();
+                pixel_ptr += stream_byte;
+                FETCH_NEXT_STREAM_BYTE();
+                row_ptr -= stream_byte * row_dec;
+        } else {
+            // copy pixels from encoded stream
+            odd_pixel =  stream_byte & 1;
+            rle_code = (stream_byte + 1) / 2;
+            extra_byte = rle_code & 0x01;
+            if ((row_ptr + pixel_ptr + stream_byte > frame_size) ||
+                (row_ptr < 0)) {
+                av_log(s->avctx, AV_LOG_ERROR, " MS RLE: frame ptr just went out of bounds (1)\n");
+                return;
+            }
+
+            for (i = 0; i < rle_code; i++) {
+                if (pixel_ptr >= s->avctx->width)
+                    break;
+                FETCH_NEXT_STREAM_BYTE();
+                s->frame.data[0][row_ptr + pixel_ptr] = stream_byte >> 4;
+                pixel_ptr++;
+                if (i + 1 == rle_code && odd_pixel)
+                    break;
+                if (pixel_ptr >= s->avctx->width)
+                    break;
+                s->frame.data[0][row_ptr + pixel_ptr] = stream_byte & 0x0F;
+                pixel_ptr++;
+            }
+
+            // if the RLE code is odd, skip a byte in the stream
+            if (extra_byte)
+              stream_ptr++;
+            }
+        } else {
+            // decode a run of data
+            if ((row_ptr + pixel_ptr + stream_byte > frame_size) ||
+                (row_ptr < 0)) {
+                av_log(s->avctx, AV_LOG_ERROR, " MS RLE: frame ptr just went out of bounds (1)\n");
+                return;
+            }
+            FETCH_NEXT_STREAM_BYTE();
+            for (i = 0; i < rle_code; i++) {
+                if (pixel_ptr >= s->avctx->width)
+                    break;
+                if ((i & 1) == 0)
+                    s->frame.data[0][row_ptr + pixel_ptr] = stream_byte >> 4;
+                else
+                    s->frame.data[0][row_ptr + pixel_ptr] = stream_byte & 0x0F;
+                pixel_ptr++;
+            }
+        }
+    }
+
+    /* one last sanity check on the way out */
+    if (stream_ptr < s->size)
+        av_log(s->avctx, AV_LOG_ERROR, " MS RLE: ended frame decode with bytes left over (%d < %d)\n",
+            stream_ptr, s->size);
+}
+
+
+
+static void msrle_decode_pal8(MsrleContext *s)
+{
+    int stream_ptr = 0;
+    unsigned char rle_code;
+    unsigned char extra_byte;
+    unsigned char stream_byte;
+    int pixel_ptr = 0;
+    int row_dec = s->frame.linesize[0];
+    int row_ptr = (s->avctx->height - 1) * row_dec;
+    int frame_size = row_dec * s->avctx->height;
+
+    /* make the palette available */
+    memcpy(s->frame.data[1], s->avctx->palctrl->palette, AVPALETTE_SIZE);
+    if (s->avctx->palctrl->palette_changed) {
+        s->frame.palette_has_changed = 1;
+        s->avctx->palctrl->palette_changed = 0;
+    }
+
+    while (row_ptr >= 0) {
+        FETCH_NEXT_STREAM_BYTE();
+        rle_code = stream_byte;
+        if (rle_code == 0) {
+            /* fetch the next byte to see how to handle escape code */
+            FETCH_NEXT_STREAM_BYTE();
+            if (stream_byte == 0) {
+                /* line is done, goto the next one */
+                row_ptr -= row_dec;
+                pixel_ptr = 0;
+            } else if (stream_byte == 1) {
+                /* decode is done */
+                return;
+            } else if (stream_byte == 2) {
+                /* reposition frame decode coordinates */
+                FETCH_NEXT_STREAM_BYTE();
+                pixel_ptr += stream_byte;
+                FETCH_NEXT_STREAM_BYTE();
+                row_ptr -= stream_byte * row_dec;
+            } else {
+                /* copy pixels from encoded stream */
+                if ((row_ptr + pixel_ptr + stream_byte > frame_size) ||
+                    (row_ptr < 0)) {
+                    av_log(s->avctx, AV_LOG_ERROR, " MS RLE: frame ptr just went out of bounds (1)\n");
+                    return;
+                }
+
+                rle_code = stream_byte;
+                extra_byte = stream_byte & 0x01;
+                if (stream_ptr + rle_code + extra_byte > s->size) {
+                    av_log(s->avctx, AV_LOG_ERROR, " MS RLE: stream ptr just went out of bounds (2)\n");
+                    return;
+                }
+
+                while (rle_code--) {
+                    FETCH_NEXT_STREAM_BYTE();
+                    s->frame.data[0][row_ptr + pixel_ptr] = stream_byte;
+                    pixel_ptr++;
+                }
+
+                /* if the RLE code is odd, skip a byte in the stream */
+                if (extra_byte)
+                    stream_ptr++;
+            }
+        } else {
+            /* decode a run of data */
+            if ((row_ptr + pixel_ptr + stream_byte > frame_size) ||
+                (row_ptr < 0)) {
+                av_log(s->avctx, AV_LOG_ERROR, " MS RLE: frame ptr just went out of bounds (2)\n");
+                return;
+            }
+
+            FETCH_NEXT_STREAM_BYTE();
+
+            while(rle_code--) {
+                s->frame.data[0][row_ptr + pixel_ptr] = stream_byte;
+                pixel_ptr++;
+            }
+        }
+    }
+
+    /* one last sanity check on the way out */
+    if (stream_ptr < s->size)
+        av_log(s->avctx, AV_LOG_ERROR, " MS RLE: ended frame decode with bytes left over (%d < %d)\n",
+            stream_ptr, s->size);
+}
+
+static int msrle_decode_init(AVCodecContext *avctx)
+{
+    MsrleContext *s = (MsrleContext *)avctx->priv_data;
+
+    s->avctx = avctx;
+
+    avctx->pix_fmt = PIX_FMT_PAL8;
+    avctx->has_b_frames = 0;
+    s->frame.data[0] = NULL;
+
+    return 0;
+}
+
+static int msrle_decode_frame(AVCodecContext *avctx,
+                              void *data, int *data_size,
+                              uint8_t *buf, int buf_size)
+{
+    MsrleContext *s = (MsrleContext *)avctx->priv_data;
+
+    s->buf = buf;
+    s->size = buf_size;
+
+    s->frame.reference = 1;
+    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if (avctx->reget_buffer(avctx, &s->frame)) {
+        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+        return -1;
+    }
+
+    switch (avctx->bits_per_sample) {
+        case 8:
+            msrle_decode_pal8(s);
+            break;
+        case 4:
+            msrle_decode_pal4(s);
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Don't know how to decode depth %u.\n",
+                   avctx->bits_per_sample);
+    }
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->frame;
+
+    /* report that the buffer was completely consumed */
+    return buf_size;
+}
+
+static int msrle_decode_end(AVCodecContext *avctx)
+{
+    MsrleContext *s = (MsrleContext *)avctx->priv_data;
+
+    /* release the last frame */
+    if (s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    return 0;
+}
+
+AVCodec msrle_decoder = {
+    "msrle",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MSRLE,
+    sizeof(MsrleContext),
+    msrle_decode_init,
+    NULL,
+    msrle_decode_end,
+    msrle_decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/msvideo1.c b/contrib/ffmpeg/libavcodec/msvideo1.c
new file mode 100644
index 000000000..5929e1c63
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/msvideo1.c
@@ -0,0 +1,349 @@
+/*
+ * Microsoft Video-1 Decoder
+ * Copyright (C) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file msvideo1.c
+ * Microsoft Video-1 Decoder by Mike Melanson (melanson@pcisys.net)
+ * For more information about the MS Video-1 format, visit:
+ *   http://www.pcisys.net/~melanson/codecs/
+ *
+ * This decoder outputs either PAL8 or RGB555 data, depending on the
+ * whether a RGB palette was passed through palctrl;
+ * if it's present, then the data is PAL8; RGB555 otherwise.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+
+#define PALETTE_COUNT 256
+#define CHECK_STREAM_PTR(n) \
+  if ((stream_ptr + n) > s->size ) { \
+    av_log(s->avctx, AV_LOG_ERROR, " MS Video-1 warning: stream_ptr out of bounds (%d >= %d)\n", \
+      stream_ptr + n, s->size); \
+    return; \
+  }
+
+typedef struct Msvideo1Context {
+
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    AVFrame frame;
+
+    unsigned char *buf;
+    int size;
+
+    int mode_8bit;  /* if it's not 8-bit, it's 16-bit */
+
+} Msvideo1Context;
+
+static int msvideo1_decode_init(AVCodecContext *avctx)
+{
+    Msvideo1Context *s = (Msvideo1Context *)avctx->priv_data;
+
+    s->avctx = avctx;
+
+    /* figure out the colorspace based on the presence of a palette */
+    if (s->avctx->palctrl) {
+        s->mode_8bit = 1;
+        avctx->pix_fmt = PIX_FMT_PAL8;
+    } else {
+        s->mode_8bit = 0;
+        avctx->pix_fmt = PIX_FMT_RGB555;
+    }
+
+    avctx->has_b_frames = 0;
+    dsputil_init(&s->dsp, avctx);
+
+    s->frame.data[0] = NULL;
+
+    return 0;
+}
+
+static void msvideo1_decode_8bit(Msvideo1Context *s)
+{
+    int block_ptr, pixel_ptr;
+    int total_blocks;
+    int pixel_x, pixel_y;  /* pixel width and height iterators */
+    int block_x, block_y;  /* block width and height iterators */
+    int blocks_wide, blocks_high;  /* width and height in 4x4 blocks */
+    int block_inc;
+    int row_dec;
+
+    /* decoding parameters */
+    int stream_ptr;
+    unsigned char byte_a, byte_b;
+    unsigned short flags;
+    int skip_blocks;
+    unsigned char colors[8];
+    unsigned char *pixels = s->frame.data[0];
+    int stride = s->frame.linesize[0];
+
+    stream_ptr = 0;
+    skip_blocks = 0;
+    blocks_wide = s->avctx->width / 4;
+    blocks_high = s->avctx->height / 4;
+    total_blocks = blocks_wide * blocks_high;
+    block_inc = 4;
+    row_dec = stride + 4;
+
+    for (block_y = blocks_high; block_y > 0; block_y--) {
+        block_ptr = ((block_y * 4) - 1) * stride;
+        for (block_x = blocks_wide; block_x > 0; block_x--) {
+            /* check if this block should be skipped */
+            if (skip_blocks) {
+                block_ptr += block_inc;
+                skip_blocks--;
+                total_blocks--;
+                continue;
+            }
+
+            pixel_ptr = block_ptr;
+
+            /* get the next two bytes in the encoded data stream */
+            CHECK_STREAM_PTR(2);
+            byte_a = s->buf[stream_ptr++];
+            byte_b = s->buf[stream_ptr++];
+
+            /* check if the decode is finished */
+            if ((byte_a == 0) && (byte_b == 0) && (total_blocks == 0))
+                return;
+            else if ((byte_b & 0xFC) == 0x84) {
+                /* skip code, but don't count the current block */
+                skip_blocks = ((byte_b - 0x84) << 8) + byte_a - 1;
+            } else if (byte_b < 0x80) {
+                /* 2-color encoding */
+                flags = (byte_b << 8) | byte_a;
+
+                CHECK_STREAM_PTR(2);
+                colors[0] = s->buf[stream_ptr++];
+                colors[1] = s->buf[stream_ptr++];
+
+                for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                    for (pixel_x = 0; pixel_x < 4; pixel_x++, flags >>= 1)
+                        pixels[pixel_ptr++] = colors[(flags & 0x1) ^ 1];
+                    pixel_ptr -= row_dec;
+                }
+            } else if (byte_b >= 0x90) {
+                /* 8-color encoding */
+                flags = (byte_b << 8) | byte_a;
+
+                CHECK_STREAM_PTR(8);
+                memcpy(colors, &s->buf[stream_ptr], 8);
+                stream_ptr += 8;
+
+                for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                    for (pixel_x = 0; pixel_x < 4; pixel_x++, flags >>= 1)
+                        pixels[pixel_ptr++] =
+                            colors[((pixel_y & 0x2) << 1) +
+                                (pixel_x & 0x2) + ((flags & 0x1) ^ 1)];
+                    pixel_ptr -= row_dec;
+                }
+            } else {
+                /* 1-color encoding */
+                colors[0] = byte_a;
+
+                for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                    for (pixel_x = 0; pixel_x < 4; pixel_x++)
+                        pixels[pixel_ptr++] = colors[0];
+                    pixel_ptr -= row_dec;
+                }
+            }
+
+            block_ptr += block_inc;
+            total_blocks--;
+        }
+    }
+
+    /* make the palette available on the way out */
+    if (s->avctx->pix_fmt == PIX_FMT_PAL8) {
+        memcpy(s->frame.data[1], s->avctx->palctrl->palette, AVPALETTE_SIZE);
+        if (s->avctx->palctrl->palette_changed) {
+            s->frame.palette_has_changed = 1;
+            s->avctx->palctrl->palette_changed = 0;
+        }
+    }
+}
+
+static void msvideo1_decode_16bit(Msvideo1Context *s)
+{
+    int block_ptr, pixel_ptr;
+    int total_blocks;
+    int pixel_x, pixel_y;  /* pixel width and height iterators */
+    int block_x, block_y;  /* block width and height iterators */
+    int blocks_wide, blocks_high;  /* width and height in 4x4 blocks */
+    int block_inc;
+    int row_dec;
+
+    /* decoding parameters */
+    int stream_ptr;
+    unsigned char byte_a, byte_b;
+    unsigned short flags;
+    int skip_blocks;
+    unsigned short colors[8];
+    unsigned short *pixels = (unsigned short *)s->frame.data[0];
+    int stride = s->frame.linesize[0] / 2;
+
+    stream_ptr = 0;
+    skip_blocks = 0;
+    blocks_wide = s->avctx->width / 4;
+    blocks_high = s->avctx->height / 4;
+    total_blocks = blocks_wide * blocks_high;
+    block_inc = 4;
+    row_dec = stride + 4;
+
+    for (block_y = blocks_high; block_y > 0; block_y--) {
+        block_ptr = ((block_y * 4) - 1) * stride;
+        for (block_x = blocks_wide; block_x > 0; block_x--) {
+            /* check if this block should be skipped */
+            if (skip_blocks) {
+                block_ptr += block_inc;
+                skip_blocks--;
+                total_blocks--;
+                continue;
+            }
+
+            pixel_ptr = block_ptr;
+
+            /* get the next two bytes in the encoded data stream */
+            CHECK_STREAM_PTR(2);
+            byte_a = s->buf[stream_ptr++];
+            byte_b = s->buf[stream_ptr++];
+
+            /* check if the decode is finished */
+            if ((byte_a == 0) && (byte_b == 0) && (total_blocks == 0)) {
+                return;
+            } else if ((byte_b & 0xFC) == 0x84) {
+                /* skip code, but don't count the current block */
+                skip_blocks = ((byte_b - 0x84) << 8) + byte_a - 1;
+            } else if (byte_b < 0x80) {
+                /* 2- or 8-color encoding modes */
+                flags = (byte_b << 8) | byte_a;
+
+                CHECK_STREAM_PTR(4);
+                colors[0] = LE_16(&s->buf[stream_ptr]);
+                stream_ptr += 2;
+                colors[1] = LE_16(&s->buf[stream_ptr]);
+                stream_ptr += 2;
+
+                if (colors[0] & 0x8000) {
+                    /* 8-color encoding */
+                    CHECK_STREAM_PTR(12);
+                    colors[2] = LE_16(&s->buf[stream_ptr]);
+                    stream_ptr += 2;
+                    colors[3] = LE_16(&s->buf[stream_ptr]);
+                    stream_ptr += 2;
+                    colors[4] = LE_16(&s->buf[stream_ptr]);
+                    stream_ptr += 2;
+                    colors[5] = LE_16(&s->buf[stream_ptr]);
+                    stream_ptr += 2;
+                    colors[6] = LE_16(&s->buf[stream_ptr]);
+                    stream_ptr += 2;
+                    colors[7] = LE_16(&s->buf[stream_ptr]);
+                    stream_ptr += 2;
+
+                    for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                        for (pixel_x = 0; pixel_x < 4; pixel_x++, flags >>= 1)
+                            pixels[pixel_ptr++] =
+                                colors[((pixel_y & 0x2) << 1) +
+                                    (pixel_x & 0x2) + ((flags & 0x1) ^ 1)];
+                        pixel_ptr -= row_dec;
+                    }
+                } else {
+                    /* 2-color encoding */
+                    for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                        for (pixel_x = 0; pixel_x < 4; pixel_x++, flags >>= 1)
+                            pixels[pixel_ptr++] = colors[(flags & 0x1) ^ 1];
+                        pixel_ptr -= row_dec;
+                    }
+                }
+            } else {
+                /* otherwise, it's a 1-color block */
+                colors[0] = (byte_b << 8) | byte_a;
+
+                for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                    for (pixel_x = 0; pixel_x < 4; pixel_x++)
+                        pixels[pixel_ptr++] = colors[0];
+                    pixel_ptr -= row_dec;
+                }
+            }
+
+            block_ptr += block_inc;
+            total_blocks--;
+        }
+    }
+}
+
+static int msvideo1_decode_frame(AVCodecContext *avctx,
+                                void *data, int *data_size,
+                                uint8_t *buf, int buf_size)
+{
+    Msvideo1Context *s = (Msvideo1Context *)avctx->priv_data;
+
+    s->buf = buf;
+    s->size = buf_size;
+
+    s->frame.reference = 1;
+    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if (avctx->reget_buffer(avctx, &s->frame)) {
+        av_log(s->avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+        return -1;
+    }
+
+    if (s->mode_8bit)
+        msvideo1_decode_8bit(s);
+    else
+        msvideo1_decode_16bit(s);
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->frame;
+
+    /* report that the buffer was completely consumed */
+    return buf_size;
+}
+
+static int msvideo1_decode_end(AVCodecContext *avctx)
+{
+    Msvideo1Context *s = (Msvideo1Context *)avctx->priv_data;
+
+    if (s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    return 0;
+}
+
+AVCodec msvideo1_decoder = {
+    "msvideo1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_MSVIDEO1,
+    sizeof(Msvideo1Context),
+    msvideo1_decode_init,
+    NULL,
+    msvideo1_decode_end,
+    msvideo1_decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/nuv.c b/contrib/ffmpeg/libavcodec/nuv.c
new file mode 100644
index 000000000..592d3de03
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/nuv.c
@@ -0,0 +1,216 @@
+/*
+ * NuppelVideo decoder
+ * Copyright (c) 2006 Reimar Doeffinger
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "common.h"
+#include "avcodec.h"
+
+#include "bswap.h"
+#include "dsputil.h"
+#include "lzo.h"
+#include "rtjpeg.h"
+
+typedef struct {
+    AVFrame pic;
+    int width, height;
+    unsigned int decomp_size;
+    unsigned char* decomp_buf;
+    uint32_t lq[64], cq[64];
+    RTJpegContext rtj;
+    DSPContext dsp;
+} NuvContext;
+
+/**
+ * \brief copy frame data from buffer to AVFrame, handling stride.
+ * \param f destination AVFrame
+ * \param src source buffer, does not use any line-stride
+ * \param width width of the video frame
+ * \param height height of the video frame
+ */
+static void copy_frame(AVFrame *f, uint8_t *src,
+                       int width, int height) {
+    AVPicture pic;
+    avpicture_fill(&pic, src, PIX_FMT_YUV420P, width, height);
+    img_copy((AVPicture *)f, &pic, PIX_FMT_YUV420P, width, height);
+}
+
+/**
+ * \brief extract quantization tables from codec data into our context
+ */
+static int get_quant(AVCodecContext *avctx, NuvContext *c,
+                     uint8_t *buf, int size) {
+    int i;
+    if (size < 2 * 64 * 4) {
+        av_log(avctx, AV_LOG_ERROR, "insufficient rtjpeg quant data\n");
+        return -1;
+    }
+    for (i = 0; i < 64; i++, buf += 4)
+        c->lq[i] = LE_32(buf);
+    for (i = 0; i < 64; i++, buf += 4)
+        c->cq[i] = LE_32(buf);
+    return 0;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
+                        uint8_t *buf, int buf_size) {
+    NuvContext *c = (NuvContext *)avctx->priv_data;
+    AVFrame *picture = data;
+    int orig_size = buf_size;
+    enum {NUV_UNCOMPRESSED = '0', NUV_RTJPEG = '1',
+          NUV_RTJPEG_IN_LZO = '2', NUV_LZO = '3',
+          NUV_BLACK = 'N', NUV_COPY_LAST = 'L'} comptype;
+
+    if (buf_size < 12) {
+        av_log(avctx, AV_LOG_ERROR, "coded frame too small\n");
+        return -1;
+    }
+
+    if (c->pic.data[0])
+        avctx->release_buffer(avctx, &c->pic);
+    c->pic.reference = 1;
+    c->pic.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_READABLE |
+                          FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if (avctx->get_buffer(avctx, &c->pic) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    // codec data (rtjpeg quant tables)
+    if (buf[0] == 'D' && buf[1] == 'R') {
+        int ret;
+        // skip rest of the frameheader.
+        buf = &buf[12];
+        buf_size -= 12;
+        ret = get_quant(avctx, c, buf, buf_size);
+        if (ret < 0)
+            return ret;
+        rtjpeg_decode_init(&c->rtj, &c->dsp, c->width, c->height, c->lq, c->cq);
+        return orig_size;
+    }
+
+    if (buf[0] != 'V' || buf_size < 12) {
+        av_log(avctx, AV_LOG_ERROR, "not a nuv video frame\n");
+        return -1;
+    }
+    comptype = buf[1];
+    // skip rest of the frameheader.
+    buf = &buf[12];
+    buf_size -= 12;
+
+    c->pic.pict_type = FF_I_TYPE;
+    c->pic.key_frame = 1;
+    // decompress/copy/whatever data
+    switch (comptype) {
+        case NUV_UNCOMPRESSED: {
+            int height = c->height;
+            if (buf_size < c->width * height * 3 / 2) {
+                av_log(avctx, AV_LOG_ERROR, "uncompressed frame too short\n");
+                height = buf_size / c->width / 3 * 2;
+            }
+            copy_frame(&c->pic, buf, c->width, height);
+            break;
+        }
+        case NUV_RTJPEG: {
+            rtjpeg_decode_frame_yuv420(&c->rtj, &c->pic, buf, buf_size);
+            break;
+        }
+        case NUV_RTJPEG_IN_LZO: {
+            int outlen = c->decomp_size, inlen = buf_size;
+            if (lzo1x_decode(c->decomp_buf, &outlen, buf, &inlen))
+                av_log(avctx, AV_LOG_ERROR, "error during lzo decompression\n");
+            rtjpeg_decode_frame_yuv420(&c->rtj, &c->pic, c->decomp_buf, c->decomp_size);
+            break;
+        }
+        case NUV_LZO: {
+            int outlen = c->decomp_size, inlen = buf_size;
+            if (lzo1x_decode(c->decomp_buf, &outlen, buf, &inlen))
+                av_log(avctx, AV_LOG_ERROR, "error during lzo decompression\n");
+            copy_frame(&c->pic, c->decomp_buf, c->width, c->height);
+            break;
+        }
+        case NUV_BLACK: {
+            memset(c->pic.data[0], 0, c->width * c->height);
+            memset(c->pic.data[1], 128, c->width * c->height / 4);
+            memset(c->pic.data[2], 128, c->width * c->height / 4);
+            break;
+        }
+        case NUV_COPY_LAST: {
+            c->pic.pict_type = FF_P_TYPE;
+            c->pic.key_frame = 0;
+            /* nothing more to do here */
+            break;
+        }
+        default:
+            av_log(avctx, AV_LOG_ERROR, "unknown compression\n");
+            return -1;
+    }
+
+    *picture = c->pic;
+    *data_size = sizeof(AVFrame);
+    return orig_size;
+}
+
+static int decode_init(AVCodecContext *avctx) {
+    NuvContext *c = (NuvContext *)avctx->priv_data;
+    avctx->width = (avctx->width + 1) & ~1;
+    avctx->height = (avctx->height + 1) & ~1;
+    if (avcodec_check_dimensions(avctx, avctx->height, avctx->width) < 0) {
+        return 1;
+    }
+    avctx->has_b_frames = 0;
+    avctx->pix_fmt = PIX_FMT_YUV420P;
+    c->pic.data[0] = NULL;
+    c->width = avctx->width;
+    c->height = avctx->height;
+    c->decomp_size = c->height * c->width * 3 / 2;
+    c->decomp_buf = av_malloc(c->decomp_size + LZO_OUTPUT_PADDING);
+    if (!c->decomp_buf) {
+        av_log(avctx, AV_LOG_ERROR, "Can't allocate decompression buffer.\n");
+        return 1;
+    }
+    dsputil_init(&c->dsp, avctx);
+    if (avctx->extradata_size)
+        get_quant(avctx, c, avctx->extradata, avctx->extradata_size);
+    rtjpeg_decode_init(&c->rtj, &c->dsp, c->width, c->height, c->lq, c->cq);
+    return 0;
+}
+
+static int decode_end(AVCodecContext *avctx) {
+    NuvContext *c = (NuvContext *)avctx->priv_data;
+    av_freep(&c->decomp_buf);
+    if (c->pic.data[0])
+        avctx->release_buffer(avctx, &c->pic);
+    return 0;
+}
+
+AVCodec nuv_decoder = {
+    "nuv",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_NUV,
+    sizeof(NuvContext),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame,
+    CODEC_CAP_DR1,
+};
+
diff --git a/contrib/ffmpeg/libavcodec/oggvorbis.c b/contrib/ffmpeg/libavcodec/oggvorbis.c
new file mode 100644
index 000000000..9e684a0f4
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/oggvorbis.c
@@ -0,0 +1,381 @@
+/*
+ * copyright (c) 2002 Mark Hills <mark@pogo.org.uk>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/**
+ * @file oggvorbis.c
+ * Ogg Vorbis codec support via libvorbisenc.
+ * @author Mark Hills <mark@pogo.org.uk>
+ */
+
+#include <vorbis/vorbisenc.h>
+
+#include "avcodec.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+#define OGGVORBIS_FRAME_SIZE 64
+
+#define BUFFER_SIZE (1024*64)
+
+typedef struct OggVorbisContext {
+    vorbis_info vi ;
+    vorbis_dsp_state vd ;
+    vorbis_block vb ;
+    uint8_t buffer[BUFFER_SIZE];
+    int buffer_index;
+
+    /* decoder */
+    vorbis_comment vc ;
+    ogg_packet op;
+} OggVorbisContext ;
+
+
+static int oggvorbis_init_encoder(vorbis_info *vi, AVCodecContext *avccontext) {
+    double cfreq;
+
+    if(avccontext->flags & CODEC_FLAG_QSCALE) {
+        /* variable bitrate */
+        if(vorbis_encode_setup_vbr(vi, avccontext->channels,
+                avccontext->sample_rate,
+                avccontext->global_quality / (float)FF_QP2LAMBDA))
+            return -1;
+    } else {
+        /* constant bitrate */
+        if(vorbis_encode_setup_managed(vi, avccontext->channels,
+                avccontext->sample_rate, -1, avccontext->bit_rate, -1))
+            return -1;
+
+#ifdef OGGVORBIS_VBR_BY_ESTIMATE
+        /* variable bitrate by estimate */
+        if(vorbis_encode_ctl(vi, OV_ECTL_RATEMANAGE_AVG, NULL))
+            return -1;
+#endif
+    }
+
+    /* cutoff frequency */
+    if(avccontext->cutoff > 0) {
+        cfreq = avccontext->cutoff / 1000.0;
+        if(vorbis_encode_ctl(vi, OV_ECTL_LOWPASS_SET, &cfreq))
+            return -1;
+    }
+
+    return vorbis_encode_setup_init(vi);
+}
+
+static int oggvorbis_encode_init(AVCodecContext *avccontext) {
+    OggVorbisContext *context = avccontext->priv_data ;
+    ogg_packet header, header_comm, header_code;
+    uint8_t *p;
+    unsigned int offset, len;
+
+    vorbis_info_init(&context->vi) ;
+    if(oggvorbis_init_encoder(&context->vi, avccontext) < 0) {
+        av_log(avccontext, AV_LOG_ERROR, "oggvorbis_encode_init: init_encoder failed") ;
+        return -1 ;
+    }
+    vorbis_analysis_init(&context->vd, &context->vi) ;
+    vorbis_block_init(&context->vd, &context->vb) ;
+
+    vorbis_comment_init(&context->vc);
+    vorbis_comment_add_tag(&context->vc, "encoder", LIBAVCODEC_IDENT) ;
+
+    vorbis_analysis_headerout(&context->vd, &context->vc, &header,
+                                &header_comm, &header_code);
+
+    len = header.bytes + header_comm.bytes +  header_code.bytes;
+    avccontext->extradata_size= 64 + len + len/255;
+    p = avccontext->extradata= av_mallocz(avccontext->extradata_size);
+    p[0] = 2;
+    offset = 1;
+    offset += av_xiphlacing(&p[offset], header.bytes);
+    offset += av_xiphlacing(&p[offset], header_comm.bytes);
+    memcpy(&p[offset], header.packet, header.bytes);
+    offset += header.bytes;
+    memcpy(&p[offset], header_comm.packet, header_comm.bytes);
+    offset += header_comm.bytes;
+    memcpy(&p[offset], header_code.packet, header_code.bytes);
+    offset += header_code.bytes;
+    avccontext->extradata_size = offset;
+    avccontext->extradata= av_realloc(avccontext->extradata, avccontext->extradata_size);
+
+/*    vorbis_block_clear(&context->vb);
+    vorbis_dsp_clear(&context->vd);
+    vorbis_info_clear(&context->vi);*/
+    vorbis_comment_clear(&context->vc);
+
+    avccontext->frame_size = OGGVORBIS_FRAME_SIZE ;
+
+    avccontext->coded_frame= avcodec_alloc_frame();
+    avccontext->coded_frame->key_frame= 1;
+
+    return 0 ;
+}
+
+
+static int oggvorbis_encode_frame(AVCodecContext *avccontext,
+                                  unsigned char *packets,
+                           int buf_size, void *data)
+{
+    OggVorbisContext *context = avccontext->priv_data ;
+    float **buffer ;
+    ogg_packet op ;
+    signed short *audio = data ;
+    int l, samples = data ? OGGVORBIS_FRAME_SIZE : 0;
+
+    buffer = vorbis_analysis_buffer(&context->vd, samples) ;
+
+    if(context->vi.channels == 1) {
+        for(l = 0 ; l < samples ; l++)
+            buffer[0][l]=audio[l]/32768.f;
+    } else {
+        for(l = 0 ; l < samples ; l++){
+            buffer[0][l]=audio[l*2]/32768.f;
+            buffer[1][l]=audio[l*2+1]/32768.f;
+        }
+    }
+
+    vorbis_analysis_wrote(&context->vd, samples) ;
+
+    while(vorbis_analysis_blockout(&context->vd, &context->vb) == 1) {
+        vorbis_analysis(&context->vb, NULL);
+        vorbis_bitrate_addblock(&context->vb) ;
+
+        while(vorbis_bitrate_flushpacket(&context->vd, &op)) {
+            if(op.bytes==1) //id love to say this is a hack, bad sadly its not, appearently the end of stream decission is in libogg
+                continue;
+            memcpy(context->buffer + context->buffer_index, &op, sizeof(ogg_packet));
+            context->buffer_index += sizeof(ogg_packet);
+            memcpy(context->buffer + context->buffer_index, op.packet, op.bytes);
+            context->buffer_index += op.bytes;
+//            av_log(avccontext, AV_LOG_DEBUG, "e%d / %d\n", context->buffer_index, op.bytes);
+        }
+    }
+
+    l=0;
+    if(context->buffer_index){
+        ogg_packet *op2= (ogg_packet*)context->buffer;
+        op2->packet = context->buffer + sizeof(ogg_packet);
+
+        l=  op2->bytes;
+        avccontext->coded_frame->pts= av_rescale_q(op2->granulepos, (AVRational){1, avccontext->sample_rate}, avccontext->time_base);
+        //FIXME we should reorder the user supplied pts and not assume that they are spaced by 1/sample_rate
+
+        memcpy(packets, op2->packet, l);
+        context->buffer_index -= l + sizeof(ogg_packet);
+        memcpy(context->buffer, context->buffer + l + sizeof(ogg_packet), context->buffer_index);
+//        av_log(avccontext, AV_LOG_DEBUG, "E%d\n", l);
+    }
+
+    return l;
+}
+
+
+static int oggvorbis_encode_close(AVCodecContext *avccontext) {
+    OggVorbisContext *context = avccontext->priv_data ;
+/*  ogg_packet op ; */
+
+    vorbis_analysis_wrote(&context->vd, 0) ; /* notify vorbisenc this is EOF */
+
+    vorbis_block_clear(&context->vb);
+    vorbis_dsp_clear(&context->vd);
+    vorbis_info_clear(&context->vi);
+
+    av_freep(&avccontext->coded_frame);
+    av_freep(&avccontext->extradata);
+
+    return 0 ;
+}
+
+
+AVCodec oggvorbis_encoder = {
+    "vorbis",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_VORBIS,
+    sizeof(OggVorbisContext),
+    oggvorbis_encode_init,
+    oggvorbis_encode_frame,
+    oggvorbis_encode_close,
+    .capabilities= CODEC_CAP_DELAY,
+} ;
+
+static int oggvorbis_decode_init(AVCodecContext *avccontext) {
+    OggVorbisContext *context = avccontext->priv_data ;
+    uint8_t *p= avccontext->extradata;
+    int i, hsizes[3];
+    unsigned char *headers[3], *extradata = avccontext->extradata;
+
+    vorbis_info_init(&context->vi) ;
+    vorbis_comment_init(&context->vc) ;
+
+    if(! avccontext->extradata_size || ! p) {
+        av_log(avccontext, AV_LOG_ERROR, "vorbis extradata absent\n");
+        return -1;
+    }
+
+    if(p[0] == 0 && p[1] == 30) {
+        for(i = 0; i < 3; i++){
+            hsizes[i] = *p++ << 8;
+            hsizes[i] += *p++;
+            headers[i] = p;
+            p += hsizes[i];
+        }
+    } else if(*p == 2) {
+        unsigned int offset = 1;
+        p++;
+        for(i=0; i<2; i++) {
+            hsizes[i] = 0;
+            while((*p == 0xFF) && (offset < avccontext->extradata_size)) {
+                hsizes[i] += 0xFF;
+                offset++;
+                p++;
+            }
+            if(offset >= avccontext->extradata_size - 1) {
+                av_log(avccontext, AV_LOG_ERROR,
+                       "vorbis header sizes damaged\n");
+                return -1;
+            }
+            hsizes[i] += *p;
+            offset++;
+            p++;
+        }
+        hsizes[2] = avccontext->extradata_size - hsizes[0]-hsizes[1]-offset;
+#if 0
+        av_log(avccontext, AV_LOG_DEBUG,
+               "vorbis header sizes: %d, %d, %d, / extradata_len is %d \n",
+               hsizes[0], hsizes[1], hsizes[2], avccontext->extradata_size);
+#endif
+        headers[0] = extradata + offset;
+        headers[1] = extradata + offset + hsizes[0];
+        headers[2] = extradata + offset + hsizes[0] + hsizes[1];
+    } else {
+        av_log(avccontext, AV_LOG_ERROR,
+               "vorbis initial header len is wrong: %d\n", *p);
+        return -1;
+    }
+
+    for(i=0; i<3; i++){
+        context->op.b_o_s= i==0;
+        context->op.bytes = hsizes[i];
+        context->op.packet = headers[i];
+        if(vorbis_synthesis_headerin(&context->vi, &context->vc, &context->op)<0){
+            av_log(avccontext, AV_LOG_ERROR, "%d. vorbis header damaged\n", i+1);
+            return -1;
+        }
+    }
+
+    avccontext->channels = context->vi.channels;
+    avccontext->sample_rate = context->vi.rate;
+    avccontext->time_base= (AVRational){1, avccontext->sample_rate};
+
+    vorbis_synthesis_init(&context->vd, &context->vi);
+    vorbis_block_init(&context->vd, &context->vb);
+
+    return 0 ;
+}
+
+
+static inline int conv(int samples, float **pcm, char *buf, int channels) {
+    int i, j, val ;
+    ogg_int16_t *ptr, *data = (ogg_int16_t*)buf ;
+    float *mono ;
+
+    for(i = 0 ; i < channels ; i++){
+        ptr = &data[i];
+        mono = pcm[i] ;
+
+        for(j = 0 ; j < samples ; j++) {
+
+            val = mono[j] * 32767.f;
+
+            if(val > 32767) val = 32767 ;
+            if(val < -32768) val = -32768 ;
+
+            *ptr = val ;
+            ptr += channels;
+        }
+    }
+
+    return 0 ;
+}
+
+
+static int oggvorbis_decode_frame(AVCodecContext *avccontext,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    OggVorbisContext *context = avccontext->priv_data ;
+    float **pcm ;
+    ogg_packet *op= &context->op;
+    int samples, total_samples, total_bytes;
+
+    if(!buf_size){
+    //FIXME flush
+        return 0;
+    }
+
+    op->packet = buf;
+    op->bytes  = buf_size;
+
+//    av_log(avccontext, AV_LOG_DEBUG, "%d %d %d %"PRId64" %"PRId64" %d %d\n", op->bytes, op->b_o_s, op->e_o_s, op->granulepos, op->packetno, buf_size, context->vi.rate);
+
+/*    for(i=0; i<op->bytes; i++)
+      av_log(avccontext, AV_LOG_DEBUG, "%02X ", op->packet[i]);
+    av_log(avccontext, AV_LOG_DEBUG, "\n");*/
+
+    if(vorbis_synthesis(&context->vb, op) == 0)
+        vorbis_synthesis_blockin(&context->vd, &context->vb) ;
+
+    total_samples = 0 ;
+    total_bytes = 0 ;
+
+    while((samples = vorbis_synthesis_pcmout(&context->vd, &pcm)) > 0) {
+        conv(samples, pcm, (char*)data + total_bytes, context->vi.channels) ;
+        total_bytes += samples * 2 * context->vi.channels ;
+        total_samples += samples ;
+        vorbis_synthesis_read(&context->vd, samples) ;
+    }
+
+    *data_size = total_bytes ;
+    return buf_size ;
+}
+
+
+static int oggvorbis_decode_close(AVCodecContext *avccontext) {
+    OggVorbisContext *context = avccontext->priv_data ;
+
+    vorbis_info_clear(&context->vi) ;
+    vorbis_comment_clear(&context->vc) ;
+
+    return 0 ;
+}
+
+
+AVCodec oggvorbis_decoder = {
+    "vorbis",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_VORBIS,
+    sizeof(OggVorbisContext),
+    oggvorbis_decode_init,
+    NULL,
+    oggvorbis_decode_close,
+    oggvorbis_decode_frame,
+    .capabilities= CODEC_CAP_DELAY,
+} ;
diff --git a/contrib/ffmpeg/libavcodec/opt.c b/contrib/ffmpeg/libavcodec/opt.c
new file mode 100644
index 000000000..a200d9a82
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/opt.c
@@ -0,0 +1,381 @@
+/*
+ * AVOptions
+ * Copyright (c) 2005 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file opt.c
+ * AVOptions
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "avcodec.h"
+#include "opt.h"
+#include "eval.h"
+
+//FIXME order them and do a bin search
+static AVOption *find_opt(void *v, const char *name, const char *unit){
+    AVClass *c= *(AVClass**)v; //FIXME silly way of storing AVClass
+    AVOption *o= c->option;
+
+    for(;o && o->name; o++){
+        if(!strcmp(o->name, name) && (!unit || !strcmp(o->unit, unit)) )
+            return o;
+    }
+    return NULL;
+}
+
+AVOption *av_next_option(void *obj, AVOption *last){
+    if(last && last[1].name) return ++last;
+    else if(last)            return NULL;
+    else                     return (*(AVClass**)obj)->option;
+}
+
+static AVOption *av_set_number(void *obj, const char *name, double num, int den, int64_t intnum){
+    AVOption *o= find_opt(obj, name, NULL);
+    void *dst;
+    if(!o || o->offset<=0)
+        return NULL;
+
+    if(o->max*den < num*intnum || o->min*den > num*intnum) {
+        av_log(NULL, AV_LOG_ERROR, "Value %lf for parameter '%s' out of range.\n", num, name);
+        return NULL;
+    }
+
+    dst= ((uint8_t*)obj) + o->offset;
+
+    switch(o->type){
+    case FF_OPT_TYPE_FLAGS:
+    case FF_OPT_TYPE_INT:   *(int       *)dst= lrintf(num/den)*intnum; break;
+    case FF_OPT_TYPE_INT64: *(int64_t   *)dst= lrintf(num/den)*intnum; break;
+    case FF_OPT_TYPE_FLOAT: *(float     *)dst= num*intnum/den;         break;
+    case FF_OPT_TYPE_DOUBLE:*(double    *)dst= num*intnum/den;         break;
+    case FF_OPT_TYPE_RATIONAL:
+        if((int)num == num) *(AVRational*)dst= (AVRational){num*intnum, den};
+        else                *(AVRational*)dst= av_d2q(num*intnum/den, 1<<24);
+    default:
+        return NULL;
+    }
+    return o;
+}
+
+static AVOption *set_all_opt(void *v, const char *unit, double d){
+    AVClass *c= *(AVClass**)v; //FIXME silly way of storing AVClass
+    AVOption *o= c->option;
+    AVOption *ret=NULL;
+
+    for(;o && o->name; o++){
+        if(o->type != FF_OPT_TYPE_CONST && o->unit && !strcmp(o->unit, unit)){
+            double tmp= d;
+            if(o->type == FF_OPT_TYPE_FLAGS)
+                tmp= av_get_int(v, o->name, NULL) | (int64_t)d;
+
+            av_set_number(v, o->name, tmp, 1, 1);
+            ret= o;
+        }
+    }
+    return ret;
+}
+
+static double const_values[]={
+    M_PI,
+    M_E,
+    FF_QP2LAMBDA,
+    0
+};
+
+static const char *const_names[]={
+    "PI",
+    "E",
+    "QP2LAMBDA",
+    0
+};
+
+AVOption *av_set_string(void *obj, const char *name, const char *val){
+    AVOption *o= find_opt(obj, name, NULL);
+    if(o && o->offset==0 && o->type == FF_OPT_TYPE_CONST && o->unit){
+        return set_all_opt(obj, o->unit, o->default_val);
+    }
+    if(!o || !val || o->offset<=0)
+        return NULL;
+    if(o->type != FF_OPT_TYPE_STRING){
+        for(;;){
+            int i;
+            char buf[256];
+            int cmd=0;
+            double d;
+            char *error = NULL;
+
+            if(*val == '+' || *val == '-')
+                cmd= *(val++);
+
+            for(i=0; i<sizeof(buf)-1 && val[i] && val[i]!='+' && val[i]!='-'; i++)
+                buf[i]= val[i];
+            buf[i]=0;
+            val+= i;
+
+            d = ff_eval2(buf, const_values, const_names, NULL, NULL, NULL, NULL, NULL, &error);
+            if(isnan(d)) {
+                AVOption *o_named= find_opt(obj, buf, o->unit);
+                if(o_named && o_named->type == FF_OPT_TYPE_CONST)
+                    d= o_named->default_val;
+                else if(!strcmp(buf, "default")) d= o->default_val;
+                else if(!strcmp(buf, "max"    )) d= o->max;
+                else if(!strcmp(buf, "min"    )) d= o->min;
+                else {
+                    if (!error)
+                        av_log(NULL, AV_LOG_ERROR, "Unable to parse option value \"%s\": %s\n", val, error);
+                    return NULL;
+                }
+            }
+            if(o->type == FF_OPT_TYPE_FLAGS){
+                if     (cmd=='+') d= av_get_int(obj, name, NULL) | (int64_t)d;
+                else if(cmd=='-') d= av_get_int(obj, name, NULL) &~(int64_t)d;
+            }else if(cmd=='-')
+                d= -d;
+
+            av_set_number(obj, name, d, 1, 1);
+            if(!*val)
+                return o;
+        }
+        return NULL;
+    }
+
+    memcpy(((uint8_t*)obj) + o->offset, val, sizeof(val));
+    return o;
+}
+
+AVOption *av_set_double(void *obj, const char *name, double n){
+    return av_set_number(obj, name, n, 1, 1);
+}
+
+AVOption *av_set_q(void *obj, const char *name, AVRational n){
+    return av_set_number(obj, name, n.num, n.den, 1);
+}
+
+AVOption *av_set_int(void *obj, const char *name, int64_t n){
+    return av_set_number(obj, name, 1, 1, n);
+}
+
+/**
+ *
+ * @param buf a buffer which is used for returning non string values as strings, can be NULL
+ * @param buf_len allocated length in bytes of buf
+ */
+const char *av_get_string(void *obj, const char *name, AVOption **o_out, char *buf, int buf_len){
+    AVOption *o= find_opt(obj, name, NULL);
+    void *dst;
+    if(!o || o->offset<=0)
+        return NULL;
+    if(o->type != FF_OPT_TYPE_STRING && (!buf || !buf_len))
+        return NULL;
+
+    dst= ((uint8_t*)obj) + o->offset;
+    if(o_out) *o_out= o;
+
+    if(o->type == FF_OPT_TYPE_STRING)
+        return dst;
+
+    switch(o->type){
+    case FF_OPT_TYPE_FLAGS:     snprintf(buf, buf_len, "0x%08X",*(int    *)dst);break;
+    case FF_OPT_TYPE_INT:       snprintf(buf, buf_len, "%d" , *(int    *)dst);break;
+    case FF_OPT_TYPE_INT64:     snprintf(buf, buf_len, "%"PRId64, *(int64_t*)dst);break;
+    case FF_OPT_TYPE_FLOAT:     snprintf(buf, buf_len, "%f" , *(float  *)dst);break;
+    case FF_OPT_TYPE_DOUBLE:    snprintf(buf, buf_len, "%f" , *(double *)dst);break;
+    case FF_OPT_TYPE_RATIONAL:  snprintf(buf, buf_len, "%d/%d", ((AVRational*)dst)->num, ((AVRational*)dst)->den);break;
+    default: return NULL;
+    }
+    return buf;
+}
+
+static int av_get_number(void *obj, const char *name, AVOption **o_out, double *num, int *den, int64_t *intnum){
+    AVOption *o= find_opt(obj, name, NULL);
+    void *dst;
+    if(!o || o->offset<=0)
+        goto error;
+
+    dst= ((uint8_t*)obj) + o->offset;
+
+    if(o_out) *o_out= o;
+
+    switch(o->type){
+    case FF_OPT_TYPE_FLAGS:
+    case FF_OPT_TYPE_INT:       *intnum= *(int    *)dst;return 0;
+    case FF_OPT_TYPE_INT64:     *intnum= *(int64_t*)dst;return 0;
+    case FF_OPT_TYPE_FLOAT:     *num=    *(float  *)dst;return 0;
+    case FF_OPT_TYPE_DOUBLE:    *num=    *(double *)dst;return 0;
+    case FF_OPT_TYPE_RATIONAL:  *intnum= ((AVRational*)dst)->num;
+                                *den   = ((AVRational*)dst)->den;
+                                                        return 0;
+    }
+error:
+    *den=*intnum=0;
+    return -1;
+}
+
+double av_get_double(void *obj, const char *name, AVOption **o_out){
+    int64_t intnum=1;
+    double num=1;
+    int den=1;
+
+    av_get_number(obj, name, o_out, &num, &den, &intnum);
+    return num*intnum/den;
+}
+
+AVRational av_get_q(void *obj, const char *name, AVOption **o_out){
+    int64_t intnum=1;
+    double num=1;
+    int den=1;
+
+    av_get_number(obj, name, o_out, &num, &den, &intnum);
+    if(num == 1.0 && (int)intnum == intnum)
+        return (AVRational){intnum, den};
+    else
+        return av_d2q(num*intnum/den, 1<<24);
+}
+
+int64_t av_get_int(void *obj, const char *name, AVOption **o_out){
+    int64_t intnum=1;
+    double num=1;
+    int den=1;
+
+    av_get_number(obj, name, o_out, &num, &den, &intnum);
+    return num*intnum/den;
+}
+
+static void opt_list(void *obj, void *av_log_obj, char *unit)
+{
+    AVOption *opt=NULL;
+
+    while((opt= av_next_option(obj, opt))){
+        if(!(opt->flags & (AV_OPT_FLAG_ENCODING_PARAM|AV_OPT_FLAG_DECODING_PARAM)))
+            continue;
+
+        /* Don't print CONST's on level one.
+         * Don't print anything but CONST's on level two.
+         * Only print items from the requested unit.
+         */
+        if (!unit && opt->type==FF_OPT_TYPE_CONST)
+            continue;
+        else if (unit && opt->type!=FF_OPT_TYPE_CONST)
+            continue;
+        else if (unit && opt->type==FF_OPT_TYPE_CONST && strcmp(unit, opt->unit))
+            continue;
+        else if (unit && opt->type == FF_OPT_TYPE_CONST)
+            av_log(av_log_obj, AV_LOG_INFO, "   %-15s ", opt->name);
+        else
+            av_log(av_log_obj, AV_LOG_INFO, "-%-17s ", opt->name);
+
+        switch( opt->type )
+        {
+            case FF_OPT_TYPE_FLAGS:
+                av_log( av_log_obj, AV_LOG_INFO, "%-7s ", "<flags>" );
+                break;
+            case FF_OPT_TYPE_INT:
+                av_log( av_log_obj, AV_LOG_INFO, "%-7s ", "<int>" );
+                break;
+            case FF_OPT_TYPE_INT64:
+                av_log( av_log_obj, AV_LOG_INFO, "%-7s ", "<int64>" );
+                break;
+            case FF_OPT_TYPE_DOUBLE:
+                av_log( av_log_obj, AV_LOG_INFO, "%-7s ", "<double>" );
+                break;
+            case FF_OPT_TYPE_FLOAT:
+                av_log( av_log_obj, AV_LOG_INFO, "%-7s ", "<float>" );
+                break;
+            case FF_OPT_TYPE_STRING:
+                av_log( av_log_obj, AV_LOG_INFO, "%-7s ", "<string>" );
+                break;
+            case FF_OPT_TYPE_RATIONAL:
+                av_log( av_log_obj, AV_LOG_INFO, "%-7s ", "<rational>" );
+                break;
+            case FF_OPT_TYPE_CONST:
+            default:
+                av_log( av_log_obj, AV_LOG_INFO, "%-7s ", "" );
+                break;
+        }
+        av_log(av_log_obj, AV_LOG_INFO, "%c", (opt->flags & AV_OPT_FLAG_ENCODING_PARAM) ? 'E' : '.');
+        av_log(av_log_obj, AV_LOG_INFO, "%c", (opt->flags & AV_OPT_FLAG_DECODING_PARAM) ? 'D' : '.');
+        av_log(av_log_obj, AV_LOG_INFO, "%c", (opt->flags & AV_OPT_FLAG_VIDEO_PARAM   ) ? 'V' : '.');
+        av_log(av_log_obj, AV_LOG_INFO, "%c", (opt->flags & AV_OPT_FLAG_AUDIO_PARAM   ) ? 'A' : '.');
+        av_log(av_log_obj, AV_LOG_INFO, "%c", (opt->flags & AV_OPT_FLAG_SUBTITLE_PARAM) ? 'S' : '.');
+
+        if(opt->help)
+            av_log(av_log_obj, AV_LOG_INFO, " %s", opt->help);
+        av_log(av_log_obj, AV_LOG_INFO, "\n");
+        if (opt->unit && opt->type != FF_OPT_TYPE_CONST) {
+            opt_list(obj, av_log_obj, opt->unit);
+        }
+    }
+}
+
+int av_opt_show(void *obj, void *av_log_obj){
+    if(!obj)
+        return -1;
+
+    av_log(av_log_obj, AV_LOG_INFO, "%s AVOptions:\n", (*(AVClass**)obj)->class_name);
+
+    opt_list(obj, av_log_obj, NULL);
+
+    return 0;
+}
+
+/** Set the values of the AVCodecContext or AVFormatContext structure.
+ * They are set to the defaults specified in the according AVOption options
+ * array default_val field.
+ *
+ * @param s AVCodecContext or AVFormatContext for which the defaults will be set
+ */
+void av_opt_set_defaults(void *s)
+{
+    AVOption *opt = NULL;
+    while ((opt = av_next_option(s, opt)) != NULL) {
+        switch(opt->type) {
+            case FF_OPT_TYPE_CONST:
+                /* Nothing to be done here */
+            break;
+            case FF_OPT_TYPE_FLAGS:
+            case FF_OPT_TYPE_INT: {
+                int val;
+                val = opt->default_val;
+                av_set_int(s, opt->name, val);
+            }
+            break;
+            case FF_OPT_TYPE_FLOAT: {
+                double val;
+                val = opt->default_val;
+                av_set_double(s, opt->name, val);
+            }
+            break;
+            case FF_OPT_TYPE_RATIONAL: {
+                AVRational val;
+                val = av_d2q(opt->default_val, INT_MAX);
+                av_set_q(s, opt->name, val);
+            }
+            break;
+            case FF_OPT_TYPE_STRING:
+                /* Cannot set default for string as default_val is of type * double */
+            break;
+            default:
+                av_log(s, AV_LOG_DEBUG, "AVOption type %d of option %s not implemented yet\n", opt->type, opt->name);
+        }
+    }
+}
+
diff --git a/contrib/ffmpeg/libavcodec/opt.h b/contrib/ffmpeg/libavcodec/opt.h
new file mode 100644
index 000000000..b8a17031b
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/opt.h
@@ -0,0 +1,83 @@
+/*
+ * AVOptions
+ * copyright (c) 2005 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVOPT_H
+#define AVOPT_H
+
+/**
+ * @file opt.h
+ * AVOptions
+ */
+
+enum AVOptionType{
+    FF_OPT_TYPE_FLAGS,
+    FF_OPT_TYPE_INT,
+    FF_OPT_TYPE_INT64,
+    FF_OPT_TYPE_DOUBLE,
+    FF_OPT_TYPE_FLOAT,
+    FF_OPT_TYPE_STRING,
+    FF_OPT_TYPE_RATIONAL,
+    FF_OPT_TYPE_CONST=128,
+};
+
+/**
+ * AVOption.
+ */
+typedef struct AVOption {
+    const char *name;
+
+    /**
+     * short English text help.
+     * @fixme what about other languages
+     */
+    const char *help;
+    int offset;             ///< offset to context structure where the parsed value should be stored
+    enum AVOptionType type;
+
+    double default_val;
+    double min;
+    double max;
+
+    int flags;
+#define AV_OPT_FLAG_ENCODING_PARAM  1   ///< a generic parameter which can be set by the user for muxing or encoding
+#define AV_OPT_FLAG_DECODING_PARAM  2   ///< a generic parameter which can be set by the user for demuxing or decoding
+#define AV_OPT_FLAG_METADATA        4   ///< some data extracted or inserted into the file like title, comment, ...
+#define AV_OPT_FLAG_AUDIO_PARAM     8
+#define AV_OPT_FLAG_VIDEO_PARAM     16
+#define AV_OPT_FLAG_SUBTITLE_PARAM  32
+//FIXME think about enc-audio, ... style flags
+    const char *unit;
+} AVOption;
+
+
+AVOption *av_set_string(void *obj, const char *name, const char *val);
+AVOption *av_set_double(void *obj, const char *name, double n);
+AVOption *av_set_q(void *obj, const char *name, AVRational n);
+AVOption *av_set_int(void *obj, const char *name, int64_t n);
+double av_get_double(void *obj, const char *name, AVOption **o_out);
+AVRational av_get_q(void *obj, const char *name, AVOption **o_out);
+int64_t av_get_int(void *obj, const char *name, AVOption **o_out);
+const char *av_get_string(void *obj, const char *name, AVOption **o_out, char *buf, int buf_len);
+AVOption *av_next_option(void *obj, AVOption *last);
+int av_opt_show(void *obj, void *av_log_obj);
+void av_opt_set_defaults(void *s);
+
+#endif
diff --git a/contrib/ffmpeg/libavcodec/os2thread.c b/contrib/ffmpeg/libavcodec/os2thread.c
new file mode 100644
index 000000000..c52b7ae02
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/os2thread.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+//#define DEBUG
+
+// Ported by Vlad Stelmahovsky
+
+#include "avcodec.h"
+#include "common.h"
+
+#define INCL_DOS
+#define INCL_DOSERRORS
+#define INCL_DOSDEVIOCTL
+#include <os2.h>
+
+typedef struct ThreadContext{
+    AVCodecContext *avctx;
+    int thread;
+    HEV work_sem;
+    HEV done_sem;
+    int (*func)(AVCodecContext *c, void *arg);
+    void *arg;
+    int ret;
+}ThreadContext;
+
+
+void thread_func(void *v){
+    ThreadContext *c= v;
+
+    for(;;){
+        //printf("thread_func %X enter wait\n", (int)v); fflush(stdout);
+        DosWaitEventSem(c->work_sem, SEM_INDEFINITE_WAIT);
+//        WaitForSingleObject(c->work_sem, INFINITE);
+//printf("thread_func %X after wait (func=%X)\n", (int)v, (int)c->func); fflush(stdout);
+        if(c->func)
+            c->ret= c->func(c->avctx, c->arg);
+        else
+            return;
+        //printf("thread_func %X signal complete\n", (int)v); fflush(stdout);
+        DosPostEventSem(c->done_sem);
+//        ReleaseSemaphore(c->done_sem, 1, 0);
+    }
+
+    return;
+}
+
+/**
+ * free what has been allocated by avcodec_thread_init().
+ * must be called after decoding has finished, especially dont call while avcodec_thread_execute() is running
+ */
+void avcodec_thread_free(AVCodecContext *s){
+    ThreadContext *c= s->thread_opaque;
+    int i;
+
+    for(i=0; i<s->thread_count; i++){
+
+        c[i].func= NULL;
+        DosPostEventSem(c[i].work_sem);
+        //        ReleaseSemaphore(c[i].work_sem, 1, 0);
+        DosWaitThread((PTID)&c[i].thread,DCWW_WAIT);
+//        WaitForSingleObject(c[i].thread, INFINITE);
+        if(c[i].work_sem) DosCloseEventSem(c[i].work_sem);//CloseHandle(c[i].work_sem);
+        if(c[i].done_sem) DosCloseEventSem(c[i].done_sem);//CloseHandle(c[i].done_sem);
+    }
+
+    av_freep(&s->thread_opaque);
+}
+
+int avcodec_thread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count){
+    ThreadContext *c= s->thread_opaque;
+    int i;
+
+    assert(s == c->avctx);
+    assert(count <= s->thread_count);
+
+    /* note, we can be certain that this is not called with the same AVCodecContext by different threads at the same time */
+
+    for(i=0; i<count; i++){
+
+        c[i].arg= arg[i];
+        c[i].func= func;
+        c[i].ret= 12345;
+
+        DosPostEventSem(c[i].work_sem);
+//        ReleaseSemaphore(c[i].work_sem, 1, 0);
+    }
+    for(i=0; i<count; i++){
+        DosWaitEventSem(c[i].done_sem,SEM_INDEFINITE_WAIT);
+//        WaitForSingleObject(c[i].done_sem, INFINITE);
+
+        c[i].func= NULL;
+        if(ret) ret[i]= c[i].ret;
+    }
+    return 0;
+}
+
+int avcodec_thread_init(AVCodecContext *s, int thread_count){
+    int i;
+    ThreadContext *c;
+    uint32_t threadid;
+
+    s->thread_count= thread_count;
+
+    assert(!s->thread_opaque);
+    c= av_mallocz(sizeof(ThreadContext)*thread_count);
+    s->thread_opaque= c;
+
+    for(i=0; i<thread_count; i++){
+//printf("init semaphors %d\n", i); fflush(stdout);
+        c[i].avctx= s;
+
+        if (DosCreateEventSem(NULL,&c[i].work_sem,DC_SEM_SHARED,0))
+            goto fail;
+        if (DosCreateEventSem(NULL,&c[i].done_sem,DC_SEM_SHARED,0))
+            goto fail;
+
+//printf("create thread %d\n", i); fflush(stdout);
+//        c[i].thread = (HANDLE)_beginthreadex(NULL, 0, thread_func, &c[i], 0, &threadid );
+        c[i].thread = _beginthread(thread_func, NULL, 0x10000, &c[i]);
+        if( c[i].thread <= 0 ) goto fail;
+    }
+//printf("init done\n"); fflush(stdout);
+
+    s->execute= avcodec_thread_execute;
+
+    return 0;
+fail:
+    avcodec_thread_free(s);
+    return -1;
+}
diff --git a/contrib/ffmpeg/libavcodec/parser.c b/contrib/ffmpeg/libavcodec/parser.c
new file mode 100644
index 000000000..72a3e55a3
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/parser.c
@@ -0,0 +1,860 @@
+/*
+ * Audio and Video frame extraction
+ * Copyright (c) 2003 Fabrice Bellard.
+ * Copyright (c) 2003 Michael Niedermayer.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+#include "mpegvideo.h"
+#include "mpegaudio.h"
+#include "parser.h"
+
+AVCodecParser *av_first_parser = NULL;
+
+void av_register_codec_parser(AVCodecParser *parser)
+{
+    parser->next = av_first_parser;
+    av_first_parser = parser;
+}
+
+AVCodecParserContext *av_parser_init(int codec_id)
+{
+    AVCodecParserContext *s;
+    AVCodecParser *parser;
+    int ret;
+
+    if(codec_id == CODEC_ID_NONE)
+        return NULL;
+
+    for(parser = av_first_parser; parser != NULL; parser = parser->next) {
+        if (parser->codec_ids[0] == codec_id ||
+            parser->codec_ids[1] == codec_id ||
+            parser->codec_ids[2] == codec_id ||
+            parser->codec_ids[3] == codec_id ||
+            parser->codec_ids[4] == codec_id)
+            goto found;
+    }
+    return NULL;
+ found:
+    s = av_mallocz(sizeof(AVCodecParserContext));
+    if (!s)
+        return NULL;
+    s->parser = parser;
+    s->priv_data = av_mallocz(parser->priv_data_size);
+    if (!s->priv_data) {
+        av_free(s);
+        return NULL;
+    }
+    if (parser->parser_init) {
+        ret = parser->parser_init(s);
+        if (ret != 0) {
+            av_free(s->priv_data);
+            av_free(s);
+            return NULL;
+        }
+    }
+    s->fetch_timestamp=1;
+    return s;
+}
+
+/**
+ *
+ * @param buf           input
+ * @param buf_size      input length, to signal EOF, this should be 0 (so that the last frame can be output)
+ * @param pts           input presentation timestamp
+ * @param dts           input decoding timestamp
+ * @param poutbuf       will contain a pointer to the first byte of the output frame
+ * @param poutbuf_size  will contain the length of the output frame
+ * @return the number of bytes of the input bitstream used
+ *
+ * Example:
+ * @code
+ *   while(in_len){
+ *       len = av_parser_parse(myparser, AVCodecContext, &data, &size,
+ *                                       in_data, in_len,
+ *                                       pts, dts);
+ *       in_data += len;
+ *       in_len  -= len;
+ *
+ *       decode_frame(data, size);
+ *   }
+ * @endcode
+ */
+int av_parser_parse(AVCodecParserContext *s,
+                    AVCodecContext *avctx,
+                    uint8_t **poutbuf, int *poutbuf_size,
+                    const uint8_t *buf, int buf_size,
+                    int64_t pts, int64_t dts)
+{
+    int index, i, k;
+    uint8_t dummy_buf[FF_INPUT_BUFFER_PADDING_SIZE];
+
+    if (buf_size == 0) {
+        /* padding is always necessary even if EOF, so we add it here */
+        memset(dummy_buf, 0, sizeof(dummy_buf));
+        buf = dummy_buf;
+    } else {
+        /* add a new packet descriptor */
+        k = (s->cur_frame_start_index + 1) & (AV_PARSER_PTS_NB - 1);
+        s->cur_frame_start_index = k;
+        s->cur_frame_offset[k] = s->cur_offset;
+        s->cur_frame_pts[k] = pts;
+        s->cur_frame_dts[k] = dts;
+
+        /* fill first PTS/DTS */
+        if (s->fetch_timestamp){
+            s->fetch_timestamp=0;
+            s->last_pts = pts;
+            s->last_dts = dts;
+            s->cur_frame_pts[k] =
+            s->cur_frame_dts[k] = AV_NOPTS_VALUE;
+        }
+    }
+
+    /* WARNING: the returned index can be negative */
+    index = s->parser->parser_parse(s, avctx, poutbuf, poutbuf_size, buf, buf_size);
+//av_log(NULL, AV_LOG_DEBUG, "parser: in:%"PRId64", %"PRId64", out:%"PRId64", %"PRId64", in:%d out:%d id:%d\n", pts, dts, s->last_pts, s->last_dts, buf_size, *poutbuf_size, avctx->codec_id);
+    /* update the file pointer */
+    if (*poutbuf_size) {
+        /* fill the data for the current frame */
+        s->frame_offset = s->last_frame_offset;
+        s->pts = s->last_pts;
+        s->dts = s->last_dts;
+
+        /* offset of the next frame */
+        s->last_frame_offset = s->cur_offset + index;
+        /* find the packet in which the new frame starts. It
+           is tricky because of MPEG video start codes
+           which can begin in one packet and finish in
+           another packet. In the worst case, an MPEG
+           video start code could be in 4 different
+           packets. */
+        k = s->cur_frame_start_index;
+        for(i = 0; i < AV_PARSER_PTS_NB; i++) {
+            if (s->last_frame_offset >= s->cur_frame_offset[k])
+                break;
+            k = (k - 1) & (AV_PARSER_PTS_NB - 1);
+        }
+
+        s->last_pts = s->cur_frame_pts[k];
+        s->last_dts = s->cur_frame_dts[k];
+
+        /* some parsers tell us the packet size even before seeing the first byte of the next packet,
+           so the next pts/dts is in the next chunk */
+        if(index == buf_size){
+            s->fetch_timestamp=1;
+        }
+    }
+    if (index < 0)
+        index = 0;
+    s->cur_offset += index;
+    return index;
+}
+
+/**
+ *
+ * @return 0 if the output buffer is a subset of the input, 1 if it is allocated and must be freed
+ * @deprecated use AVBitstreamFilter
+ */
+int av_parser_change(AVCodecParserContext *s,
+                     AVCodecContext *avctx,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size, int keyframe){
+
+    if(s && s->parser->split){
+        if((avctx->flags & CODEC_FLAG_GLOBAL_HEADER) || (avctx->flags2 & CODEC_FLAG2_LOCAL_HEADER)){
+            int i= s->parser->split(avctx, buf, buf_size);
+            buf += i;
+            buf_size -= i;
+        }
+    }
+
+    /* cast to avoid warning about discarding qualifiers */
+    *poutbuf= (uint8_t *) buf;
+    *poutbuf_size= buf_size;
+    if(avctx->extradata){
+        if(  (keyframe && (avctx->flags2 & CODEC_FLAG2_LOCAL_HEADER))
+            /*||(s->pict_type != I_TYPE && (s->flags & PARSER_FLAG_DUMP_EXTRADATA_AT_NOKEY))*/
+            /*||(? && (s->flags & PARSER_FLAG_DUMP_EXTRADATA_AT_BEGIN)*/){
+            int size= buf_size + avctx->extradata_size;
+            *poutbuf_size= size;
+            *poutbuf= av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE);
+
+            memcpy(*poutbuf, avctx->extradata, avctx->extradata_size);
+            memcpy((*poutbuf) + avctx->extradata_size, buf, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+void av_parser_close(AVCodecParserContext *s)
+{
+    if (s->parser->parser_close)
+        s->parser->parser_close(s);
+    av_free(s->priv_data);
+    av_free(s);
+}
+
+/*****************************************************/
+
+/**
+ * combines the (truncated) bitstream to a complete frame
+ * @returns -1 if no complete frame could be created
+ */
+int ff_combine_frame(ParseContext *pc, int next, uint8_t **buf, int *buf_size)
+{
+#if 0
+    if(pc->overread){
+        printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
+        printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
+    }
+#endif
+
+    /* copy overreaded bytes from last frame into buffer */
+    for(; pc->overread>0; pc->overread--){
+        pc->buffer[pc->index++]= pc->buffer[pc->overread_index++];
+    }
+
+    /* flush remaining if EOF */
+    if(!*buf_size && next == END_NOT_FOUND){
+        next= 0;
+    }
+
+    pc->last_index= pc->index;
+
+    /* copy into buffer end return */
+    if(next == END_NOT_FOUND){
+        pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
+
+        memcpy(&pc->buffer[pc->index], *buf, *buf_size);
+        pc->index += *buf_size;
+        return -1;
+    }
+
+    *buf_size=
+    pc->overread_index= pc->index + next;
+
+    /* append to buffer */
+    if(pc->index){
+        pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
+
+        memcpy(&pc->buffer[pc->index], *buf, next + FF_INPUT_BUFFER_PADDING_SIZE );
+        pc->index = 0;
+        *buf= pc->buffer;
+    }
+
+    /* store overread bytes */
+    for(;next < 0; next++){
+        pc->state = (pc->state<<8) | pc->buffer[pc->last_index + next];
+        pc->overread++;
+    }
+
+#if 0
+    if(pc->overread){
+        printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
+        printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
+    }
+#endif
+
+    return 0;
+}
+
+void ff_parse_close(AVCodecParserContext *s)
+{
+    ParseContext *pc = s->priv_data;
+
+    av_free(pc->buffer);
+}
+
+void ff_parse1_close(AVCodecParserContext *s)
+{
+    ParseContext1 *pc1 = s->priv_data;
+
+    av_free(pc1->pc.buffer);
+    av_free(pc1->enc);
+}
+
+/*************************/
+
+#ifdef CONFIG_MPEG4VIDEO_PARSER
+/* used by parser */
+/* XXX: make it use less memory */
+static int av_mpeg4_decode_header(AVCodecParserContext *s1,
+                                  AVCodecContext *avctx,
+                                  const uint8_t *buf, int buf_size)
+{
+    ParseContext1 *pc = s1->priv_data;
+    MpegEncContext *s = pc->enc;
+    GetBitContext gb1, *gb = &gb1;
+    int ret;
+
+    s->avctx = avctx;
+    s->current_picture_ptr = &s->current_picture;
+
+    if (avctx->extradata_size && pc->first_picture){
+        init_get_bits(gb, avctx->extradata, avctx->extradata_size*8);
+        ret = ff_mpeg4_decode_picture_header(s, gb);
+    }
+
+    init_get_bits(gb, buf, 8 * buf_size);
+    ret = ff_mpeg4_decode_picture_header(s, gb);
+    if (s->width) {
+        avcodec_set_dimensions(avctx, s->width, s->height);
+    }
+    s1->pict_type= s->pict_type;
+    pc->first_picture = 0;
+    return ret;
+}
+
+static int mpeg4video_parse_init(AVCodecParserContext *s)
+{
+    ParseContext1 *pc = s->priv_data;
+
+    pc->enc = av_mallocz(sizeof(MpegEncContext));
+    if (!pc->enc)
+        return -1;
+    pc->first_picture = 1;
+    return 0;
+}
+
+static int mpeg4video_parse(AVCodecParserContext *s,
+                           AVCodecContext *avctx,
+                           uint8_t **poutbuf, int *poutbuf_size,
+                           const uint8_t *buf, int buf_size)
+{
+    ParseContext *pc = s->priv_data;
+    int next;
+
+    if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){
+        next= buf_size;
+    }else{
+        next= ff_mpeg4_find_frame_end(pc, buf, buf_size);
+
+        if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
+            *poutbuf = NULL;
+            *poutbuf_size = 0;
+            return buf_size;
+        }
+    }
+    av_mpeg4_decode_header(s, avctx, buf, buf_size);
+
+    *poutbuf = (uint8_t *)buf;
+    *poutbuf_size = buf_size;
+    return next;
+}
+#endif
+
+int ff_mpeg4video_split(AVCodecContext *avctx,
+                           const uint8_t *buf, int buf_size)
+{
+    int i;
+    uint32_t state= -1;
+
+    for(i=0; i<buf_size; i++){
+        state= (state<<8) | buf[i];
+        if(state == 0x1B3 || state == 0x1B6)
+            return i-3;
+    }
+    return 0;
+}
+
+/*************************/
+
+#ifdef CONFIG_MPEGAUDIO_PARSER
+typedef struct MpegAudioParseContext {
+    uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE];    /* input buffer */
+    uint8_t *inbuf_ptr;
+    int frame_size;
+    int free_format_frame_size;
+    int free_format_next_header;
+    uint32_t header;
+    int header_count;
+} MpegAudioParseContext;
+
+#define MPA_HEADER_SIZE 4
+
+/* header + layer + bitrate + freq + lsf/mpeg25 */
+#undef SAME_HEADER_MASK /* mpegaudio.h defines different version */
+#define SAME_HEADER_MASK \
+   (0xffe00000 | (3 << 17) | (3 << 10) | (3 << 19))
+
+static int mpegaudio_parse_init(AVCodecParserContext *s1)
+{
+    MpegAudioParseContext *s = s1->priv_data;
+    s->inbuf_ptr = s->inbuf;
+    return 0;
+}
+
+static int mpegaudio_parse(AVCodecParserContext *s1,
+                           AVCodecContext *avctx,
+                           uint8_t **poutbuf, int *poutbuf_size,
+                           const uint8_t *buf, int buf_size)
+{
+    MpegAudioParseContext *s = s1->priv_data;
+    int len, ret, sr;
+    uint32_t header;
+    const uint8_t *buf_ptr;
+
+    *poutbuf = NULL;
+    *poutbuf_size = 0;
+    buf_ptr = buf;
+    while (buf_size > 0) {
+        len = s->inbuf_ptr - s->inbuf;
+        if (s->frame_size == 0) {
+            /* special case for next header for first frame in free
+               format case (XXX: find a simpler method) */
+            if (s->free_format_next_header != 0) {
+                s->inbuf[0] = s->free_format_next_header >> 24;
+                s->inbuf[1] = s->free_format_next_header >> 16;
+                s->inbuf[2] = s->free_format_next_header >> 8;
+                s->inbuf[3] = s->free_format_next_header;
+                s->inbuf_ptr = s->inbuf + 4;
+                s->free_format_next_header = 0;
+                goto got_header;
+            }
+            /* no header seen : find one. We need at least MPA_HEADER_SIZE
+               bytes to parse it */
+            len = FFMIN(MPA_HEADER_SIZE - len, buf_size);
+            if (len > 0) {
+                memcpy(s->inbuf_ptr, buf_ptr, len);
+                buf_ptr += len;
+                buf_size -= len;
+                s->inbuf_ptr += len;
+            }
+            if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) {
+            got_header:
+                header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
+                    (s->inbuf[2] << 8) | s->inbuf[3];
+
+                ret = mpa_decode_header(avctx, header, &sr);
+                if (ret < 0) {
+                    s->header_count= -2;
+                    /* no sync found : move by one byte (inefficient, but simple!) */
+                    memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
+                    s->inbuf_ptr--;
+                    dprintf("skip %x\n", header);
+                    /* reset free format frame size to give a chance
+                       to get a new bitrate */
+                    s->free_format_frame_size = 0;
+                } else {
+                    if((header&SAME_HEADER_MASK) != (s->header&SAME_HEADER_MASK) && s->header)
+                        s->header_count= -3;
+                    s->header= header;
+                    s->header_count++;
+                    s->frame_size = ret;
+
+#if 0
+                    /* free format: prepare to compute frame size */
+                    if (decode_header(s, header) == 1) {
+                        s->frame_size = -1;
+                    }
+#endif
+                }
+                if(s->header_count > 1)
+                    avctx->sample_rate= sr;
+            }
+        } else
+#if 0
+        if (s->frame_size == -1) {
+            /* free format : find next sync to compute frame size */
+            len = MPA_MAX_CODED_FRAME_SIZE - len;
+            if (len > buf_size)
+                len = buf_size;
+            if (len == 0) {
+                /* frame too long: resync */
+                s->frame_size = 0;
+                memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
+                s->inbuf_ptr--;
+            } else {
+                uint8_t *p, *pend;
+                uint32_t header1;
+                int padding;
+
+                memcpy(s->inbuf_ptr, buf_ptr, len);
+                /* check for header */
+                p = s->inbuf_ptr - 3;
+                pend = s->inbuf_ptr + len - 4;
+                while (p <= pend) {
+                    header = (p[0] << 24) | (p[1] << 16) |
+                        (p[2] << 8) | p[3];
+                    header1 = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
+                        (s->inbuf[2] << 8) | s->inbuf[3];
+                    /* check with high probability that we have a
+                       valid header */
+                    if ((header & SAME_HEADER_MASK) ==
+                        (header1 & SAME_HEADER_MASK)) {
+                        /* header found: update pointers */
+                        len = (p + 4) - s->inbuf_ptr;
+                        buf_ptr += len;
+                        buf_size -= len;
+                        s->inbuf_ptr = p;
+                        /* compute frame size */
+                        s->free_format_next_header = header;
+                        s->free_format_frame_size = s->inbuf_ptr - s->inbuf;
+                        padding = (header1 >> 9) & 1;
+                        if (s->layer == 1)
+                            s->free_format_frame_size -= padding * 4;
+                        else
+                            s->free_format_frame_size -= padding;
+                        dprintf("free frame size=%d padding=%d\n",
+                                s->free_format_frame_size, padding);
+                        decode_header(s, header1);
+                        goto next_data;
+                    }
+                    p++;
+                }
+                /* not found: simply increase pointers */
+                buf_ptr += len;
+                s->inbuf_ptr += len;
+                buf_size -= len;
+            }
+        } else
+#endif
+        if (len < s->frame_size) {
+            if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE)
+                s->frame_size = MPA_MAX_CODED_FRAME_SIZE;
+            len = FFMIN(s->frame_size - len, buf_size);
+            memcpy(s->inbuf_ptr, buf_ptr, len);
+            buf_ptr += len;
+            s->inbuf_ptr += len;
+            buf_size -= len;
+        }
+
+        if(s->frame_size > 0 && buf_ptr - buf == s->inbuf_ptr - s->inbuf
+           && buf_size + buf_ptr - buf >= s->frame_size){
+            if(s->header_count > 0){
+                *poutbuf = buf;
+                *poutbuf_size = s->frame_size;
+            }
+            buf_ptr = buf + s->frame_size;
+            s->inbuf_ptr = s->inbuf;
+            s->frame_size = 0;
+            break;
+        }
+
+        //    next_data:
+        if (s->frame_size > 0 &&
+            (s->inbuf_ptr - s->inbuf) >= s->frame_size) {
+            if(s->header_count > 0){
+                *poutbuf = s->inbuf;
+                *poutbuf_size = s->inbuf_ptr - s->inbuf;
+            }
+            s->inbuf_ptr = s->inbuf;
+            s->frame_size = 0;
+            break;
+        }
+    }
+    return buf_ptr - buf;
+}
+#endif /* CONFIG_MPEGAUDIO_PARSER */
+
+#if defined(CONFIG_AC3_PARSER) || defined(CONFIG_AAC_PARSER)
+/* also used for ADTS AAC */
+typedef struct AC3ParseContext {
+    uint8_t *inbuf_ptr;
+    int frame_size;
+    int header_size;
+    int (*sync)(const uint8_t *buf, int *channels, int *sample_rate,
+                int *bit_rate, int *samples);
+    uint8_t inbuf[8192]; /* input buffer */
+} AC3ParseContext;
+
+#define AC3_HEADER_SIZE 7
+#define AAC_HEADER_SIZE 7
+
+#ifdef CONFIG_AC3_PARSER
+static const int ac3_sample_rates[4] = {
+    48000, 44100, 32000, 0
+};
+
+static const int ac3_frame_sizes[64][3] = {
+    { 64,   69,   96   },
+    { 64,   70,   96   },
+    { 80,   87,   120  },
+    { 80,   88,   120  },
+    { 96,   104,  144  },
+    { 96,   105,  144  },
+    { 112,  121,  168  },
+    { 112,  122,  168  },
+    { 128,  139,  192  },
+    { 128,  140,  192  },
+    { 160,  174,  240  },
+    { 160,  175,  240  },
+    { 192,  208,  288  },
+    { 192,  209,  288  },
+    { 224,  243,  336  },
+    { 224,  244,  336  },
+    { 256,  278,  384  },
+    { 256,  279,  384  },
+    { 320,  348,  480  },
+    { 320,  349,  480  },
+    { 384,  417,  576  },
+    { 384,  418,  576  },
+    { 448,  487,  672  },
+    { 448,  488,  672  },
+    { 512,  557,  768  },
+    { 512,  558,  768  },
+    { 640,  696,  960  },
+    { 640,  697,  960  },
+    { 768,  835,  1152 },
+    { 768,  836,  1152 },
+    { 896,  975,  1344 },
+    { 896,  976,  1344 },
+    { 1024, 1114, 1536 },
+    { 1024, 1115, 1536 },
+    { 1152, 1253, 1728 },
+    { 1152, 1254, 1728 },
+    { 1280, 1393, 1920 },
+    { 1280, 1394, 1920 },
+};
+
+static const int ac3_bitrates[64] = {
+    32, 32, 40, 40, 48, 48, 56, 56, 64, 64, 80, 80, 96, 96, 112, 112,
+    128, 128, 160, 160, 192, 192, 224, 224, 256, 256, 320, 320, 384,
+    384, 448, 448, 512, 512, 576, 576, 640, 640,
+};
+
+static const int ac3_channels[8] = {
+    2, 1, 2, 3, 3, 4, 4, 5
+};
+#endif /* CONFIG_AC3_PARSER */
+
+#ifdef CONFIG_AAC_PARSER
+static const int aac_sample_rates[16] = {
+    96000, 88200, 64000, 48000, 44100, 32000,
+    24000, 22050, 16000, 12000, 11025, 8000, 7350
+};
+
+static const int aac_channels[8] = {
+    0, 1, 2, 3, 4, 5, 6, 8
+};
+#endif
+
+#ifdef CONFIG_AC3_PARSER
+static int ac3_sync(const uint8_t *buf, int *channels, int *sample_rate,
+                    int *bit_rate, int *samples)
+{
+    unsigned int fscod, frmsizecod, acmod, bsid, lfeon;
+    GetBitContext bits;
+
+    init_get_bits(&bits, buf, AC3_HEADER_SIZE * 8);
+
+    if(get_bits(&bits, 16) != 0x0b77)
+        return 0;
+
+    skip_bits(&bits, 16);       /* crc */
+    fscod = get_bits(&bits, 2);
+    frmsizecod = get_bits(&bits, 6);
+
+    if(!ac3_sample_rates[fscod])
+        return 0;
+
+    bsid = get_bits(&bits, 5);
+    if(bsid > 8)
+        return 0;
+    skip_bits(&bits, 3);        /* bsmod */
+    acmod = get_bits(&bits, 3);
+    if(acmod & 1 && acmod != 1)
+        skip_bits(&bits, 2);    /* cmixlev */
+    if(acmod & 4)
+        skip_bits(&bits, 2);    /* surmixlev */
+    if(acmod & 2)
+        skip_bits(&bits, 2);    /* dsurmod */
+    lfeon = get_bits1(&bits);
+
+    *sample_rate = ac3_sample_rates[fscod];
+    *bit_rate = ac3_bitrates[frmsizecod] * 1000;
+    *channels = ac3_channels[acmod] + lfeon;
+    *samples = 6 * 256;
+
+    return ac3_frame_sizes[frmsizecod][fscod] * 2;
+}
+#endif /* CONFIG_AC3_PARSER */
+
+#ifdef CONFIG_AAC_PARSER
+static int aac_sync(const uint8_t *buf, int *channels, int *sample_rate,
+                    int *bit_rate, int *samples)
+{
+    GetBitContext bits;
+    int size, rdb, ch, sr;
+
+    init_get_bits(&bits, buf, AAC_HEADER_SIZE * 8);
+
+    if(get_bits(&bits, 12) != 0xfff)
+        return 0;
+
+    skip_bits1(&bits);          /* id */
+    skip_bits(&bits, 2);        /* layer */
+    skip_bits1(&bits);          /* protection_absent */
+    skip_bits(&bits, 2);        /* profile_objecttype */
+    sr = get_bits(&bits, 4);    /* sample_frequency_index */
+    if(!aac_sample_rates[sr])
+        return 0;
+    skip_bits1(&bits);          /* private_bit */
+    ch = get_bits(&bits, 3);    /* channel_configuration */
+    if(!aac_channels[ch])
+        return 0;
+    skip_bits1(&bits);          /* original/copy */
+    skip_bits1(&bits);          /* home */
+
+    /* adts_variable_header */
+    skip_bits1(&bits);          /* copyright_identification_bit */
+    skip_bits1(&bits);          /* copyright_identification_start */
+    size = get_bits(&bits, 13); /* aac_frame_length */
+    skip_bits(&bits, 11);       /* adts_buffer_fullness */
+    rdb = get_bits(&bits, 2);   /* number_of_raw_data_blocks_in_frame */
+
+    *channels = aac_channels[ch];
+    *sample_rate = aac_sample_rates[sr];
+    *samples = (rdb + 1) * 1024;
+    *bit_rate = size * 8 * *sample_rate / *samples;
+
+    return size;
+}
+#endif /* CONFIG_AAC_PARSER */
+
+#ifdef CONFIG_AC3_PARSER
+static int ac3_parse_init(AVCodecParserContext *s1)
+{
+    AC3ParseContext *s = s1->priv_data;
+    s->inbuf_ptr = s->inbuf;
+    s->header_size = AC3_HEADER_SIZE;
+    s->sync = ac3_sync;
+    return 0;
+}
+#endif
+
+#ifdef CONFIG_AAC_PARSER
+static int aac_parse_init(AVCodecParserContext *s1)
+{
+    AC3ParseContext *s = s1->priv_data;
+    s->inbuf_ptr = s->inbuf;
+    s->header_size = AAC_HEADER_SIZE;
+    s->sync = aac_sync;
+    return 0;
+}
+#endif
+
+/* also used for ADTS AAC */
+static int ac3_parse(AVCodecParserContext *s1,
+                     AVCodecContext *avctx,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size)
+{
+    AC3ParseContext *s = s1->priv_data;
+    const uint8_t *buf_ptr;
+    int len, sample_rate, bit_rate, channels, samples;
+
+    *poutbuf = NULL;
+    *poutbuf_size = 0;
+
+    buf_ptr = buf;
+    while (buf_size > 0) {
+        len = s->inbuf_ptr - s->inbuf;
+        if (s->frame_size == 0) {
+            /* no header seen : find one. We need at least s->header_size
+               bytes to parse it */
+            len = FFMIN(s->header_size - len, buf_size);
+
+            memcpy(s->inbuf_ptr, buf_ptr, len);
+            buf_ptr += len;
+            s->inbuf_ptr += len;
+            buf_size -= len;
+            if ((s->inbuf_ptr - s->inbuf) == s->header_size) {
+                len = s->sync(s->inbuf, &channels, &sample_rate, &bit_rate,
+                              &samples);
+                if (len == 0) {
+                    /* no sync found : move by one byte (inefficient, but simple!) */
+                    memmove(s->inbuf, s->inbuf + 1, s->header_size - 1);
+                    s->inbuf_ptr--;
+                } else {
+                    s->frame_size = len;
+                    /* update codec info */
+                    avctx->sample_rate = sample_rate;
+                    /* set channels,except if the user explicitly requests 1 or 2 channels, XXX/FIXME this is a bit ugly */
+                    if(avctx->codec_id == CODEC_ID_AC3){
+                        if(avctx->channels!=1 && avctx->channels!=2){
+                            avctx->channels = channels;
+                        }
+                    } else {
+                        avctx->channels = channels;
+                    }
+                    avctx->bit_rate = bit_rate;
+                    avctx->frame_size = samples;
+                }
+            }
+        } else {
+            len = FFMIN(s->frame_size - len, buf_size);
+
+            memcpy(s->inbuf_ptr, buf_ptr, len);
+            buf_ptr += len;
+            s->inbuf_ptr += len;
+            buf_size -= len;
+
+            if(s->inbuf_ptr - s->inbuf == s->frame_size){
+                *poutbuf = s->inbuf;
+                *poutbuf_size = s->frame_size;
+                s->inbuf_ptr = s->inbuf;
+                s->frame_size = 0;
+                break;
+            }
+        }
+    }
+    return buf_ptr - buf;
+}
+#endif /* CONFIG_AC3_PARSER || CONFIG_AAC_PARSER */
+
+#ifdef CONFIG_MPEG4VIDEO_PARSER
+AVCodecParser mpeg4video_parser = {
+    { CODEC_ID_MPEG4 },
+    sizeof(ParseContext1),
+    mpeg4video_parse_init,
+    mpeg4video_parse,
+    ff_parse1_close,
+    ff_mpeg4video_split,
+};
+#endif
+#ifdef CONFIG_MPEGAUDIO_PARSER
+AVCodecParser mpegaudio_parser = {
+    { CODEC_ID_MP2, CODEC_ID_MP3 },
+    sizeof(MpegAudioParseContext),
+    mpegaudio_parse_init,
+    mpegaudio_parse,
+    NULL,
+};
+#endif
+#ifdef CONFIG_AC3_PARSER
+AVCodecParser ac3_parser = {
+    { CODEC_ID_AC3 },
+    sizeof(AC3ParseContext),
+    ac3_parse_init,
+    ac3_parse,
+    NULL,
+};
+#endif
+#ifdef CONFIG_AAC_PARSER
+AVCodecParser aac_parser = {
+    { CODEC_ID_AAC },
+    sizeof(AC3ParseContext),
+    aac_parse_init,
+    ac3_parse,
+    NULL,
+};
+#endif
diff --git a/contrib/ffmpeg/libavcodec/parser.h b/contrib/ffmpeg/libavcodec/parser.h
new file mode 100644
index 000000000..3496b341f
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/parser.h
@@ -0,0 +1,63 @@
+/*
+ * AVCodecParser prototypes and definitions
+ * Copyright (c) 2003 Fabrice Bellard.
+ * Copyright (c) 2003 Michael Niedermayer.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef FFMPEG_PARSER_H
+#define FFMPEG_PARSER_H
+
+typedef struct ParseContext{
+    uint8_t *buffer;
+    int index;
+    int last_index;
+    unsigned int buffer_size;
+    uint32_t state;             ///< contains the last few bytes in MSB order
+    int frame_start_found;
+    int overread;               ///< the number of bytes which where irreversibly read from the next frame
+    int overread_index;         ///< the index into ParseContext.buffer of the overreaded bytes
+} ParseContext;
+
+struct MpegEncContext;
+
+typedef struct ParseContext1{
+    ParseContext pc;
+/* XXX/FIXME PC1 vs. PC */
+    /* MPEG2 specific */
+    AVRational frame_rate;
+    int progressive_sequence;
+    int width, height;
+
+    /* XXX: suppress that, needed by MPEG4 */
+    struct MpegEncContext *enc;
+    int first_picture;
+} ParseContext1;
+
+#define END_NOT_FOUND (-100)
+
+int ff_combine_frame(ParseContext *pc, int next, uint8_t **buf, int *buf_size);
+int ff_mpeg4video_split(AVCodecContext *avctx, const uint8_t *buf,
+                        int buf_size);
+void ff_parse_close(AVCodecParserContext *s);
+void ff_parse1_close(AVCodecParserContext *s);
+
+/* h263dec.c */
+int ff_mpeg4_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size);
+
+#endif /* !FFMPEG_PARSER_H */
diff --git a/contrib/ffmpeg/libavcodec/pcm.c b/contrib/ffmpeg/libavcodec/pcm.c
new file mode 100644
index 000000000..26c38b329
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/pcm.c
@@ -0,0 +1,548 @@
+/*
+ * PCM codecs
+ * Copyright (c) 2001 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file pcm.c
+ * PCM codecs
+ */
+
+#include "avcodec.h"
+#include "bitstream.h" // for ff_reverse
+
+/* from g711.c by SUN microsystems (unrestricted use) */
+
+#define         SIGN_BIT        (0x80)      /* Sign bit for a A-law byte. */
+#define         QUANT_MASK      (0xf)       /* Quantization field mask. */
+#define         NSEGS           (8)         /* Number of A-law segments. */
+#define         SEG_SHIFT       (4)         /* Left shift for segment number. */
+#define         SEG_MASK        (0x70)      /* Segment field mask. */
+
+#define         BIAS            (0x84)      /* Bias for linear code. */
+
+/*
+ * alaw2linear() - Convert an A-law value to 16-bit linear PCM
+ *
+ */
+static int alaw2linear(unsigned char a_val)
+{
+        int t;
+        int seg;
+
+        a_val ^= 0x55;
+
+        t = a_val & QUANT_MASK;
+        seg = ((unsigned)a_val & SEG_MASK) >> SEG_SHIFT;
+        if(seg) t= (t + t + 1 + 32) << (seg + 2);
+        else    t= (t + t + 1     ) << 3;
+
+        return ((a_val & SIGN_BIT) ? t : -t);
+}
+
+static int ulaw2linear(unsigned char u_val)
+{
+        int t;
+
+        /* Complement to obtain normal u-law value. */
+        u_val = ~u_val;
+
+        /*
+         * Extract and bias the quantization bits. Then
+         * shift up by the segment number and subtract out the bias.
+         */
+        t = ((u_val & QUANT_MASK) << 3) + BIAS;
+        t <<= ((unsigned)u_val & SEG_MASK) >> SEG_SHIFT;
+
+        return ((u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS));
+}
+
+/* 16384 entries per table */
+static uint8_t *linear_to_alaw = NULL;
+static int linear_to_alaw_ref = 0;
+
+static uint8_t *linear_to_ulaw = NULL;
+static int linear_to_ulaw_ref = 0;
+
+static void build_xlaw_table(uint8_t *linear_to_xlaw,
+                             int (*xlaw2linear)(unsigned char),
+                             int mask)
+{
+    int i, j, v, v1, v2;
+
+    j = 0;
+    for(i=0;i<128;i++) {
+        if (i != 127) {
+            v1 = xlaw2linear(i ^ mask);
+            v2 = xlaw2linear((i + 1) ^ mask);
+            v = (v1 + v2 + 4) >> 3;
+        } else {
+            v = 8192;
+        }
+        for(;j<v;j++) {
+            linear_to_xlaw[8192 + j] = (i ^ mask);
+            if (j > 0)
+                linear_to_xlaw[8192 - j] = (i ^ (mask ^ 0x80));
+        }
+    }
+    linear_to_xlaw[0] = linear_to_xlaw[1];
+}
+
+static int pcm_encode_init(AVCodecContext *avctx)
+{
+    avctx->frame_size = 1;
+    switch(avctx->codec->id) {
+    case CODEC_ID_PCM_ALAW:
+        if (linear_to_alaw_ref == 0) {
+            linear_to_alaw = av_malloc(16384);
+            if (!linear_to_alaw)
+                return -1;
+            build_xlaw_table(linear_to_alaw, alaw2linear, 0xd5);
+        }
+        linear_to_alaw_ref++;
+        break;
+    case CODEC_ID_PCM_MULAW:
+        if (linear_to_ulaw_ref == 0) {
+            linear_to_ulaw = av_malloc(16384);
+            if (!linear_to_ulaw)
+                return -1;
+            build_xlaw_table(linear_to_ulaw, ulaw2linear, 0xff);
+        }
+        linear_to_ulaw_ref++;
+        break;
+    default:
+        break;
+    }
+
+    switch(avctx->codec->id) {
+    case CODEC_ID_PCM_S32LE:
+    case CODEC_ID_PCM_S32BE:
+    case CODEC_ID_PCM_U32LE:
+    case CODEC_ID_PCM_U32BE:
+        avctx->block_align = 4 * avctx->channels;
+        break;
+    case CODEC_ID_PCM_S24LE:
+    case CODEC_ID_PCM_S24BE:
+    case CODEC_ID_PCM_U24LE:
+    case CODEC_ID_PCM_U24BE:
+    case CODEC_ID_PCM_S24DAUD:
+        avctx->block_align = 3 * avctx->channels;
+        break;
+    case CODEC_ID_PCM_S16LE:
+    case CODEC_ID_PCM_S16BE:
+    case CODEC_ID_PCM_U16LE:
+    case CODEC_ID_PCM_U16BE:
+        avctx->block_align = 2 * avctx->channels;
+        break;
+    case CODEC_ID_PCM_S8:
+    case CODEC_ID_PCM_U8:
+    case CODEC_ID_PCM_MULAW:
+    case CODEC_ID_PCM_ALAW:
+        avctx->block_align = avctx->channels;
+        break;
+    default:
+        break;
+    }
+
+    avctx->coded_frame= avcodec_alloc_frame();
+    avctx->coded_frame->key_frame= 1;
+
+    return 0;
+}
+
+static int pcm_encode_close(AVCodecContext *avctx)
+{
+    av_freep(&avctx->coded_frame);
+
+    switch(avctx->codec->id) {
+    case CODEC_ID_PCM_ALAW:
+        if (--linear_to_alaw_ref == 0)
+            av_free(linear_to_alaw);
+        break;
+    case CODEC_ID_PCM_MULAW:
+        if (--linear_to_ulaw_ref == 0)
+            av_free(linear_to_ulaw);
+        break;
+    default:
+        /* nothing to free */
+        break;
+    }
+    return 0;
+}
+
+/**
+ * \brief convert samples from 16 bit
+ * \param bps byte per sample for the destination format, must be >= 2
+ * \param le 0 for big-, 1 for little-endian
+ * \param us 0 for signed, 1 for unsigned output
+ * \param samples input samples
+ * \param dst output samples
+ * \param n number of samples in samples buffer.
+ */
+static inline void encode_from16(int bps, int le, int us,
+                               short **samples, uint8_t **dst, int n) {
+    if (bps > 2)
+        memset(*dst, 0, n * bps);
+    if (le) *dst += bps - 2;
+    for(;n>0;n--) {
+        register int v = *(*samples)++;
+        if (us) v += 0x8000;
+        (*dst)[le] = v >> 8;
+        (*dst)[1 - le] = v;
+        *dst += bps;
+    }
+    if (le) *dst -= bps - 2;
+}
+
+static int pcm_encode_frame(AVCodecContext *avctx,
+                            unsigned char *frame, int buf_size, void *data)
+{
+    int n, sample_size, v;
+    short *samples;
+    unsigned char *dst;
+
+    switch(avctx->codec->id) {
+    case CODEC_ID_PCM_S32LE:
+    case CODEC_ID_PCM_S32BE:
+    case CODEC_ID_PCM_U32LE:
+    case CODEC_ID_PCM_U32BE:
+        sample_size = 4;
+        break;
+    case CODEC_ID_PCM_S24LE:
+    case CODEC_ID_PCM_S24BE:
+    case CODEC_ID_PCM_U24LE:
+    case CODEC_ID_PCM_U24BE:
+    case CODEC_ID_PCM_S24DAUD:
+        sample_size = 3;
+        break;
+    case CODEC_ID_PCM_S16LE:
+    case CODEC_ID_PCM_S16BE:
+    case CODEC_ID_PCM_U16LE:
+    case CODEC_ID_PCM_U16BE:
+        sample_size = 2;
+        break;
+    default:
+        sample_size = 1;
+        break;
+    }
+    n = buf_size / sample_size;
+    samples = data;
+    dst = frame;
+
+    switch(avctx->codec->id) {
+    case CODEC_ID_PCM_S32LE:
+        encode_from16(4, 1, 0, &samples, &dst, n);
+        break;
+    case CODEC_ID_PCM_S32BE:
+        encode_from16(4, 0, 0, &samples, &dst, n);
+        break;
+    case CODEC_ID_PCM_U32LE:
+        encode_from16(4, 1, 1, &samples, &dst, n);
+        break;
+    case CODEC_ID_PCM_U32BE:
+        encode_from16(4, 0, 1, &samples, &dst, n);
+        break;
+    case CODEC_ID_PCM_S24LE:
+        encode_from16(3, 1, 0, &samples, &dst, n);
+        break;
+    case CODEC_ID_PCM_S24BE:
+        encode_from16(3, 0, 0, &samples, &dst, n);
+        break;
+    case CODEC_ID_PCM_U24LE:
+        encode_from16(3, 1, 1, &samples, &dst, n);
+        break;
+    case CODEC_ID_PCM_U24BE:
+        encode_from16(3, 0, 1, &samples, &dst, n);
+        break;
+    case CODEC_ID_PCM_S24DAUD:
+        for(;n>0;n--) {
+            uint32_t tmp = ff_reverse[*samples >> 8] +
+                           (ff_reverse[*samples & 0xff] << 8);
+            tmp <<= 4; // sync flags would go here
+            dst[2] = tmp & 0xff;
+            tmp >>= 8;
+            dst[1] = tmp & 0xff;
+            dst[0] = tmp >> 8;
+            samples++;
+            dst += 3;
+        }
+        break;
+    case CODEC_ID_PCM_S16LE:
+        for(;n>0;n--) {
+            v = *samples++;
+            dst[0] = v & 0xff;
+            dst[1] = v >> 8;
+            dst += 2;
+        }
+        break;
+    case CODEC_ID_PCM_S16BE:
+        for(;n>0;n--) {
+            v = *samples++;
+            dst[0] = v >> 8;
+            dst[1] = v;
+            dst += 2;
+        }
+        break;
+    case CODEC_ID_PCM_U16LE:
+        for(;n>0;n--) {
+            v = *samples++;
+            v += 0x8000;
+            dst[0] = v & 0xff;
+            dst[1] = v >> 8;
+            dst += 2;
+        }
+        break;
+    case CODEC_ID_PCM_U16BE:
+        for(;n>0;n--) {
+            v = *samples++;
+            v += 0x8000;
+            dst[0] = v >> 8;
+            dst[1] = v;
+            dst += 2;
+        }
+        break;
+    case CODEC_ID_PCM_S8:
+        for(;n>0;n--) {
+            v = *samples++;
+            dst[0] = v >> 8;
+            dst++;
+        }
+        break;
+    case CODEC_ID_PCM_U8:
+        for(;n>0;n--) {
+            v = *samples++;
+            dst[0] = (v >> 8) + 128;
+            dst++;
+        }
+        break;
+    case CODEC_ID_PCM_ALAW:
+        for(;n>0;n--) {
+            v = *samples++;
+            dst[0] = linear_to_alaw[(v + 32768) >> 2];
+            dst++;
+        }
+        break;
+    case CODEC_ID_PCM_MULAW:
+        for(;n>0;n--) {
+            v = *samples++;
+            dst[0] = linear_to_ulaw[(v + 32768) >> 2];
+            dst++;
+        }
+        break;
+    default:
+        return -1;
+    }
+    //avctx->frame_size = (dst - frame) / (sample_size * avctx->channels);
+
+    return dst - frame;
+}
+
+typedef struct PCMDecode {
+    short table[256];
+} PCMDecode;
+
+static int pcm_decode_init(AVCodecContext * avctx)
+{
+    PCMDecode *s = avctx->priv_data;
+    int i;
+
+    switch(avctx->codec->id) {
+    case CODEC_ID_PCM_ALAW:
+        for(i=0;i<256;i++)
+            s->table[i] = alaw2linear(i);
+        break;
+    case CODEC_ID_PCM_MULAW:
+        for(i=0;i<256;i++)
+            s->table[i] = ulaw2linear(i);
+        break;
+    default:
+        break;
+    }
+    return 0;
+}
+
+/**
+ * \brief convert samples to 16 bit
+ * \param bps byte per sample for the source format, must be >= 2
+ * \param le 0 for big-, 1 for little-endian
+ * \param us 0 for signed, 1 for unsigned input
+ * \param src input samples
+ * \param samples output samples
+ * \param src_len number of bytes in src
+ */
+static inline void decode_to16(int bps, int le, int us,
+                               uint8_t **src, short **samples, int src_len)
+{
+    register int n = src_len / bps;
+    if (le) *src += bps - 2;
+    for(;n>0;n--) {
+        *(*samples)++ = ((*src)[le] << 8 | (*src)[1 - le]) - (us?0x8000:0);
+        *src += bps;
+    }
+    if (le) *src -= bps - 2;
+}
+
+static int pcm_decode_frame(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    PCMDecode *s = avctx->priv_data;
+    int n;
+    short *samples;
+    uint8_t *src;
+
+    samples = data;
+    src = buf;
+
+    if(buf_size > AVCODEC_MAX_AUDIO_FRAME_SIZE/2)
+        buf_size = AVCODEC_MAX_AUDIO_FRAME_SIZE/2;
+
+    switch(avctx->codec->id) {
+    case CODEC_ID_PCM_S32LE:
+        decode_to16(4, 1, 0, &src, &samples, buf_size);
+        break;
+    case CODEC_ID_PCM_S32BE:
+        decode_to16(4, 0, 0, &src, &samples, buf_size);
+        break;
+    case CODEC_ID_PCM_U32LE:
+        decode_to16(4, 1, 1, &src, &samples, buf_size);
+        break;
+    case CODEC_ID_PCM_U32BE:
+        decode_to16(4, 0, 1, &src, &samples, buf_size);
+        break;
+    case CODEC_ID_PCM_S24LE:
+        decode_to16(3, 1, 0, &src, &samples, buf_size);
+        break;
+    case CODEC_ID_PCM_S24BE:
+        decode_to16(3, 0, 0, &src, &samples, buf_size);
+        break;
+    case CODEC_ID_PCM_U24LE:
+        decode_to16(3, 1, 1, &src, &samples, buf_size);
+        break;
+    case CODEC_ID_PCM_U24BE:
+        decode_to16(3, 0, 1, &src, &samples, buf_size);
+        break;
+    case CODEC_ID_PCM_S24DAUD:
+        n = buf_size / 3;
+        for(;n>0;n--) {
+          uint32_t v = src[0] << 16 | src[1] << 8 | src[2];
+          v >>= 4; // sync flags are here
+          *samples++ = ff_reverse[(v >> 8) & 0xff] +
+                       (ff_reverse[v & 0xff] << 8);
+          src += 3;
+        }
+        break;
+    case CODEC_ID_PCM_S16LE:
+        n = buf_size >> 1;
+        for(;n>0;n--) {
+            *samples++ = src[0] | (src[1] << 8);
+            src += 2;
+        }
+        break;
+    case CODEC_ID_PCM_S16BE:
+        n = buf_size >> 1;
+        for(;n>0;n--) {
+            *samples++ = (src[0] << 8) | src[1];
+            src += 2;
+        }
+        break;
+    case CODEC_ID_PCM_U16LE:
+        n = buf_size >> 1;
+        for(;n>0;n--) {
+            *samples++ = (src[0] | (src[1] << 8)) - 0x8000;
+            src += 2;
+        }
+        break;
+    case CODEC_ID_PCM_U16BE:
+        n = buf_size >> 1;
+        for(;n>0;n--) {
+            *samples++ = ((src[0] << 8) | src[1]) - 0x8000;
+            src += 2;
+        }
+        break;
+    case CODEC_ID_PCM_S8:
+        n = buf_size;
+        for(;n>0;n--) {
+            *samples++ = src[0] << 8;
+            src++;
+        }
+        break;
+    case CODEC_ID_PCM_U8:
+        n = buf_size;
+        for(;n>0;n--) {
+            *samples++ = ((int)src[0] - 128) << 8;
+            src++;
+        }
+        break;
+    case CODEC_ID_PCM_ALAW:
+    case CODEC_ID_PCM_MULAW:
+        n = buf_size;
+        for(;n>0;n--) {
+            *samples++ = s->table[src[0]];
+            src++;
+        }
+        break;
+    default:
+        return -1;
+    }
+    *data_size = (uint8_t *)samples - (uint8_t *)data;
+    return src - buf;
+}
+
+#define PCM_CODEC(id, name)                     \
+AVCodec name ## _encoder = {                    \
+    #name,                                      \
+    CODEC_TYPE_AUDIO,                           \
+    id,                                         \
+    0,                                          \
+    pcm_encode_init,                            \
+    pcm_encode_frame,                           \
+    pcm_encode_close,                           \
+    NULL,                                       \
+};                                              \
+AVCodec name ## _decoder = {                    \
+    #name,                                      \
+    CODEC_TYPE_AUDIO,                           \
+    id,                                         \
+    sizeof(PCMDecode),                          \
+    pcm_decode_init,                            \
+    NULL,                                       \
+    NULL,                                       \
+    pcm_decode_frame,                           \
+}
+
+PCM_CODEC(CODEC_ID_PCM_S32LE, pcm_s32le);
+PCM_CODEC(CODEC_ID_PCM_S32BE, pcm_s32be);
+PCM_CODEC(CODEC_ID_PCM_U32LE, pcm_u32le);
+PCM_CODEC(CODEC_ID_PCM_U32BE, pcm_u32be);
+PCM_CODEC(CODEC_ID_PCM_S24LE, pcm_s24le);
+PCM_CODEC(CODEC_ID_PCM_S24BE, pcm_s24be);
+PCM_CODEC(CODEC_ID_PCM_U24LE, pcm_u24le);
+PCM_CODEC(CODEC_ID_PCM_U24BE, pcm_u24be);
+PCM_CODEC(CODEC_ID_PCM_S24DAUD, pcm_s24daud);
+PCM_CODEC(CODEC_ID_PCM_S16LE, pcm_s16le);
+PCM_CODEC(CODEC_ID_PCM_S16BE, pcm_s16be);
+PCM_CODEC(CODEC_ID_PCM_U16LE, pcm_u16le);
+PCM_CODEC(CODEC_ID_PCM_U16BE, pcm_u16be);
+PCM_CODEC(CODEC_ID_PCM_S8, pcm_s8);
+PCM_CODEC(CODEC_ID_PCM_U8, pcm_u8);
+PCM_CODEC(CODEC_ID_PCM_ALAW, pcm_alaw);
+PCM_CODEC(CODEC_ID_PCM_MULAW, pcm_mulaw);
+
+#undef PCM_CODEC
diff --git a/contrib/ffmpeg/libavcodec/png.c b/contrib/ffmpeg/libavcodec/png.c
new file mode 100644
index 000000000..a257492b7
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/png.c
@@ -0,0 +1,968 @@
+/*
+ * PNG image format
+ * Copyright (c) 2003 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+
+/* TODO:
+ * - add 2, 4 and 16 bit depth support
+ * - use filters when generating a png (better compression)
+ */
+
+#ifdef CONFIG_ZLIB
+#include <zlib.h>
+
+//#define DEBUG
+
+#define PNG_COLOR_MASK_PALETTE    1
+#define PNG_COLOR_MASK_COLOR      2
+#define PNG_COLOR_MASK_ALPHA      4
+
+#define PNG_COLOR_TYPE_GRAY 0
+#define PNG_COLOR_TYPE_PALETTE  (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_PALETTE)
+#define PNG_COLOR_TYPE_RGB        (PNG_COLOR_MASK_COLOR)
+#define PNG_COLOR_TYPE_RGB_ALPHA  (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_ALPHA)
+#define PNG_COLOR_TYPE_GRAY_ALPHA (PNG_COLOR_MASK_ALPHA)
+
+#define PNG_FILTER_VALUE_NONE  0
+#define PNG_FILTER_VALUE_SUB   1
+#define PNG_FILTER_VALUE_UP    2
+#define PNG_FILTER_VALUE_AVG   3
+#define PNG_FILTER_VALUE_PAETH 4
+
+#define PNG_IHDR      0x0001
+#define PNG_IDAT      0x0002
+#define PNG_ALLIMAGE  0x0004
+#define PNG_PLTE      0x0008
+
+#define NB_PASSES 7
+
+#define IOBUF_SIZE 4096
+
+typedef struct PNGContext {
+    uint8_t *bytestream;
+    uint8_t *bytestream_start;
+    uint8_t *bytestream_end;
+    AVFrame picture;
+
+    int state;
+    int width, height;
+    int bit_depth;
+    int color_type;
+    int compression_type;
+    int interlace_type;
+    int filter_type;
+    int channels;
+    int bits_per_pixel;
+    int bpp;
+
+    uint8_t *image_buf;
+    int image_linesize;
+    uint32_t palette[256];
+    uint8_t *crow_buf;
+    uint8_t *last_row;
+    uint8_t *tmp_row;
+    int pass;
+    int crow_size; /* compressed row size (include filter type) */
+    int row_size; /* decompressed row size */
+    int pass_row_size; /* decompress row size of the current pass */
+    int y;
+    z_stream zstream;
+    uint8_t buf[IOBUF_SIZE];
+} PNGContext;
+
+static unsigned int get32(uint8_t **b){
+    (*b) += 4;
+    return ((*b)[-4]<<24) + ((*b)[-3]<<16) + ((*b)[-2]<<8) + (*b)[-1];
+}
+
+#ifdef CONFIG_ENCODERS
+static void put32(uint8_t **b, unsigned int v){
+    *(*b)++= v>>24;
+    *(*b)++= v>>16;
+    *(*b)++= v>>8;
+    *(*b)++= v;
+}
+#endif
+
+static const uint8_t pngsig[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+
+/* Mask to determine which y pixels are valid in a pass */
+static const uint8_t png_pass_ymask[NB_PASSES] = {
+    0x80, 0x80, 0x08, 0x88, 0x22, 0xaa, 0x55,
+};
+
+/* Mask to determine which y pixels can be written in a pass */
+static const uint8_t png_pass_dsp_ymask[NB_PASSES] = {
+    0xff, 0xff, 0x0f, 0xcc, 0x33, 0xff, 0x55,
+};
+
+/* minimum x value */
+static const uint8_t png_pass_xmin[NB_PASSES] = {
+    0, 4, 0, 2, 0, 1, 0
+};
+
+/* x shift to get row width */
+static const uint8_t png_pass_xshift[NB_PASSES] = {
+    3, 3, 2, 2, 1, 1, 0
+};
+
+/* Mask to determine which pixels are valid in a pass */
+static const uint8_t png_pass_mask[NB_PASSES] = {
+    0x80, 0x08, 0x88, 0x22, 0xaa, 0x55, 0xff
+};
+
+/* Mask to determine which pixels to overwrite while displaying */
+static const uint8_t png_pass_dsp_mask[NB_PASSES] = {
+    0xff, 0x0f, 0xff, 0x33, 0xff, 0x55, 0xff
+};
+#if 0
+static int png_probe(AVProbeData *pd)
+{
+    if (pd->buf_size >= 8 &&
+        memcmp(pd->buf, pngsig, 8) == 0)
+        return AVPROBE_SCORE_MAX;
+    else
+        return 0;
+}
+#endif
+static void *png_zalloc(void *opaque, unsigned int items, unsigned int size)
+{
+    if(items >= UINT_MAX / size)
+        return NULL;
+    return av_malloc(items * size);
+}
+
+static void png_zfree(void *opaque, void *ptr)
+{
+    av_free(ptr);
+}
+
+static int png_get_nb_channels(int color_type)
+{
+    int channels;
+    channels = 1;
+    if ((color_type & (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_PALETTE)) ==
+        PNG_COLOR_MASK_COLOR)
+        channels = 3;
+    if (color_type & PNG_COLOR_MASK_ALPHA)
+        channels++;
+    return channels;
+}
+
+/* compute the row size of an interleaved pass */
+static int png_pass_row_size(int pass, int bits_per_pixel, int width)
+{
+    int shift, xmin, pass_width;
+
+    xmin = png_pass_xmin[pass];
+    if (width <= xmin)
+        return 0;
+    shift = png_pass_xshift[pass];
+    pass_width = (width - xmin + (1 << shift) - 1) >> shift;
+    return (pass_width * bits_per_pixel + 7) >> 3;
+}
+
+/* NOTE: we try to construct a good looking image at each pass. width
+   is the original image width. We also do pixel format convertion at
+   this stage */
+static void png_put_interlaced_row(uint8_t *dst, int width,
+                                   int bits_per_pixel, int pass,
+                                   int color_type, const uint8_t *src)
+{
+    int x, mask, dsp_mask, j, src_x, b, bpp;
+    uint8_t *d;
+    const uint8_t *s;
+
+    mask = png_pass_mask[pass];
+    dsp_mask = png_pass_dsp_mask[pass];
+    switch(bits_per_pixel) {
+    case 1:
+        /* we must intialize the line to zero before writing to it */
+        if (pass == 0)
+            memset(dst, 0, (width + 7) >> 3);
+        src_x = 0;
+        for(x = 0; x < width; x++) {
+            j = (x & 7);
+            if ((dsp_mask << j) & 0x80) {
+                b = (src[src_x >> 3] >> (7 - (src_x & 7))) & 1;
+                dst[x >> 3] |= b << (7 - j);
+            }
+            if ((mask << j) & 0x80)
+                src_x++;
+        }
+        break;
+    default:
+        bpp = bits_per_pixel >> 3;
+        d = dst;
+        s = src;
+        if (color_type == PNG_COLOR_TYPE_RGB_ALPHA) {
+            for(x = 0; x < width; x++) {
+                j = x & 7;
+                if ((dsp_mask << j) & 0x80) {
+                    *(uint32_t *)d = (s[3] << 24) | (s[0] << 16) | (s[1] << 8) | s[2];
+                }
+                d += bpp;
+                if ((mask << j) & 0x80)
+                    s += bpp;
+            }
+        } else {
+            for(x = 0; x < width; x++) {
+                j = x & 7;
+                if ((dsp_mask << j) & 0x80) {
+                    memcpy(d, s, bpp);
+                }
+                d += bpp;
+                if ((mask << j) & 0x80)
+                    s += bpp;
+            }
+        }
+        break;
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+static void png_get_interlaced_row(uint8_t *dst, int row_size,
+                                   int bits_per_pixel, int pass,
+                                   const uint8_t *src, int width)
+{
+    int x, mask, dst_x, j, b, bpp;
+    uint8_t *d;
+    const uint8_t *s;
+
+    mask = png_pass_mask[pass];
+    switch(bits_per_pixel) {
+    case 1:
+        memset(dst, 0, row_size);
+        dst_x = 0;
+        for(x = 0; x < width; x++) {
+            j = (x & 7);
+            if ((mask << j) & 0x80) {
+                b = (src[x >> 3] >> (7 - j)) & 1;
+                dst[dst_x >> 3] |= b << (7 - (dst_x & 7));
+                dst_x++;
+            }
+        }
+        break;
+    default:
+        bpp = bits_per_pixel >> 3;
+        d = dst;
+        s = src;
+        for(x = 0; x < width; x++) {
+            j = x & 7;
+            if ((mask << j) & 0x80) {
+                memcpy(d, s, bpp);
+                d += bpp;
+            }
+            s += bpp;
+        }
+        break;
+    }
+}
+#endif
+
+/* XXX: optimize */
+/* NOTE: 'dst' can be equal to 'last' */
+static void png_filter_row(uint8_t *dst, int filter_type,
+                           uint8_t *src, uint8_t *last, int size, int bpp)
+{
+    int i, p;
+
+    switch(filter_type) {
+    case PNG_FILTER_VALUE_NONE:
+        memcpy(dst, src, size);
+        break;
+    case PNG_FILTER_VALUE_SUB:
+        for(i = 0; i < bpp; i++) {
+            dst[i] = src[i];
+        }
+        for(i = bpp; i < size; i++) {
+            p = dst[i - bpp];
+            dst[i] = p + src[i];
+        }
+        break;
+    case PNG_FILTER_VALUE_UP:
+        for(i = 0; i < size; i++) {
+            p = last[i];
+            dst[i] = p + src[i];
+        }
+        break;
+    case PNG_FILTER_VALUE_AVG:
+        for(i = 0; i < bpp; i++) {
+            p = (last[i] >> 1);
+            dst[i] = p + src[i];
+        }
+        for(i = bpp; i < size; i++) {
+            p = ((dst[i - bpp] + last[i]) >> 1);
+            dst[i] = p + src[i];
+        }
+        break;
+    case PNG_FILTER_VALUE_PAETH:
+        for(i = 0; i < bpp; i++) {
+            p = last[i];
+            dst[i] = p + src[i];
+        }
+        for(i = bpp; i < size; i++) {
+            int a, b, c, pa, pb, pc;
+
+            a = dst[i - bpp];
+            b = last[i];
+            c = last[i - bpp];
+
+            p = b - c;
+            pc = a - c;
+
+            pa = abs(p);
+            pb = abs(pc);
+            pc = abs(p + pc);
+
+            if (pa <= pb && pa <= pc)
+                p = a;
+            else if (pb <= pc)
+                p = b;
+            else
+                p = c;
+            dst[i] = p + src[i];
+        }
+        break;
+    }
+}
+
+#ifdef CONFIG_ENCODERS
+static void convert_from_rgba32(uint8_t *dst, const uint8_t *src, int width)
+{
+    uint8_t *d;
+    int j;
+    unsigned int v;
+
+    d = dst;
+    for(j = 0; j < width; j++) {
+        v = ((const uint32_t *)src)[j];
+        d[0] = v >> 16;
+        d[1] = v >> 8;
+        d[2] = v;
+        d[3] = v >> 24;
+        d += 4;
+    }
+}
+#endif
+
+#ifdef CONFIG_DECODERS
+static void convert_to_rgba32(uint8_t *dst, const uint8_t *src, int width)
+{
+    int j;
+    unsigned int r, g, b, a;
+
+    for(j = 0;j < width; j++) {
+        r = src[0];
+        g = src[1];
+        b = src[2];
+        a = src[3];
+        *(uint32_t *)dst = (a << 24) | (r << 16) | (g << 8) | b;
+        dst += 4;
+        src += 4;
+    }
+}
+
+/* process exactly one decompressed row */
+static void png_handle_row(PNGContext *s)
+{
+    uint8_t *ptr, *last_row;
+    int got_line;
+
+    if (!s->interlace_type) {
+        ptr = s->image_buf + s->image_linesize * s->y;
+        /* need to swap bytes correctly for RGB_ALPHA */
+        if (s->color_type == PNG_COLOR_TYPE_RGB_ALPHA) {
+            png_filter_row(s->tmp_row, s->crow_buf[0], s->crow_buf + 1,
+                           s->last_row, s->row_size, s->bpp);
+            memcpy(s->last_row, s->tmp_row, s->row_size);
+            convert_to_rgba32(ptr, s->tmp_row, s->width);
+        } else {
+            /* in normal case, we avoid one copy */
+            if (s->y == 0)
+                last_row = s->last_row;
+            else
+                last_row = ptr - s->image_linesize;
+
+            png_filter_row(ptr, s->crow_buf[0], s->crow_buf + 1,
+                           last_row, s->row_size, s->bpp);
+        }
+        s->y++;
+        if (s->y == s->height) {
+            s->state |= PNG_ALLIMAGE;
+        }
+    } else {
+        got_line = 0;
+        for(;;) {
+            ptr = s->image_buf + s->image_linesize * s->y;
+            if ((png_pass_ymask[s->pass] << (s->y & 7)) & 0x80) {
+                /* if we already read one row, it is time to stop to
+                   wait for the next one */
+                if (got_line)
+                    break;
+                png_filter_row(s->tmp_row, s->crow_buf[0], s->crow_buf + 1,
+                               s->last_row, s->pass_row_size, s->bpp);
+                memcpy(s->last_row, s->tmp_row, s->pass_row_size);
+                got_line = 1;
+            }
+            if ((png_pass_dsp_ymask[s->pass] << (s->y & 7)) & 0x80) {
+                /* NOTE: rgba32 is handled directly in png_put_interlaced_row */
+                png_put_interlaced_row(ptr, s->width, s->bits_per_pixel, s->pass,
+                                       s->color_type, s->last_row);
+            }
+            s->y++;
+            if (s->y == s->height) {
+                for(;;) {
+                    if (s->pass == NB_PASSES - 1) {
+                        s->state |= PNG_ALLIMAGE;
+                        goto the_end;
+                    } else {
+                        s->pass++;
+                        s->y = 0;
+                        s->pass_row_size = png_pass_row_size(s->pass,
+                                                             s->bits_per_pixel,
+                                                             s->width);
+                        s->crow_size = s->pass_row_size + 1;
+                        if (s->pass_row_size != 0)
+                            break;
+                        /* skip pass if empty row */
+                    }
+                }
+            }
+        }
+    the_end: ;
+    }
+}
+
+static int png_decode_idat(PNGContext *s, int length)
+{
+    int ret;
+    s->zstream.avail_in = length;
+    s->zstream.next_in = s->bytestream;
+    s->bytestream += length;
+
+    if(s->bytestream > s->bytestream_end)
+        return -1;
+
+    /* decode one line if possible */
+    while (s->zstream.avail_in > 0) {
+        ret = inflate(&s->zstream, Z_PARTIAL_FLUSH);
+        if (ret != Z_OK && ret != Z_STREAM_END) {
+            return -1;
+        }
+        if (s->zstream.avail_out == 0) {
+            if (!(s->state & PNG_ALLIMAGE)) {
+                png_handle_row(s);
+            }
+            s->zstream.avail_out = s->crow_size;
+            s->zstream.next_out = s->crow_buf;
+        }
+    }
+    return 0;
+}
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    PNGContext * const s = avctx->priv_data;
+    AVFrame *picture = data;
+    AVFrame * const p= (AVFrame*)&s->picture;
+    uint32_t tag, length;
+    int ret, crc;
+
+    s->bytestream_start=
+    s->bytestream= buf;
+    s->bytestream_end= buf + buf_size;
+
+    /* check signature */
+    if (memcmp(s->bytestream, pngsig, 8) != 0)
+        return -1;
+    s->bytestream+= 8;
+    s->y=
+    s->state=0;
+//    memset(s, 0, sizeof(PNGContext));
+    /* init the zlib */
+    s->zstream.zalloc = png_zalloc;
+    s->zstream.zfree = png_zfree;
+    s->zstream.opaque = NULL;
+    ret = inflateInit(&s->zstream);
+    if (ret != Z_OK)
+        return -1;
+    for(;;) {
+        int tag32;
+        if (s->bytestream >= s->bytestream_end)
+            goto fail;
+        length = get32(&s->bytestream);
+        if (length > 0x7fffffff)
+            goto fail;
+        tag32 = get32(&s->bytestream);
+        tag = bswap_32(tag32);
+#ifdef DEBUG
+        av_log(avctx, AV_LOG_DEBUG, "png: tag=%c%c%c%c length=%u\n",
+               (tag & 0xff),
+               ((tag >> 8) & 0xff),
+               ((tag >> 16) & 0xff),
+               ((tag >> 24) & 0xff), length);
+#endif
+        switch(tag) {
+        case MKTAG('I', 'H', 'D', 'R'):
+            if (length != 13)
+                goto fail;
+            s->width = get32(&s->bytestream);
+            s->height = get32(&s->bytestream);
+            if(avcodec_check_dimensions(avctx, s->width, s->height)){
+                s->width= s->height= 0;
+                goto fail;
+            }
+            s->bit_depth = *s->bytestream++;
+            s->color_type = *s->bytestream++;
+            s->compression_type = *s->bytestream++;
+            s->filter_type = *s->bytestream++;
+            s->interlace_type = *s->bytestream++;
+            crc = get32(&s->bytestream);
+            s->state |= PNG_IHDR;
+#ifdef DEBUG
+            av_log(avctx, AV_LOG_DEBUG, "width=%d height=%d depth=%d color_type=%d compression_type=%d filter_type=%d interlace_type=%d\n",
+                   s->width, s->height, s->bit_depth, s->color_type,
+                   s->compression_type, s->filter_type, s->interlace_type);
+#endif
+            break;
+        case MKTAG('I', 'D', 'A', 'T'):
+            if (!(s->state & PNG_IHDR))
+                goto fail;
+            if (!(s->state & PNG_IDAT)) {
+                /* init image info */
+                avctx->width = s->width;
+                avctx->height = s->height;
+
+                s->channels = png_get_nb_channels(s->color_type);
+                s->bits_per_pixel = s->bit_depth * s->channels;
+                s->bpp = (s->bits_per_pixel + 7) >> 3;
+                s->row_size = (avctx->width * s->bits_per_pixel + 7) >> 3;
+
+                if (s->bit_depth == 8 &&
+                    s->color_type == PNG_COLOR_TYPE_RGB) {
+                    avctx->pix_fmt = PIX_FMT_RGB24;
+                } else if (s->bit_depth == 8 &&
+                           s->color_type == PNG_COLOR_TYPE_RGB_ALPHA) {
+                    avctx->pix_fmt = PIX_FMT_RGBA32;
+                } else if (s->bit_depth == 8 &&
+                           s->color_type == PNG_COLOR_TYPE_GRAY) {
+                    avctx->pix_fmt = PIX_FMT_GRAY8;
+                } else if (s->bit_depth == 16 &&
+                           s->color_type == PNG_COLOR_TYPE_GRAY) {
+                    avctx->pix_fmt = PIX_FMT_GRAY16BE;
+                } else if (s->bit_depth == 1 &&
+                           s->color_type == PNG_COLOR_TYPE_GRAY) {
+                    avctx->pix_fmt = PIX_FMT_MONOBLACK;
+                } else if (s->color_type == PNG_COLOR_TYPE_PALETTE) {
+                    avctx->pix_fmt = PIX_FMT_PAL8;
+                } else {
+                    goto fail;
+                }
+                if(p->data[0])
+                    avctx->release_buffer(avctx, p);
+
+                p->reference= 0;
+                if(avctx->get_buffer(avctx, p) < 0){
+                    av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+                    goto fail;
+                }
+                p->pict_type= FF_I_TYPE;
+                p->key_frame= 1;
+                p->interlaced_frame = !!s->interlace_type;
+
+                /* compute the compressed row size */
+                if (!s->interlace_type) {
+                    s->crow_size = s->row_size + 1;
+                } else {
+                    s->pass = 0;
+                    s->pass_row_size = png_pass_row_size(s->pass,
+                                                         s->bits_per_pixel,
+                                                         s->width);
+                    s->crow_size = s->pass_row_size + 1;
+                }
+#ifdef DEBUG
+                av_log(avctx, AV_LOG_DEBUG, "row_size=%d crow_size =%d\n",
+                       s->row_size, s->crow_size);
+#endif
+                s->image_buf = p->data[0];
+                s->image_linesize = p->linesize[0];
+                /* copy the palette if needed */
+                if (s->color_type == PNG_COLOR_TYPE_PALETTE)
+                    memcpy(p->data[1], s->palette, 256 * sizeof(uint32_t));
+                /* empty row is used if differencing to the first row */
+                s->last_row = av_mallocz(s->row_size);
+                if (!s->last_row)
+                    goto fail;
+                if (s->interlace_type ||
+                    s->color_type == PNG_COLOR_TYPE_RGB_ALPHA) {
+                    s->tmp_row = av_malloc(s->row_size);
+                    if (!s->tmp_row)
+                        goto fail;
+                }
+                /* compressed row */
+                s->crow_buf = av_malloc(s->row_size + 1);
+                if (!s->crow_buf)
+                    goto fail;
+                s->zstream.avail_out = s->crow_size;
+                s->zstream.next_out = s->crow_buf;
+            }
+            s->state |= PNG_IDAT;
+            if (png_decode_idat(s, length) < 0)
+                goto fail;
+            /* skip crc */
+            crc = get32(&s->bytestream);
+            break;
+        case MKTAG('P', 'L', 'T', 'E'):
+            {
+                int n, i, r, g, b;
+
+                if ((length % 3) != 0 || length > 256 * 3)
+                    goto skip_tag;
+                /* read the palette */
+                n = length / 3;
+                for(i=0;i<n;i++) {
+                    r = *s->bytestream++;
+                    g = *s->bytestream++;
+                    b = *s->bytestream++;
+                    s->palette[i] = (0xff << 24) | (r << 16) | (g << 8) | b;
+                }
+                for(;i<256;i++) {
+                    s->palette[i] = (0xff << 24);
+                }
+                s->state |= PNG_PLTE;
+                crc = get32(&s->bytestream);
+            }
+            break;
+        case MKTAG('t', 'R', 'N', 'S'):
+            {
+                int v, i;
+
+                /* read the transparency. XXX: Only palette mode supported */
+                if (s->color_type != PNG_COLOR_TYPE_PALETTE ||
+                    length > 256 ||
+                    !(s->state & PNG_PLTE))
+                    goto skip_tag;
+                for(i=0;i<length;i++) {
+                    v = *s->bytestream++;
+                    s->palette[i] = (s->palette[i] & 0x00ffffff) | (v << 24);
+                }
+                crc = get32(&s->bytestream);
+            }
+            break;
+        case MKTAG('I', 'E', 'N', 'D'):
+            if (!(s->state & PNG_ALLIMAGE))
+                goto fail;
+            crc = get32(&s->bytestream);
+            goto exit_loop;
+        default:
+            /* skip tag */
+        skip_tag:
+            s->bytestream += length + 4;
+            break;
+        }
+    }
+ exit_loop:
+    *picture= *(AVFrame*)&s->picture;
+    *data_size = sizeof(AVPicture);
+
+    ret = s->bytestream - s->bytestream_start;
+ the_end:
+    inflateEnd(&s->zstream);
+    av_freep(&s->crow_buf);
+    av_freep(&s->last_row);
+    av_freep(&s->tmp_row);
+    return ret;
+ fail:
+    ret = -1;
+    goto the_end;
+}
+#endif
+
+#ifdef CONFIG_ENCODERS
+static void png_write_chunk(uint8_t **f, uint32_t tag,
+                            const uint8_t *buf, int length)
+{
+    uint32_t crc;
+    uint8_t tagbuf[4];
+
+    put32(f, length);
+    crc = crc32(0, Z_NULL, 0);
+    tagbuf[0] = tag;
+    tagbuf[1] = tag >> 8;
+    tagbuf[2] = tag >> 16;
+    tagbuf[3] = tag >> 24;
+    crc = crc32(crc, tagbuf, 4);
+    put32(f, bswap_32(tag));
+    if (length > 0) {
+        crc = crc32(crc, buf, length);
+        memcpy(*f, buf, length);
+        *f += length;
+    }
+    put32(f, crc);
+}
+
+/* XXX: use avcodec generic function ? */
+static void to_be32(uint8_t *p, uint32_t v)
+{
+    p[0] = v >> 24;
+    p[1] = v >> 16;
+    p[2] = v >> 8;
+    p[3] = v;
+}
+
+/* XXX: do filtering */
+static int png_write_row(PNGContext *s, const uint8_t *data, int size)
+{
+    int ret;
+
+    s->zstream.avail_in = size;
+    s->zstream.next_in = (uint8_t *)data;
+    while (s->zstream.avail_in > 0) {
+        ret = deflate(&s->zstream, Z_NO_FLUSH);
+        if (ret != Z_OK)
+            return -1;
+        if (s->zstream.avail_out == 0) {
+            if(s->bytestream_end - s->bytestream > IOBUF_SIZE + 100)
+                png_write_chunk(&s->bytestream, MKTAG('I', 'D', 'A', 'T'), s->buf, IOBUF_SIZE);
+            s->zstream.avail_out = IOBUF_SIZE;
+            s->zstream.next_out = s->buf;
+        }
+    }
+    return 0;
+}
+#endif /* CONFIG_ENCODERS */
+
+static int common_init(AVCodecContext *avctx){
+    PNGContext *s = avctx->priv_data;
+
+    avcodec_get_frame_defaults((AVFrame*)&s->picture);
+    avctx->coded_frame= (AVFrame*)&s->picture;
+//    s->avctx= avctx;
+
+    return 0;
+}
+
+#ifdef CONFIG_ENCODERS
+static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
+    PNGContext *s = avctx->priv_data;
+    AVFrame *pict = data;
+    AVFrame * const p= (AVFrame*)&s->picture;
+    int bit_depth, color_type, y, len, row_size, ret, is_progressive;
+    int bits_per_pixel, pass_row_size;
+    uint8_t *ptr;
+    uint8_t *crow_buf = NULL;
+    uint8_t *tmp_buf = NULL;
+
+    *p = *pict;
+    p->pict_type= FF_I_TYPE;
+    p->key_frame= 1;
+
+    s->bytestream_start=
+    s->bytestream= buf;
+    s->bytestream_end= buf+buf_size;
+
+    is_progressive = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
+    switch(avctx->pix_fmt) {
+    case PIX_FMT_RGBA32:
+        bit_depth = 8;
+        color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+        break;
+    case PIX_FMT_RGB24:
+        bit_depth = 8;
+        color_type = PNG_COLOR_TYPE_RGB;
+        break;
+    case PIX_FMT_GRAY8:
+        bit_depth = 8;
+        color_type = PNG_COLOR_TYPE_GRAY;
+        break;
+    case PIX_FMT_MONOBLACK:
+        bit_depth = 1;
+        color_type = PNG_COLOR_TYPE_GRAY;
+        break;
+    case PIX_FMT_PAL8:
+        bit_depth = 8;
+        color_type = PNG_COLOR_TYPE_PALETTE;
+        break;
+    default:
+        return -1;
+    }
+    bits_per_pixel = png_get_nb_channels(color_type) * bit_depth;
+    row_size = (avctx->width * bits_per_pixel + 7) >> 3;
+
+    s->zstream.zalloc = png_zalloc;
+    s->zstream.zfree = png_zfree;
+    s->zstream.opaque = NULL;
+    ret = deflateInit2(&s->zstream, Z_DEFAULT_COMPRESSION,
+                       Z_DEFLATED, 15, 8, Z_DEFAULT_STRATEGY);
+    if (ret != Z_OK)
+        return -1;
+    crow_buf = av_malloc(row_size + 1);
+    if (!crow_buf)
+        goto fail;
+    if (is_progressive) {
+        tmp_buf = av_malloc(row_size + 1);
+        if (!tmp_buf)
+            goto fail;
+    }
+
+    /* write png header */
+    memcpy(s->bytestream, pngsig, 8);
+    s->bytestream += 8;
+
+    to_be32(s->buf, avctx->width);
+    to_be32(s->buf + 4, avctx->height);
+    s->buf[8] = bit_depth;
+    s->buf[9] = color_type;
+    s->buf[10] = 0; /* compression type */
+    s->buf[11] = 0; /* filter type */
+    s->buf[12] = is_progressive; /* interlace type */
+
+    png_write_chunk(&s->bytestream, MKTAG('I', 'H', 'D', 'R'), s->buf, 13);
+
+    /* put the palette if needed */
+    if (color_type == PNG_COLOR_TYPE_PALETTE) {
+        int has_alpha, alpha, i;
+        unsigned int v;
+        uint32_t *palette;
+        uint8_t *alpha_ptr;
+
+        palette = (uint32_t *)p->data[1];
+        ptr = s->buf;
+        alpha_ptr = s->buf + 256 * 3;
+        has_alpha = 0;
+        for(i = 0; i < 256; i++) {
+            v = palette[i];
+            alpha = v >> 24;
+            if (alpha && alpha != 0xff)
+                has_alpha = 1;
+            *alpha_ptr++ = alpha;
+            ptr[0] = v >> 16;
+            ptr[1] = v >> 8;
+            ptr[2] = v;
+            ptr += 3;
+        }
+        png_write_chunk(&s->bytestream, MKTAG('P', 'L', 'T', 'E'), s->buf, 256 * 3);
+        if (has_alpha) {
+            png_write_chunk(&s->bytestream, MKTAG('t', 'R', 'N', 'S'), s->buf + 256 * 3, 256);
+        }
+    }
+
+    /* now put each row */
+    s->zstream.avail_out = IOBUF_SIZE;
+    s->zstream.next_out = s->buf;
+    if (is_progressive) {
+        uint8_t *ptr1;
+        int pass;
+
+        for(pass = 0; pass < NB_PASSES; pass++) {
+            /* NOTE: a pass is completely omited if no pixels would be
+               output */
+            pass_row_size = png_pass_row_size(pass, bits_per_pixel, avctx->width);
+            if (pass_row_size > 0) {
+                for(y = 0; y < avctx->height; y++) {
+                    if ((png_pass_ymask[pass] << (y & 7)) & 0x80) {
+                        ptr = p->data[0] + y * p->linesize[0];
+                        if (color_type == PNG_COLOR_TYPE_RGB_ALPHA) {
+                            convert_from_rgba32(tmp_buf, ptr, avctx->width);
+                            ptr1 = tmp_buf;
+                        } else {
+                            ptr1 = ptr;
+                        }
+                        png_get_interlaced_row(crow_buf + 1, pass_row_size,
+                                               bits_per_pixel, pass,
+                                               ptr1, avctx->width);
+                        crow_buf[0] = PNG_FILTER_VALUE_NONE;
+                        png_write_row(s, crow_buf, pass_row_size + 1);
+                    }
+                }
+            }
+        }
+    } else {
+        for(y = 0; y < avctx->height; y++) {
+            ptr = p->data[0] + y * p->linesize[0];
+            if (color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+                convert_from_rgba32(crow_buf + 1, ptr, avctx->width);
+            else
+                memcpy(crow_buf + 1, ptr, row_size);
+            crow_buf[0] = PNG_FILTER_VALUE_NONE;
+            png_write_row(s, crow_buf, row_size + 1);
+        }
+    }
+    /* compress last bytes */
+    for(;;) {
+        ret = deflate(&s->zstream, Z_FINISH);
+        if (ret == Z_OK || ret == Z_STREAM_END) {
+            len = IOBUF_SIZE - s->zstream.avail_out;
+            if (len > 0 && s->bytestream_end - s->bytestream > len + 100) {
+                png_write_chunk(&s->bytestream, MKTAG('I', 'D', 'A', 'T'), s->buf, len);
+            }
+            s->zstream.avail_out = IOBUF_SIZE;
+            s->zstream.next_out = s->buf;
+            if (ret == Z_STREAM_END)
+                break;
+        } else {
+            goto fail;
+        }
+    }
+    png_write_chunk(&s->bytestream, MKTAG('I', 'E', 'N', 'D'), NULL, 0);
+
+    ret = s->bytestream - s->bytestream_start;
+ the_end:
+    av_free(crow_buf);
+    av_free(tmp_buf);
+    deflateEnd(&s->zstream);
+    return ret;
+ fail:
+    ret = -1;
+    goto the_end;
+}
+#endif
+
+#ifdef CONFIG_PNG_DECODER
+AVCodec png_decoder = {
+    "png",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_PNG,
+    sizeof(PNGContext),
+    common_init,
+    NULL,
+    NULL, //decode_end,
+    decode_frame,
+    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
+    NULL
+};
+#endif
+
+#ifdef CONFIG_PNG_ENCODER
+AVCodec png_encoder = {
+    "png",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_PNG,
+    sizeof(PNGContext),
+    common_init,
+    encode_frame,
+    NULL, //encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_RGB24, PIX_FMT_RGBA32, PIX_FMT_PAL8, PIX_FMT_GRAY8, PIX_FMT_MONOBLACK, -1},
+};
+#endif // CONFIG_PNG_ENCODER
+#endif
diff --git a/contrib/ffmpeg/libavcodec/pnm.c b/contrib/ffmpeg/libavcodec/pnm.c
new file mode 100644
index 000000000..610bb28be
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/pnm.c
@@ -0,0 +1,606 @@
+/*
+ * PNM image format
+ * Copyright (c) 2002, 2003 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+#include "parser.h" //for ParseContext
+
+typedef struct PNMContext {
+    uint8_t *bytestream;
+    uint8_t *bytestream_start;
+    uint8_t *bytestream_end;
+    AVFrame picture;
+} PNMContext;
+
+static inline int pnm_space(int c)
+{
+    return (c == ' ' || c == '\n' || c == '\r' || c == '\t');
+}
+
+static void pnm_get(PNMContext *sc, char *str, int buf_size)
+{
+    char *s;
+    int c;
+
+    /* skip spaces and comments */
+    for(;;) {
+        c = *sc->bytestream++;
+        if (c == '#')  {
+            do  {
+                c = *sc->bytestream++;
+            } while (c != '\n' && sc->bytestream < sc->bytestream_end);
+        } else if (!pnm_space(c)) {
+            break;
+        }
+    }
+
+    s = str;
+    while (sc->bytestream < sc->bytestream_end && !pnm_space(c)) {
+        if ((s - str)  < buf_size - 1)
+            *s++ = c;
+        c = *sc->bytestream++;
+    }
+    *s = '\0';
+}
+
+static int common_init(AVCodecContext *avctx){
+    PNMContext *s = avctx->priv_data;
+
+    avcodec_get_frame_defaults((AVFrame*)&s->picture);
+    avctx->coded_frame= (AVFrame*)&s->picture;
+
+    return 0;
+}
+
+static int pnm_decode_header(AVCodecContext *avctx, PNMContext * const s){
+    char buf1[32], tuple_type[32];
+    int h, w, depth, maxval;
+
+    pnm_get(s, buf1, sizeof(buf1));
+    if (!strcmp(buf1, "P4")) {
+        avctx->pix_fmt = PIX_FMT_MONOWHITE;
+    } else if (!strcmp(buf1, "P5")) {
+        if (avctx->codec_id == CODEC_ID_PGMYUV)
+            avctx->pix_fmt = PIX_FMT_YUV420P;
+        else
+            avctx->pix_fmt = PIX_FMT_GRAY8;
+    } else if (!strcmp(buf1, "P6")) {
+        avctx->pix_fmt = PIX_FMT_RGB24;
+    } else if (!strcmp(buf1, "P7")) {
+        w = -1;
+        h = -1;
+        maxval = -1;
+        depth = -1;
+        tuple_type[0] = '\0';
+        for(;;) {
+            pnm_get(s, buf1, sizeof(buf1));
+            if (!strcmp(buf1, "WIDTH")) {
+                pnm_get(s, buf1, sizeof(buf1));
+                w = strtol(buf1, NULL, 10);
+            } else if (!strcmp(buf1, "HEIGHT")) {
+                pnm_get(s, buf1, sizeof(buf1));
+                h = strtol(buf1, NULL, 10);
+            } else if (!strcmp(buf1, "DEPTH")) {
+                pnm_get(s, buf1, sizeof(buf1));
+                depth = strtol(buf1, NULL, 10);
+            } else if (!strcmp(buf1, "MAXVAL")) {
+                pnm_get(s, buf1, sizeof(buf1));
+                maxval = strtol(buf1, NULL, 10);
+            } else if (!strcmp(buf1, "TUPLETYPE")) {
+                pnm_get(s, tuple_type, sizeof(tuple_type));
+            } else if (!strcmp(buf1, "ENDHDR")) {
+                break;
+            } else {
+                return -1;
+            }
+        }
+        /* check that all tags are present */
+        if (w <= 0 || h <= 0 || maxval <= 0 || depth <= 0 || tuple_type[0] == '\0' || avcodec_check_dimensions(avctx, w, h))
+            return -1;
+
+        avctx->width = w;
+        avctx->height = h;
+        if (depth == 1) {
+            if (maxval == 1)
+                avctx->pix_fmt = PIX_FMT_MONOWHITE;
+            else
+                avctx->pix_fmt = PIX_FMT_GRAY8;
+        } else if (depth == 3) {
+            avctx->pix_fmt = PIX_FMT_RGB24;
+        } else if (depth == 4) {
+            avctx->pix_fmt = PIX_FMT_RGBA32;
+        } else {
+            return -1;
+        }
+        return 0;
+    } else {
+        return -1;
+    }
+    pnm_get(s, buf1, sizeof(buf1));
+    avctx->width = atoi(buf1);
+    if (avctx->width <= 0)
+        return -1;
+    pnm_get(s, buf1, sizeof(buf1));
+    avctx->height = atoi(buf1);
+    if(avcodec_check_dimensions(avctx, avctx->width, avctx->height))
+        return -1;
+    if (avctx->pix_fmt != PIX_FMT_MONOWHITE) {
+        pnm_get(s, buf1, sizeof(buf1));
+        if(atoi(buf1) == 65535 && avctx->pix_fmt == PIX_FMT_GRAY8)
+            avctx->pix_fmt = PIX_FMT_GRAY16BE;
+    }
+    /* more check if YUV420 */
+    if (avctx->pix_fmt == PIX_FMT_YUV420P) {
+        if ((avctx->width & 1) != 0)
+            return -1;
+        h = (avctx->height * 2);
+        if ((h % 3) != 0)
+            return -1;
+        h /= 3;
+        avctx->height = h;
+    }
+    return 0;
+}
+
+static int pnm_decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    PNMContext * const s = avctx->priv_data;
+    AVFrame *picture = data;
+    AVFrame * const p= (AVFrame*)&s->picture;
+    int i, n, linesize, h;
+    unsigned char *ptr;
+
+    s->bytestream_start=
+    s->bytestream= buf;
+    s->bytestream_end= buf + buf_size;
+
+    if(pnm_decode_header(avctx, s) < 0)
+        return -1;
+
+    if(p->data[0])
+        avctx->release_buffer(avctx, p);
+
+    p->reference= 0;
+    if(avctx->get_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    p->pict_type= FF_I_TYPE;
+    p->key_frame= 1;
+
+    switch(avctx->pix_fmt) {
+    default:
+        return -1;
+    case PIX_FMT_RGB24:
+        n = avctx->width * 3;
+        goto do_read;
+    case PIX_FMT_GRAY8:
+        n = avctx->width;
+        goto do_read;
+    case PIX_FMT_GRAY16BE:
+        n = avctx->width * 2;
+        goto do_read;
+    case PIX_FMT_MONOWHITE:
+    case PIX_FMT_MONOBLACK:
+        n = (avctx->width + 7) >> 3;
+    do_read:
+        ptr = p->data[0];
+        linesize = p->linesize[0];
+        if(s->bytestream + n*avctx->height > s->bytestream_end)
+            return -1;
+        for(i = 0; i < avctx->height; i++) {
+            memcpy(ptr, s->bytestream, n);
+            s->bytestream += n;
+            ptr += linesize;
+        }
+        break;
+    case PIX_FMT_YUV420P:
+        {
+            unsigned char *ptr1, *ptr2;
+
+            n = avctx->width;
+            ptr = p->data[0];
+            linesize = p->linesize[0];
+            if(s->bytestream + n*avctx->height*3/2 > s->bytestream_end)
+                return -1;
+            for(i = 0; i < avctx->height; i++) {
+                memcpy(ptr, s->bytestream, n);
+                s->bytestream += n;
+                ptr += linesize;
+            }
+            ptr1 = p->data[1];
+            ptr2 = p->data[2];
+            n >>= 1;
+            h = avctx->height >> 1;
+            for(i = 0; i < h; i++) {
+                memcpy(ptr1, s->bytestream, n);
+                s->bytestream += n;
+                memcpy(ptr2, s->bytestream, n);
+                s->bytestream += n;
+                ptr1 += p->linesize[1];
+                ptr2 += p->linesize[2];
+            }
+        }
+        break;
+    case PIX_FMT_RGBA32:
+        ptr = p->data[0];
+        linesize = p->linesize[0];
+        if(s->bytestream + avctx->width*avctx->height*4 > s->bytestream_end)
+            return -1;
+        for(i = 0; i < avctx->height; i++) {
+            int j, r, g, b, a;
+
+            for(j = 0;j < avctx->width; j++) {
+                r = *s->bytestream++;
+                g = *s->bytestream++;
+                b = *s->bytestream++;
+                a = *s->bytestream++;
+                ((uint32_t *)ptr)[j] = (a << 24) | (r << 16) | (g << 8) | b;
+            }
+            ptr += linesize;
+        }
+        break;
+    }
+    *picture= *(AVFrame*)&s->picture;
+    *data_size = sizeof(AVPicture);
+
+    return s->bytestream - s->bytestream_start;
+}
+
+static int pnm_encode_frame(AVCodecContext *avctx, unsigned char *outbuf, int buf_size, void *data){
+    PNMContext *s = avctx->priv_data;
+    AVFrame *pict = data;
+    AVFrame * const p= (AVFrame*)&s->picture;
+    int i, h, h1, c, n, linesize;
+    uint8_t *ptr, *ptr1, *ptr2;
+
+    if(buf_size < avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height) + 200){
+        av_log(avctx, AV_LOG_ERROR, "encoded frame too large\n");
+        return -1;
+    }
+
+    *p = *pict;
+    p->pict_type= FF_I_TYPE;
+    p->key_frame= 1;
+
+    s->bytestream_start=
+    s->bytestream= outbuf;
+    s->bytestream_end= outbuf+buf_size;
+
+    h = avctx->height;
+    h1 = h;
+    switch(avctx->pix_fmt) {
+    case PIX_FMT_MONOWHITE:
+        c = '4';
+        n = (avctx->width + 7) >> 3;
+        break;
+    case PIX_FMT_GRAY8:
+        c = '5';
+        n = avctx->width;
+        break;
+    case PIX_FMT_GRAY16BE:
+        c = '5';
+        n = avctx->width * 2;
+        break;
+    case PIX_FMT_RGB24:
+        c = '6';
+        n = avctx->width * 3;
+        break;
+    case PIX_FMT_YUV420P:
+        c = '5';
+        n = avctx->width;
+        h1 = (h * 3) / 2;
+        break;
+    default:
+        return -1;
+    }
+    snprintf(s->bytestream, s->bytestream_end - s->bytestream,
+             "P%c\n%d %d\n",
+             c, avctx->width, h1);
+    s->bytestream += strlen(s->bytestream);
+    if (avctx->pix_fmt != PIX_FMT_MONOWHITE) {
+        snprintf(s->bytestream, s->bytestream_end - s->bytestream,
+                 "%d\n", (avctx->pix_fmt != PIX_FMT_GRAY16BE) ? 255 : 65535);
+        s->bytestream += strlen(s->bytestream);
+    }
+
+    ptr = p->data[0];
+    linesize = p->linesize[0];
+    for(i=0;i<h;i++) {
+        memcpy(s->bytestream, ptr, n);
+        s->bytestream += n;
+        ptr += linesize;
+    }
+
+    if (avctx->pix_fmt == PIX_FMT_YUV420P) {
+        h >>= 1;
+        n >>= 1;
+        ptr1 = p->data[1];
+        ptr2 = p->data[2];
+        for(i=0;i<h;i++) {
+            memcpy(s->bytestream, ptr1, n);
+            s->bytestream += n;
+            memcpy(s->bytestream, ptr2, n);
+            s->bytestream += n;
+                ptr1 += p->linesize[1];
+                ptr2 += p->linesize[2];
+        }
+    }
+    return s->bytestream - s->bytestream_start;
+}
+
+static int pam_encode_frame(AVCodecContext *avctx, unsigned char *outbuf, int buf_size, void *data){
+    PNMContext *s = avctx->priv_data;
+    AVFrame *pict = data;
+    AVFrame * const p= (AVFrame*)&s->picture;
+    int i, h, w, n, linesize, depth, maxval;
+    const char *tuple_type;
+    uint8_t *ptr;
+
+    if(buf_size < avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height) + 200){
+        av_log(avctx, AV_LOG_ERROR, "encoded frame too large\n");
+        return -1;
+    }
+
+    *p = *pict;
+    p->pict_type= FF_I_TYPE;
+    p->key_frame= 1;
+
+    s->bytestream_start=
+    s->bytestream= outbuf;
+    s->bytestream_end= outbuf+buf_size;
+
+    h = avctx->height;
+    w = avctx->width;
+    switch(avctx->pix_fmt) {
+    case PIX_FMT_MONOWHITE:
+        n = (w + 7) >> 3;
+        depth = 1;
+        maxval = 1;
+        tuple_type = "BLACKANDWHITE";
+        break;
+    case PIX_FMT_GRAY8:
+        n = w;
+        depth = 1;
+        maxval = 255;
+        tuple_type = "GRAYSCALE";
+        break;
+    case PIX_FMT_RGB24:
+        n = w * 3;
+        depth = 3;
+        maxval = 255;
+        tuple_type = "RGB";
+        break;
+    case PIX_FMT_RGBA32:
+        n = w * 4;
+        depth = 4;
+        maxval = 255;
+        tuple_type = "RGB_ALPHA";
+        break;
+    default:
+        return -1;
+    }
+    snprintf(s->bytestream, s->bytestream_end - s->bytestream,
+             "P7\nWIDTH %d\nHEIGHT %d\nDEPTH %d\nMAXVAL %d\nTUPLETYPE %s\nENDHDR\n",
+             w, h, depth, maxval, tuple_type);
+    s->bytestream += strlen(s->bytestream);
+
+    ptr = p->data[0];
+    linesize = p->linesize[0];
+
+    if (avctx->pix_fmt == PIX_FMT_RGBA32) {
+        int j;
+        unsigned int v;
+
+        for(i=0;i<h;i++) {
+            for(j=0;j<w;j++) {
+                v = ((uint32_t *)ptr)[j];
+                *s->bytestream++ = v >> 16;
+                *s->bytestream++ = v >> 8;
+                *s->bytestream++ = v;
+                *s->bytestream++ = v >> 24;
+            }
+            ptr += linesize;
+        }
+    } else {
+        for(i=0;i<h;i++) {
+            memcpy(s->bytestream, ptr, n);
+            s->bytestream += n;
+            ptr += linesize;
+        }
+    }
+    return s->bytestream - s->bytestream_start;
+}
+
+#if 0
+static int pnm_probe(AVProbeData *pd)
+{
+    const char *p = pd->buf;
+    if (pd->buf_size >= 8 &&
+        p[0] == 'P' &&
+        p[1] >= '4' && p[1] <= '6' &&
+        pnm_space(p[2]) )
+        return AVPROBE_SCORE_MAX - 1; /* to permit pgmyuv probe */
+    else
+        return 0;
+}
+
+static int pgmyuv_probe(AVProbeData *pd)
+{
+    if (match_ext(pd->filename, "pgmyuv"))
+        return AVPROBE_SCORE_MAX;
+    else
+        return 0;
+}
+
+static int pam_probe(AVProbeData *pd)
+{
+    const char *p = pd->buf;
+    if (pd->buf_size >= 8 &&
+        p[0] == 'P' &&
+        p[1] == '7' &&
+        p[2] == '\n')
+        return AVPROBE_SCORE_MAX;
+    else
+        return 0;
+}
+#endif
+
+#ifdef CONFIG_PNM_PARSER
+static int pnm_parse(AVCodecParserContext *s,
+                           AVCodecContext *avctx,
+                           uint8_t **poutbuf, int *poutbuf_size,
+                           const uint8_t *buf, int buf_size)
+{
+    ParseContext *pc = s->priv_data;
+    PNMContext pnmctx;
+    int next;
+
+    for(; pc->overread>0; pc->overread--){
+        pc->buffer[pc->index++]= pc->buffer[pc->overread_index++];
+    }
+retry:
+    if(pc->index){
+        pnmctx.bytestream_start=
+        pnmctx.bytestream= pc->buffer;
+        pnmctx.bytestream_end= pc->buffer + pc->index;
+    }else{
+        pnmctx.bytestream_start=
+        pnmctx.bytestream= (uint8_t *) buf; /* casts avoid warnings */
+        pnmctx.bytestream_end= (uint8_t *) buf + buf_size;
+    }
+    if(pnm_decode_header(avctx, &pnmctx) < 0){
+        if(pnmctx.bytestream < pnmctx.bytestream_end){
+            if(pc->index){
+                pc->index=0;
+            }else{
+                buf++;
+                buf_size--;
+            }
+            goto retry;
+        }
+#if 0
+        if(pc->index && pc->index*2 + FF_INPUT_BUFFER_PADDING_SIZE < pc->buffer_size && buf_size > pc->index){
+            memcpy(pc->buffer + pc->index, buf, pc->index);
+            pc->index += pc->index;
+            buf += pc->index;
+            buf_size -= pc->index;
+            goto retry;
+        }
+#endif
+        next= END_NOT_FOUND;
+    }else{
+        next= pnmctx.bytestream - pnmctx.bytestream_start
+            + avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height);
+        if(pnmctx.bytestream_start!=buf)
+            next-= pc->index;
+        if(next > buf_size)
+            next= END_NOT_FOUND;
+    }
+
+    if(ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size)<0){
+        *poutbuf = NULL;
+        *poutbuf_size = 0;
+        return buf_size;
+    }
+    *poutbuf = (uint8_t *)buf;
+    *poutbuf_size = buf_size;
+    return next;
+}
+
+AVCodecParser pnm_parser = {
+    { CODEC_ID_PGM, CODEC_ID_PGMYUV, CODEC_ID_PPM, CODEC_ID_PBM, CODEC_ID_PAM},
+    sizeof(ParseContext),
+    NULL,
+    pnm_parse,
+    ff_parse_close,
+};
+#endif /* CONFIG_PNM_PARSER */
+
+#ifdef CONFIG_PGM_ENCODER
+AVCodec pgm_encoder = {
+    "pgm",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_PGM,
+    sizeof(PNMContext),
+    common_init,
+    pnm_encode_frame,
+    NULL, //encode_end,
+    pnm_decode_frame,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_GRAY8, PIX_FMT_GRAY16BE, -1},
+};
+#endif // CONFIG_PGM_ENCODER
+
+#ifdef CONFIG_PGMYUV_ENCODER
+AVCodec pgmyuv_encoder = {
+    "pgmyuv",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_PGMYUV,
+    sizeof(PNMContext),
+    common_init,
+    pnm_encode_frame,
+    NULL, //encode_end,
+    pnm_decode_frame,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+};
+#endif // CONFIG_PGMYUV_ENCODER
+
+#ifdef CONFIG_PPM_ENCODER
+AVCodec ppm_encoder = {
+    "ppm",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_PPM,
+    sizeof(PNMContext),
+    common_init,
+    pnm_encode_frame,
+    NULL, //encode_end,
+    pnm_decode_frame,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_RGB24, -1},
+};
+#endif // CONFIG_PPM_ENCODER
+
+#ifdef CONFIG_PBM_ENCODER
+AVCodec pbm_encoder = {
+    "pbm",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_PBM,
+    sizeof(PNMContext),
+    common_init,
+    pnm_encode_frame,
+    NULL, //encode_end,
+    pnm_decode_frame,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_MONOWHITE, -1},
+};
+#endif // CONFIG_PBM_ENCODER
+
+#ifdef CONFIG_PAM_ENCODER
+AVCodec pam_encoder = {
+    "pam",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_PAM,
+    sizeof(PNMContext),
+    common_init,
+    pam_encode_frame,
+    NULL, //encode_end,
+    pnm_decode_frame,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_RGB24, PIX_FMT_RGBA32, PIX_FMT_GRAY8, PIX_FMT_MONOWHITE, -1},
+};
+#endif // CONFIG_PAM_ENCODER
diff --git a/contrib/ffmpeg/libavcodec/ppc/dsputil_altivec.c b/contrib/ffmpeg/libavcodec/ppc/dsputil_altivec.c
new file mode 100644
index 000000000..6f48893a4
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/dsputil_altivec.c
@@ -0,0 +1,1591 @@
+/*
+ * Copyright (c) 2002 Brian Foley
+ * Copyright (c) 2002 Dieter Shirley
+ * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+
+#include "dsputil_altivec.h"
+
+#ifdef CONFIG_DARWIN
+#include <sys/sysctl.h>
+#else /* CONFIG_DARWIN */
+#ifdef __AMIGAOS4__
+#include <exec/exec.h>
+#include <interfaces/exec.h>
+#include <proto/exec.h>
+#else /* __AMIGAOS4__ */
+#include <signal.h>
+#include <setjmp.h>
+
+static sigjmp_buf jmpbuf;
+static volatile sig_atomic_t canjump = 0;
+
+static void sigill_handler (int sig)
+{
+    if (!canjump) {
+        signal (sig, SIG_DFL);
+        raise (sig);
+    }
+
+    canjump = 0;
+    siglongjmp (jmpbuf, 1);
+}
+#endif /* CONFIG_DARWIN */
+#endif /* __AMIGAOS4__ */
+
+int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int i;
+    int s __attribute__((aligned(16)));
+    const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
+    vector unsigned char *tv;
+    vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;
+    vector unsigned int sad;
+    vector signed int sumdiffs;
+
+    s = 0;
+    sad = (vector unsigned int)vec_splat_u32(0);
+    for(i=0;i<h;i++) {
+        /*
+           Read unaligned pixels into our vectors. The vectors are as follows:
+           pix1v: pix1[0]-pix1[15]
+           pix2v: pix2[0]-pix2[15]      pix2iv: pix2[1]-pix2[16]
+        */
+        tv = (vector unsigned char *) pix1;
+        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
+
+        tv = (vector unsigned char *) &pix2[0];
+        pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));
+
+        tv = (vector unsigned char *) &pix2[1];
+        pix2iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[1]));
+
+        /* Calculate the average vector */
+        avgv = vec_avg(pix2v, pix2iv);
+
+        /* Calculate a sum of abs differences vector */
+        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));
+
+        /* Add each 4 pixel group together and put 4 results into sad */
+        sad = vec_sum4s(t5, sad);
+
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    /* Sum up the four partial sums, and put the result into s */
+    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
+    sumdiffs = vec_splat(sumdiffs, 3);
+    vec_ste(sumdiffs, 0, &s);
+
+    return s;
+}
+
+int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int i;
+    int s __attribute__((aligned(16)));
+    const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
+    vector unsigned char *tv;
+    vector unsigned char pix1v, pix2v, pix3v, avgv, t5;
+    vector unsigned int sad;
+    vector signed int sumdiffs;
+    uint8_t *pix3 = pix2 + line_size;
+
+    s = 0;
+    sad = (vector unsigned int)vec_splat_u32(0);
+
+    /*
+       Due to the fact that pix3 = pix2 + line_size, the pix3 of one
+       iteration becomes pix2 in the next iteration. We can use this
+       fact to avoid a potentially expensive unaligned read, each
+       time around the loop.
+       Read unaligned pixels into our vectors. The vectors are as follows:
+       pix2v: pix2[0]-pix2[15]
+       Split the pixel vectors into shorts
+    */
+    tv = (vector unsigned char *) &pix2[0];
+    pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));
+
+    for(i=0;i<h;i++) {
+        /*
+           Read unaligned pixels into our vectors. The vectors are as follows:
+           pix1v: pix1[0]-pix1[15]
+           pix3v: pix3[0]-pix3[15]
+        */
+        tv = (vector unsigned char *) pix1;
+        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
+
+        tv = (vector unsigned char *) &pix3[0];
+        pix3v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[0]));
+
+        /* Calculate the average vector */
+        avgv = vec_avg(pix2v, pix3v);
+
+        /* Calculate a sum of abs differences vector */
+        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));
+
+        /* Add each 4 pixel group together and put 4 results into sad */
+        sad = vec_sum4s(t5, sad);
+
+        pix1 += line_size;
+        pix2v = pix3v;
+        pix3 += line_size;
+
+    }
+
+    /* Sum up the four partial sums, and put the result into s */
+    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
+    sumdiffs = vec_splat(sumdiffs, 3);
+    vec_ste(sumdiffs, 0, &s);
+    return s;
+}
+
+int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int i;
+    int s __attribute__((aligned(16)));
+    uint8_t *pix3 = pix2 + line_size;
+    const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
+    const_vector unsigned short two = (const_vector unsigned short)vec_splat_u16(2);
+    vector unsigned char *tv, avgv, t5;
+    vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;
+    vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;
+    vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
+    vector unsigned short avghv, avglv;
+    vector unsigned short t1, t2, t3, t4;
+    vector unsigned int sad;
+    vector signed int sumdiffs;
+
+    sad = (vector unsigned int)vec_splat_u32(0);
+
+    s = 0;
+
+    /*
+       Due to the fact that pix3 = pix2 + line_size, the pix3 of one
+       iteration becomes pix2 in the next iteration. We can use this
+       fact to avoid a potentially expensive unaligned read, as well
+       as some splitting, and vector addition each time around the loop.
+       Read unaligned pixels into our vectors. The vectors are as follows:
+       pix2v: pix2[0]-pix2[15]  pix2iv: pix2[1]-pix2[16]
+       Split the pixel vectors into shorts
+    */
+    tv = (vector unsigned char *) &pix2[0];
+    pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));
+
+    tv = (vector unsigned char *) &pix2[1];
+    pix2iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[1]));
+
+    pix2hv = (vector unsigned short) vec_mergeh(zero, pix2v);
+    pix2lv = (vector unsigned short) vec_mergel(zero, pix2v);
+    pix2ihv = (vector unsigned short) vec_mergeh(zero, pix2iv);
+    pix2ilv = (vector unsigned short) vec_mergel(zero, pix2iv);
+    t1 = vec_add(pix2hv, pix2ihv);
+    t2 = vec_add(pix2lv, pix2ilv);
+
+    for(i=0;i<h;i++) {
+        /*
+           Read unaligned pixels into our vectors. The vectors are as follows:
+           pix1v: pix1[0]-pix1[15]
+           pix3v: pix3[0]-pix3[15]      pix3iv: pix3[1]-pix3[16]
+        */
+        tv = (vector unsigned char *) pix1;
+        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
+
+        tv = (vector unsigned char *) &pix3[0];
+        pix3v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[0]));
+
+        tv = (vector unsigned char *) &pix3[1];
+        pix3iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[1]));
+
+        /*
+          Note that Altivec does have vec_avg, but this works on vector pairs
+          and rounds up. We could do avg(avg(a,b),avg(c,d)), but the rounding
+          would mean that, for example, avg(3,0,0,1) = 2, when it should be 1.
+          Instead, we have to split the pixel vectors into vectors of shorts,
+          and do the averaging by hand.
+        */
+
+        /* Split the pixel vectors into shorts */
+        pix3hv = (vector unsigned short) vec_mergeh(zero, pix3v);
+        pix3lv = (vector unsigned short) vec_mergel(zero, pix3v);
+        pix3ihv = (vector unsigned short) vec_mergeh(zero, pix3iv);
+        pix3ilv = (vector unsigned short) vec_mergel(zero, pix3iv);
+
+        /* Do the averaging on them */
+        t3 = vec_add(pix3hv, pix3ihv);
+        t4 = vec_add(pix3lv, pix3ilv);
+
+        avghv = vec_sr(vec_add(vec_add(t1, t3), two), two);
+        avglv = vec_sr(vec_add(vec_add(t2, t4), two), two);
+
+        /* Pack the shorts back into a result */
+        avgv = vec_pack(avghv, avglv);
+
+        /* Calculate a sum of abs differences vector */
+        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));
+
+        /* Add each 4 pixel group together and put 4 results into sad */
+        sad = vec_sum4s(t5, sad);
+
+        pix1 += line_size;
+        pix3 += line_size;
+        /* Transfer the calculated values for pix3 into pix2 */
+        t1 = t3;
+        t2 = t4;
+    }
+    /* Sum up the four partial sums, and put the result into s */
+    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
+    sumdiffs = vec_splat(sumdiffs, 3);
+    vec_ste(sumdiffs, 0, &s);
+
+    return s;
+}
+
+int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int i;
+    int s __attribute__((aligned(16)));
+    const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
+    vector unsigned char perm1, perm2, *pix1v, *pix2v;
+    vector unsigned char t1, t2, t3,t4, t5;
+    vector unsigned int sad;
+    vector signed int sumdiffs;
+
+    sad = (vector unsigned int)vec_splat_u32(0);
+
+
+    for(i=0;i<h;i++) {
+        /* Read potentially unaligned pixels into t1 and t2 */
+        perm1 = vec_lvsl(0, pix1);
+        pix1v = (vector unsigned char *) pix1;
+        perm2 = vec_lvsl(0, pix2);
+        pix2v = (vector unsigned char *) pix2;
+        t1 = vec_perm(pix1v[0], pix1v[1], perm1);
+        t2 = vec_perm(pix2v[0], pix2v[1], perm2);
+
+        /* Calculate a sum of abs differences vector */
+        t3 = vec_max(t1, t2);
+        t4 = vec_min(t1, t2);
+        t5 = vec_sub(t3, t4);
+
+        /* Add each 4 pixel group together and put 4 results into sad */
+        sad = vec_sum4s(t5, sad);
+
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+
+    /* Sum up the four partial sums, and put the result into s */
+    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
+    sumdiffs = vec_splat(sumdiffs, 3);
+    vec_ste(sumdiffs, 0, &s);
+
+    return s;
+}
+
+int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int i;
+    int s __attribute__((aligned(16)));
+    const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
+    vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;
+    vector unsigned char t1, t2, t3,t4, t5;
+    vector unsigned int sad;
+    vector signed int sumdiffs;
+
+    sad = (vector unsigned int)vec_splat_u32(0);
+
+    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
+
+    for(i=0;i<h;i++) {
+        /* Read potentially unaligned pixels into t1 and t2
+           Since we're reading 16 pixels, and actually only want 8,
+           mask out the last 8 pixels. The 0s don't change the sum. */
+        perm1 = vec_lvsl(0, pix1);
+        pix1v = (vector unsigned char *) pix1;
+        perm2 = vec_lvsl(0, pix2);
+        pix2v = (vector unsigned char *) pix2;
+        t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);
+        t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);
+
+        /* Calculate a sum of abs differences vector */
+        t3 = vec_max(t1, t2);
+        t4 = vec_min(t1, t2);
+        t5 = vec_sub(t3, t4);
+
+        /* Add each 4 pixel group together and put 4 results into sad */
+        sad = vec_sum4s(t5, sad);
+
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+
+    /* Sum up the four partial sums, and put the result into s */
+    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
+    sumdiffs = vec_splat(sumdiffs, 3);
+    vec_ste(sumdiffs, 0, &s);
+
+    return s;
+}
+
+int pix_norm1_altivec(uint8_t *pix, int line_size)
+{
+    int i;
+    int s __attribute__((aligned(16)));
+    const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
+    vector unsigned char *tv;
+    vector unsigned char pixv;
+    vector unsigned int sv;
+    vector signed int sum;
+
+    sv = (vector unsigned int)vec_splat_u32(0);
+
+    s = 0;
+    for (i = 0; i < 16; i++) {
+        /* Read in the potentially unaligned pixels */
+        tv = (vector unsigned char *) pix;
+        pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix));
+
+        /* Square the values, and add them to our sum */
+        sv = vec_msum(pixv, pixv, sv);
+
+        pix += line_size;
+    }
+    /* Sum up the four partial sums, and put the result into s */
+    sum = vec_sums((vector signed int) sv, (vector signed int) zero);
+    sum = vec_splat(sum, 3);
+    vec_ste(sum, 0, &s);
+
+    return s;
+}
+
+/**
+ * Sum of Squared Errors for a 8x8 block.
+ * AltiVec-enhanced.
+ * It's the sad8_altivec code above w/ squaring added.
+ */
+int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int i;
+    int s __attribute__((aligned(16)));
+    const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
+    vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;
+    vector unsigned char t1, t2, t3,t4, t5;
+    vector unsigned int sum;
+    vector signed int sumsqr;
+
+    sum = (vector unsigned int)vec_splat_u32(0);
+
+    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
+
+
+    for(i=0;i<h;i++) {
+        /* Read potentially unaligned pixels into t1 and t2
+           Since we're reading 16 pixels, and actually only want 8,
+           mask out the last 8 pixels. The 0s don't change the sum. */
+        perm1 = vec_lvsl(0, pix1);
+        pix1v = (vector unsigned char *) pix1;
+        perm2 = vec_lvsl(0, pix2);
+        pix2v = (vector unsigned char *) pix2;
+        t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);
+        t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);
+
+        /*
+          Since we want to use unsigned chars, we can take advantage
+          of the fact that abs(a-b)^2 = (a-b)^2.
+        */
+
+        /* Calculate abs differences vector */
+        t3 = vec_max(t1, t2);
+        t4 = vec_min(t1, t2);
+        t5 = vec_sub(t3, t4);
+
+        /* Square the values and add them to our sum */
+        sum = vec_msum(t5, t5, sum);
+
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+
+    /* Sum up the four partial sums, and put the result into s */
+    sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);
+    sumsqr = vec_splat(sumsqr, 3);
+    vec_ste(sumsqr, 0, &s);
+
+    return s;
+}
+
+/**
+ * Sum of Squared Errors for a 16x16 block.
+ * AltiVec-enhanced.
+ * It's the sad16_altivec code above w/ squaring added.
+ */
+int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int i;
+    int s __attribute__((aligned(16)));
+    const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
+    vector unsigned char perm1, perm2, *pix1v, *pix2v;
+    vector unsigned char t1, t2, t3,t4, t5;
+    vector unsigned int sum;
+    vector signed int sumsqr;
+
+    sum = (vector unsigned int)vec_splat_u32(0);
+
+    for(i=0;i<h;i++) {
+        /* Read potentially unaligned pixels into t1 and t2 */
+        perm1 = vec_lvsl(0, pix1);
+        pix1v = (vector unsigned char *) pix1;
+        perm2 = vec_lvsl(0, pix2);
+        pix2v = (vector unsigned char *) pix2;
+        t1 = vec_perm(pix1v[0], pix1v[1], perm1);
+        t2 = vec_perm(pix2v[0], pix2v[1], perm2);
+
+        /*
+          Since we want to use unsigned chars, we can take advantage
+          of the fact that abs(a-b)^2 = (a-b)^2.
+        */
+
+        /* Calculate abs differences vector */
+        t3 = vec_max(t1, t2);
+        t4 = vec_min(t1, t2);
+        t5 = vec_sub(t3, t4);
+
+        /* Square the values and add them to our sum */
+        sum = vec_msum(t5, t5, sum);
+
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+
+    /* Sum up the four partial sums, and put the result into s */
+    sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);
+    sumsqr = vec_splat(sumsqr, 3);
+    vec_ste(sumsqr, 0, &s);
+
+    return s;
+}
+
+int pix_sum_altivec(uint8_t * pix, int line_size)
+{
+    const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
+    vector unsigned char perm, *pixv;
+    vector unsigned char t1;
+    vector unsigned int sad;
+    vector signed int sumdiffs;
+
+    int i;
+    int s __attribute__((aligned(16)));
+
+    sad = (vector unsigned int)vec_splat_u32(0);
+
+    for (i = 0; i < 16; i++) {
+        /* Read the potentially unaligned 16 pixels into t1 */
+        perm = vec_lvsl(0, pix);
+        pixv = (vector unsigned char *) pix;
+        t1 = vec_perm(pixv[0], pixv[1], perm);
+
+        /* Add each 4 pixel group together and put 4 results into sad */
+        sad = vec_sum4s(t1, sad);
+
+        pix += line_size;
+    }
+
+    /* Sum up the four partial sums, and put the result into s */
+    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
+    sumdiffs = vec_splat(sumdiffs, 3);
+    vec_ste(sumdiffs, 0, &s);
+
+    return s;
+}
+
+void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
+{
+    int i;
+    vector unsigned char perm, bytes, *pixv;
+    const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
+    vector signed short shorts;
+
+    for(i=0;i<8;i++)
+    {
+        // Read potentially unaligned pixels.
+        // We're reading 16 pixels, and actually only want 8,
+        // but we simply ignore the extras.
+        perm = vec_lvsl(0, pixels);
+        pixv = (vector unsigned char *) pixels;
+        bytes = vec_perm(pixv[0], pixv[1], perm);
+
+        // convert the bytes into shorts
+        shorts = (vector signed short)vec_mergeh(zero, bytes);
+
+        // save the data to the block, we assume the block is 16-byte aligned
+        vec_st(shorts, i*16, (vector signed short*)block);
+
+        pixels += line_size;
+    }
+}
+
+void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1,
+        const uint8_t *s2, int stride)
+{
+    int i;
+    vector unsigned char perm, bytes, *pixv;
+    const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
+    vector signed short shorts1, shorts2;
+
+    for(i=0;i<4;i++)
+    {
+        // Read potentially unaligned pixels
+        // We're reading 16 pixels, and actually only want 8,
+        // but we simply ignore the extras.
+        perm = vec_lvsl(0, s1);
+        pixv = (vector unsigned char *) s1;
+        bytes = vec_perm(pixv[0], pixv[1], perm);
+
+        // convert the bytes into shorts
+        shorts1 = (vector signed short)vec_mergeh(zero, bytes);
+
+        // Do the same for the second block of pixels
+        perm = vec_lvsl(0, s2);
+        pixv = (vector unsigned char *) s2;
+        bytes = vec_perm(pixv[0], pixv[1], perm);
+
+        // convert the bytes into shorts
+        shorts2 = (vector signed short)vec_mergeh(zero, bytes);
+
+        // Do the subtraction
+        shorts1 = vec_sub(shorts1, shorts2);
+
+        // save the data to the block, we assume the block is 16-byte aligned
+        vec_st(shorts1, 0, (vector signed short*)block);
+
+        s1 += stride;
+        s2 += stride;
+        block += 8;
+
+
+        // The code below is a copy of the code above... This is a manual
+        // unroll.
+
+        // Read potentially unaligned pixels
+        // We're reading 16 pixels, and actually only want 8,
+        // but we simply ignore the extras.
+        perm = vec_lvsl(0, s1);
+        pixv = (vector unsigned char *) s1;
+        bytes = vec_perm(pixv[0], pixv[1], perm);
+
+        // convert the bytes into shorts
+        shorts1 = (vector signed short)vec_mergeh(zero, bytes);
+
+        // Do the same for the second block of pixels
+        perm = vec_lvsl(0, s2);
+        pixv = (vector unsigned char *) s2;
+        bytes = vec_perm(pixv[0], pixv[1], perm);
+
+        // convert the bytes into shorts
+        shorts2 = (vector signed short)vec_mergeh(zero, bytes);
+
+        // Do the subtraction
+        shorts1 = vec_sub(shorts1, shorts2);
+
+        // save the data to the block, we assume the block is 16-byte aligned
+        vec_st(shorts1, 0, (vector signed short*)block);
+
+        s1 += stride;
+        s2 += stride;
+        block += 8;
+    }
+}
+
+void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
+    register int i;
+    register vector unsigned char vdst, vsrc;
+
+    /* dst and src are 16 bytes-aligned (guaranteed) */
+    for(i = 0 ; (i + 15) < w ; i+=16)
+    {
+      vdst = vec_ld(i, (unsigned char*)dst);
+      vsrc = vec_ld(i, (unsigned char*)src);
+      vdst = vec_add(vsrc, vdst);
+      vec_st(vdst, i, (unsigned char*)dst);
+    }
+    /* if w is not a multiple of 16 */
+    for (; (i < w) ; i++)
+    {
+      dst[i] = src[i];
+    }
+}
+
+/* next one assumes that ((line_size % 16) == 0) */
+void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1);
+    register vector unsigned char pixelsv1, pixelsv2;
+    register vector unsigned char pixelsv1B, pixelsv2B;
+    register vector unsigned char pixelsv1C, pixelsv2C;
+    register vector unsigned char pixelsv1D, pixelsv2D;
+
+    register vector unsigned char perm = vec_lvsl(0, pixels);
+    int i;
+    register int line_size_2 = line_size << 1;
+    register int line_size_3 = line_size + line_size_2;
+    register int line_size_4 = line_size << 2;
+
+POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1);
+// hand-unrolling the loop by 4 gains about 15%
+// mininum execution time goes from 74 to 60 cycles
+// it's faster than -funroll-loops, but using
+// -funroll-loops w/ this is bad - 74 cycles again.
+// all this is on a 7450, tuning for the 7450
+#if 0
+    for(i=0; i<h; i++) {
+      pixelsv1 = vec_ld(0, (unsigned char*)pixels);
+      pixelsv2 = vec_ld(16, (unsigned char*)pixels);
+      vec_st(vec_perm(pixelsv1, pixelsv2, perm),
+             0, (unsigned char*)block);
+      pixels+=line_size;
+      block +=line_size;
+    }
+#else
+    for(i=0; i<h; i+=4) {
+      pixelsv1 = vec_ld(0, (unsigned char*)pixels);
+      pixelsv2 = vec_ld(15, (unsigned char*)pixels);
+      pixelsv1B = vec_ld(line_size, (unsigned char*)pixels);
+      pixelsv2B = vec_ld(15 + line_size, (unsigned char*)pixels);
+      pixelsv1C = vec_ld(line_size_2, (unsigned char*)pixels);
+      pixelsv2C = vec_ld(15 + line_size_2, (unsigned char*)pixels);
+      pixelsv1D = vec_ld(line_size_3, (unsigned char*)pixels);
+      pixelsv2D = vec_ld(15 + line_size_3, (unsigned char*)pixels);
+      vec_st(vec_perm(pixelsv1, pixelsv2, perm),
+             0, (unsigned char*)block);
+      vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
+             line_size, (unsigned char*)block);
+      vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
+             line_size_2, (unsigned char*)block);
+      vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
+             line_size_3, (unsigned char*)block);
+      pixels+=line_size_4;
+      block +=line_size_4;
+    }
+#endif
+POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1);
+}
+
+/* next one assumes that ((line_size % 16) == 0) */
+#define op_avg(a,b)  a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
+void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1);
+    register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
+    register vector unsigned char perm = vec_lvsl(0, pixels);
+    int i;
+
+POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1);
+
+    for(i=0; i<h; i++) {
+      pixelsv1 = vec_ld(0, (unsigned char*)pixels);
+      pixelsv2 = vec_ld(16, (unsigned char*)pixels);
+      blockv = vec_ld(0, block);
+      pixelsv = vec_perm(pixelsv1, pixelsv2, perm);
+      blockv = vec_avg(blockv,pixelsv);
+      vec_st(blockv, 0, (unsigned char*)block);
+      pixels+=line_size;
+      block +=line_size;
+    }
+
+POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);
+}
+
+/* next one assumes that ((line_size % 8) == 0) */
+void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
+{
+POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1);
+    register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
+    int i;
+
+POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
+
+   for (i = 0; i < h; i++) {
+     /*
+       block is 8 bytes-aligned, so we're either in the
+       left block (16 bytes-aligned) or in the right block (not)
+     */
+     int rightside = ((unsigned long)block & 0x0000000F);
+
+     blockv = vec_ld(0, block);
+     pixelsv1 = vec_ld(0, (unsigned char*)pixels);
+     pixelsv2 = vec_ld(16, (unsigned char*)pixels);
+     pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels));
+
+     if (rightside)
+     {
+       pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1));
+     }
+     else
+     {
+       pixelsv = vec_perm(blockv, pixelsv, vcprm(s0,s1,2,3));
+     }
+
+     blockv = vec_avg(blockv, pixelsv);
+
+     vec_st(blockv, 0, block);
+
+     pixels += line_size;
+     block += line_size;
+   }
+
+POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
+}
+
+/* next one assumes that ((line_size % 8) == 0) */
+void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1);
+   register int i;
+   register vector unsigned char
+     pixelsv1, pixelsv2,
+     pixelsavg;
+   register vector unsigned char
+     blockv, temp1, temp2;
+   register vector unsigned short
+     pixelssum1, pixelssum2, temp3;
+   register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
+   register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
+
+   temp1 = vec_ld(0, pixels);
+   temp2 = vec_ld(16, pixels);
+   pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
+   if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F)
+   {
+     pixelsv2 = temp2;
+   }
+   else
+   {
+     pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
+   }
+   pixelsv1 = vec_mergeh(vczero, pixelsv1);
+   pixelsv2 = vec_mergeh(vczero, pixelsv2);
+   pixelssum1 = vec_add((vector unsigned short)pixelsv1,
+                        (vector unsigned short)pixelsv2);
+   pixelssum1 = vec_add(pixelssum1, vctwo);
+
+POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1);
+   for (i = 0; i < h ; i++) {
+     int rightside = ((unsigned long)block & 0x0000000F);
+     blockv = vec_ld(0, block);
+
+     temp1 = vec_ld(line_size, pixels);
+     temp2 = vec_ld(line_size + 16, pixels);
+     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
+     if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F)
+     {
+       pixelsv2 = temp2;
+     }
+     else
+     {
+       pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
+     }
+
+     pixelsv1 = vec_mergeh(vczero, pixelsv1);
+     pixelsv2 = vec_mergeh(vczero, pixelsv2);
+     pixelssum2 = vec_add((vector unsigned short)pixelsv1,
+                          (vector unsigned short)pixelsv2);
+     temp3 = vec_add(pixelssum1, pixelssum2);
+     temp3 = vec_sra(temp3, vctwo);
+     pixelssum1 = vec_add(pixelssum2, vctwo);
+     pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
+
+     if (rightside)
+     {
+       blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
+     }
+     else
+     {
+       blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
+     }
+
+     vec_st(blockv, 0, block);
+
+     block += line_size;
+     pixels += line_size;
+   }
+
+POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
+}
+
+/* next one assumes that ((line_size % 8) == 0) */
+void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
+   register int i;
+   register vector unsigned char
+     pixelsv1, pixelsv2,
+     pixelsavg;
+   register vector unsigned char
+     blockv, temp1, temp2;
+   register vector unsigned short
+     pixelssum1, pixelssum2, temp3;
+   register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
+   register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1);
+   register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
+
+   temp1 = vec_ld(0, pixels);
+   temp2 = vec_ld(16, pixels);
+   pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
+   if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F)
+   {
+     pixelsv2 = temp2;
+   }
+   else
+   {
+     pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
+   }
+   pixelsv1 = vec_mergeh(vczero, pixelsv1);
+   pixelsv2 = vec_mergeh(vczero, pixelsv2);
+   pixelssum1 = vec_add((vector unsigned short)pixelsv1,
+                        (vector unsigned short)pixelsv2);
+   pixelssum1 = vec_add(pixelssum1, vcone);
+
+POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
+   for (i = 0; i < h ; i++) {
+     int rightside = ((unsigned long)block & 0x0000000F);
+     blockv = vec_ld(0, block);
+
+     temp1 = vec_ld(line_size, pixels);
+     temp2 = vec_ld(line_size + 16, pixels);
+     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
+     if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F)
+     {
+       pixelsv2 = temp2;
+     }
+     else
+     {
+       pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
+     }
+
+     pixelsv1 = vec_mergeh(vczero, pixelsv1);
+     pixelsv2 = vec_mergeh(vczero, pixelsv2);
+     pixelssum2 = vec_add((vector unsigned short)pixelsv1,
+                          (vector unsigned short)pixelsv2);
+     temp3 = vec_add(pixelssum1, pixelssum2);
+     temp3 = vec_sra(temp3, vctwo);
+     pixelssum1 = vec_add(pixelssum2, vcone);
+     pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
+
+     if (rightside)
+     {
+       blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
+     }
+     else
+     {
+       blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
+     }
+
+     vec_st(blockv, 0, block);
+
+     block += line_size;
+     pixels += line_size;
+   }
+
+POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
+}
+
+/* next one assumes that ((line_size % 16) == 0) */
+void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
+{
+POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1);
+   register int i;
+   register vector unsigned char
+     pixelsv1, pixelsv2, pixelsv3, pixelsv4;
+   register vector unsigned char
+     blockv, temp1, temp2;
+   register vector unsigned short
+     pixelssum1, pixelssum2, temp3,
+     pixelssum3, pixelssum4, temp4;
+   register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
+   register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
+
+POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
+
+   temp1 = vec_ld(0, pixels);
+   temp2 = vec_ld(16, pixels);
+   pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
+   if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F)
+   {
+     pixelsv2 = temp2;
+   }
+   else
+   {
+     pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
+   }
+   pixelsv3 = vec_mergel(vczero, pixelsv1);
+   pixelsv4 = vec_mergel(vczero, pixelsv2);
+   pixelsv1 = vec_mergeh(vczero, pixelsv1);
+   pixelsv2 = vec_mergeh(vczero, pixelsv2);
+   pixelssum3 = vec_add((vector unsigned short)pixelsv3,
+                        (vector unsigned short)pixelsv4);
+   pixelssum3 = vec_add(pixelssum3, vctwo);
+   pixelssum1 = vec_add((vector unsigned short)pixelsv1,
+                        (vector unsigned short)pixelsv2);
+   pixelssum1 = vec_add(pixelssum1, vctwo);
+
+   for (i = 0; i < h ; i++) {
+     blockv = vec_ld(0, block);
+
+     temp1 = vec_ld(line_size, pixels);
+     temp2 = vec_ld(line_size + 16, pixels);
+     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
+     if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F)
+     {
+       pixelsv2 = temp2;
+     }
+     else
+     {
+       pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
+     }
+
+     pixelsv3 = vec_mergel(vczero, pixelsv1);
+     pixelsv4 = vec_mergel(vczero, pixelsv2);
+     pixelsv1 = vec_mergeh(vczero, pixelsv1);
+     pixelsv2 = vec_mergeh(vczero, pixelsv2);
+
+     pixelssum4 = vec_add((vector unsigned short)pixelsv3,
+                          (vector unsigned short)pixelsv4);
+     pixelssum2 = vec_add((vector unsigned short)pixelsv1,
+                          (vector unsigned short)pixelsv2);
+     temp4 = vec_add(pixelssum3, pixelssum4);
+     temp4 = vec_sra(temp4, vctwo);
+     temp3 = vec_add(pixelssum1, pixelssum2);
+     temp3 = vec_sra(temp3, vctwo);
+
+     pixelssum3 = vec_add(pixelssum4, vctwo);
+     pixelssum1 = vec_add(pixelssum2, vctwo);
+
+     blockv = vec_packsu(temp3, temp4);
+
+     vec_st(blockv, 0, block);
+
+     block += line_size;
+     pixels += line_size;
+   }
+
+POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
+}
+
+/* next one assumes that ((line_size % 16) == 0) */
+void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
+{
+POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
+   register int i;
+   register vector unsigned char
+     pixelsv1, pixelsv2, pixelsv3, pixelsv4;
+   register vector unsigned char
+     blockv, temp1, temp2;
+   register vector unsigned short
+     pixelssum1, pixelssum2, temp3,
+     pixelssum3, pixelssum4, temp4;
+   register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
+   register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1);
+   register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
+
+POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
+
+   temp1 = vec_ld(0, pixels);
+   temp2 = vec_ld(16, pixels);
+   pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
+   if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F)
+   {
+     pixelsv2 = temp2;
+   }
+   else
+   {
+     pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
+   }
+   pixelsv3 = vec_mergel(vczero, pixelsv1);
+   pixelsv4 = vec_mergel(vczero, pixelsv2);
+   pixelsv1 = vec_mergeh(vczero, pixelsv1);
+   pixelsv2 = vec_mergeh(vczero, pixelsv2);
+   pixelssum3 = vec_add((vector unsigned short)pixelsv3,
+                        (vector unsigned short)pixelsv4);
+   pixelssum3 = vec_add(pixelssum3, vcone);
+   pixelssum1 = vec_add((vector unsigned short)pixelsv1,
+                        (vector unsigned short)pixelsv2);
+   pixelssum1 = vec_add(pixelssum1, vcone);
+
+   for (i = 0; i < h ; i++) {
+     blockv = vec_ld(0, block);
+
+     temp1 = vec_ld(line_size, pixels);
+     temp2 = vec_ld(line_size + 16, pixels);
+     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
+     if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F)
+     {
+       pixelsv2 = temp2;
+     }
+     else
+     {
+       pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
+     }
+
+     pixelsv3 = vec_mergel(vczero, pixelsv1);
+     pixelsv4 = vec_mergel(vczero, pixelsv2);
+     pixelsv1 = vec_mergeh(vczero, pixelsv1);
+     pixelsv2 = vec_mergeh(vczero, pixelsv2);
+
+     pixelssum4 = vec_add((vector unsigned short)pixelsv3,
+                          (vector unsigned short)pixelsv4);
+     pixelssum2 = vec_add((vector unsigned short)pixelsv1,
+                          (vector unsigned short)pixelsv2);
+     temp4 = vec_add(pixelssum3, pixelssum4);
+     temp4 = vec_sra(temp4, vctwo);
+     temp3 = vec_add(pixelssum1, pixelssum2);
+     temp3 = vec_sra(temp3, vctwo);
+
+     pixelssum3 = vec_add(pixelssum4, vcone);
+     pixelssum1 = vec_add(pixelssum2, vcone);
+
+     blockv = vec_packsu(temp3, temp4);
+
+     vec_st(blockv, 0, block);
+
+     block += line_size;
+     pixels += line_size;
+   }
+
+POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
+}
+
+int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
+POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1);
+    int sum;
+    register const_vector unsigned char vzero =
+                            (const_vector unsigned char)vec_splat_u8(0);
+    register vector signed short temp0, temp1, temp2, temp3, temp4,
+                                 temp5, temp6, temp7;
+POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
+  {
+    register const_vector signed short vprod1 =(const_vector signed short)
+                                        AVV( 1,-1, 1,-1, 1,-1, 1,-1);
+    register const_vector signed short vprod2 =(const_vector signed short)
+                                        AVV( 1, 1,-1,-1, 1, 1,-1,-1);
+    register const_vector signed short vprod3 =(const_vector signed short)
+                                        AVV( 1, 1, 1, 1,-1,-1,-1,-1);
+    register const_vector unsigned char perm1 = (const_vector unsigned char)
+      AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
+          0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D);
+    register const_vector unsigned char perm2 = (const_vector unsigned char)
+      AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
+          0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B);
+    register const_vector unsigned char perm3 = (const_vector unsigned char)
+      AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+          0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+
+#define ONEITERBUTTERFLY(i, res)                                        \
+    {                                                                   \
+      register vector unsigned char src1, src2, srcO;                   \
+      register vector unsigned char dst1, dst2, dstO;                   \
+      register vector signed short srcV, dstV;                          \
+      register vector signed short but0, but1, but2, op1, op2, op3;     \
+      src1 = vec_ld(stride * i, src);                                   \
+      if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8)       \
+        src2 = vec_ld((stride * i) + 16, src);                          \
+      srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src));           \
+      dst1 = vec_ld(stride * i, dst);                                   \
+      if ((((stride * i) + (unsigned long)dst) & 0x0000000F) > 8)       \
+        dst2 = vec_ld((stride * i) + 16, dst);                          \
+      dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst));           \
+      /* promote the unsigned chars to signed shorts */                 \
+      /* we're in the 8x8 function, we only care for the first 8 */     \
+      srcV =                                                            \
+        (vector signed short)vec_mergeh((vector signed char)vzero,      \
+        (vector signed char)srcO);                                      \
+      dstV =                                                            \
+        (vector signed short)vec_mergeh((vector signed char)vzero,      \
+        (vector signed char)dstO);                                      \
+      /* substractions inside the first butterfly */                    \
+      but0 = vec_sub(srcV, dstV);                                       \
+      op1 = vec_perm(but0, but0, perm1);                                \
+      but1 = vec_mladd(but0, vprod1, op1);                              \
+      op2 = vec_perm(but1, but1, perm2);                                \
+      but2 = vec_mladd(but1, vprod2, op2);                              \
+      op3 = vec_perm(but2, but2, perm3);                                \
+      res = vec_mladd(but2, vprod3, op3);                               \
+    }
+    ONEITERBUTTERFLY(0, temp0);
+    ONEITERBUTTERFLY(1, temp1);
+    ONEITERBUTTERFLY(2, temp2);
+    ONEITERBUTTERFLY(3, temp3);
+    ONEITERBUTTERFLY(4, temp4);
+    ONEITERBUTTERFLY(5, temp5);
+    ONEITERBUTTERFLY(6, temp6);
+    ONEITERBUTTERFLY(7, temp7);
+  }
+#undef ONEITERBUTTERFLY
+  {
+    register vector signed int vsum;
+    register vector signed short line0 = vec_add(temp0, temp1);
+    register vector signed short line1 = vec_sub(temp0, temp1);
+    register vector signed short line2 = vec_add(temp2, temp3);
+    register vector signed short line3 = vec_sub(temp2, temp3);
+    register vector signed short line4 = vec_add(temp4, temp5);
+    register vector signed short line5 = vec_sub(temp4, temp5);
+    register vector signed short line6 = vec_add(temp6, temp7);
+    register vector signed short line7 = vec_sub(temp6, temp7);
+
+    register vector signed short line0B = vec_add(line0, line2);
+    register vector signed short line2B = vec_sub(line0, line2);
+    register vector signed short line1B = vec_add(line1, line3);
+    register vector signed short line3B = vec_sub(line1, line3);
+    register vector signed short line4B = vec_add(line4, line6);
+    register vector signed short line6B = vec_sub(line4, line6);
+    register vector signed short line5B = vec_add(line5, line7);
+    register vector signed short line7B = vec_sub(line5, line7);
+
+    register vector signed short line0C = vec_add(line0B, line4B);
+    register vector signed short line4C = vec_sub(line0B, line4B);
+    register vector signed short line1C = vec_add(line1B, line5B);
+    register vector signed short line5C = vec_sub(line1B, line5B);
+    register vector signed short line2C = vec_add(line2B, line6B);
+    register vector signed short line6C = vec_sub(line2B, line6B);
+    register vector signed short line3C = vec_add(line3B, line7B);
+    register vector signed short line7C = vec_sub(line3B, line7B);
+
+    vsum = vec_sum4s(vec_abs(line0C), vec_splat_s32(0));
+    vsum = vec_sum4s(vec_abs(line1C), vsum);
+    vsum = vec_sum4s(vec_abs(line2C), vsum);
+    vsum = vec_sum4s(vec_abs(line3C), vsum);
+    vsum = vec_sum4s(vec_abs(line4C), vsum);
+    vsum = vec_sum4s(vec_abs(line5C), vsum);
+    vsum = vec_sum4s(vec_abs(line6C), vsum);
+    vsum = vec_sum4s(vec_abs(line7C), vsum);
+    vsum = vec_sums(vsum, (vector signed int)vzero);
+    vsum = vec_splat(vsum, 3);
+    vec_ste(vsum, 0, &sum);
+  }
+POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff8x8_num, 1);
+  return sum;
+}
+
+/*
+  16x8 works with 16 elements ; it allows to avoid replicating
+  loads, and give the compiler more rooms for scheduling.
+  It's only used from inside hadamard8_diff16_altivec.
+
+  Unfortunately, it seems gcc-3.3 is a bit dumb, and
+  the compiled code has a LOT of spill code, it seems
+  gcc (unlike xlc) cannot keep everything in registers
+  by itself. The following code include hand-made
+  registers allocation. It's not clean, but on
+  a 7450 the resulting code is much faster (best case
+  fall from 700+ cycles to 550).
+
+  xlc doesn't add spill code, but it doesn't know how to
+  schedule for the 7450, and its code isn't much faster than
+  gcc-3.3 on the 7450 (but uses 25% less instructions...)
+
+  On the 970, the hand-made RA is still a win (arount 690
+  vs. around 780), but xlc goes to around 660 on the
+  regular C code...
+*/
+
+static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) {
+    int sum;
+    register vector signed short
+        temp0 REG_v(v0),
+        temp1 REG_v(v1),
+        temp2 REG_v(v2),
+        temp3 REG_v(v3),
+        temp4 REG_v(v4),
+        temp5 REG_v(v5),
+        temp6 REG_v(v6),
+        temp7 REG_v(v7);
+    register vector signed short
+        temp0S REG_v(v8),
+        temp1S REG_v(v9),
+        temp2S REG_v(v10),
+        temp3S REG_v(v11),
+        temp4S REG_v(v12),
+        temp5S REG_v(v13),
+        temp6S REG_v(v14),
+        temp7S REG_v(v15);
+    register const_vector unsigned char vzero REG_v(v31)=
+        (const_vector unsigned char)vec_splat_u8(0);
+  {
+    register const_vector signed short vprod1 REG_v(v16)=
+        (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1);
+    register const_vector signed short vprod2 REG_v(v17)=
+        (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1);
+    register const_vector signed short vprod3 REG_v(v18)=
+        (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1);
+    register const_vector unsigned char perm1 REG_v(v19)=
+        (const_vector unsigned char)
+        AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
+            0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D);
+    register const_vector unsigned char perm2 REG_v(v20)=
+        (const_vector unsigned char)
+        AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
+            0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B);
+    register const_vector unsigned char perm3 REG_v(v21)=
+        (const_vector unsigned char)
+        AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+
+#define ONEITERBUTTERFLY(i, res1, res2)                                 \
+    {                                                                   \
+      register vector unsigned char src1 REG_v(v22),                    \
+                                    src2 REG_v(v23),                    \
+                                    dst1 REG_v(v24),                    \
+                                    dst2 REG_v(v25),                    \
+                                    srcO REG_v(v22),                    \
+                                    dstO REG_v(v23);                    \
+                                                                        \
+      register vector signed short  srcV REG_v(v24),                    \
+                                    dstV REG_v(v25),                    \
+                                    srcW REG_v(v26),                    \
+                                    dstW REG_v(v27),                    \
+                                    but0 REG_v(v28),                    \
+                                    but0S REG_v(v29),                   \
+                                    op1 REG_v(v30),                     \
+                                    but1 REG_v(v22),                    \
+                                    op1S REG_v(v23),                    \
+                                    but1S REG_v(v24),                   \
+                                    op2 REG_v(v25),                     \
+                                    but2 REG_v(v26),                    \
+                                    op2S REG_v(v27),                    \
+                                    but2S REG_v(v28),                   \
+                                    op3 REG_v(v29),                     \
+                                    op3S REG_v(v30);                    \
+                                                                        \
+      src1 = vec_ld(stride * i, src);                                   \
+      src2 = vec_ld((stride * i) + 16, src);                            \
+      srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src));           \
+      dst1 = vec_ld(stride * i, dst);                                   \
+      dst2 = vec_ld((stride * i) + 16, dst);                            \
+      dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst));           \
+      /* promote the unsigned chars to signed shorts */                 \
+      srcV =                                                            \
+        (vector signed short)vec_mergeh((vector signed char)vzero,      \
+        (vector signed char)srcO);                                      \
+      dstV =                                                            \
+        (vector signed short)vec_mergeh((vector signed char)vzero,      \
+        (vector signed char)dstO);                                      \
+      srcW =                                                            \
+        (vector signed short)vec_mergel((vector signed char)vzero,      \
+        (vector signed char)srcO);                                      \
+      dstW =                                                            \
+        (vector signed short)vec_mergel((vector signed char)vzero,      \
+        (vector signed char)dstO);                                      \
+      /* substractions inside the first butterfly */                    \
+      but0 = vec_sub(srcV, dstV);                                       \
+      but0S = vec_sub(srcW, dstW);                                      \
+      op1 = vec_perm(but0, but0, perm1);                                \
+      but1 = vec_mladd(but0, vprod1, op1);                              \
+      op1S = vec_perm(but0S, but0S, perm1);                             \
+      but1S = vec_mladd(but0S, vprod1, op1S);                           \
+      op2 = vec_perm(but1, but1, perm2);                                \
+      but2 = vec_mladd(but1, vprod2, op2);                              \
+      op2S = vec_perm(but1S, but1S, perm2);                             \
+      but2S = vec_mladd(but1S, vprod2, op2S);                           \
+      op3 = vec_perm(but2, but2, perm3);                                \
+      res1 = vec_mladd(but2, vprod3, op3);                              \
+      op3S = vec_perm(but2S, but2S, perm3);                             \
+      res2 = vec_mladd(but2S, vprod3, op3S);                            \
+    }
+    ONEITERBUTTERFLY(0, temp0, temp0S);
+    ONEITERBUTTERFLY(1, temp1, temp1S);
+    ONEITERBUTTERFLY(2, temp2, temp2S);
+    ONEITERBUTTERFLY(3, temp3, temp3S);
+    ONEITERBUTTERFLY(4, temp4, temp4S);
+    ONEITERBUTTERFLY(5, temp5, temp5S);
+    ONEITERBUTTERFLY(6, temp6, temp6S);
+    ONEITERBUTTERFLY(7, temp7, temp7S);
+  }
+#undef ONEITERBUTTERFLY
+  {
+    register vector signed int vsum;
+    register vector signed short line0S, line1S, line2S, line3S, line4S,
+                                 line5S, line6S, line7S, line0BS,line2BS,
+                                 line1BS,line3BS,line4BS,line6BS,line5BS,
+                                 line7BS,line0CS,line4CS,line1CS,line5CS,
+                                 line2CS,line6CS,line3CS,line7CS;
+
+    register vector signed short line0 = vec_add(temp0, temp1);
+    register vector signed short line1 = vec_sub(temp0, temp1);
+    register vector signed short line2 = vec_add(temp2, temp3);
+    register vector signed short line3 = vec_sub(temp2, temp3);
+    register vector signed short line4 = vec_add(temp4, temp5);
+    register vector signed short line5 = vec_sub(temp4, temp5);
+    register vector signed short line6 = vec_add(temp6, temp7);
+    register vector signed short line7 = vec_sub(temp6, temp7);
+
+    register vector signed short line0B = vec_add(line0, line2);
+    register vector signed short line2B = vec_sub(line0, line2);
+    register vector signed short line1B = vec_add(line1, line3);
+    register vector signed short line3B = vec_sub(line1, line3);
+    register vector signed short line4B = vec_add(line4, line6);
+    register vector signed short line6B = vec_sub(line4, line6);
+    register vector signed short line5B = vec_add(line5, line7);
+    register vector signed short line7B = vec_sub(line5, line7);
+
+    register vector signed short line0C = vec_add(line0B, line4B);
+    register vector signed short line4C = vec_sub(line0B, line4B);
+    register vector signed short line1C = vec_add(line1B, line5B);
+    register vector signed short line5C = vec_sub(line1B, line5B);
+    register vector signed short line2C = vec_add(line2B, line6B);
+    register vector signed short line6C = vec_sub(line2B, line6B);
+    register vector signed short line3C = vec_add(line3B, line7B);
+    register vector signed short line7C = vec_sub(line3B, line7B);
+
+    vsum = vec_sum4s(vec_abs(line0C), vec_splat_s32(0));
+    vsum = vec_sum4s(vec_abs(line1C), vsum);
+    vsum = vec_sum4s(vec_abs(line2C), vsum);
+    vsum = vec_sum4s(vec_abs(line3C), vsum);
+    vsum = vec_sum4s(vec_abs(line4C), vsum);
+    vsum = vec_sum4s(vec_abs(line5C), vsum);
+    vsum = vec_sum4s(vec_abs(line6C), vsum);
+    vsum = vec_sum4s(vec_abs(line7C), vsum);
+
+    line0S = vec_add(temp0S, temp1S);
+    line1S = vec_sub(temp0S, temp1S);
+    line2S = vec_add(temp2S, temp3S);
+    line3S = vec_sub(temp2S, temp3S);
+    line4S = vec_add(temp4S, temp5S);
+    line5S = vec_sub(temp4S, temp5S);
+    line6S = vec_add(temp6S, temp7S);
+    line7S = vec_sub(temp6S, temp7S);
+
+    line0BS = vec_add(line0S, line2S);
+    line2BS = vec_sub(line0S, line2S);
+    line1BS = vec_add(line1S, line3S);
+    line3BS = vec_sub(line1S, line3S);
+    line4BS = vec_add(line4S, line6S);
+    line6BS = vec_sub(line4S, line6S);
+    line5BS = vec_add(line5S, line7S);
+    line7BS = vec_sub(line5S, line7S);
+
+    line0CS = vec_add(line0BS, line4BS);
+    line4CS = vec_sub(line0BS, line4BS);
+    line1CS = vec_add(line1BS, line5BS);
+    line5CS = vec_sub(line1BS, line5BS);
+    line2CS = vec_add(line2BS, line6BS);
+    line6CS = vec_sub(line2BS, line6BS);
+    line3CS = vec_add(line3BS, line7BS);
+    line7CS = vec_sub(line3BS, line7BS);
+
+    vsum = vec_sum4s(vec_abs(line0CS), vsum);
+    vsum = vec_sum4s(vec_abs(line1CS), vsum);
+    vsum = vec_sum4s(vec_abs(line2CS), vsum);
+    vsum = vec_sum4s(vec_abs(line3CS), vsum);
+    vsum = vec_sum4s(vec_abs(line4CS), vsum);
+    vsum = vec_sum4s(vec_abs(line5CS), vsum);
+    vsum = vec_sum4s(vec_abs(line6CS), vsum);
+    vsum = vec_sum4s(vec_abs(line7CS), vsum);
+    vsum = vec_sums(vsum, (vector signed int)vzero);
+    vsum = vec_splat(vsum, 3);
+    vec_ste(vsum, 0, &sum);
+  }
+  return sum;
+}
+
+int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
+POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1);
+    int score;
+POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1);
+    score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
+    if (h==16) {
+        dst += 8*stride;
+        src += 8*stride;
+        score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
+    }
+POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1);
+    return score;
+}
+
+int has_altivec(void)
+{
+#ifdef __AMIGAOS4__
+    ULONG result = 0;
+    extern struct ExecIFace *IExec;
+
+    IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
+    if (result == VECTORTYPE_ALTIVEC) return 1;
+    return 0;
+#else /* __AMIGAOS4__ */
+
+#ifdef CONFIG_DARWIN
+    int sels[2] = {CTL_HW, HW_VECTORUNIT};
+    int has_vu = 0;
+    size_t len = sizeof(has_vu);
+    int err;
+
+    err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
+
+    if (err == 0) return (has_vu != 0);
+#else /* CONFIG_DARWIN */
+/* no Darwin, do it the brute-force way */
+/* this is borrowed from the libmpeg2 library */
+    {
+      signal (SIGILL, sigill_handler);
+      if (sigsetjmp (jmpbuf, 1)) {
+        signal (SIGILL, SIG_DFL);
+      } else {
+        canjump = 1;
+
+        asm volatile ("mtspr 256, %0\n\t"
+                      "vand %%v0, %%v0, %%v0"
+                      :
+                      : "r" (-1));
+
+        signal (SIGILL, SIG_DFL);
+        return 1;
+      }
+    }
+#endif /* CONFIG_DARWIN */
+    return 0;
+#endif /* __AMIGAOS4__ */
+}
+
+static void vorbis_inverse_coupling_altivec(float *mag, float *ang,
+                                            int blocksize)
+{
+    int i;
+    vector float m, a;
+    vector bool int t0, t1;
+    const vector unsigned int v_31 = //XXX
+        vec_add(vec_add(vec_splat_u32(15),vec_splat_u32(15)),vec_splat_u32(1));
+    for(i=0; i<blocksize; i+=4) {
+        m = vec_ld(0, mag+i);
+        a = vec_ld(0, ang+i);
+        t0 = vec_cmple(m, (vector float)vec_splat_u32(0));
+        t1 = vec_cmple(a, (vector float)vec_splat_u32(0));
+        a = vec_xor(a, (vector float) vec_sl((vector unsigned int)t0, v_31));
+        t0 = (vector bool int)vec_and(a, t1);
+        t1 = (vector bool int)vec_andc(a, t1);
+        a = vec_sub(m, (vector float)t1);
+        m = vec_add(m, (vector float)t0);
+        vec_stl(a, 0, ang+i);
+        vec_stl(m, 0, mag+i);
+    }
+}
+
+/* next one assumes that ((line_size % 8) == 0) */
+void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);
+    register int i;
+    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
+    register vector unsigned char blockv, temp1, temp2, blocktemp;
+    register vector unsigned short pixelssum1, pixelssum2, temp3;
+
+    register const_vector unsigned char vczero = (const_vector unsigned char)
+                                        vec_splat_u8(0);
+    register const_vector unsigned short vctwo = (const_vector unsigned short)
+                                        vec_splat_u16(2);
+
+    temp1 = vec_ld(0, pixels);
+    temp2 = vec_ld(16, pixels);
+    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
+    if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {
+        pixelsv2 = temp2;
+    } else {
+        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
+    }
+    pixelsv1 = vec_mergeh(vczero, pixelsv1);
+    pixelsv2 = vec_mergeh(vczero, pixelsv2);
+    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
+                         (vector unsigned short)pixelsv2);
+    pixelssum1 = vec_add(pixelssum1, vctwo);
+
+POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
+    for (i = 0; i < h ; i++) {
+        int rightside = ((unsigned long)block & 0x0000000F);
+        blockv = vec_ld(0, block);
+
+        temp1 = vec_ld(line_size, pixels);
+        temp2 = vec_ld(line_size + 16, pixels);
+        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
+        if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F)
+        {
+            pixelsv2 = temp2;
+        } else {
+            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
+        }
+
+        pixelsv1 = vec_mergeh(vczero, pixelsv1);
+        pixelsv2 = vec_mergeh(vczero, pixelsv2);
+        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
+                             (vector unsigned short)pixelsv2);
+        temp3 = vec_add(pixelssum1, pixelssum2);
+        temp3 = vec_sra(temp3, vctwo);
+        pixelssum1 = vec_add(pixelssum2, vctwo);
+        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
+
+        if (rightside) {
+            blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
+        } else {
+            blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
+        }
+
+        blockv = vec_avg(blocktemp, blockv);
+        vec_st(blockv, 0, block);
+
+        block += line_size;
+        pixels += line_size;
+    }
+
+POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
+}
+
+void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
+{
+    c->pix_abs[0][1] = sad16_x2_altivec;
+    c->pix_abs[0][2] = sad16_y2_altivec;
+    c->pix_abs[0][3] = sad16_xy2_altivec;
+    c->pix_abs[0][0] = sad16_altivec;
+    c->pix_abs[1][0] = sad8_altivec;
+    c->sad[0]= sad16_altivec;
+    c->sad[1]= sad8_altivec;
+    c->pix_norm1 = pix_norm1_altivec;
+    c->sse[1]= sse8_altivec;
+    c->sse[0]= sse16_altivec;
+    c->pix_sum = pix_sum_altivec;
+    c->diff_pixels = diff_pixels_altivec;
+    c->get_pixels = get_pixels_altivec;
+    c->add_bytes= add_bytes_altivec;
+    c->put_pixels_tab[0][0] = put_pixels16_altivec;
+    /* the two functions do the same thing, so use the same code */
+    c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
+    c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
+    c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
+    c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
+    c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
+    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
+    c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
+    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
+
+    c->hadamard8_diff[0] = hadamard8_diff16_altivec;
+    c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
+#ifdef CONFIG_VORBIS_DECODER
+    c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec;
+#endif
+}
diff --git a/contrib/ffmpeg/libavcodec/ppc/dsputil_altivec.h b/contrib/ffmpeg/libavcodec/ppc/dsputil_altivec.h
new file mode 100644
index 000000000..560d778bb
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/dsputil_altivec.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2002 Brian Foley
+ * Copyright (c) 2002 Dieter Shirley
+ * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _DSPUTIL_ALTIVEC_
+#define _DSPUTIL_ALTIVEC_
+
+#include "dsputil_ppc.h"
+
+#ifdef HAVE_ALTIVEC
+
+extern int has_altivec(void);
+
+void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+
+void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+
+// used to build registers permutation vectors (vcprm)
+// the 's' are for words in the _s_econd vector
+#define WORD_0 0x00,0x01,0x02,0x03
+#define WORD_1 0x04,0x05,0x06,0x07
+#define WORD_2 0x08,0x09,0x0a,0x0b
+#define WORD_3 0x0c,0x0d,0x0e,0x0f
+#define WORD_s0 0x10,0x11,0x12,0x13
+#define WORD_s1 0x14,0x15,0x16,0x17
+#define WORD_s2 0x18,0x19,0x1a,0x1b
+#define WORD_s3 0x1c,0x1d,0x1e,0x1f
+
+#ifdef CONFIG_DARWIN
+#define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d)
+#else
+#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
+#endif
+
+// vcprmle is used to keep the same index as in the SSE version.
+// it's the same as vcprm, with the index inversed
+// ('le' is Little Endian)
+#define vcprmle(a,b,c,d) vcprm(d,c,b,a)
+
+// used to build inverse/identity vectors (vcii)
+// n is _n_egative, p is _p_ositive
+#define FLOAT_n -1.
+#define FLOAT_p 1.
+
+
+#ifdef CONFIG_DARWIN
+#define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d)
+#else
+#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
+#endif
+
+// Transpose 8x8 matrix of 16-bit elements (in-place)
+#define TRANSPOSE8(a,b,c,d,e,f,g,h) \
+do { \
+    vector signed short A1, B1, C1, D1, E1, F1, G1, H1; \
+    vector signed short A2, B2, C2, D2, E2, F2, G2, H2; \
+ \
+    A1 = vec_mergeh (a, e); \
+    B1 = vec_mergel (a, e); \
+    C1 = vec_mergeh (b, f); \
+    D1 = vec_mergel (b, f); \
+    E1 = vec_mergeh (c, g); \
+    F1 = vec_mergel (c, g); \
+    G1 = vec_mergeh (d, h); \
+    H1 = vec_mergel (d, h); \
+ \
+    A2 = vec_mergeh (A1, E1); \
+    B2 = vec_mergel (A1, E1); \
+    C2 = vec_mergeh (B1, F1); \
+    D2 = vec_mergel (B1, F1); \
+    E2 = vec_mergeh (C1, G1); \
+    F2 = vec_mergel (C1, G1); \
+    G2 = vec_mergeh (D1, H1); \
+    H2 = vec_mergel (D1, H1); \
+ \
+    a = vec_mergeh (A2, E2); \
+    b = vec_mergel (A2, E2); \
+    c = vec_mergeh (B2, F2); \
+    d = vec_mergel (B2, F2); \
+    e = vec_mergeh (C2, G2); \
+    f = vec_mergel (C2, G2); \
+    g = vec_mergeh (D2, H2); \
+    h = vec_mergel (D2, H2); \
+} while (0)
+
+#endif /* HAVE_ALTIVEC */
+
+#endif /* _DSPUTIL_ALTIVEC_ */
diff --git a/contrib/ffmpeg/libavcodec/ppc/dsputil_ppc.c b/contrib/ffmpeg/libavcodec/ppc/dsputil_ppc.c
new file mode 100644
index 000000000..9169eaef0
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/dsputil_ppc.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) 2002 Brian Foley
+ * Copyright (c) 2002 Dieter Shirley
+ * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+
+#include "dsputil_ppc.h"
+
+#ifdef HAVE_ALTIVEC
+#include "dsputil_altivec.h"
+
+extern void fdct_altivec(int16_t *block);
+extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
+                         int x16, int y16, int rounder);
+extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
+extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
+
+void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx);
+
+void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx);
+void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx);
+void snow_init_altivec(DSPContext* c, AVCodecContext *avctx);
+void float_init_altivec(DSPContext* c, AVCodecContext *avctx);
+
+#endif
+
+int mm_flags = 0;
+
+int mm_support(void)
+{
+    int result = 0;
+#ifdef HAVE_ALTIVEC
+    if (has_altivec()) {
+        result |= MM_ALTIVEC;
+    }
+#endif /* result */
+    return result;
+}
+
+#ifdef POWERPC_PERFORMANCE_REPORT
+unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
+/* list below must match enum in dsputil_ppc.h */
+static unsigned char* perfname[] = {
+  "ff_fft_calc_altivec",
+  "gmc1_altivec",
+  "dct_unquantize_h263_altivec",
+  "fdct_altivec",
+  "idct_add_altivec",
+  "idct_put_altivec",
+  "put_pixels16_altivec",
+  "avg_pixels16_altivec",
+  "avg_pixels8_altivec",
+  "put_pixels8_xy2_altivec",
+  "put_no_rnd_pixels8_xy2_altivec",
+  "put_pixels16_xy2_altivec",
+  "put_no_rnd_pixels16_xy2_altivec",
+  "hadamard8_diff8x8_altivec",
+  "hadamard8_diff16_altivec",
+  "avg_pixels8_xy2_altivec",
+  "clear_blocks_dcbz32_ppc",
+  "clear_blocks_dcbz128_ppc",
+  "put_h264_chroma_mc8_altivec",
+  "avg_h264_chroma_mc8_altivec",
+  "put_h264_qpel16_h_lowpass_altivec",
+  "avg_h264_qpel16_h_lowpass_altivec",
+  "put_h264_qpel16_v_lowpass_altivec",
+  "avg_h264_qpel16_v_lowpass_altivec",
+  "put_h264_qpel16_hv_lowpass_altivec",
+  "avg_h264_qpel16_hv_lowpass_altivec",
+  ""
+};
+#include <stdio.h>
+#endif
+
+#ifdef POWERPC_PERFORMANCE_REPORT
+void powerpc_display_perf_report(void)
+{
+  int i, j;
+  av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
+  for(i = 0 ; i < powerpc_perf_total ; i++)
+  {
+    for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
+      {
+        if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
+          av_log(NULL, AV_LOG_INFO,
+                  " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
+                  perfname[i],
+                  j+1,
+                  perfdata[j][i][powerpc_data_min],
+                  perfdata[j][i][powerpc_data_max],
+                  (double)perfdata[j][i][powerpc_data_sum] /
+                  (double)perfdata[j][i][powerpc_data_num],
+                  perfdata[j][i][powerpc_data_num]);
+      }
+  }
+}
+#endif /* POWERPC_PERFORMANCE_REPORT */
+
+/* ***** WARNING ***** WARNING ***** WARNING ***** */
+/*
+  clear_blocks_dcbz32_ppc will not work properly
+  on PowerPC processors with a cache line size
+  not equal to 32 bytes.
+  Fortunately all processor used by Apple up to
+  at least the 7450 (aka second generation G4)
+  use 32 bytes cache line.
+  This is due to the use of the 'dcbz' instruction.
+  It simply clear to zero a single cache line,
+  so you need to know the cache line size to use it !
+  It's absurd, but it's fast...
+
+  update 24/06/2003 : Apple released yesterday the G5,
+  with a PPC970. cache line size : 128 bytes. Oups.
+  The semantic of dcbz was changed, it always clear
+  32 bytes. so the function below will work, but will
+  be slow. So I fixed check_dcbz_effect to use dcbzl,
+  which is defined to clear a cache line (as dcbz before).
+  So we still can distinguish, and use dcbz (32 bytes)
+  or dcbzl (one cache line) as required.
+
+  see <http://developer.apple.com/technotes/tn/tn2087.html>
+  and <http://developer.apple.com/technotes/tn/tn2086.html>
+*/
+void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
+{
+POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
+    register int misal = ((unsigned long)blocks & 0x00000010);
+    register int i = 0;
+POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
+#if 1
+    if (misal) {
+      ((unsigned long*)blocks)[0] = 0L;
+      ((unsigned long*)blocks)[1] = 0L;
+      ((unsigned long*)blocks)[2] = 0L;
+      ((unsigned long*)blocks)[3] = 0L;
+      i += 16;
+    }
+    for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
+#ifndef __MWERKS__
+      asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
+#else
+      __dcbz( blocks, i );
+#endif
+    }
+    if (misal) {
+      ((unsigned long*)blocks)[188] = 0L;
+      ((unsigned long*)blocks)[189] = 0L;
+      ((unsigned long*)blocks)[190] = 0L;
+      ((unsigned long*)blocks)[191] = 0L;
+      i += 16;
+    }
+#else
+    memset(blocks, 0, sizeof(DCTELEM)*6*64);
+#endif
+POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
+}
+
+/* same as above, when dcbzl clear a whole 128B cache line
+   i.e. the PPC970 aka G5 */
+#ifdef HAVE_DCBZL
+void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
+{
+POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
+    register int misal = ((unsigned long)blocks & 0x0000007f);
+    register int i = 0;
+POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
+#if 1
+ if (misal) {
+   // we could probably also optimize this case,
+   // but there's not much point as the machines
+   // aren't available yet (2003-06-26)
+      memset(blocks, 0, sizeof(DCTELEM)*6*64);
+    }
+    else
+      for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
+        asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
+      }
+#else
+    memset(blocks, 0, sizeof(DCTELEM)*6*64);
+#endif
+POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
+}
+#else
+void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
+{
+  memset(blocks, 0, sizeof(DCTELEM)*6*64);
+}
+#endif
+
+#ifdef HAVE_DCBZL
+/* check dcbz report how many bytes are set to 0 by dcbz */
+/* update 24/06/2003 : replace dcbz by dcbzl to get
+   the intended effect (Apple "fixed" dcbz)
+   unfortunately this cannot be used unless the assembler
+   knows about dcbzl ... */
+long check_dcbzl_effect(void)
+{
+  register char *fakedata = (char*)av_malloc(1024);
+  register char *fakedata_middle;
+  register long zero = 0;
+  register long i = 0;
+  long count = 0;
+
+  if (!fakedata)
+  {
+    return 0L;
+  }
+
+  fakedata_middle = (fakedata + 512);
+
+  memset(fakedata, 0xFF, 1024);
+
+  /* below the constraint "b" seems to mean "Address base register"
+     in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */
+  asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));
+
+  for (i = 0; i < 1024 ; i ++)
+  {
+    if (fakedata[i] == (char)0)
+      count++;
+  }
+
+  av_free(fakedata);
+
+  return count;
+}
+#else
+long check_dcbzl_effect(void)
+{
+  return 0;
+}
+#endif
+
+static void prefetch_ppc(void *mem, int stride, int h)
+{
+    register const uint8_t *p = mem;
+    do {
+        asm volatile ("dcbt 0,%0" : : "r" (p));
+        p+= stride;
+    } while(--h);
+}
+
+void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
+{
+    // Common optimizations whether Altivec is available or not
+    c->prefetch = prefetch_ppc;
+    switch (check_dcbzl_effect()) {
+        case 32:
+            c->clear_blocks = clear_blocks_dcbz32_ppc;
+            break;
+        case 128:
+            c->clear_blocks = clear_blocks_dcbz128_ppc;
+            break;
+        default:
+            break;
+    }
+
+#ifdef HAVE_ALTIVEC
+    if(ENABLE_H264_DECODER) dsputil_h264_init_ppc(c, avctx);
+
+    if (has_altivec()) {
+        mm_flags |= MM_ALTIVEC;
+
+        dsputil_init_altivec(c, avctx);
+        if(ENABLE_SNOW_DECODER) snow_init_altivec(c, avctx);
+        if(ENABLE_VC1_DECODER || ENABLE_WMV3_DECODER)
+            vc1dsp_init_altivec(c, avctx);
+        float_init_altivec(c, avctx);
+        c->gmc1 = gmc1_altivec;
+
+#ifdef CONFIG_ENCODERS
+        if (avctx->dct_algo == FF_DCT_AUTO ||
+            avctx->dct_algo == FF_DCT_ALTIVEC)
+        {
+            c->fdct = fdct_altivec;
+        }
+#endif //CONFIG_ENCODERS
+
+        if (avctx->lowres==0)
+        {
+        if ((avctx->idct_algo == FF_IDCT_AUTO) ||
+                (avctx->idct_algo == FF_IDCT_ALTIVEC))
+        {
+            c->idct_put = idct_put_altivec;
+            c->idct_add = idct_add_altivec;
+            c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
+        }
+        }
+
+#ifdef POWERPC_PERFORMANCE_REPORT
+        {
+          int i, j;
+          for (i = 0 ; i < powerpc_perf_total ; i++)
+          {
+            for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
+              {
+                perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
+                perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
+                perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
+                perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
+              }
+          }
+        }
+#endif /* POWERPC_PERFORMANCE_REPORT */
+    }
+#endif /* HAVE_ALTIVEC */
+}
diff --git a/contrib/ffmpeg/libavcodec/ppc/dsputil_ppc.h b/contrib/ffmpeg/libavcodec/ppc/dsputil_ppc.h
new file mode 100644
index 000000000..ab2b05780
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/dsputil_ppc.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _DSPUTIL_PPC_
+#define _DSPUTIL_PPC_
+
+#ifdef POWERPC_PERFORMANCE_REPORT
+void powerpc_display_perf_report(void);
+/* the 604* have 2, the G3* have 4, the G4s have 6,
+   and the G5 are completely different (they MUST use
+   POWERPC_MODE_64BITS, and let's hope all future 64 bis PPC
+   will use the same PMCs... */
+#define POWERPC_NUM_PMC_ENABLED 6
+/* if you add to the enum below, also add to the perfname array
+   in dsputil_ppc.c */
+enum powerpc_perf_index {
+  altivec_fft_num = 0,
+  altivec_gmc1_num,
+  altivec_dct_unquantize_h263_num,
+  altivec_fdct,
+  altivec_idct_add_num,
+  altivec_idct_put_num,
+  altivec_put_pixels16_num,
+  altivec_avg_pixels16_num,
+  altivec_avg_pixels8_num,
+  altivec_put_pixels8_xy2_num,
+  altivec_put_no_rnd_pixels8_xy2_num,
+  altivec_put_pixels16_xy2_num,
+  altivec_put_no_rnd_pixels16_xy2_num,
+  altivec_hadamard8_diff8x8_num,
+  altivec_hadamard8_diff16_num,
+  altivec_avg_pixels8_xy2_num,
+  powerpc_clear_blocks_dcbz32,
+  powerpc_clear_blocks_dcbz128,
+  altivec_put_h264_chroma_mc8_num,
+  altivec_avg_h264_chroma_mc8_num,
+  altivec_put_h264_qpel16_h_lowpass_num,
+  altivec_avg_h264_qpel16_h_lowpass_num,
+  altivec_put_h264_qpel16_v_lowpass_num,
+  altivec_avg_h264_qpel16_v_lowpass_num,
+  altivec_put_h264_qpel16_hv_lowpass_num,
+  altivec_avg_h264_qpel16_hv_lowpass_num,
+  powerpc_perf_total
+};
+enum powerpc_data_index {
+  powerpc_data_min = 0,
+  powerpc_data_max,
+  powerpc_data_sum,
+  powerpc_data_num,
+  powerpc_data_total
+};
+extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
+
+#ifndef POWERPC_MODE_64BITS
+#define POWERP_PMC_DATATYPE unsigned long
+#define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 937" : "=r" (a))
+#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a))
+#if (POWERPC_NUM_PMC_ENABLED > 2)
+#define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a))
+#define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 942" : "=r" (a))
+#else
+#define POWERPC_GET_PMC3(a) do {} while (0)
+#define POWERPC_GET_PMC4(a) do {} while (0)
+#endif
+#if (POWERPC_NUM_PMC_ENABLED > 4)
+#define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 929" : "=r" (a))
+#define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 930" : "=r" (a))
+#else
+#define POWERPC_GET_PMC5(a) do {} while (0)
+#define POWERPC_GET_PMC6(a) do {} while (0)
+#endif
+#else /* POWERPC_MODE_64BITS */
+#define POWERP_PMC_DATATYPE unsigned long long
+#define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 771" : "=r" (a))
+#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 772" : "=r" (a))
+#if (POWERPC_NUM_PMC_ENABLED > 2)
+#define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 773" : "=r" (a))
+#define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 774" : "=r" (a))
+#else
+#define POWERPC_GET_PMC3(a) do {} while (0)
+#define POWERPC_GET_PMC4(a) do {} while (0)
+#endif
+#if (POWERPC_NUM_PMC_ENABLED > 4)
+#define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 775" : "=r" (a))
+#define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 776" : "=r" (a))
+#else
+#define POWERPC_GET_PMC5(a) do {} while (0)
+#define POWERPC_GET_PMC6(a) do {} while (0)
+#endif
+#endif /* POWERPC_MODE_64BITS */
+#define POWERPC_PERF_DECLARE(a, cond)   \
+  POWERP_PMC_DATATYPE                   \
+    pmc_start[POWERPC_NUM_PMC_ENABLED], \
+    pmc_stop[POWERPC_NUM_PMC_ENABLED],  \
+    pmc_loop_index;
+#define POWERPC_PERF_START_COUNT(a, cond) do { \
+  POWERPC_GET_PMC6(pmc_start[5]); \
+  POWERPC_GET_PMC5(pmc_start[4]); \
+  POWERPC_GET_PMC4(pmc_start[3]); \
+  POWERPC_GET_PMC3(pmc_start[2]); \
+  POWERPC_GET_PMC2(pmc_start[1]); \
+  POWERPC_GET_PMC1(pmc_start[0]); \
+  } while (0)
+#define POWERPC_PERF_STOP_COUNT(a, cond) do { \
+  POWERPC_GET_PMC1(pmc_stop[0]); \
+  POWERPC_GET_PMC2(pmc_stop[1]); \
+  POWERPC_GET_PMC3(pmc_stop[2]); \
+  POWERPC_GET_PMC4(pmc_stop[3]); \
+  POWERPC_GET_PMC5(pmc_stop[4]); \
+  POWERPC_GET_PMC6(pmc_stop[5]); \
+  if (cond)                       \
+  {                               \
+    for(pmc_loop_index = 0;       \
+        pmc_loop_index < POWERPC_NUM_PMC_ENABLED; \
+        pmc_loop_index++)         \
+    {                             \
+      if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index])  \
+        {                                                         \
+        POWERP_PMC_DATATYPE diff =                                \
+          pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index];   \
+        if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
+          perfdata[pmc_loop_index][a][powerpc_data_min] = diff;   \
+        if (diff > perfdata[pmc_loop_index][a][powerpc_data_max]) \
+          perfdata[pmc_loop_index][a][powerpc_data_max] = diff;   \
+        perfdata[pmc_loop_index][a][powerpc_data_sum] += diff;    \
+        perfdata[pmc_loop_index][a][powerpc_data_num] ++;         \
+      }                           \
+    }                             \
+  }                               \
+} while (0)
+#else /* POWERPC_PERFORMANCE_REPORT */
+// those are needed to avoid empty statements.
+#define POWERPC_PERF_DECLARE(a, cond)        int altivec_placeholder __attribute__ ((unused))
+#define POWERPC_PERF_START_COUNT(a, cond)    do {} while (0)
+#define POWERPC_PERF_STOP_COUNT(a, cond)     do {} while (0)
+#endif /* POWERPC_PERFORMANCE_REPORT */
+
+#endif /*  _DSPUTIL_PPC_ */
diff --git a/contrib/ffmpeg/libavcodec/ppc/fdct_altivec.c b/contrib/ffmpeg/libavcodec/ppc/fdct_altivec.c
new file mode 100644
index 000000000..2418c32bb
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/fdct_altivec.c
@@ -0,0 +1,493 @@
+/* ffmpeg/libavcodec/ppc/fdct_altivec.c, this file is part of the
+ * AltiVec optimized library for the FFMPEG Multimedia System
+ * Copyright (C) 2003  James Klicman <james@klicman.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#include "common.h"
+#include "../dsputil.h"
+#include "dsputil_altivec.h"
+#include "gcc_fixes.h"
+
+
+#define vs16(v) ((vector signed short)(v))
+#define vs32(v) ((vector signed int)(v))
+#define vu8(v)  ((vector unsigned char)(v))
+#define vu16(v) ((vector unsigned short)(v))
+#define vu32(v) ((vector unsigned int)(v))
+
+
+#define C1     0.98078525066375732421875000 /* cos(1*PI/16) */
+#define C2     0.92387950420379638671875000 /* cos(2*PI/16) */
+#define C3     0.83146959543228149414062500 /* cos(3*PI/16) */
+#define C4     0.70710676908493041992187500 /* cos(4*PI/16) */
+#define C5     0.55557024478912353515625000 /* cos(5*PI/16) */
+#define C6     0.38268342614173889160156250 /* cos(6*PI/16) */
+#define C7     0.19509032368659973144531250 /* cos(7*PI/16) */
+#define SQRT_2 1.41421353816986083984375000 /* sqrt(2)      */
+
+
+#define W0 -(2 * C2)
+#define W1 (2 * C6)
+#define W2 (SQRT_2 * C6)
+#define W3 (SQRT_2 * C3)
+#define W4 (SQRT_2 * (-C1 + C3 + C5 - C7))
+#define W5 (SQRT_2 * ( C1 + C3 - C5 + C7))
+#define W6 (SQRT_2 * ( C1 + C3 + C5 - C7))
+#define W7 (SQRT_2 * ( C1 + C3 - C5 - C7))
+#define W8 (SQRT_2 * ( C7 - C3))
+#define W9 (SQRT_2 * (-C1 - C3))
+#define WA (SQRT_2 * (-C3 - C5))
+#define WB (SQRT_2 * ( C5 - C3))
+
+
+static vector float fdctconsts[3] = {
+    (vector float)AVV( W0, W1, W2, W3 ),
+    (vector float)AVV( W4, W5, W6, W7 ),
+    (vector float)AVV( W8, W9, WA, WB )
+};
+
+#define LD_W0 vec_splat(cnsts0, 0)
+#define LD_W1 vec_splat(cnsts0, 1)
+#define LD_W2 vec_splat(cnsts0, 2)
+#define LD_W3 vec_splat(cnsts0, 3)
+#define LD_W4 vec_splat(cnsts1, 0)
+#define LD_W5 vec_splat(cnsts1, 1)
+#define LD_W6 vec_splat(cnsts1, 2)
+#define LD_W7 vec_splat(cnsts1, 3)
+#define LD_W8 vec_splat(cnsts2, 0)
+#define LD_W9 vec_splat(cnsts2, 1)
+#define LD_WA vec_splat(cnsts2, 2)
+#define LD_WB vec_splat(cnsts2, 3)
+
+
+#define FDCTROW(b0,b1,b2,b3,b4,b5,b6,b7) /* {{{ */                  \
+    x0 = vec_add(b0, b7);               /* x0 = b0 + b7; */         \
+    x7 = vec_sub(b0, b7);               /* x7 = b0 - b7; */         \
+    x1 = vec_add(b1, b6);               /* x1 = b1 + b6; */         \
+    x6 = vec_sub(b1, b6);               /* x6 = b1 - b6; */         \
+    x2 = vec_add(b2, b5);               /* x2 = b2 + b5; */         \
+    x5 = vec_sub(b2, b5);               /* x5 = b2 - b5; */         \
+    x3 = vec_add(b3, b4);               /* x3 = b3 + b4; */         \
+    x4 = vec_sub(b3, b4);               /* x4 = b3 - b4; */         \
+                                                                    \
+    b7 = vec_add(x0, x3);               /* b7 = x0 + x3; */         \
+    b1 = vec_add(x1, x2);               /* b1 = x1 + x2; */         \
+    b0 = vec_add(b7, b1);               /* b0 = b7 + b1; */         \
+    b4 = vec_sub(b7, b1);               /* b4 = b7 - b1; */         \
+                                                                    \
+    b2 = vec_sub(x0, x3);               /* b2 = x0 - x3; */         \
+    b6 = vec_sub(x1, x2);               /* b6 = x1 - x2; */         \
+    b5 = vec_add(b6, b2);               /* b5 = b6 + b2; */         \
+    cnst = LD_W2;                                                   \
+    b5 = vec_madd(cnst, b5, mzero);     /* b5 = b5 * W2; */         \
+    cnst = LD_W1;                                                   \
+    b2 = vec_madd(cnst, b2, b5);        /* b2 = b5 + b2 * W1; */    \
+    cnst = LD_W0;                                                   \
+    b6 = vec_madd(cnst, b6, b5);        /* b6 = b5 + b6 * W0; */    \
+                                                                    \
+    x0 = vec_add(x4, x7);               /* x0 = x4 + x7; */         \
+    x1 = vec_add(x5, x6);               /* x1 = x5 + x6; */         \
+    x2 = vec_add(x4, x6);               /* x2 = x4 + x6; */         \
+    x3 = vec_add(x5, x7);               /* x3 = x5 + x7; */         \
+    x8 = vec_add(x2, x3);               /* x8 = x2 + x3; */         \
+    cnst = LD_W3;                                                   \
+    x8 = vec_madd(cnst, x8, mzero);     /* x8 = x8 * W3; */         \
+                                                                    \
+    cnst = LD_W8;                                                   \
+    x0 = vec_madd(cnst, x0, mzero);     /* x0 *= W8; */             \
+    cnst = LD_W9;                                                   \
+    x1 = vec_madd(cnst, x1, mzero);     /* x1 *= W9; */             \
+    cnst = LD_WA;                                                   \
+    x2 = vec_madd(cnst, x2, x8);        /* x2 = x2 * WA + x8; */    \
+    cnst = LD_WB;                                                   \
+    x3 = vec_madd(cnst, x3, x8);        /* x3 = x3 * WB + x8; */    \
+                                                                    \
+    cnst = LD_W4;                                                   \
+    b7 = vec_madd(cnst, x4, x0);        /* b7 = x4 * W4 + x0; */    \
+    cnst = LD_W5;                                                   \
+    b5 = vec_madd(cnst, x5, x1);        /* b5 = x5 * W5 + x1; */    \
+    cnst = LD_W6;                                                   \
+    b3 = vec_madd(cnst, x6, x1);        /* b3 = x6 * W6 + x1; */    \
+    cnst = LD_W7;                                                   \
+    b1 = vec_madd(cnst, x7, x0);        /* b1 = x7 * W7 + x0; */    \
+                                                                    \
+    b7 = vec_add(b7, x2);               /* b7 = b7 + x2; */         \
+    b5 = vec_add(b5, x3);               /* b5 = b5 + x3; */         \
+    b3 = vec_add(b3, x2);               /* b3 = b3 + x2; */         \
+    b1 = vec_add(b1, x3);               /* b1 = b1 + x3; */         \
+    /* }}} */
+
+#define FDCTCOL(b0,b1,b2,b3,b4,b5,b6,b7) /* {{{ */                  \
+    x0 = vec_add(b0, b7);               /* x0 = b0 + b7; */         \
+    x7 = vec_sub(b0, b7);               /* x7 = b0 - b7; */         \
+    x1 = vec_add(b1, b6);               /* x1 = b1 + b6; */         \
+    x6 = vec_sub(b1, b6);               /* x6 = b1 - b6; */         \
+    x2 = vec_add(b2, b5);               /* x2 = b2 + b5; */         \
+    x5 = vec_sub(b2, b5);               /* x5 = b2 - b5; */         \
+    x3 = vec_add(b3, b4);               /* x3 = b3 + b4; */         \
+    x4 = vec_sub(b3, b4);               /* x4 = b3 - b4; */         \
+                                                                    \
+    b7 = vec_add(x0, x3);               /* b7 = x0 + x3; */         \
+    b1 = vec_add(x1, x2);               /* b1 = x1 + x2; */         \
+    b0 = vec_add(b7, b1);               /* b0 = b7 + b1; */         \
+    b4 = vec_sub(b7, b1);               /* b4 = b7 - b1; */         \
+                                                                    \
+    b2 = vec_sub(x0, x3);               /* b2 = x0 - x3; */         \
+    b6 = vec_sub(x1, x2);               /* b6 = x1 - x2; */         \
+    b5 = vec_add(b6, b2);               /* b5 = b6 + b2; */         \
+    cnst = LD_W2;                                                   \
+    b5 = vec_madd(cnst, b5, mzero);     /* b5 = b5 * W2; */         \
+    cnst = LD_W1;                                                   \
+    b2 = vec_madd(cnst, b2, b5);        /* b2 = b5 + b2 * W1; */    \
+    cnst = LD_W0;                                                   \
+    b6 = vec_madd(cnst, b6, b5);        /* b6 = b5 + b6 * W0; */    \
+                                                                    \
+    x0 = vec_add(x4, x7);               /* x0 = x4 + x7; */         \
+    x1 = vec_add(x5, x6);               /* x1 = x5 + x6; */         \
+    x2 = vec_add(x4, x6);               /* x2 = x4 + x6; */         \
+    x3 = vec_add(x5, x7);               /* x3 = x5 + x7; */         \
+    x8 = vec_add(x2, x3);               /* x8 = x2 + x3; */         \
+    cnst = LD_W3;                                                   \
+    x8 = vec_madd(cnst, x8, mzero);     /* x8 = x8 * W3; */         \
+                                                                    \
+    cnst = LD_W8;                                                   \
+    x0 = vec_madd(cnst, x0, mzero);     /* x0 *= W8; */             \
+    cnst = LD_W9;                                                   \
+    x1 = vec_madd(cnst, x1, mzero);     /* x1 *= W9; */             \
+    cnst = LD_WA;                                                   \
+    x2 = vec_madd(cnst, x2, x8);        /* x2 = x2 * WA + x8; */    \
+    cnst = LD_WB;                                                   \
+    x3 = vec_madd(cnst, x3, x8);        /* x3 = x3 * WB + x8; */    \
+                                                                    \
+    cnst = LD_W4;                                                   \
+    b7 = vec_madd(cnst, x4, x0);        /* b7 = x4 * W4 + x0; */    \
+    cnst = LD_W5;                                                   \
+    b5 = vec_madd(cnst, x5, x1);        /* b5 = x5 * W5 + x1; */    \
+    cnst = LD_W6;                                                   \
+    b3 = vec_madd(cnst, x6, x1);        /* b3 = x6 * W6 + x1; */    \
+    cnst = LD_W7;                                                   \
+    b1 = vec_madd(cnst, x7, x0);        /* b1 = x7 * W7 + x0; */    \
+                                                                    \
+    b7 = vec_add(b7, x2);               /* b7 += x2; */             \
+    b5 = vec_add(b5, x3);               /* b5 += x3; */             \
+    b3 = vec_add(b3, x2);               /* b3 += x2; */             \
+    b1 = vec_add(b1, x3);               /* b1 += x3; */             \
+    /* }}} */
+
+
+
+/* two dimensional discrete cosine transform */
+
+void fdct_altivec(int16_t *block)
+{
+POWERPC_PERF_DECLARE(altivec_fdct, 1);
+    vector signed short *bp;
+    vector float *cp;
+    vector float b00, b10, b20, b30, b40, b50, b60, b70;
+    vector float b01, b11, b21, b31, b41, b51, b61, b71;
+    vector float mzero, cnst, cnsts0, cnsts1, cnsts2;
+    vector float x0, x1, x2, x3, x4, x5, x6, x7, x8;
+
+    POWERPC_PERF_START_COUNT(altivec_fdct, 1);
+
+
+    /* setup constants {{{ */
+    /* mzero = -0.0 */
+    mzero = ((vector float)vec_splat_u32(-1));
+    mzero = ((vector float)vec_sl(vu32(mzero), vu32(mzero)));
+    cp = fdctconsts;
+    cnsts0 = vec_ld(0, cp); cp++;
+    cnsts1 = vec_ld(0, cp); cp++;
+    cnsts2 = vec_ld(0, cp);
+    /* }}} */
+
+
+    /* 8x8 matrix transpose (vector short[8]) {{{ */
+#define MERGE_S16(hl,a,b) vec_merge##hl(vs16(a), vs16(b))
+
+    bp = (vector signed short*)block;
+    b00 = ((vector float)vec_ld(0,    bp));
+    b40 = ((vector float)vec_ld(16*4, bp));
+    b01 = ((vector float)MERGE_S16(h, b00, b40));
+    b11 = ((vector float)MERGE_S16(l, b00, b40));
+    bp++;
+    b10 = ((vector float)vec_ld(0,    bp));
+    b50 = ((vector float)vec_ld(16*4, bp));
+    b21 = ((vector float)MERGE_S16(h, b10, b50));
+    b31 = ((vector float)MERGE_S16(l, b10, b50));
+    bp++;
+    b20 = ((vector float)vec_ld(0,    bp));
+    b60 = ((vector float)vec_ld(16*4, bp));
+    b41 = ((vector float)MERGE_S16(h, b20, b60));
+    b51 = ((vector float)MERGE_S16(l, b20, b60));
+    bp++;
+    b30 = ((vector float)vec_ld(0,    bp));
+    b70 = ((vector float)vec_ld(16*4, bp));
+    b61 = ((vector float)MERGE_S16(h, b30, b70));
+    b71 = ((vector float)MERGE_S16(l, b30, b70));
+
+    x0 = ((vector float)MERGE_S16(h, b01, b41));
+    x1 = ((vector float)MERGE_S16(l, b01, b41));
+    x2 = ((vector float)MERGE_S16(h, b11, b51));
+    x3 = ((vector float)MERGE_S16(l, b11, b51));
+    x4 = ((vector float)MERGE_S16(h, b21, b61));
+    x5 = ((vector float)MERGE_S16(l, b21, b61));
+    x6 = ((vector float)MERGE_S16(h, b31, b71));
+    x7 = ((vector float)MERGE_S16(l, b31, b71));
+
+    b00 = ((vector float)MERGE_S16(h, x0, x4));
+    b10 = ((vector float)MERGE_S16(l, x0, x4));
+    b20 = ((vector float)MERGE_S16(h, x1, x5));
+    b30 = ((vector float)MERGE_S16(l, x1, x5));
+    b40 = ((vector float)MERGE_S16(h, x2, x6));
+    b50 = ((vector float)MERGE_S16(l, x2, x6));
+    b60 = ((vector float)MERGE_S16(h, x3, x7));
+    b70 = ((vector float)MERGE_S16(l, x3, x7));
+
+#undef MERGE_S16
+    /* }}} */
+
+
+/* Some of the initial calculations can be done as vector short before
+ * conversion to vector float.  The following code section takes advantage
+ * of this.
+ */
+#if 1
+    /* fdct rows {{{ */
+    x0 = ((vector float)vec_add(vs16(b00), vs16(b70)));
+    x7 = ((vector float)vec_sub(vs16(b00), vs16(b70)));
+    x1 = ((vector float)vec_add(vs16(b10), vs16(b60)));
+    x6 = ((vector float)vec_sub(vs16(b10), vs16(b60)));
+    x2 = ((vector float)vec_add(vs16(b20), vs16(b50)));
+    x5 = ((vector float)vec_sub(vs16(b20), vs16(b50)));
+    x3 = ((vector float)vec_add(vs16(b30), vs16(b40)));
+    x4 = ((vector float)vec_sub(vs16(b30), vs16(b40)));
+
+    b70 = ((vector float)vec_add(vs16(x0), vs16(x3)));
+    b10 = ((vector float)vec_add(vs16(x1), vs16(x2)));
+
+    b00 = ((vector float)vec_add(vs16(b70), vs16(b10)));
+    b40 = ((vector float)vec_sub(vs16(b70), vs16(b10)));
+
+#define CTF0(n) \
+    b##n##1 = ((vector float)vec_unpackl(vs16(b##n##0))); \
+    b##n##0 = ((vector float)vec_unpackh(vs16(b##n##0))); \
+    b##n##1 = vec_ctf(vs32(b##n##1), 0); \
+    b##n##0 = vec_ctf(vs32(b##n##0), 0);
+
+    CTF0(0);
+    CTF0(4);
+
+    b20 = ((vector float)vec_sub(vs16(x0), vs16(x3)));
+    b60 = ((vector float)vec_sub(vs16(x1), vs16(x2)));
+
+    CTF0(2);
+    CTF0(6);
+
+#undef CTF0
+
+    x0 = vec_add(b60, b20);
+    x1 = vec_add(b61, b21);
+
+    cnst = LD_W2;
+    x0 = vec_madd(cnst, x0, mzero);
+    x1 = vec_madd(cnst, x1, mzero);
+    cnst = LD_W1;
+    b20 = vec_madd(cnst, b20, x0);
+    b21 = vec_madd(cnst, b21, x1);
+    cnst = LD_W0;
+    b60 = vec_madd(cnst, b60, x0);
+    b61 = vec_madd(cnst, b61, x1);
+
+#define CTFX(x,b) \
+    b##0 = ((vector float)vec_unpackh(vs16(x))); \
+    b##1 = ((vector float)vec_unpackl(vs16(x))); \
+    b##0 = vec_ctf(vs32(b##0), 0); \
+    b##1 = vec_ctf(vs32(b##1), 0); \
+
+    CTFX(x4, b7);
+    CTFX(x5, b5);
+    CTFX(x6, b3);
+    CTFX(x7, b1);
+
+#undef CTFX
+
+
+    x0 = vec_add(b70, b10);
+    x1 = vec_add(b50, b30);
+    x2 = vec_add(b70, b30);
+    x3 = vec_add(b50, b10);
+    x8 = vec_add(x2, x3);
+    cnst = LD_W3;
+    x8 = vec_madd(cnst, x8, mzero);
+
+    cnst = LD_W8;
+    x0 = vec_madd(cnst, x0, mzero);
+    cnst = LD_W9;
+    x1 = vec_madd(cnst, x1, mzero);
+    cnst = LD_WA;
+    x2 = vec_madd(cnst, x2, x8);
+    cnst = LD_WB;
+    x3 = vec_madd(cnst, x3, x8);
+
+    cnst = LD_W4;
+    b70 = vec_madd(cnst, b70, x0);
+    cnst = LD_W5;
+    b50 = vec_madd(cnst, b50, x1);
+    cnst = LD_W6;
+    b30 = vec_madd(cnst, b30, x1);
+    cnst = LD_W7;
+    b10 = vec_madd(cnst, b10, x0);
+
+    b70 = vec_add(b70, x2);
+    b50 = vec_add(b50, x3);
+    b30 = vec_add(b30, x2);
+    b10 = vec_add(b10, x3);
+
+
+    x0 = vec_add(b71, b11);
+    x1 = vec_add(b51, b31);
+    x2 = vec_add(b71, b31);
+    x3 = vec_add(b51, b11);
+    x8 = vec_add(x2, x3);
+    cnst = LD_W3;
+    x8 = vec_madd(cnst, x8, mzero);
+
+    cnst = LD_W8;
+    x0 = vec_madd(cnst, x0, mzero);
+    cnst = LD_W9;
+    x1 = vec_madd(cnst, x1, mzero);
+    cnst = LD_WA;
+    x2 = vec_madd(cnst, x2, x8);
+    cnst = LD_WB;
+    x3 = vec_madd(cnst, x3, x8);
+
+    cnst = LD_W4;
+    b71 = vec_madd(cnst, b71, x0);
+    cnst = LD_W5;
+    b51 = vec_madd(cnst, b51, x1);
+    cnst = LD_W6;
+    b31 = vec_madd(cnst, b31, x1);
+    cnst = LD_W7;
+    b11 = vec_madd(cnst, b11, x0);
+
+    b71 = vec_add(b71, x2);
+    b51 = vec_add(b51, x3);
+    b31 = vec_add(b31, x2);
+    b11 = vec_add(b11, x3);
+    /* }}} */
+#else
+    /* convert to float {{{ */
+#define CTF(n) \
+    vs32(b##n##1) = vec_unpackl(vs16(b##n##0)); \
+    vs32(b##n##0) = vec_unpackh(vs16(b##n##0)); \
+    b##n##1 = vec_ctf(vs32(b##n##1), 0); \
+    b##n##0 = vec_ctf(vs32(b##n##0), 0); \
+
+    CTF(0);
+    CTF(1);
+    CTF(2);
+    CTF(3);
+    CTF(4);
+    CTF(5);
+    CTF(6);
+    CTF(7);
+
+#undef CTF
+    /* }}} */
+
+    FDCTROW(b00, b10, b20, b30, b40, b50, b60, b70);
+    FDCTROW(b01, b11, b21, b31, b41, b51, b61, b71);
+#endif
+
+
+    /* 8x8 matrix transpose (vector float[8][2]) {{{ */
+    x0 = vec_mergel(b00, b20);
+    x1 = vec_mergeh(b00, b20);
+    x2 = vec_mergel(b10, b30);
+    x3 = vec_mergeh(b10, b30);
+
+    b00 = vec_mergeh(x1, x3);
+    b10 = vec_mergel(x1, x3);
+    b20 = vec_mergeh(x0, x2);
+    b30 = vec_mergel(x0, x2);
+
+    x4 = vec_mergel(b41, b61);
+    x5 = vec_mergeh(b41, b61);
+    x6 = vec_mergel(b51, b71);
+    x7 = vec_mergeh(b51, b71);
+
+    b41 = vec_mergeh(x5, x7);
+    b51 = vec_mergel(x5, x7);
+    b61 = vec_mergeh(x4, x6);
+    b71 = vec_mergel(x4, x6);
+
+    x0 = vec_mergel(b01, b21);
+    x1 = vec_mergeh(b01, b21);
+    x2 = vec_mergel(b11, b31);
+    x3 = vec_mergeh(b11, b31);
+
+    x4 = vec_mergel(b40, b60);
+    x5 = vec_mergeh(b40, b60);
+    x6 = vec_mergel(b50, b70);
+    x7 = vec_mergeh(b50, b70);
+
+    b40 = vec_mergeh(x1, x3);
+    b50 = vec_mergel(x1, x3);
+    b60 = vec_mergeh(x0, x2);
+    b70 = vec_mergel(x0, x2);
+
+    b01 = vec_mergeh(x5, x7);
+    b11 = vec_mergel(x5, x7);
+    b21 = vec_mergeh(x4, x6);
+    b31 = vec_mergel(x4, x6);
+    /* }}} */
+
+
+    FDCTCOL(b00, b10, b20, b30, b40, b50, b60, b70);
+    FDCTCOL(b01, b11, b21, b31, b41, b51, b61, b71);
+
+
+    /* round, convert back to short {{{ */
+#define CTS(n) \
+    b##n##0 = vec_round(b##n##0); \
+    b##n##1 = vec_round(b##n##1); \
+    b##n##0 = ((vector float)vec_cts(b##n##0, 0)); \
+    b##n##1 = ((vector float)vec_cts(b##n##1, 0)); \
+    b##n##0 = ((vector float)vec_pack(vs32(b##n##0), vs32(b##n##1))); \
+    vec_st(vs16(b##n##0), 0, bp);
+
+    bp = (vector signed short*)block;
+    CTS(0); bp++;
+    CTS(1); bp++;
+    CTS(2); bp++;
+    CTS(3); bp++;
+    CTS(4); bp++;
+    CTS(5); bp++;
+    CTS(6); bp++;
+    CTS(7);
+
+#undef CTS
+    /* }}} */
+
+POWERPC_PERF_STOP_COUNT(altivec_fdct, 1);
+}
+
+/* vim:set foldmethod=marker foldlevel=0: */
diff --git a/contrib/ffmpeg/libavcodec/ppc/fft_altivec.c b/contrib/ffmpeg/libavcodec/ppc/fft_altivec.c
new file mode 100644
index 000000000..384a774ff
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/fft_altivec.c
@@ -0,0 +1,166 @@
+/*
+ * FFT/IFFT transforms
+ * AltiVec-enabled
+ * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org>
+ * Based on code Copyright (c) 2002 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+
+#include "dsputil_altivec.h"
+
+/*
+  those three macros are from libavcodec/fft.c
+  and are required for the reference C code
+*/
+/* butter fly op */
+#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
+{\
+  FFTSample ax, ay, bx, by;\
+  bx=pre1;\
+  by=pim1;\
+  ax=qre1;\
+  ay=qim1;\
+  pre = (bx + ax);\
+  pim = (by + ay);\
+  qre = (bx - ax);\
+  qim = (by - ay);\
+}
+#define MUL16(a,b) ((a) * (b))
+#define CMUL(pre, pim, are, aim, bre, bim) \
+{\
+   pre = (MUL16(are, bre) - MUL16(aim, bim));\
+   pim = (MUL16(are, bim) + MUL16(bre, aim));\
+}
+
+
+/**
+ * Do a complex FFT with the parameters defined in ff_fft_init(). The
+ * input data must be permuted before with s->revtab table. No
+ * 1.0/sqrt(n) normalization is done.
+ * AltiVec-enabled
+ * This code assumes that the 'z' pointer is 16 bytes-aligned
+ * It also assumes all FFTComplex are 8 bytes-aligned pair of float
+ * The code is exactly the same as the SSE version, except
+ * that successive MUL + ADD/SUB have been merged into
+ * fused multiply-add ('vec_madd' in altivec)
+ */
+void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z)
+{
+POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6);
+    register const vector float vczero = (const vector float)vec_splat_u32(0.);
+
+    int ln = s->nbits;
+    int j, np, np2;
+    int nblocks, nloops;
+    register FFTComplex *p, *q;
+    FFTComplex *cptr, *cptr1;
+    int k;
+
+POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
+
+    np = 1 << ln;
+
+    {
+        vector float *r, a, b, a1, c1, c2;
+
+        r = (vector float *)&z[0];
+
+        c1 = vcii(p,p,n,n);
+
+        if (s->inverse)
+            {
+                c2 = vcii(p,p,n,p);
+            }
+        else
+            {
+                c2 = vcii(p,p,p,n);
+            }
+
+        j = (np >> 2);
+        do {
+            a = vec_ld(0, r);
+            a1 = vec_ld(sizeof(vector float), r);
+
+            b = vec_perm(a,a,vcprmle(1,0,3,2));
+            a = vec_madd(a,c1,b);
+            /* do the pass 0 butterfly */
+
+            b = vec_perm(a1,a1,vcprmle(1,0,3,2));
+            b = vec_madd(a1,c1,b);
+            /* do the pass 0 butterfly */
+
+            /* multiply third by -i */
+            b = vec_perm(b,b,vcprmle(2,3,1,0));
+
+            /* do the pass 1 butterfly */
+            vec_st(vec_madd(b,c2,a), 0, r);
+            vec_st(vec_nmsub(b,c2,a), sizeof(vector float), r);
+
+            r += 2;
+        } while (--j != 0);
+    }
+    /* pass 2 .. ln-1 */
+
+    nblocks = np >> 3;
+    nloops = 1 << 2;
+    np2 = np >> 1;
+
+    cptr1 = s->exptab1;
+    do {
+        p = z;
+        q = z + nloops;
+        j = nblocks;
+        do {
+            cptr = cptr1;
+            k = nloops >> 1;
+            do {
+                vector float a,b,c,t1;
+
+                a = vec_ld(0, (float*)p);
+                b = vec_ld(0, (float*)q);
+
+                /* complex mul */
+                c = vec_ld(0, (float*)cptr);
+                /*  cre*re cim*re */
+                t1 = vec_madd(c, vec_perm(b,b,vcprmle(2,2,0,0)),vczero);
+                c = vec_ld(sizeof(vector float), (float*)cptr);
+                /*  -cim*im cre*im */
+                b = vec_madd(c, vec_perm(b,b,vcprmle(3,3,1,1)),t1);
+
+                /* butterfly */
+                vec_st(vec_add(a,b), 0, (float*)p);
+                vec_st(vec_sub(a,b), 0, (float*)q);
+
+                p += 2;
+                q += 2;
+                cptr += 4;
+            } while (--k);
+
+            p += nloops;
+            q += nloops;
+        } while (--j);
+        cptr1 += nloops * 2;
+        nblocks = nblocks >> 1;
+        nloops = nloops << 1;
+    } while (nblocks != 0);
+
+POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
+}
diff --git a/contrib/ffmpeg/libavcodec/ppc/float_altivec.c b/contrib/ffmpeg/libavcodec/ppc/float_altivec.c
new file mode 100644
index 000000000..c6e43dec2
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/float_altivec.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+
+#include "dsputil_altivec.h"
+
+static void vector_fmul_altivec(float *dst, const float *src, int len)
+{
+    int i;
+    vector float d0, d1, s, zero = (vector float)vec_splat_u32(0);
+    for(i=0; i<len-7; i+=8) {
+        d0 = vec_ld(0, dst+i);
+        s = vec_ld(0, src+i);
+        d1 = vec_ld(16, dst+i);
+        d0 = vec_madd(d0, s, zero);
+        d1 = vec_madd(d1, vec_ld(16,src+i), zero);
+        vec_st(d0, 0, dst+i);
+        vec_st(d1, 16, dst+i);
+    }
+}
+
+static void vector_fmul_reverse_altivec(float *dst, const float *src0,
+                                        const float *src1, int len)
+{
+    int i;
+    vector float d, s0, s1, h0, l0,
+                 s2, s3, zero = (vector float)vec_splat_u32(0);
+    src1 += len-4;
+    for(i=0; i<len-7; i+=8) {
+        s1 = vec_ld(0, src1-i);              // [a,b,c,d]
+        s0 = vec_ld(0, src0+i);
+        l0 = vec_mergel(s1, s1);             // [c,c,d,d]
+        s3 = vec_ld(-16, src1-i);
+        h0 = vec_mergeh(s1, s1);             // [a,a,b,b]
+        s2 = vec_ld(16, src0+i);
+        s1 = vec_mergeh(vec_mergel(l0,h0),   // [d,b,d,b]
+                        vec_mergeh(l0,h0));  // [c,a,c,a]
+                                             // [d,c,b,a]
+        l0 = vec_mergel(s3, s3);
+        d = vec_madd(s0, s1, zero);
+        h0 = vec_mergeh(s3, s3);
+        vec_st(d, 0, dst+i);
+        s3 = vec_mergeh(vec_mergel(l0,h0),
+                        vec_mergeh(l0,h0));
+        d = vec_madd(s2, s3, zero);
+        vec_st(d, 16, dst+i);
+    }
+}
+
+static void vector_fmul_add_add_altivec(float *dst, const float *src0,
+                                        const float *src1, const float *src2,
+                                        int src3, int len, int step)
+{
+    int i;
+    vector float d, s0, s1, s2, t0, t1, edges;
+    vector unsigned char align = vec_lvsr(0,dst),
+                         mask = vec_lvsl(0, dst);
+
+    t0 = vec_ld(0, dst);
+#if 0 //FIXME: there is still something wrong
+    if (step == 2) {
+        int y;
+        vector float d0, d1, s3, t2;
+        vector unsigned int sel =
+                vec_mergeh(vec_splat_u32(-1), vec_splat_u32(0));
+        t1 = vec_ld(16, dst);
+        for (i=0,y=0; i<len-3; i+=4,y+=8) {
+
+            s0 = vec_ld(0,src0+i);
+            s1 = vec_ld(0,src1+i);
+            s2 = vec_ld(0,src2+i);
+
+//          t0 = vec_ld(0, dst+y);  //[x x x|a]
+//          t1 = vec_ld(16, dst+y); //[b c d|e]
+            t2 = vec_ld(31, dst+y); //[f g h|x]
+
+            d = vec_madd(s0,s1,s2); // [A B C D]
+
+                                                 // [A A B B]
+
+                                                 // [C C D D]
+
+            d0 = vec_perm(t0, t1, mask); // [a b c d]
+
+            d0 = vec_sel(vec_mergeh(d, d), d0, sel);   // [A b B d]
+
+            edges = vec_perm(t1, t0, mask);
+
+            t0 = vec_perm(edges, d0, align); // [x x x|A]
+
+            t1 = vec_perm(d0, edges, align); // [b B d|e]
+
+            vec_stl(t0, 0, dst+y);
+
+            d1 = vec_perm(t1, t2, mask); // [e f g h]
+
+            d1 = vec_sel(vec_mergel(d, d), d1, sel); // [C f D h]
+
+            edges = vec_perm(t2, t1, mask);
+
+            t1 = vec_perm(edges, d1, align); // [b B d|C]
+
+            t2 = vec_perm(d1, edges, align); // [f D h|x]
+
+            vec_stl(t1, 16, dst+y);
+
+            t0 = t1;
+
+            vec_stl(t2, 31, dst+y);
+
+            t1 = t2;
+        }
+    } else
+    #endif
+    if (step == 1 && src3 == 0)
+        for (i=0; i<len-3; i+=4) {
+            t1 = vec_ld(15, dst+i);
+            s0 = vec_ld(0, src0+i);
+            s1 = vec_ld(0, src1+i);
+            s2 = vec_ld(0, src2+i);
+            edges = vec_perm(t1 ,t0, mask);
+            d = vec_madd(s0,s1,s2);
+            t1 = vec_perm(d, edges, align);
+            t0 = vec_perm(edges, d, align);
+            vec_st(t1, 15, dst+i);
+            vec_st(t0, 0, dst+i);
+            t0 = t1;
+        }
+    else
+        ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
+}
+
+void float_to_int16_altivec(int16_t *dst, const float *src, int len)
+{
+    int i;
+    vector float s0, s1;
+    vector signed int t0, t1;
+    vector signed short d0, d1, d;
+    vector unsigned char align;
+    if(((long)dst)&15) //FIXME
+    for(i=0; i<len-7; i+=8) {
+        s0 = vec_ld(0, src+i);
+        s1 = vec_ld(16, src+i);
+        t0 = vec_cts(s0, 0);
+        d0 = vec_ld(0, dst+i);
+        t1 = vec_cts(s1, 0);
+        d1 = vec_ld(15, dst+i);
+        d = vec_packs(t0,t1);
+        d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i));
+        align = vec_lvsr(0, dst+i);
+        d0 = vec_perm(d1, d, align);
+        d1 = vec_perm(d, d1, align);
+        vec_st(d0, 0, dst+i);
+        vec_st(d1,15, dst+i);
+    }
+    else
+    for(i=0; i<len-7; i+=8) {
+        s0 = vec_ld(0, src+i);
+        s1 = vec_ld(16, src+i);
+        t0 = vec_cts(s0, 0);
+        t1 = vec_cts(s1, 0);
+        d = vec_packs(t0,t1);
+        vec_st(d, 0, dst+i);
+    }
+}
+
+void float_init_altivec(DSPContext* c, AVCodecContext *avctx)
+{
+    c->vector_fmul = vector_fmul_altivec;
+    c->vector_fmul_reverse = vector_fmul_reverse_altivec;
+    c->vector_fmul_add_add = vector_fmul_add_add_altivec;
+    if(!(avctx->flags & CODEC_FLAG_BITEXACT))
+        c->float_to_int16 = float_to_int16_altivec;
+}
diff --git a/contrib/ffmpeg/libavcodec/ppc/gcc_fixes.h b/contrib/ffmpeg/libavcodec/ppc/gcc_fixes.h
new file mode 100644
index 000000000..5a4a55188
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/gcc_fixes.h
@@ -0,0 +1,119 @@
+/*
+ * gcc fixes for altivec.
+ * Used to workaround broken gcc (FSF gcc-3 pre gcc-3.3)
+ * and to stay somewhat compatible with Darwin.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _GCC_FIXES_
+#define _GCC_FIXES_
+
+#ifdef HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+
+#ifdef CONFIG_DARWIN
+# ifndef __MWERKS__
+#  define AVV(x...) (x)
+# else
+#  define AVV
+# endif
+#define REG_v(a) asm ( #a )
+#else
+
+#define AVV(x...) {x}
+
+#if (__GNUC__ < 4)
+# define REG_v(a)
+#else
+# define REG_v(a) asm ( #a )
+#endif
+
+#if (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
+
+/* This code was provided to me by Bartosch Pixa
+ * as a separate header file (broken_mergel.h).
+ * thanks to lu_zero for the workaround.
+ *
+ * See this mail for more information:
+ * http://gcc.gnu.org/ml/gcc/2003-04/msg00967.html
+ */
+
+static inline vector signed char ff_vmrglb (vector signed char const A,
+                                          vector signed char const B)
+{
+    static const vector unsigned char lowbyte = {
+        0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b,  0x1b,
+        0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f
+    };
+    return vec_perm (A, B, lowbyte);
+}
+
+static inline vector signed short ff_vmrglh (vector signed short const A,
+                                          vector signed short const B)
+{
+    static const vector unsigned char lowhalf = {
+        0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b,
+        0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f
+    };
+    return vec_perm (A, B, lowhalf);
+}
+
+static inline vector signed int ff_vmrglw (vector signed int const A,
+                                          vector signed int const B)
+{
+    static const vector unsigned char lowword = {
+        0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b,
+        0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f
+    };
+    return vec_perm (A, B, lowword);
+}
+/*#define ff_vmrglb ff_vmrglb
+#define ff_vmrglh ff_vmrglh
+#define ff_vmrglw ff_vmrglw
+*/
+#undef vec_mergel
+
+#define vec_mergel(a1, a2) \
+__ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
+      ((vector signed char) ff_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \
+__ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
+      ((vector unsigned char) ff_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \
+__ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
+      ((vector signed short) ff_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \
+__ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
+      ((vector unsigned short) ff_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \
+__ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
+      ((vector float) ff_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
+__ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
+      ((vector signed int) ff_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
+__ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
+      ((vector unsigned int) ff_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
+    __altivec_link_error_invalid_argument ())))))))
+
+#endif
+
+#endif /* CONFIG_DARWIN */
+
+#ifndef __MWERKS__
+#define const_vector const vector
+#else
+#define const_vector vector
+#endif
+
+#endif /* _GCC_FIXES_ */
diff --git a/contrib/ffmpeg/libavcodec/ppc/gmc_altivec.c b/contrib/ffmpeg/libavcodec/ppc/gmc_altivec.c
new file mode 100644
index 000000000..42c936bb3
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/gmc_altivec.c
@@ -0,0 +1,146 @@
+/*
+ * GMC (Global Motion Compensation)
+ * AltiVec-enabled
+ * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+
+#include "dsputil_altivec.h"
+
+/*
+  altivec-enhanced gmc1. ATM this code assume stride is a multiple of 8,
+  to preserve proper dst alignement.
+*/
+#define GMC1_PERF_COND (h==8)
+void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
+{
+POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
+    const unsigned short __attribute__ ((aligned(16))) rounder_a[8] =
+      {rounder, rounder, rounder, rounder,
+       rounder, rounder, rounder, rounder};
+    const unsigned short __attribute__ ((aligned(16))) ABCD[8] =
+      {
+        (16-x16)*(16-y16), /* A */
+        (   x16)*(16-y16), /* B */
+        (16-x16)*(   y16), /* C */
+        (   x16)*(   y16), /* D */
+        0, 0, 0, 0         /* padding */
+      };
+    register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
+    register const_vector unsigned short vcsr8 = (const_vector unsigned short)vec_splat_u16(8);
+    register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD;
+    register vector unsigned short Av, Bv, Cv, Dv, rounderV, tempA, tempB, tempC, tempD;
+    int i;
+    unsigned long dst_odd = (unsigned long)dst & 0x0000000F;
+    unsigned long src_really_odd = (unsigned long)src & 0x0000000F;
+
+
+POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
+
+    tempA = vec_ld(0, (unsigned short*)ABCD);
+    Av = vec_splat(tempA, 0);
+    Bv = vec_splat(tempA, 1);
+    Cv = vec_splat(tempA, 2);
+    Dv = vec_splat(tempA, 3);
+
+    rounderV = vec_ld(0, (unsigned short*)rounder_a);
+
+    // we'll be able to pick-up our 9 char elements
+    // at src from those 32 bytes
+    // we load the first batch here, as inside the loop
+    // we can re-use 'src+stride' from one iteration
+    // as the 'src' of the next.
+    src_0 = vec_ld(0, src);
+    src_1 = vec_ld(16, src);
+    srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));
+
+    if (src_really_odd != 0x0000000F)
+    { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
+      srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
+    }
+    else
+    {
+      srcvB = src_1;
+    }
+    srcvA = vec_mergeh(vczero, srcvA);
+    srcvB = vec_mergeh(vczero, srcvB);
+
+    for(i=0; i<h; i++)
+    {
+      dst_odd = (unsigned long)dst & 0x0000000F;
+      src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;
+
+      dstv = vec_ld(0, dst);
+
+      // we we'll be able to pick-up our 9 char elements
+      // at src + stride from those 32 bytes
+      // then reuse the resulting 2 vectors srvcC and srcvD
+      // as the next srcvA and srcvB
+      src_0 = vec_ld(stride + 0, src);
+      src_1 = vec_ld(stride + 16, src);
+      srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));
+
+      if (src_really_odd != 0x0000000F)
+      { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
+        srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
+      }
+      else
+      {
+        srcvD = src_1;
+      }
+
+      srcvC = vec_mergeh(vczero, srcvC);
+      srcvD = vec_mergeh(vczero, srcvD);
+
+
+      // OK, now we (finally) do the math :-)
+      // those four instructions replaces 32 int muls & 32 int adds.
+      // isn't AltiVec nice ?
+      tempA = vec_mladd((vector unsigned short)srcvA, Av, rounderV);
+      tempB = vec_mladd((vector unsigned short)srcvB, Bv, tempA);
+      tempC = vec_mladd((vector unsigned short)srcvC, Cv, tempB);
+      tempD = vec_mladd((vector unsigned short)srcvD, Dv, tempC);
+
+      srcvA = srcvC;
+      srcvB = srcvD;
+
+      tempD = vec_sr(tempD, vcsr8);
+
+      dstv2 = vec_pack(tempD, (vector unsigned short)vczero);
+
+      if (dst_odd)
+      {
+        dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
+      }
+      else
+      {
+        dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
+      }
+
+      vec_st(dstv2, 0, dst);
+
+      dst += stride;
+      src += stride;
+    }
+
+POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
+}
diff --git a/contrib/ffmpeg/libavcodec/ppc/h264_altivec.c b/contrib/ffmpeg/libavcodec/ppc/h264_altivec.c
new file mode 100644
index 000000000..4aa366f97
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/h264_altivec.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+
+#include "dsputil_altivec.h"
+
+#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
+#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
+
+#define OP_U8_ALTIVEC                          PUT_OP_U8_ALTIVEC
+#define PREFIX_h264_chroma_mc8_altivec         put_h264_chroma_mc8_altivec
+#define PREFIX_h264_chroma_mc8_num             altivec_put_h264_chroma_mc8_num
+#define PREFIX_h264_qpel16_h_lowpass_altivec   put_h264_qpel16_h_lowpass_altivec
+#define PREFIX_h264_qpel16_h_lowpass_num       altivec_put_h264_qpel16_h_lowpass_num
+#define PREFIX_h264_qpel16_v_lowpass_altivec   put_h264_qpel16_v_lowpass_altivec
+#define PREFIX_h264_qpel16_v_lowpass_num       altivec_put_h264_qpel16_v_lowpass_num
+#define PREFIX_h264_qpel16_hv_lowpass_altivec  put_h264_qpel16_hv_lowpass_altivec
+#define PREFIX_h264_qpel16_hv_lowpass_num      altivec_put_h264_qpel16_hv_lowpass_num
+#include "h264_template_altivec.c"
+#undef OP_U8_ALTIVEC
+#undef PREFIX_h264_chroma_mc8_altivec
+#undef PREFIX_h264_chroma_mc8_num
+#undef PREFIX_h264_qpel16_h_lowpass_altivec
+#undef PREFIX_h264_qpel16_h_lowpass_num
+#undef PREFIX_h264_qpel16_v_lowpass_altivec
+#undef PREFIX_h264_qpel16_v_lowpass_num
+#undef PREFIX_h264_qpel16_hv_lowpass_altivec
+#undef PREFIX_h264_qpel16_hv_lowpass_num
+
+#define OP_U8_ALTIVEC                          AVG_OP_U8_ALTIVEC
+#define PREFIX_h264_chroma_mc8_altivec         avg_h264_chroma_mc8_altivec
+#define PREFIX_h264_chroma_mc8_num             altivec_avg_h264_chroma_mc8_num
+#define PREFIX_h264_qpel16_h_lowpass_altivec   avg_h264_qpel16_h_lowpass_altivec
+#define PREFIX_h264_qpel16_h_lowpass_num       altivec_avg_h264_qpel16_h_lowpass_num
+#define PREFIX_h264_qpel16_v_lowpass_altivec   avg_h264_qpel16_v_lowpass_altivec
+#define PREFIX_h264_qpel16_v_lowpass_num       altivec_avg_h264_qpel16_v_lowpass_num
+#define PREFIX_h264_qpel16_hv_lowpass_altivec  avg_h264_qpel16_hv_lowpass_altivec
+#define PREFIX_h264_qpel16_hv_lowpass_num      altivec_avg_h264_qpel16_hv_lowpass_num
+#include "h264_template_altivec.c"
+#undef OP_U8_ALTIVEC
+#undef PREFIX_h264_chroma_mc8_altivec
+#undef PREFIX_h264_chroma_mc8_num
+#undef PREFIX_h264_qpel16_h_lowpass_altivec
+#undef PREFIX_h264_qpel16_h_lowpass_num
+#undef PREFIX_h264_qpel16_v_lowpass_altivec
+#undef PREFIX_h264_qpel16_v_lowpass_num
+#undef PREFIX_h264_qpel16_hv_lowpass_altivec
+#undef PREFIX_h264_qpel16_hv_lowpass_num
+
+#define H264_MC(OPNAME, SIZE, CODETYPE) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \
+    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
+    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
+    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
+    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
+    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
+    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
+    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
+    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
+    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
+    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
+    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
+    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
+    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
+    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
+    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
+    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
+    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
+}\
+
+/* this code assume that stride % 16 == 0 */
+void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
+    signed int ABCD[4] __attribute__((aligned(16))) =
+                        {((8 - x) * (8 - y)),
+                          ((x) * (8 - y)),
+                          ((8 - x) * (y)),
+                          ((x) * (y))};
+    register int i;
+    vector unsigned char fperm;
+    const vector signed int vABCD = vec_ld(0, ABCD);
+    const vector signed short vA = vec_splat((vector signed short)vABCD, 1);
+    const vector signed short vB = vec_splat((vector signed short)vABCD, 3);
+    const vector signed short vC = vec_splat((vector signed short)vABCD, 5);
+    const vector signed short vD = vec_splat((vector signed short)vABCD, 7);
+    const vector signed int vzero = vec_splat_s32(0);
+    const vector signed short v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
+    const vector unsigned short v6us = vec_splat_u16(6);
+    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
+    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
+
+    vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
+    vector unsigned char vsrc0uc, vsrc1uc;
+    vector signed short vsrc0ssH, vsrc1ssH;
+    vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc;
+    vector signed short vsrc2ssH, vsrc3ssH, psum;
+    vector unsigned char vdst, ppsum, vfdst, fsum;
+
+    if (((unsigned long)dst) % 16 == 0) {
+      fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13,
+                                        0x14, 0x15, 0x16, 0x17,
+                                        0x08, 0x09, 0x0A, 0x0B,
+                                        0x0C, 0x0D, 0x0E, 0x0F);
+    } else {
+      fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,
+                                        0x04, 0x05, 0x06, 0x07,
+                                        0x18, 0x19, 0x1A, 0x1B,
+                                        0x1C, 0x1D, 0x1E, 0x1F);
+    }
+
+    vsrcAuc = vec_ld(0, src);
+
+    if (loadSecond)
+      vsrcBuc = vec_ld(16, src);
+    vsrcperm0 = vec_lvsl(0, src);
+    vsrcperm1 = vec_lvsl(1, src);
+
+    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
+    if (reallyBadAlign)
+      vsrc1uc = vsrcBuc;
+    else
+      vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
+
+    vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+                                               (vector unsigned char)vsrc0uc);
+    vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+                                               (vector unsigned char)vsrc1uc);
+
+    if (!loadSecond) {// -> !reallyBadAlign
+      for (i = 0 ; i < h ; i++) {
+
+
+        vsrcCuc = vec_ld(stride + 0, src);
+
+        vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
+        vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
+
+        vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+                                                (vector unsigned char)vsrc2uc);
+        vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+                                                (vector unsigned char)vsrc3uc);
+
+        psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
+        psum = vec_mladd(vB, vsrc1ssH, psum);
+        psum = vec_mladd(vC, vsrc2ssH, psum);
+        psum = vec_mladd(vD, vsrc3ssH, psum);
+        psum = vec_add(v28ss, psum);
+        psum = vec_sra(psum, v6us);
+
+        vdst = vec_ld(0, dst);
+        ppsum = (vector unsigned char)vec_packsu(psum, psum);
+        fsum = vec_perm(vdst, ppsum, fperm);
+
+        vec_st(fsum, 0, dst);
+
+        vsrc0ssH = vsrc2ssH;
+        vsrc1ssH = vsrc3ssH;
+
+        dst += stride;
+        src += stride;
+      }
+    } else {
+        vector unsigned char vsrcDuc;
+      for (i = 0 ; i < h ; i++) {
+        vsrcCuc = vec_ld(stride + 0, src);
+        vsrcDuc = vec_ld(stride + 16, src);
+
+        vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
+        if (reallyBadAlign)
+          vsrc3uc = vsrcDuc;
+        else
+          vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
+
+        vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+                                                (vector unsigned char)vsrc2uc);
+        vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+                                                (vector unsigned char)vsrc3uc);
+
+        psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
+        psum = vec_mladd(vB, vsrc1ssH, psum);
+        psum = vec_mladd(vC, vsrc2ssH, psum);
+        psum = vec_mladd(vD, vsrc3ssH, psum);
+        psum = vec_add(v28ss, psum);
+        psum = vec_sr(psum, v6us);
+
+        vdst = vec_ld(0, dst);
+        ppsum = (vector unsigned char)vec_pack(psum, psum);
+        fsum = vec_perm(vdst, ppsum, fperm);
+
+        vec_st(fsum, 0, dst);
+
+        vsrc0ssH = vsrc2ssH;
+        vsrc1ssH = vsrc3ssH;
+
+        dst += stride;
+        src += stride;
+      }
+    }
+}
+
+static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
+                                    const uint8_t * src2, int dst_stride,
+                                    int src_stride1, int h)
+{
+    int i;
+    vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;
+
+    mask_ = vec_lvsl(0, src2);
+
+    for (i = 0; i < h; i++) {
+
+        tmp1 = vec_ld(i * src_stride1, src1);
+        mask = vec_lvsl(i * src_stride1, src1);
+        tmp2 = vec_ld(i * src_stride1 + 15, src1);
+
+        a = vec_perm(tmp1, tmp2, mask);
+
+        tmp1 = vec_ld(i * 16, src2);
+        tmp2 = vec_ld(i * 16 + 15, src2);
+
+        b = vec_perm(tmp1, tmp2, mask_);
+
+        tmp1 = vec_ld(0, dst);
+        mask = vec_lvsl(0, dst);
+        tmp2 = vec_ld(15, dst);
+
+        d = vec_avg(a, b);
+
+        edges = vec_perm(tmp2, tmp1, mask);
+
+        align = vec_lvsr(0, dst);
+
+        tmp2 = vec_perm(d, edges, align);
+        tmp1 = vec_perm(edges, d, align);
+
+        vec_st(tmp2, 15, dst);
+        vec_st(tmp1, 0 , dst);
+
+        dst += dst_stride;
+    }
+}
+
+static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
+                                    const uint8_t * src2, int dst_stride,
+                                    int src_stride1, int h)
+{
+    int i;
+    vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;
+
+    mask_ = vec_lvsl(0, src2);
+
+    for (i = 0; i < h; i++) {
+
+        tmp1 = vec_ld(i * src_stride1, src1);
+        mask = vec_lvsl(i * src_stride1, src1);
+        tmp2 = vec_ld(i * src_stride1 + 15, src1);
+
+        a = vec_perm(tmp1, tmp2, mask);
+
+        tmp1 = vec_ld(i * 16, src2);
+        tmp2 = vec_ld(i * 16 + 15, src2);
+
+        b = vec_perm(tmp1, tmp2, mask_);
+
+        tmp1 = vec_ld(0, dst);
+        mask = vec_lvsl(0, dst);
+        tmp2 = vec_ld(15, dst);
+
+        d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));
+
+        edges = vec_perm(tmp2, tmp1, mask);
+
+        align = vec_lvsr(0, dst);
+
+        tmp2 = vec_perm(d, edges, align);
+        tmp1 = vec_perm(edges, d, align);
+
+        vec_st(tmp2, 15, dst);
+        vec_st(tmp1, 0 , dst);
+
+        dst += dst_stride;
+    }
+}
+
+/* Implemented but could be faster
+#define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
+#define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
+ */
+
+  H264_MC(put_, 16, altivec)
+  H264_MC(avg_, 16, altivec)
+
+void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
+
+#ifdef HAVE_ALTIVEC
+  if (has_altivec()) {
+    c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
+    c->put_no_rnd_h264_chroma_pixels_tab[0] = put_no_rnd_h264_chroma_mc8_altivec;
+    c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
+
+#define dspfunc(PFX, IDX, NUM) \
+    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
+    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
+    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
+    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
+    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
+    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
+    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
+    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
+    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
+    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
+    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
+    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
+    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
+    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
+    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
+    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
+
+    dspfunc(put_h264_qpel, 0, 16);
+    dspfunc(avg_h264_qpel, 0, 16);
+#undef dspfunc
+
+  } else
+#endif /* HAVE_ALTIVEC */
+  {
+    // Non-AltiVec PPC optimisations
+
+    // ... pending ...
+  }
+}
diff --git a/contrib/ffmpeg/libavcodec/ppc/h264_template_altivec.c b/contrib/ffmpeg/libavcodec/ppc/h264_template_altivec.c
new file mode 100644
index 000000000..e8ad67f2f
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/h264_template_altivec.c
@@ -0,0 +1,719 @@
+/*
+ * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* this code assume that stride % 16 == 0 */
+void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
+  POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);
+    signed int ABCD[4] __attribute__((aligned(16))) =
+                        {((8 - x) * (8 - y)),
+                          ((x) * (8 - y)),
+                          ((8 - x) * (y)),
+                          ((x) * (y))};
+    register int i;
+    vector unsigned char fperm;
+    const vector signed int vABCD = vec_ld(0, ABCD);
+    const vector signed short vA = vec_splat((vector signed short)vABCD, 1);
+    const vector signed short vB = vec_splat((vector signed short)vABCD, 3);
+    const vector signed short vC = vec_splat((vector signed short)vABCD, 5);
+    const vector signed short vD = vec_splat((vector signed short)vABCD, 7);
+    const vector signed int vzero = vec_splat_s32(0);
+    const vector signed short v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
+    const vector unsigned short v6us = vec_splat_u16(6);
+    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
+    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
+
+    vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
+    vector unsigned char vsrc0uc, vsrc1uc;
+    vector signed short vsrc0ssH, vsrc1ssH;
+    vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc;
+    vector signed short vsrc2ssH, vsrc3ssH, psum;
+    vector unsigned char vdst, ppsum, vfdst, fsum;
+
+  POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1);
+
+    if (((unsigned long)dst) % 16 == 0) {
+      fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13,
+                                        0x14, 0x15, 0x16, 0x17,
+                                        0x08, 0x09, 0x0A, 0x0B,
+                                        0x0C, 0x0D, 0x0E, 0x0F);
+    } else {
+      fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,
+                                        0x04, 0x05, 0x06, 0x07,
+                                        0x18, 0x19, 0x1A, 0x1B,
+                                        0x1C, 0x1D, 0x1E, 0x1F);
+    }
+
+    vsrcAuc = vec_ld(0, src);
+
+    if (loadSecond)
+      vsrcBuc = vec_ld(16, src);
+    vsrcperm0 = vec_lvsl(0, src);
+    vsrcperm1 = vec_lvsl(1, src);
+
+    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
+    if (reallyBadAlign)
+      vsrc1uc = vsrcBuc;
+    else
+      vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
+
+    vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+                                               (vector unsigned char)vsrc0uc);
+    vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+                                               (vector unsigned char)vsrc1uc);
+
+    if (!loadSecond) {// -> !reallyBadAlign
+      for (i = 0 ; i < h ; i++) {
+
+
+        vsrcCuc = vec_ld(stride + 0, src);
+
+        vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
+        vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
+
+        vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+                                                (vector unsigned char)vsrc2uc);
+        vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+                                                (vector unsigned char)vsrc3uc);
+
+        psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
+        psum = vec_mladd(vB, vsrc1ssH, psum);
+        psum = vec_mladd(vC, vsrc2ssH, psum);
+        psum = vec_mladd(vD, vsrc3ssH, psum);
+        psum = vec_add(v32ss, psum);
+        psum = vec_sra(psum, v6us);
+
+        vdst = vec_ld(0, dst);
+        ppsum = (vector unsigned char)vec_packsu(psum, psum);
+        vfdst = vec_perm(vdst, ppsum, fperm);
+
+        OP_U8_ALTIVEC(fsum, vfdst, vdst);
+
+        vec_st(fsum, 0, dst);
+
+        vsrc0ssH = vsrc2ssH;
+        vsrc1ssH = vsrc3ssH;
+
+        dst += stride;
+        src += stride;
+      }
+    } else {
+        vector unsigned char vsrcDuc;
+      for (i = 0 ; i < h ; i++) {
+        vsrcCuc = vec_ld(stride + 0, src);
+        vsrcDuc = vec_ld(stride + 16, src);
+
+        vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
+        if (reallyBadAlign)
+          vsrc3uc = vsrcDuc;
+        else
+          vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
+
+        vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+                                                (vector unsigned char)vsrc2uc);
+        vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero,
+                                                (vector unsigned char)vsrc3uc);
+
+        psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
+        psum = vec_mladd(vB, vsrc1ssH, psum);
+        psum = vec_mladd(vC, vsrc2ssH, psum);
+        psum = vec_mladd(vD, vsrc3ssH, psum);
+        psum = vec_add(v32ss, psum);
+        psum = vec_sr(psum, v6us);
+
+        vdst = vec_ld(0, dst);
+        ppsum = (vector unsigned char)vec_pack(psum, psum);
+        vfdst = vec_perm(vdst, ppsum, fperm);
+
+        OP_U8_ALTIVEC(fsum, vfdst, vdst);
+
+        vec_st(fsum, 0, dst);
+
+        vsrc0ssH = vsrc2ssH;
+        vsrc1ssH = vsrc3ssH;
+
+        dst += stride;
+        src += stride;
+      }
+    }
+    POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1);
+}
+
+/* this code assume stride % 16 == 0 */
+static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
+  POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1);
+  register int i;
+
+  const vector signed int vzero = vec_splat_s32(0);
+  const vector unsigned char permM2 = vec_lvsl(-2, src);
+  const vector unsigned char permM1 = vec_lvsl(-1, src);
+  const vector unsigned char permP0 = vec_lvsl(+0, src);
+  const vector unsigned char permP1 = vec_lvsl(+1, src);
+  const vector unsigned char permP2 = vec_lvsl(+2, src);
+  const vector unsigned char permP3 = vec_lvsl(+3, src);
+  const vector signed short v5ss = vec_splat_s16(5);
+  const vector unsigned short v5us = vec_splat_u16(5);
+  const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+  const vector signed short v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
+  const vector unsigned char dstperm = vec_lvsr(0, dst);
+  const vector unsigned char neg1 =
+                                (const vector unsigned char) vec_splat_s8(-1);
+
+  const vector unsigned char dstmask =
+                                vec_perm((const vector unsigned char)vzero,
+                                                               neg1, dstperm);
+
+  vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
+
+  register int align = ((((unsigned long)src) - 2) % 16);
+
+  vector signed short srcP0A, srcP0B, srcP1A, srcP1B,
+                      srcP2A, srcP2B, srcP3A, srcP3B,
+                      srcM1A, srcM1B, srcM2A, srcM2B,
+                      sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
+                      pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
+                      psumA, psumB, sumA, sumB;
+
+  vector unsigned char sum, dst1, dst2, vdst, fsum,
+                       rsum, fdst1, fdst2;
+
+  POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
+
+  for (i = 0 ; i < 16 ; i ++) {
+    vector unsigned char srcR1 = vec_ld(-2, src);
+    vector unsigned char srcR2 = vec_ld(14, src);
+
+    switch (align) {
+    default: {
+      srcM2 = vec_perm(srcR1, srcR2, permM2);
+      srcM1 = vec_perm(srcR1, srcR2, permM1);
+      srcP0 = vec_perm(srcR1, srcR2, permP0);
+      srcP1 = vec_perm(srcR1, srcR2, permP1);
+      srcP2 = vec_perm(srcR1, srcR2, permP2);
+      srcP3 = vec_perm(srcR1, srcR2, permP3);
+    } break;
+    case 11: {
+      srcM2 = vec_perm(srcR1, srcR2, permM2);
+      srcM1 = vec_perm(srcR1, srcR2, permM1);
+      srcP0 = vec_perm(srcR1, srcR2, permP0);
+      srcP1 = vec_perm(srcR1, srcR2, permP1);
+      srcP2 = vec_perm(srcR1, srcR2, permP2);
+      srcP3 = srcR2;
+    } break;
+    case 12: {
+      vector unsigned char srcR3 = vec_ld(30, src);
+      srcM2 = vec_perm(srcR1, srcR2, permM2);
+      srcM1 = vec_perm(srcR1, srcR2, permM1);
+      srcP0 = vec_perm(srcR1, srcR2, permP0);
+      srcP1 = vec_perm(srcR1, srcR2, permP1);
+      srcP2 = srcR2;
+      srcP3 = vec_perm(srcR2, srcR3, permP3);
+    } break;
+    case 13: {
+      vector unsigned char srcR3 = vec_ld(30, src);
+      srcM2 = vec_perm(srcR1, srcR2, permM2);
+      srcM1 = vec_perm(srcR1, srcR2, permM1);
+      srcP0 = vec_perm(srcR1, srcR2, permP0);
+      srcP1 = srcR2;
+      srcP2 = vec_perm(srcR2, srcR3, permP2);
+      srcP3 = vec_perm(srcR2, srcR3, permP3);
+    } break;
+    case 14: {
+      vector unsigned char srcR3 = vec_ld(30, src);
+      srcM2 = vec_perm(srcR1, srcR2, permM2);
+      srcM1 = vec_perm(srcR1, srcR2, permM1);
+      srcP0 = srcR2;
+      srcP1 = vec_perm(srcR2, srcR3, permP1);
+      srcP2 = vec_perm(srcR2, srcR3, permP2);
+      srcP3 = vec_perm(srcR2, srcR3, permP3);
+    } break;
+    case 15: {
+      vector unsigned char srcR3 = vec_ld(30, src);
+      srcM2 = vec_perm(srcR1, srcR2, permM2);
+      srcM1 = srcR2;
+      srcP0 = vec_perm(srcR2, srcR3, permP0);
+      srcP1 = vec_perm(srcR2, srcR3, permP1);
+      srcP2 = vec_perm(srcR2, srcR3, permP2);
+      srcP3 = vec_perm(srcR2, srcR3, permP3);
+    } break;
+    }
+
+    srcP0A = (vector signed short)
+                vec_mergeh((vector unsigned char)vzero, srcP0);
+    srcP0B = (vector signed short)
+                vec_mergel((vector unsigned char)vzero, srcP0);
+    srcP1A = (vector signed short)
+                vec_mergeh((vector unsigned char)vzero, srcP1);
+    srcP1B = (vector signed short)
+                vec_mergel((vector unsigned char)vzero, srcP1);
+
+    srcP2A = (vector signed short)
+                vec_mergeh((vector unsigned char)vzero, srcP2);
+    srcP2B = (vector signed short)
+                vec_mergel((vector unsigned char)vzero, srcP2);
+    srcP3A = (vector signed short)
+                vec_mergeh((vector unsigned char)vzero, srcP3);
+    srcP3B = (vector signed short)
+                vec_mergel((vector unsigned char)vzero, srcP3);
+
+    srcM1A = (vector signed short)
+                vec_mergeh((vector unsigned char)vzero, srcM1);
+    srcM1B = (vector signed short)
+                vec_mergel((vector unsigned char)vzero, srcM1);
+    srcM2A = (vector signed short)
+                vec_mergeh((vector unsigned char)vzero, srcM2);
+    srcM2B = (vector signed short)
+                vec_mergel((vector unsigned char)vzero, srcM2);
+
+    sum1A = vec_adds(srcP0A, srcP1A);
+    sum1B = vec_adds(srcP0B, srcP1B);
+    sum2A = vec_adds(srcM1A, srcP2A);
+    sum2B = vec_adds(srcM1B, srcP2B);
+    sum3A = vec_adds(srcM2A, srcP3A);
+    sum3B = vec_adds(srcM2B, srcP3B);
+
+    pp1A = vec_mladd(sum1A, v20ss, v16ss);
+    pp1B = vec_mladd(sum1B, v20ss, v16ss);
+
+    pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
+    pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
+
+    pp3A = vec_add(sum3A, pp1A);
+    pp3B = vec_add(sum3B, pp1B);
+
+    psumA = vec_sub(pp3A, pp2A);
+    psumB = vec_sub(pp3B, pp2B);
+
+    sumA = vec_sra(psumA, v5us);
+    sumB = vec_sra(psumB, v5us);
+
+    sum = vec_packsu(sumA, sumB);
+
+    dst1 = vec_ld(0, dst);
+    dst2 = vec_ld(16, dst);
+    vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));
+
+    OP_U8_ALTIVEC(fsum, sum, vdst);
+
+    rsum = vec_perm(fsum, fsum, dstperm);
+    fdst1 = vec_sel(dst1, rsum, dstmask);
+    fdst2 = vec_sel(rsum, dst2, dstmask);
+
+    vec_st(fdst1, 0, dst);
+    vec_st(fdst2, 16, dst);
+
+    src += srcStride;
+    dst += dstStride;
+  }
+POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
+}
+
+/* this code assume stride % 16 == 0 */
+static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
+  POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1);
+
+  register int i;
+
+  const vector signed int vzero = vec_splat_s32(0);
+  const vector unsigned char perm = vec_lvsl(0, src);
+  const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+  const vector unsigned short v5us = vec_splat_u16(5);
+  const vector signed short v5ss = vec_splat_s16(5);
+  const vector signed short v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
+  const vector unsigned char dstperm = vec_lvsr(0, dst);
+  const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1);
+  const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);
+
+  uint8_t *srcbis = src - (srcStride * 2);
+
+  const vector unsigned char srcM2a = vec_ld(0, srcbis);
+  const vector unsigned char srcM2b = vec_ld(16, srcbis);
+  const vector unsigned char srcM2 = vec_perm(srcM2a, srcM2b, perm);
+//  srcbis += srcStride;
+  const vector unsigned char srcM1a = vec_ld(0, srcbis += srcStride);
+  const vector unsigned char srcM1b = vec_ld(16, srcbis);
+  const vector unsigned char srcM1 = vec_perm(srcM1a, srcM1b, perm);
+//  srcbis += srcStride;
+  const vector unsigned char srcP0a = vec_ld(0, srcbis += srcStride);
+  const vector unsigned char srcP0b = vec_ld(16, srcbis);
+  const vector unsigned char srcP0 = vec_perm(srcP0a, srcP0b, perm);
+//  srcbis += srcStride;
+  const vector unsigned char srcP1a = vec_ld(0, srcbis += srcStride);
+  const vector unsigned char srcP1b = vec_ld(16, srcbis);
+  const vector unsigned char srcP1 = vec_perm(srcP1a, srcP1b, perm);
+//  srcbis += srcStride;
+  const vector unsigned char srcP2a = vec_ld(0, srcbis += srcStride);
+  const vector unsigned char srcP2b = vec_ld(16, srcbis);
+  const vector unsigned char srcP2 = vec_perm(srcP2a, srcP2b, perm);
+//  srcbis += srcStride;
+
+  vector signed short srcM2ssA = (vector signed short)
+                                vec_mergeh((vector unsigned char)vzero, srcM2);
+  vector signed short srcM2ssB = (vector signed short)
+                                vec_mergel((vector unsigned char)vzero, srcM2);
+  vector signed short srcM1ssA = (vector signed short)
+                                vec_mergeh((vector unsigned char)vzero, srcM1);
+  vector signed short srcM1ssB = (vector signed short)
+                                vec_mergel((vector unsigned char)vzero, srcM1);
+  vector signed short srcP0ssA = (vector signed short)
+                                vec_mergeh((vector unsigned char)vzero, srcP0);
+  vector signed short srcP0ssB = (vector signed short)
+                                vec_mergel((vector unsigned char)vzero, srcP0);
+  vector signed short srcP1ssA = (vector signed short)
+                                vec_mergeh((vector unsigned char)vzero, srcP1);
+  vector signed short srcP1ssB = (vector signed short)
+                                vec_mergel((vector unsigned char)vzero, srcP1);
+  vector signed short srcP2ssA = (vector signed short)
+                                vec_mergeh((vector unsigned char)vzero, srcP2);
+  vector signed short srcP2ssB = (vector signed short)
+                                vec_mergel((vector unsigned char)vzero, srcP2);
+
+  vector signed short pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
+                      psumA, psumB, sumA, sumB,
+                      srcP3ssA, srcP3ssB,
+                      sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;
+
+  vector unsigned char sum, dst1, dst2, vdst, fsum, rsum, fdst1, fdst2,
+                       srcP3a, srcP3b, srcP3;
+
+  POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
+
+  for (i = 0 ; i < 16 ; i++) {
+    srcP3a = vec_ld(0, srcbis += srcStride);
+    srcP3b = vec_ld(16, srcbis);
+    srcP3 = vec_perm(srcP3a, srcP3b, perm);
+    srcP3ssA = (vector signed short)
+                                vec_mergeh((vector unsigned char)vzero, srcP3);
+    srcP3ssB = (vector signed short)
+                                vec_mergel((vector unsigned char)vzero, srcP3);
+//    srcbis += srcStride;
+
+    sum1A = vec_adds(srcP0ssA, srcP1ssA);
+    sum1B = vec_adds(srcP0ssB, srcP1ssB);
+    sum2A = vec_adds(srcM1ssA, srcP2ssA);
+    sum2B = vec_adds(srcM1ssB, srcP2ssB);
+    sum3A = vec_adds(srcM2ssA, srcP3ssA);
+    sum3B = vec_adds(srcM2ssB, srcP3ssB);
+
+    srcM2ssA = srcM1ssA;
+    srcM2ssB = srcM1ssB;
+    srcM1ssA = srcP0ssA;
+    srcM1ssB = srcP0ssB;
+    srcP0ssA = srcP1ssA;
+    srcP0ssB = srcP1ssB;
+    srcP1ssA = srcP2ssA;
+    srcP1ssB = srcP2ssB;
+    srcP2ssA = srcP3ssA;
+    srcP2ssB = srcP3ssB;
+
+    pp1A = vec_mladd(sum1A, v20ss, v16ss);
+    pp1B = vec_mladd(sum1B, v20ss, v16ss);
+
+    pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
+    pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
+
+    pp3A = vec_add(sum3A, pp1A);
+    pp3B = vec_add(sum3B, pp1B);
+
+    psumA = vec_sub(pp3A, pp2A);
+    psumB = vec_sub(pp3B, pp2B);
+
+    sumA = vec_sra(psumA, v5us);
+    sumB = vec_sra(psumB, v5us);
+
+    sum = vec_packsu(sumA, sumB);
+
+    dst1 = vec_ld(0, dst);
+    dst2 = vec_ld(16, dst);
+    vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));
+
+    OP_U8_ALTIVEC(fsum, sum, vdst);
+
+    rsum = vec_perm(fsum, fsum, dstperm);
+    fdst1 = vec_sel(dst1, rsum, dstmask);
+    fdst2 = vec_sel(rsum, dst2, dstmask);
+
+    vec_st(fdst1, 0, dst);
+    vec_st(fdst2, 16, dst);
+
+    dst += dstStride;
+  }
+  POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
+}
+
+/* this code assume stride % 16 == 0 *and* tmp is properly aligned */
+static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) {
+  POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1);
+  register int i;
+  const vector signed int vzero = vec_splat_s32(0);
+  const vector unsigned char permM2 = vec_lvsl(-2, src);
+  const vector unsigned char permM1 = vec_lvsl(-1, src);
+  const vector unsigned char permP0 = vec_lvsl(+0, src);
+  const vector unsigned char permP1 = vec_lvsl(+1, src);
+  const vector unsigned char permP2 = vec_lvsl(+2, src);
+  const vector unsigned char permP3 = vec_lvsl(+3, src);
+  const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+  const vector unsigned int v10ui = vec_splat_u32(10);
+  const vector signed short v5ss = vec_splat_s16(5);
+  const vector signed short v1ss = vec_splat_s16(1);
+  const vector signed int v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9));
+  const vector unsigned int v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4));
+
+  register int align = ((((unsigned long)src) - 2) % 16);
+
+  const vector unsigned char neg1 = (const vector unsigned char)
+                                                        vec_splat_s8(-1);
+
+  vector signed short srcP0A, srcP0B, srcP1A, srcP1B,
+                      srcP2A, srcP2B, srcP3A, srcP3B,
+                      srcM1A, srcM1B, srcM2A, srcM2B,
+                      sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
+                      pp1A, pp1B, pp2A, pp2B, psumA, psumB;
+
+  const vector unsigned char dstperm = vec_lvsr(0, dst);
+
+  const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);
+
+  const vector unsigned char mperm = (const vector unsigned char)
+    AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,
+        0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F);
+  int16_t *tmpbis = tmp;
+
+  vector signed short tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB,
+                      tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB,
+                      tmpP2ssA, tmpP2ssB;
+
+  vector signed int pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo,
+                    pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo,
+                    pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo,
+                    ssumAe, ssumAo, ssumBe, ssumBo;
+  vector unsigned char fsum, sumv, sum, dst1, dst2, vdst,
+                       rsum, fdst1, fdst2;
+  vector signed short ssume, ssumo;
+
+  POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
+  src -= (2 * srcStride);
+  for (i = 0 ; i < 21 ; i ++) {
+    vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
+    vector unsigned char srcR1 = vec_ld(-2, src);
+    vector unsigned char srcR2 = vec_ld(14, src);
+
+    switch (align) {
+    default: {
+      srcM2 = vec_perm(srcR1, srcR2, permM2);
+      srcM1 = vec_perm(srcR1, srcR2, permM1);
+      srcP0 = vec_perm(srcR1, srcR2, permP0);
+      srcP1 = vec_perm(srcR1, srcR2, permP1);
+      srcP2 = vec_perm(srcR1, srcR2, permP2);
+      srcP3 = vec_perm(srcR1, srcR2, permP3);
+    } break;
+    case 11: {
+      srcM2 = vec_perm(srcR1, srcR2, permM2);
+      srcM1 = vec_perm(srcR1, srcR2, permM1);
+      srcP0 = vec_perm(srcR1, srcR2, permP0);
+      srcP1 = vec_perm(srcR1, srcR2, permP1);
+      srcP2 = vec_perm(srcR1, srcR2, permP2);
+      srcP3 = srcR2;
+    } break;
+    case 12: {
+      vector unsigned char srcR3 = vec_ld(30, src);
+      srcM2 = vec_perm(srcR1, srcR2, permM2);
+      srcM1 = vec_perm(srcR1, srcR2, permM1);
+      srcP0 = vec_perm(srcR1, srcR2, permP0);
+      srcP1 = vec_perm(srcR1, srcR2, permP1);
+      srcP2 = srcR2;
+      srcP3 = vec_perm(srcR2, srcR3, permP3);
+    } break;
+    case 13: {
+      vector unsigned char srcR3 = vec_ld(30, src);
+      srcM2 = vec_perm(srcR1, srcR2, permM2);
+      srcM1 = vec_perm(srcR1, srcR2, permM1);
+      srcP0 = vec_perm(srcR1, srcR2, permP0);
+      srcP1 = srcR2;
+      srcP2 = vec_perm(srcR2, srcR3, permP2);
+      srcP3 = vec_perm(srcR2, srcR3, permP3);
+    } break;
+    case 14: {
+      vector unsigned char srcR3 = vec_ld(30, src);
+      srcM2 = vec_perm(srcR1, srcR2, permM2);
+      srcM1 = vec_perm(srcR1, srcR2, permM1);
+      srcP0 = srcR2;
+      srcP1 = vec_perm(srcR2, srcR3, permP1);
+      srcP2 = vec_perm(srcR2, srcR3, permP2);
+      srcP3 = vec_perm(srcR2, srcR3, permP3);
+    } break;
+    case 15: {
+      vector unsigned char srcR3 = vec_ld(30, src);
+      srcM2 = vec_perm(srcR1, srcR2, permM2);
+      srcM1 = srcR2;
+      srcP0 = vec_perm(srcR2, srcR3, permP0);
+      srcP1 = vec_perm(srcR2, srcR3, permP1);
+      srcP2 = vec_perm(srcR2, srcR3, permP2);
+      srcP3 = vec_perm(srcR2, srcR3, permP3);
+    } break;
+    }
+
+    srcP0A = (vector signed short)
+                            vec_mergeh((vector unsigned char)vzero, srcP0);
+    srcP0B = (vector signed short)
+                            vec_mergel((vector unsigned char)vzero, srcP0);
+    srcP1A = (vector signed short)
+                            vec_mergeh((vector unsigned char)vzero, srcP1);
+    srcP1B = (vector signed short)
+                            vec_mergel((vector unsigned char)vzero, srcP1);
+
+    srcP2A = (vector signed short)
+                            vec_mergeh((vector unsigned char)vzero, srcP2);
+    srcP2B = (vector signed short)
+                            vec_mergel((vector unsigned char)vzero, srcP2);
+    srcP3A = (vector signed short)
+                            vec_mergeh((vector unsigned char)vzero, srcP3);
+    srcP3B = (vector signed short)
+                            vec_mergel((vector unsigned char)vzero, srcP3);
+
+    srcM1A = (vector signed short)
+                            vec_mergeh((vector unsigned char)vzero, srcM1);
+    srcM1B = (vector signed short)
+                            vec_mergel((vector unsigned char)vzero, srcM1);
+    srcM2A = (vector signed short)
+                            vec_mergeh((vector unsigned char)vzero, srcM2);
+    srcM2B = (vector signed short)
+                            vec_mergel((vector unsigned char)vzero, srcM2);
+
+    sum1A = vec_adds(srcP0A, srcP1A);
+    sum1B = vec_adds(srcP0B, srcP1B);
+    sum2A = vec_adds(srcM1A, srcP2A);
+    sum2B = vec_adds(srcM1B, srcP2B);
+    sum3A = vec_adds(srcM2A, srcP3A);
+    sum3B = vec_adds(srcM2B, srcP3B);
+
+    pp1A = vec_mladd(sum1A, v20ss, sum3A);
+    pp1B = vec_mladd(sum1B, v20ss, sum3B);
+
+    pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
+    pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
+
+    psumA = vec_sub(pp1A, pp2A);
+    psumB = vec_sub(pp1B, pp2B);
+
+    vec_st(psumA, 0, tmp);
+    vec_st(psumB, 16, tmp);
+
+    src += srcStride;
+    tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */
+  }
+
+  tmpM2ssA = vec_ld(0, tmpbis);
+  tmpM2ssB = vec_ld(16, tmpbis);
+  tmpbis += tmpStride;
+  tmpM1ssA = vec_ld(0, tmpbis);
+  tmpM1ssB = vec_ld(16, tmpbis);
+  tmpbis += tmpStride;
+  tmpP0ssA = vec_ld(0, tmpbis);
+  tmpP0ssB = vec_ld(16, tmpbis);
+  tmpbis += tmpStride;
+  tmpP1ssA = vec_ld(0, tmpbis);
+  tmpP1ssB = vec_ld(16, tmpbis);
+  tmpbis += tmpStride;
+  tmpP2ssA = vec_ld(0, tmpbis);
+  tmpP2ssB = vec_ld(16, tmpbis);
+  tmpbis += tmpStride;
+
+  for (i = 0 ; i < 16 ; i++) {
+    const vector signed short tmpP3ssA = vec_ld(0, tmpbis);
+    const vector signed short tmpP3ssB = vec_ld(16, tmpbis);
+
+    const vector signed short sum1A = vec_adds(tmpP0ssA, tmpP1ssA);
+    const vector signed short sum1B = vec_adds(tmpP0ssB, tmpP1ssB);
+    const vector signed short sum2A = vec_adds(tmpM1ssA, tmpP2ssA);
+    const vector signed short sum2B = vec_adds(tmpM1ssB, tmpP2ssB);
+    const vector signed short sum3A = vec_adds(tmpM2ssA, tmpP3ssA);
+    const vector signed short sum3B = vec_adds(tmpM2ssB, tmpP3ssB);
+
+    tmpbis += tmpStride;
+
+    tmpM2ssA = tmpM1ssA;
+    tmpM2ssB = tmpM1ssB;
+    tmpM1ssA = tmpP0ssA;
+    tmpM1ssB = tmpP0ssB;
+    tmpP0ssA = tmpP1ssA;
+    tmpP0ssB = tmpP1ssB;
+    tmpP1ssA = tmpP2ssA;
+    tmpP1ssB = tmpP2ssB;
+    tmpP2ssA = tmpP3ssA;
+    tmpP2ssB = tmpP3ssB;
+
+    pp1Ae = vec_mule(sum1A, v20ss);
+    pp1Ao = vec_mulo(sum1A, v20ss);
+    pp1Be = vec_mule(sum1B, v20ss);
+    pp1Bo = vec_mulo(sum1B, v20ss);
+
+    pp2Ae = vec_mule(sum2A, v5ss);
+    pp2Ao = vec_mulo(sum2A, v5ss);
+    pp2Be = vec_mule(sum2B, v5ss);
+    pp2Bo = vec_mulo(sum2B, v5ss);
+
+    pp3Ae = vec_sra((vector signed int)sum3A, v16ui);
+    pp3Ao = vec_mulo(sum3A, v1ss);
+    pp3Be = vec_sra((vector signed int)sum3B, v16ui);
+    pp3Bo = vec_mulo(sum3B, v1ss);
+
+    pp1cAe = vec_add(pp1Ae, v512si);
+    pp1cAo = vec_add(pp1Ao, v512si);
+    pp1cBe = vec_add(pp1Be, v512si);
+    pp1cBo = vec_add(pp1Bo, v512si);
+
+    pp32Ae = vec_sub(pp3Ae, pp2Ae);
+    pp32Ao = vec_sub(pp3Ao, pp2Ao);
+    pp32Be = vec_sub(pp3Be, pp2Be);
+    pp32Bo = vec_sub(pp3Bo, pp2Bo);
+
+    sumAe = vec_add(pp1cAe, pp32Ae);
+    sumAo = vec_add(pp1cAo, pp32Ao);
+    sumBe = vec_add(pp1cBe, pp32Be);
+    sumBo = vec_add(pp1cBo, pp32Bo);
+
+    ssumAe = vec_sra(sumAe, v10ui);
+    ssumAo = vec_sra(sumAo, v10ui);
+    ssumBe = vec_sra(sumBe, v10ui);
+    ssumBo = vec_sra(sumBo, v10ui);
+
+    ssume = vec_packs(ssumAe, ssumBe);
+    ssumo = vec_packs(ssumAo, ssumBo);
+
+    sumv = vec_packsu(ssume, ssumo);
+    sum = vec_perm(sumv, sumv, mperm);
+
+    dst1 = vec_ld(0, dst);
+    dst2 = vec_ld(16, dst);
+    vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst));
+
+    OP_U8_ALTIVEC(fsum, sum, vdst);
+
+    rsum = vec_perm(fsum, fsum, dstperm);
+    fdst1 = vec_sel(dst1, rsum, dstmask);
+    fdst2 = vec_sel(rsum, dst2, dstmask);
+
+    vec_st(fdst1, 0, dst);
+    vec_st(fdst2, 16, dst);
+
+    dst += dstStride;
+  }
+  POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
+}
diff --git a/contrib/ffmpeg/libavcodec/ppc/idct_altivec.c b/contrib/ffmpeg/libavcodec/ppc/idct_altivec.c
new file mode 100644
index 000000000..cee46fc25
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/idct_altivec.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2001 Michel Lespinasse
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/*
+ * NOTE: This code is based on GPL code from the libmpeg2 project.  The
+ * author, Michel Lespinasses, has given explicit permission to release
+ * under LGPL as part of ffmpeg.
+ *
+ */
+
+/*
+ * FFMpeg integration by Dieter Shirley
+ *
+ * This file is a direct copy of the altivec idct module from the libmpeg2
+ * project.  I've deleted all of the libmpeg2 specific code, renamed the functions and
+ * re-ordered the function parameters.  The only change to the IDCT function
+ * itself was to factor out the partial transposition, and to perform a full
+ * transpose at the end of the function.
+ */
+
+
+#include <stdlib.h>                                      /* malloc(), free() */
+#include <string.h>
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+
+#include "dsputil_altivec.h"
+
+#define vector_s16_t vector signed short
+#define const_vector_s16_t const_vector signed short
+#define vector_u16_t vector unsigned short
+#define vector_s8_t vector signed char
+#define vector_u8_t vector unsigned char
+#define vector_s32_t vector signed int
+#define vector_u32_t vector unsigned int
+
+#define IDCT_HALF                                       \
+    /* 1st stage */                                     \
+    t1 = vec_mradds (a1, vx7, vx1 );                    \
+    t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7));    \
+    t7 = vec_mradds (a2, vx5, vx3);                     \
+    t3 = vec_mradds (ma2, vx3, vx5);                    \
+                                                        \
+    /* 2nd stage */                                     \
+    t5 = vec_adds (vx0, vx4);                           \
+    t0 = vec_subs (vx0, vx4);                           \
+    t2 = vec_mradds (a0, vx6, vx2);                     \
+    t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6));    \
+    t6 = vec_adds (t8, t3);                             \
+    t3 = vec_subs (t8, t3);                             \
+    t8 = vec_subs (t1, t7);                             \
+    t1 = vec_adds (t1, t7);                             \
+                                                        \
+    /* 3rd stage */                                     \
+    t7 = vec_adds (t5, t2);                             \
+    t2 = vec_subs (t5, t2);                             \
+    t5 = vec_adds (t0, t4);                             \
+    t0 = vec_subs (t0, t4);                             \
+    t4 = vec_subs (t8, t3);                             \
+    t3 = vec_adds (t8, t3);                             \
+                                                        \
+    /* 4th stage */                                     \
+    vy0 = vec_adds (t7, t1);                            \
+    vy7 = vec_subs (t7, t1);                            \
+    vy1 = vec_mradds (c4, t3, t5);                      \
+    vy6 = vec_mradds (mc4, t3, t5);                     \
+    vy2 = vec_mradds (c4, t4, t0);                      \
+    vy5 = vec_mradds (mc4, t4, t0);                     \
+    vy3 = vec_adds (t2, t6);                            \
+    vy4 = vec_subs (t2, t6);
+
+
+#define IDCT                                                            \
+    vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7;                \
+    vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7;                \
+    vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias;                  \
+    vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8;                    \
+    vector_u16_t shift;                                                 \
+                                                                        \
+    c4 = vec_splat (constants[0], 0);                                   \
+    a0 = vec_splat (constants[0], 1);                                   \
+    a1 = vec_splat (constants[0], 2);                                   \
+    a2 = vec_splat (constants[0], 3);                                   \
+    mc4 = vec_splat (constants[0], 4);                                  \
+    ma2 = vec_splat (constants[0], 5);                                  \
+    bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3);     \
+                                                                        \
+    zero = vec_splat_s16 (0);                                           \
+    shift = vec_splat_u16 (4);                                          \
+                                                                        \
+    vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero);    \
+    vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero);    \
+    vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero);    \
+    vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero);    \
+    vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero);    \
+    vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero);    \
+    vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero);    \
+    vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero);    \
+                                                                        \
+    IDCT_HALF                                                           \
+                                                                        \
+    vx0 = vec_mergeh (vy0, vy4);                                        \
+    vx1 = vec_mergel (vy0, vy4);                                        \
+    vx2 = vec_mergeh (vy1, vy5);                                        \
+    vx3 = vec_mergel (vy1, vy5);                                        \
+    vx4 = vec_mergeh (vy2, vy6);                                        \
+    vx5 = vec_mergel (vy2, vy6);                                        \
+    vx6 = vec_mergeh (vy3, vy7);                                        \
+    vx7 = vec_mergel (vy3, vy7);                                        \
+                                                                        \
+    vy0 = vec_mergeh (vx0, vx4);                                        \
+    vy1 = vec_mergel (vx0, vx4);                                        \
+    vy2 = vec_mergeh (vx1, vx5);                                        \
+    vy3 = vec_mergel (vx1, vx5);                                        \
+    vy4 = vec_mergeh (vx2, vx6);                                        \
+    vy5 = vec_mergel (vx2, vx6);                                        \
+    vy6 = vec_mergeh (vx3, vx7);                                        \
+    vy7 = vec_mergel (vx3, vx7);                                        \
+                                                                        \
+    vx0 = vec_adds (vec_mergeh (vy0, vy4), bias);                       \
+    vx1 = vec_mergel (vy0, vy4);                                        \
+    vx2 = vec_mergeh (vy1, vy5);                                        \
+    vx3 = vec_mergel (vy1, vy5);                                        \
+    vx4 = vec_mergeh (vy2, vy6);                                        \
+    vx5 = vec_mergel (vy2, vy6);                                        \
+    vx6 = vec_mergeh (vy3, vy7);                                        \
+    vx7 = vec_mergel (vy3, vy7);                                        \
+                                                                        \
+    IDCT_HALF                                                           \
+                                                                        \
+    shift = vec_splat_u16 (6);                                          \
+    vx0 = vec_sra (vy0, shift);                                         \
+    vx1 = vec_sra (vy1, shift);                                         \
+    vx2 = vec_sra (vy2, shift);                                         \
+    vx3 = vec_sra (vy3, shift);                                         \
+    vx4 = vec_sra (vy4, shift);                                         \
+    vx5 = vec_sra (vy5, shift);                                         \
+    vx6 = vec_sra (vy6, shift);                                         \
+    vx7 = vec_sra (vy7, shift);
+
+
+static const_vector_s16_t constants[5] = {
+    (vector_s16_t) AVV(23170, 13573, 6518, 21895, -23170, -21895, 32, 31),
+    (vector_s16_t) AVV(16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725),
+    (vector_s16_t) AVV(22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521),
+    (vector_s16_t) AVV(21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692),
+    (vector_s16_t) AVV(19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722)
+};
+
+void idct_put_altivec(uint8_t* dest, int stride, vector_s16_t* block)
+{
+POWERPC_PERF_DECLARE(altivec_idct_put_num, 1);
+    vector_u8_t tmp;
+
+#ifdef POWERPC_PERFORMANCE_REPORT
+POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
+#endif
+    IDCT
+
+#define COPY(dest,src)                                          \
+    tmp = vec_packsu (src, src);                                \
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);       \
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+
+    COPY (dest, vx0)    dest += stride;
+    COPY (dest, vx1)    dest += stride;
+    COPY (dest, vx2)    dest += stride;
+    COPY (dest, vx3)    dest += stride;
+    COPY (dest, vx4)    dest += stride;
+    COPY (dest, vx5)    dest += stride;
+    COPY (dest, vx6)    dest += stride;
+    COPY (dest, vx7)
+
+POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
+}
+
+void idct_add_altivec(uint8_t* dest, int stride, vector_s16_t* block)
+{
+POWERPC_PERF_DECLARE(altivec_idct_add_num, 1);
+    vector_u8_t tmp;
+    vector_s16_t tmp2, tmp3;
+    vector_u8_t perm0;
+    vector_u8_t perm1;
+    vector_u8_t p0, p1, p;
+
+#ifdef POWERPC_PERFORMANCE_REPORT
+POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
+#endif
+
+    IDCT
+
+    p0 = vec_lvsl (0, dest);
+    p1 = vec_lvsl (stride, dest);
+    p = vec_splat_u8 (-1);
+    perm0 = vec_mergeh (p, p0);
+    perm1 = vec_mergeh (p, p1);
+
+#define ADD(dest,src,perm)                                              \
+    /* *(uint64_t *)&tmp = *(uint64_t *)dest; */                        \
+    tmp = vec_ld (0, dest);                                             \
+    tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm);       \
+    tmp3 = vec_adds (tmp2, src);                                        \
+    tmp = vec_packsu (tmp3, tmp3);                                      \
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);               \
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+
+    ADD (dest, vx0, perm0)      dest += stride;
+    ADD (dest, vx1, perm1)      dest += stride;
+    ADD (dest, vx2, perm0)      dest += stride;
+    ADD (dest, vx3, perm1)      dest += stride;
+    ADD (dest, vx4, perm0)      dest += stride;
+    ADD (dest, vx5, perm1)      dest += stride;
+    ADD (dest, vx6, perm0)      dest += stride;
+    ADD (dest, vx7, perm1)
+
+POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);
+}
+
diff --git a/contrib/ffmpeg/libavcodec/ppc/mathops.h b/contrib/ffmpeg/libavcodec/ppc/mathops.h
new file mode 100644
index 000000000..6af23f246
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/mathops.h
@@ -0,0 +1,33 @@
+/*
+ * simple math operations
+ * Copyright (c) 2001, 2002 Fabrice Bellard.
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#if defined(ARCH_POWERPC_405)
+/* signed 16x16 -> 32 multiply add accumulate */
+#   define MAC16(rt, ra, rb) \
+        asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
+
+/* signed 16x16 -> 32 multiply */
+#   define MUL16(ra, rb) \
+        ({ int __rt;
+         asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb));
+         __rt; })
+#endif
diff --git a/contrib/ffmpeg/libavcodec/ppc/mpegvideo_altivec.c b/contrib/ffmpeg/libavcodec/ppc/mpegvideo_altivec.c
new file mode 100644
index 000000000..3822cb20e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/mpegvideo_altivec.c
@@ -0,0 +1,603 @@
+/*
+ * Copyright (c) 2002 Dieter Shirley
+ *
+ * dct_unquantize_h263_altivec:
+ * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+
+#include "gcc_fixes.h"
+
+#include "dsputil_altivec.h"
+
+// Swaps two variables (used for altivec registers)
+#define SWAP(a,b) \
+do { \
+    __typeof__(a) swap_temp=a; \
+    a=b; \
+    b=swap_temp; \
+} while (0)
+
+// transposes a matrix consisting of four vectors with four elements each
+#define TRANSPOSE4(a,b,c,d) \
+do { \
+  __typeof__(a) _trans_ach = vec_mergeh(a, c); \
+  __typeof__(a) _trans_acl = vec_mergel(a, c); \
+  __typeof__(a) _trans_bdh = vec_mergeh(b, d); \
+  __typeof__(a) _trans_bdl = vec_mergel(b, d); \
+ \
+  a = vec_mergeh(_trans_ach, _trans_bdh); \
+  b = vec_mergel(_trans_ach, _trans_bdh); \
+  c = vec_mergeh(_trans_acl, _trans_bdl); \
+  d = vec_mergel(_trans_acl, _trans_bdl); \
+} while (0)
+
+
+// Loads a four-byte value (int or float) from the target address
+// into every element in the target vector.  Only works if the
+// target address is four-byte aligned (which should be always).
+#define LOAD4(vec, address) \
+{ \
+    __typeof__(vec)* _load_addr = (__typeof__(vec)*)(address); \
+    vector unsigned char _perm_vec = vec_lvsl(0,(address)); \
+    vec = vec_ld(0, _load_addr); \
+    vec = vec_perm(vec, vec, _perm_vec); \
+    vec = vec_splat(vec, 0); \
+}
+
+
+#ifdef CONFIG_DARWIN
+#define FOUROF(a) (a)
+#else
+// slower, for dumb non-apple GCC
+#define FOUROF(a) {a,a,a,a}
+#endif
+int dct_quantize_altivec(MpegEncContext* s,
+                        DCTELEM* data, int n,
+                        int qscale, int* overflow)
+{
+    int lastNonZero;
+    vector float row0, row1, row2, row3, row4, row5, row6, row7;
+    vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7;
+    const_vector float zero = (const_vector float)FOUROF(0.);
+    // used after quantise step
+    int oldBaseValue = 0;
+
+    // Load the data into the row/alt vectors
+    {
+        vector signed short data0, data1, data2, data3, data4, data5, data6, data7;
+
+        data0 = vec_ld(0, data);
+        data1 = vec_ld(16, data);
+        data2 = vec_ld(32, data);
+        data3 = vec_ld(48, data);
+        data4 = vec_ld(64, data);
+        data5 = vec_ld(80, data);
+        data6 = vec_ld(96, data);
+        data7 = vec_ld(112, data);
+
+        // Transpose the data before we start
+        TRANSPOSE8(data0, data1, data2, data3, data4, data5, data6, data7);
+
+        // load the data into floating point vectors.  We load
+        // the high half of each row into the main row vectors
+        // and the low half into the alt vectors.
+        row0 = vec_ctf(vec_unpackh(data0), 0);
+        alt0 = vec_ctf(vec_unpackl(data0), 0);
+        row1 = vec_ctf(vec_unpackh(data1), 0);
+        alt1 = vec_ctf(vec_unpackl(data1), 0);
+        row2 = vec_ctf(vec_unpackh(data2), 0);
+        alt2 = vec_ctf(vec_unpackl(data2), 0);
+        row3 = vec_ctf(vec_unpackh(data3), 0);
+        alt3 = vec_ctf(vec_unpackl(data3), 0);
+        row4 = vec_ctf(vec_unpackh(data4), 0);
+        alt4 = vec_ctf(vec_unpackl(data4), 0);
+        row5 = vec_ctf(vec_unpackh(data5), 0);
+        alt5 = vec_ctf(vec_unpackl(data5), 0);
+        row6 = vec_ctf(vec_unpackh(data6), 0);
+        alt6 = vec_ctf(vec_unpackl(data6), 0);
+        row7 = vec_ctf(vec_unpackh(data7), 0);
+        alt7 = vec_ctf(vec_unpackl(data7), 0);
+    }
+
+    // The following block could exist as a separate an altivec dct
+                // function.  However, if we put it inline, the DCT data can remain
+                // in the vector local variables, as floats, which we'll use during the
+                // quantize step...
+    {
+        const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f);
+        const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f);
+        const vector float vec_0_541196100 = (vector float)FOUROF(0.541196100f);
+        const vector float vec_0_765366865 = (vector float)FOUROF(0.765366865f);
+        const vector float vec_0_899976223 = (vector float)FOUROF(-0.899976223f);
+        const vector float vec_1_175875602 = (vector float)FOUROF(1.175875602f);
+        const vector float vec_1_501321110 = (vector float)FOUROF(1.501321110f);
+        const vector float vec_1_847759065 = (vector float)FOUROF(-1.847759065f);
+        const vector float vec_1_961570560 = (vector float)FOUROF(-1.961570560f);
+        const vector float vec_2_053119869 = (vector float)FOUROF(2.053119869f);
+        const vector float vec_2_562915447 = (vector float)FOUROF(-2.562915447f);
+        const vector float vec_3_072711026 = (vector float)FOUROF(3.072711026f);
+
+
+        int whichPass, whichHalf;
+
+        for(whichPass = 1; whichPass<=2; whichPass++)
+        {
+            for(whichHalf = 1; whichHalf<=2; whichHalf++)
+            {
+                vector float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+                vector float tmp10, tmp11, tmp12, tmp13;
+                vector float z1, z2, z3, z4, z5;
+
+                tmp0 = vec_add(row0, row7); // tmp0 = dataptr[0] + dataptr[7];
+                tmp7 = vec_sub(row0, row7); // tmp7 = dataptr[0] - dataptr[7];
+                tmp3 = vec_add(row3, row4); // tmp3 = dataptr[3] + dataptr[4];
+                tmp4 = vec_sub(row3, row4); // tmp4 = dataptr[3] - dataptr[4];
+                tmp1 = vec_add(row1, row6); // tmp1 = dataptr[1] + dataptr[6];
+                tmp6 = vec_sub(row1, row6); // tmp6 = dataptr[1] - dataptr[6];
+                tmp2 = vec_add(row2, row5); // tmp2 = dataptr[2] + dataptr[5];
+                tmp5 = vec_sub(row2, row5); // tmp5 = dataptr[2] - dataptr[5];
+
+                tmp10 = vec_add(tmp0, tmp3); // tmp10 = tmp0 + tmp3;
+                tmp13 = vec_sub(tmp0, tmp3); // tmp13 = tmp0 - tmp3;
+                tmp11 = vec_add(tmp1, tmp2); // tmp11 = tmp1 + tmp2;
+                tmp12 = vec_sub(tmp1, tmp2); // tmp12 = tmp1 - tmp2;
+
+
+                // dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
+                row0 = vec_add(tmp10, tmp11);
+
+                // dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
+                row4 = vec_sub(tmp10, tmp11);
+
+
+                // z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+                z1 = vec_madd(vec_add(tmp12, tmp13), vec_0_541196100, (vector float)zero);
+
+                // dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+                //                                CONST_BITS-PASS1_BITS);
+                row2 = vec_madd(tmp13, vec_0_765366865, z1);
+
+                // dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+                //                                CONST_BITS-PASS1_BITS);
+                row6 = vec_madd(tmp12, vec_1_847759065, z1);
+
+                z1 = vec_add(tmp4, tmp7); // z1 = tmp4 + tmp7;
+                z2 = vec_add(tmp5, tmp6); // z2 = tmp5 + tmp6;
+                z3 = vec_add(tmp4, tmp6); // z3 = tmp4 + tmp6;
+                z4 = vec_add(tmp5, tmp7); // z4 = tmp5 + tmp7;
+
+                // z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+                z5 = vec_madd(vec_add(z3, z4), vec_1_175875602, (vector float)zero);
+
+                // z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+                z3 = vec_madd(z3, vec_1_961570560, z5);
+
+                // z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+                z4 = vec_madd(z4, vec_0_390180644, z5);
+
+                // The following adds are rolled into the multiplies above
+                // z3 = vec_add(z3, z5);  // z3 += z5;
+                // z4 = vec_add(z4, z5);  // z4 += z5;
+
+                // z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+                // Wow!  It's actually more effecient to roll this multiply
+                // into the adds below, even thought the multiply gets done twice!
+                // z2 = vec_madd(z2, vec_2_562915447, (vector float)zero);
+
+                // z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+                // Same with this one...
+                // z1 = vec_madd(z1, vec_0_899976223, (vector float)zero);
+
+                // tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+                // dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
+                row7 = vec_madd(tmp4, vec_0_298631336, vec_madd(z1, vec_0_899976223, z3));
+
+                // tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+                // dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
+                row5 = vec_madd(tmp5, vec_2_053119869, vec_madd(z2, vec_2_562915447, z4));
+
+                // tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+                // dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
+                row3 = vec_madd(tmp6, vec_3_072711026, vec_madd(z2, vec_2_562915447, z3));
+
+                // tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+                // dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
+                row1 = vec_madd(z1, vec_0_899976223, vec_madd(tmp7, vec_1_501321110, z4));
+
+                // Swap the row values with the alts.  If this is the first half,
+                // this sets up the low values to be acted on in the second half.
+                // If this is the second half, it puts the high values back in
+                // the row values where they are expected to be when we're done.
+                SWAP(row0, alt0);
+                SWAP(row1, alt1);
+                SWAP(row2, alt2);
+                SWAP(row3, alt3);
+                SWAP(row4, alt4);
+                SWAP(row5, alt5);
+                SWAP(row6, alt6);
+                SWAP(row7, alt7);
+            }
+
+            if (whichPass == 1)
+            {
+                // transpose the data for the second pass
+
+                // First, block transpose the upper right with lower left.
+                SWAP(row4, alt0);
+                SWAP(row5, alt1);
+                SWAP(row6, alt2);
+                SWAP(row7, alt3);
+
+                // Now, transpose each block of four
+                TRANSPOSE4(row0, row1, row2, row3);
+                TRANSPOSE4(row4, row5, row6, row7);
+                TRANSPOSE4(alt0, alt1, alt2, alt3);
+                TRANSPOSE4(alt4, alt5, alt6, alt7);
+            }
+        }
+    }
+
+    // perform the quantise step, using the floating point data
+    // still in the row/alt registers
+    {
+        const int* biasAddr;
+        const vector signed int* qmat;
+        vector float bias, negBias;
+
+        if (s->mb_intra)
+        {
+            vector signed int baseVector;
+
+            // We must cache element 0 in the intra case
+            // (it needs special handling).
+            baseVector = vec_cts(vec_splat(row0, 0), 0);
+            vec_ste(baseVector, 0, &oldBaseValue);
+
+            qmat = (vector signed int*)s->q_intra_matrix[qscale];
+            biasAddr = &(s->intra_quant_bias);
+        }
+        else
+        {
+            qmat = (vector signed int*)s->q_inter_matrix[qscale];
+            biasAddr = &(s->inter_quant_bias);
+        }
+
+        // Load the bias vector (We add 0.5 to the bias so that we're
+                                // rounding when we convert to int, instead of flooring.)
+        {
+            vector signed int biasInt;
+            const vector float negOneFloat = (vector float)FOUROF(-1.0f);
+            LOAD4(biasInt, biasAddr);
+            bias = vec_ctf(biasInt, QUANT_BIAS_SHIFT);
+            negBias = vec_madd(bias, negOneFloat, zero);
+        }
+
+        {
+            vector float q0, q1, q2, q3, q4, q5, q6, q7;
+
+            q0 = vec_ctf(qmat[0], QMAT_SHIFT);
+            q1 = vec_ctf(qmat[2], QMAT_SHIFT);
+            q2 = vec_ctf(qmat[4], QMAT_SHIFT);
+            q3 = vec_ctf(qmat[6], QMAT_SHIFT);
+            q4 = vec_ctf(qmat[8], QMAT_SHIFT);
+            q5 = vec_ctf(qmat[10], QMAT_SHIFT);
+            q6 = vec_ctf(qmat[12], QMAT_SHIFT);
+            q7 = vec_ctf(qmat[14], QMAT_SHIFT);
+
+            row0 = vec_sel(vec_madd(row0, q0, negBias), vec_madd(row0, q0, bias),
+                    vec_cmpgt(row0, zero));
+            row1 = vec_sel(vec_madd(row1, q1, negBias), vec_madd(row1, q1, bias),
+                    vec_cmpgt(row1, zero));
+            row2 = vec_sel(vec_madd(row2, q2, negBias), vec_madd(row2, q2, bias),
+                    vec_cmpgt(row2, zero));
+            row3 = vec_sel(vec_madd(row3, q3, negBias), vec_madd(row3, q3, bias),
+                    vec_cmpgt(row3, zero));
+            row4 = vec_sel(vec_madd(row4, q4, negBias), vec_madd(row4, q4, bias),
+                    vec_cmpgt(row4, zero));
+            row5 = vec_sel(vec_madd(row5, q5, negBias), vec_madd(row5, q5, bias),
+                    vec_cmpgt(row5, zero));
+            row6 = vec_sel(vec_madd(row6, q6, negBias), vec_madd(row6, q6, bias),
+                    vec_cmpgt(row6, zero));
+            row7 = vec_sel(vec_madd(row7, q7, negBias), vec_madd(row7, q7, bias),
+                    vec_cmpgt(row7, zero));
+
+            q0 = vec_ctf(qmat[1], QMAT_SHIFT);
+            q1 = vec_ctf(qmat[3], QMAT_SHIFT);
+            q2 = vec_ctf(qmat[5], QMAT_SHIFT);
+            q3 = vec_ctf(qmat[7], QMAT_SHIFT);
+            q4 = vec_ctf(qmat[9], QMAT_SHIFT);
+            q5 = vec_ctf(qmat[11], QMAT_SHIFT);
+            q6 = vec_ctf(qmat[13], QMAT_SHIFT);
+            q7 = vec_ctf(qmat[15], QMAT_SHIFT);
+
+            alt0 = vec_sel(vec_madd(alt0, q0, negBias), vec_madd(alt0, q0, bias),
+                    vec_cmpgt(alt0, zero));
+            alt1 = vec_sel(vec_madd(alt1, q1, negBias), vec_madd(alt1, q1, bias),
+                    vec_cmpgt(alt1, zero));
+            alt2 = vec_sel(vec_madd(alt2, q2, negBias), vec_madd(alt2, q2, bias),
+                    vec_cmpgt(alt2, zero));
+            alt3 = vec_sel(vec_madd(alt3, q3, negBias), vec_madd(alt3, q3, bias),
+                    vec_cmpgt(alt3, zero));
+            alt4 = vec_sel(vec_madd(alt4, q4, negBias), vec_madd(alt4, q4, bias),
+                    vec_cmpgt(alt4, zero));
+            alt5 = vec_sel(vec_madd(alt5, q5, negBias), vec_madd(alt5, q5, bias),
+                    vec_cmpgt(alt5, zero));
+            alt6 = vec_sel(vec_madd(alt6, q6, negBias), vec_madd(alt6, q6, bias),
+                    vec_cmpgt(alt6, zero));
+            alt7 = vec_sel(vec_madd(alt7, q7, negBias), vec_madd(alt7, q7, bias),
+                    vec_cmpgt(alt7, zero));
+        }
+
+
+    }
+
+    // Store the data back into the original block
+    {
+        vector signed short data0, data1, data2, data3, data4, data5, data6, data7;
+
+        data0 = vec_pack(vec_cts(row0, 0), vec_cts(alt0, 0));
+        data1 = vec_pack(vec_cts(row1, 0), vec_cts(alt1, 0));
+        data2 = vec_pack(vec_cts(row2, 0), vec_cts(alt2, 0));
+        data3 = vec_pack(vec_cts(row3, 0), vec_cts(alt3, 0));
+        data4 = vec_pack(vec_cts(row4, 0), vec_cts(alt4, 0));
+        data5 = vec_pack(vec_cts(row5, 0), vec_cts(alt5, 0));
+        data6 = vec_pack(vec_cts(row6, 0), vec_cts(alt6, 0));
+        data7 = vec_pack(vec_cts(row7, 0), vec_cts(alt7, 0));
+
+        {
+            // Clamp for overflow
+            vector signed int max_q_int, min_q_int;
+            vector signed short max_q, min_q;
+
+            LOAD4(max_q_int, &(s->max_qcoeff));
+            LOAD4(min_q_int, &(s->min_qcoeff));
+
+            max_q = vec_pack(max_q_int, max_q_int);
+            min_q = vec_pack(min_q_int, min_q_int);
+
+            data0 = vec_max(vec_min(data0, max_q), min_q);
+            data1 = vec_max(vec_min(data1, max_q), min_q);
+            data2 = vec_max(vec_min(data2, max_q), min_q);
+            data4 = vec_max(vec_min(data4, max_q), min_q);
+            data5 = vec_max(vec_min(data5, max_q), min_q);
+            data6 = vec_max(vec_min(data6, max_q), min_q);
+            data7 = vec_max(vec_min(data7, max_q), min_q);
+        }
+
+        {
+        vector bool char zero_01, zero_23, zero_45, zero_67;
+        vector signed char scanIndices_01, scanIndices_23, scanIndices_45, scanIndices_67;
+        vector signed char negOne = vec_splat_s8(-1);
+        vector signed char* scanPtr =
+                (vector signed char*)(s->intra_scantable.inverse);
+        signed char lastNonZeroChar;
+
+        // Determine the largest non-zero index.
+        zero_01 = vec_pack(vec_cmpeq(data0, (vector signed short)zero),
+                vec_cmpeq(data1, (vector signed short)zero));
+        zero_23 = vec_pack(vec_cmpeq(data2, (vector signed short)zero),
+                vec_cmpeq(data3, (vector signed short)zero));
+        zero_45 = vec_pack(vec_cmpeq(data4, (vector signed short)zero),
+                vec_cmpeq(data5, (vector signed short)zero));
+        zero_67 = vec_pack(vec_cmpeq(data6, (vector signed short)zero),
+                vec_cmpeq(data7, (vector signed short)zero));
+
+        // 64 biggest values
+        scanIndices_01 = vec_sel(scanPtr[0], negOne, zero_01);
+        scanIndices_23 = vec_sel(scanPtr[1], negOne, zero_23);
+        scanIndices_45 = vec_sel(scanPtr[2], negOne, zero_45);
+        scanIndices_67 = vec_sel(scanPtr[3], negOne, zero_67);
+
+        // 32 largest values
+        scanIndices_01 = vec_max(scanIndices_01, scanIndices_23);
+        scanIndices_45 = vec_max(scanIndices_45, scanIndices_67);
+
+        // 16 largest values
+        scanIndices_01 = vec_max(scanIndices_01, scanIndices_45);
+
+        // 8 largest values
+        scanIndices_01 = vec_max(vec_mergeh(scanIndices_01, negOne),
+                vec_mergel(scanIndices_01, negOne));
+
+        // 4 largest values
+        scanIndices_01 = vec_max(vec_mergeh(scanIndices_01, negOne),
+                vec_mergel(scanIndices_01, negOne));
+
+        // 2 largest values
+        scanIndices_01 = vec_max(vec_mergeh(scanIndices_01, negOne),
+                vec_mergel(scanIndices_01, negOne));
+
+        // largest value
+        scanIndices_01 = vec_max(vec_mergeh(scanIndices_01, negOne),
+                vec_mergel(scanIndices_01, negOne));
+
+        scanIndices_01 = vec_splat(scanIndices_01, 0);
+
+
+        vec_ste(scanIndices_01, 0, &lastNonZeroChar);
+
+        lastNonZero = lastNonZeroChar;
+
+        // While the data is still in vectors we check for the transpose IDCT permute
+        // and handle it using the vector unit if we can.  This is the permute used
+        // by the altivec idct, so it is common when using the altivec dct.
+
+        if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM))
+        {
+            TRANSPOSE8(data0, data1, data2, data3, data4, data5, data6, data7);
+        }
+
+        vec_st(data0, 0, data);
+        vec_st(data1, 16, data);
+        vec_st(data2, 32, data);
+        vec_st(data3, 48, data);
+        vec_st(data4, 64, data);
+        vec_st(data5, 80, data);
+        vec_st(data6, 96, data);
+        vec_st(data7, 112, data);
+        }
+    }
+
+    // special handling of block[0]
+    if (s->mb_intra)
+    {
+        if (!s->h263_aic)
+        {
+            if (n < 4)
+                oldBaseValue /= s->y_dc_scale;
+            else
+                oldBaseValue /= s->c_dc_scale;
+        }
+
+        // Divide by 8, rounding the result
+        data[0] = (oldBaseValue + 4) >> 3;
+    }
+
+    // We handled the tranpose permutation above and we don't
+    // need to permute the "no" permutation case.
+    if ((lastNonZero > 0) &&
+        (s->dsp.idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) &&
+        (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM))
+    {
+        ff_block_permute(data, s->dsp.idct_permutation,
+                s->intra_scantable.scantable, lastNonZero);
+    }
+
+    return lastNonZero;
+}
+#undef FOUROF
+
+/*
+  AltiVec version of dct_unquantize_h263
+  this code assumes `block' is 16 bytes-aligned
+*/
+void dct_unquantize_h263_altivec(MpegEncContext *s,
+                                 DCTELEM *block, int n, int qscale)
+{
+POWERPC_PERF_DECLARE(altivec_dct_unquantize_h263_num, 1);
+    int i, level, qmul, qadd;
+    int nCoeffs;
+
+    assert(s->block_last_index[n]>=0);
+
+POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
+
+    qadd = (qscale - 1) | 1;
+    qmul = qscale << 1;
+
+    if (s->mb_intra) {
+        if (!s->h263_aic) {
+            if (n < 4)
+                block[0] = block[0] * s->y_dc_scale;
+            else
+                block[0] = block[0] * s->c_dc_scale;
+        }else
+            qadd = 0;
+        i = 1;
+        nCoeffs= 63; //does not allways use zigzag table
+    } else {
+        i = 0;
+        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
+    }
+
+    {
+      register const_vector signed short vczero = (const_vector signed short)vec_splat_s16(0);
+      short __attribute__ ((aligned(16))) qmul8[] =
+          {
+            qmul, qmul, qmul, qmul,
+            qmul, qmul, qmul, qmul
+          };
+      short __attribute__ ((aligned(16))) qadd8[] =
+          {
+            qadd, qadd, qadd, qadd,
+            qadd, qadd, qadd, qadd
+          };
+      short __attribute__ ((aligned(16))) nqadd8[] =
+          {
+            -qadd, -qadd, -qadd, -qadd,
+            -qadd, -qadd, -qadd, -qadd
+          };
+      register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
+      register vector bool short blockv_null, blockv_neg;
+      register short backup_0 = block[0];
+      register int j = 0;
+
+      qmulv = vec_ld(0, qmul8);
+      qaddv = vec_ld(0, qadd8);
+      nqaddv = vec_ld(0, nqadd8);
+
+#if 0 // block *is* 16 bytes-aligned, it seems.
+      // first make sure block[j] is 16 bytes-aligned
+      for(j = 0; (j <= nCoeffs) && ((((unsigned long)block) + (j << 1)) & 0x0000000F) ; j++) {
+        level = block[j];
+        if (level) {
+          if (level < 0) {
+                level = level * qmul - qadd;
+            } else {
+                level = level * qmul + qadd;
+            }
+            block[j] = level;
+        }
+      }
+#endif
+
+      // vectorize all the 16 bytes-aligned blocks
+      // of 8 elements
+      for(; (j + 7) <= nCoeffs ; j+=8)
+      {
+        blockv = vec_ld(j << 1, block);
+        blockv_neg = vec_cmplt(blockv, vczero);
+        blockv_null = vec_cmpeq(blockv, vczero);
+        // choose between +qadd or -qadd as the third operand
+        temp1 = vec_sel(qaddv, nqaddv, blockv_neg);
+        // multiply & add (block{i,i+7} * qmul [+-] qadd)
+        temp1 = vec_mladd(blockv, qmulv, temp1);
+        // put 0 where block[{i,i+7} used to have 0
+        blockv = vec_sel(temp1, blockv, blockv_null);
+        vec_st(blockv, j << 1, block);
+      }
+
+      // if nCoeffs isn't a multiple of 8, finish the job
+      // using good old scalar units.
+      // (we could do it using a truncated vector,
+      // but I'm not sure it's worth the hassle)
+      for(; j <= nCoeffs ; j++) {
+        level = block[j];
+        if (level) {
+          if (level < 0) {
+                level = level * qmul - qadd;
+            } else {
+                level = level * qmul + qadd;
+            }
+            block[j] = level;
+        }
+      }
+
+      if (i == 1)
+      { // cheat. this avoid special-casing the first iteration
+        block[0] = backup_0;
+      }
+    }
+POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63);
+}
diff --git a/contrib/ffmpeg/libavcodec/ppc/mpegvideo_ppc.c b/contrib/ffmpeg/libavcodec/ppc/mpegvideo_ppc.c
new file mode 100644
index 000000000..c5e822f77
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/mpegvideo_ppc.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2002 Dieter Shirley
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+#include <time.h>
+
+#ifdef HAVE_ALTIVEC
+#include "dsputil_altivec.h"
+#endif
+
+extern int dct_quantize_altivec(MpegEncContext *s,
+        DCTELEM *block, int n,
+        int qscale, int *overflow);
+extern void dct_unquantize_h263_altivec(MpegEncContext *s,
+                                        DCTELEM *block, int n, int qscale);
+
+extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
+extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
+
+
+void MPV_common_init_ppc(MpegEncContext *s)
+{
+#ifdef HAVE_ALTIVEC
+    if (has_altivec())
+    {
+      if (s->avctx->lowres==0)
+      {
+        if ((s->avctx->idct_algo == FF_IDCT_AUTO) ||
+                (s->avctx->idct_algo == FF_IDCT_ALTIVEC))
+        {
+            s->dsp.idct_put = idct_put_altivec;
+            s->dsp.idct_add = idct_add_altivec;
+            s->dsp.idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
+        }
+      }
+
+        // Test to make sure that the dct required alignments are met.
+        if ((((long)(s->q_intra_matrix) & 0x0f) != 0) ||
+                (((long)(s->q_inter_matrix) & 0x0f) != 0))
+        {
+            av_log(s->avctx, AV_LOG_INFO, "Internal Error: q-matrix blocks must be 16-byte aligned "
+                    "to use Altivec DCT. Reverting to non-altivec version.\n");
+            return;
+        }
+
+        if (((long)(s->intra_scantable.inverse) & 0x0f) != 0)
+        {
+            av_log(s->avctx, AV_LOG_INFO, "Internal Error: scan table blocks must be 16-byte aligned "
+                    "to use Altivec DCT. Reverting to non-altivec version.\n");
+            return;
+        }
+
+
+        if ((s->avctx->dct_algo == FF_DCT_AUTO) ||
+                (s->avctx->dct_algo == FF_DCT_ALTIVEC))
+        {
+#if 0 /* seems to cause trouble under some circumstances */
+            s->dct_quantize = dct_quantize_altivec;
+#endif
+            s->dct_unquantize_h263_intra = dct_unquantize_h263_altivec;
+            s->dct_unquantize_h263_inter = dct_unquantize_h263_altivec;
+        }
+    } else
+#endif
+    {
+        /* Non-AltiVec PPC optimisations here */
+    }
+}
+
diff --git a/contrib/ffmpeg/libavcodec/ppc/snow_altivec.c b/contrib/ffmpeg/libavcodec/ppc/snow_altivec.c
new file mode 100644
index 000000000..b15672ffe
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/snow_altivec.c
@@ -0,0 +1,788 @@
+/*
+ * Altivec optimized snow DSP utils
+ * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *
+ */
+
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+#include "dsputil_altivec.h"
+#include "../snow.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+
+
+//FIXME remove this replication
+#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
+
+static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
+{
+    int offset;
+    DWTELEM * buffer;
+
+//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
+
+    assert(buf->data_stack_top >= 0);
+//  assert(!buf->line[line]);
+    if (buf->line[line])
+        return buf->line[line];
+
+    offset = buf->line_width * line;
+    buffer = buf->data_stack[buf->data_stack_top];
+    buf->data_stack_top--;
+    buf->line[line] = buffer;
+
+//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
+
+    return buffer;
+}
+
+
+//altivec code
+
+void ff_snow_horizontal_compose97i_altivec(DWTELEM *b, int width)
+{
+    const int w2= (width+1)>>1;
+    DECLARE_ALIGNED_16(DWTELEM, temp[(width>>1)]);
+    const int w_l= (width>>1);
+    const int w_r= w2 - 1;
+    int i;
+    vector signed int t1, t2, x, y, tmp1, tmp2;
+    vector signed int *vbuf, *vtmp;
+    vector unsigned char align;
+
+
+
+    { // Lift 0
+        DWTELEM * const ref = b + w2 - 1;
+        DWTELEM b_0 = b[0];
+        vbuf = (vector signed int *)b;
+
+        tmp1 = vec_ld (0, ref);
+        align = vec_lvsl (0, ref);
+        tmp2 = vec_ld (15, ref);
+        t1= vec_perm(tmp1, tmp2, align);
+
+        i = 0;
+
+        for (i=0; i<w_l-15; i+=16) {
+#if 0
+        b[i+0] = b[i+0] - ((3 * (ref[i+0] + ref[i+1]) + 4) >> 3);
+        b[i+1] = b[i+1] - ((3 * (ref[i+1] + ref[i+2]) + 4) >> 3);
+        b[i+2] = b[i+2] - ((3 * (ref[i+2] + ref[i+3]) + 4) >> 3);
+        b[i+3] = b[i+3] - ((3 * (ref[i+3] + ref[i+4]) + 4) >> 3);
+#else
+
+        tmp1 = vec_ld (0, ref+4+i);
+        tmp2 = vec_ld (15, ref+4+i);
+
+        t2 = vec_perm(tmp1, tmp2, align);
+
+        y = vec_add(t1,vec_sld(t1,t2,4));
+        y = vec_add(vec_add(y,y),y);
+
+        tmp1 = vec_ld (0, ref+8+i);
+
+        y = vec_add(y, vec_splat_s32(4));
+        y = vec_sra(y, vec_splat_u32(3));
+
+        tmp2 = vec_ld (15, ref+8+i);
+
+        *vbuf = vec_sub(*vbuf, y);
+
+        t1=t2;
+
+        vbuf++;
+
+        t2 = vec_perm(tmp1, tmp2, align);
+
+        y = vec_add(t1,vec_sld(t1,t2,4));
+        y = vec_add(vec_add(y,y),y);
+
+        tmp1 = vec_ld (0, ref+12+i);
+
+        y = vec_add(y, vec_splat_s32(4));
+        y = vec_sra(y, vec_splat_u32(3));
+
+        tmp2 = vec_ld (15, ref+12+i);
+
+        *vbuf = vec_sub(*vbuf, y);
+
+        t1=t2;
+
+        vbuf++;
+
+        t2 = vec_perm(tmp1, tmp2, align);
+
+        y = vec_add(t1,vec_sld(t1,t2,4));
+        y = vec_add(vec_add(y,y),y);
+
+        tmp1 = vec_ld (0, ref+16+i);
+
+        y = vec_add(y, vec_splat_s32(4));
+        y = vec_sra(y, vec_splat_u32(3));
+
+        tmp2 = vec_ld (15, ref+16+i);
+
+        *vbuf = vec_sub(*vbuf, y);
+
+        t1=t2;
+
+        t2 = vec_perm(tmp1, tmp2, align);
+
+        y = vec_add(t1,vec_sld(t1,t2,4));
+        y = vec_add(vec_add(y,y),y);
+
+        vbuf++;
+
+        y = vec_add(y, vec_splat_s32(4));
+        y = vec_sra(y, vec_splat_u32(3));
+        *vbuf = vec_sub(*vbuf, y);
+
+        t1=t2;
+
+        vbuf++;
+
+#endif
+        }
+
+        snow_horizontal_compose_lift_lead_out(i, b, b, ref, width, w_l, 0, W_DM, W_DO, W_DS);
+        b[0] = b_0 - ((W_DM * 2 * ref[1]+W_DO)>>W_DS);
+    }
+
+    { // Lift 1
+        DWTELEM * const dst = b+w2;
+
+        i = 0;
+        for(; (((long)&dst[i]) & 0xF) && i<w_r; i++){
+            dst[i] = dst[i] - (b[i] + b[i + 1]);
+        }
+
+        align = vec_lvsl(0, b+i);
+        tmp1 = vec_ld(0, b+i);
+        vbuf = (vector signed int*) (dst + i);
+        tmp2 = vec_ld(15, b+i);
+
+        t1 = vec_perm(tmp1, tmp2, align);
+
+        for (; i<w_r-3; i+=4) {
+
+#if 0
+            dst[i]   = dst[i]   - (b[i]   + b[i + 1]);
+            dst[i+1] = dst[i+1] - (b[i+1] + b[i + 2]);
+            dst[i+2] = dst[i+2] - (b[i+2] + b[i + 3]);
+            dst[i+3] = dst[i+3] - (b[i+3] + b[i + 4]);
+#else
+
+        tmp1 = vec_ld(0, b+4+i);
+        tmp2 = vec_ld(15, b+4+i);
+
+        t2 = vec_perm(tmp1, tmp2, align);
+
+        y = vec_add(t1, vec_sld(t1,t2,4));
+        *vbuf = vec_sub (*vbuf, y);
+
+        vbuf++;
+
+        t1 = t2;
+
+#endif
+
+        }
+
+        snow_horizontal_compose_lift_lead_out(i, dst, dst, b, width, w_r, 1, W_CM, W_CO, W_CS);
+    }
+
+    { // Lift 2
+        DWTELEM * const ref = b+w2 - 1;
+        DWTELEM b_0 = b[0];
+        vbuf= (vector signed int *) b;
+
+        tmp1 = vec_ld (0, ref);
+        align = vec_lvsl (0, ref);
+        tmp2 = vec_ld (15, ref);
+        t1= vec_perm(tmp1, tmp2, align);
+
+        i = 0;
+        for (; i<w_l-15; i+=16) {
+#if 0
+            b[i]   = b[i]   - (((8 -(ref[i]   + ref[i+1])) - (b[i]  <<2)) >> 4);
+            b[i+1] = b[i+1] - (((8 -(ref[i+1] + ref[i+2])) - (b[i+1]<<2)) >> 4);
+            b[i+2] = b[i+2] - (((8 -(ref[i+2] + ref[i+3])) - (b[i+2]<<2)) >> 4);
+            b[i+3] = b[i+3] - (((8 -(ref[i+3] + ref[i+4])) - (b[i+3]<<2)) >> 4);
+#else
+            tmp1 = vec_ld (0, ref+4+i);
+            tmp2 = vec_ld (15, ref+4+i);
+
+            t2 = vec_perm(tmp1, tmp2, align);
+
+            y = vec_add(t1,vec_sld(t1,t2,4));
+            y = vec_sub(vec_splat_s32(8),y);
+
+            tmp1 = vec_ld (0, ref+8+i);
+
+            x = vec_sl(*vbuf,vec_splat_u32(2));
+            y = vec_sra(vec_sub(y,x),vec_splat_u32(4));
+
+            tmp2 = vec_ld (15, ref+8+i);
+
+            *vbuf = vec_sub( *vbuf, y);
+
+            t1 = t2;
+
+            vbuf++;
+
+            t2 = vec_perm(tmp1, tmp2, align);
+
+            y = vec_add(t1,vec_sld(t1,t2,4));
+            y = vec_sub(vec_splat_s32(8),y);
+
+            tmp1 = vec_ld (0, ref+12+i);
+
+            x = vec_sl(*vbuf,vec_splat_u32(2));
+            y = vec_sra(vec_sub(y,x),vec_splat_u32(4));
+
+            tmp2 = vec_ld (15, ref+12+i);
+
+            *vbuf = vec_sub( *vbuf, y);
+
+            t1 = t2;
+
+            vbuf++;
+
+            t2 = vec_perm(tmp1, tmp2, align);
+
+            y = vec_add(t1,vec_sld(t1,t2,4));
+            y = vec_sub(vec_splat_s32(8),y);
+
+            tmp1 = vec_ld (0, ref+16+i);
+
+            x = vec_sl(*vbuf,vec_splat_u32(2));
+            y = vec_sra(vec_sub(y,x),vec_splat_u32(4));
+
+            tmp2 = vec_ld (15, ref+16+i);
+
+            *vbuf = vec_sub( *vbuf, y);
+
+            t1 = t2;
+
+            vbuf++;
+
+            t2 = vec_perm(tmp1, tmp2, align);
+
+            y = vec_add(t1,vec_sld(t1,t2,4));
+            y = vec_sub(vec_splat_s32(8),y);
+
+            t1 = t2;
+
+            x = vec_sl(*vbuf,vec_splat_u32(2));
+            y = vec_sra(vec_sub(y,x),vec_splat_u32(4));
+            *vbuf = vec_sub( *vbuf, y);
+
+            vbuf++;
+
+#endif
+        }
+
+        snow_horizontal_compose_liftS_lead_out(i, b, b, ref, width, w_l);
+        b[0] = b_0 - (((-2 * ref[1] + W_BO) - 4 * b_0) >> W_BS);
+    }
+
+    { // Lift 3
+        DWTELEM * const src = b+w2;
+
+        vbuf = (vector signed int *)b;
+        vtmp = (vector signed int *)temp;
+
+        i = 0;
+        align = vec_lvsl(0, src);
+
+        for (; i<w_r-3; i+=4) {
+#if 0
+            temp[i] = src[i] - ((-3*(b[i] + b[i+1]))>>1);
+            temp[i+1] = src[i+1] - ((-3*(b[i+1] + b[i+2]))>>1);
+            temp[i+2] = src[i+2] - ((-3*(b[i+2] + b[i+3]))>>1);
+            temp[i+3] = src[i+3] - ((-3*(b[i+3] + b[i+4]))>>1);
+#else
+            tmp1 = vec_ld(0,src+i);
+            t1 = vec_add(vbuf[0],vec_sld(vbuf[0],vbuf[1],4));
+            tmp2 = vec_ld(15,src+i);
+            t1 = vec_sub(vec_splat_s32(0),t1); //bad!
+            t1 = vec_add(t1,vec_add(t1,t1));
+            t2 = vec_perm(tmp1 ,tmp2 ,align);
+            t1 = vec_sra(t1,vec_splat_u32(1));
+            vbuf++;
+            *vtmp = vec_sub(t2,t1);
+            vtmp++;
+
+#endif
+
+        }
+
+        snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -3, 0, 1);
+    }
+
+    {
+    //Interleave
+        int a;
+        vector signed int *t = (vector signed int *)temp,
+                          *v = (vector signed int *)b;
+
+        snow_interleave_line_header(&i, width, b, temp);
+
+        for (; (i & 0xE) != 0xE; i-=2){
+            b[i+1] = temp[i>>1];
+            b[i] = b[i>>1];
+        }
+        for (i-=14; i>=0; i-=16){
+           a=i/4;
+
+           v[a+3]=vec_mergel(v[(a>>1)+1],t[(a>>1)+1]);
+           v[a+2]=vec_mergeh(v[(a>>1)+1],t[(a>>1)+1]);
+           v[a+1]=vec_mergel(v[a>>1],t[a>>1]);
+           v[a]=vec_mergeh(v[a>>1],t[a>>1]);
+
+        }
+
+    }
+}
+
+void ff_snow_vertical_compose97i_altivec(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width)
+{
+    int i, w4 = width/4;
+    vector signed int *v0, *v1,*v2,*v3,*v4,*v5;
+    vector signed int t1, t2;
+
+    v0=(vector signed int *)b0;
+    v1=(vector signed int *)b1;
+    v2=(vector signed int *)b2;
+    v3=(vector signed int *)b3;
+    v4=(vector signed int *)b4;
+    v5=(vector signed int *)b5;
+
+    for (i=0; i< w4;i++)
+    {
+
+    #if 0
+        b4[i] -= (3*(b3[i] + b5[i])+4)>>3;
+        b3[i] -= ((b2[i] + b4[i]));
+        b2[i] += ((b1[i] + b3[i])+4*b2[i]+8)>>4;
+        b1[i] += (3*(b0[i] + b2[i]))>>1;
+    #else
+        t1 = vec_add(v3[i], v5[i]);
+        t2 = vec_add(t1, vec_add(t1,t1));
+        t1 = vec_add(t2, vec_splat_s32(4));
+        v4[i] = vec_sub(v4[i], vec_sra(t1,vec_splat_u32(3)));
+
+        v3[i] = vec_sub(v3[i], vec_add(v2[i], v4[i]));
+
+        t1 = vec_add(vec_splat_s32(8), vec_add(v1[i], v3[i]));
+        t2 = vec_sl(v2[i], vec_splat_u32(2));
+        v2[i] = vec_add(v2[i], vec_sra(vec_add(t1,t2),vec_splat_u32(4)));
+        t1 = vec_add(v0[i], v2[i]);
+        t2 = vec_add(t1, vec_add(t1,t1));
+        v1[i] = vec_add(v1[i], vec_sra(t2,vec_splat_u32(1)));
+
+    #endif
+    }
+
+    for(i*=4; i < width; i++)
+    {
+        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
+        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
+        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
+        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
+    }
+}
+
+#define LOAD_BLOCKS \
+            tmp1 = vec_ld(0, &block[3][y*src_stride]);\
+            align = vec_lvsl(0, &block[3][y*src_stride]);\
+            tmp2 = vec_ld(15, &block[3][y*src_stride]);\
+\
+            b3 = vec_perm(tmp1,tmp2,align);\
+\
+            tmp1 = vec_ld(0, &block[2][y*src_stride]);\
+            align = vec_lvsl(0, &block[2][y*src_stride]);\
+            tmp2 = vec_ld(15, &block[2][y*src_stride]);\
+\
+            b2 = vec_perm(tmp1,tmp2,align);\
+\
+            tmp1 = vec_ld(0, &block[1][y*src_stride]);\
+            align = vec_lvsl(0, &block[1][y*src_stride]);\
+            tmp2 = vec_ld(15, &block[1][y*src_stride]);\
+\
+            b1 = vec_perm(tmp1,tmp2,align);\
+\
+            tmp1 = vec_ld(0, &block[0][y*src_stride]);\
+            align = vec_lvsl(0, &block[0][y*src_stride]);\
+            tmp2 = vec_ld(15, &block[0][y*src_stride]);\
+\
+            b0 = vec_perm(tmp1,tmp2,align);
+
+#define LOAD_OBMCS \
+            tmp1 = vec_ld(0, obmc1);\
+            align = vec_lvsl(0, obmc1);\
+            tmp2 = vec_ld(15, obmc1);\
+\
+            ob1 = vec_perm(tmp1,tmp2,align);\
+\
+            tmp1 = vec_ld(0, obmc2);\
+            align = vec_lvsl(0, obmc2);\
+            tmp2 = vec_ld(15, obmc2);\
+\
+            ob2 = vec_perm(tmp1,tmp2,align);\
+\
+            tmp1 = vec_ld(0, obmc3);\
+            align = vec_lvsl(0, obmc3);\
+            tmp2 = vec_ld(15, obmc3);\
+\
+            ob3 = vec_perm(tmp1,tmp2,align);\
+\
+            tmp1 = vec_ld(0, obmc4);\
+            align = vec_lvsl(0, obmc4);\
+            tmp2 = vec_ld(15, obmc4);\
+\
+            ob4 = vec_perm(tmp1,tmp2,align);
+
+/* interleave logic
+ * h1 <- [ a,b,a,b, a,b,a,b, a,b,a,b, a,b,a,b ]
+ * h2 <- [ c,d,c,d, c,d,c,d, c,d,c,d, c,d,c,d ]
+ * h  <- [ a,b,c,d, a,b,c,d, a,b,c,d, a,b,c,d ]
+ */
+
+#define STEPS_0_1\
+            h1 = (vector unsigned short)\
+                 vec_mergeh(ob1, ob2);\
+\
+            h2 = (vector unsigned short)\
+                 vec_mergeh(ob3, ob4);\
+\
+            ih = (vector unsigned char)\
+                 vec_mergeh(h1,h2);\
+\
+            l1 = (vector unsigned short) vec_mergeh(b3, b2);\
+\
+            ih1 = (vector unsigned char) vec_mergel(h1, h2);\
+\
+            l2 = (vector unsigned short) vec_mergeh(b1, b0);\
+\
+            il = (vector unsigned char) vec_mergeh(l1, l2);\
+\
+            v[0] = (vector signed int) vec_msum(ih, il, vec_splat_u32(0));\
+\
+            il1 = (vector unsigned char) vec_mergel(l1, l2);\
+\
+            v[1] = (vector signed int) vec_msum(ih1, il1, vec_splat_u32(0));
+
+#define FINAL_STEP_SCALAR\
+        for(x=0; x<b_w; x++)\
+            if(add){\
+                vbuf[x] += dst[x + src_x];\
+                vbuf[x] = (vbuf[x] + (1<<(FRAC_BITS-1))) >> FRAC_BITS;\
+                if(vbuf[x]&(~255)) vbuf[x]= ~(vbuf[x]>>31);\
+                dst8[x + y*src_stride] = vbuf[x];\
+            }else{\
+                dst[x + src_x] -= vbuf[x];\
+            }
+
+static void inner_add_yblock_bw_8_obmc_16_altivec(uint8_t *obmc,
+                                             const int obmc_stride,
+                                             uint8_t * * block, int b_w,
+                                             int b_h, int src_x, int src_y,
+                                             int src_stride, slice_buffer * sb,
+                                             int add, uint8_t * dst8)
+{
+    int y, x;
+    DWTELEM * dst;
+    vector unsigned short h1, h2, l1, l2;
+    vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align;
+    vector unsigned char b0,b1,b2,b3;
+    vector unsigned char ob1,ob2,ob3,ob4;
+
+    DECLARE_ALIGNED_16(int, vbuf[16]);
+    vector signed int *v = (vector signed int *)vbuf, *d;
+
+    for(y=0; y<b_h; y++){
+        //FIXME ugly missue of obmc_stride
+
+        uint8_t *obmc1= obmc + y*obmc_stride;
+        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+
+        dst = slice_buffer_get_line(sb, src_y + y);
+        d = (vector signed int *)(dst + src_x);
+
+//FIXME i could avoid some loads!
+
+        // load blocks
+        LOAD_BLOCKS
+
+        // load obmcs
+        LOAD_OBMCS
+
+        // steps 0 1
+        STEPS_0_1
+
+        FINAL_STEP_SCALAR
+
+       }
+
+}
+
+#define STEPS_2_3\
+            h1 = (vector unsigned short) vec_mergel(ob1, ob2);\
+\
+            h2 = (vector unsigned short) vec_mergel(ob3, ob4);\
+\
+            ih = (vector unsigned char) vec_mergeh(h1,h2);\
+\
+            l1 = (vector unsigned short) vec_mergel(b3, b2);\
+\
+            l2 = (vector unsigned short) vec_mergel(b1, b0);\
+\
+            ih1 = (vector unsigned char) vec_mergel(h1,h2);\
+\
+            il = (vector unsigned char) vec_mergeh(l1,l2);\
+\
+            v[2] = (vector signed int) vec_msum(ih, il, vec_splat_u32(0));\
+\
+            il1 = (vector unsigned char) vec_mergel(l1,l2);\
+\
+            v[3] = (vector signed int) vec_msum(ih1, il1, vec_splat_u32(0));
+
+
+static void inner_add_yblock_bw_16_obmc_32_altivec(uint8_t *obmc,
+                                             const int obmc_stride,
+                                             uint8_t * * block, int b_w,
+                                             int b_h, int src_x, int src_y,
+                                             int src_stride, slice_buffer * sb,
+                                             int add, uint8_t * dst8)
+{
+    int y, x;
+    DWTELEM * dst;
+    vector unsigned short h1, h2, l1, l2;
+    vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align;
+    vector unsigned char b0,b1,b2,b3;
+    vector unsigned char ob1,ob2,ob3,ob4;
+    DECLARE_ALIGNED_16(int, vbuf[b_w]);
+    vector signed int *v = (vector signed int *)vbuf, *d;
+
+    for(y=0; y<b_h; y++){
+        //FIXME ugly missue of obmc_stride
+
+        uint8_t *obmc1= obmc + y*obmc_stride;
+        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+
+        dst = slice_buffer_get_line(sb, src_y + y);
+        d = (vector signed int *)(dst + src_x);
+
+        // load blocks
+        LOAD_BLOCKS
+
+        // load obmcs
+        LOAD_OBMCS
+
+        // steps 0 1 2 3
+        STEPS_0_1
+
+        STEPS_2_3
+
+        FINAL_STEP_SCALAR
+
+    }
+}
+
+#define FINAL_STEP_VEC \
+\
+    if(add)\
+        {\
+            for(x=0; x<b_w/4; x++)\
+            {\
+                v[x] = vec_add(v[x], d[x]);\
+                v[x] = vec_sra(vec_add(v[x],\
+                                       vec_sl( vec_splat_s32(1),\
+                                               vec_splat_u32(7))),\
+                               vec_splat_u32(8));\
+\
+                mask = (vector bool int) vec_sl((vector signed int)\
+                        vec_cmpeq(v[x],v[x]),vec_splat_u32(8));\
+                mask = (vector bool int) vec_and(v[x],vec_nor(mask,mask));\
+\
+                mask = (vector bool int)\
+                        vec_cmpeq((vector signed int)mask,\
+                                  (vector signed int)vec_splat_u32(0));\
+\
+                vs = vec_sra(v[x],vec_splat_u32(8));\
+                vs = vec_sra(v[x],vec_splat_u32(8));\
+                vs = vec_sra(v[x],vec_splat_u32(15));\
+\
+                vs = vec_nor(vs,vs);\
+\
+                v[x]= vec_sel(v[x],vs,mask);\
+            }\
+\
+            for(x=0; x<b_w; x++)\
+                dst8[x + y*src_stride] = vbuf[x];\
+\
+        }\
+         else\
+            for(x=0; x<b_w/4; x++)\
+                d[x] = vec_sub(d[x], v[x]);
+
+static void inner_add_yblock_a_bw_8_obmc_16_altivec(uint8_t *obmc,
+                                             const int obmc_stride,
+                                             uint8_t * * block, int b_w,
+                                             int b_h, int src_x, int src_y,
+                                             int src_stride, slice_buffer * sb,
+                                             int add, uint8_t * dst8)
+{
+    int y, x;
+    DWTELEM * dst;
+    vector bool int mask;
+    vector signed int vs;
+    vector unsigned short h1, h2, l1, l2;
+    vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align;
+    vector unsigned char b0,b1,b2,b3;
+    vector unsigned char ob1,ob2,ob3,ob4;
+
+    DECLARE_ALIGNED_16(int, vbuf[16]);
+    vector signed int *v = (vector signed int *)vbuf, *d;
+
+    for(y=0; y<b_h; y++){
+        //FIXME ugly missue of obmc_stride
+
+        uint8_t *obmc1= obmc + y*obmc_stride;
+        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+
+        dst = slice_buffer_get_line(sb, src_y + y);
+        d = (vector signed int *)(dst + src_x);
+
+//FIXME i could avoid some loads!
+
+        // load blocks
+        LOAD_BLOCKS
+
+        // load obmcs
+        LOAD_OBMCS
+
+        // steps 0 1
+        STEPS_0_1
+
+        FINAL_STEP_VEC
+
+       }
+
+}
+
+static void inner_add_yblock_a_bw_16_obmc_32_altivec(uint8_t *obmc,
+                                             const int obmc_stride,
+                                             uint8_t * * block, int b_w,
+                                             int b_h, int src_x, int src_y,
+                                             int src_stride, slice_buffer * sb,
+                                             int add, uint8_t * dst8)
+{
+    int y, x;
+    DWTELEM * dst;
+    vector bool int mask;
+    vector signed int vs;
+    vector unsigned short h1, h2, l1, l2;
+    vector unsigned char ih, il, ih1, il1, tmp1, tmp2, align;
+    vector unsigned char b0,b1,b2,b3;
+    vector unsigned char ob1,ob2,ob3,ob4;
+    DECLARE_ALIGNED_16(int, vbuf[b_w]);
+    vector signed int *v = (vector signed int *)vbuf, *d;
+
+    for(y=0; y<b_h; y++){
+        //FIXME ugly missue of obmc_stride
+
+        uint8_t *obmc1= obmc + y*obmc_stride;
+        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+
+        dst = slice_buffer_get_line(sb, src_y + y);
+        d = (vector signed int *)(dst + src_x);
+
+        // load blocks
+        LOAD_BLOCKS
+
+        // load obmcs
+        LOAD_OBMCS
+
+        // steps 0 1 2 3
+        STEPS_0_1
+
+        STEPS_2_3
+
+        FINAL_STEP_VEC
+
+    }
+}
+
+
+void ff_snow_inner_add_yblock_altivec(uint8_t *obmc, const int obmc_stride,
+                                      uint8_t * * block, int b_w, int b_h,
+                                      int src_x, int src_y, int src_stride,
+                                      slice_buffer * sb, int add,
+                                      uint8_t * dst8)
+{
+    if (src_x&15) {
+        if (b_w == 16)
+            inner_add_yblock_bw_16_obmc_32_altivec(obmc, obmc_stride, block,
+                                                   b_w, b_h, src_x, src_y,
+                                                   src_stride, sb, add, dst8);
+        else if (b_w == 8)
+            inner_add_yblock_bw_8_obmc_16_altivec(obmc, obmc_stride, block,
+                                                  b_w, b_h, src_x, src_y,
+                                                  src_stride, sb, add, dst8);
+        else
+            ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,
+                                     src_y, src_stride, sb, add, dst8);
+    } else {
+        if (b_w == 16)
+            inner_add_yblock_a_bw_16_obmc_32_altivec(obmc, obmc_stride, block,
+                                                     b_w, b_h, src_x, src_y,
+                                                     src_stride, sb, add, dst8);
+        else if (b_w == 8)
+            inner_add_yblock_a_bw_8_obmc_16_altivec(obmc, obmc_stride, block,
+                                                    b_w, b_h, src_x, src_y,
+                                                    src_stride, sb, add, dst8);
+        else
+            ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,
+                                     src_y, src_stride, sb, add, dst8);
+    }
+}
+
+
+void snow_init_altivec(DSPContext* c, AVCodecContext *avctx)
+{
+        c->horizontal_compose97i = ff_snow_horizontal_compose97i_altivec;
+        c->vertical_compose97i = ff_snow_vertical_compose97i_altivec;
+        c->inner_add_yblock = ff_snow_inner_add_yblock_altivec;
+}
diff --git a/contrib/ffmpeg/libavcodec/ppc/types_altivec.h b/contrib/ffmpeg/libavcodec/ppc/types_altivec.h
new file mode 100644
index 000000000..f29026e04
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/types_altivec.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2006 Guillaume Poirier <gpoirier@mplayerhq.hu>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/***********************************************************************
+ * Vector types
+ **********************************************************************/
+#define vec_u8_t  vector unsigned char
+#define vec_s8_t  vector signed char
+#define vec_u16_t vector unsigned short
+#define vec_s16_t vector signed short
+#define vec_u32_t vector unsigned int
+#define vec_s32_t vector signed int
+
+/***********************************************************************
+ * Null vector
+ **********************************************************************/
+#define LOAD_ZERO const vec_u8_t zerov = vec_splat_u8( 0 )
+
+#define zero_u8v  (vec_u8_t)  zerov
+#define zero_s8v  (vec_s8_t)  zerov
+#define zero_u16v (vec_u16_t) zerov
+#define zero_s16v (vec_s16_t) zerov
+#define zero_u32v (vec_u32_t) zerov
+#define zero_s32v (vec_s32_t) zerov
diff --git a/contrib/ffmpeg/libavcodec/ppc/vc1dsp_altivec.c b/contrib/ffmpeg/libavcodec/ppc/vc1dsp_altivec.c
new file mode 100644
index 000000000..114c9d41f
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ppc/vc1dsp_altivec.c
@@ -0,0 +1,338 @@
+/*
+ * VC-1 and WMV3 decoder - DSP functions AltiVec-optimized
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "../dsputil.h"
+
+#include "gcc_fixes.h"
+
+#include "dsputil_altivec.h"
+
+// main steps of 8x8 transform
+#define STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_rnd) \
+do { \
+    t0 = vec_sl(vec_add(s0, s4), vec_2); \
+    t0 = vec_add(vec_sl(t0, vec_1), t0); \
+    t0 = vec_add(t0, vec_rnd); \
+    t1 = vec_sl(vec_sub(s0, s4), vec_2); \
+    t1 = vec_add(vec_sl(t1, vec_1), t1); \
+    t1 = vec_add(t1, vec_rnd); \
+    t2 = vec_add(vec_sl(s6, vec_2), vec_sl(s6, vec_1)); \
+    t2 = vec_add(t2, vec_sl(s2, vec_4)); \
+    t3 = vec_add(vec_sl(s2, vec_2), vec_sl(s2, vec_1)); \
+    t3 = vec_sub(t3, vec_sl(s6, vec_4)); \
+    t4 = vec_add(t0, t2); \
+    t5 = vec_add(t1, t3); \
+    t6 = vec_sub(t1, t3); \
+    t7 = vec_sub(t0, t2); \
+\
+    t0 = vec_sl(vec_add(s1, s3), vec_4); \
+    t0 = vec_add(t0, vec_sl(s5, vec_3)); \
+    t0 = vec_add(t0, vec_sl(s7, vec_2)); \
+    t0 = vec_add(t0, vec_sub(s5, s3)); \
+\
+    t1 = vec_sl(vec_sub(s1, s5), vec_4); \
+    t1 = vec_sub(t1, vec_sl(s7, vec_3)); \
+    t1 = vec_sub(t1, vec_sl(s3, vec_2)); \
+    t1 = vec_sub(t1, vec_add(s1, s7)); \
+\
+    t2 = vec_sl(vec_sub(s7, s3), vec_4); \
+    t2 = vec_add(t2, vec_sl(s1, vec_3)); \
+    t2 = vec_add(t2, vec_sl(s5, vec_2)); \
+    t2 = vec_add(t2, vec_sub(s1, s7)); \
+\
+    t3 = vec_sl(vec_sub(s5, s7), vec_4); \
+    t3 = vec_sub(t3, vec_sl(s3, vec_3)); \
+    t3 = vec_add(t3, vec_sl(s1, vec_2)); \
+    t3 = vec_sub(t3, vec_add(s3, s5)); \
+\
+    s0 = vec_add(t4, t0); \
+    s1 = vec_add(t5, t1); \
+    s2 = vec_add(t6, t2); \
+    s3 = vec_add(t7, t3); \
+    s4 = vec_sub(t7, t3); \
+    s5 = vec_sub(t6, t2); \
+    s6 = vec_sub(t5, t1); \
+    s7 = vec_sub(t4, t0); \
+}while(0)
+
+#define SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7) \
+do { \
+    s0 = vec_sra(s0, vec_3); \
+    s1 = vec_sra(s1, vec_3); \
+    s2 = vec_sra(s2, vec_3); \
+    s3 = vec_sra(s3, vec_3); \
+    s4 = vec_sra(s4, vec_3); \
+    s5 = vec_sra(s5, vec_3); \
+    s6 = vec_sra(s6, vec_3); \
+    s7 = vec_sra(s7, vec_3); \
+}while(0)
+
+#define SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7) \
+do { \
+    s0 = vec_sra(s0, vec_7); \
+    s1 = vec_sra(s1, vec_7); \
+    s2 = vec_sra(s2, vec_7); \
+    s3 = vec_sra(s3, vec_7); \
+    s4 = vec_sra(vec_add(s4, vec_1s), vec_7); \
+    s5 = vec_sra(vec_add(s5, vec_1s), vec_7); \
+    s6 = vec_sra(vec_add(s6, vec_1s), vec_7); \
+    s7 = vec_sra(vec_add(s7, vec_1s), vec_7); \
+}while(0)
+
+/* main steps of 4x4 transform */
+#define STEP4(s0, s1, s2, s3, vec_rnd) \
+do { \
+    t1 = vec_add(vec_sl(s0, vec_4), s0); \
+    t1 = vec_add(t1, vec_rnd); \
+    t2 = vec_add(vec_sl(s2, vec_4), s2); \
+    t0 = vec_add(t1, t2); \
+    t1 = vec_sub(t1, t2); \
+    t3 = vec_sl(vec_sub(s3, s1), vec_1); \
+    t3 = vec_add(t3, vec_sl(t3, vec_2)); \
+    t2 = vec_add(t3, vec_sl(s1, vec_5)); \
+    t3 = vec_add(t3, vec_sl(s3, vec_3)); \
+    t3 = vec_add(t3, vec_sl(s3, vec_2)); \
+    s0 = vec_add(t0, t2); \
+    s1 = vec_sub(t1, t3); \
+    s2 = vec_add(t1, t3); \
+    s3 = vec_sub(t0, t2); \
+}while (0)
+
+#define SHIFT_HOR4(s0, s1, s2, s3) \
+    s0 = vec_sra(s0, vec_3); \
+    s1 = vec_sra(s1, vec_3); \
+    s2 = vec_sra(s2, vec_3); \
+    s3 = vec_sra(s3, vec_3);
+
+#define SHIFT_VERT4(s0, s1, s2, s3) \
+    s0 = vec_sra(s0, vec_7); \
+    s1 = vec_sra(s1, vec_7); \
+    s2 = vec_sra(s2, vec_7); \
+    s3 = vec_sra(s3, vec_7);
+
+/** Do inverse transform on 8x8 block
+*/
+static void vc1_inv_trans_8x8_altivec(DCTELEM block[64])
+{
+    vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
+    vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
+    vector signed int s8, s9, sA, sB, sC, sD, sE, sF;
+    vector signed int t0, t1, t2, t3, t4, t5, t6, t7;
+    const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4));
+    const vector unsigned int vec_7 = vec_splat_u32(7);
+    const vector unsigned int vec_5 = vec_splat_u32(5);
+    const vector unsigned int vec_4 = vec_splat_u32(4);
+    const vector  signed int vec_4s = vec_splat_s32(4);
+    const vector unsigned int vec_3 = vec_splat_u32(3);
+    const vector unsigned int vec_2 = vec_splat_u32(2);
+    const vector  signed int vec_1s = vec_splat_s32(1);
+    const vector unsigned int vec_1 = vec_splat_u32(1);
+
+
+    src0 = vec_ld(  0, block);
+    src1 = vec_ld( 16, block);
+    src2 = vec_ld( 32, block);
+    src3 = vec_ld( 48, block);
+    src4 = vec_ld( 64, block);
+    src5 = vec_ld( 80, block);
+    src6 = vec_ld( 96, block);
+    src7 = vec_ld(112, block);
+
+    TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
+    s0 = vec_unpackl(src0);
+    s1 = vec_unpackl(src1);
+    s2 = vec_unpackl(src2);
+    s3 = vec_unpackl(src3);
+    s4 = vec_unpackl(src4);
+    s5 = vec_unpackl(src5);
+    s6 = vec_unpackl(src6);
+    s7 = vec_unpackl(src7);
+    s8 = vec_unpackh(src0);
+    s9 = vec_unpackh(src1);
+    sA = vec_unpackh(src2);
+    sB = vec_unpackh(src3);
+    sC = vec_unpackh(src4);
+    sD = vec_unpackh(src5);
+    sE = vec_unpackh(src6);
+    sF = vec_unpackh(src7);
+    STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);
+    SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);
+    STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);
+    SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);
+    src0 = vec_pack(s8, s0);
+    src1 = vec_pack(s9, s1);
+    src2 = vec_pack(sA, s2);
+    src3 = vec_pack(sB, s3);
+    src4 = vec_pack(sC, s4);
+    src5 = vec_pack(sD, s5);
+    src6 = vec_pack(sE, s6);
+    src7 = vec_pack(sF, s7);
+    TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
+
+    s0 = vec_unpackl(src0);
+    s1 = vec_unpackl(src1);
+    s2 = vec_unpackl(src2);
+    s3 = vec_unpackl(src3);
+    s4 = vec_unpackl(src4);
+    s5 = vec_unpackl(src5);
+    s6 = vec_unpackl(src6);
+    s7 = vec_unpackl(src7);
+    s8 = vec_unpackh(src0);
+    s9 = vec_unpackh(src1);
+    sA = vec_unpackh(src2);
+    sB = vec_unpackh(src3);
+    sC = vec_unpackh(src4);
+    sD = vec_unpackh(src5);
+    sE = vec_unpackh(src6);
+    sF = vec_unpackh(src7);
+    STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_64);
+    SHIFT_VERT8(s0, s1, s2, s3, s4, s5, s6, s7);
+    STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_64);
+    SHIFT_VERT8(s8, s9, sA, sB, sC, sD, sE, sF);
+    src0 = vec_pack(s8, s0);
+    src1 = vec_pack(s9, s1);
+    src2 = vec_pack(sA, s2);
+    src3 = vec_pack(sB, s3);
+    src4 = vec_pack(sC, s4);
+    src5 = vec_pack(sD, s5);
+    src6 = vec_pack(sE, s6);
+    src7 = vec_pack(sF, s7);
+
+    vec_st(src0,  0, block);
+    vec_st(src1, 16, block);
+    vec_st(src2, 32, block);
+    vec_st(src3, 48, block);
+    vec_st(src4, 64, block);
+    vec_st(src5, 80, block);
+    vec_st(src6, 96, block);
+    vec_st(src7,112, block);
+}
+
+/** Do inverse transform on 8x4 part of block
+*/
+static void vc1_inv_trans_8x4_altivec(DCTELEM block[64], int n)
+{
+    vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
+    vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
+    vector signed int s8, s9, sA, sB, sC, sD, sE, sF;
+    vector signed int t0, t1, t2, t3, t4, t5, t6, t7;
+    const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4));
+    const vector unsigned int vec_7 = vec_splat_u32(7);
+    const vector unsigned int vec_5 = vec_splat_u32(5);
+    const vector unsigned int vec_4 = vec_splat_u32(4);
+    const vector  signed int vec_4s = vec_splat_s32(4);
+    const vector unsigned int vec_3 = vec_splat_u32(3);
+    const vector unsigned int vec_2 = vec_splat_u32(2);
+    const vector unsigned int vec_1 = vec_splat_u32(1);
+
+    src0 = vec_ld(  0, block);
+    src1 = vec_ld( 16, block);
+    src2 = vec_ld( 32, block);
+    src3 = vec_ld( 48, block);
+    src4 = vec_ld( 64, block);
+    src5 = vec_ld( 80, block);
+    src6 = vec_ld( 96, block);
+    src7 = vec_ld(112, block);
+
+    TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
+    s0 = vec_unpackl(src0);
+    s1 = vec_unpackl(src1);
+    s2 = vec_unpackl(src2);
+    s3 = vec_unpackl(src3);
+    s4 = vec_unpackl(src4);
+    s5 = vec_unpackl(src5);
+    s6 = vec_unpackl(src6);
+    s7 = vec_unpackl(src7);
+    s8 = vec_unpackh(src0);
+    s9 = vec_unpackh(src1);
+    sA = vec_unpackh(src2);
+    sB = vec_unpackh(src3);
+    sC = vec_unpackh(src4);
+    sD = vec_unpackh(src5);
+    sE = vec_unpackh(src6);
+    sF = vec_unpackh(src7);
+    STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);
+    SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);
+    STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);
+    SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);
+    src0 = vec_pack(s8, s0);
+    src1 = vec_pack(s9, s1);
+    src2 = vec_pack(sA, s2);
+    src3 = vec_pack(sB, s3);
+    src4 = vec_pack(sC, s4);
+    src5 = vec_pack(sD, s5);
+    src6 = vec_pack(sE, s6);
+    src7 = vec_pack(sF, s7);
+    TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
+
+    if(!n){ // upper half of block
+        s0 = vec_unpackh(src0);
+        s1 = vec_unpackh(src1);
+        s2 = vec_unpackh(src2);
+        s3 = vec_unpackh(src3);
+        s8 = vec_unpackl(src0);
+        s9 = vec_unpackl(src1);
+        sA = vec_unpackl(src2);
+        sB = vec_unpackl(src3);
+        STEP4(s0, s1, s2, s3, vec_64);
+        SHIFT_VERT4(s0, s1, s2, s3);
+        STEP4(s8, s9, sA, sB, vec_64);
+        SHIFT_VERT4(s8, s9, sA, sB);
+        src0 = vec_pack(s0, s8);
+        src1 = vec_pack(s1, s9);
+        src2 = vec_pack(s2, sA);
+        src3 = vec_pack(s3, sB);
+
+        vec_st(src0,  0, block);
+        vec_st(src1, 16, block);
+        vec_st(src2, 32, block);
+        vec_st(src3, 48, block);
+    } else { //lower half of block
+        s0 = vec_unpackh(src4);
+        s1 = vec_unpackh(src5);
+        s2 = vec_unpackh(src6);
+        s3 = vec_unpackh(src7);
+        s8 = vec_unpackl(src4);
+        s9 = vec_unpackl(src5);
+        sA = vec_unpackl(src6);
+        sB = vec_unpackl(src7);
+        STEP4(s0, s1, s2, s3, vec_64);
+        SHIFT_VERT4(s0, s1, s2, s3);
+        STEP4(s8, s9, sA, sB, vec_64);
+        SHIFT_VERT4(s8, s9, sA, sB);
+        src4 = vec_pack(s0, s8);
+        src5 = vec_pack(s1, s9);
+        src6 = vec_pack(s2, sA);
+        src7 = vec_pack(s3, sB);
+
+        vec_st(src4, 64, block);
+        vec_st(src5, 80, block);
+        vec_st(src6, 96, block);
+        vec_st(src7,112, block);
+    }
+}
+
+
+void vc1dsp_init_altivec(DSPContext* dsp, AVCodecContext *avctx) {
+    dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec;
+    dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec;
+}
diff --git a/contrib/ffmpeg/libavcodec/ps2/dsputil_mmi.c b/contrib/ffmpeg/libavcodec/ps2/dsputil_mmi.c
new file mode 100644
index 000000000..0d72ae88c
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ps2/dsputil_mmi.c
@@ -0,0 +1,163 @@
+/*
+ * MMI optimized DSP utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * MMI optimization by Leon van Stuivenberg
+ * clear_blocks_mmi() by BroadQ
+ */
+
+#include "../dsputil.h"
+#include "mmi.h"
+
+void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_mmi_idct(DCTELEM *block);
+
+static void clear_blocks_mmi(DCTELEM * blocks)
+{
+        asm volatile(
+        ".set noreorder    \n"
+        "addiu $9, %0, 768 \n"
+        "nop               \n"
+        "1:                \n"
+        "sq $0, 0(%0)      \n"
+        "move $8, %0       \n"
+        "addi %0, %0, 64   \n"
+        "sq $0, 16($8)     \n"
+        "slt $10, %0, $9   \n"
+        "sq $0, 32($8)     \n"
+        "bnez $10, 1b      \n"
+        "sq $0, 48($8)     \n"
+        ".set reorder      \n"
+        : "+r" (blocks) ::  "$8", "$9", "memory" );
+}
+
+
+static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size)
+{
+        asm volatile(
+        ".set   push            \n\t"
+        ".set   mips3           \n\t"
+        "ld     $8, 0(%0)       \n\t"
+        "add    %0, %0, %2      \n\t"
+        "ld     $9, 0(%0)       \n\t"
+        "add    %0, %0, %2      \n\t"
+        "ld     $10, 0(%0)      \n\t"
+        "pextlb $8, $0, $8      \n\t"
+        "sq     $8, 0(%1)       \n\t"
+        "add    %0, %0, %2      \n\t"
+        "ld     $8, 0(%0)       \n\t"
+        "pextlb $9, $0, $9      \n\t"
+        "sq     $9, 16(%1)      \n\t"
+        "add    %0, %0, %2      \n\t"
+        "ld     $9, 0(%0)       \n\t"
+        "pextlb $10, $0, $10    \n\t"
+        "sq     $10, 32(%1)     \n\t"
+        "add    %0, %0, %2      \n\t"
+        "ld     $10, 0(%0)      \n\t"
+        "pextlb $8, $0, $8      \n\t"
+        "sq     $8, 48(%1)      \n\t"
+        "add    %0, %0, %2      \n\t"
+        "ld     $8, 0(%0)       \n\t"
+        "pextlb $9, $0, $9      \n\t"
+        "sq     $9, 64(%1)      \n\t"
+        "add    %0, %0, %2      \n\t"
+        "ld     $9, 0(%0)       \n\t"
+        "pextlb $10, $0, $10    \n\t"
+        "sq     $10, 80(%1)     \n\t"
+        "pextlb $8, $0, $8      \n\t"
+        "sq     $8, 96(%1)      \n\t"
+        "pextlb $9, $0, $9      \n\t"
+        "sq     $9, 112(%1)     \n\t"
+        ".set   pop             \n\t"
+        : "+r" (pixels) : "r" (block), "r" (line_size) : "$8", "$9", "$10", "memory" );
+}
+
+
+static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+        asm volatile(
+        ".set   push            \n\t"
+        ".set   mips3           \n\t"
+        "1:                     \n\t"
+        "ldr    $8, 0(%1)       \n\t"
+        "addiu  %2, %2, -1      \n\t"
+        "ldl    $8, 7(%1)       \n\t"
+        "add    %1, %1, %3      \n\t"
+        "sd     $8, 0(%0)       \n\t"
+        "add    %0, %0, %3      \n\t"
+        "bgtz   %2, 1b          \n\t"
+        ".set   pop             \n\t"
+        : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
+        : "$8", "memory" );
+}
+
+
+static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+        asm volatile (
+        ".set   push            \n\t"
+        ".set   mips3           \n\t"
+        "1:                     \n\t"
+        "ldr    $8, 0(%1)       \n\t"
+        "add    $11, %1, %3     \n\t"
+        "ldl    $8, 7(%1)       \n\t"
+        "add    $10, %0, %3     \n\t"
+        "ldr    $9, 8(%1)       \n\t"
+        "ldl    $9, 15(%1)      \n\t"
+        "ldr    $12, 0($11)     \n\t"
+        "add    %1, $11, %3     \n\t"
+        "ldl    $12, 7($11)     \n\t"
+        "pcpyld $8, $9, $8      \n\t"
+        "sq     $8, 0(%0)       \n\t"
+        "ldr    $13, 8($11)     \n\t"
+        "addiu  %2, %2, -2      \n\t"
+        "ldl    $13, 15($11)    \n\t"
+        "add    %0, $10, %3     \n\t"
+        "pcpyld $12, $13, $12   \n\t"
+        "sq     $12, 0($10)     \n\t"
+        "bgtz   %2, 1b          \n\t"
+        ".set   pop             \n\t"
+        : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
+        : "$8", "$9", "$10", "$11", "$12", "$13", "memory" );
+}
+
+
+void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
+{
+    const int idct_algo= avctx->idct_algo;
+
+    c->clear_blocks = clear_blocks_mmi;
+
+    c->put_pixels_tab[1][0] = put_pixels8_mmi;
+    c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmi;
+
+    c->put_pixels_tab[0][0] = put_pixels16_mmi;
+    c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
+
+    c->get_pixels = get_pixels_mmi;
+
+    if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){
+        c->idct_put= ff_mmi_idct_put;
+        c->idct_add= ff_mmi_idct_add;
+        c->idct    = ff_mmi_idct;
+        c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
+    }
+}
+
diff --git a/contrib/ffmpeg/libavcodec/ps2/idct_mmi.c b/contrib/ffmpeg/libavcodec/ps2/idct_mmi.c
new file mode 100644
index 000000000..dfe9b3726
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ps2/idct_mmi.c
@@ -0,0 +1,363 @@
+/*
+ * Originally provided by Intel at Application Note AP-922.
+ *
+ * Column code adapted from Peter Gubanov.
+ * Copyright (c) 2000-2001 Peter Gubanov <peter@elecard.net.ru>
+ * http://www.elecard.com/peter/idct.shtml
+ * rounding trick copyright (c) 2000 Michel Lespinasse <walken@zoy.org>
+ *
+ * MMI port and (c) 2002 by Leon van Stuivenberg
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+*/
+
+#include "../common.h"
+#include "../dsputil.h"
+#include "mmi.h"
+
+#define BITS_INV_ACC    5       // 4 or 5 for IEEE
+#define SHIFT_INV_ROW   (16 - BITS_INV_ACC)
+#define SHIFT_INV_COL   (1 + BITS_INV_ACC)
+
+#define TG1             6518
+#define TG2             13573
+#define TG3             21895
+#define CS4             23170
+
+#define ROUNDER_0       0
+#define ROUNDER_1       16
+
+#define TAB_i_04        (32+0)
+#define TAB_i_17        (32+64)
+#define TAB_i_26        (32+128)
+#define TAB_i_35        (32+192)
+
+#define TG_1_16         (32+256+0)
+#define TG_2_16         (32+256+16)
+#define TG_3_16         (32+256+32)
+#define COS_4_16        (32+256+48)
+
+#define CLIPMAX         (32+256+64+0)
+
+static short consttable[] align16 = {
+/* rounder 0*/  // assume SHIFT_INV_ROW == 11
+ 0x3ff, 1, 0x3ff, 1, 0x3ff, 1, 0x3ff, 1,
+/* rounder 1*/
+ 0x3ff, 0, 0x3ff, 0, 0x3ff, 0, 0x3ff, 0,
+/* row 0/4*/
+ 16384,  21407, -16384, -21407,  22725,  19266, -22725, -12873,
+  8867,  16384,   8867,  16384,   4520,  12873,  -4520,  19266,
+ 16384,  -8867,  16384,  -8867,  12873, -22725,  19266, -22725,
+ 21407, -16384, -21407,  16384,  19266,   4520, -12873,   4520,
+/* row 1/7*/
+ 22725,  29692, -22725, -29692,  31521,  26722, -31521, -17855,
+ 12299,  22725,  12299,  22725,   6270,  17855,  -6270,  26722,
+ 22725, -12299,  22725, -12299,  17855, -31521,  26722, -31521,
+ 29692, -22725, -29692,  22725,  26722,   6270, -17855,   6270,
+/* row 2/6*/
+ 21407,  27969, -21407, -27969,  29692,  25172, -29692, -16819,
+ 11585,  21407,  11585,  21407,   5906,  16819,  -5906,  25172,
+ 21407, -11585,  21407, -11585,  16819, -29692,  25172, -29692,
+ 27969, -21407, -27969,  21407,  25172,   5906, -16819,   5906,
+/*row 3/5*/
+ 19266,  25172, -19266, -25172,  26722,  22654, -26722, -15137,
+ 10426,  19266,  10426,  19266,   5315,  15137,  -5315,  22654,
+ 19266, -10426,  19266, -10426,  15137, -26722,  22654, -26722,
+ 25172, -19266, -25172,  19266,  22654,   5315, -15137,   5315,
+/*column constants*/
+ TG1, TG1, TG1, TG1, TG1, TG1, TG1, TG1,
+ TG2, TG2, TG2, TG2, TG2, TG2, TG2, TG2,
+ TG3, TG3, TG3, TG3, TG3, TG3, TG3, TG3,
+ CS4, CS4, CS4, CS4, CS4, CS4, CS4, CS4,
+/* clamp */
+ 255, 255, 255, 255, 255, 255, 255, 255
+};
+
+
+#define DCT_8_INV_ROW1(blk, rowoff, taboff, rnd, outreg) { \
+        lq(blk, rowoff, $16);   /* r16 = x7  x5  x3  x1  x6  x4  x2  x0 */ \
+        /*slot*/ \
+        lq($24, 0+taboff, $17); /* r17 = w */ \
+        /*delay slot $16*/ \
+        lq($24, 16+taboff, $18);/* r18 = w */ \
+        prevh($16, $2);         /* r2  = x1  x3  x5  x7  x0  x2  x4  x6 */ \
+        lq($24, 32+taboff, $19);/* r19 = w */ \
+        phmadh($17, $16, $17);  /* r17 = b1"b0'a1"a0' */ \
+        lq($24, 48+taboff, $20);/* r20 = w */ \
+        phmadh($18, $2, $18);   /* r18 = b1'b0"a1'a0" */ \
+        phmadh($19, $16, $19);  /* r19 = b3"b2'a3"a2' */ \
+        phmadh($20, $2, $20);   /* r20 = b3'b2"a3'a2" */ \
+        paddw($17, $18, $17);   /* r17 = (b1)(b0)(a1)(a0) */ \
+        paddw($19, $20, $19);   /* r19 = (b3)(b2)(a3)(a2) */ \
+        pcpyld($19, $17, $18);  /* r18 = (a3)(a2)(a1)(a0) */ \
+        pcpyud($17, $19, $20);  /* r20 = (b3)(b2)(b1)(b0) */ \
+        paddw($18, rnd, $18);   /* r18 = (a3)(a2)(a1)(a0) */\
+        paddw($18, $20, $17);   /* r17 = ()()()(a0+b0) */ \
+        psubw($18, $20, $20);   /* r20 = ()()()(a0-b0) */ \
+        psraw($17, SHIFT_INV_ROW, $17); /* r17 = (y3 y2 y1 y0) */ \
+        psraw($20, SHIFT_INV_ROW, $20); /* r20 = (y4 y5 y6 y7) */ \
+        ppach($20, $17, outreg);/* out = y4 y5 y6 y7 y3 y2 y1 y0  Note order */ \
+\
+        prevh(outreg, $2);        \
+        pcpyud($2, $2, $2);        \
+        pcpyld($2, outreg, outreg);        \
+}
+
+
+#define DCT_8_INV_COL8() \
+\
+        lq($24, TG_3_16, $2);   /* r2  = tn3 */         \
+\
+        pmulth($11, $2, $17);   /* r17 = x3 * tn3 (6420) */ \
+        psraw($17, 15, $17);    \
+        pmfhl_uw($3);           /* r3  = 7531 */        \
+        psraw($3, 15, $3);      \
+        pinteh($3, $17, $17);   /* r17 = x3 * tn3 */    \
+        psubh($17, $13, $17);   /* r17 = tm35 */        \
+\
+        pmulth($13, $2, $18);   /* r18 = x5 * tn3 (6420) */ \
+        psraw($18, 15, $18);    \
+        pmfhl_uw($3);           /* r3  = 7531 */        \
+        psraw($3, 15, $3);      \
+        pinteh($3, $18, $18);   /* r18 = x5 * tn3 */    \
+        paddh($18, $11, $18);   /* r18 = tp35 */        \
+\
+        lq($24, TG_1_16, $2);   /* r2  = tn1 */         \
+\
+        pmulth($15, $2, $19);   /* r19 = x7 * tn1 (6420) */ \
+        psraw($19, 15, $19);    \
+        pmfhl_uw($3);           /* r3  = 7531 */        \
+        psraw($3, 15, $3);      \
+        pinteh($3, $19, $19);   /* r19 = x7 * tn1 */    \
+        paddh($19, $9, $19);    /* r19 = tp17 */        \
+\
+        pmulth($9, $2, $20);    /* r20 = x1 * tn1 (6420) */ \
+        psraw($20, 15, $20);    \
+        pmfhl_uw($3);           /* r3  = 7531 */        \
+        psraw($3, 15, $3);      \
+        pinteh($3, $20, $20);   /* r20 = x1 * tn1 */    \
+        psubh($20, $15, $20);   /* r20 = tm17 */        \
+\
+        psubh($19, $18, $3);    /* r3  = t1 */          \
+        paddh($20, $17, $16);   /* r16 = t2 */          \
+        psubh($20, $17, $23);   /* r23 = b3 */          \
+        paddh($19, $18, $20);   /* r20 = b0 */          \
+\
+        lq($24, COS_4_16, $2);  /* r2  = cs4 */         \
+\
+        paddh($3, $16, $21);    /* r21 = t1+t2 */       \
+        psubh($3, $16, $22);    /* r22 = t1-t2 */       \
+\
+        pmulth($21, $2, $21);   /* r21 = cs4 * (t1+t2) 6420 */ \
+        psraw($21, 15, $21);    \
+        pmfhl_uw($3);           /* r3  = 7531 */        \
+        psraw($3, 15, $3);      \
+        pinteh($3, $21, $21);   /* r21 = b1 */          \
+\
+        pmulth($22, $2, $22);   /* r22 = cs4 * (t1-t2) 6420 */ \
+        psraw($22, 15, $22);    \
+        pmfhl_uw($3);           /* r3  = 7531 */        \
+        psraw($3, 15, $3);      \
+        pinteh($3, $22, $22);   /* r22 = b2 */          \
+\
+        lq($24, TG_2_16, $2);   /* r2  = tn2 */         \
+\
+        pmulth($10, $2, $17);   /* r17 = x2 * tn2 (6420) */ \
+        psraw($17, 15, $17);    \
+        pmfhl_uw($3);           /* r3  = 7531 */        \
+        psraw($3, 15, $3);      \
+        pinteh($3, $17, $17);   /* r17 = x3 * tn3 */    \
+        psubh($17, $14, $17);   /* r17 = tm26 */        \
+\
+        pmulth($14, $2, $18);   /* r18 = x6 * tn2 (6420) */ \
+        psraw($18, 15, $18);    \
+        pmfhl_uw($3);           /* r3  = 7531 */        \
+        psraw($3, 15, $3);      \
+        pinteh($3, $18, $18);   /* r18 = x6 * tn2 */    \
+        paddh($18, $10, $18);   /* r18 = tp26 */        \
+\
+        paddh($8, $12, $2);     /* r2  = tp04 */        \
+        psubh($8, $12, $3);     /* r3  = tm04 */        \
+\
+        paddh($2, $18, $16);    /* r16 = a0 */          \
+        psubh($2, $18, $19);    /* r19 = a3 */          \
+        psubh($3, $17, $18);    /* r18 = a2 */          \
+        paddh($3, $17, $17);    /* r17 = a1 */
+
+
+#define DCT_8_INV_COL8_STORE(blk) \
+\
+        paddh($16, $20, $2);    /* y0  a0+b0 */ \
+        psubh($16, $20, $16);   /* y7  a0-b0 */ \
+        psrah($2, SHIFT_INV_COL, $2);           \
+        psrah($16, SHIFT_INV_COL, $16);         \
+        sq($2, 0, blk);                         \
+        sq($16, 112, blk);                      \
+\
+        paddh($17, $21, $3);    /* y1  a1+b1 */ \
+        psubh($17, $21, $17);   /* y6  a1-b1 */ \
+        psrah($3, SHIFT_INV_COL, $3);           \
+        psrah($17, SHIFT_INV_COL, $17);         \
+        sq($3, 16, blk);                        \
+        sq($17, 96, blk);                       \
+\
+        paddh($18, $22, $2);    /* y2  a2+b2 */ \
+        psubh($18, $22, $18);   /* y5  a2-b2 */ \
+        psrah($2, SHIFT_INV_COL, $2);           \
+        psrah($18, SHIFT_INV_COL, $18);         \
+        sq($2, 32, blk);                        \
+        sq($18, 80, blk);                       \
+\
+        paddh($19, $23, $3);    /* y3  a3+b3 */ \
+        psubh($19, $23, $19);   /* y4  a3-b3 */ \
+        psrah($3, SHIFT_INV_COL, $3);           \
+        psrah($19, SHIFT_INV_COL, $19);         \
+        sq($3, 48, blk);                        \
+        sq($19, 64, blk);
+
+
+
+#define DCT_8_INV_COL8_PMS() \
+        paddh($16, $20, $2);    /* y0  a0+b0 */ \
+        psubh($16, $20, $20);   /* y7  a0-b0 */ \
+        psrah($2, SHIFT_INV_COL, $16);          \
+        psrah($20, SHIFT_INV_COL, $20);         \
+\
+        paddh($17, $21, $3);    /* y1  a1+b1 */ \
+        psubh($17, $21, $21);   /* y6  a1-b1 */ \
+        psrah($3, SHIFT_INV_COL, $17);          \
+        psrah($21, SHIFT_INV_COL, $21);         \
+\
+        paddh($18, $22, $2);    /* y2  a2+b2 */ \
+        psubh($18, $22, $22);   /* y5  a2-b2 */ \
+        psrah($2, SHIFT_INV_COL, $18);          \
+        psrah($22, SHIFT_INV_COL, $22);         \
+\
+        paddh($19, $23, $3);    /* y3  a3+b3 */ \
+        psubh($19, $23, $23);   /* y4  a3-b3 */ \
+        psrah($3, SHIFT_INV_COL, $19);          \
+        psrah($23, SHIFT_INV_COL, $23);
+
+#define PUT(rs)                 \
+        pminh(rs, $11, $2);     \
+        pmaxh($2, $0, $2);      \
+        ppacb($0, $2, $2);      \
+        sd3(2, 0, 4);           \
+        __asm__ __volatile__ ("add $4, $5, $4");
+
+#define DCT_8_INV_COL8_PUT() \
+        PUT($16);        \
+        PUT($17);        \
+        PUT($18);        \
+        PUT($19);        \
+        PUT($23);        \
+        PUT($22);        \
+        PUT($21);        \
+        PUT($20);
+
+#define ADD(rs)          \
+        ld3(4, 0, 2);        \
+        pextlb($0, $2, $2);  \
+        paddh($2, rs, $2);   \
+        pminh($2, $11, $2);  \
+        pmaxh($2, $0, $2);   \
+        ppacb($0, $2, $2);   \
+        sd3(2, 0, 4); \
+        __asm__ __volatile__ ("add $4, $5, $4");
+
+/*fixme: schedule*/
+#define DCT_8_INV_COL8_ADD() \
+        ADD($16);        \
+        ADD($17);        \
+        ADD($18);        \
+        ADD($19);        \
+        ADD($23);        \
+        ADD($22);        \
+        ADD($21);        \
+        ADD($20);
+
+
+void ff_mmi_idct(int16_t * block)
+{
+        /* $4 = block */
+        __asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
+        lq($24, ROUNDER_0, $8);
+        lq($24, ROUNDER_1, $7);
+        DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8);
+        DCT_8_INV_ROW1($4, 16, TAB_i_17, $7, $9);
+        DCT_8_INV_ROW1($4, 32, TAB_i_26, $7, $10);
+        DCT_8_INV_ROW1($4, 48, TAB_i_35, $7, $11);
+        DCT_8_INV_ROW1($4, 64, TAB_i_04, $7, $12);
+        DCT_8_INV_ROW1($4, 80, TAB_i_35, $7, $13);
+        DCT_8_INV_ROW1($4, 96, TAB_i_26, $7, $14);
+        DCT_8_INV_ROW1($4, 112, TAB_i_17, $7, $15);
+        DCT_8_INV_COL8();
+        DCT_8_INV_COL8_STORE($4);
+
+        //let savedtemp regs be saved
+        __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
+}
+
+
+void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+        /* $4 = dest, $5 = line_size, $6 = block */
+        __asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
+        lq($24, ROUNDER_0, $8);
+        lq($24, ROUNDER_1, $7);
+        DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
+        DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9);
+        DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10);
+        DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11);
+        DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12);
+        DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13);
+        DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14);
+        DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15);
+        DCT_8_INV_COL8();
+        lq($24, CLIPMAX, $11);
+        DCT_8_INV_COL8_PMS();
+        DCT_8_INV_COL8_PUT();
+
+        //let savedtemp regs be saved
+        __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
+}
+
+
+void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+        /* $4 = dest, $5 = line_size, $6 = block */
+        __asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
+        lq($24, ROUNDER_0, $8);
+        lq($24, ROUNDER_1, $7);
+        DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
+        DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9);
+        DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10);
+        DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11);
+        DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12);
+        DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13);
+        DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14);
+        DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15);
+        DCT_8_INV_COL8();
+        lq($24, CLIPMAX, $11);
+        DCT_8_INV_COL8_PMS();
+        DCT_8_INV_COL8_ADD();
+
+        //let savedtemp regs be saved
+        __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
+}
+
diff --git a/contrib/ffmpeg/libavcodec/ps2/mmi.h b/contrib/ffmpeg/libavcodec/ps2/mmi.h
new file mode 100644
index 000000000..e2e49a86c
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ps2/mmi.h
@@ -0,0 +1,172 @@
+/*
+ * copyright (c) 2002 Leon van Stuivenberg
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef __mmi_H
+#define __mmi_H
+
+#define align16 __attribute__ ((aligned (16)))
+
+/*
+#define r0 $zero
+#define r1 $at          //assembler!
+#define r2 $v0          //return
+#define r3 $v1          //return
+#define r4 $a0          //arg
+#define r5 $a1          //arg
+#define r6 $a2          //arg
+#define r7 $a3          //arg
+#define r8 $t0          //temp
+#define r9 $t1          //temp
+#define r10 $t2         //temp
+#define r11 $t3         //temp
+#define r12 $t4         //temp
+#define r13 $t5         //temp
+#define r14 $t6         //temp
+#define r15 $t7         //temp
+#define r16 $s0         //saved temp
+#define r17 $s1         //saved temp
+#define r18 $s2         //saved temp
+#define r19 $s3         //saved temp
+#define r20 $s4         //saved temp
+#define r21 $s5         //saved temp
+#define r22 $s6         //saved temp
+#define r23 $s7         //saved temp
+#define r24 $t8         //temp
+#define r25 $t9         //temp
+#define r26 $k0         //kernel
+#define r27 $k1         //kernel
+#define r28 $gp         //global ptr
+#define r29 $sp         //stack ptr
+#define r30 $fp         //frame ptr
+#define r31 $ra         //return addr
+*/
+
+
+#define         lq(base, off, reg)        \
+        __asm__ __volatile__ ("lq " #reg ", %0("#base ")" : : "i" (off) )
+
+#define         lq2(mem, reg)        \
+        __asm__ __volatile__ ("lq " #reg ", %0" : : "r" (mem))
+
+#define         sq(reg, off, base)        \
+        __asm__ __volatile__ ("sq " #reg ", %0("#base ")" : : "i" (off) )
+
+/*
+#define         ld(base, off, reg)        \
+        __asm__ __volatile__ ("ld " #reg ", " #off "("#base ")")
+*/
+
+#define         ld3(base, off, reg)        \
+        __asm__ __volatile__ (".word %0" : : "i" ( 0xdc000000 | (base<<21) | (reg<<16) | (off)))
+
+#define         ldr3(base, off, reg)        \
+        __asm__ __volatile__ (".word %0" : : "i" ( 0x6c000000 | (base<<21) | (reg<<16) | (off)))
+
+#define         ldl3(base, off, reg)        \
+        __asm__ __volatile__ (".word %0" : : "i" ( 0x68000000 | (base<<21) | (reg<<16) | (off)))
+
+/*
+#define         sd(reg, off, base)        \
+        __asm__ __volatile__ ("sd " #reg ", " #off "("#base ")")
+*/
+//seems assembler has bug encoding mnemonic 'sd', so DIY
+#define         sd3(reg, off, base)        \
+        __asm__ __volatile__ (".word %0" : : "i" ( 0xfc000000 | (base<<21) | (reg<<16) | (off)))
+
+#define         sw(reg, off, base)        \
+        __asm__ __volatile__ ("sw " #reg ", " #off "("#base ")")
+
+#define         sq2(reg, mem)        \
+        __asm__ __volatile__ ("sq " #reg ", %0" : : "m" (*(mem)))
+
+#define         pinth(rs, rt, rd) \
+        __asm__ __volatile__ ("pinth  " #rd ", " #rs ", " #rt )
+
+#define         phmadh(rs, rt, rd) \
+        __asm__ __volatile__ ("phmadh " #rd ", " #rs ", " #rt )
+
+#define         pcpyud(rs, rt, rd) \
+        __asm__ __volatile__ ("pcpyud " #rd ", " #rs ", " #rt )
+
+#define         pcpyld(rs, rt, rd) \
+        __asm__ __volatile__ ("pcpyld " #rd ", " #rs ", " #rt )
+
+#define         pcpyh(rt, rd) \
+        __asm__ __volatile__ ("pcpyh  " #rd ", " #rt )
+
+#define         paddw(rs, rt, rd) \
+        __asm__ __volatile__ ("paddw  " #rd ", " #rs ", " #rt )
+
+#define         pextlw(rs, rt, rd) \
+        __asm__ __volatile__ ("pextlw " #rd ", " #rs ", " #rt )
+
+#define         pextuw(rs, rt, rd) \
+        __asm__ __volatile__ ("pextuw " #rd ", " #rs ", " #rt )
+
+#define         pextlh(rs, rt, rd) \
+        __asm__ __volatile__ ("pextlh " #rd ", " #rs ", " #rt )
+
+#define         pextuh(rs, rt, rd) \
+        __asm__ __volatile__ ("pextuh " #rd ", " #rs ", " #rt )
+
+#define         psubw(rs, rt, rd) \
+        __asm__ __volatile__ ("psubw  " #rd ", " #rs ", " #rt )
+
+#define         psraw(rt, sa, rd) \
+        __asm__ __volatile__ ("psraw  " #rd ", " #rt ", %0" : : "i"(sa) )
+
+#define         ppach(rs, rt, rd) \
+        __asm__ __volatile__ ("ppach  " #rd ", " #rs ", " #rt )
+
+#define         ppacb(rs, rt, rd) \
+        __asm__ __volatile__ ("ppacb  " #rd ", " #rs ", " #rt )
+
+#define         prevh(rt, rd) \
+        __asm__ __volatile__ ("prevh  " #rd ", " #rt )
+
+#define         pmulth(rs, rt, rd) \
+        __asm__ __volatile__ ("pmulth " #rd ", " #rs ", " #rt )
+
+#define         pmaxh(rs, rt, rd) \
+        __asm__ __volatile__ ("pmaxh " #rd ", " #rs ", " #rt )
+
+#define         pminh(rs, rt, rd) \
+        __asm__ __volatile__ ("pminh " #rd ", " #rs ", " #rt )
+
+#define         pinteh(rs, rt, rd) \
+        __asm__ __volatile__ ("pinteh  " #rd ", " #rs ", " #rt )
+
+#define         paddh(rs, rt, rd) \
+        __asm__ __volatile__ ("paddh  " #rd ", " #rs ", " #rt )
+
+#define         psubh(rs, rt, rd) \
+        __asm__ __volatile__ ("psubh  " #rd ", " #rs ", " #rt )
+
+#define         psrah(rt, sa, rd) \
+        __asm__ __volatile__ ("psrah  " #rd ", " #rt ", %0" : : "i"(sa) )
+
+#define         pmfhl_uw(rd) \
+        __asm__ __volatile__ ("pmfhl.uw  " #rd)
+
+#define         pextlb(rs, rt, rd) \
+        __asm__ __volatile__ ("pextlb  " #rd ", " #rs ", " #rt )
+
+#endif
+
diff --git a/contrib/ffmpeg/libavcodec/ps2/mpegvideo_mmi.c b/contrib/ffmpeg/libavcodec/ps2/mpegvideo_mmi.c
new file mode 100644
index 000000000..1e5f08aae
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ps2/mpegvideo_mmi.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2000,2001 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * MMI optimization by Leon van Stuivenberg
+ */
+
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+#include "../avcodec.h"
+
+static void dct_unquantize_h263_mmi(MpegEncContext *s,
+                                  DCTELEM *block, int n, int qscale)
+{
+    int level=0, qmul, qadd;
+    int nCoeffs;
+
+    assert(s->block_last_index[n]>=0);
+
+    qadd = (qscale - 1) | 1;
+    qmul = qscale << 1;
+
+    if (s->mb_intra) {
+        if (!s->h263_aic) {
+            if (n < 4)
+                level = block[0] * s->y_dc_scale;
+            else
+                level = block[0] * s->c_dc_scale;
+        }else {
+            qadd = 0;
+            level = block[0];
+        }
+        nCoeffs= 63; //does not allways use zigzag table
+    } else {
+        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
+    }
+
+    asm volatile(
+        "add    $14, $0, %3     \n\t"
+        "pcpyld $8, %0, %0      \n\t"
+        "pcpyh  $8, $8          \n\t"   //r8 = qmul
+        "pcpyld $9, %1, %1      \n\t"
+        "pcpyh  $9, $9          \n\t"   //r9 = qadd
+        ".p2align 2             \n\t"
+        "1:                     \n\t"
+        "lq     $10, 0($14)     \n\t"   //r10 = level
+        "addi   $14, $14, 16    \n\t"   //block+=8
+        "addi   %2, %2, -8      \n\t"
+        "pcgth  $11, $0, $10    \n\t"   //r11 = level < 0 ? -1 : 0
+        "pcgth  $12, $10, $0    \n\t"   //r12 = level > 0 ? -1 : 0
+        "por    $12, $11, $12   \n\t"
+        "pmulth $10, $10, $8    \n\t"
+        "paddh  $13, $9, $11    \n\t"
+        "pxor   $13, $13, $11   \n\t"   //r13 = level < 0 ? -qadd : qadd
+        "pmfhl.uw $11           \n\t"
+        "pinteh $10, $11, $10   \n\t"   //r10 = level * qmul
+        "paddh  $10, $10, $13   \n\t"
+        "pand   $10, $10, $12   \n\t"
+        "sq     $10, -16($14)   \n\t"
+        "bgez   %2, 1b          \n\t"
+        :: "r"(qmul), "r" (qadd), "r" (nCoeffs), "r" (block) : "$8", "$9", "$10", "$11", "$12", "$13", "$14", "memory" );
+
+    if(s->mb_intra)
+        block[0]= level;
+}
+
+
+void MPV_common_init_mmi(MpegEncContext *s)
+{
+    s->dct_unquantize_h263_intra =
+    s->dct_unquantize_h263_inter = dct_unquantize_h263_mmi;
+}
+
+
diff --git a/contrib/ffmpeg/libavcodec/pthread.c b/contrib/ffmpeg/libavcodec/pthread.c
new file mode 100644
index 000000000..4737211cb
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/pthread.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2004 Roman Shaposhnik.
+ *
+ * Many thanks to Steven M. Schultz for providing clever ideas and
+ * to Michael Niedermayer <michaelni@gmx.at> for writing initial
+ * implementation.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+#include <pthread.h>
+
+#include "avcodec.h"
+#include "common.h"
+
+typedef int (action_t)(AVCodecContext *c, void *arg);
+
+typedef struct ThreadContext {
+    pthread_t *workers;
+    action_t *func;
+    void **args;
+    int *rets;
+    int rets_count;
+    int job_count;
+
+    pthread_cond_t last_job_cond;
+    pthread_cond_t current_job_cond;
+    pthread_mutex_t current_job_lock;
+    int current_job;
+    int done;
+} ThreadContext;
+
+static void* worker(void *v)
+{
+    AVCodecContext *avctx = v;
+    ThreadContext *c = avctx->thread_opaque;
+    int our_job = c->job_count;
+    int thread_count = avctx->thread_count;
+    int self_id;
+
+    pthread_mutex_lock(&c->current_job_lock);
+    self_id = c->current_job++;
+    for (;;){
+        while (our_job >= c->job_count) {
+            if (c->current_job == thread_count + c->job_count)
+                pthread_cond_signal(&c->last_job_cond);
+
+            pthread_cond_wait(&c->current_job_cond, &c->current_job_lock);
+            our_job = self_id;
+
+            if (c->done) {
+                pthread_mutex_unlock(&c->current_job_lock);
+                return NULL;
+            }
+        }
+        pthread_mutex_unlock(&c->current_job_lock);
+
+        c->rets[our_job%c->rets_count] = c->func(avctx, c->args[our_job]);
+
+        pthread_mutex_lock(&c->current_job_lock);
+        our_job = c->current_job++;
+    }
+}
+
+static always_inline void avcodec_thread_park_workers(ThreadContext *c, int thread_count)
+{
+    pthread_cond_wait(&c->last_job_cond, &c->current_job_lock);
+    pthread_mutex_unlock(&c->current_job_lock);
+}
+
+void avcodec_thread_free(AVCodecContext *avctx)
+{
+    ThreadContext *c = avctx->thread_opaque;
+    int i;
+
+    pthread_mutex_lock(&c->current_job_lock);
+    c->done = 1;
+    pthread_cond_broadcast(&c->current_job_cond);
+    pthread_mutex_unlock(&c->current_job_lock);
+
+    for (i=0; i<avctx->thread_count; i++)
+         pthread_join(c->workers[i], NULL);
+
+    pthread_mutex_destroy(&c->current_job_lock);
+    pthread_cond_destroy(&c->current_job_cond);
+    pthread_cond_destroy(&c->last_job_cond);
+    av_free(c->workers);
+    av_free(c);
+}
+
+int avcodec_thread_execute(AVCodecContext *avctx, action_t* func, void **arg, int *ret, int job_count)
+{
+    ThreadContext *c= avctx->thread_opaque;
+    int dummy_ret;
+
+    if (job_count <= 0)
+        return 0;
+
+    pthread_mutex_lock(&c->current_job_lock);
+
+    c->current_job = avctx->thread_count;
+    c->job_count = job_count;
+    c->args = arg;
+    c->func = func;
+    if (ret) {
+        c->rets = ret;
+        c->rets_count = job_count;
+    } else {
+        c->rets = &dummy_ret;
+        c->rets_count = 1;
+    }
+    pthread_cond_broadcast(&c->current_job_cond);
+
+    avcodec_thread_park_workers(c, avctx->thread_count);
+
+    return 0;
+}
+
+int avcodec_thread_init(AVCodecContext *avctx, int thread_count)
+{
+    int i;
+    ThreadContext *c;
+
+    c = av_mallocz(sizeof(ThreadContext));
+    if (!c)
+        return -1;
+
+    c->workers = av_mallocz(sizeof(pthread_t)*thread_count);
+    if (!c->workers) {
+        av_free(c);
+        return -1;
+    }
+
+    avctx->thread_opaque = c;
+    avctx->thread_count = thread_count;
+    c->current_job = 0;
+    c->job_count = 0;
+    c->done = 0;
+    pthread_cond_init(&c->current_job_cond, NULL);
+    pthread_cond_init(&c->last_job_cond, NULL);
+    pthread_mutex_init(&c->current_job_lock, NULL);
+    pthread_mutex_lock(&c->current_job_lock);
+    for (i=0; i<thread_count; i++) {
+        if(pthread_create(&c->workers[i], NULL, worker, avctx)) {
+           avctx->thread_count = i;
+           pthread_mutex_unlock(&c->current_job_lock);
+           avcodec_thread_free(avctx);
+           return -1;
+        }
+    }
+
+    avcodec_thread_park_workers(c, thread_count);
+
+    avctx->execute = avcodec_thread_execute;
+    return 0;
+}
diff --git a/contrib/ffmpeg/libavcodec/qdm2.c b/contrib/ffmpeg/libavcodec/qdm2.c
new file mode 100644
index 000000000..b9462f3cb
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/qdm2.c
@@ -0,0 +1,2042 @@
+/*
+ * QDM2 compatible decoder
+ * Copyright (c) 2003 Ewald Snel
+ * Copyright (c) 2005 Benjamin Larsson
+ * Copyright (c) 2005 Alex Beregszaszi
+ * Copyright (c) 2005 Roberto Togni
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file qdm2.c
+ * QDM2 decoder
+ * @author Ewald Snel, Benjamin Larsson, Alex Beregszaszi, Roberto Togni
+ * The decoder is not perfect yet, there are still some distortions
+ * especially on files encoded with 16 or 8 subbands.
+ */
+
+#include <math.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#define ALT_BITSTREAM_READER_LE
+#include "avcodec.h"
+#include "bitstream.h"
+#include "dsputil.h"
+
+#ifdef CONFIG_MPEGAUDIO_HP
+#define USE_HIGHPRECISION
+#endif
+
+#include "mpegaudio.h"
+
+#include "qdm2data.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+
+#define SOFTCLIP_THRESHOLD 27600
+#define HARDCLIP_THRESHOLD 35716
+
+
+#define QDM2_LIST_ADD(list, size, packet) \
+do { \
+      if (size > 0) { \
+    list[size - 1].next = &list[size]; \
+      } \
+      list[size].packet = packet; \
+      list[size].next = NULL; \
+      size++; \
+} while(0)
+
+// Result is 8, 16 or 30
+#define QDM2_SB_USED(sub_sampling) (((sub_sampling) >= 2) ? 30 : 8 << (sub_sampling))
+
+#define FIX_NOISE_IDX(noise_idx) \
+  if ((noise_idx) >= 3840) \
+    (noise_idx) -= 3840; \
+
+#define SB_DITHERING_NOISE(sb,noise_idx) (noise_table[(noise_idx)++] * sb_noise_attenuation[(sb)])
+
+#define BITS_LEFT(length,gb) ((length) - get_bits_count ((gb)))
+
+#define SAMPLES_NEEDED \
+     av_log (NULL,AV_LOG_INFO,"This file triggers some untested code. Please contact the developers.\n");
+
+#define SAMPLES_NEEDED_2(why) \
+     av_log (NULL,AV_LOG_INFO,"This file triggers some missing code. Please contact the developers.\nPosition: %s\n",why);
+
+
+typedef int8_t sb_int8_array[2][30][64];
+
+/**
+ * Subpacket
+ */
+typedef struct {
+    int type;            ///< subpacket type
+    unsigned int size;   ///< subpacket size
+    const uint8_t *data; ///< pointer to subpacket data (points to input data buffer, it's not a private copy)
+} QDM2SubPacket;
+
+/**
+ * A node in the subpacket list
+ */
+typedef struct _QDM2SubPNode {
+    QDM2SubPacket *packet;      ///< packet
+    struct _QDM2SubPNode *next; ///< pointer to next packet in the list, NULL if leaf node
+} QDM2SubPNode;
+
+typedef struct {
+    float level;
+    float *samples_im;
+    float *samples_re;
+    float *table;
+    int   phase;
+    int   phase_shift;
+    int   duration;
+    short time_index;
+    short cutoff;
+} FFTTone;
+
+typedef struct {
+    int16_t sub_packet;
+    uint8_t channel;
+    int16_t offset;
+    int16_t exp;
+    uint8_t phase;
+} FFTCoefficient;
+
+typedef struct {
+    float re;
+    float im;
+} QDM2Complex;
+
+typedef struct {
+    QDM2Complex complex[256 + 1] __attribute__((aligned(16)));
+    float       samples_im[MPA_MAX_CHANNELS][256];
+    float       samples_re[MPA_MAX_CHANNELS][256];
+} QDM2FFT;
+
+/**
+ * QDM2 decoder context
+ */
+typedef struct {
+    /// Parameters from codec header, do not change during playback
+    int nb_channels;         ///< number of channels
+    int channels;            ///< number of channels
+    int group_size;          ///< size of frame group (16 frames per group)
+    int fft_size;            ///< size of FFT, in complex numbers
+    int checksum_size;       ///< size of data block, used also for checksum
+
+    /// Parameters built from header parameters, do not change during playback
+    int group_order;         ///< order of frame group
+    int fft_order;           ///< order of FFT (actually fftorder+1)
+    int fft_frame_size;      ///< size of fft frame, in components (1 comples = re + im)
+    int frame_size;          ///< size of data frame
+    int frequency_range;
+    int sub_sampling;        ///< subsampling: 0=25%, 1=50%, 2=100% */
+    int coeff_per_sb_select; ///< selector for "num. of coeffs. per subband" tables. Can be 0, 1, 2
+    int cm_table_select;     ///< selector for "coding method" tables. Can be 0, 1 (from init: 0-4)
+
+    /// Packets and packet lists
+    QDM2SubPacket sub_packets[16];      ///< the packets themselves
+    QDM2SubPNode sub_packet_list_A[16]; ///< list of all packets
+    QDM2SubPNode sub_packet_list_B[16]; ///< FFT packets B are on list
+    int sub_packets_B;                  ///< number of packets on 'B' list
+    QDM2SubPNode sub_packet_list_C[16]; ///< packets with errors?
+    QDM2SubPNode sub_packet_list_D[16]; ///< DCT packets
+
+    /// FFT and tones
+    FFTTone fft_tones[1000];
+    int fft_tone_start;
+    int fft_tone_end;
+    FFTCoefficient fft_coefs[1000];
+    int fft_coefs_index;
+    int fft_coefs_min_index[5];
+    int fft_coefs_max_index[5];
+    int fft_level_exp[6];
+    FFTContext fft_ctx;
+    FFTComplex exptab[128];
+    QDM2FFT fft;
+
+    /// I/O data
+    uint8_t *compressed_data;
+    int compressed_size;
+    float output_buffer[1024];
+
+    /// Synthesis filter
+    MPA_INT synth_buf[MPA_MAX_CHANNELS][512*2] __attribute__((aligned(16)));
+    int synth_buf_offset[MPA_MAX_CHANNELS];
+    int32_t sb_samples[MPA_MAX_CHANNELS][128][SBLIMIT] __attribute__((aligned(16)));
+
+    /// Mixed temporary data used in decoding
+    float tone_level[MPA_MAX_CHANNELS][30][64];
+    int8_t coding_method[MPA_MAX_CHANNELS][30][64];
+    int8_t quantized_coeffs[MPA_MAX_CHANNELS][10][8];
+    int8_t tone_level_idx_base[MPA_MAX_CHANNELS][30][8];
+    int8_t tone_level_idx_hi1[MPA_MAX_CHANNELS][3][8][8];
+    int8_t tone_level_idx_mid[MPA_MAX_CHANNELS][26][8];
+    int8_t tone_level_idx_hi2[MPA_MAX_CHANNELS][26];
+    int8_t tone_level_idx[MPA_MAX_CHANNELS][30][64];
+    int8_t tone_level_idx_temp[MPA_MAX_CHANNELS][30][64];
+
+    // Flags
+    int has_errors;         ///< packet has errors
+    int superblocktype_2_3; ///< select fft tables and some algorithm based on superblock type
+    int do_synth_filter;    ///< used to perform or skip synthesis filter
+
+    int sub_packet;
+    int noise_idx; ///< index for dithering noise table
+} QDM2Context;
+
+
+static uint8_t empty_buffer[FF_INPUT_BUFFER_PADDING_SIZE];
+
+static VLC vlc_tab_level;
+static VLC vlc_tab_diff;
+static VLC vlc_tab_run;
+static VLC fft_level_exp_alt_vlc;
+static VLC fft_level_exp_vlc;
+static VLC fft_stereo_exp_vlc;
+static VLC fft_stereo_phase_vlc;
+static VLC vlc_tab_tone_level_idx_hi1;
+static VLC vlc_tab_tone_level_idx_mid;
+static VLC vlc_tab_tone_level_idx_hi2;
+static VLC vlc_tab_type30;
+static VLC vlc_tab_type34;
+static VLC vlc_tab_fft_tone_offset[5];
+
+static uint16_t softclip_table[HARDCLIP_THRESHOLD - SOFTCLIP_THRESHOLD + 1];
+static float noise_table[4096];
+static uint8_t random_dequant_index[256][5];
+static uint8_t random_dequant_type24[128][3];
+static float noise_samples[128];
+
+static MPA_INT mpa_window[512] __attribute__((aligned(16)));
+
+
+static void softclip_table_init(void) {
+    int i;
+    double dfl = SOFTCLIP_THRESHOLD - 32767;
+    float delta = 1.0 / -dfl;
+    for (i = 0; i < HARDCLIP_THRESHOLD - SOFTCLIP_THRESHOLD + 1; i++)
+        softclip_table[i] = SOFTCLIP_THRESHOLD - ((int)(sin((float)i * delta) * dfl) & 0x0000FFFF);
+}
+
+
+// random generated table
+static void rnd_table_init(void) {
+    int i,j;
+    uint32_t ldw,hdw;
+    uint64_t tmp64_1;
+    uint64_t random_seed = 0;
+    float delta = 1.0 / 16384.0;
+    for(i = 0; i < 4096 ;i++) {
+        random_seed = random_seed * 214013 + 2531011;
+        noise_table[i] = (delta * (float)(((int32_t)random_seed >> 16) & 0x00007FFF)- 1.0) * 1.3;
+    }
+
+    for (i = 0; i < 256 ;i++) {
+        random_seed = 81;
+        ldw = i;
+        for (j = 0; j < 5 ;j++) {
+            random_dequant_index[i][j] = (uint8_t)((ldw / random_seed) & 0xFF);
+            ldw = (uint32_t)ldw % (uint32_t)random_seed;
+            tmp64_1 = (random_seed * 0x55555556);
+            hdw = (uint32_t)(tmp64_1 >> 32);
+            random_seed = (uint64_t)(hdw + (ldw >> 31));
+        }
+    }
+    for (i = 0; i < 128 ;i++) {
+        random_seed = 25;
+        ldw = i;
+        for (j = 0; j < 3 ;j++) {
+            random_dequant_type24[i][j] = (uint8_t)((ldw / random_seed) & 0xFF);
+            ldw = (uint32_t)ldw % (uint32_t)random_seed;
+            tmp64_1 = (random_seed * 0x66666667);
+            hdw = (uint32_t)(tmp64_1 >> 33);
+            random_seed = hdw + (ldw >> 31);
+        }
+    }
+}
+
+
+static void init_noise_samples(void) {
+    int i;
+    int random_seed = 0;
+    float delta = 1.0 / 16384.0;
+    for (i = 0; i < 128;i++) {
+        random_seed = random_seed * 214013 + 2531011;
+        noise_samples[i] = (delta * (float)((random_seed >> 16) & 0x00007fff) - 1.0);
+    }
+}
+
+
+static void qdm2_init_vlc(void)
+{
+    init_vlc (&vlc_tab_level, 8, 24,
+        vlc_tab_level_huffbits, 1, 1,
+        vlc_tab_level_huffcodes, 2, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&vlc_tab_diff, 8, 37,
+        vlc_tab_diff_huffbits, 1, 1,
+        vlc_tab_diff_huffcodes, 2, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&vlc_tab_run, 5, 6,
+        vlc_tab_run_huffbits, 1, 1,
+        vlc_tab_run_huffcodes, 1, 1, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&fft_level_exp_alt_vlc, 8, 28,
+        fft_level_exp_alt_huffbits, 1, 1,
+        fft_level_exp_alt_huffcodes, 2, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&fft_level_exp_vlc, 8, 20,
+        fft_level_exp_huffbits, 1, 1,
+        fft_level_exp_huffcodes, 2, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&fft_stereo_exp_vlc, 6, 7,
+        fft_stereo_exp_huffbits, 1, 1,
+        fft_stereo_exp_huffcodes, 1, 1, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&fft_stereo_phase_vlc, 6, 9,
+        fft_stereo_phase_huffbits, 1, 1,
+        fft_stereo_phase_huffcodes, 1, 1, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&vlc_tab_tone_level_idx_hi1, 8, 20,
+        vlc_tab_tone_level_idx_hi1_huffbits, 1, 1,
+        vlc_tab_tone_level_idx_hi1_huffcodes, 2, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&vlc_tab_tone_level_idx_mid, 8, 24,
+        vlc_tab_tone_level_idx_mid_huffbits, 1, 1,
+        vlc_tab_tone_level_idx_mid_huffcodes, 2, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&vlc_tab_tone_level_idx_hi2, 8, 24,
+        vlc_tab_tone_level_idx_hi2_huffbits, 1, 1,
+        vlc_tab_tone_level_idx_hi2_huffcodes, 2, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&vlc_tab_type30, 6, 9,
+        vlc_tab_type30_huffbits, 1, 1,
+        vlc_tab_type30_huffcodes, 1, 1, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&vlc_tab_type34, 5, 10,
+        vlc_tab_type34_huffbits, 1, 1,
+        vlc_tab_type34_huffcodes, 1, 1, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&vlc_tab_fft_tone_offset[0], 8, 23,
+        vlc_tab_fft_tone_offset_0_huffbits, 1, 1,
+        vlc_tab_fft_tone_offset_0_huffcodes, 2, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&vlc_tab_fft_tone_offset[1], 8, 28,
+        vlc_tab_fft_tone_offset_1_huffbits, 1, 1,
+        vlc_tab_fft_tone_offset_1_huffcodes, 2, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&vlc_tab_fft_tone_offset[2], 8, 32,
+        vlc_tab_fft_tone_offset_2_huffbits, 1, 1,
+        vlc_tab_fft_tone_offset_2_huffcodes, 2, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&vlc_tab_fft_tone_offset[3], 8, 35,
+        vlc_tab_fft_tone_offset_3_huffbits, 1, 1,
+        vlc_tab_fft_tone_offset_3_huffcodes, 2, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+
+    init_vlc (&vlc_tab_fft_tone_offset[4], 8, 38,
+        vlc_tab_fft_tone_offset_4_huffbits, 1, 1,
+        vlc_tab_fft_tone_offset_4_huffcodes, 2, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE);
+}
+
+
+/* for floating point to fixed point conversion */
+static float f2i_scale = (float) (1 << (FRAC_BITS - 15));
+
+
+static int qdm2_get_vlc (GetBitContext *gb, VLC *vlc, int flag, int depth)
+{
+    int value;
+
+    value = get_vlc2(gb, vlc->table, vlc->bits, depth);
+
+    /* stage-2, 3 bits exponent escape sequence */
+    if (value-- == 0)
+        value = get_bits (gb, get_bits (gb, 3) + 1);
+
+    /* stage-3, optional */
+    if (flag) {
+        int tmp = vlc_stage3_values[value];
+
+        if ((value & ~3) > 0)
+            tmp += get_bits (gb, (value >> 2));
+        value = tmp;
+    }
+
+    return value;
+}
+
+
+static int qdm2_get_se_vlc (VLC *vlc, GetBitContext *gb, int depth)
+{
+    int value = qdm2_get_vlc (gb, vlc, 0, depth);
+
+    return (value & 1) ? ((value + 1) >> 1) : -(value >> 1);
+}
+
+
+/**
+ * QDM2 checksum
+ *
+ * @param data      pointer to data to be checksum'ed
+ * @param length    data length
+ * @param value     checksum value
+ *
+ * @return          0 if checksum is OK
+ */
+static uint16_t qdm2_packet_checksum (uint8_t *data, int length, int value) {
+    int i;
+
+    for (i=0; i < length; i++)
+        value -= data[i];
+
+    return (uint16_t)(value & 0xffff);
+}
+
+
+/**
+ * Fills a QDM2SubPacket structure with packet type, size, and data pointer.
+ *
+ * @param gb            bitreader context
+ * @param sub_packet    packet under analysis
+ */
+static void qdm2_decode_sub_packet_header (GetBitContext *gb, QDM2SubPacket *sub_packet)
+{
+    sub_packet->type = get_bits (gb, 8);
+
+    if (sub_packet->type == 0) {
+        sub_packet->size = 0;
+        sub_packet->data = NULL;
+    } else {
+        sub_packet->size = get_bits (gb, 8);
+
+      if (sub_packet->type & 0x80) {
+          sub_packet->size <<= 8;
+          sub_packet->size  |= get_bits (gb, 8);
+          sub_packet->type  &= 0x7f;
+      }
+
+      if (sub_packet->type == 0x7f)
+          sub_packet->type |= (get_bits (gb, 8) << 8);
+
+      sub_packet->data = &gb->buffer[get_bits_count(gb) / 8]; // FIXME: this depends on bitreader internal data
+    }
+
+    av_log(NULL,AV_LOG_DEBUG,"Subpacket: type=%d size=%d start_offs=%x\n",
+        sub_packet->type, sub_packet->size, get_bits_count(gb) / 8);
+}
+
+
+/**
+ * Return node pointer to first packet of requested type in list.
+ *
+ * @param list    list of subpackets to be scanned
+ * @param type    type of searched subpacket
+ * @return        node pointer for subpacket if found, else NULL
+ */
+static QDM2SubPNode* qdm2_search_subpacket_type_in_list (QDM2SubPNode *list, int type)
+{
+    while (list != NULL && list->packet != NULL) {
+        if (list->packet->type == type)
+            return list;
+        list = list->next;
+    }
+    return NULL;
+}
+
+
+/**
+ * Replaces 8 elements with their average value.
+ * Called by qdm2_decode_superblock before starting subblock decoding.
+ *
+ * @param q       context
+ */
+static void average_quantized_coeffs (QDM2Context *q)
+{
+    int i, j, n, ch, sum;
+
+    n = coeff_per_sb_for_avg[q->coeff_per_sb_select][QDM2_SB_USED(q->sub_sampling) - 1] + 1;
+
+    for (ch = 0; ch < q->nb_channels; ch++)
+        for (i = 0; i < n; i++) {
+            sum = 0;
+
+            for (j = 0; j < 8; j++)
+                sum += q->quantized_coeffs[ch][i][j];
+
+            sum /= 8;
+            if (sum > 0)
+                sum--;
+
+            for (j=0; j < 8; j++)
+                q->quantized_coeffs[ch][i][j] = sum;
+        }
+}
+
+
+/**
+ * Build subband samples with noise weighted by q->tone_level.
+ * Called by synthfilt_build_sb_samples.
+ *
+ * @param q     context
+ * @param sb    subband index
+ */
+static void build_sb_samples_from_noise (QDM2Context *q, int sb)
+{
+    int ch, j;
+
+    FIX_NOISE_IDX(q->noise_idx);
+
+    if (!q->nb_channels)
+        return;
+
+    for (ch = 0; ch < q->nb_channels; ch++)
+        for (j = 0; j < 64; j++) {
+            q->sb_samples[ch][j * 2][sb] = (int32_t)(f2i_scale * SB_DITHERING_NOISE(sb,q->noise_idx) * q->tone_level[ch][sb][j] + .5);
+            q->sb_samples[ch][j * 2 + 1][sb] = (int32_t)(f2i_scale * SB_DITHERING_NOISE(sb,q->noise_idx) * q->tone_level[ch][sb][j] + .5);
+        }
+}
+
+
+/**
+ * Called while processing data from subpackets 11 and 12.
+ * Used after making changes to coding_method array.
+ *
+ * @param sb               subband index
+ * @param channels         number of channels
+ * @param coding_method    q->coding_method[0][0][0]
+ */
+static void fix_coding_method_array (int sb, int channels, sb_int8_array coding_method)
+{
+    int j,k;
+    int ch;
+    int run, case_val;
+    int switchtable[23] = {0,5,1,5,5,5,5,5,2,5,5,5,5,5,5,5,3,5,5,5,5,5,4};
+
+    for (ch = 0; ch < channels; ch++) {
+        for (j = 0; j < 64; ) {
+            if((coding_method[ch][sb][j] - 8) > 22) {
+                run = 1;
+                case_val = 8;
+            } else {
+                switch (switchtable[coding_method[ch][sb][j]-8]) {
+                    case 0: run = 10; case_val = 10; break;
+                    case 1: run = 1; case_val = 16; break;
+                    case 2: run = 5; case_val = 24; break;
+                    case 3: run = 3; case_val = 30; break;
+                    case 4: run = 1; case_val = 30; break;
+                    case 5: run = 1; case_val = 8; break;
+                    default: run = 1; case_val = 8; break;
+                }
+            }
+            for (k = 0; k < run; k++)
+                if (j + k < 128)
+                    if (coding_method[ch][sb + (j + k) / 64][(j + k) % 64] > coding_method[ch][sb][j])
+                        if (k > 0) {
+                           SAMPLES_NEEDED
+                            //not debugged, almost never used
+                            memset(&coding_method[ch][sb][j + k], case_val, k * sizeof(int8_t));
+                            memset(&coding_method[ch][sb][j + k], case_val, 3 * sizeof(int8_t));
+                        }
+            j += run;
+        }
+    }
+}
+
+
+/**
+ * Related to synthesis filter
+ * Called by process_subpacket_10
+ *
+ * @param q       context
+ * @param flag    1 if called after getting data from subpacket 10, 0 if no subpacket 10
+ */
+static void fill_tone_level_array (QDM2Context *q, int flag)
+{
+    int i, sb, ch, sb_used;
+    int tmp, tab;
+
+    // This should never happen
+    if (q->nb_channels <= 0)
+        return;
+
+    for (ch = 0; ch < q->nb_channels; ch++)
+        for (sb = 0; sb < 30; sb++)
+            for (i = 0; i < 8; i++) {
+                if ((tab=coeff_per_sb_for_dequant[q->coeff_per_sb_select][sb]) < (last_coeff[q->coeff_per_sb_select] - 1))
+                    tmp = q->quantized_coeffs[ch][tab + 1][i] * dequant_table[q->coeff_per_sb_select][tab + 1][sb]+
+                          q->quantized_coeffs[ch][tab][i] * dequant_table[q->coeff_per_sb_select][tab][sb];
+                else
+                    tmp = q->quantized_coeffs[ch][tab][i] * dequant_table[q->coeff_per_sb_select][tab][sb];
+                if(tmp < 0)
+                    tmp += 0xff;
+                q->tone_level_idx_base[ch][sb][i] = (tmp / 256) & 0xff;
+            }
+
+    sb_used = QDM2_SB_USED(q->sub_sampling);
+
+    if ((q->superblocktype_2_3 != 0) && !flag) {
+        for (sb = 0; sb < sb_used; sb++)
+            for (ch = 0; ch < q->nb_channels; ch++)
+                for (i = 0; i < 64; i++) {
+                    q->tone_level_idx[ch][sb][i] = q->tone_level_idx_base[ch][sb][i / 8];
+                    if (q->tone_level_idx[ch][sb][i] < 0)
+                        q->tone_level[ch][sb][i] = 0;
+                    else
+                        q->tone_level[ch][sb][i] = fft_tone_level_table[0][q->tone_level_idx[ch][sb][i] & 0x3f];
+                }
+    } else {
+        tab = q->superblocktype_2_3 ? 0 : 1;
+        for (sb = 0; sb < sb_used; sb++) {
+            if ((sb >= 4) && (sb <= 23)) {
+                for (ch = 0; ch < q->nb_channels; ch++)
+                    for (i = 0; i < 64; i++) {
+                        tmp = q->tone_level_idx_base[ch][sb][i / 8] -
+                              q->tone_level_idx_hi1[ch][sb / 8][i / 8][i % 8] -
+                              q->tone_level_idx_mid[ch][sb - 4][i / 8] -
+                              q->tone_level_idx_hi2[ch][sb - 4];
+                        q->tone_level_idx[ch][sb][i] = tmp & 0xff;
+                        if ((tmp < 0) || (!q->superblocktype_2_3 && !tmp))
+                            q->tone_level[ch][sb][i] = 0;
+                        else
+                            q->tone_level[ch][sb][i] = fft_tone_level_table[tab][tmp & 0x3f];
+                }
+            } else {
+                if (sb > 4) {
+                    for (ch = 0; ch < q->nb_channels; ch++)
+                        for (i = 0; i < 64; i++) {
+                            tmp = q->tone_level_idx_base[ch][sb][i / 8] -
+                                  q->tone_level_idx_hi1[ch][2][i / 8][i % 8] -
+                                  q->tone_level_idx_hi2[ch][sb - 4];
+                            q->tone_level_idx[ch][sb][i] = tmp & 0xff;
+                            if ((tmp < 0) || (!q->superblocktype_2_3 && !tmp))
+                                q->tone_level[ch][sb][i] = 0;
+                            else
+                                q->tone_level[ch][sb][i] = fft_tone_level_table[tab][tmp & 0x3f];
+                    }
+                } else {
+                    for (ch = 0; ch < q->nb_channels; ch++)
+                        for (i = 0; i < 64; i++) {
+                            tmp = q->tone_level_idx[ch][sb][i] = q->tone_level_idx_base[ch][sb][i / 8];
+                            if ((tmp < 0) || (!q->superblocktype_2_3 && !tmp))
+                                q->tone_level[ch][sb][i] = 0;
+                            else
+                                q->tone_level[ch][sb][i] = fft_tone_level_table[tab][tmp & 0x3f];
+                        }
+                }
+            }
+        }
+    }
+
+    return;
+}
+
+
+/**
+ * Related to synthesis filter
+ * Called by process_subpacket_11
+ * c is built with data from subpacket 11
+ * Most of this function is used only if superblock_type_2_3 == 0, never seen it in samples
+ *
+ * @param tone_level_idx
+ * @param tone_level_idx_temp
+ * @param coding_method        q->coding_method[0][0][0]
+ * @param nb_channels          number of channels
+ * @param c                    coming from subpacket 11, passed as 8*c
+ * @param superblocktype_2_3   flag based on superblock packet type
+ * @param cm_table_select      q->cm_table_select
+ */
+static void fill_coding_method_array (sb_int8_array tone_level_idx, sb_int8_array tone_level_idx_temp,
+                sb_int8_array coding_method, int nb_channels,
+                int c, int superblocktype_2_3, int cm_table_select)
+{
+    int ch, sb, j;
+    int tmp, acc, esp_40, comp;
+    int add1, add2, add3, add4;
+    int64_t multres;
+
+    // This should never happen
+    if (nb_channels <= 0)
+        return;
+
+    if (!superblocktype_2_3) {
+        /* This case is untested, no samples available */
+        SAMPLES_NEEDED
+        for (ch = 0; ch < nb_channels; ch++)
+            for (sb = 0; sb < 30; sb++) {
+                for (j = 1; j < 64; j++) {
+                    add1 = tone_level_idx[ch][sb][j] - 10;
+                    if (add1 < 0)
+                        add1 = 0;
+                    add2 = add3 = add4 = 0;
+                    if (sb > 1) {
+                        add2 = tone_level_idx[ch][sb - 2][j] + tone_level_idx_offset_table[sb][0] - 6;
+                        if (add2 < 0)
+                            add2 = 0;
+                    }
+                    if (sb > 0) {
+                        add3 = tone_level_idx[ch][sb - 1][j] + tone_level_idx_offset_table[sb][1] - 6;
+                        if (add3 < 0)
+                            add3 = 0;
+                    }
+                    if (sb < 29) {
+                        add4 = tone_level_idx[ch][sb + 1][j] + tone_level_idx_offset_table[sb][3] - 6;
+                        if (add4 < 0)
+                            add4 = 0;
+                    }
+                    tmp = tone_level_idx[ch][sb][j + 1] * 2 - add4 - add3 - add2 - add1;
+                    if (tmp < 0)
+                        tmp = 0;
+                    tone_level_idx_temp[ch][sb][j + 1] = tmp & 0xff;
+                }
+                tone_level_idx_temp[ch][sb][0] = tone_level_idx_temp[ch][sb][1];
+            }
+            acc = 0;
+            for (ch = 0; ch < nb_channels; ch++)
+                for (sb = 0; sb < 30; sb++)
+                    for (j = 0; j < 64; j++)
+                        acc += tone_level_idx_temp[ch][sb][j];
+            if (acc)
+                tmp = c * 256 / (acc & 0xffff);
+            multres = 0x66666667 * (acc * 10);
+            esp_40 = (multres >> 32) / 8 + ((multres & 0xffffffff) >> 31);
+            for (ch = 0;  ch < nb_channels; ch++)
+                for (sb = 0; sb < 30; sb++)
+                    for (j = 0; j < 64; j++) {
+                        comp = tone_level_idx_temp[ch][sb][j]* esp_40 * 10;
+                        if (comp < 0)
+                            comp += 0xff;
+                        comp /= 256; // signed shift
+                        switch(sb) {
+                            case 0:
+                                if (comp < 30)
+                                    comp = 30;
+                                comp += 15;
+                                break;
+                            case 1:
+                                if (comp < 24)
+                                    comp = 24;
+                                comp += 10;
+                                break;
+                            case 2:
+                            case 3:
+                            case 4:
+                                if (comp < 16)
+                                    comp = 16;
+                        }
+                        if (comp <= 5)
+                            tmp = 0;
+                        else if (comp <= 10)
+                            tmp = 10;
+                        else if (comp <= 16)
+                            tmp = 16;
+                        else if (comp <= 24)
+                            tmp = -1;
+                        else
+                            tmp = 0;
+                        coding_method[ch][sb][j] = ((tmp & 0xfffa) + 30 )& 0xff;
+                    }
+            for (sb = 0; sb < 30; sb++)
+                fix_coding_method_array(sb, nb_channels, coding_method);
+            for (ch = 0; ch < nb_channels; ch++)
+                for (sb = 0; sb < 30; sb++)
+                    for (j = 0; j < 64; j++)
+                        if (sb >= 10) {
+                            if (coding_method[ch][sb][j] < 10)
+                                coding_method[ch][sb][j] = 10;
+                        } else {
+                            if (sb >= 2) {
+                                if (coding_method[ch][sb][j] < 16)
+                                    coding_method[ch][sb][j] = 16;
+                            } else {
+                                if (coding_method[ch][sb][j] < 30)
+                                    coding_method[ch][sb][j] = 30;
+                            }
+                        }
+    } else { // superblocktype_2_3 != 0
+        for (ch = 0; ch < nb_channels; ch++)
+            for (sb = 0; sb < 30; sb++)
+                for (j = 0; j < 64; j++)
+                    coding_method[ch][sb][j] = coding_method_table[cm_table_select][sb];
+    }
+
+    return;
+}
+
+
+/**
+ *
+ * Called by process_subpacket_11 to process more data from subpacket 11 with sb 0-8
+ * Called by process_subpacket_12 to process data from subpacket 12 with sb 8-sb_used
+ *
+ * @param q         context
+ * @param gb        bitreader context
+ * @param length    packet length in bits
+ * @param sb_min    lower subband processed (sb_min included)
+ * @param sb_max    higher subband processed (sb_max excluded)
+ */
+static void synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int length, int sb_min, int sb_max)
+{
+    int sb, j, k, n, ch, run, channels;
+    int joined_stereo, zero_encoding, chs;
+    int type34_first;
+    float type34_div = 0;
+    float type34_predictor;
+    float samples[10], sign_bits[16];
+
+    if (length == 0) {
+        // If no data use noise
+        for (sb=sb_min; sb < sb_max; sb++)
+            build_sb_samples_from_noise (q, sb);
+
+        return;
+    }
+
+    for (sb = sb_min; sb < sb_max; sb++) {
+        FIX_NOISE_IDX(q->noise_idx);
+
+        channels = q->nb_channels;
+
+        if (q->nb_channels <= 1 || sb < 12)
+            joined_stereo = 0;
+        else if (sb >= 24)
+            joined_stereo = 1;
+        else
+            joined_stereo = (BITS_LEFT(length,gb) >= 1) ? get_bits1 (gb) : 0;
+
+        if (joined_stereo) {
+            if (BITS_LEFT(length,gb) >= 16)
+                for (j = 0; j < 16; j++)
+                    sign_bits[j] = get_bits1 (gb);
+
+            for (j = 0; j < 64; j++)
+                if (q->coding_method[1][sb][j] > q->coding_method[0][sb][j])
+                    q->coding_method[0][sb][j] = q->coding_method[1][sb][j];
+
+            fix_coding_method_array(sb, q->nb_channels, q->coding_method);
+            channels = 1;
+        }
+
+        for (ch = 0; ch < channels; ch++) {
+            zero_encoding = (BITS_LEFT(length,gb) >= 1) ? get_bits1(gb) : 0;
+            type34_predictor = 0.0;
+            type34_first = 1;
+
+            for (j = 0; j < 128; ) {
+                switch (q->coding_method[ch][sb][j / 2]) {
+                    case 8:
+                        if (BITS_LEFT(length,gb) >= 10) {
+                            if (zero_encoding) {
+                                for (k = 0; k < 5; k++) {
+                                    if ((j + 2 * k) >= 128)
+                                        break;
+                                    samples[2 * k] = get_bits1(gb) ? dequant_1bit[joined_stereo][2 * get_bits1(gb)] : 0;
+                                }
+                            } else {
+                                n = get_bits(gb, 8);
+                                for (k = 0; k < 5; k++)
+                                    samples[2 * k] = dequant_1bit[joined_stereo][random_dequant_index[n][k]];
+                            }
+                            for (k = 0; k < 5; k++)
+                                samples[2 * k + 1] = SB_DITHERING_NOISE(sb,q->noise_idx);
+                        } else {
+                            for (k = 0; k < 10; k++)
+                                samples[k] = SB_DITHERING_NOISE(sb,q->noise_idx);
+                        }
+                        run = 10;
+                        break;
+
+                    case 10:
+                        if (BITS_LEFT(length,gb) >= 1) {
+                            float f = 0.81;
+
+                            if (get_bits1(gb))
+                                f = -f;
+                            f -= noise_samples[((sb + 1) * (j +5 * ch + 1)) & 127] * 9.0 / 40.0;
+                            samples[0] = f;
+                        } else {
+                            samples[0] = SB_DITHERING_NOISE(sb,q->noise_idx);
+                        }
+                        run = 1;
+                        break;
+
+                    case 16:
+                        if (BITS_LEFT(length,gb) >= 10) {
+                            if (zero_encoding) {
+                                for (k = 0; k < 5; k++) {
+                                    if ((j + k) >= 128)
+                                        break;
+                                    samples[k] = (get_bits1(gb) == 0) ? 0 : dequant_1bit[joined_stereo][2 * get_bits1(gb)];
+                                }
+                            } else {
+                                n = get_bits (gb, 8);
+                                for (k = 0; k < 5; k++)
+                                    samples[k] = dequant_1bit[joined_stereo][random_dequant_index[n][k]];
+                            }
+                        } else {
+                            for (k = 0; k < 5; k++)
+                                samples[k] = SB_DITHERING_NOISE(sb,q->noise_idx);
+                        }
+                        run = 5;
+                        break;
+
+                    case 24:
+                        if (BITS_LEFT(length,gb) >= 7) {
+                            n = get_bits(gb, 7);
+                            for (k = 0; k < 3; k++)
+                                samples[k] = (random_dequant_type24[n][k] - 2.0) * 0.5;
+                        } else {
+                            for (k = 0; k < 3; k++)
+                                samples[k] = SB_DITHERING_NOISE(sb,q->noise_idx);
+                        }
+                        run = 3;
+                        break;
+
+                    case 30:
+                        if (BITS_LEFT(length,gb) >= 4)
+                            samples[0] = type30_dequant[qdm2_get_vlc(gb, &vlc_tab_type30, 0, 1)];
+                        else
+                            samples[0] = SB_DITHERING_NOISE(sb,q->noise_idx);
+
+                        run = 1;
+                        break;
+
+                    case 34:
+                        if (BITS_LEFT(length,gb) >= 7) {
+                            if (type34_first) {
+                                type34_div = (float)(1 << get_bits(gb, 2));
+                                samples[0] = ((float)get_bits(gb, 5) - 16.0) / 15.0;
+                                type34_predictor = samples[0];
+                                type34_first = 0;
+                            } else {
+                                samples[0] = type34_delta[qdm2_get_vlc(gb, &vlc_tab_type34, 0, 1)] / type34_div + type34_predictor;
+                                type34_predictor = samples[0];
+                            }
+                        } else {
+                            samples[0] = SB_DITHERING_NOISE(sb,q->noise_idx);
+                        }
+                        run = 1;
+                        break;
+
+                    default:
+                        samples[0] = SB_DITHERING_NOISE(sb,q->noise_idx);
+                        run = 1;
+                        break;
+                }
+
+                if (joined_stereo) {
+                    float tmp[10][MPA_MAX_CHANNELS];
+
+                    for (k = 0; k < run; k++) {
+                        tmp[k][0] = samples[k];
+                        tmp[k][1] = (sign_bits[(j + k) / 8]) ? -samples[k] : samples[k];
+                    }
+                    for (chs = 0; chs < q->nb_channels; chs++)
+                        for (k = 0; k < run; k++)
+                            if ((j + k) < 128)
+                                q->sb_samples[chs][j + k][sb] = (int32_t)(f2i_scale * q->tone_level[chs][sb][((j + k)/2)] * tmp[k][chs] + .5);
+                } else {
+                    for (k = 0; k < run; k++)
+                        if ((j + k) < 128)
+                            q->sb_samples[ch][j + k][sb] = (int32_t)(f2i_scale * q->tone_level[ch][sb][(j + k)/2] * samples[k] + .5);
+                }
+
+                j += run;
+            } // j loop
+        } // channel loop
+    } // subband loop
+}
+
+
+/**
+ * Init the first element of a channel in quantized_coeffs with data from packet 10 (quantized_coeffs[ch][0]).
+ * This is similar to process_subpacket_9, but for a single channel and for element [0]
+ * same VLC tables as process_subpacket_9 are used.
+ *
+ * @param q         context
+ * @param quantized_coeffs    pointer to quantized_coeffs[ch][0]
+ * @param gb        bitreader context
+ * @param length    packet length in bits
+ */
+static void init_quantized_coeffs_elem0 (int8_t *quantized_coeffs, GetBitContext *gb, int length)
+{
+    int i, k, run, level, diff;
+
+    if (BITS_LEFT(length,gb) < 16)
+        return;
+    level = qdm2_get_vlc(gb, &vlc_tab_level, 0, 2);
+
+    quantized_coeffs[0] = level;
+
+    for (i = 0; i < 7; ) {
+        if (BITS_LEFT(length,gb) < 16)
+            break;
+        run = qdm2_get_vlc(gb, &vlc_tab_run, 0, 1) + 1;
+
+        if (BITS_LEFT(length,gb) < 16)
+            break;
+        diff = qdm2_get_se_vlc(&vlc_tab_diff, gb, 2);
+
+        for (k = 1; k <= run; k++)
+            quantized_coeffs[i + k] = (level + ((k * diff) / run));
+
+        level += diff;
+        i += run;
+    }
+}
+
+
+/**
+ * Related to synthesis filter, process data from packet 10
+ * Init part of quantized_coeffs via function init_quantized_coeffs_elem0
+ * Init tone_level_idx_hi1, tone_level_idx_hi2, tone_level_idx_mid with data from packet 10
+ *
+ * @param q         context
+ * @param gb        bitreader context
+ * @param length    packet length in bits
+ */
+static void init_tone_level_dequantization (QDM2Context *q, GetBitContext *gb, int length)
+{
+    int sb, j, k, n, ch;
+
+    for (ch = 0; ch < q->nb_channels; ch++) {
+        init_quantized_coeffs_elem0(q->quantized_coeffs[ch][0], gb, length);
+
+        if (BITS_LEFT(length,gb) < 16) {
+            memset(q->quantized_coeffs[ch][0], 0, 8);
+            break;
+        }
+    }
+
+    n = q->sub_sampling + 1;
+
+    for (sb = 0; sb < n; sb++)
+        for (ch = 0; ch < q->nb_channels; ch++)
+            for (j = 0; j < 8; j++) {
+                if (BITS_LEFT(length,gb) < 1)
+                    break;
+                if (get_bits1(gb)) {
+                    for (k=0; k < 8; k++) {
+                        if (BITS_LEFT(length,gb) < 16)
+                            break;
+                        q->tone_level_idx_hi1[ch][sb][j][k] = qdm2_get_vlc(gb, &vlc_tab_tone_level_idx_hi1, 0, 2);
+                    }
+                } else {
+                    for (k=0; k < 8; k++)
+                        q->tone_level_idx_hi1[ch][sb][j][k] = 0;
+                }
+            }
+
+    n = QDM2_SB_USED(q->sub_sampling) - 4;
+
+    for (sb = 0; sb < n; sb++)
+        for (ch = 0; ch < q->nb_channels; ch++) {
+            if (BITS_LEFT(length,gb) < 16)
+                break;
+            q->tone_level_idx_hi2[ch][sb] = qdm2_get_vlc(gb, &vlc_tab_tone_level_idx_hi2, 0, 2);
+            if (sb > 19)
+                q->tone_level_idx_hi2[ch][sb] -= 16;
+            else
+                for (j = 0; j < 8; j++)
+                    q->tone_level_idx_mid[ch][sb][j] = -16;
+        }
+
+    n = QDM2_SB_USED(q->sub_sampling) - 5;
+
+    for (sb = 0; sb < n; sb++)
+        for (ch = 0; ch < q->nb_channels; ch++)
+            for (j = 0; j < 8; j++) {
+                if (BITS_LEFT(length,gb) < 16)
+                    break;
+                q->tone_level_idx_mid[ch][sb][j] = qdm2_get_vlc(gb, &vlc_tab_tone_level_idx_mid, 0, 2) - 32;
+            }
+}
+
+/**
+ * Process subpacket 9, init quantized_coeffs with data from it
+ *
+ * @param q       context
+ * @param node    pointer to node with packet
+ */
+static void process_subpacket_9 (QDM2Context *q, QDM2SubPNode *node)
+{
+    GetBitContext gb;
+    int i, j, k, n, ch, run, level, diff;
+
+    init_get_bits(&gb, node->packet->data, node->packet->size*8);
+
+    n = coeff_per_sb_for_avg[q->coeff_per_sb_select][QDM2_SB_USED(q->sub_sampling) - 1] + 1; // same as averagesomething function
+
+    for (i = 1; i < n; i++)
+        for (ch=0; ch < q->nb_channels; ch++) {
+            level = qdm2_get_vlc(&gb, &vlc_tab_level, 0, 2);
+            q->quantized_coeffs[ch][i][0] = level;
+
+            for (j = 0; j < (8 - 1); ) {
+                run = qdm2_get_vlc(&gb, &vlc_tab_run, 0, 1) + 1;
+                diff = qdm2_get_se_vlc(&vlc_tab_diff, &gb, 2);
+
+                for (k = 1; k <= run; k++)
+                    q->quantized_coeffs[ch][i][j + k] = (level + ((k*diff) / run));
+
+                level += diff;
+                j += run;
+            }
+        }
+
+    for (ch = 0; ch < q->nb_channels; ch++)
+        for (i = 0; i < 8; i++)
+            q->quantized_coeffs[ch][0][i] = 0;
+}
+
+
+/**
+ * Process subpacket 10 if not null, else
+ *
+ * @param q         context
+ * @param node      pointer to node with packet
+ * @param length    packet length in bits
+ */
+static void process_subpacket_10 (QDM2Context *q, QDM2SubPNode *node, int length)
+{
+    GetBitContext gb;
+
+    init_get_bits(&gb, ((node == NULL) ? empty_buffer : node->packet->data), ((node == NULL) ? 0 : node->packet->size*8));
+
+    if (length != 0) {
+        init_tone_level_dequantization(q, &gb, length);
+        fill_tone_level_array(q, 1);
+    } else {
+        fill_tone_level_array(q, 0);
+    }
+}
+
+
+/**
+ * Process subpacket 11
+ *
+ * @param q         context
+ * @param node      pointer to node with packet
+ * @param length    packet length in bit
+ */
+static void process_subpacket_11 (QDM2Context *q, QDM2SubPNode *node, int length)
+{
+    GetBitContext gb;
+
+    init_get_bits(&gb, ((node == NULL) ? empty_buffer : node->packet->data), ((node == NULL) ? 0 : node->packet->size*8));
+    if (length >= 32) {
+        int c = get_bits (&gb, 13);
+
+        if (c > 3)
+            fill_coding_method_array (q->tone_level_idx, q->tone_level_idx_temp, q->coding_method,
+                                      q->nb_channels, 8*c, q->superblocktype_2_3, q->cm_table_select);
+    }
+
+    synthfilt_build_sb_samples(q, &gb, length, 0, 8);
+}
+
+
+/**
+ * Process subpacket 12
+ *
+ * @param q         context
+ * @param node      pointer to node with packet
+ * @param length    packet length in bits
+ */
+static void process_subpacket_12 (QDM2Context *q, QDM2SubPNode *node, int length)
+{
+    GetBitContext gb;
+
+    init_get_bits(&gb, ((node == NULL) ? empty_buffer : node->packet->data), ((node == NULL) ? 0 : node->packet->size*8));
+    synthfilt_build_sb_samples(q, &gb, length, 8, QDM2_SB_USED(q->sub_sampling));
+}
+
+/*
+ * Process new subpackets for synthesis filter
+ *
+ * @param q       context
+ * @param list    list with synthesis filter packets (list D)
+ */
+static void process_synthesis_subpackets (QDM2Context *q, QDM2SubPNode *list)
+{
+    QDM2SubPNode *nodes[4];
+
+    nodes[0] = qdm2_search_subpacket_type_in_list(list, 9);
+    if (nodes[0] != NULL)
+        process_subpacket_9(q, nodes[0]);
+
+    nodes[1] = qdm2_search_subpacket_type_in_list(list, 10);
+    if (nodes[1] != NULL)
+        process_subpacket_10(q, nodes[1], nodes[1]->packet->size << 3);
+    else
+        process_subpacket_10(q, NULL, 0);
+
+    nodes[2] = qdm2_search_subpacket_type_in_list(list, 11);
+    if (nodes[0] != NULL && nodes[1] != NULL && nodes[2] != NULL)
+        process_subpacket_11(q, nodes[2], (nodes[2]->packet->size << 3));
+    else
+        process_subpacket_11(q, NULL, 0);
+
+    nodes[3] = qdm2_search_subpacket_type_in_list(list, 12);
+    if (nodes[0] != NULL && nodes[1] != NULL && nodes[3] != NULL)
+        process_subpacket_12(q, nodes[3], (nodes[3]->packet->size << 3));
+    else
+        process_subpacket_12(q, NULL, 0);
+}
+
+
+/*
+ * Decode superblock, fill packet lists.
+ *
+ * @param q    context
+ */
+static void qdm2_decode_super_block (QDM2Context *q)
+{
+    GetBitContext gb;
+    QDM2SubPacket header, *packet;
+    int i, packet_bytes, sub_packet_size, sub_packets_D;
+    unsigned int next_index = 0;
+
+    memset(q->tone_level_idx_hi1, 0, sizeof(q->tone_level_idx_hi1));
+    memset(q->tone_level_idx_mid, 0, sizeof(q->tone_level_idx_mid));
+    memset(q->tone_level_idx_hi2, 0, sizeof(q->tone_level_idx_hi2));
+
+    q->sub_packets_B = 0;
+    sub_packets_D = 0;
+
+    average_quantized_coeffs(q); // average elements in quantized_coeffs[max_ch][10][8]
+
+    init_get_bits(&gb, q->compressed_data, q->compressed_size*8);
+    qdm2_decode_sub_packet_header(&gb, &header);
+
+    if (header.type < 2 || header.type >= 8) {
+        q->has_errors = 1;
+        av_log(NULL,AV_LOG_ERROR,"bad superblock type\n");
+        return;
+    }
+
+    q->superblocktype_2_3 = (header.type == 2 || header.type == 3);
+    packet_bytes = (q->compressed_size - get_bits_count(&gb) / 8);
+
+    init_get_bits(&gb, header.data, header.size*8);
+
+    if (header.type == 2 || header.type == 4 || header.type == 5) {
+        int csum = 257 * get_bits(&gb, 8) + 2 * get_bits(&gb, 8);
+
+        csum = qdm2_packet_checksum(q->compressed_data, q->checksum_size, csum);
+
+        if (csum != 0) {
+            q->has_errors = 1;
+            av_log(NULL,AV_LOG_ERROR,"bad packet checksum\n");
+            return;
+        }
+    }
+
+    q->sub_packet_list_B[0].packet = NULL;
+    q->sub_packet_list_D[0].packet = NULL;
+
+    for (i = 0; i < 6; i++)
+        if (--q->fft_level_exp[i] < 0)
+            q->fft_level_exp[i] = 0;
+
+    for (i = 0; packet_bytes > 0; i++) {
+        int j;
+
+        q->sub_packet_list_A[i].next = NULL;
+
+        if (i > 0) {
+            q->sub_packet_list_A[i - 1].next = &q->sub_packet_list_A[i];
+
+            /* seek to next block */
+            init_get_bits(&gb, header.data, header.size*8);
+            skip_bits(&gb, next_index*8);
+
+            if (next_index >= header.size)
+                break;
+        }
+
+        /* decode subpacket */
+        packet = &q->sub_packets[i];
+        qdm2_decode_sub_packet_header(&gb, packet);
+        next_index = packet->size + get_bits_count(&gb) / 8;
+        sub_packet_size = ((packet->size > 0xff) ? 1 : 0) + packet->size + 2;
+
+        if (packet->type == 0)
+            break;
+
+        if (sub_packet_size > packet_bytes) {
+            if (packet->type != 10 && packet->type != 11 && packet->type != 12)
+                break;
+            packet->size += packet_bytes - sub_packet_size;
+        }
+
+        packet_bytes -= sub_packet_size;
+
+        /* add subpacket to 'all subpackets' list */
+        q->sub_packet_list_A[i].packet = packet;
+
+        /* add subpacket to related list */
+        if (packet->type == 8) {
+            SAMPLES_NEEDED_2("packet type 8");
+            return;
+        } else if (packet->type >= 9 && packet->type <= 12) {
+            /* packets for MPEG Audio like Synthesis Filter */
+            QDM2_LIST_ADD(q->sub_packet_list_D, sub_packets_D, packet);
+        } else if (packet->type == 13) {
+            for (j = 0; j < 6; j++)
+                q->fft_level_exp[j] = get_bits(&gb, 6);
+        } else if (packet->type == 14) {
+            for (j = 0; j < 6; j++)
+                q->fft_level_exp[j] = qdm2_get_vlc(&gb, &fft_level_exp_vlc, 0, 2);
+        } else if (packet->type == 15) {
+            SAMPLES_NEEDED_2("packet type 15")
+            return;
+        } else if (packet->type >= 16 && packet->type < 48 && !fft_subpackets[packet->type - 16]) {
+            /* packets for FFT */
+            QDM2_LIST_ADD(q->sub_packet_list_B, q->sub_packets_B, packet);
+        }
+    } // Packet bytes loop
+
+/* **************************************************************** */
+    if (q->sub_packet_list_D[0].packet != NULL) {
+        process_synthesis_subpackets(q, q->sub_packet_list_D);
+        q->do_synth_filter = 1;
+    } else if (q->do_synth_filter) {
+        process_subpacket_10(q, NULL, 0);
+        process_subpacket_11(q, NULL, 0);
+        process_subpacket_12(q, NULL, 0);
+    }
+/* **************************************************************** */
+}
+
+
+static void qdm2_fft_init_coefficient (QDM2Context *q, int sub_packet,
+                       int offset, int duration, int channel,
+                       int exp, int phase)
+{
+    if (q->fft_coefs_min_index[duration] < 0)
+        q->fft_coefs_min_index[duration] = q->fft_coefs_index;
+
+    q->fft_coefs[q->fft_coefs_index].sub_packet = ((sub_packet >= 16) ? (sub_packet - 16) : sub_packet);
+    q->fft_coefs[q->fft_coefs_index].channel = channel;
+    q->fft_coefs[q->fft_coefs_index].offset = offset;
+    q->fft_coefs[q->fft_coefs_index].exp = exp;
+    q->fft_coefs[q->fft_coefs_index].phase = phase;
+    q->fft_coefs_index++;
+}
+
+
+static void qdm2_fft_decode_tones (QDM2Context *q, int duration, GetBitContext *gb, int b)
+{
+    int channel, stereo, phase, exp;
+    int local_int_4,  local_int_8,  stereo_phase,  local_int_10;
+    int local_int_14, stereo_exp, local_int_20, local_int_28;
+    int n, offset;
+
+    local_int_4 = 0;
+    local_int_28 = 0;
+    local_int_20 = 2;
+    local_int_8 = (4 - duration);
+    local_int_10 = 1 << (q->group_order - duration - 1);
+    offset = 1;
+
+    while (1) {
+        if (q->superblocktype_2_3) {
+            while ((n = qdm2_get_vlc(gb, &vlc_tab_fft_tone_offset[local_int_8], 1, 2)) < 2) {
+                offset = 1;
+                if (n == 0) {
+                    local_int_4 += local_int_10;
+                    local_int_28 += (1 << local_int_8);
+                } else {
+                    local_int_4 += 8*local_int_10;
+                    local_int_28 += (8 << local_int_8);
+                }
+            }
+            offset += (n - 2);
+        } else {
+            offset += qdm2_get_vlc(gb, &vlc_tab_fft_tone_offset[local_int_8], 1, 2);
+            while (offset >= (local_int_10 - 1)) {
+                offset += (1 - (local_int_10 - 1));
+                local_int_4  += local_int_10;
+                local_int_28 += (1 << local_int_8);
+            }
+        }
+
+        if (local_int_4 >= q->group_size)
+            return;
+
+        local_int_14 = (offset >> local_int_8);
+
+        if (q->nb_channels > 1) {
+            channel = get_bits1(gb);
+            stereo = get_bits1(gb);
+        } else {
+            channel = 0;
+            stereo = 0;
+        }
+
+        exp = qdm2_get_vlc(gb, (b ? &fft_level_exp_vlc : &fft_level_exp_alt_vlc), 0, 2);
+        exp += q->fft_level_exp[fft_level_index_table[local_int_14]];
+        exp = (exp < 0) ? 0 : exp;
+
+        phase = get_bits(gb, 3);
+        stereo_exp = 0;
+        stereo_phase = 0;
+
+        if (stereo) {
+            stereo_exp = (exp - qdm2_get_vlc(gb, &fft_stereo_exp_vlc, 0, 1));
+            stereo_phase = (phase - qdm2_get_vlc(gb, &fft_stereo_phase_vlc, 0, 1));
+            if (stereo_phase < 0)
+                stereo_phase += 8;
+        }
+
+        if (q->frequency_range > (local_int_14 + 1)) {
+            int sub_packet = (local_int_20 + local_int_28);
+
+            qdm2_fft_init_coefficient(q, sub_packet, offset, duration, channel, exp, phase);
+            if (stereo)
+                qdm2_fft_init_coefficient(q, sub_packet, offset, duration, (1 - channel), stereo_exp, stereo_phase);
+        }
+
+        offset++;
+    }
+}
+
+
+static void qdm2_decode_fft_packets (QDM2Context *q)
+{
+    int i, j, min, max, value, type, unknown_flag;
+    GetBitContext gb;
+
+    if (q->sub_packet_list_B[0].packet == NULL)
+        return;
+
+    /* reset minimum indices for FFT coefficients */
+    q->fft_coefs_index = 0;
+    for (i=0; i < 5; i++)
+        q->fft_coefs_min_index[i] = -1;
+
+    /* process subpackets ordered by type, largest type first */
+    for (i = 0, max = 256; i < q->sub_packets_B; i++) {
+        QDM2SubPacket *packet;
+
+        /* find subpacket with largest type less than max */
+        for (j = 0, min = 0, packet = NULL; j < q->sub_packets_B; j++) {
+            value = q->sub_packet_list_B[j].packet->type;
+            if (value > min && value < max) {
+                min = value;
+                packet = q->sub_packet_list_B[j].packet;
+            }
+        }
+
+        max = min;
+
+        /* check for errors (?) */
+        if (i == 0 && (packet->type < 16 || packet->type >= 48 || fft_subpackets[packet->type - 16]))
+            return;
+
+        /* decode FFT tones */
+        init_get_bits (&gb, packet->data, packet->size*8);
+
+        if (packet->type >= 32 && packet->type < 48 && !fft_subpackets[packet->type - 16])
+            unknown_flag = 1;
+        else
+            unknown_flag = 0;
+
+        type = packet->type;
+
+        if ((type >= 17 && type < 24) || (type >= 33 && type < 40)) {
+            int duration = q->sub_sampling + 5 - (type & 15);
+
+            if (duration >= 0 && duration < 4)
+                qdm2_fft_decode_tones(q, duration, &gb, unknown_flag);
+        } else if (type == 31) {
+            for (j=0; j < 4; j++)
+                qdm2_fft_decode_tones(q, j, &gb, unknown_flag);
+        } else if (type == 46) {
+            for (j=0; j < 6; j++)
+                q->fft_level_exp[j] = get_bits(&gb, 6);
+            for (j=0; j < 4; j++)
+            qdm2_fft_decode_tones(q, j, &gb, unknown_flag);
+        }
+    } // Loop on B packets
+
+    /* calculate maximum indices for FFT coefficients */
+    for (i = 0, j = -1; i < 5; i++)
+        if (q->fft_coefs_min_index[i] >= 0) {
+            if (j >= 0)
+                q->fft_coefs_max_index[j] = q->fft_coefs_min_index[i];
+            j = i;
+        }
+    if (j >= 0)
+        q->fft_coefs_max_index[j] = q->fft_coefs_index;
+}
+
+
+static void qdm2_fft_generate_tone (QDM2Context *q, FFTTone *tone)
+{
+   float level, f[6];
+   int i;
+   QDM2Complex c;
+   const double iscale = 2.0*M_PI / 512.0;
+
+    tone->phase += tone->phase_shift;
+
+    /* calculate current level (maximum amplitude) of tone */
+    level = fft_tone_envelope_table[tone->duration][tone->time_index] * tone->level;
+    c.im = level * sin(tone->phase*iscale);
+    c.re = level * cos(tone->phase*iscale);
+
+    /* generate FFT coefficients for tone */
+    if (tone->duration >= 3 || tone->cutoff >= 3) {
+        tone->samples_im[0] += c.im;
+        tone->samples_re[0] += c.re;
+        tone->samples_im[1] -= c.im;
+        tone->samples_re[1] -= c.re;
+    } else {
+        f[1] = -tone->table[4];
+        f[0] =  tone->table[3] - tone->table[0];
+        f[2] =  1.0 - tone->table[2] - tone->table[3];
+        f[3] =  tone->table[1] + tone->table[4] - 1.0;
+        f[4] =  tone->table[0] - tone->table[1];
+        f[5] =  tone->table[2];
+        for (i = 0; i < 2; i++) {
+            tone->samples_re[fft_cutoff_index_table[tone->cutoff][i]] += c.re * f[i];
+            tone->samples_im[fft_cutoff_index_table[tone->cutoff][i]] += c.im *((tone->cutoff <= i) ? -f[i] : f[i]);
+        }
+        for (i = 0; i < 4; i++) {
+            tone->samples_re[i] += c.re * f[i+2];
+            tone->samples_im[i] += c.im * f[i+2];
+        }
+    }
+
+    /* copy the tone if it has not yet died out */
+    if (++tone->time_index < ((1 << (5 - tone->duration)) - 1)) {
+      memcpy(&q->fft_tones[q->fft_tone_end], tone, sizeof(FFTTone));
+      q->fft_tone_end = (q->fft_tone_end + 1) % 1000;
+    }
+}
+
+
+static void qdm2_fft_tone_synthesizer (QDM2Context *q, int sub_packet)
+{
+    int i, j, ch;
+    const double iscale = 0.25 * M_PI;
+
+    for (ch = 0; ch < q->channels; ch++) {
+        memset(q->fft.samples_im[ch], 0, q->fft_size * sizeof(float));
+        memset(q->fft.samples_re[ch], 0, q->fft_size * sizeof(float));
+    }
+
+
+    /* apply FFT tones with duration 4 (1 FFT period) */
+    if (q->fft_coefs_min_index[4] >= 0)
+        for (i = q->fft_coefs_min_index[4]; i < q->fft_coefs_max_index[4]; i++) {
+            float level;
+            QDM2Complex c;
+
+            if (q->fft_coefs[i].sub_packet != sub_packet)
+                break;
+
+            ch = (q->channels == 1) ? 0 : q->fft_coefs[i].channel;
+            level = (q->fft_coefs[i].exp < 0) ? 0.0 : fft_tone_level_table[q->superblocktype_2_3 ? 0 : 1][q->fft_coefs[i].exp & 63];
+
+            c.re = level * cos(q->fft_coefs[i].phase * iscale);
+            c.im = level * sin(q->fft_coefs[i].phase * iscale);
+            q->fft.samples_re[ch][q->fft_coefs[i].offset + 0] += c.re;
+            q->fft.samples_im[ch][q->fft_coefs[i].offset + 0] += c.im;
+            q->fft.samples_re[ch][q->fft_coefs[i].offset + 1] -= c.re;
+            q->fft.samples_im[ch][q->fft_coefs[i].offset + 1] -= c.im;
+        }
+
+    /* generate existing FFT tones */
+    for (i = q->fft_tone_end; i != q->fft_tone_start; ) {
+        qdm2_fft_generate_tone(q, &q->fft_tones[q->fft_tone_start]);
+        q->fft_tone_start = (q->fft_tone_start + 1) % 1000;
+    }
+
+    /* create and generate new FFT tones with duration 0 (long) to 3 (short) */
+    for (i = 0; i < 4; i++)
+        if (q->fft_coefs_min_index[i] >= 0) {
+            for (j = q->fft_coefs_min_index[i]; j < q->fft_coefs_max_index[i]; j++) {
+                int offset, four_i;
+                FFTTone tone;
+
+                if (q->fft_coefs[j].sub_packet != sub_packet)
+                    break;
+
+                four_i = (4 - i);
+                offset = q->fft_coefs[j].offset >> four_i;
+                ch = (q->channels == 1) ? 0 : q->fft_coefs[j].channel;
+
+                if (offset < q->frequency_range) {
+                    if (offset < 2)
+                        tone.cutoff = offset;
+                    else
+                        tone.cutoff = (offset >= 60) ? 3 : 2;
+
+                    tone.level = (q->fft_coefs[j].exp < 0) ? 0.0 : fft_tone_level_table[q->superblocktype_2_3 ? 0 : 1][q->fft_coefs[j].exp & 63];
+                    tone.samples_im = &q->fft.samples_im[ch][offset];
+                    tone.samples_re = &q->fft.samples_re[ch][offset];
+                    tone.table = (float*)fft_tone_sample_table[i][q->fft_coefs[j].offset - (offset << four_i)];
+                    tone.phase = 64 * q->fft_coefs[j].phase - (offset << 8) - 128;
+                    tone.phase_shift = (2 * q->fft_coefs[j].offset + 1) << (7 - four_i);
+                    tone.duration = i;
+                    tone.time_index = 0;
+
+                    qdm2_fft_generate_tone(q, &tone);
+                }
+            }
+            q->fft_coefs_min_index[i] = j;
+        }
+}
+
+
+static void qdm2_calculate_fft (QDM2Context *q, int channel, int sub_packet)
+{
+    const int n = 1 << (q->fft_order - 1);
+    const int n2 = n >> 1;
+    const float gain = (q->channels == 1 && q->nb_channels == 2) ? 0.25f : 0.50f;
+    float c, s, f0, f1, f2, f3;
+    int i, j;
+
+    /* prerotation (or something like that) */
+    for (i=1; i < n2; i++) {
+        j  = (n - i);
+        c = q->exptab[i].re;
+        s = -q->exptab[i].im;
+        f0 = (q->fft.samples_re[channel][i] - q->fft.samples_re[channel][j]) * gain;
+        f1 = (q->fft.samples_im[channel][i] + q->fft.samples_im[channel][j]) * gain;
+        f2 = (q->fft.samples_re[channel][i] + q->fft.samples_re[channel][j]) * gain;
+        f3 = (q->fft.samples_im[channel][i] - q->fft.samples_im[channel][j]) * gain;
+        q->fft.complex[i].re =  s * f0 - c * f1 + f2;
+        q->fft.complex[i].im =  c * f0 + s * f1 + f3;
+        q->fft.complex[j].re = -s * f0 + c * f1 + f2;
+        q->fft.complex[j].im =  c * f0 + s * f1 - f3;
+    }
+
+    q->fft.complex[ 0].re =  q->fft.samples_re[channel][ 0] * gain * 2.0;
+    q->fft.complex[ 0].im =  q->fft.samples_re[channel][ 0] * gain * 2.0;
+    q->fft.complex[n2].re =  q->fft.samples_re[channel][n2] * gain * 2.0;
+    q->fft.complex[n2].im = -q->fft.samples_im[channel][n2] * gain * 2.0;
+
+    ff_fft_permute(&q->fft_ctx, (FFTComplex *) q->fft.complex);
+    ff_fft_calc (&q->fft_ctx, (FFTComplex *) q->fft.complex);
+    /* add samples to output buffer */
+    for (i = 0; i < ((q->fft_frame_size + 15) & ~15); i++)
+        q->output_buffer[q->channels * i + channel] += ((float *) q->fft.complex)[i];
+}
+
+
+/**
+ * @param q        context
+ * @param index    subpacket number
+ */
+static void qdm2_synthesis_filter (QDM2Context *q, int index)
+{
+    OUT_INT samples[MPA_MAX_CHANNELS * MPA_FRAME_SIZE];
+    int i, k, ch, sb_used, sub_sampling, dither_state = 0;
+
+    /* copy sb_samples */
+    sb_used = QDM2_SB_USED(q->sub_sampling);
+
+    for (ch = 0; ch < q->channels; ch++)
+        for (i = 0; i < 8; i++)
+            for (k=sb_used; k < SBLIMIT; k++)
+                q->sb_samples[ch][(8 * index) + i][k] = 0;
+
+    for (ch = 0; ch < q->nb_channels; ch++) {
+        OUT_INT *samples_ptr = samples + ch;
+
+        for (i = 0; i < 8; i++) {
+            ff_mpa_synth_filter(q->synth_buf[ch], &(q->synth_buf_offset[ch]),
+                mpa_window, &dither_state,
+                samples_ptr, q->nb_channels,
+                q->sb_samples[ch][(8 * index) + i]);
+            samples_ptr += 32 * q->nb_channels;
+        }
+    }
+
+    /* add samples to output buffer */
+    sub_sampling = (4 >> q->sub_sampling);
+
+    for (ch = 0; ch < q->channels; ch++)
+        for (i = 0; i < q->frame_size; i++)
+            q->output_buffer[q->channels * i + ch] += (float)(samples[q->nb_channels * sub_sampling * i + ch] >> (sizeof(OUT_INT)*8-16));
+}
+
+
+/**
+ * Init static data (does not depend on specific file)
+ *
+ * @param q    context
+ */
+static void qdm2_init(QDM2Context *q) {
+    static int inited = 0;
+
+    if (inited != 0)
+        return;
+    inited = 1;
+
+    qdm2_init_vlc();
+    ff_mpa_synth_init(mpa_window);
+    softclip_table_init();
+    rnd_table_init();
+    init_noise_samples();
+
+    av_log(NULL, AV_LOG_DEBUG, "init done\n");
+}
+
+
+#if 0
+static void dump_context(QDM2Context *q)
+{
+    int i;
+#define PRINT(a,b) av_log(NULL,AV_LOG_DEBUG," %s = %d\n", a, b);
+    PRINT("compressed_data",q->compressed_data);
+    PRINT("compressed_size",q->compressed_size);
+    PRINT("frame_size",q->frame_size);
+    PRINT("checksum_size",q->checksum_size);
+    PRINT("channels",q->channels);
+    PRINT("nb_channels",q->nb_channels);
+    PRINT("fft_frame_size",q->fft_frame_size);
+    PRINT("fft_size",q->fft_size);
+    PRINT("sub_sampling",q->sub_sampling);
+    PRINT("fft_order",q->fft_order);
+    PRINT("group_order",q->group_order);
+    PRINT("group_size",q->group_size);
+    PRINT("sub_packet",q->sub_packet);
+    PRINT("frequency_range",q->frequency_range);
+    PRINT("has_errors",q->has_errors);
+    PRINT("fft_tone_end",q->fft_tone_end);
+    PRINT("fft_tone_start",q->fft_tone_start);
+    PRINT("fft_coefs_index",q->fft_coefs_index);
+    PRINT("coeff_per_sb_select",q->coeff_per_sb_select);
+    PRINT("cm_table_select",q->cm_table_select);
+    PRINT("noise_idx",q->noise_idx);
+
+    for (i = q->fft_tone_start; i < q->fft_tone_end; i++)
+    {
+    FFTTone *t = &q->fft_tones[i];
+
+    av_log(NULL,AV_LOG_DEBUG,"Tone (%d) dump:\n", i);
+    av_log(NULL,AV_LOG_DEBUG,"  level = %f\n", t->level);
+//  PRINT(" level", t->level);
+    PRINT(" phase", t->phase);
+    PRINT(" phase_shift", t->phase_shift);
+    PRINT(" duration", t->duration);
+    PRINT(" samples_im", t->samples_im);
+    PRINT(" samples_re", t->samples_re);
+    PRINT(" table", t->table);
+    }
+
+}
+#endif
+
+
+/**
+ * Init parameters from codec extradata
+ */
+static int qdm2_decode_init(AVCodecContext *avctx)
+{
+    QDM2Context *s = avctx->priv_data;
+    uint8_t *extradata;
+    int extradata_size;
+    int tmp_val, tmp, size;
+    int i;
+    float alpha;
+
+    /* extradata parsing
+
+    Structure:
+    wave {
+        frma (QDM2)
+        QDCA
+        QDCP
+    }
+
+    32  size (including this field)
+    32  tag (=frma)
+    32  type (=QDM2 or QDMC)
+
+    32  size (including this field, in bytes)
+    32  tag (=QDCA) // maybe mandatory parameters
+    32  unknown (=1)
+    32  channels (=2)
+    32  samplerate (=44100)
+    32  bitrate (=96000)
+    32  block size (=4096)
+    32  frame size (=256) (for one channel)
+    32  packet size (=1300)
+
+    32  size (including this field, in bytes)
+    32  tag (=QDCP) // maybe some tuneable parameters
+    32  float1 (=1.0)
+    32  zero ?
+    32  float2 (=1.0)
+    32  float3 (=1.0)
+    32  unknown (27)
+    32  unknown (8)
+    32  zero ?
+    */
+
+    if (!avctx->extradata || (avctx->extradata_size < 48)) {
+        av_log(avctx, AV_LOG_ERROR, "extradata missing or truncated\n");
+        return -1;
+    }
+
+    extradata = avctx->extradata;
+    extradata_size = avctx->extradata_size;
+
+    while (extradata_size > 7) {
+        if (!memcmp(extradata, "frmaQDM", 7))
+            break;
+        extradata++;
+        extradata_size--;
+    }
+
+    if (extradata_size < 12) {
+        av_log(avctx, AV_LOG_ERROR, "not enough extradata (%i)\n",
+               extradata_size);
+        return -1;
+    }
+
+    if (memcmp(extradata, "frmaQDM", 7)) {
+        av_log(avctx, AV_LOG_ERROR, "invalid headers, QDM? not found\n");
+        return -1;
+    }
+
+    if (extradata[7] == 'C') {
+//        s->is_qdmc = 1;
+        av_log(avctx, AV_LOG_ERROR, "stream is QDMC version 1, which is not supported\n");
+        return -1;
+    }
+
+    extradata += 8;
+    extradata_size -= 8;
+
+    size = BE_32(extradata);
+
+    if(size > extradata_size){
+        av_log(avctx, AV_LOG_ERROR, "extradata size too small, %i < %i\n",
+               extradata_size, size);
+        return -1;
+    }
+
+    extradata += 4;
+    av_log(avctx, AV_LOG_DEBUG, "size: %d\n", size);
+    if (BE_32(extradata) != MKBETAG('Q','D','C','A')) {
+        av_log(avctx, AV_LOG_ERROR, "invalid extradata, expecting QDCA\n");
+        return -1;
+    }
+
+    extradata += 8;
+
+    avctx->channels = s->nb_channels = s->channels = BE_32(extradata);
+    extradata += 4;
+
+    avctx->sample_rate = BE_32(extradata);
+    extradata += 4;
+
+    avctx->bit_rate = BE_32(extradata);
+    extradata += 4;
+
+    s->group_size = BE_32(extradata);
+    extradata += 4;
+
+    s->fft_size = BE_32(extradata);
+    extradata += 4;
+
+    s->checksum_size = BE_32(extradata);
+    extradata += 4;
+
+    s->fft_order = av_log2(s->fft_size) + 1;
+    s->fft_frame_size = 2 * s->fft_size; // complex has two floats
+
+    // something like max decodable tones
+    s->group_order = av_log2(s->group_size) + 1;
+    s->frame_size = s->group_size / 16; // 16 iterations per super block
+
+    s->sub_sampling = s->fft_order - 7;
+    s->frequency_range = 255 / (1 << (2 - s->sub_sampling));
+
+    switch ((s->sub_sampling * 2 + s->channels - 1)) {
+        case 0: tmp = 40; break;
+        case 1: tmp = 48; break;
+        case 2: tmp = 56; break;
+        case 3: tmp = 72; break;
+        case 4: tmp = 80; break;
+        case 5: tmp = 100;break;
+        default: tmp=s->sub_sampling; break;
+    }
+    tmp_val = 0;
+    if ((tmp * 1000) < avctx->bit_rate)  tmp_val = 1;
+    if ((tmp * 1440) < avctx->bit_rate)  tmp_val = 2;
+    if ((tmp * 1760) < avctx->bit_rate)  tmp_val = 3;
+    if ((tmp * 2240) < avctx->bit_rate)  tmp_val = 4;
+    s->cm_table_select = tmp_val;
+
+    if (s->sub_sampling == 0)
+        tmp = 7999;
+    else
+        tmp = ((-(s->sub_sampling -1)) & 8000) + 20000;
+    /*
+    0: 7999 -> 0
+    1: 20000 -> 2
+    2: 28000 -> 2
+    */
+    if (tmp < 8000)
+        s->coeff_per_sb_select = 0;
+    else if (tmp <= 16000)
+        s->coeff_per_sb_select = 1;
+    else
+        s->coeff_per_sb_select = 2;
+
+    // Fail on unknown fft order, if it's > 9 it can overflow s->exptab[]
+    if ((s->fft_order < 7) || (s->fft_order > 9)) {
+        av_log(avctx, AV_LOG_ERROR, "Unknown FFT order (%d), contact the developers!\n", s->fft_order);
+        return -1;
+    }
+
+    ff_fft_init(&s->fft_ctx, s->fft_order - 1, 1);
+
+    for (i = 1; i < (1 << (s->fft_order - 2)); i++) {
+        alpha = 2 * M_PI * (float)i / (float)(1 << (s->fft_order - 1));
+        s->exptab[i].re = cos(alpha);
+        s->exptab[i].im = sin(alpha);
+    }
+
+    qdm2_init(s);
+
+//    dump_context(s);
+    return 0;
+}
+
+
+static int qdm2_decode_close(AVCodecContext *avctx)
+{
+    QDM2Context *s = avctx->priv_data;
+
+    ff_fft_end(&s->fft_ctx);
+
+    return 0;
+}
+
+
+static void qdm2_decode (QDM2Context *q, uint8_t *in, int16_t *out)
+{
+    int ch, i;
+    const int frame_size = (q->frame_size * q->channels);
+
+    /* select input buffer */
+    q->compressed_data = in;
+    q->compressed_size = q->checksum_size;
+
+//  dump_context(q);
+
+    /* copy old block, clear new block of output samples */
+    memmove(q->output_buffer, &q->output_buffer[frame_size], frame_size * sizeof(float));
+    memset(&q->output_buffer[frame_size], 0, frame_size * sizeof(float));
+
+    /* decode block of QDM2 compressed data */
+    if (q->sub_packet == 0) {
+        q->has_errors = 0; // zero it for a new super block
+        av_log(NULL,AV_LOG_DEBUG,"Superblock follows\n");
+        qdm2_decode_super_block(q);
+    }
+
+    /* parse subpackets */
+    if (!q->has_errors) {
+        if (q->sub_packet == 2)
+            qdm2_decode_fft_packets(q);
+
+        qdm2_fft_tone_synthesizer(q, q->sub_packet);
+    }
+
+    /* sound synthesis stage 1 (FFT) */
+    for (ch = 0; ch < q->channels; ch++) {
+        qdm2_calculate_fft(q, ch, q->sub_packet);
+
+        if (!q->has_errors && q->sub_packet_list_C[0].packet != NULL) {
+            SAMPLES_NEEDED_2("has errors, and C list is not empty")
+            return;
+        }
+    }
+
+    /* sound synthesis stage 2 (MPEG audio like synthesis filter) */
+    if (!q->has_errors && q->do_synth_filter)
+        qdm2_synthesis_filter(q, q->sub_packet);
+
+    q->sub_packet = (q->sub_packet + 1) % 16;
+
+    /* clip and convert output float[] to 16bit signed samples */
+    for (i = 0; i < frame_size; i++) {
+        int value = (int)q->output_buffer[i];
+
+        if (value > SOFTCLIP_THRESHOLD)
+            value = (value >  HARDCLIP_THRESHOLD) ?  32767 :  softclip_table[ value - SOFTCLIP_THRESHOLD];
+        else if (value < -SOFTCLIP_THRESHOLD)
+            value = (value < -HARDCLIP_THRESHOLD) ? -32767 : -softclip_table[-value - SOFTCLIP_THRESHOLD];
+
+        out[i] = value;
+    }
+}
+
+
+static int qdm2_decode_frame(AVCodecContext *avctx,
+            void *data, int *data_size,
+            uint8_t *buf, int buf_size)
+{
+    QDM2Context *s = avctx->priv_data;
+
+    if(!buf)
+        return 0;
+    if(buf_size < s->checksum_size)
+        return -1;
+
+    *data_size = s->channels * s->frame_size * sizeof(int16_t);
+
+    av_log(avctx, AV_LOG_DEBUG, "decode(%d): %p[%d] -> %p[%d]\n",
+       buf_size, buf, s->checksum_size, data, *data_size);
+
+    qdm2_decode(s, buf, data);
+
+    // reading only when next superblock found
+    if (s->sub_packet == 0) {
+        return s->checksum_size;
+    }
+
+    return 0;
+}
+
+AVCodec qdm2_decoder =
+{
+    .name = "qdm2",
+    .type = CODEC_TYPE_AUDIO,
+    .id = CODEC_ID_QDM2,
+    .priv_data_size = sizeof(QDM2Context),
+    .init = qdm2_decode_init,
+    .close = qdm2_decode_close,
+    .decode = qdm2_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/qdm2data.h b/contrib/ffmpeg/libavcodec/qdm2data.h
new file mode 100644
index 000000000..6d7d07463
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/qdm2data.h
@@ -0,0 +1,530 @@
+/*
+ * QDM2 compatible decoder
+ * Copyright (c) 2003 Ewald Snel
+ * Copyright (c) 2005 Benjamin Larsson
+ * Copyright (c) 2005 Alex Beregszaszi
+ * Copyright (c) 2005 Roberto Togni
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+ /**
+ * @file qdm2data.h
+ * Various QDM2 tables.
+ */
+
+#ifndef QDM2DATA_H
+#define QDM2DATA_H
+
+/** VLC TABLES **/
+
+/* values in this table range from -1..23; adjust retrieved value by -1 */
+static const uint16_t vlc_tab_level_huffcodes[24] = {
+    0x037c, 0x0004, 0x003c, 0x004c, 0x003a, 0x002c, 0x001c, 0x001a,
+    0x0024, 0x0014, 0x0001, 0x0002, 0x0000, 0x0003, 0x0007, 0x0005,
+    0x0006, 0x0008, 0x0009, 0x000a, 0x000c, 0x00fc, 0x007c, 0x017c
+};
+
+static const uint8_t vlc_tab_level_huffbits[24] = {
+    10, 6, 7, 7, 6, 6, 6, 6, 6, 5, 4, 4, 4, 3, 3, 3, 3, 4, 4, 5, 7, 8, 9, 10
+};
+
+/* values in this table range from -1..36; adjust retrieved value by -1 */
+static const uint16_t vlc_tab_diff_huffcodes[37] = {
+    0x1c57, 0x0004, 0x0000, 0x0001, 0x0003, 0x0002, 0x000f, 0x000e,
+    0x0007, 0x0016, 0x0037, 0x0027, 0x0026, 0x0066, 0x0006, 0x0097,
+    0x0046, 0x01c6, 0x0017, 0x0786, 0x0086, 0x0257, 0x00d7, 0x0357,
+    0x00c6, 0x0386, 0x0186, 0x0000, 0x0157, 0x0c57, 0x0057, 0x0000,
+    0x0b86, 0x0000, 0x1457, 0x0000, 0x0457
+};
+
+static const uint8_t vlc_tab_diff_huffbits[37] = {
+    13, 3, 3, 2, 3, 3, 4, 4, 6, 5, 6, 6, 7, 7, 8, 8,
+    8, 9, 8, 11, 9, 10, 8, 10, 9, 12, 10, 0, 10, 13, 11, 0,
+    12, 0, 13, 0, 13
+};
+
+/* values in this table range from -1..5; adjust retrieved value by -1 */
+static const uint8_t vlc_tab_run_huffcodes[6] = {
+    0x1f, 0x00, 0x01, 0x03, 0x07, 0x0f
+};
+
+static const uint8_t vlc_tab_run_huffbits[6] = {
+    5, 1, 2, 3, 4, 5
+};
+
+/* values in this table range from -1..19; adjust retrieved value by -1 */
+static const uint16_t vlc_tab_tone_level_idx_hi1_huffcodes[20] = {
+    0x5714, 0x000c, 0x0002, 0x0001, 0x0000, 0x0004, 0x0034, 0x0054,
+    0x0094, 0x0014, 0x0114, 0x0214, 0x0314, 0x0614, 0x0e14, 0x0f14,
+    0x2714, 0x0714, 0x1714, 0x3714
+};
+
+static const uint8_t vlc_tab_tone_level_idx_hi1_huffbits[20] = {
+    15, 4, 2, 1, 3, 5, 6, 7, 8, 10, 10, 11, 11, 12, 12, 12, 14, 14, 15, 14
+};
+
+/* values in this table range from -1..23; adjust retrieved value by -1 */
+static const uint16_t vlc_tab_tone_level_idx_mid_huffcodes[24] = {
+    0x0fea, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x03ea, 0x00ea, 0x002a, 0x001a,
+    0x0006, 0x0001, 0x0000, 0x0002, 0x000a, 0x006a, 0x01ea, 0x07ea
+};
+
+static const uint8_t vlc_tab_tone_level_idx_mid_huffbits[24] = {
+    12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 9, 7, 5, 3, 1, 2, 4, 6, 8, 10, 12
+};
+
+/* values in this table range from -1..23; adjust retrieved value by -1 */
+static const uint16_t vlc_tab_tone_level_idx_hi2_huffcodes[24] = {
+    0x0664, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0064, 0x00e4,
+    0x00a4, 0x0068, 0x0004, 0x0008, 0x0014, 0x0018, 0x0000, 0x0001,
+    0x0002, 0x0003, 0x000c, 0x0028, 0x0024, 0x0164, 0x0000, 0x0264
+};
+
+static const uint8_t vlc_tab_tone_level_idx_hi2_huffbits[24] = {
+    11, 0, 0, 0, 0, 0, 10, 8, 8, 7, 6, 6, 5, 5, 4, 2, 2, 2, 4, 7, 8, 9, 0, 11
+};
+
+/* values in this table range from -1..8; adjust retrieved value by -1 */
+static const uint8_t vlc_tab_type30_huffcodes[9] = {
+    0x3c, 0x06, 0x00, 0x01, 0x03, 0x02, 0x04, 0x0c, 0x1c
+};
+
+static const uint8_t vlc_tab_type30_huffbits[9] = {
+    6, 3, 3, 2, 2, 3, 4, 5, 6
+};
+
+/* values in this table range from -1..9; adjust retrieved value by -1 */
+static const uint8_t vlc_tab_type34_huffcodes[10] = {
+    0x18, 0x00, 0x01, 0x04, 0x05, 0x07, 0x03, 0x02, 0x06, 0x08
+};
+
+static const uint8_t vlc_tab_type34_huffbits[10] = {
+    5, 4, 3, 3, 3, 3, 3, 3, 3, 5
+};
+
+/* values in this table range from -1..22; adjust retrieved value by -1 */
+static const uint16_t vlc_tab_fft_tone_offset_0_huffcodes[23] = {
+    0x038e, 0x0001, 0x0000, 0x0022, 0x000a, 0x0006, 0x0012, 0x0002,
+    0x001e, 0x003e, 0x0056, 0x0016, 0x000e, 0x0032, 0x0072, 0x0042,
+    0x008e, 0x004e, 0x00f2, 0x002e, 0x0036, 0x00c2, 0x018e
+};
+
+static const uint8_t vlc_tab_fft_tone_offset_0_huffbits[23] = {
+    10, 1, 2, 6, 4, 5, 6, 7, 6, 6, 7, 7, 8, 7, 8, 8, 9, 7, 8, 6, 6, 8, 10
+};
+
+/* values in this table range from -1..27; adjust retrieved value by -1 */
+static const uint16_t vlc_tab_fft_tone_offset_1_huffcodes[28] = {
+    0x07a4, 0x0001, 0x0020, 0x0012, 0x001c, 0x0008, 0x0006, 0x0010,
+    0x0000, 0x0014, 0x0004, 0x0032, 0x0070, 0x000c, 0x0002, 0x003a,
+    0x001a, 0x002c, 0x002a, 0x0022, 0x0024, 0x000a, 0x0064, 0x0030,
+    0x0062, 0x00a4, 0x01a4, 0x03a4
+};
+
+static const uint8_t vlc_tab_fft_tone_offset_1_huffbits[28] = {
+    11, 1, 6, 6, 5, 4, 3, 6, 6, 5, 6, 6, 7, 6, 6, 6,
+    6, 6, 6, 7, 8, 6, 7, 7, 7, 9, 10, 11
+};
+
+/* values in this table range from -1..31; adjust retrieved value by -1 */
+static const uint16_t vlc_tab_fft_tone_offset_2_huffcodes[32] = {
+    0x1760, 0x0001, 0x0000, 0x0082, 0x000c, 0x0006, 0x0003, 0x0007,
+    0x0008, 0x0004, 0x0010, 0x0012, 0x0022, 0x001a, 0x0000, 0x0020,
+    0x000a, 0x0040, 0x004a, 0x006a, 0x002a, 0x0042, 0x0002, 0x0060,
+    0x00aa, 0x00e0, 0x00c2, 0x01c2, 0x0160, 0x0360, 0x0760, 0x0f60
+};
+
+static const uint8_t vlc_tab_fft_tone_offset_2_huffbits[32] = {
+    13, 2, 0, 8, 4, 3, 3, 3, 4, 4, 5, 5, 6, 5, 7, 7,
+    7, 7, 7, 7, 8, 8, 8, 9, 8, 8, 9, 9, 10, 11, 13, 12
+};
+
+/* values in this table range from -1..34; adjust retrieved value by -1 */
+static const uint16_t vlc_tab_fft_tone_offset_3_huffcodes[35] = {
+    0x33ea, 0x0005, 0x0000, 0x000c, 0x0000, 0x0006, 0x0003, 0x0008,
+    0x0002, 0x0001, 0x0004, 0x0007, 0x001a, 0x000f, 0x001c, 0x002c,
+    0x000a, 0x001d, 0x002d, 0x002a, 0x000d, 0x004c, 0x008c, 0x006a,
+    0x00cd, 0x004d, 0x00ea, 0x020c, 0x030c, 0x010c, 0x01ea, 0x07ea,
+    0x0bea, 0x03ea, 0x13ea
+};
+
+static const uint8_t vlc_tab_fft_tone_offset_3_huffbits[35] = {
+    14, 4, 0, 10, 4, 3, 3, 4, 4, 3, 4, 4, 5, 4, 5, 6,
+    6, 5, 6, 7, 7, 7, 8, 8, 8, 8, 9, 10, 10, 10, 10, 11,
+    12, 13, 14
+};
+
+/* values in this table range from -1..37; adjust retrieved value by -1 */
+static const uint16_t vlc_tab_fft_tone_offset_4_huffcodes[38] = {
+    0x5282, 0x0016, 0x0000, 0x0136, 0x0004, 0x0000, 0x0007, 0x000a,
+    0x000e, 0x0003, 0x0001, 0x000d, 0x0006, 0x0009, 0x0012, 0x0005,
+    0x0025, 0x0022, 0x0015, 0x0002, 0x0076, 0x0035, 0x0042, 0x00c2,
+    0x0182, 0x00b6, 0x0036, 0x03c2, 0x0482, 0x01c2, 0x0682, 0x0882,
+    0x0a82, 0x0082, 0x0282, 0x1282, 0x3282, 0x2282
+};
+
+static const uint8_t vlc_tab_fft_tone_offset_4_huffbits[38] = {
+    15, 6, 0, 9, 3, 3, 3, 4, 4, 3, 4, 4, 5, 4, 5, 6,
+    6, 6, 6, 8, 7, 6, 8, 9, 9, 8, 9, 10, 11, 10, 11, 12,
+    12, 12, 14, 15, 14, 14
+};
+
+/** FFT TABLES **/
+
+/* values in this table range from -1..27; adjust retrieved value by -1 */
+static const uint16_t fft_level_exp_alt_huffcodes[28] = {
+    0x1ec6, 0x0006, 0x00c2, 0x0142, 0x0242, 0x0246, 0x00c6, 0x0046,
+    0x0042, 0x0146, 0x00a2, 0x0062, 0x0026, 0x0016, 0x000e, 0x0005,
+    0x0004, 0x0003, 0x0000, 0x0001, 0x000a, 0x0012, 0x0002, 0x0022,
+    0x01c6, 0x02c6, 0x06c6, 0x0ec6
+};
+
+static const uint8_t fft_level_exp_alt_huffbits[28] = {
+    13, 7, 8, 9, 10, 10, 10, 10, 10, 9, 8, 7, 6, 5, 4, 3,
+    3, 2, 3, 3, 4, 5, 7, 8, 9, 11, 12, 13
+};
+
+/* values in this table range from -1..19; adjust retrieved value by -1 */
+static const uint16_t fft_level_exp_huffcodes[20] = {
+    0x0f24, 0x0001, 0x0002, 0x0000, 0x0006, 0x0005, 0x0007, 0x000c,
+    0x000b, 0x0014, 0x0013, 0x0004, 0x0003, 0x0023, 0x0064, 0x00a4,
+    0x0024, 0x0124, 0x0324, 0x0724
+};
+
+static const uint8_t fft_level_exp_huffbits[20] = {
+    12, 3, 3, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8, 9, 10, 11, 12
+};
+
+/* values in this table range from -1..6; adjust retrieved value by -1 */
+static const uint8_t fft_stereo_exp_huffcodes[7] = {
+    0x3e, 0x01, 0x00, 0x02, 0x06, 0x0e, 0x1e
+};
+
+static const uint8_t fft_stereo_exp_huffbits[7] = {
+    6, 1, 2, 3, 4, 5, 6
+};
+
+/* values in this table range from -1..8; adjust retrieved value by -1 */
+static const uint8_t fft_stereo_phase_huffcodes[9] = {
+    0x35, 0x02, 0x00, 0x01, 0x0d, 0x15, 0x05, 0x09, 0x03
+};
+
+static const uint8_t fft_stereo_phase_huffbits[9] = {
+    6, 2, 2, 4, 4, 6, 5, 4, 2
+};
+
+static const int fft_cutoff_index_table[4][2] = {
+    { 1, 2 }, {-1, 0 }, {-1,-2 }, { 0, 0 }
+};
+
+static const int16_t fft_level_index_table[256] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+};
+
+static uint8_t last_coeff[3] = {
+    4, 7, 10
+};
+
+static uint8_t coeff_per_sb_for_avg[3][30] = {
+    { 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
+    { 0, 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+    { 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9 }
+};
+
+static uint32_t dequant_table[3][10][30] = {
+    { { 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 256, 256, 205, 154, 102, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 51, 102, 154, 205, 256, 238, 219, 201, 183, 165, 146, 128, 110, 91, 73, 55, 37, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 18, 37, 55, 73, 91, 110, 128, 146, 165, 183, 201, 219, 238, 256, 228, 199, 171, 142, 114, 85, 57, 28 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+    { { 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 256, 171, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 85, 171, 256, 171, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 85, 171, 256, 219, 183, 146, 110, 73, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 73, 110, 146, 183, 219, 256, 228, 199, 171, 142, 114, 85, 57, 28, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 57, 85, 114, 142, 171, 199, 228, 256, 213, 171, 128, 85, 43 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+    { { 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 256, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 256, 171, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 0, 85, 171, 256, 192, 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 128, 192, 256, 205, 154, 102, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 102, 154, 205, 256, 213, 171, 128, 85, 43, 0, 0, 0, 0, 0, 0 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 85, 128, 171, 213, 256, 213, 171, 128, 85, 43 } }
+};
+
+static uint8_t coeff_per_sb_for_dequant[3][30] = {
+    { 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
+    { 0, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6 },
+    { 0, 1, 2, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9 }
+};
+
+/* first index is subband, 2nd index is 0, 1 or 3 (2 is unused) */
+static int8_t tone_level_idx_offset_table[30][4] = {
+    { -50, -50,  0, -50 },
+    { -50, -50,  0, -50 },
+    { -50,  -9,  0, -19 },
+    { -16,  -6,  0, -12 },
+    { -11,  -4,  0,  -8 },
+    {  -8,  -3,  0,  -6 },
+    {  -7,  -3,  0,  -5 },
+    {  -6,  -2,  0,  -4 },
+    {  -5,  -2,  0,  -3 },
+    {  -4,  -1,  0,  -3 },
+    {  -4,  -1,  0,  -2 },
+    {  -3,  -1,  0,  -2 },
+    {  -3,  -1,  0,  -2 },
+    {  -3,  -1,  0,  -2 },
+    {  -2,  -1,  0,  -1 },
+    {  -2,  -1,  0,  -1 },
+    {  -2,  -1,  0,  -1 },
+    {  -2,   0,  0,  -1 },
+    {  -2,   0,  0,  -1 },
+    {  -1,   0,  0,  -1 },
+    {  -1,   0,  0,  -1 },
+    {  -1,   0,  0,  -1 },
+    {  -1,   0,  0,  -1 },
+    {  -1,   0,  0,  -1 },
+    {  -1,   0,  0,  -1 },
+    {  -1,   0,  0,  -1 },
+    {  -1,   0,  0,   0 },
+    {  -1,   0,  0,   0 },
+    {  -1,   0,  0,   0 },
+    {  -1,   0,  0,   0 }
+};
+
+/* all my samples have 1st index 0 or 1 */
+/* second index is subband, only indexes 0-29 seem to be used */
+static int8_t coding_method_table[5][30] = {
+    { 34, 30, 24, 24, 16, 16, 16, 16, 10, 10, 10, 10, 10, 10, 10,
+      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
+    },
+    { 34, 30, 24, 24, 16, 16, 16, 16, 10, 10, 10, 10, 10, 10, 10,
+      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
+    },
+    { 34, 30, 30, 30, 24, 24, 16, 16, 16, 16, 16, 16, 10, 10, 10,
+      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
+    },
+    { 34, 34, 30, 30, 24, 24, 24, 24, 16, 16, 16, 16, 16, 16, 16,
+      16, 16, 16, 16, 16, 16, 16, 10, 10, 10, 10, 10, 10, 10, 10
+    },
+    { 34, 34, 30, 30, 30, 30, 30, 30, 24, 24, 24, 24, 24, 24, 24,
+      24, 24, 24, 24, 24, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
+    },
+};
+
+static const int vlc_stage3_values[60] = {
+        0,     1,     2,     3,     4,     6,     8,    10,    12,    16,    20,    24,
+       28,    36,    44,    52,    60,    76,    92,   108,   124,   156,   188,   220,
+      252,   316,   380,   444,   508,   636,   764,   892,  1020,  1276,  1532,  1788,
+     2044,  2556,  3068,  3580,  4092,  5116,  6140,  7164,  8188, 10236, 12284, 14332,
+    16380, 20476, 24572, 28668, 32764, 40956, 49148, 57340, 65532, 81916, 98300,114684
+};
+
+static const float fft_tone_sample_table[4][16][5] = {
+    { { .0100000000f,-.0037037037f,-.0020000000f,-.0069444444f,-.0018416207f },
+      { .0416666667f, .0000000000f, .0000000000f,-.0208333333f,-.0123456791f },
+      { .1250000000f, .0558035709f, .0330687836f,-.0164473690f,-.0097465888f },
+      { .1562500000f, .0625000000f, .0370370370f,-.0062500000f,-.0037037037f },
+      { .1996007860f, .0781250000f, .0462962948f, .0022727272f, .0013468013f },
+      { .2000000000f, .0625000000f, .0370370373f, .0208333333f, .0074074073f },
+      { .2127659619f, .0555555556f, .0329218097f, .0208333333f, .0123456791f },
+      { .2173913121f, .0473484844f, .0280583613f, .0347222239f, .0205761325f },
+      { .2173913121f, .0347222239f, .0205761325f, .0473484844f, .0280583613f },
+      { .2127659619f, .0208333333f, .0123456791f, .0555555556f, .0329218097f },
+      { .2000000000f, .0208333333f, .0074074073f, .0625000000f, .0370370370f },
+      { .1996007860f, .0022727272f, .0013468013f, .0781250000f, .0462962948f },
+      { .1562500000f,-.0062500000f,-.0037037037f, .0625000000f, .0370370370f },
+      { .1250000000f,-.0164473690f,-.0097465888f, .0558035709f, .0330687836f },
+      { .0416666667f,-.0208333333f,-.0123456791f, .0000000000f, .0000000000f },
+      { .0100000000f,-.0069444444f,-.0018416207f,-.0037037037f,-.0020000000f } },
+
+    { { .0050000000f,-.0200000000f, .0125000000f,-.3030303030f, .0020000000f },
+      { .1041666642f, .0400000000f,-.0250000000f, .0333333333f,-.0200000000f },
+      { .1250000000f, .0100000000f, .0142857144f,-.0500000007f,-.0200000000f },
+      { .1562500000f,-.0006250000f,-.00049382716f,-.000625000f,-.00049382716f },
+      { .1562500000f,-.0006250000f,-.00049382716f,-.000625000f,-.00049382716f },
+      { .1250000000f,-.0500000000f,-.0200000000f, .0100000000f, .0142857144f },
+      { .1041666667f, .0333333333f,-.0200000000f, .0400000000f,-.0250000000f },
+      { .0050000000f,-.3030303030f, .0020000001f,-.0200000000f, .0125000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f } },
+
+    { { .1428571492f, .1250000000f,-.0285714287f,-.0357142873f, .0208333333f },
+      { .1818181818f, .0588235296f, .0333333333f, .0212765951f, .0100000000f },
+      { .1818181818f, .0212765951f, .0100000000f, .0588235296f, .0333333333f },
+      { .1428571492f,-.0357142873f, .0208333333f, .1250000000f,-.0285714287f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f } },
+
+    { { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
+      { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f } }
+};
+
+static const float fft_tone_level_table[2][64] = { {
+/* pow ~ (i > 46) ? 0 : (((((i & 1) ? 431 : 304) << (i >> 1))) / 1024.0); */
+    0.17677669f, 0.42677650f, 0.60355347f, 0.85355347f,
+    1.20710683f, 1.68359375f, 2.37500000f, 3.36718750f,
+    4.75000000f, 6.73437500f, 9.50000000f, 13.4687500f,
+    19.0000000f, 26.9375000f, 38.0000000f, 53.8750000f,
+    76.0000000f, 107.750000f, 152.000000f, 215.500000f,
+    304.000000f, 431.000000f, 608.000000f, 862.000000f,
+    1216.00000f, 1724.00000f, 2432.00000f, 3448.00000f,
+    4864.00000f, 6896.00000f, 9728.00000f, 13792.0000f,
+    19456.0000f, 27584.0000f, 38912.0000f, 55168.0000f,
+    77824.0000f, 110336.000f, 155648.000f, 220672.000f,
+    311296.000f, 441344.000f, 622592.000f, 882688.000f,
+    1245184.00f, 1765376.00f, 2490368.00f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+  }, {
+/* pow = (i > 45) ? 0 : ((((i & 1) ? 431 : 304) << (i >> 1)) / 512.0); */
+    0.59375000f, 0.84179688f, 1.18750000f, 1.68359375f,
+    2.37500000f, 3.36718750f, 4.75000000f, 6.73437500f,
+    9.50000000f, 13.4687500f, 19.0000000f, 26.9375000f,
+    38.0000000f, 53.8750000f, 76.0000000f, 107.750000f,
+    152.000000f, 215.500000f, 304.000000f, 431.000000f,
+    608.000000f, 862.000000f, 1216.00000f, 1724.00000f,
+    2432.00000f, 3448.00000f, 4864.00000f, 6896.00000f,
+    9728.00000f, 13792.0000f, 19456.0000f, 27584.0000f,
+    38912.0000f, 55168.0000f, 77824.0000f, 110336.000f,
+    155648.000f, 220672.000f, 311296.000f, 441344.000f,
+    622592.000f, 882688.000f, 1245184.00f, 1765376.00f,
+    2490368.00f, 3530752.00f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f
+} };
+
+static const float fft_tone_envelope_table[4][31] = {
+    { .009607375f, .038060248f, .084265202f, .146446645f, .222214907f, .308658302f,
+      .402454883f, .500000060f, .597545207f, .691341758f, .777785182f, .853553414f,
+      .915734828f, .961939812f, .990392685f, 1.00000000f, .990392625f, .961939752f,
+      .915734768f, .853553295f, .777785063f, .691341639f, .597545087f, .500000000f,
+      .402454853f, .308658272f, .222214878f, .146446615f, .084265172f, .038060218f,
+      .009607345f },
+    { .038060248f, .146446645f, .308658302f, .500000060f, .691341758f, .853553414f,
+      .961939812f, 1.00000000f, .961939752f, .853553295f, .691341639f, .500000000f,
+      .308658272f, .146446615f, .038060218f, .000000000f, .000000000f, .000000000f,
+      .000000000f, .000000000f, .000000000f, .000000000f, .000000000f, .000000000f,
+      .000000000f, .000000000f, .000000000f, .000000000f, .000000000f, .000000000f,
+      .000000000f },
+    { .146446645f, .500000060f, .853553414f, 1.00000000f, .853553295f, .500000000f,
+      .146446615f, .000000000f, .000000000f, .000000000f, .000000000f, .000000000f,
+      .000000000f, .000000000f, .000000000f, .000000000f, .000000000f, .000000000f,
+      .000000000f, .000000000f, .000000000f, .000000000f, .000000000f, .000000000f,
+      .000000000f, .000000000f, .000000000f, .000000000f, .000000000f, .000000000f,
+      .000000000f },
+    { .500000060f, 1.00000000f, .500000000f, .000000000f, .000000000f, .000000000f,
+      .000000000f, .000000000f, .000000000f, .000000000f, .000000000f, .000000000f,
+      .000000000f, .000000000f, .000000000f, .000000000f, .000000000f, .000000000f,
+      .000000000f, .000000000f, .000000000f, .000000000f, .000000000f, .000000000f,
+      .000000000f, .000000000f, .000000000f, .000000000f, .000000000f, .000000000f,
+      .000000000f }
+};
+
+static const float sb_noise_attenuation[32] = {
+    0.0f, 0.0f, 0.3f, 0.4f, 0.5f, 0.7f, 1.0f, 1.0f,
+    1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+    1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+    1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+};
+
+static const uint8_t fft_subpackets[32] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0
+};
+
+/* first index is joined_stereo, second index is 0 or 2 (1 is unused) */
+static float dequant_1bit[2][3] = {
+    {-0.920000f, 0.000000f, 0.920000f },
+    {-0.890000f, 0.000000f, 0.890000f }
+};
+
+static const float type30_dequant[8] = {
+   -1.0f,-0.625f,-0.291666656732559f,0.0f,
+   0.25f,0.5f,0.75f,1.0f,
+};
+
+static const float type34_delta[10] = { // FIXME: covers 8 entries..
+    -1.0f,-0.60947573184967f,-0.333333343267441f,-0.138071194291115f,0.0f,
+    0.138071194291115f,0.333333343267441f,0.60947573184967f,1.0f,0.0f,
+};
+
+#endif /* QDM2DATA_H */
diff --git a/contrib/ffmpeg/libavcodec/qdrw.c b/contrib/ffmpeg/libavcodec/qdrw.c
new file mode 100644
index 000000000..8ebb43c4a
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/qdrw.c
@@ -0,0 +1,160 @@
+/*
+ * QuickDraw (qdrw) codec
+ * Copyright (c) 2004 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file qdrw.c
+ * Apple QuickDraw codec.
+ */
+
+#include "avcodec.h"
+#include "mpegvideo.h"
+
+typedef struct QdrawContext{
+    AVCodecContext *avctx;
+    AVFrame pic;
+    uint8_t palette[256*3];
+} QdrawContext;
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    QdrawContext * const a = avctx->priv_data;
+    AVFrame * const p= (AVFrame*)&a->pic;
+    uint8_t* outdata;
+    int colors;
+    int i;
+
+    if(p->data[0])
+        avctx->release_buffer(avctx, p);
+
+    p->reference= 0;
+    if(avctx->get_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    p->pict_type= I_TYPE;
+    p->key_frame= 1;
+
+    outdata = a->pic.data[0];
+
+    buf += 0x68; /* jump to palette */
+    colors = BE_32(buf);
+    buf += 4;
+
+    if(colors < 0 || colors > 256) {
+        av_log(avctx, AV_LOG_ERROR, "Error color count - %i(0x%X)\n", colors, colors);
+        return -1;
+    }
+
+    for (i = 0; i <= colors; i++) {
+        unsigned int idx;
+        idx = BE_16(buf); /* color index */
+        buf += 2;
+
+        if (idx > 255) {
+            av_log(avctx, AV_LOG_ERROR, "Palette index out of range: %u\n", idx);
+            buf += 6;
+            continue;
+        }
+        a->palette[idx * 3 + 0] = *buf++;
+        buf++;
+        a->palette[idx * 3 + 1] = *buf++;
+        buf++;
+        a->palette[idx * 3 + 2] = *buf++;
+        buf++;
+    }
+
+    buf += 18; /* skip unneeded data */
+    for (i = 0; i < avctx->height; i++) {
+        int size, left, code, pix;
+        uint8_t *next;
+        uint8_t *out;
+        int tsize = 0;
+
+        /* decode line */
+        out = outdata;
+        size = BE_16(buf); /* size of packed line */
+        buf += 2;
+        left = size;
+        next = buf + size;
+        while (left > 0) {
+            code = *buf++;
+            if (code & 0x80 ) { /* run */
+                int i;
+                pix = *buf++;
+                if ((out + (257 - code) * 3) > (outdata +  a->pic.linesize[0]))
+                    break;
+                for (i = 0; i < 257 - code; i++) {
+                    *out++ = a->palette[pix * 3 + 0];
+                    *out++ = a->palette[pix * 3 + 1];
+                    *out++ = a->palette[pix * 3 + 2];
+                }
+                tsize += 257 - code;
+                left -= 2;
+            } else { /* copy */
+                int i, pix;
+                if ((out + code * 3) > (outdata +  a->pic.linesize[0]))
+                    break;
+                for (i = 0; i <= code; i++) {
+                    pix = *buf++;
+                    *out++ = a->palette[pix * 3 + 0];
+                    *out++ = a->palette[pix * 3 + 1];
+                    *out++ = a->palette[pix * 3 + 2];
+                }
+                left -= 2 + code;
+                tsize += code + 1;
+            }
+        }
+        buf = next;
+        outdata += a->pic.linesize[0];
+    }
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = a->pic;
+
+    return buf_size;
+}
+
+static int decode_init(AVCodecContext *avctx){
+//    QdrawContext * const a = avctx->priv_data;
+
+    if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
+        return 1;
+    }
+
+    avctx->pix_fmt= PIX_FMT_RGB24;
+
+    return 0;
+}
+
+AVCodec qdraw_decoder = {
+    "qdraw",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_QDRAW,
+    sizeof(QdrawContext),
+    decode_init,
+    NULL,
+    NULL,
+    decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/qpeg.c b/contrib/ffmpeg/libavcodec/qpeg.c
new file mode 100644
index 000000000..3c597e8df
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/qpeg.c
@@ -0,0 +1,324 @@
+/*
+ * QPEG codec
+ * Copyright (c) 2004 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file qpeg.c
+ * QPEG codec.
+ */
+
+#include "avcodec.h"
+#include "mpegvideo.h"
+
+typedef struct QpegContext{
+    AVCodecContext *avctx;
+    AVFrame pic;
+    uint8_t *refdata;
+} QpegContext;
+
+static void qpeg_decode_intra(uint8_t *src, uint8_t *dst, int size,
+                            int stride, int width, int height)
+{
+    int i;
+    int code;
+    int c0, c1;
+    int run, copy;
+    int filled = 0;
+    int rows_to_go;
+
+    rows_to_go = height;
+    height--;
+    dst = dst + height * stride;
+
+    while((size > 0) && (rows_to_go > 0)) {
+        code = *src++;
+        size--;
+        run = copy = 0;
+        if(code == 0xFC) /* end-of-picture code */
+            break;
+        if(code >= 0xF8) { /* very long run */
+            c0 = *src++;
+            c1 = *src++;
+            size -= 2;
+            run = ((code & 0x7) << 16) + (c0 << 8) + c1 + 2;
+        } else if (code >= 0xF0) { /* long run */
+            c0 = *src++;
+            size--;
+            run = ((code & 0xF) << 8) + c0 + 2;
+        } else if (code >= 0xE0) { /* short run */
+            run = (code & 0x1F) + 2;
+        } else if (code >= 0xC0) { /* very long copy */
+            c0 = *src++;
+            c1 = *src++;
+            size -= 2;
+            copy = ((code & 0x3F) << 16) + (c0 << 8) + c1 + 1;
+        } else if (code >= 0x80) { /* long copy */
+            c0 = *src++;
+            size--;
+            copy = ((code & 0x7F) << 8) + c0 + 1;
+        } else { /* short copy */
+            copy = code + 1;
+        }
+
+        /* perform actual run or copy */
+        if(run) {
+            int p;
+
+            p = *src++;
+            size--;
+            for(i = 0; i < run; i++) {
+                dst[filled++] = p;
+                if (filled >= width) {
+                    filled = 0;
+                    dst -= stride;
+                    rows_to_go--;
+                    if(rows_to_go <= 0)
+                        break;
+                }
+            }
+        } else {
+            size -= copy;
+            for(i = 0; i < copy; i++) {
+                dst[filled++] = *src++;
+                if (filled >= width) {
+                    filled = 0;
+                    dst -= stride;
+                    rows_to_go--;
+                    if(rows_to_go <= 0)
+                        break;
+                }
+            }
+        }
+    }
+}
+
+static int qpeg_table_h[16] =
+ { 0x00, 0x20, 0x20, 0x20, 0x18, 0x10, 0x10, 0x20, 0x10, 0x08, 0x18, 0x08, 0x08, 0x18, 0x10, 0x04};
+static int qpeg_table_w[16] =
+ { 0x00, 0x20, 0x18, 0x08, 0x18, 0x10, 0x20, 0x10, 0x08, 0x10, 0x20, 0x20, 0x08, 0x10, 0x18, 0x04};
+
+/* Decodes delta frames */
+static void qpeg_decode_inter(uint8_t *src, uint8_t *dst, int size,
+                            int stride, int width, int height,
+                            int delta, uint8_t *ctable, uint8_t *refdata)
+{
+    int i, j;
+    int code;
+    int filled = 0;
+    int orig_height;
+    uint8_t *blkdata;
+
+    /* copy prev frame */
+    for(i = 0; i < height; i++)
+        memcpy(refdata + (i * width), dst + (i * stride), width);
+
+    orig_height = height;
+    blkdata = src - 0x86;
+    height--;
+    dst = dst + height * stride;
+
+    while((size > 0) && (height >= 0)) {
+        code = *src++;
+        size--;
+
+        if(delta) {
+            /* motion compensation */
+            while((code & 0xF0) == 0xF0) {
+                if(delta == 1) {
+                    int me_idx;
+                    int me_w, me_h, me_x, me_y;
+                    uint8_t *me_plane;
+                    int corr, val;
+
+                    /* get block size by index */
+                    me_idx = code & 0xF;
+                    me_w = qpeg_table_w[me_idx];
+                    me_h = qpeg_table_h[me_idx];
+
+                    /* extract motion vector */
+                    corr = *src++;
+                    size--;
+
+                    val = corr >> 4;
+                    if(val > 7)
+                        val -= 16;
+                    me_x = val;
+
+                    val = corr & 0xF;
+                    if(val > 7)
+                        val -= 16;
+                    me_y = val;
+
+                    /* check motion vector */
+                    if ((me_x + filled < 0) || (me_x + me_w + filled > width) ||
+                       (height - me_y - me_h < 0) || (height - me_y > orig_height) ||
+                       (filled + me_w > width) || (height - me_h < 0))
+                        av_log(NULL, AV_LOG_ERROR, "Bogus motion vector (%i,%i), block size %ix%i at %i,%i\n",
+                               me_x, me_y, me_w, me_h, filled, height);
+                    else {
+                        /* do motion compensation */
+                        me_plane = refdata + (filled + me_x) + (height - me_y) * width;
+                        for(j = 0; j < me_h; j++) {
+                            for(i = 0; i < me_w; i++)
+                                dst[filled + i - (j * stride)] = me_plane[i - (j * width)];
+                        }
+                    }
+                }
+                code = *src++;
+                size--;
+            }
+        }
+
+        if(code == 0xE0) /* end-of-picture code */
+            break;
+        if(code > 0xE0) { /* run code: 0xE1..0xFF */
+            int p;
+
+            code &= 0x1F;
+            p = *src++;
+            size--;
+            for(i = 0; i <= code; i++) {
+                dst[filled++] = p;
+                if(filled >= width) {
+                    filled = 0;
+                    dst -= stride;
+                    height--;
+                }
+            }
+        } else if(code >= 0xC0) { /* copy code: 0xC0..0xDF */
+            code &= 0x1F;
+
+            for(i = 0; i <= code; i++) {
+                dst[filled++] = *src++;
+                if(filled >= width) {
+                    filled = 0;
+                    dst -= stride;
+                    height--;
+                }
+            }
+            size -= code + 1;
+        } else if(code >= 0x80) { /* skip code: 0x80..0xBF */
+            int skip;
+
+            code &= 0x3F;
+            /* codes 0x80 and 0x81 are actually escape codes,
+               skip value minus constant is in the next byte */
+            if(!code)
+                skip = (*src++) + 64;
+            else if(code == 1)
+                skip = (*src++) + 320;
+            else
+                skip = code;
+            filled += skip;
+            while( filled >= width) {
+                filled -= width;
+                dst -= stride;
+                height--;
+                if(height < 0)
+                    break;
+            }
+        } else {
+            /* zero code treated as one-pixel skip */
+            if(code)
+                dst[filled++] = ctable[code & 0x7F];
+            else
+                filled++;
+            if(filled >= width) {
+                filled = 0;
+                dst -= stride;
+                height--;
+            }
+        }
+    }
+}
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    QpegContext * const a = avctx->priv_data;
+    AVFrame * const p= (AVFrame*)&a->pic;
+    uint8_t* outdata;
+    int delta;
+
+    if(p->data[0])
+        avctx->release_buffer(avctx, p);
+
+    p->reference= 0;
+    if(avctx->get_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    outdata = a->pic.data[0];
+    if(buf[0x85] == 0x10) {
+        qpeg_decode_intra(buf+0x86, outdata, buf_size - 0x86, a->pic.linesize[0], avctx->width, avctx->height);
+    } else {
+        delta = buf[0x85];
+        qpeg_decode_inter(buf+0x86, outdata, buf_size - 0x86, a->pic.linesize[0], avctx->width, avctx->height, delta, buf + 4, a->refdata);
+    }
+
+    /* make the palette available on the way out */
+    memcpy(a->pic.data[1], a->avctx->palctrl->palette, AVPALETTE_SIZE);
+    if (a->avctx->palctrl->palette_changed) {
+        a->pic.palette_has_changed = 1;
+        a->avctx->palctrl->palette_changed = 0;
+    }
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = a->pic;
+
+    return buf_size;
+}
+
+static int decode_init(AVCodecContext *avctx){
+    QpegContext * const a = avctx->priv_data;
+
+    a->avctx = avctx;
+    avctx->pix_fmt= PIX_FMT_PAL8;
+    avctx->has_b_frames = 0;
+    a->pic.data[0] = NULL;
+    a->refdata = av_malloc(avctx->width * avctx->height);
+
+    return 0;
+}
+
+static int decode_end(AVCodecContext *avctx){
+    QpegContext * const a = avctx->priv_data;
+    AVFrame * const p= (AVFrame*)&a->pic;
+
+    if(p->data[0])
+        avctx->release_buffer(avctx, p);
+
+    av_free(a->refdata);
+    return 0;
+}
+
+AVCodec qpeg_decoder = {
+    "qpeg",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_QPEG,
+    sizeof(QpegContext),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/qtrle.c b/contrib/ffmpeg/libavcodec/qtrle.c
new file mode 100644
index 000000000..d4b314d03
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/qtrle.c
@@ -0,0 +1,630 @@
+/*
+ * Quicktime Animation (RLE) Video Decoder
+ * Copyright (C) 2004 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file qtrle.c
+ * QT RLE Video Decoder by Mike Melanson (melanson@pcisys.net)
+ * For more information about the QT RLE format, visit:
+ *   http://www.pcisys.net/~melanson/codecs/
+ *
+ * The QT RLE decoder has seven modes of operation:
+ * 1, 2, 4, 8, 16, 24, and 32 bits per pixel. For modes 1, 2, 4, and 8
+ * the decoder outputs PAL8 colorspace data. 16-bit data yields RGB555
+ * data. 24-bit data is RGB24 and 32-bit data is RGBA32.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+
+typedef struct QtrleContext {
+
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    AVFrame frame;
+
+    unsigned char *buf;
+    int size;
+
+} QtrleContext;
+
+#define CHECK_STREAM_PTR(n) \
+  if ((stream_ptr + n) > s->size) { \
+    av_log (s->avctx, AV_LOG_INFO, "Problem: stream_ptr out of bounds (%d >= %d)\n", \
+      stream_ptr + n, s->size); \
+    return; \
+  }
+
+#define CHECK_PIXEL_PTR(n) \
+  if ((pixel_ptr + n > pixel_limit) || (pixel_ptr + n < 0)) { \
+    av_log (s->avctx, AV_LOG_INFO, "Problem: pixel_ptr = %d, pixel_limit = %d\n", \
+      pixel_ptr + n, pixel_limit); \
+    return; \
+  } \
+
+static void qtrle_decode_1bpp(QtrleContext *s)
+{
+}
+
+static void qtrle_decode_2bpp(QtrleContext *s)
+{
+}
+
+static void qtrle_decode_4bpp(QtrleContext *s)
+{
+    int stream_ptr;
+    int header;
+    int start_line;
+    int lines_to_change;
+    int rle_code;
+    int row_ptr, pixel_ptr;
+    int row_inc = s->frame.linesize[0];
+    unsigned char pi1, pi2, pi3, pi4, pi5, pi6, pi7, pi8;  /* 8 palette indices */
+    unsigned char *rgb = s->frame.data[0];
+    int pixel_limit = s->frame.linesize[0] * s->avctx->height;
+
+    /* check if this frame is even supposed to change */
+    if (s->size < 8)
+        return;
+
+    /* start after the chunk size */
+    stream_ptr = 4;
+
+    /* fetch the header */
+    CHECK_STREAM_PTR(2);
+    header = BE_16(&s->buf[stream_ptr]);
+    stream_ptr += 2;
+
+    /* if a header is present, fetch additional decoding parameters */
+    if (header & 0x0008) {
+        CHECK_STREAM_PTR(8);
+        start_line = BE_16(&s->buf[stream_ptr]);
+        stream_ptr += 4;
+        lines_to_change = BE_16(&s->buf[stream_ptr]);
+        stream_ptr += 4;
+    } else {
+        start_line = 0;
+        lines_to_change = s->avctx->height;
+    }
+
+    row_ptr = row_inc * start_line;
+    while (lines_to_change--) {
+        CHECK_STREAM_PTR(2);
+        pixel_ptr = row_ptr + (8 * (s->buf[stream_ptr++] - 1));
+
+        while ((rle_code = (signed char)s->buf[stream_ptr++]) != -1) {
+            if (rle_code == 0) {
+                /* there's another skip code in the stream */
+                CHECK_STREAM_PTR(1);
+                pixel_ptr += (8 * (s->buf[stream_ptr++] - 1));
+                CHECK_PIXEL_PTR(0);  /* make sure pixel_ptr is positive */
+            } else if (rle_code < 0) {
+                /* decode the run length code */
+                rle_code = -rle_code;
+                /* get the next 4 bytes from the stream, treat them as palette
+                 * indices, and output them rle_code times */
+                CHECK_STREAM_PTR(4);
+                pi1 = ((s->buf[stream_ptr]) >> 4) & 0x0f;
+                pi2 = (s->buf[stream_ptr++]) & 0x0f;
+                pi3 = ((s->buf[stream_ptr]) >> 4) & 0x0f;
+                pi4 = (s->buf[stream_ptr++]) & 0x0f;
+                pi5 = ((s->buf[stream_ptr]) >> 4) & 0x0f;
+                pi6 = (s->buf[stream_ptr++]) & 0x0f;
+                pi7 = ((s->buf[stream_ptr]) >> 4) & 0x0f;
+                pi8 = (s->buf[stream_ptr++]) & 0x0f;
+
+                CHECK_PIXEL_PTR(rle_code * 8);
+
+                while (rle_code--) {
+                    rgb[pixel_ptr++] = pi1;
+                    rgb[pixel_ptr++] = pi2;
+                    rgb[pixel_ptr++] = pi3;
+                    rgb[pixel_ptr++] = pi4;
+                    rgb[pixel_ptr++] = pi5;
+                    rgb[pixel_ptr++] = pi6;
+                    rgb[pixel_ptr++] = pi7;
+                    rgb[pixel_ptr++] = pi8;
+                }
+            } else {
+                /* copy the same pixel directly to output 4 times */
+                rle_code *= 4;
+                CHECK_STREAM_PTR(rle_code);
+                CHECK_PIXEL_PTR(rle_code*2);
+
+                while (rle_code--) {
+                    rgb[pixel_ptr++] = ((s->buf[stream_ptr]) >> 4) & 0x0f;
+                    rgb[pixel_ptr++] = (s->buf[stream_ptr++]) & 0x0f;
+                }
+            }
+        }
+        row_ptr += row_inc;
+    }
+}
+
+static void qtrle_decode_8bpp(QtrleContext *s)
+{
+    int stream_ptr;
+    int header;
+    int start_line;
+    int lines_to_change;
+    int rle_code;
+    int row_ptr, pixel_ptr;
+    int row_inc = s->frame.linesize[0];
+    unsigned char pi1, pi2, pi3, pi4;  /* 4 palette indices */
+    unsigned char *rgb = s->frame.data[0];
+    int pixel_limit = s->frame.linesize[0] * s->avctx->height;
+
+    /* check if this frame is even supposed to change */
+    if (s->size < 8)
+        return;
+
+    /* start after the chunk size */
+    stream_ptr = 4;
+
+    /* fetch the header */
+    CHECK_STREAM_PTR(2);
+    header = BE_16(&s->buf[stream_ptr]);
+    stream_ptr += 2;
+
+    /* if a header is present, fetch additional decoding parameters */
+    if (header & 0x0008) {
+        CHECK_STREAM_PTR(8);
+        start_line = BE_16(&s->buf[stream_ptr]);
+        stream_ptr += 4;
+        lines_to_change = BE_16(&s->buf[stream_ptr]);
+        stream_ptr += 4;
+    } else {
+        start_line = 0;
+        lines_to_change = s->avctx->height;
+    }
+
+    row_ptr = row_inc * start_line;
+    while (lines_to_change--) {
+        CHECK_STREAM_PTR(2);
+        pixel_ptr = row_ptr + (4 * (s->buf[stream_ptr++] - 1));
+
+        while ((rle_code = (signed char)s->buf[stream_ptr++]) != -1) {
+            if (rle_code == 0) {
+                /* there's another skip code in the stream */
+                CHECK_STREAM_PTR(1);
+                pixel_ptr += (4 * (s->buf[stream_ptr++] - 1));
+                CHECK_PIXEL_PTR(0);  /* make sure pixel_ptr is positive */
+            } else if (rle_code < 0) {
+                /* decode the run length code */
+                rle_code = -rle_code;
+                /* get the next 4 bytes from the stream, treat them as palette
+                 * indices, and output them rle_code times */
+                CHECK_STREAM_PTR(4);
+                pi1 = s->buf[stream_ptr++];
+                pi2 = s->buf[stream_ptr++];
+                pi3 = s->buf[stream_ptr++];
+                pi4 = s->buf[stream_ptr++];
+
+                CHECK_PIXEL_PTR(rle_code * 4);
+
+                while (rle_code--) {
+                    rgb[pixel_ptr++] = pi1;
+                    rgb[pixel_ptr++] = pi2;
+                    rgb[pixel_ptr++] = pi3;
+                    rgb[pixel_ptr++] = pi4;
+                }
+            } else {
+                /* copy the same pixel directly to output 4 times */
+                rle_code *= 4;
+                CHECK_STREAM_PTR(rle_code);
+                CHECK_PIXEL_PTR(rle_code);
+
+                while (rle_code--) {
+                    rgb[pixel_ptr++] = s->buf[stream_ptr++];
+                }
+            }
+        }
+        row_ptr += row_inc;
+    }
+}
+
+static void qtrle_decode_16bpp(QtrleContext *s)
+{
+    int stream_ptr;
+    int header;
+    int start_line;
+    int lines_to_change;
+    int rle_code;
+    int row_ptr, pixel_ptr;
+    int row_inc = s->frame.linesize[0];
+    unsigned short rgb16;
+    unsigned char *rgb = s->frame.data[0];
+    int pixel_limit = s->frame.linesize[0] * s->avctx->height;
+
+    /* check if this frame is even supposed to change */
+    if (s->size < 8)
+        return;
+
+    /* start after the chunk size */
+    stream_ptr = 4;
+
+    /* fetch the header */
+    CHECK_STREAM_PTR(2);
+    header = BE_16(&s->buf[stream_ptr]);
+    stream_ptr += 2;
+
+    /* if a header is present, fetch additional decoding parameters */
+    if (header & 0x0008) {
+        CHECK_STREAM_PTR(8);
+        start_line = BE_16(&s->buf[stream_ptr]);
+        stream_ptr += 4;
+        lines_to_change = BE_16(&s->buf[stream_ptr]);
+        stream_ptr += 4;
+    } else {
+        start_line = 0;
+        lines_to_change = s->avctx->height;
+    }
+
+    row_ptr = row_inc * start_line;
+    while (lines_to_change--) {
+        CHECK_STREAM_PTR(2);
+        pixel_ptr = row_ptr + (s->buf[stream_ptr++] - 1) * 2;
+
+        while ((rle_code = (signed char)s->buf[stream_ptr++]) != -1) {
+            if (rle_code == 0) {
+                /* there's another skip code in the stream */
+                CHECK_STREAM_PTR(1);
+                pixel_ptr += (s->buf[stream_ptr++] - 1) * 2;
+                CHECK_PIXEL_PTR(0);  /* make sure pixel_ptr is positive */
+            } else if (rle_code < 0) {
+                /* decode the run length code */
+                rle_code = -rle_code;
+                CHECK_STREAM_PTR(2);
+                rgb16 = BE_16(&s->buf[stream_ptr]);
+                stream_ptr += 2;
+
+                CHECK_PIXEL_PTR(rle_code * 2);
+
+                while (rle_code--) {
+                    *(unsigned short *)(&rgb[pixel_ptr]) = rgb16;
+                    pixel_ptr += 2;
+                }
+            } else {
+                CHECK_STREAM_PTR(rle_code * 2);
+                CHECK_PIXEL_PTR(rle_code * 2);
+
+                /* copy pixels directly to output */
+                while (rle_code--) {
+                    rgb16 = BE_16(&s->buf[stream_ptr]);
+                    stream_ptr += 2;
+                    *(unsigned short *)(&rgb[pixel_ptr]) = rgb16;
+                    pixel_ptr += 2;
+                }
+            }
+        }
+        row_ptr += row_inc;
+    }
+}
+
+static void qtrle_decode_24bpp(QtrleContext *s)
+{
+    int stream_ptr;
+    int header;
+    int start_line;
+    int lines_to_change;
+    int rle_code;
+    int row_ptr, pixel_ptr;
+    int row_inc = s->frame.linesize[0];
+    unsigned char r, g, b;
+    unsigned char *rgb = s->frame.data[0];
+    int pixel_limit = s->frame.linesize[0] * s->avctx->height;
+
+    /* check if this frame is even supposed to change */
+    if (s->size < 8)
+        return;
+
+    /* start after the chunk size */
+    stream_ptr = 4;
+
+    /* fetch the header */
+    CHECK_STREAM_PTR(2);
+    header = BE_16(&s->buf[stream_ptr]);
+    stream_ptr += 2;
+
+    /* if a header is present, fetch additional decoding parameters */
+    if (header & 0x0008) {
+        CHECK_STREAM_PTR(8);
+        start_line = BE_16(&s->buf[stream_ptr]);
+        stream_ptr += 4;
+        lines_to_change = BE_16(&s->buf[stream_ptr]);
+        stream_ptr += 4;
+    } else {
+        start_line = 0;
+        lines_to_change = s->avctx->height;
+    }
+
+    row_ptr = row_inc * start_line;
+    while (lines_to_change--) {
+        CHECK_STREAM_PTR(2);
+        pixel_ptr = row_ptr + (s->buf[stream_ptr++] - 1) * 3;
+
+        while ((rle_code = (signed char)s->buf[stream_ptr++]) != -1) {
+            if (rle_code == 0) {
+                /* there's another skip code in the stream */
+                CHECK_STREAM_PTR(1);
+                pixel_ptr += (s->buf[stream_ptr++] - 1) * 3;
+                CHECK_PIXEL_PTR(0);  /* make sure pixel_ptr is positive */
+            } else if (rle_code < 0) {
+                /* decode the run length code */
+                rle_code = -rle_code;
+                CHECK_STREAM_PTR(3);
+                r = s->buf[stream_ptr++];
+                g = s->buf[stream_ptr++];
+                b = s->buf[stream_ptr++];
+
+                CHECK_PIXEL_PTR(rle_code * 3);
+
+                while (rle_code--) {
+                    rgb[pixel_ptr++] = r;
+                    rgb[pixel_ptr++] = g;
+                    rgb[pixel_ptr++] = b;
+                }
+            } else {
+                CHECK_STREAM_PTR(rle_code * 3);
+                CHECK_PIXEL_PTR(rle_code * 3);
+
+                /* copy pixels directly to output */
+                while (rle_code--) {
+                    rgb[pixel_ptr++] = s->buf[stream_ptr++];
+                    rgb[pixel_ptr++] = s->buf[stream_ptr++];
+                    rgb[pixel_ptr++] = s->buf[stream_ptr++];
+                }
+            }
+        }
+        row_ptr += row_inc;
+    }
+}
+
+static void qtrle_decode_32bpp(QtrleContext *s)
+{
+    int stream_ptr;
+    int header;
+    int start_line;
+    int lines_to_change;
+    int rle_code;
+    int row_ptr, pixel_ptr;
+    int row_inc = s->frame.linesize[0];
+    unsigned char a, r, g, b;
+    unsigned int argb;
+    unsigned char *rgb = s->frame.data[0];
+    int pixel_limit = s->frame.linesize[0] * s->avctx->height;
+
+    /* check if this frame is even supposed to change */
+    if (s->size < 8)
+        return;
+
+    /* start after the chunk size */
+    stream_ptr = 4;
+
+    /* fetch the header */
+    CHECK_STREAM_PTR(2);
+    header = BE_16(&s->buf[stream_ptr]);
+    stream_ptr += 2;
+
+    /* if a header is present, fetch additional decoding parameters */
+    if (header & 0x0008) {
+        CHECK_STREAM_PTR(8);
+        start_line = BE_16(&s->buf[stream_ptr]);
+        stream_ptr += 4;
+        lines_to_change = BE_16(&s->buf[stream_ptr]);
+        stream_ptr += 4;
+    } else {
+        start_line = 0;
+        lines_to_change = s->avctx->height;
+    }
+
+    row_ptr = row_inc * start_line;
+    while (lines_to_change--) {
+        CHECK_STREAM_PTR(2);
+        pixel_ptr = row_ptr + (s->buf[stream_ptr++] - 1) * 4;
+
+        while ((rle_code = (signed char)s->buf[stream_ptr++]) != -1) {
+            if (rle_code == 0) {
+                /* there's another skip code in the stream */
+                CHECK_STREAM_PTR(1);
+                pixel_ptr += (s->buf[stream_ptr++] - 1) * 4;
+                CHECK_PIXEL_PTR(0);  /* make sure pixel_ptr is positive */
+            } else if (rle_code < 0) {
+                /* decode the run length code */
+                rle_code = -rle_code;
+                CHECK_STREAM_PTR(4);
+                a = s->buf[stream_ptr++];
+                r = s->buf[stream_ptr++];
+                g = s->buf[stream_ptr++];
+                b = s->buf[stream_ptr++];
+                argb = (a << 24) | (r << 16) | (g << 8) | (b << 0);
+
+                CHECK_PIXEL_PTR(rle_code * 4);
+
+                while (rle_code--) {
+                    *(unsigned int *)(&rgb[pixel_ptr]) = argb;
+                    pixel_ptr += 4;
+                }
+            } else {
+                CHECK_STREAM_PTR(rle_code * 4);
+                CHECK_PIXEL_PTR(rle_code * 4);
+
+                /* copy pixels directly to output */
+                while (rle_code--) {
+                    a = s->buf[stream_ptr++];
+                    r = s->buf[stream_ptr++];
+                    g = s->buf[stream_ptr++];
+                    b = s->buf[stream_ptr++];
+                    argb = (a << 24) | (r << 16) | (g << 8) | (b << 0);
+                    *(unsigned int *)(&rgb[pixel_ptr]) = argb;
+                    pixel_ptr += 4;
+                }
+            }
+        }
+        row_ptr += row_inc;
+    }
+}
+
+static int qtrle_decode_init(AVCodecContext *avctx)
+{
+    QtrleContext *s = (QtrleContext *)avctx->priv_data;
+
+    s->avctx = avctx;
+    switch (avctx->bits_per_sample) {
+    case 1:
+    case 2:
+    case 4:
+    case 8:
+    case 33:
+    case 34:
+    case 36:
+    case 40:
+        avctx->pix_fmt = PIX_FMT_PAL8;
+        break;
+
+    case 16:
+        avctx->pix_fmt = PIX_FMT_RGB555;
+        break;
+
+    case 24:
+        avctx->pix_fmt = PIX_FMT_RGB24;
+        break;
+
+    case 32:
+        avctx->pix_fmt = PIX_FMT_RGBA32;
+        break;
+
+    default:
+        av_log (avctx, AV_LOG_ERROR, "Unsupported colorspace: %d bits/sample?\n",
+            avctx->bits_per_sample);
+        break;
+    }
+    avctx->has_b_frames = 0;
+    dsputil_init(&s->dsp, avctx);
+
+    s->frame.data[0] = NULL;
+
+    return 0;
+}
+
+static int qtrle_decode_frame(AVCodecContext *avctx,
+                              void *data, int *data_size,
+                              uint8_t *buf, int buf_size)
+{
+    QtrleContext *s = (QtrleContext *)avctx->priv_data;
+
+    s->buf = buf;
+    s->size = buf_size;
+
+    s->frame.reference = 1;
+    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE |
+                            FF_BUFFER_HINTS_REUSABLE | FF_BUFFER_HINTS_READABLE;
+    if (avctx->reget_buffer(avctx, &s->frame)) {
+        av_log (s->avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+        return -1;
+    }
+
+    switch (avctx->bits_per_sample) {
+    case 1:
+    case 33:
+        qtrle_decode_1bpp(s);
+        break;
+
+    case 2:
+    case 34:
+        qtrle_decode_2bpp(s);
+        break;
+
+    case 4:
+    case 36:
+        qtrle_decode_4bpp(s);
+        /* make the palette available on the way out */
+        memcpy(s->frame.data[1], s->avctx->palctrl->palette, AVPALETTE_SIZE);
+        if (s->avctx->palctrl->palette_changed) {
+            s->frame.palette_has_changed = 1;
+            s->avctx->palctrl->palette_changed = 0;
+        }
+        break;
+
+    case 8:
+    case 40:
+        qtrle_decode_8bpp(s);
+        /* make the palette available on the way out */
+        memcpy(s->frame.data[1], s->avctx->palctrl->palette, AVPALETTE_SIZE);
+        if (s->avctx->palctrl->palette_changed) {
+            s->frame.palette_has_changed = 1;
+            s->avctx->palctrl->palette_changed = 0;
+        }
+        break;
+
+    case 16:
+        qtrle_decode_16bpp(s);
+        break;
+
+    case 24:
+        qtrle_decode_24bpp(s);
+        break;
+
+    case 32:
+        qtrle_decode_32bpp(s);
+        break;
+
+    default:
+        av_log (s->avctx, AV_LOG_ERROR, "Unsupported colorspace: %d bits/sample?\n",
+            avctx->bits_per_sample);
+        break;
+    }
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->frame;
+
+    /* always report that the buffer was completely consumed */
+    return buf_size;
+}
+
+static int qtrle_decode_end(AVCodecContext *avctx)
+{
+    QtrleContext *s = (QtrleContext *)avctx->priv_data;
+
+    if (s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    return 0;
+}
+
+AVCodec qtrle_decoder = {
+    "qtrle",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_QTRLE,
+    sizeof(QtrleContext),
+    qtrle_decode_init,
+    NULL,
+    qtrle_decode_end,
+    qtrle_decode_frame,
+    CODEC_CAP_DR1,
+};
+
diff --git a/contrib/ffmpeg/libavcodec/ra144.c b/contrib/ffmpeg/libavcodec/ra144.c
new file mode 100644
index 000000000..c4f4b813b
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ra144.c
@@ -0,0 +1,519 @@
+/*
+ * Real Audio 1.0 (14.4K)
+ * Copyright (c) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "ra144.h"
+
+#define DATABLOCK1      20      /* size of 14.4 input block in bytes */
+#define DATACHUNK1      1440    /* size of 14.4 input chunk in bytes */
+#define AUDIOBLOCK      160     /* size of output block in 16-bit words (320 bytes) */
+#define AUDIOBUFFER     12288   /* size of output buffer in 16-bit words (24576 bytes) */
+/* consts */
+#define NBLOCKS         4       /* number of segments within a block */
+#define BLOCKSIZE       40      /* (quarter) block size in 16-bit words (80 bytes) */
+#define HALFBLOCK       20      /* BLOCKSIZE/2 */
+#define BUFFERSIZE      146     /* for do_output */
+
+
+/* internal globals */
+typedef struct {
+        unsigned int     resetflag, val, oldval;
+        unsigned int     unpacked[28];          /* buffer for unpacked input */
+        unsigned int    *iptr;                  /* pointer to current input (from unpacked) */
+        unsigned int     gval;
+        unsigned short  *gsp;
+        unsigned int     gbuf1[8];
+        unsigned short   gbuf2[120];
+        signed   short   output_buffer[40];
+        unsigned int    *decptr;                /* decoder ptr */
+        signed   short  *decsp;
+
+        /* the swapped buffers */
+        unsigned int     swapb1a[10];
+        unsigned int     swapb2a[10];
+        unsigned int     swapb1b[10];
+        unsigned int     swapb2b[10];
+        unsigned int    *swapbuf1;
+        unsigned int    *swapbuf2;
+        unsigned int    *swapbuf1alt;
+        unsigned int    *swapbuf2alt;
+
+        unsigned int buffer[5];
+        unsigned short int buffer_2[148];
+        unsigned short int buffer_a[40];
+        unsigned short int buffer_b[40];
+        unsigned short int buffer_c[40];
+        unsigned short int buffer_d[40];
+
+        unsigned short int work[50];
+        unsigned short *sptr;
+
+        int buffer1[10];
+        int buffer2[10];
+
+        signed short wavtable1[2304];
+        unsigned short wavtable2[2304];
+} Real144_internal;
+
+static int ra144_decode_init(AVCodecContext * avctx)
+{
+        Real144_internal *glob=avctx->priv_data;
+
+        memset(glob,0,sizeof(Real144_internal));
+        glob->resetflag=1;
+        glob->swapbuf1=glob->swapb1a;
+        glob->swapbuf2=glob->swapb2a;
+        glob->swapbuf1alt=glob->swapb1b;
+        glob->swapbuf2alt=glob->swapb2b;
+
+        memcpy(glob->wavtable1,wavtable1,sizeof(wavtable1));
+        memcpy(glob->wavtable2,wavtable2,sizeof(wavtable2));
+
+        return 0;
+}
+
+static void final(Real144_internal *glob, short *i1, short *i2, void *out, int *statbuf, int len);
+static void add_wav(Real144_internal *glob, int n, int f, int m1, int m2, int m3, short *s1, short *s2, short *s3, short *dest);
+static int irms(short *data, int factor);
+static void rotate_block(short *source, short *target, int offset);
+/* lookup square roots in table */
+static int t_sqrt(unsigned int x)
+{
+  int s=0;
+  while (x>0xfff) { s++; x=x>>2; }
+  return (sqrt_table[x]<<s)<<2;
+}
+
+/* do 'voice' */
+static void do_voice(int *a1, int *a2)
+{
+  int buffer[10];
+  int *b1,*b2;
+  int x,y;
+  int *ptr,*tmp;
+
+  b1=buffer;
+  b2=a2;
+
+  for (x=0;x<10;x++) {
+    b1[x]=(*a1)<<4;
+
+    if(x>0) {
+      ptr=b2+x;
+      for (y=0;y<=x-1;y++)
+        b1[y]=(((*a1)*(*(--ptr)))>>12)+b2[y];
+    }
+    tmp=b1;
+    b1=b2;
+    b2=tmp;
+    a1++;
+  }
+  ptr=a2+10;
+  while (ptr>a2) (*a2++)>>=4;
+}
+
+
+/* do quarter-block output */
+static void do_output_subblock(Real144_internal *glob, unsigned int x)
+{
+  int a,b,c,d,e,f,g;
+
+  if (x==1) memset(glob->buffer,0,20);
+  if ((*glob->iptr)==0) a=0;
+  else a=(*glob->iptr)+HALFBLOCK-1;
+  glob->iptr++;
+  b=*(glob->iptr++);
+  c=*(glob->iptr++);
+  d=*(glob->iptr++);
+  if (a) rotate_block(glob->buffer_2,glob->buffer_a,a);
+  memcpy(glob->buffer_b,etable1+b*BLOCKSIZE,BLOCKSIZE*2);
+  e=((ftable1[b]>>4)*glob->gval)>>8;
+  memcpy(glob->buffer_c,etable2+c*BLOCKSIZE,BLOCKSIZE*2);
+  f=((ftable2[c]>>4)*glob->gval)>>8;
+  if (a) g=irms(glob->buffer_a,glob->gval)>>12;
+  else g=0;
+  add_wav(glob,d,a,g,e,f,glob->buffer_a,glob->buffer_b,glob->buffer_c,glob->buffer_d);
+  memmove(glob->buffer_2,glob->buffer_2+BLOCKSIZE,(BUFFERSIZE-BLOCKSIZE)*2);
+  memcpy(glob->buffer_2+BUFFERSIZE-BLOCKSIZE,glob->buffer_d,BLOCKSIZE*2);
+  final(glob,glob->gsp,glob->buffer_d,glob->output_buffer,glob->buffer,BLOCKSIZE);
+}
+
+/* rotate block */
+static void rotate_block(short *source, short *target, int offset)
+{
+  short *end;
+  short *ptr1;
+  short *ptr2;
+  short *ptr3;
+  ptr2=source+BUFFERSIZE;
+  ptr3=ptr1=ptr2-offset;
+  end=target+BLOCKSIZE;
+  while (target<end) {
+    *(target++)=*(ptr3++);
+    if (ptr3==ptr2) ptr3=ptr1;
+  }
+}
+
+/* inverse root mean square */
+static int irms(short *data, int factor)
+{
+  short *p1,*p2;
+  unsigned int sum;
+  p2=(p1=data)+BLOCKSIZE;
+  for (sum=0;p2>p1;p1++) sum+=(*p1)*(*p1);
+  if (sum==0) return 0; /* OOPS - division by zero */
+  return (0x20000000/(t_sqrt(sum)>>8))*factor;
+}
+
+/* multiply/add wavetable */
+static void add_wav(Real144_internal *glob, int n, int f, int m1, int m2, int m3, short *s1, short *s2, short *s3, short *dest)
+{
+  int a,b,c;
+  short *ptr,*ptr2;
+
+  ptr=glob->wavtable1+n*9;
+  ptr2=glob->wavtable2+n*9;
+  if (f!=0) {
+    a=((*ptr)*m1)>>((*ptr2)+1);
+  } else {
+    a=0;
+  }
+  ptr++;ptr2++;
+  b=((*ptr)*m2)>>((*ptr2)+1);
+  ptr++;ptr2++;
+  c=((*ptr)*m3)>>((*ptr2)+1);
+  ptr2=(ptr=dest)+BLOCKSIZE;
+  if (f!=0)
+    while (ptr<ptr2)
+      *(ptr++)=((*(s1++))*a+(*(s2++))*b+(*(s3++))*c)>>12;
+  else
+    while (ptr<ptr2)
+      *(ptr++)=((*(s2++))*b+(*(s3++))*c)>>12;
+}
+
+
+static void final(Real144_internal *glob, short *i1, short *i2, void *out, int *statbuf, int len)
+{
+  int x,sum;
+  int buffer[10];
+  short *ptr;
+  short *ptr2;
+
+  memcpy(glob->work,statbuf,20);
+  memcpy(glob->work+10,i2,len*2);
+
+  buffer[9]=i1[0];
+  buffer[8]=i1[1];
+  buffer[7]=i1[2];
+  buffer[6]=i1[3];
+  buffer[5]=i1[4];
+  buffer[4]=i1[5];
+  buffer[3]=i1[6];
+  buffer[2]=i1[7];
+  buffer[1]=i1[8];
+  buffer[0]=i1[9];
+
+  ptr2=(ptr=glob->work)+len;
+  while (ptr<ptr2) {
+    for(sum=0,x=0;x<=9;x++)
+      sum+=buffer[x]*(ptr[x]);
+    sum=sum>>12;
+    x=ptr[10]-sum;
+    if (x<-32768 || x>32767)
+    {
+      memset(out,0,len*2);
+      memset(statbuf,0,20);
+      return;
+    }
+    ptr[10]=x;
+    ptr++;
+  }
+  memcpy(out,ptr+10-len,len*2);
+  memcpy(statbuf,ptr,20);
+}
+
+/* Decode 20-byte input */
+static void unpack_input(unsigned char *input, unsigned int *output)
+{
+  unsigned int outbuffer[28];
+  unsigned short inbuffer[10];
+  unsigned int x;
+  unsigned int *ptr;
+
+  /* fix endianness */
+  for (x=0;x<20;x+=2)
+    inbuffer[x/2]=(input[x]<<8)+input[x+1];
+
+  /* unpack */
+  ptr=outbuffer;
+  *(ptr++)=27;
+  *(ptr++)=(inbuffer[0]>>10)&0x3f;
+  *(ptr++)=(inbuffer[0]>>5)&0x1f;
+  *(ptr++)=inbuffer[0]&0x1f;
+  *(ptr++)=(inbuffer[1]>>12)&0xf;
+  *(ptr++)=(inbuffer[1]>>8)&0xf;
+  *(ptr++)=(inbuffer[1]>>5)&7;
+  *(ptr++)=(inbuffer[1]>>2)&7;
+  *(ptr++)=((inbuffer[1]<<1)&6)|((inbuffer[2]>>15)&1);
+  *(ptr++)=(inbuffer[2]>>12)&7;
+  *(ptr++)=(inbuffer[2]>>10)&3;
+  *(ptr++)=(inbuffer[2]>>5)&0x1f;
+  *(ptr++)=((inbuffer[2]<<2)&0x7c)|((inbuffer[3]>>14)&3);
+  *(ptr++)=(inbuffer[3]>>6)&0xff;
+  *(ptr++)=((inbuffer[3]<<1)&0x7e)|((inbuffer[4]>>15)&1);
+  *(ptr++)=(inbuffer[4]>>8)&0x7f;
+  *(ptr++)=(inbuffer[4]>>1)&0x7f;
+  *(ptr++)=((inbuffer[4]<<7)&0x80)|((inbuffer[5]>>9)&0x7f);
+  *(ptr++)=(inbuffer[5]>>2)&0x7f;
+  *(ptr++)=((inbuffer[5]<<5)&0x60)|((inbuffer[6]>>11)&0x1f);
+  *(ptr++)=(inbuffer[6]>>4)&0x7f;
+  *(ptr++)=((inbuffer[6]<<4)&0xf0)|((inbuffer[7]>>12)&0xf);
+  *(ptr++)=(inbuffer[7]>>5)&0x7f;
+  *(ptr++)=((inbuffer[7]<<2)&0x7c)|((inbuffer[8]>>14)&3);
+  *(ptr++)=(inbuffer[8]>>7)&0x7f;
+  *(ptr++)=((inbuffer[8]<<1)&0xfe)|((inbuffer[9]>>15)&1);
+  *(ptr++)=(inbuffer[9]>>8)&0x7f;
+  *(ptr++)=(inbuffer[9]>>1)&0x7f;
+
+  *(output++)=outbuffer[11];
+  for (x=1;x<11;*(output++)=outbuffer[x++]);
+  ptr=outbuffer+12;
+  for (x=0;x<16;x+=4)
+  {
+    *(output++)=ptr[x];
+    *(output++)=ptr[x+2];
+    *(output++)=ptr[x+3];
+    *(output++)=ptr[x+1];
+  }
+}
+
+static unsigned int rms(int *data, int f)
+{
+  int *c;
+  int x;
+  unsigned int res;
+  int b;
+
+  c=data;
+  b=0;
+  res=0x10000;
+  for (x=0;x<10;x++)
+  {
+    res=(((0x1000000-(*c)*(*c))>>12)*res)>>12;
+    if (res==0) return 0;
+    if (res<=0x3fff)
+    {
+      while (res<=0x3fff)
+      {
+        b++;
+        res<<=2;
+      }
+    } else {
+      if (res>0x10000)
+        return 0; /* We're screwed, might as well go out with a bang. :P */
+    }
+    c++;
+  }
+  if (res>0) res=t_sqrt(res);
+
+  res>>=(b+10);
+  res=(res*f)>>10;
+  return res;
+}
+
+static void dec1(Real144_internal *glob, int *data, int *inp, int n, int f)
+{
+  short *ptr,*end;
+
+  *(glob->decptr++)=rms(data,f);
+  glob->decptr++;
+  end=(ptr=glob->decsp)+(n*10);
+  while (ptr<end) *(ptr++)=*(inp++);
+}
+
+static int eq(Real144_internal *glob, short *in, int *target)
+{
+  int retval;
+  int a;
+  int b;
+  int c;
+  unsigned int u;
+  short *sptr;
+  int *ptr1,*ptr2,*ptr3;
+  int *bp1,*bp2,*temp;
+
+  retval=0;
+  bp1=glob->buffer1;
+  bp2=glob->buffer2;
+  ptr2=(ptr3=glob->buffer2)+9;
+  sptr=in;
+  while (ptr2>=ptr3)
+    *(ptr3++)=*(sptr++);
+
+  target+=9;
+  a=bp2[9];
+  *target=a;
+  if (a+0x1000>0x1fff)
+    return 0; /* We're screwed, might as well go out with a bang. :P */
+  c=8;u=a;
+  while (c>=0)
+  {
+    if (u==0x1000) u++;
+    if (u==0xfffff000) u--;
+    b=0x1000-((u*u)>>12);
+    if (b==0) b++;
+    ptr2=bp1;
+    ptr1=(ptr3=bp2)+c;
+    for (u=0;u<=c;u++)
+      *(ptr2++)=((*(ptr3++)-(((*target)*(*(ptr1--)))>>12))*(0x1000000/b))>>12;
+    *(--target)=u=bp1[(c--)];
+    if ((u+0x1000)>0x1fff) retval=1;
+    temp=bp2;
+    bp2=bp1;
+    bp1=temp;
+  }
+  return retval;
+}
+
+static void dec2(Real144_internal *glob, int *data, int *inp, int n, int f, int *inp2, int l)
+{
+  unsigned int *ptr1,*ptr2;
+  int work[10];
+  int a,b;
+  int x;
+  int result;
+
+  if(l+1<NBLOCKS/2) a=NBLOCKS-(l+1);
+  else a=l+1;
+  b=NBLOCKS-a;
+  if (l==0)
+  {
+    glob->decsp=glob->sptr=glob->gbuf2;
+    glob->decptr=glob->gbuf1;
+  }
+  ptr1=inp;
+  ptr2=inp2;
+  for (x=0;x<10*n;x++)
+    *(glob->sptr++)=(a*(*ptr1++)+b*(*ptr2++))>>2;
+  result=eq(glob,glob->decsp,work);
+  if (result==1)
+  {
+    dec1(glob,data,inp,n,f);
+  } else {
+    *(glob->decptr++)=rms(work,f);
+    glob->decptr++;
+  }
+  glob->decsp+=n*10;
+}
+
+/* Uncompress one block (20 bytes -> 160*2 bytes) */
+static int ra144_decode_frame(AVCodecContext * avctx,
+            void *vdata, int *data_size,
+            uint8_t * buf, int buf_size)
+{
+  unsigned int a,b,c;
+  long s;
+  signed short *shptr;
+  unsigned int *lptr,*temp;
+  const short **dptr;
+  int16_t *datao;
+  int16_t *data = vdata;
+  Real144_internal *glob=avctx->priv_data;
+
+  if(buf_size==0)
+      return 0;
+
+  datao = data;
+  unpack_input(buf,glob->unpacked);
+
+  glob->iptr=glob->unpacked;
+  glob->val=decodetable[0][(*(glob->iptr++))<<1];
+
+  dptr=decodetable+1;
+  lptr=glob->swapbuf1;
+  while (lptr<glob->swapbuf1+10)
+    *(lptr++)=(*(dptr++))[(*(glob->iptr++))<<1];
+
+  do_voice(glob->swapbuf1,glob->swapbuf2);
+
+  a=t_sqrt(glob->val*glob->oldval)>>12;
+
+  for (c=0;c<NBLOCKS;c++) {
+    if (c==(NBLOCKS-1)) {
+      dec1(glob,glob->swapbuf1,glob->swapbuf2,3,glob->val);
+    } else {
+      if (c*2==(NBLOCKS-2)) {
+        if (glob->oldval<glob->val) {
+          dec2(glob,glob->swapbuf1,glob->swapbuf2,3,a,glob->swapbuf2alt,c);
+        } else {
+          dec2(glob,glob->swapbuf1alt,glob->swapbuf2alt,3,a,glob->swapbuf2,c);
+        }
+      } else {
+        if (c*2<(NBLOCKS-2)) {
+          dec2(glob,glob->swapbuf1alt,glob->swapbuf2alt,3,glob->oldval,glob->swapbuf2,c);
+        } else {
+          dec2(glob,glob->swapbuf1,glob->swapbuf2,3,glob->val,glob->swapbuf2alt,c);
+        }
+      }
+    }
+  }
+
+  /* do output */
+  for (b=0,c=0;c<4;c++) {
+    glob->gval=glob->gbuf1[c*2];
+    glob->gsp=glob->gbuf2+b;
+    do_output_subblock(glob,glob->resetflag);
+    glob->resetflag=0;
+
+    shptr=glob->output_buffer;
+    while (shptr<glob->output_buffer+BLOCKSIZE) {
+      s=*(shptr++)<<2;
+      *data=s;
+      if (s>32767) *data=32767;
+      if (s<-32767) *data=-32768;
+      data++;
+    }
+    b+=30;
+  }
+
+  glob->oldval=glob->val;
+  temp=glob->swapbuf1alt;
+  glob->swapbuf1alt=glob->swapbuf1;
+  glob->swapbuf1=temp;
+  temp=glob->swapbuf2alt;
+  glob->swapbuf2alt=glob->swapbuf2;
+  glob->swapbuf2=temp;
+  *data_size=(data-datao)*sizeof(*data);
+  return 20;
+}
+
+
+AVCodec ra_144_decoder =
+{
+    "real_144",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_RA_144,
+    sizeof(Real144_internal),
+    ra144_decode_init,
+    NULL,
+    NULL,
+    ra144_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/ra144.h b/contrib/ffmpeg/libavcodec/ra144.h
new file mode 100644
index 000000000..6d477b2f8
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ra144.h
@@ -0,0 +1,2428 @@
+/*
+ * Real Audio 1.0 (14.4K)
+ * Copyright (c) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef RA144TABLES_H
+#define RA144TABLES_H
+
+/* 14.4 data tables */
+static const unsigned short sqrt_table[4096]={
+0x0000,0x0400,0x05a8,0x06ed,0x0800,0x08f1,0x09cc,0x0a95,
+0x0b50,0x0c00,0x0ca6,0x0d44,0x0ddb,0x0e6c,0x0ef7,0x0f7d,
+0x1000,0x107e,0x10f8,0x116f,0x11e3,0x1254,0x12c2,0x132e,
+0x1398,0x1400,0x1465,0x14c8,0x152a,0x158a,0x15e8,0x1645,
+0x16a0,0x16fa,0x1752,0x17aa,0x1800,0x1854,0x18a8,0x18fa,
+0x194c,0x199c,0x19ec,0x1a3a,0x1a88,0x1ad5,0x1b21,0x1b6c,
+0x1bb6,0x1c00,0x1c48,0x1c90,0x1cd8,0x1d1e,0x1d64,0x1daa,
+0x1dee,0x1e33,0x1e76,0x1eb9,0x1efb,0x1f3d,0x1f7e,0x1fbf,
+0x2000,0x203f,0x207f,0x20bd,0x20fc,0x2139,0x2177,0x21b4,
+0x21f0,0x222d,0x2268,0x22a4,0x22df,0x2319,0x2353,0x238d,
+0x23c6,0x2400,0x2438,0x2471,0x24a9,0x24e0,0x2518,0x254f,
+0x2585,0x25bc,0x25f2,0x2628,0x265d,0x2693,0x26c8,0x26fc,
+0x2731,0x2765,0x2799,0x27cc,0x2800,0x2833,0x2865,0x2898,
+0x28ca,0x28fc,0x292e,0x2960,0x2991,0x29c2,0x29f3,0x2a24,
+0x2a54,0x2a85,0x2ab5,0x2ae5,0x2b14,0x2b44,0x2b73,0x2ba2,
+0x2bd1,0x2c00,0x2c2e,0x2c5c,0x2c8a,0x2cb8,0x2ce6,0x2d13,
+0x2d41,0x2d6e,0x2d9b,0x2dc8,0x2df4,0x2e21,0x2e4d,0x2e79,
+0x2ea5,0x2ed1,0x2efd,0x2f28,0x2f54,0x2f7f,0x2faa,0x2fd5,
+0x3000,0x302a,0x3055,0x307f,0x30a9,0x30d3,0x30fd,0x3127,
+0x3150,0x317a,0x31a3,0x31cc,0x31f5,0x321e,0x3247,0x3270,
+0x3298,0x32c1,0x32e9,0x3311,0x3339,0x3361,0x3389,0x33b0,
+0x33d8,0x3400,0x3427,0x344e,0x3475,0x349c,0x34c3,0x34ea,
+0x3510,0x3537,0x355d,0x3584,0x35aa,0x35d0,0x35f6,0x361c,
+0x3642,0x3667,0x368d,0x36b2,0x36d8,0x36fd,0x3722,0x3747,
+0x376c,0x3791,0x37b6,0x37db,0x3800,0x3824,0x3848,0x386d,
+0x3891,0x38b5,0x38d9,0x38fd,0x3921,0x3945,0x3969,0x398c,
+0x39b0,0x39d3,0x39f7,0x3a1a,0x3a3d,0x3a60,0x3a83,0x3aa6,
+0x3ac9,0x3aec,0x3b0f,0x3b31,0x3b54,0x3b76,0x3b99,0x3bbb,
+0x3bdd,0x3c00,0x3c22,0x3c44,0x3c66,0x3c87,0x3ca9,0x3ccb,
+0x3ced,0x3d0e,0x3d30,0x3d51,0x3d72,0x3d94,0x3db5,0x3dd6,
+0x3df7,0x3e18,0x3e39,0x3e5a,0x3e7b,0x3e9c,0x3ebc,0x3edd,
+0x3efd,0x3f1e,0x3f3e,0x3f5f,0x3f7f,0x3f9f,0x3fbf,0x3fdf,
+0x4000,0x401f,0x403f,0x405f,0x407f,0x409f,0x40be,0x40de,
+0x40fe,0x411d,0x413c,0x415c,0x417b,0x419a,0x41ba,0x41d9,
+0x41f8,0x4217,0x4236,0x4255,0x4273,0x4292,0x42b1,0x42d0,
+0x42ee,0x430d,0x432b,0x434a,0x4368,0x4387,0x43a5,0x43c3,
+0x43e1,0x4400,0x441e,0x443c,0x445a,0x4478,0x4495,0x44b3,
+0x44d1,0x44ef,0x450c,0x452a,0x4548,0x4565,0x4583,0x45a0,
+0x45be,0x45db,0x45f8,0x4615,0x4633,0x4650,0x466d,0x468a,
+0x46a7,0x46c4,0x46e1,0x46fe,0x471b,0x4737,0x4754,0x4771,
+0x478d,0x47aa,0x47c7,0x47e3,0x4800,0x481c,0x4838,0x4855,
+0x4871,0x488d,0x48a9,0x48c6,0x48e2,0x48fe,0x491a,0x4936,
+0x4952,0x496e,0x498a,0x49a5,0x49c1,0x49dd,0x49f9,0x4a14,
+0x4a30,0x4a4b,0x4a67,0x4a83,0x4a9e,0x4ab9,0x4ad5,0x4af0,
+0x4b0b,0x4b27,0x4b42,0x4b5d,0x4b78,0x4b93,0x4bae,0x4bca,
+0x4be5,0x4c00,0x4c1a,0x4c35,0x4c50,0x4c6b,0x4c86,0x4ca1,
+0x4cbb,0x4cd6,0x4cf1,0x4d0b,0x4d26,0x4d40,0x4d5b,0x4d75,
+0x4d90,0x4daa,0x4dc4,0x4ddf,0x4df9,0x4e13,0x4e2d,0x4e48,
+0x4e62,0x4e7c,0x4e96,0x4eb0,0x4eca,0x4ee4,0x4efe,0x4f18,
+0x4f32,0x4f4c,0x4f65,0x4f7f,0x4f99,0x4fb3,0x4fcc,0x4fe6,
+0x5000,0x5019,0x5033,0x504c,0x5066,0x507f,0x5099,0x50b2,
+0x50cb,0x50e5,0x50fe,0x5117,0x5130,0x514a,0x5163,0x517c,
+0x5195,0x51ae,0x51c7,0x51e0,0x51f9,0x5212,0x522b,0x5244,
+0x525d,0x5276,0x528f,0x52a7,0x52c0,0x52d9,0x52f2,0x530a,
+0x5323,0x533c,0x5354,0x536d,0x5385,0x539e,0x53b6,0x53cf,
+0x53e7,0x5400,0x5418,0x5430,0x5449,0x5461,0x5479,0x5491,
+0x54a9,0x54c2,0x54da,0x54f2,0x550a,0x5522,0x553a,0x5552,
+0x556a,0x5582,0x559a,0x55b2,0x55ca,0x55e2,0x55fa,0x5611,
+0x5629,0x5641,0x5659,0x5670,0x5688,0x56a0,0x56b7,0x56cf,
+0x56e6,0x56fe,0x5716,0x572d,0x5745,0x575c,0x5773,0x578b,
+0x57a2,0x57ba,0x57d1,0x57e8,0x5800,0x5817,0x582e,0x5845,
+0x585c,0x5874,0x588b,0x58a2,0x58b9,0x58d0,0x58e7,0x58fe,
+0x5915,0x592c,0x5943,0x595a,0x5971,0x5988,0x599f,0x59b5,
+0x59cc,0x59e3,0x59fa,0x5a11,0x5a27,0x5a3e,0x5a55,0x5a6b,
+0x5a82,0x5a99,0x5aaf,0x5ac6,0x5adc,0x5af3,0x5b09,0x5b20,
+0x5b36,0x5b4d,0x5b63,0x5b7a,0x5b90,0x5ba6,0x5bbd,0x5bd3,
+0x5be9,0x5c00,0x5c16,0x5c2c,0x5c42,0x5c58,0x5c6f,0x5c85,
+0x5c9b,0x5cb1,0x5cc7,0x5cdd,0x5cf3,0x5d09,0x5d1f,0x5d35,
+0x5d4b,0x5d61,0x5d77,0x5d8d,0x5da3,0x5db9,0x5dce,0x5de4,
+0x5dfa,0x5e10,0x5e26,0x5e3b,0x5e51,0x5e67,0x5e7c,0x5e92,
+0x5ea8,0x5ebd,0x5ed3,0x5ee9,0x5efe,0x5f14,0x5f29,0x5f3f,
+0x5f54,0x5f6a,0x5f7f,0x5f95,0x5faa,0x5fbf,0x5fd5,0x5fea,
+0x6000,0x6015,0x602a,0x603f,0x6055,0x606a,0x607f,0x6094,
+0x60aa,0x60bf,0x60d4,0x60e9,0x60fe,0x6113,0x6128,0x613d,
+0x6152,0x6168,0x617d,0x6192,0x61a7,0x61bb,0x61d0,0x61e5,
+0x61fa,0x620f,0x6224,0x6239,0x624e,0x6263,0x6277,0x628c,
+0x62a1,0x62b6,0x62ca,0x62df,0x62f4,0x6309,0x631d,0x6332,
+0x6347,0x635b,0x6370,0x6384,0x6399,0x63ad,0x63c2,0x63d7,
+0x63eb,0x6400,0x6414,0x6428,0x643d,0x6451,0x6466,0x647a,
+0x648e,0x64a3,0x64b7,0x64cb,0x64e0,0x64f4,0x6508,0x651d,
+0x6531,0x6545,0x6559,0x656e,0x6582,0x6596,0x65aa,0x65be,
+0x65d2,0x65e6,0x65fa,0x660f,0x6623,0x6637,0x664b,0x665f,
+0x6673,0x6687,0x669b,0x66af,0x66c3,0x66d6,0x66ea,0x66fe,
+0x6712,0x6726,0x673a,0x674e,0x6761,0x6775,0x6789,0x679d,
+0x67b1,0x67c4,0x67d8,0x67ec,0x6800,0x6813,0x6827,0x683b,
+0x684e,0x6862,0x6875,0x6889,0x689d,0x68b0,0x68c4,0x68d7,
+0x68eb,0x68fe,0x6912,0x6925,0x6939,0x694c,0x6960,0x6973,
+0x6986,0x699a,0x69ad,0x69c1,0x69d4,0x69e7,0x69fb,0x6a0e,
+0x6a21,0x6a35,0x6a48,0x6a5b,0x6a6e,0x6a82,0x6a95,0x6aa8,
+0x6abb,0x6ace,0x6ae2,0x6af5,0x6b08,0x6b1b,0x6b2e,0x6b41,
+0x6b54,0x6b67,0x6b7a,0x6b8d,0x6ba1,0x6bb4,0x6bc7,0x6bda,
+0x6bed,0x6c00,0x6c12,0x6c25,0x6c38,0x6c4b,0x6c5e,0x6c71,
+0x6c84,0x6c97,0x6caa,0x6cbc,0x6ccf,0x6ce2,0x6cf5,0x6d08,
+0x6d1a,0x6d2d,0x6d40,0x6d53,0x6d65,0x6d78,0x6d8b,0x6d9e,
+0x6db0,0x6dc3,0x6dd6,0x6de8,0x6dfb,0x6e0d,0x6e20,0x6e33,
+0x6e45,0x6e58,0x6e6a,0x6e7d,0x6e8f,0x6ea2,0x6eb4,0x6ec7,
+0x6ed9,0x6eec,0x6efe,0x6f11,0x6f23,0x6f36,0x6f48,0x6f5a,
+0x6f6d,0x6f7f,0x6f92,0x6fa4,0x6fb6,0x6fc9,0x6fdb,0x6fed,
+0x7000,0x7012,0x7024,0x7036,0x7049,0x705b,0x706d,0x707f,
+0x7091,0x70a4,0x70b6,0x70c8,0x70da,0x70ec,0x70fe,0x7110,
+0x7123,0x7135,0x7147,0x7159,0x716b,0x717d,0x718f,0x71a1,
+0x71b3,0x71c5,0x71d7,0x71e9,0x71fb,0x720d,0x721f,0x7231,
+0x7243,0x7255,0x7267,0x7279,0x728a,0x729c,0x72ae,0x72c0,
+0x72d2,0x72e4,0x72f5,0x7307,0x7319,0x732b,0x733d,0x734e,
+0x7360,0x7372,0x7384,0x7395,0x73a7,0x73b9,0x73ca,0x73dc,
+0x73ee,0x7400,0x7411,0x7423,0x7434,0x7446,0x7458,0x7469,
+0x747b,0x748c,0x749e,0x74b0,0x74c1,0x74d3,0x74e4,0x74f6,
+0x7507,0x7519,0x752a,0x753c,0x754d,0x755f,0x7570,0x7581,
+0x7593,0x75a4,0x75b6,0x75c7,0x75d8,0x75ea,0x75fb,0x760d,
+0x761e,0x762f,0x7641,0x7652,0x7663,0x7674,0x7686,0x7697,
+0x76a8,0x76ba,0x76cb,0x76dc,0x76ed,0x76fe,0x7710,0x7721,
+0x7732,0x7743,0x7754,0x7766,0x7777,0x7788,0x7799,0x77aa,
+0x77bb,0x77cc,0x77dd,0x77ee,0x7800,0x7811,0x7822,0x7833,
+0x7844,0x7855,0x7866,0x7877,0x7888,0x7899,0x78aa,0x78bb,
+0x78cc,0x78dd,0x78ee,0x78fe,0x790f,0x7920,0x7931,0x7942,
+0x7953,0x7964,0x7975,0x7986,0x7996,0x79a7,0x79b8,0x79c9,
+0x79da,0x79eb,0x79fb,0x7a0c,0x7a1d,0x7a2e,0x7a3e,0x7a4f,
+0x7a60,0x7a71,0x7a81,0x7a92,0x7aa3,0x7ab3,0x7ac4,0x7ad5,
+0x7ae5,0x7af6,0x7b07,0x7b17,0x7b28,0x7b39,0x7b49,0x7b5a,
+0x7b6b,0x7b7b,0x7b8c,0x7b9c,0x7bad,0x7bbd,0x7bce,0x7bde,
+0x7bef,0x7c00,0x7c10,0x7c21,0x7c31,0x7c41,0x7c52,0x7c62,
+0x7c73,0x7c83,0x7c94,0x7ca4,0x7cb5,0x7cc5,0x7cd5,0x7ce6,
+0x7cf6,0x7d07,0x7d17,0x7d27,0x7d38,0x7d48,0x7d58,0x7d69,
+0x7d79,0x7d89,0x7d9a,0x7daa,0x7dba,0x7dcb,0x7ddb,0x7deb,
+0x7dfb,0x7e0c,0x7e1c,0x7e2c,0x7e3c,0x7e4d,0x7e5d,0x7e6d,
+0x7e7d,0x7e8d,0x7e9e,0x7eae,0x7ebe,0x7ece,0x7ede,0x7eee,
+0x7efe,0x7f0f,0x7f1f,0x7f2f,0x7f3f,0x7f4f,0x7f5f,0x7f6f,
+0x7f7f,0x7f8f,0x7f9f,0x7faf,0x7fbf,0x7fcf,0x7fdf,0x7fef,
+0x8000,0x800f,0x801f,0x802f,0x803f,0x804f,0x805f,0x806f,
+0x807f,0x808f,0x809f,0x80af,0x80bf,0x80cf,0x80df,0x80ef,
+0x80ff,0x810e,0x811e,0x812e,0x813e,0x814e,0x815e,0x816d,
+0x817d,0x818d,0x819d,0x81ad,0x81bc,0x81cc,0x81dc,0x81ec,
+0x81fc,0x820b,0x821b,0x822b,0x823b,0x824a,0x825a,0x826a,
+0x8279,0x8289,0x8299,0x82a8,0x82b8,0x82c8,0x82d7,0x82e7,
+0x82f7,0x8306,0x8316,0x8326,0x8335,0x8345,0x8354,0x8364,
+0x8374,0x8383,0x8393,0x83a2,0x83b2,0x83c1,0x83d1,0x83e0,
+0x83f0,0x8400,0x840f,0x841f,0x842e,0x843e,0x844d,0x845c,
+0x846c,0x847b,0x848b,0x849a,0x84aa,0x84b9,0x84c9,0x84d8,
+0x84e7,0x84f7,0x8506,0x8516,0x8525,0x8534,0x8544,0x8553,
+0x8562,0x8572,0x8581,0x8591,0x85a0,0x85af,0x85be,0x85ce,
+0x85dd,0x85ec,0x85fc,0x860b,0x861a,0x862a,0x8639,0x8648,
+0x8657,0x8667,0x8676,0x8685,0x8694,0x86a3,0x86b3,0x86c2,
+0x86d1,0x86e0,0x86ef,0x86ff,0x870e,0x871d,0x872c,0x873b,
+0x874a,0x8759,0x8769,0x8778,0x8787,0x8796,0x87a5,0x87b4,
+0x87c3,0x87d2,0x87e1,0x87f0,0x8800,0x880f,0x881e,0x882d,
+0x883c,0x884b,0x885a,0x8869,0x8878,0x8887,0x8896,0x88a5,
+0x88b4,0x88c3,0x88d2,0x88e1,0x88f0,0x88ff,0x890e,0x891c,
+0x892b,0x893a,0x8949,0x8958,0x8967,0x8976,0x8985,0x8994,
+0x89a3,0x89b2,0x89c0,0x89cf,0x89de,0x89ed,0x89fc,0x8a0b,
+0x8a19,0x8a28,0x8a37,0x8a46,0x8a55,0x8a64,0x8a72,0x8a81,
+0x8a90,0x8a9f,0x8aad,0x8abc,0x8acb,0x8ada,0x8ae8,0x8af7,
+0x8b06,0x8b15,0x8b23,0x8b32,0x8b41,0x8b50,0x8b5e,0x8b6d,
+0x8b7c,0x8b8a,0x8b99,0x8ba8,0x8bb6,0x8bc5,0x8bd4,0x8be2,
+0x8bf1,0x8c00,0x8c0e,0x8c1d,0x8c2b,0x8c3a,0x8c49,0x8c57,
+0x8c66,0x8c74,0x8c83,0x8c91,0x8ca0,0x8caf,0x8cbd,0x8ccc,
+0x8cda,0x8ce9,0x8cf7,0x8d06,0x8d14,0x8d23,0x8d31,0x8d40,
+0x8d4e,0x8d5d,0x8d6b,0x8d7a,0x8d88,0x8d97,0x8da5,0x8db4,
+0x8dc2,0x8dd1,0x8ddf,0x8ded,0x8dfc,0x8e0a,0x8e19,0x8e27,
+0x8e36,0x8e44,0x8e52,0x8e61,0x8e6f,0x8e7d,0x8e8c,0x8e9a,
+0x8ea9,0x8eb7,0x8ec5,0x8ed4,0x8ee2,0x8ef0,0x8eff,0x8f0d,
+0x8f1b,0x8f2a,0x8f38,0x8f46,0x8f54,0x8f63,0x8f71,0x8f7f,
+0x8f8e,0x8f9c,0x8faa,0x8fb8,0x8fc7,0x8fd5,0x8fe3,0x8ff1,
+0x9000,0x900e,0x901c,0x902a,0x9038,0x9047,0x9055,0x9063,
+0x9071,0x907f,0x908d,0x909c,0x90aa,0x90b8,0x90c6,0x90d4,
+0x90e2,0x90f0,0x90ff,0x910d,0x911b,0x9129,0x9137,0x9145,
+0x9153,0x9161,0x916f,0x917e,0x918c,0x919a,0x91a8,0x91b6,
+0x91c4,0x91d2,0x91e0,0x91ee,0x91fc,0x920a,0x9218,0x9226,
+0x9234,0x9242,0x9250,0x925e,0x926c,0x927a,0x9288,0x9296,
+0x92a4,0x92b2,0x92c0,0x92ce,0x92dc,0x92ea,0x92f8,0x9306,
+0x9314,0x9321,0x932f,0x933d,0x934b,0x9359,0x9367,0x9375,
+0x9383,0x9391,0x939f,0x93ac,0x93ba,0x93c8,0x93d6,0x93e4,
+0x93f2,0x9400,0x940d,0x941b,0x9429,0x9437,0x9445,0x9452,
+0x9460,0x946e,0x947c,0x948a,0x9497,0x94a5,0x94b3,0x94c1,
+0x94cf,0x94dc,0x94ea,0x94f8,0x9506,0x9513,0x9521,0x952f,
+0x953c,0x954a,0x9558,0x9566,0x9573,0x9581,0x958f,0x959c,
+0x95aa,0x95b8,0x95c5,0x95d3,0x95e1,0x95ee,0x95fc,0x960a,
+0x9617,0x9625,0x9633,0x9640,0x964e,0x965c,0x9669,0x9677,
+0x9684,0x9692,0x96a0,0x96ad,0x96bb,0x96c8,0x96d6,0x96e4,
+0x96f1,0x96ff,0x970c,0x971a,0x9727,0x9735,0x9742,0x9750,
+0x975d,0x976b,0x9779,0x9786,0x9794,0x97a1,0x97af,0x97bc,
+0x97ca,0x97d7,0x97e5,0x97f2,0x9800,0x980d,0x981a,0x9828,
+0x9835,0x9843,0x9850,0x985e,0x986b,0x9879,0x9886,0x9893,
+0x98a1,0x98ae,0x98bc,0x98c9,0x98d6,0x98e4,0x98f1,0x98ff,
+0x990c,0x9919,0x9927,0x9934,0x9942,0x994f,0x995c,0x996a,
+0x9977,0x9984,0x9992,0x999f,0x99ac,0x99ba,0x99c7,0x99d4,
+0x99e2,0x99ef,0x99fc,0x9a09,0x9a17,0x9a24,0x9a31,0x9a3f,
+0x9a4c,0x9a59,0x9a66,0x9a74,0x9a81,0x9a8e,0x9a9b,0x9aa9,
+0x9ab6,0x9ac3,0x9ad0,0x9ade,0x9aeb,0x9af8,0x9b05,0x9b12,
+0x9b20,0x9b2d,0x9b3a,0x9b47,0x9b54,0x9b62,0x9b6f,0x9b7c,
+0x9b89,0x9b96,0x9ba3,0x9bb1,0x9bbe,0x9bcb,0x9bd8,0x9be5,
+0x9bf2,0x9c00,0x9c0d,0x9c1a,0x9c27,0x9c34,0x9c41,0x9c4e,
+0x9c5b,0x9c68,0x9c75,0x9c83,0x9c90,0x9c9d,0x9caa,0x9cb7,
+0x9cc4,0x9cd1,0x9cde,0x9ceb,0x9cf8,0x9d05,0x9d12,0x9d1f,
+0x9d2c,0x9d39,0x9d46,0x9d53,0x9d60,0x9d6d,0x9d7a,0x9d87,
+0x9d94,0x9da1,0x9dae,0x9dbb,0x9dc8,0x9dd5,0x9de2,0x9def,
+0x9dfc,0x9e09,0x9e16,0x9e23,0x9e30,0x9e3d,0x9e4a,0x9e57,
+0x9e64,0x9e71,0x9e7e,0x9e8b,0x9e98,0x9ea4,0x9eb1,0x9ebe,
+0x9ecb,0x9ed8,0x9ee5,0x9ef2,0x9eff,0x9f0c,0x9f18,0x9f25,
+0x9f32,0x9f3f,0x9f4c,0x9f59,0x9f66,0x9f72,0x9f7f,0x9f8c,
+0x9f99,0x9fa6,0x9fb3,0x9fbf,0x9fcc,0x9fd9,0x9fe6,0x9ff3,
+0xa000,0xa00c,0xa019,0xa026,0xa033,0xa03f,0xa04c,0xa059,
+0xa066,0xa073,0xa07f,0xa08c,0xa099,0xa0a6,0xa0b2,0xa0bf,
+0xa0cc,0xa0d9,0xa0e5,0xa0f2,0xa0ff,0xa10b,0xa118,0xa125,
+0xa132,0xa13e,0xa14b,0xa158,0xa164,0xa171,0xa17e,0xa18a,
+0xa197,0xa1a4,0xa1b0,0xa1bd,0xa1ca,0xa1d6,0xa1e3,0xa1f0,
+0xa1fc,0xa209,0xa216,0xa222,0xa22f,0xa23c,0xa248,0xa255,
+0xa261,0xa26e,0xa27b,0xa287,0xa294,0xa2a0,0xa2ad,0xa2ba,
+0xa2c6,0xa2d3,0xa2df,0xa2ec,0xa2f8,0xa305,0xa312,0xa31e,
+0xa32b,0xa337,0xa344,0xa350,0xa35d,0xa369,0xa376,0xa382,
+0xa38f,0xa39b,0xa3a8,0xa3b5,0xa3c1,0xa3ce,0xa3da,0xa3e7,
+0xa3f3,0xa400,0xa40c,0xa418,0xa425,0xa431,0xa43e,0xa44a,
+0xa457,0xa463,0xa470,0xa47c,0xa489,0xa495,0xa4a2,0xa4ae,
+0xa4ba,0xa4c7,0xa4d3,0xa4e0,0xa4ec,0xa4f9,0xa505,0xa511,
+0xa51e,0xa52a,0xa537,0xa543,0xa54f,0xa55c,0xa568,0xa574,
+0xa581,0xa58d,0xa59a,0xa5a6,0xa5b2,0xa5bf,0xa5cb,0xa5d7,
+0xa5e4,0xa5f0,0xa5fc,0xa609,0xa615,0xa621,0xa62e,0xa63a,
+0xa646,0xa653,0xa65f,0xa66b,0xa678,0xa684,0xa690,0xa69d,
+0xa6a9,0xa6b5,0xa6c1,0xa6ce,0xa6da,0xa6e6,0xa6f2,0xa6ff,
+0xa70b,0xa717,0xa724,0xa730,0xa73c,0xa748,0xa754,0xa761,
+0xa76d,0xa779,0xa785,0xa792,0xa79e,0xa7aa,0xa7b6,0xa7c3,
+0xa7cf,0xa7db,0xa7e7,0xa7f3,0xa800,0xa80c,0xa818,0xa824,
+0xa830,0xa83c,0xa849,0xa855,0xa861,0xa86d,0xa879,0xa885,
+0xa892,0xa89e,0xa8aa,0xa8b6,0xa8c2,0xa8ce,0xa8da,0xa8e6,
+0xa8f3,0xa8ff,0xa90b,0xa917,0xa923,0xa92f,0xa93b,0xa947,
+0xa953,0xa960,0xa96c,0xa978,0xa984,0xa990,0xa99c,0xa9a8,
+0xa9b4,0xa9c0,0xa9cc,0xa9d8,0xa9e4,0xa9f0,0xa9fc,0xaa09,
+0xaa15,0xaa21,0xaa2d,0xaa39,0xaa45,0xaa51,0xaa5d,0xaa69,
+0xaa75,0xaa81,0xaa8d,0xaa99,0xaaa5,0xaab1,0xaabd,0xaac9,
+0xaad5,0xaae1,0xaaed,0xaaf9,0xab05,0xab11,0xab1d,0xab29,
+0xab35,0xab41,0xab4d,0xab58,0xab64,0xab70,0xab7c,0xab88,
+0xab94,0xaba0,0xabac,0xabb8,0xabc4,0xabd0,0xabdc,0xabe8,
+0xabf4,0xac00,0xac0b,0xac17,0xac23,0xac2f,0xac3b,0xac47,
+0xac53,0xac5f,0xac6b,0xac76,0xac82,0xac8e,0xac9a,0xaca6,
+0xacb2,0xacbe,0xacc9,0xacd5,0xace1,0xaced,0xacf9,0xad05,
+0xad11,0xad1c,0xad28,0xad34,0xad40,0xad4c,0xad57,0xad63,
+0xad6f,0xad7b,0xad87,0xad92,0xad9e,0xadaa,0xadb6,0xadc2,
+0xadcd,0xadd9,0xade5,0xadf1,0xadfd,0xae08,0xae14,0xae20,
+0xae2c,0xae37,0xae43,0xae4f,0xae5b,0xae66,0xae72,0xae7e,
+0xae8a,0xae95,0xaea1,0xaead,0xaeb8,0xaec4,0xaed0,0xaedc,
+0xaee7,0xaef3,0xaeff,0xaf0a,0xaf16,0xaf22,0xaf2e,0xaf39,
+0xaf45,0xaf51,0xaf5c,0xaf68,0xaf74,0xaf7f,0xaf8b,0xaf97,
+0xafa2,0xafae,0xafba,0xafc5,0xafd1,0xafdd,0xafe8,0xaff4,
+0xb000,0xb00b,0xb017,0xb022,0xb02e,0xb03a,0xb045,0xb051,
+0xb05c,0xb068,0xb074,0xb07f,0xb08b,0xb097,0xb0a2,0xb0ae,
+0xb0b9,0xb0c5,0xb0d0,0xb0dc,0xb0e8,0xb0f3,0xb0ff,0xb10a,
+0xb116,0xb121,0xb12d,0xb139,0xb144,0xb150,0xb15b,0xb167,
+0xb172,0xb17e,0xb189,0xb195,0xb1a0,0xb1ac,0xb1b8,0xb1c3,
+0xb1cf,0xb1da,0xb1e6,0xb1f1,0xb1fd,0xb208,0xb214,0xb21f,
+0xb22b,0xb236,0xb242,0xb24d,0xb259,0xb264,0xb270,0xb27b,
+0xb286,0xb292,0xb29d,0xb2a9,0xb2b4,0xb2c0,0xb2cb,0xb2d7,
+0xb2e2,0xb2ee,0xb2f9,0xb305,0xb310,0xb31b,0xb327,0xb332,
+0xb33e,0xb349,0xb355,0xb360,0xb36b,0xb377,0xb382,0xb38e,
+0xb399,0xb3a4,0xb3b0,0xb3bb,0xb3c7,0xb3d2,0xb3dd,0xb3e9,
+0xb3f4,0xb400,0xb40b,0xb416,0xb422,0xb42d,0xb438,0xb444,
+0xb44f,0xb45a,0xb466,0xb471,0xb47c,0xb488,0xb493,0xb49f,
+0xb4aa,0xb4b5,0xb4c1,0xb4cc,0xb4d7,0xb4e2,0xb4ee,0xb4f9,
+0xb504,0xb510,0xb51b,0xb526,0xb532,0xb53d,0xb548,0xb554,
+0xb55f,0xb56a,0xb575,0xb581,0xb58c,0xb597,0xb5a3,0xb5ae,
+0xb5b9,0xb5c4,0xb5d0,0xb5db,0xb5e6,0xb5f1,0xb5fd,0xb608,
+0xb613,0xb61e,0xb62a,0xb635,0xb640,0xb64b,0xb657,0xb662,
+0xb66d,0xb678,0xb684,0xb68f,0xb69a,0xb6a5,0xb6b0,0xb6bc,
+0xb6c7,0xb6d2,0xb6dd,0xb6e8,0xb6f4,0xb6ff,0xb70a,0xb715,
+0xb720,0xb72c,0xb737,0xb742,0xb74d,0xb758,0xb763,0xb76f,
+0xb77a,0xb785,0xb790,0xb79b,0xb7a6,0xb7b2,0xb7bd,0xb7c8,
+0xb7d3,0xb7de,0xb7e9,0xb7f4,0xb800,0xb80b,0xb816,0xb821,
+0xb82c,0xb837,0xb842,0xb84d,0xb858,0xb864,0xb86f,0xb87a,
+0xb885,0xb890,0xb89b,0xb8a6,0xb8b1,0xb8bc,0xb8c7,0xb8d3,
+0xb8de,0xb8e9,0xb8f4,0xb8ff,0xb90a,0xb915,0xb920,0xb92b,
+0xb936,0xb941,0xb94c,0xb957,0xb962,0xb96d,0xb978,0xb983,
+0xb98f,0xb99a,0xb9a5,0xb9b0,0xb9bb,0xb9c6,0xb9d1,0xb9dc,
+0xb9e7,0xb9f2,0xb9fd,0xba08,0xba13,0xba1e,0xba29,0xba34,
+0xba3f,0xba4a,0xba55,0xba60,0xba6b,0xba76,0xba81,0xba8c,
+0xba97,0xbaa2,0xbaad,0xbab8,0xbac3,0xbace,0xbad8,0xbae3,
+0xbaee,0xbaf9,0xbb04,0xbb0f,0xbb1a,0xbb25,0xbb30,0xbb3b,
+0xbb46,0xbb51,0xbb5c,0xbb67,0xbb72,0xbb7d,0xbb88,0xbb92,
+0xbb9d,0xbba8,0xbbb3,0xbbbe,0xbbc9,0xbbd4,0xbbdf,0xbbea,
+0xbbf5,0xbc00,0xbc0a,0xbc15,0xbc20,0xbc2b,0xbc36,0xbc41,
+0xbc4c,0xbc57,0xbc61,0xbc6c,0xbc77,0xbc82,0xbc8d,0xbc98,
+0xbca3,0xbcad,0xbcb8,0xbcc3,0xbcce,0xbcd9,0xbce4,0xbcef,
+0xbcf9,0xbd04,0xbd0f,0xbd1a,0xbd25,0xbd30,0xbd3a,0xbd45,
+0xbd50,0xbd5b,0xbd66,0xbd70,0xbd7b,0xbd86,0xbd91,0xbd9c,
+0xbda6,0xbdb1,0xbdbc,0xbdc7,0xbdd2,0xbddc,0xbde7,0xbdf2,
+0xbdfd,0xbe08,0xbe12,0xbe1d,0xbe28,0xbe33,0xbe3d,0xbe48,
+0xbe53,0xbe5e,0xbe68,0xbe73,0xbe7e,0xbe89,0xbe93,0xbe9e,
+0xbea9,0xbeb4,0xbebe,0xbec9,0xbed4,0xbedf,0xbee9,0xbef4,
+0xbeff,0xbf0a,0xbf14,0xbf1f,0xbf2a,0xbf34,0xbf3f,0xbf4a,
+0xbf55,0xbf5f,0xbf6a,0xbf75,0xbf7f,0xbf8a,0xbf95,0xbf9f,
+0xbfaa,0xbfb5,0xbfbf,0xbfca,0xbfd5,0xbfdf,0xbfea,0xbff5,
+0xc000,0xc00a,0xc015,0xc01f,0xc02a,0xc035,0xc03f,0xc04a,
+0xc055,0xc05f,0xc06a,0xc075,0xc07f,0xc08a,0xc095,0xc09f,
+0xc0aa,0xc0b5,0xc0bf,0xc0ca,0xc0d4,0xc0df,0xc0ea,0xc0f4,
+0xc0ff,0xc109,0xc114,0xc11f,0xc129,0xc134,0xc13e,0xc149,
+0xc154,0xc15e,0xc169,0xc173,0xc17e,0xc189,0xc193,0xc19e,
+0xc1a8,0xc1b3,0xc1bd,0xc1c8,0xc1d3,0xc1dd,0xc1e8,0xc1f2,
+0xc1fd,0xc207,0xc212,0xc21d,0xc227,0xc232,0xc23c,0xc247,
+0xc251,0xc25c,0xc266,0xc271,0xc27b,0xc286,0xc290,0xc29b,
+0xc2a5,0xc2b0,0xc2bb,0xc2c5,0xc2d0,0xc2da,0xc2e5,0xc2ef,
+0xc2fa,0xc304,0xc30f,0xc319,0xc324,0xc32e,0xc339,0xc343,
+0xc34e,0xc358,0xc363,0xc36d,0xc377,0xc382,0xc38c,0xc397,
+0xc3a1,0xc3ac,0xc3b6,0xc3c1,0xc3cb,0xc3d6,0xc3e0,0xc3eb,
+0xc3f5,0xc400,0xc40a,0xc414,0xc41f,0xc429,0xc434,0xc43e,
+0xc449,0xc453,0xc45d,0xc468,0xc472,0xc47d,0xc487,0xc492,
+0xc49c,0xc4a6,0xc4b1,0xc4bb,0xc4c6,0xc4d0,0xc4da,0xc4e5,
+0xc4ef,0xc4fa,0xc504,0xc50e,0xc519,0xc523,0xc52e,0xc538,
+0xc542,0xc54d,0xc557,0xc562,0xc56c,0xc576,0xc581,0xc58b,
+0xc595,0xc5a0,0xc5aa,0xc5b4,0xc5bf,0xc5c9,0xc5d4,0xc5de,
+0xc5e8,0xc5f3,0xc5fd,0xc607,0xc612,0xc61c,0xc626,0xc631,
+0xc63b,0xc645,0xc650,0xc65a,0xc664,0xc66f,0xc679,0xc683,
+0xc68e,0xc698,0xc6a2,0xc6ac,0xc6b7,0xc6c1,0xc6cb,0xc6d6,
+0xc6e0,0xc6ea,0xc6f5,0xc6ff,0xc709,0xc713,0xc71e,0xc728,
+0xc732,0xc73d,0xc747,0xc751,0xc75b,0xc766,0xc770,0xc77a,
+0xc784,0xc78f,0xc799,0xc7a3,0xc7ae,0xc7b8,0xc7c2,0xc7cc,
+0xc7d7,0xc7e1,0xc7eb,0xc7f5,0xc800,0xc80a,0xc814,0xc81e,
+0xc828,0xc833,0xc83d,0xc847,0xc851,0xc85c,0xc866,0xc870,
+0xc87a,0xc884,0xc88f,0xc899,0xc8a3,0xc8ad,0xc8b7,0xc8c2,
+0xc8cc,0xc8d6,0xc8e0,0xc8ea,0xc8f5,0xc8ff,0xc909,0xc913,
+0xc91d,0xc928,0xc932,0xc93c,0xc946,0xc950,0xc95a,0xc965,
+0xc96f,0xc979,0xc983,0xc98d,0xc997,0xc9a2,0xc9ac,0xc9b6,
+0xc9c0,0xc9ca,0xc9d4,0xc9df,0xc9e9,0xc9f3,0xc9fd,0xca07,
+0xca11,0xca1b,0xca26,0xca30,0xca3a,0xca44,0xca4e,0xca58,
+0xca62,0xca6c,0xca76,0xca81,0xca8b,0xca95,0xca9f,0xcaa9,
+0xcab3,0xcabd,0xcac7,0xcad1,0xcadc,0xcae6,0xcaf0,0xcafa,
+0xcb04,0xcb0e,0xcb18,0xcb22,0xcb2c,0xcb36,0xcb40,0xcb4a,
+0xcb55,0xcb5f,0xcb69,0xcb73,0xcb7d,0xcb87,0xcb91,0xcb9b,
+0xcba5,0xcbaf,0xcbb9,0xcbc3,0xcbcd,0xcbd7,0xcbe1,0xcbeb,
+0xcbf5,0xcc00,0xcc0a,0xcc14,0xcc1e,0xcc28,0xcc32,0xcc3c,
+0xcc46,0xcc50,0xcc5a,0xcc64,0xcc6e,0xcc78,0xcc82,0xcc8c,
+0xcc96,0xcca0,0xccaa,0xccb4,0xccbe,0xccc8,0xccd2,0xccdc,
+0xcce6,0xccf0,0xccfa,0xcd04,0xcd0e,0xcd18,0xcd22,0xcd2c,
+0xcd36,0xcd40,0xcd4a,0xcd54,0xcd5e,0xcd68,0xcd72,0xcd7c,
+0xcd86,0xcd90,0xcd99,0xcda3,0xcdad,0xcdb7,0xcdc1,0xcdcb,
+0xcdd5,0xcddf,0xcde9,0xcdf3,0xcdfd,0xce07,0xce11,0xce1b,
+0xce25,0xce2f,0xce39,0xce43,0xce4c,0xce56,0xce60,0xce6a,
+0xce74,0xce7e,0xce88,0xce92,0xce9c,0xcea6,0xceb0,0xceba,
+0xcec3,0xcecd,0xced7,0xcee1,0xceeb,0xcef5,0xceff,0xcf09,
+0xcf13,0xcf1d,0xcf26,0xcf30,0xcf3a,0xcf44,0xcf4e,0xcf58,
+0xcf62,0xcf6c,0xcf75,0xcf7f,0xcf89,0xcf93,0xcf9d,0xcfa7,
+0xcfb1,0xcfbb,0xcfc4,0xcfce,0xcfd8,0xcfe2,0xcfec,0xcff6,
+0xd000,0xd009,0xd013,0xd01d,0xd027,0xd031,0xd03b,0xd044,
+0xd04e,0xd058,0xd062,0xd06c,0xd076,0xd07f,0xd089,0xd093,
+0xd09d,0xd0a7,0xd0b0,0xd0ba,0xd0c4,0xd0ce,0xd0d8,0xd0e1,
+0xd0eb,0xd0f5,0xd0ff,0xd109,0xd112,0xd11c,0xd126,0xd130,
+0xd13a,0xd143,0xd14d,0xd157,0xd161,0xd16b,0xd174,0xd17e,
+0xd188,0xd192,0xd19b,0xd1a5,0xd1af,0xd1b9,0xd1c3,0xd1cc,
+0xd1d6,0xd1e0,0xd1ea,0xd1f3,0xd1fd,0xd207,0xd211,0xd21a,
+0xd224,0xd22e,0xd238,0xd241,0xd24b,0xd255,0xd25f,0xd268,
+0xd272,0xd27c,0xd285,0xd28f,0xd299,0xd2a3,0xd2ac,0xd2b6,
+0xd2c0,0xd2c9,0xd2d3,0xd2dd,0xd2e7,0xd2f0,0xd2fa,0xd304,
+0xd30d,0xd317,0xd321,0xd32b,0xd334,0xd33e,0xd348,0xd351,
+0xd35b,0xd365,0xd36e,0xd378,0xd382,0xd38b,0xd395,0xd39f,
+0xd3a8,0xd3b2,0xd3bc,0xd3c6,0xd3cf,0xd3d9,0xd3e3,0xd3ec,
+0xd3f6,0xd400,0xd409,0xd413,0xd41c,0xd426,0xd430,0xd439,
+0xd443,0xd44d,0xd456,0xd460,0xd46a,0xd473,0xd47d,0xd487,
+0xd490,0xd49a,0xd4a3,0xd4ad,0xd4b7,0xd4c0,0xd4ca,0xd4d4,
+0xd4dd,0xd4e7,0xd4f0,0xd4fa,0xd504,0xd50d,0xd517,0xd521,
+0xd52a,0xd534,0xd53d,0xd547,0xd551,0xd55a,0xd564,0xd56d,
+0xd577,0xd581,0xd58a,0xd594,0xd59d,0xd5a7,0xd5b0,0xd5ba,
+0xd5c4,0xd5cd,0xd5d7,0xd5e0,0xd5ea,0xd5f4,0xd5fd,0xd607,
+0xd610,0xd61a,0xd623,0xd62d,0xd637,0xd640,0xd64a,0xd653,
+0xd65d,0xd666,0xd670,0xd679,0xd683,0xd68c,0xd696,0xd6a0,
+0xd6a9,0xd6b3,0xd6bc,0xd6c6,0xd6cf,0xd6d9,0xd6e2,0xd6ec,
+0xd6f5,0xd6ff,0xd708,0xd712,0xd71b,0xd725,0xd72f,0xd738,
+0xd742,0xd74b,0xd755,0xd75e,0xd768,0xd771,0xd77b,0xd784,
+0xd78e,0xd797,0xd7a1,0xd7aa,0xd7b4,0xd7bd,0xd7c7,0xd7d0,
+0xd7da,0xd7e3,0xd7ed,0xd7f6,0xd800,0xd809,0xd812,0xd81c,
+0xd825,0xd82f,0xd838,0xd842,0xd84b,0xd855,0xd85e,0xd868,
+0xd871,0xd87b,0xd884,0xd88e,0xd897,0xd8a0,0xd8aa,0xd8b3,
+0xd8bd,0xd8c6,0xd8d0,0xd8d9,0xd8e3,0xd8ec,0xd8f5,0xd8ff,
+0xd908,0xd912,0xd91b,0xd925,0xd92e,0xd938,0xd941,0xd94a,
+0xd954,0xd95d,0xd967,0xd970,0xd979,0xd983,0xd98c,0xd996,
+0xd99f,0xd9a9,0xd9b2,0xd9bb,0xd9c5,0xd9ce,0xd9d8,0xd9e1,
+0xd9ea,0xd9f4,0xd9fd,0xda07,0xda10,0xda19,0xda23,0xda2c,
+0xda35,0xda3f,0xda48,0xda52,0xda5b,0xda64,0xda6e,0xda77,
+0xda81,0xda8a,0xda93,0xda9d,0xdaa6,0xdaaf,0xdab9,0xdac2,
+0xdacb,0xdad5,0xdade,0xdae8,0xdaf1,0xdafa,0xdb04,0xdb0d,
+0xdb16,0xdb20,0xdb29,0xdb32,0xdb3c,0xdb45,0xdb4e,0xdb58,
+0xdb61,0xdb6a,0xdb74,0xdb7d,0xdb86,0xdb90,0xdb99,0xdba2,
+0xdbac,0xdbb5,0xdbbe,0xdbc8,0xdbd1,0xdbda,0xdbe4,0xdbed,
+0xdbf6,0xdc00,0xdc09,0xdc12,0xdc1b,0xdc25,0xdc2e,0xdc37,
+0xdc41,0xdc4a,0xdc53,0xdc5d,0xdc66,0xdc6f,0xdc78,0xdc82,
+0xdc8b,0xdc94,0xdc9e,0xdca7,0xdcb0,0xdcb9,0xdcc3,0xdccc,
+0xdcd5,0xdcde,0xdce8,0xdcf1,0xdcfa,0xdd04,0xdd0d,0xdd16,
+0xdd1f,0xdd29,0xdd32,0xdd3b,0xdd44,0xdd4e,0xdd57,0xdd60,
+0xdd69,0xdd73,0xdd7c,0xdd85,0xdd8e,0xdd98,0xdda1,0xddaa,
+0xddb3,0xddbd,0xddc6,0xddcf,0xddd8,0xdde2,0xddeb,0xddf4,
+0xddfd,0xde06,0xde10,0xde19,0xde22,0xde2b,0xde35,0xde3e,
+0xde47,0xde50,0xde59,0xde63,0xde6c,0xde75,0xde7e,0xde87,
+0xde91,0xde9a,0xdea3,0xdeac,0xdeb5,0xdebf,0xdec8,0xded1,
+0xdeda,0xdee3,0xdeed,0xdef6,0xdeff,0xdf08,0xdf11,0xdf1a,
+0xdf24,0xdf2d,0xdf36,0xdf3f,0xdf48,0xdf52,0xdf5b,0xdf64,
+0xdf6d,0xdf76,0xdf7f,0xdf89,0xdf92,0xdf9b,0xdfa4,0xdfad,
+0xdfb6,0xdfbf,0xdfc9,0xdfd2,0xdfdb,0xdfe4,0xdfed,0xdff6,
+0xe000,0xe009,0xe012,0xe01b,0xe024,0xe02d,0xe036,0xe03f,
+0xe049,0xe052,0xe05b,0xe064,0xe06d,0xe076,0xe07f,0xe088,
+0xe092,0xe09b,0xe0a4,0xe0ad,0xe0b6,0xe0bf,0xe0c8,0xe0d1,
+0xe0db,0xe0e4,0xe0ed,0xe0f6,0xe0ff,0xe108,0xe111,0xe11a,
+0xe123,0xe12c,0xe136,0xe13f,0xe148,0xe151,0xe15a,0xe163,
+0xe16c,0xe175,0xe17e,0xe187,0xe190,0xe199,0xe1a3,0xe1ac,
+0xe1b5,0xe1be,0xe1c7,0xe1d0,0xe1d9,0xe1e2,0xe1eb,0xe1f4,
+0xe1fd,0xe206,0xe20f,0xe218,0xe221,0xe22b,0xe234,0xe23d,
+0xe246,0xe24f,0xe258,0xe261,0xe26a,0xe273,0xe27c,0xe285,
+0xe28e,0xe297,0xe2a0,0xe2a9,0xe2b2,0xe2bb,0xe2c4,0xe2cd,
+0xe2d6,0xe2df,0xe2e8,0xe2f1,0xe2fa,0xe303,0xe30c,0xe315,
+0xe31f,0xe328,0xe331,0xe33a,0xe343,0xe34c,0xe355,0xe35e,
+0xe367,0xe370,0xe379,0xe382,0xe38b,0xe394,0xe39d,0xe3a6,
+0xe3af,0xe3b8,0xe3c1,0xe3ca,0xe3d3,0xe3dc,0xe3e5,0xe3ee,
+0xe3f7,0xe400,0xe408,0xe411,0xe41a,0xe423,0xe42c,0xe435,
+0xe43e,0xe447,0xe450,0xe459,0xe462,0xe46b,0xe474,0xe47d,
+0xe486,0xe48f,0xe498,0xe4a1,0xe4aa,0xe4b3,0xe4bc,0xe4c5,
+0xe4ce,0xe4d7,0xe4e0,0xe4e9,0xe4f2,0xe4fa,0xe503,0xe50c,
+0xe515,0xe51e,0xe527,0xe530,0xe539,0xe542,0xe54b,0xe554,
+0xe55d,0xe566,0xe56f,0xe578,0xe580,0xe589,0xe592,0xe59b,
+0xe5a4,0xe5ad,0xe5b6,0xe5bf,0xe5c8,0xe5d1,0xe5da,0xe5e3,
+0xe5eb,0xe5f4,0xe5fd,0xe606,0xe60f,0xe618,0xe621,0xe62a,
+0xe633,0xe63c,0xe644,0xe64d,0xe656,0xe65f,0xe668,0xe671,
+0xe67a,0xe683,0xe68c,0xe694,0xe69d,0xe6a6,0xe6af,0xe6b8,
+0xe6c1,0xe6ca,0xe6d3,0xe6db,0xe6e4,0xe6ed,0xe6f6,0xe6ff,
+0xe708,0xe711,0xe71a,0xe722,0xe72b,0xe734,0xe73d,0xe746,
+0xe74f,0xe758,0xe760,0xe769,0xe772,0xe77b,0xe784,0xe78d,
+0xe795,0xe79e,0xe7a7,0xe7b0,0xe7b9,0xe7c2,0xe7cb,0xe7d3,
+0xe7dc,0xe7e5,0xe7ee,0xe7f7,0xe800,0xe808,0xe811,0xe81a,
+0xe823,0xe82c,0xe834,0xe83d,0xe846,0xe84f,0xe858,0xe861,
+0xe869,0xe872,0xe87b,0xe884,0xe88d,0xe895,0xe89e,0xe8a7,
+0xe8b0,0xe8b9,0xe8c1,0xe8ca,0xe8d3,0xe8dc,0xe8e5,0xe8ed,
+0xe8f6,0xe8ff,0xe908,0xe911,0xe919,0xe922,0xe92b,0xe934,
+0xe93c,0xe945,0xe94e,0xe957,0xe960,0xe968,0xe971,0xe97a,
+0xe983,0xe98b,0xe994,0xe99d,0xe9a6,0xe9ae,0xe9b7,0xe9c0,
+0xe9c9,0xe9d2,0xe9da,0xe9e3,0xe9ec,0xe9f5,0xe9fd,0xea06,
+0xea0f,0xea18,0xea20,0xea29,0xea32,0xea3b,0xea43,0xea4c,
+0xea55,0xea5e,0xea66,0xea6f,0xea78,0xea80,0xea89,0xea92,
+0xea9b,0xeaa3,0xeaac,0xeab5,0xeabe,0xeac6,0xeacf,0xead8,
+0xeae0,0xeae9,0xeaf2,0xeafb,0xeb03,0xeb0c,0xeb15,0xeb1d,
+0xeb26,0xeb2f,0xeb38,0xeb40,0xeb49,0xeb52,0xeb5a,0xeb63,
+0xeb6c,0xeb74,0xeb7d,0xeb86,0xeb8f,0xeb97,0xeba0,0xeba9,
+0xebb1,0xebba,0xebc3,0xebcb,0xebd4,0xebdd,0xebe5,0xebee,
+0xebf7,0xec00,0xec08,0xec11,0xec1a,0xec22,0xec2b,0xec34,
+0xec3c,0xec45,0xec4e,0xec56,0xec5f,0xec68,0xec70,0xec79,
+0xec82,0xec8a,0xec93,0xec9c,0xeca4,0xecad,0xecb5,0xecbe,
+0xecc7,0xeccf,0xecd8,0xece1,0xece9,0xecf2,0xecfb,0xed03,
+0xed0c,0xed15,0xed1d,0xed26,0xed2e,0xed37,0xed40,0xed48,
+0xed51,0xed5a,0xed62,0xed6b,0xed74,0xed7c,0xed85,0xed8d,
+0xed96,0xed9f,0xeda7,0xedb0,0xedb8,0xedc1,0xedca,0xedd2,
+0xeddb,0xede4,0xedec,0xedf5,0xedfd,0xee06,0xee0f,0xee17,
+0xee20,0xee28,0xee31,0xee3a,0xee42,0xee4b,0xee53,0xee5c,
+0xee65,0xee6d,0xee76,0xee7e,0xee87,0xee8f,0xee98,0xeea1,
+0xeea9,0xeeb2,0xeeba,0xeec3,0xeecc,0xeed4,0xeedd,0xeee5,
+0xeeee,0xeef6,0xeeff,0xef08,0xef10,0xef19,0xef21,0xef2a,
+0xef32,0xef3b,0xef43,0xef4c,0xef55,0xef5d,0xef66,0xef6e,
+0xef77,0xef7f,0xef88,0xef90,0xef99,0xefa2,0xefaa,0xefb3,
+0xefbb,0xefc4,0xefcc,0xefd5,0xefdd,0xefe6,0xefee,0xeff7,
+0xf000,0xf008,0xf011,0xf019,0xf022,0xf02a,0xf033,0xf03b,
+0xf044,0xf04c,0xf055,0xf05d,0xf066,0xf06e,0xf077,0xf07f,
+0xf088,0xf090,0xf099,0xf0a1,0xf0aa,0xf0b2,0xf0bb,0xf0c3,
+0xf0cc,0xf0d4,0xf0dd,0xf0e5,0xf0ee,0xf0f6,0xf0ff,0xf107,
+0xf110,0xf118,0xf121,0xf129,0xf132,0xf13a,0xf143,0xf14b,
+0xf154,0xf15c,0xf165,0xf16d,0xf176,0xf17e,0xf187,0xf18f,
+0xf198,0xf1a0,0xf1a9,0xf1b1,0xf1ba,0xf1c2,0xf1cb,0xf1d3,
+0xf1dc,0xf1e4,0xf1ec,0xf1f5,0xf1fd,0xf206,0xf20e,0xf217,
+0xf21f,0xf228,0xf230,0xf239,0xf241,0xf24a,0xf252,0xf25a,
+0xf263,0xf26b,0xf274,0xf27c,0xf285,0xf28d,0xf296,0xf29e,
+0xf2a6,0xf2af,0xf2b7,0xf2c0,0xf2c8,0xf2d1,0xf2d9,0xf2e1,
+0xf2ea,0xf2f2,0xf2fb,0xf303,0xf30c,0xf314,0xf31c,0xf325,
+0xf32d,0xf336,0xf33e,0xf347,0xf34f,0xf357,0xf360,0xf368,
+0xf371,0xf379,0xf381,0xf38a,0xf392,0xf39b,0xf3a3,0xf3ac,
+0xf3b4,0xf3bc,0xf3c5,0xf3cd,0xf3d6,0xf3de,0xf3e6,0xf3ef,
+0xf3f7,0xf400,0xf408,0xf410,0xf419,0xf421,0xf429,0xf432,
+0xf43a,0xf443,0xf44b,0xf453,0xf45c,0xf464,0xf46d,0xf475,
+0xf47d,0xf486,0xf48e,0xf496,0xf49f,0xf4a7,0xf4b0,0xf4b8,
+0xf4c0,0xf4c9,0xf4d1,0xf4d9,0xf4e2,0xf4ea,0xf4f2,0xf4fb,
+0xf503,0xf50c,0xf514,0xf51c,0xf525,0xf52d,0xf535,0xf53e,
+0xf546,0xf54e,0xf557,0xf55f,0xf567,0xf570,0xf578,0xf580,
+0xf589,0xf591,0xf599,0xf5a2,0xf5aa,0xf5b2,0xf5bb,0xf5c3,
+0xf5cb,0xf5d4,0xf5dc,0xf5e4,0xf5ed,0xf5f5,0xf5fd,0xf606,
+0xf60e,0xf616,0xf61f,0xf627,0xf62f,0xf638,0xf640,0xf648,
+0xf651,0xf659,0xf661,0xf66a,0xf672,0xf67a,0xf682,0xf68b,
+0xf693,0xf69b,0xf6a4,0xf6ac,0xf6b4,0xf6bd,0xf6c5,0xf6cd,
+0xf6d6,0xf6de,0xf6e6,0xf6ee,0xf6f7,0xf6ff,0xf707,0xf710,
+0xf718,0xf720,0xf728,0xf731,0xf739,0xf741,0xf74a,0xf752,
+0xf75a,0xf762,0xf76b,0xf773,0xf77b,0xf784,0xf78c,0xf794,
+0xf79c,0xf7a5,0xf7ad,0xf7b5,0xf7bd,0xf7c6,0xf7ce,0xf7d6,
+0xf7de,0xf7e7,0xf7ef,0xf7f7,0xf800,0xf808,0xf810,0xf818,
+0xf821,0xf829,0xf831,0xf839,0xf842,0xf84a,0xf852,0xf85a,
+0xf863,0xf86b,0xf873,0xf87b,0xf883,0xf88c,0xf894,0xf89c,
+0xf8a4,0xf8ad,0xf8b5,0xf8bd,0xf8c5,0xf8ce,0xf8d6,0xf8de,
+0xf8e6,0xf8ef,0xf8f7,0xf8ff,0xf907,0xf90f,0xf918,0xf920,
+0xf928,0xf930,0xf939,0xf941,0xf949,0xf951,0xf959,0xf962,
+0xf96a,0xf972,0xf97a,0xf982,0xf98b,0xf993,0xf99b,0xf9a3,
+0xf9ab,0xf9b4,0xf9bc,0xf9c4,0xf9cc,0xf9d4,0xf9dd,0xf9e5,
+0xf9ed,0xf9f5,0xf9fd,0xfa06,0xfa0e,0xfa16,0xfa1e,0xfa26,
+0xfa2f,0xfa37,0xfa3f,0xfa47,0xfa4f,0xfa58,0xfa60,0xfa68,
+0xfa70,0xfa78,0xfa80,0xfa89,0xfa91,0xfa99,0xfaa1,0xfaa9,
+0xfab1,0xfaba,0xfac2,0xfaca,0xfad2,0xfada,0xfae2,0xfaeb,
+0xfaf3,0xfafb,0xfb03,0xfb0b,0xfb13,0xfb1c,0xfb24,0xfb2c,
+0xfb34,0xfb3c,0xfb44,0xfb4c,0xfb55,0xfb5d,0xfb65,0xfb6d,
+0xfb75,0xfb7d,0xfb85,0xfb8e,0xfb96,0xfb9e,0xfba6,0xfbae,
+0xfbb6,0xfbbe,0xfbc7,0xfbcf,0xfbd7,0xfbdf,0xfbe7,0xfbef,
+0xfbf7,0xfc00,0xfc08,0xfc10,0xfc18,0xfc20,0xfc28,0xfc30,
+0xfc38,0xfc40,0xfc49,0xfc51,0xfc59,0xfc61,0xfc69,0xfc71,
+0xfc79,0xfc81,0xfc8a,0xfc92,0xfc9a,0xfca2,0xfcaa,0xfcb2,
+0xfcba,0xfcc2,0xfcca,0xfcd2,0xfcdb,0xfce3,0xfceb,0xfcf3,
+0xfcfb,0xfd03,0xfd0b,0xfd13,0xfd1b,0xfd23,0xfd2c,0xfd34,
+0xfd3c,0xfd44,0xfd4c,0xfd54,0xfd5c,0xfd64,0xfd6c,0xfd74,
+0xfd7c,0xfd84,0xfd8d,0xfd95,0xfd9d,0xfda5,0xfdad,0xfdb5,
+0xfdbd,0xfdc5,0xfdcd,0xfdd5,0xfddd,0xfde5,0xfded,0xfdf5,
+0xfdfd,0xfe06,0xfe0e,0xfe16,0xfe1e,0xfe26,0xfe2e,0xfe36,
+0xfe3e,0xfe46,0xfe4e,0xfe56,0xfe5e,0xfe66,0xfe6e,0xfe76,
+0xfe7e,0xfe86,0xfe8e,0xfe97,0xfe9f,0xfea7,0xfeaf,0xfeb7,
+0xfebf,0xfec7,0xfecf,0xfed7,0xfedf,0xfee7,0xfeef,0xfef7,
+0xfeff,0xff07,0xff0f,0xff17,0xff1f,0xff27,0xff2f,0xff37,
+0xff3f,0xff47,0xff4f,0xff57,0xff5f,0xff67,0xff6f,0xff77,
+0xff7f,0xff87,0xff8f,0xff97,0xff9f,0xffa7,0xffaf,0xffb7,
+0xffbf,0xffc7,0xffcf,0xffd7,0xffdf,0xffe7,0xffef,0xfff7};
+
+static const signed short wavtable1[2304]={
+0x021d,0x03bc,0x0300,0xfc0d,0xfcd5,0xfd33,0xfdc5,0xfc83,
+0xfdc0,0x036d,0x0245,0x0238,0xfc1b,0xfc31,0xfd7a,0xfd10,
+0xfd6b,0xfd88,0x02a3,0x0313,0x027b,0xfdf9,0xfcba,0xfc2f,
+0xfc86,0xfda3,0xfceb,0x0270,0x02dc,0x029c,0xfc83,0xfcd1,
+0xfc44,0xfd06,0xfdf5,0xfc97,0x026f,0x0347,0x02b9,0xfc02,
+0xfcaf,0xfdc5,0xfd09,0xfd50,0xfc4b,0x0280,0x02b5,0x03df,
+0xfc9d,0xfd95,0xfd61,0xfce0,0xfc54,0xfc41,0x039d,0x02af,
+0x0260,0xfd93,0xfddb,0xfccf,0xfcbb,0xfc64,0xfd2e,0x0228,
+0x031d,0x023c,0xfca3,0xfd97,0xfc84,0xfdac,0xfd93,0xfd80,
+0x0217,0x0340,0x031f,0xfc99,0xfcbc,0xfd77,0xfdd0,0xfd5c,
+0xfd90,0x02fa,0x025d,0x0241,0xfc7a,0xfca5,0xfd56,0xfdc8,
+0xfd34,0xfd76,0x0340,0x0231,0x03eb,0xfc6f,0xfcd1,0xfdda,
+0xfd5c,0xfd98,0xfc2a,0x024e,0x02af,0x024c,0xfce7,0xfd59,
+0xfcea,0xfd57,0xfc65,0xfd5b,0x0286,0x0385,0x02dc,0xfdc7,
+0xfc64,0xfd7b,0xfcd0,0xfce6,0xfdf4,0x033c,0x02b1,0x0380,
+0xfdd3,0xfd2b,0xfda5,0xfd62,0xfc61,0xfcf0,0x036b,0x0270,
+0x0350,0xfdeb,0xfd2b,0xfdfc,0xfd14,0xfd08,0xfd42,0x023b,
+0x03ae,0x03fe,0xfdf2,0xfdc6,0xfc53,0xfd82,0xfc9c,0xfc04,
+0x0338,0x02e0,0x0283,0xfdaf,0xfdfb,0xfc63,0xfd69,0xfdee,
+0xfcd8,0x0205,0x02fd,0x0200,0xfcfa,0xfdfa,0xfd01,0xfdf5,
+0xfdc4,0xfdff,0x0232,0x038c,0x02f9,0xfc1b,0xfcbc,0xfd5d,
+0xfd97,0xfcdb,0xfdca,0x02b6,0x0391,0x02a3,0xfd95,0xfc6c,
+0xfda5,0xfc53,0xfcd1,0xfc85,0x02c0,0x020c,0x02a0,0xfd2e,
+0xfc63,0xfd4f,0xfc37,0xfde6,0xfc8d,0x02d1,0x02f5,0x022e,
+0xfdea,0xfcee,0xfcc6,0xfc06,0xfdcf,0xfda0,0x0374,0x0227,
+0x0279,0xfc49,0xfdde,0xfd57,0xfd05,0xfdb0,0xfcf0,0x022e,
+0x03ef,0x034e,0xfddb,0xfc65,0xfcc0,0xfda0,0xfc21,0xfd45,
+0x03a4,0x02ea,0x0309,0xfd59,0xfd3d,0xfdca,0xfcaf,0xfde0,
+0xfdb3,0x0236,0x0336,0x039e,0xfc72,0xfdff,0xfd18,0xfd8d,
+0xfd6d,0xfcb9,0x0265,0x0303,0x0263,0xfc64,0xfd25,0xfc67,
+0xfd23,0xfdbb,0xfd27,0x02e1,0x029f,0x03f0,0xfc39,0xfd2a,
+0xfd6b,0xfded,0xfc90,0xfc1f,0x028b,0x0252,0x0243,0xfd0b,
+0xfd1f,0xfd5f,0xfcc2,0xfd4d,0xfd71,0x0321,0x027c,0x0234,
+0xfc1c,0xfc8d,0xfd42,0xfd8e,0xfce8,0xfd92,0x0354,0x038e,
+0x02cf,0xfd0b,0xfda9,0xfd81,0xfd3b,0xfcd7,0xfc0c,0x03e6,
+0x0266,0x023f,0xfda9,0xfdcf,0xfd4d,0xfc32,0xfd1f,0xfd79,
+0x0299,0x03a7,0x0274,0xfda1,0xfcd0,0xfdc2,0xfca1,0xfcaa,
+0xfcfd,0x0277,0x0254,0x033d,0xfd21,0xfc01,0xfc3a,0xfcf6,
+0xfd4a,0xfd60,0x0284,0x039e,0x020e,0xfdb9,0xfd69,0xfc47,
+0xfcd5,0xfcba,0xfde2,0x036f,0x03dc,0x0265,0xfcb0,0xfdf2,
+0xfdb0,0xfd0d,0xfc47,0xfd22,0x03ad,0x02b4,0x02b5,0xfd83,
+0xfd82,0xfc55,0xfc9e,0xfc57,0xfc54,0x0235,0x02a0,0x0240,
+0xfd1a,0xfd83,0xfd0b,0xfd8f,0xfc8e,0xfd77,0x0223,0x0274,
+0x02e4,0xfd61,0xfce9,0xfc73,0xfdb7,0xfcfd,0xfde9,0x027f,
+0x0214,0x0219,0xfd67,0xfd61,0xfdd1,0xfce2,0xfdd6,0xfdcc,
+0x03bb,0x025c,0x0256,0xfdcc,0xfdd2,0xfd3e,0xfc85,0xfd36,
+0xfd45,0x0232,0x0244,0x0384,0xfd83,0xfc23,0xfc03,0xfd97,
+0xfd6e,0xfce9,0x025b,0x0383,0x026d,0xfdee,0xfd23,0xfddf,
+0xfd38,0xfceb,0xfd0d,0x02ea,0x0215,0x0270,0xfcf8,0xfc73,
+0xfd76,0xfde1,0xfdd5,0xfd07,0x02d9,0x0202,0x02df,0xfd23,
+0xfdf5,0xfd1d,0xfdf9,0xfdfb,0xfdf0,0x0355,0x0227,0x02b4,
+0xfc69,0xfdbf,0xfd17,0xfd39,0xfdaf,0xfc58,0x03b5,0x03fa,
+0x03ec,0xfc50,0xfc5d,0xfc19,0xfc90,0xfc0b,0xfc27,0x0220,
+0x03dc,0x02df,0xfdf3,0xfcf3,0xfd3a,0xfdbe,0xfc46,0xfdf0,
+0x0315,0x030e,0x0335,0xfda5,0xfd87,0xfd8c,0xfda0,0xfdaa,
+0xfd6d,0x0381,0x0204,0x02f2,0xfc76,0xfd6b,0xfd07,0xfcee,
+0xfdf7,0xfdd4,0x0205,0x02be,0x033c,0xfd3a,0xfcbb,0xfdc8,
+0xfdf5,0xfc3d,0xfd62,0x024a,0x0332,0x02fb,0xfc57,0xfc96,
+0xfd9f,0xfd60,0xfd72,0xfdc8,0x038b,0x028c,0x0250,0xfdbe,
+0xfdf3,0xfd0e,0xfcdc,0xfcc2,0xfd52,0x0210,0x028c,0x0282,
+0xfd5f,0xfd69,0xfcce,0xfdde,0xfcc2,0xfcda,0x0213,0x02c4,
+0x030c,0xfd20,0xfcd5,0xfde4,0xfdd8,0xfc2b,0xfdad,0x029a,
+0x0271,0x02d7,0xfcd3,0xfc4e,0xfc88,0xfc9d,0xfd05,0xfdfc,
+0x03b3,0x02d7,0x022a,0xfd5f,0xfe00,0xfced,0xfc95,0xfdfb,
+0xfda9,0x0225,0x0291,0x03d5,0xfd3f,0xfdf2,0xfd8a,0xfdb3,
+0xfcb5,0xfc53,0x025d,0x0398,0x0354,0xfde0,0xfc10,0xfd02,
+0xfd34,0xfcc5,0xfd3b,0x0270,0x026b,0x03d7,0xfd0d,0xfda9,
+0xfdad,0xfd07,0xfd12,0xfc50,0x025d,0x038d,0x0223,0xfde7,
+0xfd7a,0xfc35,0xfd35,0xfcd9,0xfdb8,0x02b2,0x03a7,0x0204,
+0xfd89,0xfd47,0xfc50,0xfc5c,0xfca9,0xfdf7,0x02bc,0x0264,
+0x0355,0xfcba,0xfdb8,0xfc03,0xfc41,0xfd24,0xfd39,0x02ff,
+0x0340,0x023e,0xfd91,0xfca3,0xfc5b,0xfdc1,0xfd5c,0xfd7d,
+0x020b,0x0382,0x039b,0xfc69,0xfc4f,0xfcd6,0xfde8,0xfceb,
+0xfcbf,0x02d2,0x03be,0x02b3,0xfd5c,0xfc30,0xfd79,0xfc04,
+0xfc80,0xfc59,0x0265,0x0303,0x03a0,0xfc64,0xfdd4,0xfd44,
+0xfd22,0xfdbb,0xfcb6,0x02f6,0x02f5,0x0248,0xfdcf,0xfc9e,
+0xfc9f,0xfdce,0xfdd0,0xfd65,0x0200,0x0237,0x0241,0xfdc9,
+0xfdbf,0xfd81,0xfe00,0xfd8c,0xfd76,0x0267,0x027e,0x02ba,
+0xfd02,0xfcb9,0xfc99,0xfd1d,0xfce5,0xfc46,0x023e,0x0282,
+0x024d,0xfd30,0xfd6c,0xfd1d,0xfd7c,0xfcda,0xfd5b,0x03e1,
+0x02aa,0x036e,0xfd6a,0xfcac,0xfdb7,0xfc3d,0xfc72,0xfd0f,
+0x021b,0x037a,0x0391,0xfc55,0xfc3d,0xfce5,0xfdc7,0xfcf9,
+0xfcd1,0x02b6,0x03a0,0x0220,0xfd8b,0xfd1e,0xfc25,0xfc53,
+0xfcb6,0xfdbe,0x0325,0x0258,0x02a8,0xfc50,0xfde9,0xfce2,
+0xfd87,0xfd40,0xfc77,0x021c,0x03b7,0x030e,0xfc14,0xfcc7,
+0xfd29,0xfdc6,0xfc8c,0xfdab,0x0330,0x03b6,0x024e,0xfd0b,
+0xfc53,0xfddc,0xfd76,0xfc8f,0xfd57,0x03bb,0x034f,0x032b,
+0xfcea,0xfd0b,0xfd61,0xfc85,0xfd44,0xfd7d,0x0223,0x0373,
+0x022c,0xfc4f,0xfdad,0xfc40,0xfdb6,0xfd06,0xfda3,0x028c,
+0x0378,0x025c,0xfdcb,0xfcfe,0xfdf4,0xfcc1,0xfcfe,0xfd37,
+0x035f,0x0249,0x0357,0xfc26,0xfd2f,0xfc2e,0xfd29,0xfd64,
+0xfd35,0x03ff,0x03e5,0x0204,0xfc1b,0xfdfc,0xfc12,0xfc01,
+0xfc34,0xfdf8,0x03a4,0x0266,0x0280,0xfdd0,0xfdb9,0xfcff,
+0xfcae,0xfd1e,0xfce0,0x0273,0x0234,0x023d,0xfd4d,0xfd42,
+0xfd89,0xfcff,0xfd93,0xfd7f,0x036c,0x0384,0x02d4,0xfcfe,
+0xfd94,0xfd83,0xfd12,0xfce8,0xfe00,0x0203,0x0359,0x0380,
+0xfca1,0xfc7a,0xfd12,0xfdf9,0xfd32,0xfcf0,0x0287,0x03b9,
+0x036f,0xfda6,0xfdd5,0xfcce,0xfccf,0xfc88,0xfd0d,0x0326,
+0x0356,0x0359,0xfd60,0xfd5d,0xfd35,0xfd86,0xfd38,0xfd32,
+0x0221,0x0247,0x0277,0xfd93,0xfd60,0xfd31,0xfdbb,0xfd68,
+0xfcf6,0x0291,0x0259,0x02ef,0xfcfc,0xfc3d,0xfc8e,0xfcb5,
+0xfd3d,0xfdda,0x02e4,0x0389,0x031b,0xfd72,0xfdc1,0xfd40,
+0xfde9,0xfcdf,0xfd96,0x0349,0x03f8,0x0238,0xfcbd,0xfc5a,
+0xfdcc,0xfd4d,0xfc0f,0xfd89,0x02eb,0x024d,0x03d7,0xfca3,
+0xfd32,0xfdca,0xfddf,0xfd59,0xfc4f,0x036e,0x0265,0x020e,
+0xfdf2,0xfc7a,0xfd8a,0xfd0f,0xfd20,0xfde4,0x0360,0x02d3,
+0x030b,0xfd9e,0xfd6f,0xfdda,0xfd27,0xfc02,0xfdb0,0x0216,
+0x02a2,0x0306,0xfd41,0xfcd8,0xfc05,0xfdd3,0xfc89,0xfdb6,
+0x03b6,0x0289,0x03ab,0xfda6,0xfc99,0xfdad,0xfc8f,0xfcc8,
+0xfca3,0x024e,0x02bf,0x0383,0xfcd5,0xfdfa,0xfd96,0xfd58,
+0xfc3a,0xfcea,0x026a,0x020f,0x0243,0xfd83,0xfd44,0xfdab,
+0xfd16,0xfde1,0xfd6f,0x02d5,0x0287,0x03cc,0xfc6b,0xfd4f,
+0xfd99,0xfdff,0xfcce,0xfc64,0x0281,0x0287,0x02c3,0xfcd6,
+0xfc8b,0xfc83,0xfcdd,0xfccf,0xfc30,0x02da,0x0297,0x0284,
+0xfc4e,0xfc68,0xfcbd,0xfdf7,0xfca4,0xfcd4,0x0327,0x023c,
+0x0242,0xfc7a,0xfc70,0xfd7a,0xfd83,0xfd81,0xfd73,0x036f,
+0x0263,0x0335,0xfdf4,0xfd3f,0xfc2d,0xfd0d,0xfd27,0xfd6e,
+0x029b,0x02d9,0x0349,0xfc49,0xfddb,0xfda9,0xfc99,0xfdf9,
+0xfd4c,0x030e,0x0249,0x02ef,0xfc81,0xfdc2,0xfca5,0xfdaa,
+0xfd62,0xfdd9,0x0322,0x02dd,0x03d0,0xfdc2,0xfd03,0xfd45,
+0xfd8c,0xfdf3,0xfc5d,0x0352,0x0367,0x02c4,0xfd2c,0xfdb4,
+0xfda6,0xfd3e,0xfd1a,0xfc2d,0x0366,0x02e7,0x02c0,0xfd88,
+0xfdaa,0xfc01,0xfd1d,0xfde4,0xfc36,0x03ad,0x0383,0x0249,
+0xfcc5,0xfde6,0xfdfe,0xfc9e,0xfcea,0xfd63,0x03af,0x0278,
+0x036b,0xfdba,0xfcda,0xfde3,0xfc9b,0xfcf3,0xfd13,0x03ff,
+0x02dc,0x027e,0xfd25,0xfd83,0xfc70,0xfc02,0xfdf5,0xfce5,
+0x030a,0x02f1,0x028f,0xfdc3,0xfc1b,0xfc3b,0xfdb0,0xfdd6,
+0xfcb9,0x034b,0x03b1,0x03b1,0xfcf5,0xfcf5,0xfc97,0xfd4a,
+0xfc97,0xfc97,0x03ae,0x03c9,0x023c,0xfc84,0xfdf1,0xfde2,
+0xfc9d,0xfc6a,0xfd80,0x03f0,0x022f,0x0356,0xfdd9,0xfcb7,
+0xfc5a,0xfc1f,0xfd9c,0xfd38,0x0364,0x02d9,0x0313,0xfd96,
+0xfd64,0xfdd0,0xfd1f,0xfdf9,0xfda3,0x03ca,0x02ae,0x0223,
+0xfd76,0xfdfa,0xfd23,0xfc69,0xfc69,0xfdb7,0x0217,0x027b,
+0x02a2,0xfd68,0xfd3f,0xfcbc,0xfdd0,0xfcec,0xfc89,0x0230,
+0x027c,0x033c,0xfd48,0xfc75,0xfdfe,0xfd9b,0xfcea,0xfd62,
+0x03e2,0x0250,0x0341,0xfdc1,0xfcd7,0xfc3c,0xfc3a,0xfd52,
+0xfd5b,0x0224,0x026d,0x02b6,0xfd67,0xfd19,0xfcb5,0xfdb5,
+0xfd0d,0xfc52,0x0226,0x0321,0x03bb,0xfca3,0xfdff,0xfd14,
+0xfdb1,0xfd8d,0xfc84,0x0246,0x020a,0x0286,0xfdae,0xfd21,
+0xfd6d,0xfd69,0xfdeb,0xfcd1,0x025e,0x0271,0x0332,0xfd1d,
+0xfc37,0xfc19,0xfd33,0xfd05,0xfd71,0x026f,0x024f,0x036a,
+0xfd30,0xfded,0xfc0e,0xfd0a,0xfd55,0xfd16,0x029d,0x0217,
+0x03e9,0xfd44,0xfd72,0xfdf5,0xfc94,0xfdd1,0xfc2d,0x02bd,
+0x03aa,0x0250,0xfd7e,0xfcd6,0xfde2,0xfc40,0xfca4,0xfd54,
+0x039d,0x0334,0x02e2,0xfd1b,0xfd65,0xfdb0,0xfcbd,0xfd6f,
+0xfdeb,0x02df,0x0316,0x0220,0xfdc9,0xfcf3,0xfcb8,0xfdf1,
+0xfd9e,0xfdbe,0x023f,0x0314,0x02a2,0xfc8a,0xfd0a,0xfdf9,
+0xfd79,0xfda1,0xfc88,0x028f,0x030f,0x0210,0xfc15,0xfd5b,
+0xfcd7,0xfcb8,0xfda9,0xfddf,0x020f,0x0201,0x02a5,0xfdef,
+0xfd45,0xfd59,0xfde0,0xfdfe,0xfc7f,0x030e,0x0354,0x03ac,
+0xfd75,0xfd32,0xfcf1,0xfdab,0xfd3b,0xfca1,0x0242,0x038e,
+0x0201,0xfdfe,0xfdbb,0xfc6e,0xfd72,0xfcd7,0xfdfd,0x02b4,
+0x0372,0x02de,0xfdac,0xfc1f,0xfd88,0xfc57,0xfd09,0xfdf2,
+0x024a,0x02ab,0x02cb,0xfcf2,0xfcce,0xfc46,0xfd62,0xfc71,
+0xfc19,0x02e3,0x0261,0x02cd,0xfc90,0xfdfa,0xfcaa,0xfdea,
+0xfd2b,0xfc12,0x030a,0x0305,0x02b9,0xfdb4,0xfdee,0xfdf1,
+0xfdb0,0xfdb8,0xfc49,0x039a,0x0311,0x032d,0xfd3d,0xfd24,
+0xfd90,0xfcc2,0xfda6,0xfd7a,0x02fe,0x028b,0x03d8,0xfc32,
+0xfd20,0xfd8e,0xfdc3,0xfcc5,0xfc4e,0x03d2,0x0254,0x0203,
+0xfdc7,0xfc28,0xfda8,0xfc5a,0xfd4a,0xfdfa,0x0217,0x02f5,
+0x021c,0xfce8,0xfdcb,0xfce1,0xfdd0,0xfdd1,0xfdc5,0x0296,
+0x02af,0x024d,0xfc86,0xfd06,0xfce9,0xfca7,0xfc64,0xfd5a,
+0x022a,0x0218,0x03d3,0xfdbc,0xfdee,0xfe00,0xfda7,0xfdcf,
+0xfc58,0x02d3,0x03d6,0x02b2,0xfd4a,0xfc30,0xfd6a,0xfc02,
+0xfc51,0xfc5c,0x03a8,0x03bc,0x020f,0xfc95,0xfc3b,0xfc26,
+0xfca8,0xfc82,0xfde1,0x024e,0x03ea,0x0223,0xfdbf,0xfd89,
+0xfde8,0xfd58,0xfc2c,0xfdb6,0x0205,0x028d,0x0339,0xfd6c,
+0xfcbf,0xfdf2,0xfdf6,0xfcbe,0xfd68,0x0340,0x0250,0x03ce,
+0xfc3c,0xfce8,0xfdcc,0xfd5b,0xfd52,0xfc60,0x0200,0x03bd,
+0x0387,0xfc43,0xfc79,0xfcb4,0xfe00,0xfc82,0xfce4,0x0277,
+0x0221,0x038a,0xfd60,0xfdd2,0xfc3b,0xfcf7,0xfdbb,0xfcde,
+0x0202,0x02d0,0x0289,0xfd2d,0xfd74,0xfc6f,0xfdfc,0xfc0c,
+0xfcc8,0x0254,0x02a7,0x02b6,0xfcea,0xfcd9,0xfc68,0xfd4b,
+0xfc7c,0xfc54,0x0269,0x02e4,0x03d3,0xfc84,0xfdb2,0xfd3c,
+0xfd19,0xfde9,0xfc58,0x02c7,0x02ad,0x036d,0xfc48,0xfd9f,
+0xfdb5,0xfc25,0xfc6b,0xfd10,0x028f,0x0343,0x0350,0xfdea,
+0xfde2,0xfd4d,0xfcba,0xfd57,0xfd42,0x02f2,0x0347,0x02ba,
+0xfd96,0xfdfd,0xfdc3,0xfdd4,0xfd50,0xfc46,0x0367,0x0203,
+0x0301,0xfc93,0xfd71,0xfcfa,0xfd1a,0xfdfa,0xfdbe,0x03bb,
+0x0354,0x023d,0xfce5,0xfde9,0xfc45,0xfc84,0xfd3b,0xfd7d,
+0x0280,0x035b,0x024b,0xfde7,0xfd21,0xfc26,0xfcdf,0xfd2f,
+0xfd5e,0x0318,0x035f,0x022a,0xfd65,0xfca6,0xfc59,0xfd9c,
+0xfd29,0xfda7,0x034b,0x02c4,0x02aa,0xfdb9,0xfdce,0xfc50,
+0xfd4a,0xfc2d,0xfc72,0x03cb,0x0300,0x0228,0xfd28,0xfdf4,
+0xfcc3,0xfc67,0xfdc0,0xfdac,0x037b,0x0218,0x02b2,0xfc59,
+0xfda7,0xfd2c,0xfcf8,0xfdcd,0xfc5d,0x03f8,0x0230,0x0297,
+0xfdd4,0xfd6d,0xfd29,0xfc0f,0xfd9a,0xfca4,0x021f,0x0366,
+0x02a2,0xfc64,0xfd34,0xfdc3,0xfdbf,0xfd1d,0xfc87,0x0259,
+0x03e7,0x0249,0xfdb6,0xfd52,0xfdc5,0xfd3f,0xfc31,0xfd64,
+0x03b1,0x03c6,0x0379,0xfc84,0xfccb,0xfcb8,0xfc98,0xfc70,
+0xfcfb,0x0211,0x0390,0x0309,0xfc51,0xfcdc,0xfd4c,0xfddd,
+0xfcd4,0xfdb2,0x023e,0x03fc,0x02ca,0xfdc4,0xfcdf,0xfd39,
+0xfd7d,0xfc08,0xfc1b,0x0261,0x039a,0x03a4,0xfddb,0xfdd5,
+0xfcb8,0xfd2b,0xfcc1,0xfcb0,0x0256,0x030a,0x03a1,0xfc73,
+0xfde1,0xfd3d,0xfd46,0xfdb0,0xfcb4,0x028b,0x0304,0x02e8,
+0xfc2a,0xfc4d,0xfdcf,0xfcc5,0xfdba,0xfde3,0x02b3,0x03bd,
+0x02d2,0xfd7a,0xfc30,0xfd5d,0xfc5a,0xfc82,0xfc04,0x02d9,
+0x02fe,0x03d8,0xfdde,0xfd43,0xfd20,0xfdf8,0xfdc3,0xfc4f,
+0x0223,0x0207,0x0278,0xfdd5,0xfd5d,0xfd80,0xfdb7,0xfdf2,
+0xfcf4,0x0247,0x0214,0x039a,0xfda1,0xfdf3,0xfc40,0xfd68,
+0xfdd6,0xfcc1,0x0279,0x03e3,0x025b,0xfd99,0xfd17,0xfdb6,
+0xfcf1,0xfc38,0xfd3a,0x02a5,0x023b,0x036a,0xfd0b,0xfdbe,
+0xfc2f,0xfc80,0xfd82,0xfd15,0x025a,0x0221,0x029a,0xfd7f,
+0xfcf1,0xfd3b,0xfd3c,0xfdbc,0xfc9e,0x0273,0x021e,0x036b,
+0xfd67,0xfde8,0xfc60,0xfcff,0xfdc1,0xfd14,0x02a0,0x03d7,
+0x0256,0xfd7b,0xfcee,0xfdc1,0xfc8e,0xfc50,0xfd44,0x02b4,
+0x03d3,0x02da,0xfd6a,0xfc24,0xfd46,0xfc57,0xfc57,0xfdf7,
+0x029c,0x027a,0x0368,0xfcc5,0xfdc7,0xfde4,0xfc99,0xfcef,
+0xfd1a,0x02c7,0x02c2,0x02a2,0xfc2a,0xfc57,0xfc5d,0xfc24,
+0xfc31,0xfc87,0x02e3,0x03d1,0x0253,0xfd3e,0xfca4,0xfdc8,
+0xfdea,0xfc5c,0xfd4d,0x02f7,0x0389,0x02fb,0xfd61,0xfdca,
+0xfd5d,0xfdcd,0xfce1,0xfdc7,0x02f4,0x0246,0x02fb,0xfca4,
+0xfdcd,0xfc9c,0xfdd2,0xfd69,0xfdc8,0x02ec,0x03f5,0x038c,
+0xfd1b,0xfd68,0xfc7e,0xfddd,0xfc16,0xfcdb,0x0324,0x03a9,
+0x03b6,0xfd20,0xfd16,0xfc9b,0xfd89,0xfca6,0xfc8e,0x0311,
+0x021f,0x03e6,0xfcbf,0xfd02,0xfdef,0xfda6,0xfdc0,0xfc33,
+0x03e7,0x02ac,0x03ae,0xfd64,0xfc68,0xfd8a,0xfc31,0xfc6d,
+0xfc9c,0x0272,0x0279,0x03e4,0xfcfa,0xfd9f,0xfd98,0xfd03,
+0xfcf1,0xfc36,0x0272,0x0237,0x0343,0xfd4b,0xfc03,0xfc63,
+0xfd03,0xfd8c,0xfd57,0x02e3,0x023b,0x03cd,0xfcc7,0xfd42,
+0xfde1,0xfdeb,0xfd83,0xfc63,0x028f,0x0301,0x02c3,0xfc28,
+0xfc76,0xfded,0xfcb9,0xfdbf,0xfc2e,0x02be,0x03b8,0x023b,
+0xfd73,0xfcef,0xfdec,0xfc3c,0xfc8a,0xfd82,0x02d7,0x02c8,
+0x0202,0xfc0c,0xfd26,0xfd35,0xfdfc,0xfc22,0xfdfc,0x02e8,
+0x02ae,0x02e5,0xfc1a,0xfde5,0xfc1e,0xfde3,0xfc67,0xfde8,
+0x02db,0x0228,0x02ca,0xfceb,0xfc03,0xfcfc,0xfdf6,0xfdac,
+0xfc1a,0x0338,0x03df,0x02d6,0xfce2,0xfdb7,0xfd41,0xfd68,
+0xfc41,0xfdfd,0x031b,0x0267,0x0220,0xfc45,0xfcb4,0xfd73,
+0xfd97,0xfd1e,0xfdbe,0x0366,0x023f,0x0338,0xfc2e,0xfd44,
+0xfc62,0xfd1d,0xfd7a,0xfd68,0x0323,0x0340,0x039b,0xfd73,
+0xfd2c,0xfd12,0xfd8a,0xfd5b,0xfcc0,0x0333,0x0347,0x0213,
+0xfd61,0xfcaf,0xfc99,0xfd71,0xfd50,0xfdda,0x0377,0x0312,
+0x0354,0xfd57,0xfd1e,0xfd72,0xfcff,0xfda5,0xfd3b,0x03a5,
+0x02fc,0x023a,0xfd48,0xfdf9,0xfcad,0xfcae,0xfdc6,0xfd86,
+0x02cc,0x038a,0x028e,0xfd86,0xfc6c,0xfdbd,0xfc15,0xfcde,
+0xfcbb,0x0310,0x0324,0x0233,0xfd98,0xfca0,0xfc8b,0xfda7,
+0xfd89,0xfd94,0x0306,0x0217,0x036c,0xfcd7,0xfd6a,0xfc6d,
+0xfdb7,0xfdd1,0xfd13,0x0327,0x0256,0x0289,0xfc51,0xfc01,
+0xfd0a,0xfd84,0xfd45,0xfcca,0x0331,0x02f7,0x02ce,0xfda2,
+0xfdc3,0xfdeb,0xfd74,0xfdcd,0xfc10,0x033f,0x03e1,0x034e,
+0xfcda,0xfd51,0xfccb,0xfd5d,0xfc3d,0xfd45,0x035a,0x0237,
+0x025d,0xfc49,0xfc09,0xfd62,0xfd30,0xfd8c,0xfd34,0x036c,
+0x03f4,0x028b,0xfc9e,0xfdd3,0xfd7d,0xfd12,0xfc17,0xfcc5,
+0x0354,0x0224,0x0225,0xfc6f,0xfc6e,0xfdb4,0xfd3a,0xfdb5,
+0xfdb3,0x037f,0x03f0,0x0367,0xfc8f,0xfd07,0xfca6,0xfcf2,
+0xfc1f,0xfd1b,0x037c,0x03e8,0x024f,0xfc98,0xfdfd,0xfdbe,
+0xfcf7,0xfc2f,0xfd55,0x03a7,0x0204,0x0344,0xfc50,0xfd04,
+0xfcb4,0xfca9,0xfdf7,0xfd56,0x03a3,0x0264,0x0308,0xfdd3,
+0xfd3e,0xfc5f,0xfcb1,0xfd23,0xfdb3,0x03c8,0x0266,0x0330,
+0xfdbb,0xfcfd,0xfc2c,0xfc6d,0xfd1e,0xfd75,0x020c,0x0309,
+0x02cf,0xfce4,0xfd1f,0xfdde,0xfde7,0xfdb2,0xfc0c,0x0225,
+0x02b6,0x0312,0xfd18,0xfcb5,0xfdeb,0xfdb3,0xfc53,0xfda5,
+0x0372,0x02f2,0x0216,0xfd77,0xfc68,0xfced,0xfd09,0xfdd5,
+0xfdd3,0x0255,0x0345,0x02fe,0xfc2f,0xfc81,0xfd8d,0xfd47,
+0xfd54,0xfdc2,0x027b,0x03ba,0x02c0,0xfdb0,0xfc97,0xfd70,
+0xfcec,0xfc86,0xfc37,0x0323,0x0226,0x031e,0xfca1,0xfd8e,
+0xfca6,0xfd8a,0xfdb1,0xfd92,0x02bb,0x028e,0x031e,0xfc82,
+0xfddf,0xfc03,0xfc46,0xfcbb,0xfd92,0x039c,0x02ff,0x02e2,
+0xfd4b,0xfd66,0xfdd7,0xfcbe,0xfdc1,0xfded,0x03ca,0x02a3,
+0x0260,0xfd81,0xfdc0,0xfcdf,0xfc69,0xfc86,0xfd2e,0x0278,
+0x02c2,0x02ac,0xfc97,0xfcb4,0xfc50,0xfcf4,0xfc30,0xfc6e,
+0x035a,0x02ff,0x0233,0xfd7d,0xfc4f,0xfcb4,0xfd31,0xfdc2,
+0xfd94,0x020f,0x02fd,0x02be,0xfcec,0xfd2c,0xfdf3,0xfde1,
+0xfdc4,0xfc3c,0x022f,0x039c,0x03eb,0xfc0f,0xfddd,0xfc77,
+0xfd9e,0xfcbf,0xfc29,0x026a,0x020c,0x0263,0xfd87,0xfd1e,
+0xfd8f,0xfd15,0xfde8,0xfd27,0x03e7,0x03ae,0x03c3,0xfc68,
+0xfc54,0xfc8a,0xfc30,0xfc9d,0xfc77,0x0223,0x0359,0x03a7,
+0xfc6c,0xfc19,0xfcf1,0xfdb8,0xfd32,0xfcaa,0x02de,0x039e,
+0x0239,0xfd67,0xfcce,0xfdfd,0xfdf1,0xfcba,0xfd86,0x03c7,
+0x02ea,0x0227,0xfd3f,0xfdf8,0xfcdc,0xfc6f,0xfde0,0xfdaf,
+0x0342,0x0279,0x0371,0xfdfd,0xfd32,0xfddf,0xfd59,0xfcf1,
+0xfd0a,0x03ad,0x02bd,0x02d7,0xfd7b,0xfd64,0xfc1b,0xfc9f,
+0xfc3f,0xfdfc,0x03b1,0x0234,0x027c,0xfdf7,0xfdb5,0xfd43,
+0xfc97,0xfd91,0xfcea,0x0200,0x0233,0x0319,0xfdcd,0xfce6,
+0xfc97,0xfe00,0xfd95,0xfd99,0x03d8,0x022c,0x023a,0xfdea,
+0xfddc,0xfd94,0xfc4f,0xfda4,0xfd84,0x03d8,0x021c,0x02e4,
+0xfdf9,0xfd38,0xfcf2,0xfc4e,0xfdc6,0xfde8,0x020f,0x02fc,
+0x036a,0xfced,0xfc7b,0xfd74,0xfde1,0xfdc6,0xfd15,0x0212,
+0x0298,0x03f6,0xfd50,0xfdf3,0xfd6e,0xfddb,0xfca2,0xfc13,
+0x0222,0x0203,0x0209,0xfdda,0xfdd4,0xfdf3,0xfdb8,0xfdf9,
+0xfded,0x022a,0x03a6,0x02a0,0xfc0d,0xfd28,0xfd9b,0xfda8,
+0xfcac,0xfc8e,0x0256,0x03b1,0x022c,0xfdd8,0xfd76,0xfdff,
+0xfd44,0xfc98,0xfda5,0x0273,0x0213,0x02dd,0xfd75,0xfc7d,
+0xfd08,0xfcff,0xfdd9,0xfdf3,0x0240,0x03fc,0x03f6,0xfdc2,
+0xfdc5,0xfc0d,0xfd76,0xfc08,0xfc13,0x026f,0x039c,0x0252,
+0xfdcd,0xfd2c,0xfde7,0xfd09,0xfcbd,0xfd4d,0x02a6,0x038d,
+0x025b,0xfda6,0xfce0,0xfde8,0xfc7d,0xfcd9,0xfd38,0x032e,
+0x02e8,0x021f,0xfdb0,0xfc9f,0xfcea,0xfd78,0xfde3,0xfdbf};
+
+static const unsigned short wavtable2[2304]={
+0x0e,0x0e,0x0e,0x14,0x14,0x13,0x15,0x14,0x14,0x0e,0x0e,0x0e,
+0x14,0x14,0x14,0x14,0x15,0x15,0x0e,0x0d,0x0e,0x12,0x14,0x13,
+0x15,0x12,0x15,0x0d,0x0d,0x0e,0x12,0x13,0x13,0x13,0x12,0x15,
+0x0d,0x0e,0x0d,0x13,0x12,0x12,0x13,0x14,0x13,0x0d,0x0e,0x0f,
+0x13,0x13,0x14,0x13,0x15,0x16,0x0d,0x0d,0x0d,0x11,0x11,0x12,
+0x12,0x13,0x13,0x0c,0x0e,0x0d,0x12,0x11,0x13,0x11,0x14,0x13,
+0x0d,0x0d,0x0d,0x12,0x12,0x11,0x13,0x12,0x12,0x0d,0x0d,0x0c,
+0x12,0x11,0x11,0x12,0x13,0x11,0x0d,0x0c,0x0d,0x11,0x11,0x10,
+0x12,0x11,0x12,0x0c,0x0d,0x0c,0x11,0x10,0x11,0x11,0x13,0x11,
+0x0c,0x0d,0x0d,0x10,0x11,0x11,0x11,0x12,0x12,0x0c,0x0d,0x0d,
+0x10,0x10,0x11,0x10,0x13,0x12,0x0c,0x0c,0x0d,0x0f,0x10,0x10,
+0x10,0x11,0x12,0x0b,0x0d,0x0d,0x0f,0x0f,0x11,0x0f,0x12,0x12,
+0x0d,0x0c,0x0d,0x10,0x11,0x11,0x12,0x10,0x13,0x0c,0x0c,0x0c,
+0x10,0x10,0x10,0x11,0x10,0x11,0x0d,0x0c,0x0c,0x11,0x11,0x0f,
+0x13,0x10,0x10,0x0d,0x0c,0x0b,0x10,0x10,0x0e,0x13,0x10,0x0f,
+0x0c,0x0c,0x0c,0x10,0x10,0x10,0x11,0x11,0x11,0x0c,0x0d,0x0b,
+0x10,0x0f,0x10,0x11,0x12,0x0f,0x0c,0x0c,0x0b,0x10,0x0e,0x0f,
+0x10,0x11,0x0f,0x0b,0x0d,0x0c,0x0f,0x0f,0x10,0x0f,0x12,0x10,
+0x0c,0x0c,0x0c,0x0f,0x0f,0x0f,0x10,0x10,0x10,0x0b,0x0c,0x0c,
+0x0f,0x0e,0x0f,0x0f,0x10,0x10,0x0b,0x0c,0x0c,0x0f,0x0f,0x10,
+0x0f,0x10,0x11,0x0b,0x0c,0x0d,0x0f,0x0f,0x10,0x0e,0x11,0x12,
+0x0b,0x0c,0x0b,0x0f,0x0e,0x0f,0x0f,0x11,0x0f,0x0b,0x0c,0x0b,
+0x0f,0x0e,0x0f,0x0e,0x11,0x0f,0x0b,0x0d,0x0c,0x0f,0x0e,0x10,
+0x0e,0x12,0x11,0x0b,0x0c,0x0c,0x0e,0x0e,0x10,0x0e,0x11,0x11,
+0x0c,0x0c,0x0c,0x0f,0x10,0x0f,0x11,0x10,0x11,0x0c,0x0b,0x0c,
+0x0f,0x10,0x0f,0x11,0x0f,0x10,0x0c,0x0c,0x0b,0x0f,0x0f,0x0f,
+0x11,0x10,0x0f,0x0c,0x0c,0x0b,0x0f,0x0e,0x0e,0x10,0x10,0x0f,
+0x0d,0x0b,0x0b,0x0f,0x0f,0x0e,0x12,0x0f,0x0f,0x0c,0x0b,0x0a,
+0x0f,0x0e,0x0d,0x11,0x0f,0x0d,0x0b,0x0b,0x0b,0x0e,0x0e,0x0e,
+0x0f,0x0f,0x0e,0x0b,0x0b,0x0a,0x0e,0x0d,0x0d,0x0f,0x0f,0x0d,
+0x0c,0x0b,0x0c,0x0e,0x0f,0x0f,0x10,0x0f,0x11,0x0b,0x0b,0x0c,
+0x0e,0x0f,0x0f,0x0f,0x0f,0x10,0x0b,0x0c,0x0b,0x0e,0x0e,0x0e,
+0x0f,0x10,0x0f,0x0b,0x0b,0x0b,0x0e,0x0e,0x0e,0x0e,0x0f,0x0f,
+0x0b,0x0b,0x0c,0x0e,0x0e,0x0f,0x0e,0x0f,0x10,0x0b,0x0b,0x0c,
+0x0e,0x0e,0x0f,0x0e,0x0f,0x11,0x0b,0x0c,0x0c,0x0e,0x0e,0x0f,
+0x0e,0x10,0x10,0x0a,0x0c,0x0c,0x0d,0x0e,0x0f,0x0d,0x10,0x10,
+0x0b,0x0c,0x0b,0x0e,0x0d,0x0e,0x0e,0x10,0x0e,0x0b,0x0b,0x0b,
+0x0e,0x0d,0x0e,0x0e,0x0f,0x0e,0x0a,0x0c,0x0b,0x0e,0x0d,0x0e,
+0x0d,0x11,0x0e,0x0a,0x0c,0x0b,0x0e,0x0d,0x0e,0x0d,0x10,0x0e,
+0x0b,0x0b,0x0b,0x0d,0x0d,0x0e,0x0e,0x0f,0x0f,0x0a,0x0b,0x0b,
+0x0d,0x0d,0x0e,0x0d,0x0f,0x0f,0x0a,0x0b,0x0c,0x0d,0x0e,0x0e,
+0x0d,0x0f,0x10,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x0d,0x0f,0x10,
+0x0b,0x0c,0x0b,0x0e,0x0d,0x0f,0x0e,0x10,0x0f,0x0a,0x0c,0x0c,
+0x0e,0x0d,0x0f,0x0d,0x11,0x10,0x0a,0x0d,0x0c,0x0e,0x0e,0x10,
+0x0d,0x12,0x10,0x0a,0x0c,0x0d,0x0e,0x0e,0x10,0x0d,0x11,0x12,
+0x0a,0x0c,0x0b,0x0d,0x0d,0x0f,0x0d,0x10,0x0f,0x0a,0x0c,0x0b,
+0x0d,0x0d,0x0f,0x0d,0x10,0x0f,0x0a,0x0c,0x0c,0x0e,0x0d,0x10,
+0x0d,0x11,0x10,0x0a,0x0c,0x0c,0x0d,0x0e,0x10,0x0c,0x10,0x11,
+0x0c,0x0b,0x0c,0x0f,0x10,0x0e,0x11,0x0e,0x10,0x0c,0x0b,0x0b,
+0x0e,0x0f,0x0d,0x11,0x0e,0x0f,0x0b,0x0b,0x0c,0x0e,0x0e,0x0e,
+0x0f,0x0e,0x10,0x0b,0x0b,0x0b,0x0d,0x0e,0x0e,0x0e,0x0e,0x0f,
+0x0b,0x0a,0x0b,0x0d,0x0e,0x0d,0x0f,0x0d,0x0f,0x0b,0x0a,0x0b,
+0x0d,0x0e,0x0d,0x0f,0x0d,0x0f,0x0c,0x0a,0x0a,0x0e,0x0e,0x0c,
+0x11,0x0d,0x0d,0x0c,0x0a,0x0a,0x0d,0x0d,0x0b,0x10,0x0d,0x0c,
+0x0b,0x0b,0x0b,0x0e,0x0e,0x0d,0x0f,0x0e,0x0e,0x0b,0x0b,0x0a,
+0x0d,0x0d,0x0d,0x0f,0x0e,0x0d,0x0b,0x0b,0x0a,0x0e,0x0c,0x0d,
+0x0e,0x0f,0x0d,0x0a,0x0c,0x0a,0x0e,0x0c,0x0d,0x0d,0x10,0x0c,
+0x0b,0x0b,0x0b,0x0d,0x0e,0x0d,0x0e,0x0e,0x0f,0x0b,0x0b,0x0b,
+0x0d,0x0d,0x0d,0x0e,0x0e,0x0e,0x0a,0x0b,0x0b,0x0d,0x0d,0x0e,
+0x0d,0x0e,0x0f,0x0a,0x0b,0x0b,0x0c,0x0d,0x0d,0x0d,0x0e,0x0f,
+0x0b,0x0a,0x0b,0x0d,0x0d,0x0d,0x0e,0x0d,0x0e,0x0b,0x0b,0x0a,
+0x0d,0x0c,0x0d,0x0e,0x0e,0x0d,0x0b,0x0a,0x0a,0x0c,0x0c,0x0c,
+0x0e,0x0d,0x0d,0x0a,0x0a,0x0a,0x0c,0x0c,0x0c,0x0d,0x0d,0x0d,
+0x0b,0x0b,0x0a,0x0d,0x0c,0x0c,0x0e,0x0e,0x0c,0x0a,0x0b,0x0a,
+0x0d,0x0c,0x0c,0x0d,0x0e,0x0c,0x0a,0x0b,0x0a,0x0c,0x0b,0x0c,
+0x0d,0x0e,0x0c,0x0a,0x0b,0x0a,0x0c,0x0b,0x0c,0x0c,0x0e,0x0c,
+0x0a,0x0a,0x0b,0x0c,0x0d,0x0d,0x0d,0x0d,0x0f,0x0a,0x0a,0x0b,
+0x0c,0x0d,0x0d,0x0d,0x0d,0x0e,0x0a,0x0b,0x0b,0x0c,0x0c,0x0d,
+0x0c,0x0e,0x0e,0x0a,0x0b,0x0b,0x0c,0x0d,0x0d,0x0c,0x0e,0x0f,
+0x0a,0x0a,0x0b,0x0c,0x0c,0x0c,0x0c,0x0d,0x0e,0x0a,0x0a,0x0a,
+0x0b,0x0c,0x0c,0x0c,0x0d,0x0d,0x0a,0x0a,0x0b,0x0b,0x0c,0x0c,
+0x0c,0x0d,0x0e,0x09,0x0a,0x0b,0x0b,0x0c,0x0d,0x0b,0x0d,0x0e,
+0x0b,0x0b,0x0b,0x0d,0x0d,0x0d,0x0e,0x0f,0x0e,0x0a,0x0b,0x0b,
+0x0d,0x0c,0x0d,0x0d,0x0f,0x0e,0x0a,0x0b,0x0a,0x0d,0x0c,0x0d,
+0x0d,0x0f,0x0d,0x0a,0x0b,0x0b,0x0d,0x0c,0x0d,0x0c,0x0f,0x0e,
+0x0a,0x0b,0x0b,0x0d,0x0d,0x0e,0x0d,0x0f,0x0f,0x0a,0x0b,0x0b,
+0x0d,0x0d,0x0e,0x0c,0x0f,0x0f,0x0a,0x0b,0x0b,0x0d,0x0d,0x0e,
+0x0c,0x0f,0x0f,0x0a,0x0b,0x0c,0x0c,0x0d,0x0f,0x0c,0x0f,0x10,
+0x0a,0x0c,0x0b,0x0e,0x0c,0x0e,0x0d,0x10,0x0e,0x0a,0x0c,0x0b,
+0x0e,0x0c,0x0f,0x0c,0x11,0x0e,0x0a,0x0c,0x0c,0x0d,0x0d,0x0f,
+0x0c,0x10,0x10,0x0a,0x0d,0x0c,0x0e,0x0d,0x10,0x0c,0x12,0x11,
+0x0a,0x0c,0x0b,0x0d,0x0c,0x0f,0x0c,0x10,0x0f,0x0a,0x0c,0x0b,
+0x0d,0x0c,0x0e,0x0c,0x10,0x0f,0x0a,0x0c,0x0c,0x0d,0x0d,0x0f,
+0x0c,0x11,0x10,0x0a,0x0c,0x0c,0x0d,0x0d,0x10,0x0c,0x10,0x11,
+0x0a,0x0c,0x0a,0x0d,0x0c,0x0e,0x0c,0x10,0x0d,0x0a,0x0c,0x0b,
+0x0d,0x0c,0x0e,0x0c,0x10,0x0e,0x0a,0x0c,0x0a,0x0d,0x0b,0x0d,
+0x0c,0x10,0x0d,0x0a,0x0b,0x0b,0x0c,0x0c,0x0e,0x0c,0x0f,0x0e,
+0x0a,0x0b,0x0b,0x0c,0x0c,0x0d,0x0c,0x0e,0x0e,0x0a,0x0b,0x0b,
+0x0c,0x0c,0x0e,0x0c,0x0f,0x0f,0x09,0x0b,0x0b,0x0c,0x0c,0x0e,
+0x0b,0x0f,0x0f,0x09,0x0b,0x0c,0x0c,0x0d,0x0e,0x0b,0x0f,0x10,
+0x0a,0x0c,0x0b,0x0d,0x0c,0x0f,0x0c,0x11,0x0e,0x09,0x0c,0x0b,
+0x0d,0x0c,0x0f,0x0b,0x11,0x0f,0x09,0x0c,0x0c,0x0d,0x0c,0x0f,
+0x0b,0x10,0x10,0x09,0x0c,0x0c,0x0d,0x0d,0x10,0x0b,0x11,0x11,
+0x09,0x0c,0x0b,0x0d,0x0c,0x0f,0x0b,0x11,0x0e,0x09,0x0c,0x0c,
+0x0d,0x0c,0x10,0x0b,0x11,0x10,0x09,0x0c,0x0c,0x0d,0x0c,0x0f,
+0x0b,0x11,0x10,0x09,0x0d,0x0c,0x0d,0x0d,0x10,0x0b,0x12,0x11,
+0x0c,0x0a,0x0b,0x0d,0x0e,0x0c,0x10,0x0c,0x0e,0x0b,0x0a,0x0a,
+0x0c,0x0d,0x0c,0x0e,0x0c,0x0d,0x0a,0x0a,0x0b,0x0c,0x0d,0x0c,
+0x0d,0x0c,0x0f,0x0a,0x0a,0x0a,0x0c,0x0c,0x0c,0x0d,0x0c,0x0d,
+0x0b,0x09,0x0a,0x0c,0x0d,0x0b,0x0f,0x0b,0x0d,0x0b,0x0a,0x0a,
+0x0c,0x0c,0x0b,0x0e,0x0c,0x0c,0x0a,0x0a,0x09,0x0b,0x0b,0x0b,
+0x0d,0x0c,0x0b,0x0a,0x0a,0x09,0x0b,0x0b,0x0a,0x0d,0x0c,0x0a,
+0x0a,0x0a,0x0a,0x0c,0x0c,0x0c,0x0d,0x0d,0x0d,0x0a,0x0a,0x0a,
+0x0c,0x0b,0x0c,0x0c,0x0d,0x0d,0x0a,0x0a,0x0a,0x0b,0x0b,0x0b,
+0x0c,0x0c,0x0d,0x0a,0x0a,0x0a,0x0b,0x0b,0x0b,0x0c,0x0c,0x0c,
+0x0a,0x0a,0x0a,0x0c,0x0b,0x0b,0x0c,0x0d,0x0c,0x0a,0x0a,0x09,
+0x0b,0x0b,0x0b,0x0c,0x0d,0x0b,0x09,0x0a,0x09,0x0b,0x0a,0x0b,
+0x0b,0x0c,0x0b,0x09,0x0a,0x09,0x0b,0x0a,0x0b,0x0b,0x0d,0x0b,
+0x0a,0x09,0x0b,0x0b,0x0c,0x0b,0x0d,0x0b,0x0e,0x0a,0x0a,0x0a,
+0x0b,0x0c,0x0b,0x0d,0x0c,0x0d,0x0a,0x0a,0x0a,0x0b,0x0c,0x0c,
+0x0c,0x0c,0x0d,0x09,0x0a,0x0a,0x0a,0x0b,0x0b,0x0b,0x0c,0x0d,
+0x0a,0x09,0x0a,0x0b,0x0c,0x0a,0x0d,0x0b,0x0c,0x0a,0x09,0x0a,
+0x0b,0x0b,0x0a,0x0c,0x0b,0x0c,0x09,0x0a,0x0a,0x0b,0x0b,0x0b,
+0x0b,0x0c,0x0c,0x09,0x09,0x0a,0x0a,0x0a,0x0b,0x0b,0x0b,0x0c,
+0x09,0x0a,0x0a,0x0b,0x0b,0x0c,0x0b,0x0d,0x0d,0x09,0x0a,0x0a,
+0x0b,0x0b,0x0c,0x0b,0x0d,0x0d,0x09,0x0a,0x0b,0x0b,0x0b,0x0c,
+0x0b,0x0c,0x0e,0x09,0x0a,0x0b,0x0b,0x0b,0x0c,0x0b,0x0d,0x0e,
+0x09,0x0a,0x0a,0x0a,0x0a,0x0b,0x0b,0x0c,0x0c,0x09,0x0a,0x0a,
+0x0a,0x0a,0x0b,0x0a,0x0c,0x0d,0x09,0x09,0x0a,0x0a,0x0a,0x0b,
+0x0a,0x0b,0x0c,0x09,0x0a,0x0a,0x0a,0x0a,0x0c,0x0a,0x0c,0x0d,
+0x0a,0x0b,0x0a,0x0c,0x0c,0x0d,0x0d,0x0e,0x0d,0x0a,0x0b,0x0a,
+0x0c,0x0c,0x0d,0x0c,0x0e,0x0d,0x0a,0x0b,0x0a,0x0c,0x0b,0x0d,
+0x0c,0x0f,0x0d,0x0a,0x0b,0x0a,0x0c,0x0b,0x0d,0x0c,0x0e,0x0d,
+0x0a,0x0a,0x0a,0x0c,0x0b,0x0c,0x0c,0x0d,0x0d,0x0a,0x0a,0x0a,
+0x0b,0x0b,0x0c,0x0c,0x0d,0x0d,0x09,0x0b,0x0a,0x0c,0x0b,0x0c,
+0x0b,0x0e,0x0d,0x09,0x0b,0x0a,0x0b,0x0b,0x0c,0x0b,0x0e,0x0d,
+0x0a,0x0b,0x0b,0x0c,0x0c,0x0d,0x0c,0x0e,0x0e,0x09,0x0b,0x0b,
+0x0c,0x0c,0x0d,0x0b,0x0e,0x0e,0x09,0x0b,0x0b,0x0b,0x0c,0x0d,
+0x0b,0x0e,0x0f,0x09,0x0b,0x0c,0x0b,0x0c,0x0e,0x0b,0x0e,0x10,
+0x09,0x0b,0x0b,0x0c,0x0b,0x0d,0x0b,0x0e,0x0e,0x09,0x0b,0x0b,
+0x0c,0x0c,0x0d,0x0b,0x0e,0x0e,0x09,0x0b,0x0b,0x0b,0x0c,0x0d,
+0x0b,0x0e,0x0f,0x09,0x0b,0x0c,0x0b,0x0c,0x0e,0x0a,0x0e,0x10,
+0x09,0x0b,0x0a,0x0c,0x0b,0x0d,0x0b,0x0f,0x0d,0x09,0x0b,0x0b,
+0x0c,0x0b,0x0e,0x0b,0x0f,0x0e,0x09,0x0c,0x0a,0x0c,0x0b,0x0d,
+0x0b,0x10,0x0d,0x09,0x0b,0x0b,0x0c,0x0b,0x0e,0x0b,0x0f,0x0e,
+0x09,0x0b,0x0b,0x0c,0x0c,0x0e,0x0b,0x0f,0x0f,0x09,0x0b,0x0c,
+0x0c,0x0c,0x0f,0x0b,0x0f,0x10,0x09,0x0c,0x0b,0x0c,0x0c,0x0e,
+0x0b,0x10,0x0f,0x09,0x0c,0x0c,0x0c,0x0d,0x0f,0x0b,0x10,0x10,
+0x09,0x0c,0x0b,0x0d,0x0b,0x0e,0x0b,0x11,0x0e,0x09,0x0c,0x0b,
+0x0d,0x0c,0x0f,0x0b,0x11,0x0f,0x09,0x0c,0x0b,0x0c,0x0c,0x0e,
+0x0a,0x10,0x0f,0x09,0x0c,0x0c,0x0c,0x0c,0x0f,0x0a,0x10,0x10,
+0x09,0x0c,0x0b,0x0d,0x0b,0x0f,0x0a,0x11,0x0e,0x09,0x0d,0x0c,
+0x0d,0x0c,0x10,0x0a,0x12,0x10,0x09,0x0d,0x0c,0x0d,0x0c,0x10,
+0x0a,0x12,0x10,0x09,0x0c,0x0d,0x0d,0x0d,0x10,0x0a,0x11,0x12,
+0x0a,0x0b,0x0a,0x0c,0x0b,0x0c,0x0c,0x0f,0x0c,0x09,0x0b,0x0a,
+0x0c,0x0a,0x0c,0x0b,0x0f,0x0c,0x09,0x0a,0x0a,0x0b,0x0b,0x0c,
+0x0b,0x0d,0x0c,0x09,0x0a,0x0a,0x0b,0x0a,0x0b,0x0a,0x0d,0x0c,
+0x09,0x0b,0x0a,0x0c,0x0b,0x0c,0x0b,0x0e,0x0d,0x09,0x0b,0x0a,
+0x0b,0x0b,0x0c,0x0b,0x0e,0x0d,0x09,0x0b,0x0a,0x0c,0x0b,0x0d,
+0x0a,0x0f,0x0d,0x09,0x0b,0x0b,0x0c,0x0b,0x0e,0x0a,0x0f,0x0e,
+0x09,0x0a,0x0a,0x0b,0x0b,0x0c,0x0a,0x0d,0x0d,0x09,0x0b,0x0a,
+0x0b,0x0a,0x0c,0x0a,0x0e,0x0c,0x09,0x0a,0x0a,0x0b,0x0b,0x0c,
+0x0a,0x0d,0x0d,0x09,0x0a,0x0b,0x0b,0x0b,0x0d,0x0a,0x0d,0x0e,
+0x09,0x0b,0x0b,0x0b,0x0b,0x0d,0x0a,0x0e,0x0e,0x09,0x0b,0x0b,
+0x0b,0x0c,0x0e,0x0a,0x0e,0x0f,0x09,0x0b,0x0b,0x0b,0x0b,0x0d,
+0x0a,0x0e,0x0e,0x09,0x0b,0x0b,0x0b,0x0b,0x0e,0x0a,0x0e,0x0f,
+0x09,0x0c,0x0a,0x0c,0x0b,0x0d,0x0b,0x10,0x0d,0x09,0x0c,0x0a,
+0x0c,0x0b,0x0e,0x0a,0x10,0x0d,0x09,0x0b,0x0b,0x0c,0x0b,0x0e,
+0x0a,0x0f,0x0e,0x09,0x0b,0x0b,0x0c,0x0c,0x0e,0x0a,0x0f,0x0f,
+0x09,0x0c,0x0b,0x0c,0x0b,0x0e,0x0a,0x10,0x0f,0x09,0x0c,0x0c,
+0x0c,0x0c,0x0f,0x0a,0x10,0x10,0x09,0x0c,0x0b,0x0d,0x0c,0x0f,
+0x0a,0x11,0x0f,0x09,0x0d,0x0c,0x0d,0x0c,0x10,0x0a,0x12,0x11,
+0x09,0x0b,0x0a,0x0c,0x0b,0x0d,0x0a,0x0f,0x0d,0x09,0x0c,0x0b,
+0x0c,0x0b,0x0e,0x0a,0x10,0x0e,0x09,0x0c,0x0b,0x0c,0x0b,0x0e,
+0x0a,0x10,0x0f,0x09,0x0b,0x0c,0x0c,0x0c,0x0f,0x0a,0x0f,0x10,
+0x09,0x0c,0x0b,0x0c,0x0b,0x0f,0x0a,0x11,0x0e,0x09,0x0c,0x0c,
+0x0c,0x0c,0x10,0x0a,0x11,0x10,0x08,0x0c,0x0b,0x0c,0x0b,0x0e,
+0x09,0x10,0x0f,0x08,0x0c,0x0c,0x0c,0x0c,0x0f,0x09,0x11,0x10,
+0x0a,0x09,0x09,0x0a,0x0b,0x0a,0x0c,0x0a,0x0b,0x09,0x09,0x09,
+0x0a,0x0a,0x09,0x0b,0x0a,0x0a,0x09,0x0a,0x09,0x0a,0x0a,0x0a,
+0x0b,0x0c,0x0b,0x09,0x09,0x09,0x0a,0x09,0x0a,0x0a,0x0b,0x0a,
+0x09,0x09,0x0a,0x0a,0x0a,0x0b,0x0b,0x0b,0x0c,0x09,0x09,0x0a,
+0x09,0x0a,0x0a,0x0a,0x0a,0x0c,0x09,0x09,0x09,0x09,0x09,0x0a,
+0x0a,0x0b,0x0b,0x08,0x09,0x09,0x09,0x09,0x0a,0x09,0x0b,0x0b,
+0x09,0x0a,0x09,0x0a,0x0a,0x0b,0x0a,0x0c,0x0b,0x08,0x0a,0x09,
+0x0a,0x09,0x0a,0x09,0x0c,0x0b,0x08,0x0a,0x0a,0x0a,0x09,0x0b,
+0x09,0x0c,0x0c,0x08,0x09,0x0a,0x09,0x0a,0x0b,0x09,0x0b,0x0d,
+0x09,0x09,0x09,0x09,0x09,0x09,0x0a,0x0a,0x0a,0x07,0x08,0x08,
+0x07,0x07,0x07,0x07,0x08,0x08,0x08,0x0a,0x09,0x09,0x09,0x0a,
+0x08,0x0c,0x0b,0x08,0x09,0x09,0x08,0x08,0x0a,0x08,0x0a,0x0b,
+0x09,0x0b,0x0a,0x0b,0x0a,0x0c,0x0a,0x0f,0x0c,0x09,0x0b,0x0a,
+0x0b,0x0a,0x0d,0x0a,0x0f,0x0c,0x09,0x0a,0x0a,0x0a,0x0a,0x0c,
+0x0a,0x0d,0x0d,0x08,0x0a,0x0b,0x0a,0x0b,0x0d,0x09,0x0d,0x0e,
+0x09,0x0b,0x0a,0x0b,0x0a,0x0d,0x0a,0x0f,0x0d,0x09,0x0b,0x0b,
+0x0b,0x0b,0x0e,0x0a,0x0f,0x0e,0x08,0x0b,0x0b,0x0b,0x0b,0x0d,
+0x09,0x0e,0x0e,0x08,0x0b,0x0c,0x0b,0x0b,0x0e,0x09,0x0f,0x10,
+0x08,0x0a,0x09,0x0a,0x09,0x0b,0x09,0x0d,0x0b,0x08,0x0b,0x0a,
+0x0b,0x0a,0x0c,0x09,0x0e,0x0d,0x08,0x0b,0x0a,0x0a,0x0a,0x0c,
+0x09,0x0e,0x0d,0x08,0x0a,0x0b,0x0a,0x0b,0x0d,0x09,0x0d,0x0e,
+0x08,0x0c,0x0b,0x0b,0x0a,0x0e,0x09,0x10,0x0e,0x08,0x0c,0x0b,
+0x0b,0x0b,0x0e,0x09,0x10,0x0f,0x08,0x0b,0x0a,0x0a,0x0a,0x0c,
+0x09,0x0e,0x0d,0x08,0x0b,0x0a,0x0a,0x0a,0x0d,0x08,0x0e,0x0d};
+
+static const signed short etable1[5120]={
+0x0026,0xfffc,0x000f,0xfffc,0x000e,0xfff3,0x000c,0xfff5,
+0xfffe,0xfffa,0xfffa,0xfff5,0xffd3,0xfff0,0xfff5,0xfff3,
+0xfff9,0x0006,0xfff4,0x0004,0xffec,0x0003,0xfff0,0x000c,
+0xffff,0x000c,0x002e,0x0018,0x0000,0x0021,0xfffd,0x0009,
+0xfff4,0xfff4,0xfff8,0xfff9,0x0011,0xfffa,0x0000,0xfffe,
+0x003c,0xfff0,0x0003,0xffea,0x000a,0xffe0,0x0000,0xffe4,
+0xffef,0xffee,0xfffd,0xffe7,0xffdb,0xffe9,0xfff6,0x0003,
+0x0002,0x0003,0x0000,0x0003,0xfff2,0x0000,0xfff2,0xffff,
+0x0000,0x0002,0x0020,0x0009,0xffff,0x0019,0x0007,0x000d,
+0xfffb,0x000d,0x0008,0x0001,0x0002,0x0008,0xfff6,0x0006,
+0x001b,0xfff4,0x001c,0xfffe,0x0006,0xfff9,0x000f,0x0009,
+0xfff5,0x0001,0xfff3,0xfff5,0xffd8,0x0004,0xffe3,0xfff2,
+0xffed,0xfffb,0xffe9,0xfff8,0xffe2,0xfff3,0xffef,0x0000,
+0xfff2,0x000c,0x0022,0x0014,0xfffe,0x0019,0x0002,0xfff0,
+0xfffc,0xfff4,0x000f,0x0010,0x001d,0x0007,0x0018,0x000a,
+0x0031,0xffe8,0x0010,0xffec,0x0002,0xffe6,0x0002,0xfff9,
+0xffe7,0xfff6,0xfff5,0xffe7,0xffe0,0xfffd,0xffe5,0x0002,
+0xfff8,0xfff8,0xfff5,0xfff7,0xffe8,0xffef,0xfff0,0xfff2,
+0xfff3,0x0002,0x0014,0x0005,0xfffc,0x0011,0x000e,0xfff4,
+0x0003,0x000d,0x0021,0x0019,0x000e,0x0017,0x000f,0x0013,
+0x002e,0xfffa,0x0015,0x0008,0xfffe,0xfff0,0xfffb,0xfff8,
+0xfff5,0x0004,0x0008,0x000f,0xffe8,0x0004,0xfffe,0xffe6,
+0xfffd,0xfff0,0xfff0,0xfff2,0xfff7,0xfffe,0xffff,0x0004,
+0x0013,0x0007,0x0024,0x0011,0x0009,0x000d,0x0000,0x001f,
+0xfffb,0xfff4,0x0007,0xfff8,0x000b,0xfff1,0xfff3,0xfffc,
+0x0044,0xffee,0x0009,0xfff7,0xfffa,0xffdd,0xffee,0xffe7,
+0xffe6,0xfff9,0x000a,0x0001,0xfff0,0xfffd,0xffff,0xfff7,
+0x0006,0xffed,0xfffc,0xfff1,0xfffc,0xfffa,0x0000,0xfff8,
+0x0014,0xfffe,0x0017,0x0002,0x0007,0x0005,0x000c,0x0023,
+0x0001,0x000d,0x0018,0x0000,0xfffd,0x0000,0xffea,0x0004,
+0x0023,0xfff2,0x0022,0x000a,0xfff6,0xfff6,0xffff,0x000c,
+0xffec,0x000c,0x0000,0x000f,0xffee,0x0018,0xffec,0xffe5,
+0xfff2,0xffe4,0xffe5,0xffe5,0xffec,0xffed,0xfffe,0xfff8,
+0x0005,0x0007,0x0019,0x000d,0x0005,0x0005,0x0006,0x0005,
+0x0002,0xfff4,0x001f,0x000f,0x0017,0xffff,0x000c,0x0008,
+0x0039,0xffe6,0x0016,0xfff9,0xfff2,0xffe4,0xfff2,0xfffd,
+0xffdd,0x0000,0x0003,0x0001,0xfff5,0x0010,0xffee,0xfff6,
+0xfffc,0xffe1,0xfff1,0xffe4,0xfff2,0xffe9,0xffff,0xffeb,
+0x0007,0xfffe,0x000b,0xffff,0x0003,0xffff,0x0012,0x0009,
+0x000a,0x000d,0x0031,0x0018,0x0008,0x000e,0x0002,0x0010,
+0x0019,0x000f,0x0016,0x000b,0x0012,0x0004,0x000f,0xffea,
+0x0008,0xfffe,0xffef,0xfff7,0xffd0,0xffec,0xffe2,0xffef,
+0xfff0,0x000b,0xffff,0x0010,0x0002,0x000a,0xfffb,0x001a,
+0xfffe,0xfffc,0x0016,0x0000,0x0002,0x000a,0xfffa,0x000d,
+0xfff2,0x000a,0xffe9,0x0000,0x000a,0xfffe,0x0001,0x0000,
+0x002f,0x0003,0x000b,0xfffa,0x000f,0xfff3,0x0002,0xffda,
+0xfffa,0xfff3,0xfff1,0xffea,0xffd8,0xffe4,0xffe4,0x0000,
+0xfffb,0x0008,0x000a,0x000f,0x0007,0x0007,0xfffc,0x000d,
+0xffff,0xfff2,0x0009,0xfff2,0x0000,0x0002,0x0004,0x0012,
+0xfff9,0x0024,0xfffa,0x0008,0xfffd,0x000d,0xfff9,0x0008,
+0x000e,0x0007,0x0024,0x000d,0x000a,0x000a,0x0012,0x0000,
+0x0000,0x0005,0xffe7,0xfff8,0xffd5,0x0000,0xffd0,0xffee,
+0xffe5,0x0000,0xfff4,0x0003,0xfff9,0xfffa,0xfff9,0x000d,
+0xfff1,0xfffb,0x000b,0xfffd,0x0000,0x0002,0x0000,0xfff4,
+0xfffa,0x000a,0x0000,0x0017,0x0016,0x000b,0x001a,0x000c,
+0x0024,0xfffb,0x0018,0xfffc,0x0007,0xfff9,0x0006,0xffef,
+0xfff2,0xfffb,0xffea,0xffea,0xffdd,0xfff8,0xffd2,0xffff,
+0xffef,0xfffd,0x0000,0x0002,0xfffe,0xfff6,0xfffb,0x0000,
+0xfff2,0xfff1,0xfffe,0xffee,0xfffe,0xfffc,0x000b,0xfff9,
+0x0001,0x0024,0x0012,0x0020,0x0007,0x001b,0x0011,0x0014,
+0x0021,0x000d,0x001d,0x0018,0x0001,0x0001,0xfffe,0xffee,
+0x0000,0x0009,0xfffd,0x0011,0xffe5,0x0000,0xffeb,0xffe2,
+0xfff4,0xfff5,0xfffb,0xfffe,0x000c,0x0004,0x0009,0x0013,
+0x0012,0xfff7,0x000d,0xfffa,0x000b,0xfff8,0xfffe,0x0023,
+0xfff8,0x000a,0xfff9,0xffff,0x0004,0xfff5,0xfff6,0xfffe,
+0x0037,0x0001,0x0011,0x0006,0xffff,0xfff0,0xfff1,0xffdd,
+0xfff1,0xfffe,0x0000,0x0004,0xffed,0xfff8,0xffec,0xfff3,
+0xffff,0xfff2,0x0007,0xfffd,0x0012,0x0000,0x000a,0x0005,
+0x0013,0xffed,0x0000,0xffeb,0x0008,0xfff0,0x0009,0x0027,
+0x0000,0x0024,0x000a,0x0007,0xfff7,0x0004,0xffec,0x0005,
+0x0016,0x0005,0x002a,0x001a,0xfffa,0x0008,0x0001,0x0002,
+0xfff7,0x0011,0xfff6,0x0012,0xffeb,0x0013,0xffd9,0xffe1,
+0xffe9,0xffe9,0xfff0,0xfff1,0x0002,0xfff4,0x0007,0x0006,
+0x0005,0xfff7,0x0001,0xfff6,0x0007,0xfff0,0x0004,0x0009,
+0x0000,0x000a,0x0011,0x0016,0x0010,0x0002,0x000e,0x0009,
+0x002c,0xfffa,0x001e,0x0008,0xfff7,0xfff6,0xfff5,0xfff2,
+0xffe9,0x0005,0xfff8,0x0004,0xfff2,0x000c,0xffdb,0xfff2,
+0xfff4,0xffe6,0xfffc,0xfff0,0x0008,0xfff0,0x0009,0xfff9,
+0x0006,0xffed,0xfff4,0xffe7,0x0005,0xffe8,0x000f,0x000d,
+0x0008,0x0024,0x0022,0x001f,0x0001,0x0012,0x0004,0x0012,
+0xfffd,0xfffb,0xfff7,0xfff9,0x000f,0xffff,0x0005,0x000d,
+0x0002,0x000c,0x0005,0x0002,0xffeb,0xffe9,0xfffe,0xfff0,
+0x0000,0x0005,0xfffa,0x000d,0xffe9,0x0003,0xffe0,0x000a,
+0xfff1,0x0008,0x002c,0x001c,0x0009,0x0025,0xfffe,0x000d,
+0xfff7,0xfff1,0xfff4,0xffe5,0xfff9,0xfff4,0x0000,0xfff5,
+0x0012,0xffef,0xffeb,0xffe7,0x000b,0xffed,0xfffa,0xfffd,
+0xfff5,0x0000,0x0007,0xfff5,0xfff3,0xffe1,0xffff,0x0000,
+0x0009,0x0001,0x0005,0x000c,0xffee,0x0000,0xffe1,0xfffe,
+0xfff3,0xffff,0x001e,0x000e,0x0007,0x001d,0x0009,0x0012,
+0xffff,0x000a,0x0004,0xffee,0xffea,0x0003,0xfff6,0xfffe,
+0xfff3,0xfff3,0x0003,0xfffb,0x0007,0x0004,0x0009,0x0022,
+0xfffb,0x0014,0xfffe,0x0003,0xfff0,0xfffd,0xffec,0xffef,
+0xfff5,0xfff9,0xffef,0x0000,0xffde,0xfff3,0xffdf,0xfffe,
+0xffe4,0x0008,0x0020,0x0018,0x0005,0x001d,0x0003,0xfff4,
+0x0000,0xfff1,0x000b,0xfffd,0x0003,0x0002,0x0018,0x0001,
+0x0008,0xffe7,0xfff8,0xffe9,0x0003,0xfff3,0xfffd,0x0011,
+0xffec,0x0008,0x0000,0xfff6,0xfff8,0xfff5,0xffee,0x0000,
+0xffff,0xfff6,0xfffb,0x0000,0xffe4,0xffef,0xffe0,0xfff1,
+0xffe6,0xffff,0x0013,0x0009,0x0003,0x0015,0x000f,0xfff9,
+0x0006,0x0009,0x001d,0x0005,0xfff6,0x0011,0x000f,0x0009,
+0x0004,0xfffa,0xfffd,0x0005,0xffff,0xfffc,0xfff5,0x0010,
+0xfffa,0x0017,0x0013,0x001d,0x0000,0xfffd,0x0006,0xffe2,
+0x0003,0xffef,0xfff6,0xfffb,0xfff3,0xfffe,0xffef,0x0003,
+0x0005,0x0003,0x0023,0x0015,0x0011,0x0011,0x0002,0x0023,
+0xfffe,0xfff1,0x0003,0xffe4,0xfff3,0xffeb,0xfff3,0xfff3,
+0x001a,0xffed,0xfff1,0xfff4,0xfffb,0xffea,0xffe8,0x0000,
+0xffeb,0x000c,0x0015,0x000f,0x0008,0xfff5,0x0007,0xfff4,
+0x000e,0xffec,0x0002,0xfffa,0xfff9,0xfffa,0xfff0,0xfff7,
+0x0006,0xfffb,0x0015,0x0007,0x000f,0x000a,0x000d,0x0027,
+0x0005,0x000a,0x0014,0xffed,0xffe4,0xfffb,0xffea,0xfffb,
+0xfffb,0xfff1,0x0009,0x0007,0xfff7,0x0002,0xfff8,0x0025,
+0xfff2,0x001f,0x000b,0x001d,0x0005,0x0010,0xfff5,0xffe2,
+0xfff9,0xffe3,0xffeb,0xffee,0xffe9,0xffed,0xffee,0xfff7,
+0xfff9,0x0003,0x0017,0x0011,0x000e,0x0009,0x0008,0x0009,
+0x0006,0xfff1,0x001b,0xfffc,0xfffe,0xfffa,0x000c,0xffff,
+0x0010,0xffe5,0xfffe,0xfff6,0xfff3,0xfff0,0xffec,0x0014,
+0xffe3,0x0014,0x000e,0x0010,0x000d,0x0008,0xfff7,0xfff3,
+0x0002,0xffdf,0xfff7,0xffed,0xffef,0xffe9,0xffef,0xffea,
+0xfffa,0xfffa,0x0009,0x0002,0x000c,0x0002,0x0014,0x000d,
+0x000d,0x000a,0x002d,0x0004,0xfff0,0x0008,0x0002,0x0007,
+0xfff0,0x000e,0xfffe,0x0008,0x0014,0x0011,0x0009,0x0002,
+0x000e,0x0010,0xfffa,0x0005,0xffe8,0xffe4,0xffeb,0xffec,
+0xfff8,0x0009,0x0004,0x0019,0xffff,0x000b,0xffea,0x0018,
+0xfff1,0xfff8,0x0015,0x0005,0x000b,0x000e,0xfffb,0x0012,
+0xfff5,0x0007,0xffe5,0xffec,0xfff2,0xfff9,0x0001,0xfff7,
+0x0006,0x0002,0xfff2,0xfff7,0x0010,0xffff,0xfffd,0xfff2,
+0x0000,0x0005,0xfffd,0xfff8,0xfff0,0xffdc,0xffed,0xfffd,
+0x0001,0x0006,0x0011,0x0018,0x0004,0x0007,0xffeb,0x000b,
+0xfff2,0xffee,0x0007,0xfff7,0x0009,0x0007,0x0006,0x0016,
+0xfffd,0x0021,0xfff6,0xfff5,0xffe4,0x0007,0xfff9,0x0000,
+0xffe6,0x0006,0x000b,0x000a,0x000c,0x0017,0x000c,0x0017,
+0x0005,0x0018,0xfff3,0x0005,0xffed,0xfff8,0xffda,0xffeb,
+0xffec,0xfffe,0xfffa,0x000c,0xfff5,0xfffb,0xffe9,0x000b,
+0xffe3,0xfff7,0x0009,0x0000,0x0007,0x0006,0x0001,0xfff9,
+0xfffe,0x0007,0xfffd,0x0003,0xfffe,0x0006,0x001b,0x0003,
+0xfffc,0xfffa,0x0000,0xfff9,0x0008,0x0004,0x0000,0x0006,
+0xfff7,0x000d,0xfff5,0xfff9,0xfff5,0xfff1,0xffdb,0xfffc,
+0xfff7,0xfffb,0x0005,0x000b,0xfffb,0xfff7,0xffea,0xffff,
+0xffe5,0xffee,0xfffc,0xfff2,0x0005,0x0000,0x000c,0xfffd,
+0x0004,0x0020,0x000e,0x000c,0xffef,0x0016,0x0011,0x000b,
+0xfff8,0x000c,0x0003,0x0015,0x0003,0x000e,0xfff8,0x0005,
+0x0004,0x001c,0x0007,0x0020,0xfffe,0xfff8,0xfff4,0xffde,
+0xfffc,0xfff4,0x0001,0x0006,0x0009,0x0004,0xfff9,0x0011,
+0x0004,0xfff3,0x000b,0xffff,0x0013,0xfffc,0x0000,0x0027,
+0xfffc,0x0007,0xfff5,0xffeb,0xffec,0xfff0,0xfff6,0xfff5,
+0x000d,0x0000,0xfff8,0x0003,0x0000,0xfffc,0xffeb,0xfff5,
+0xfff7,0x0010,0x000a,0x0012,0x0005,0xfff0,0xfff6,0xfff0,
+0x0005,0xfff1,0x000d,0x0005,0x000f,0x0001,0xfffa,0x0004,
+0x0006,0xffe9,0xfffe,0xfff0,0x0011,0xfff4,0x000a,0x002c,
+0x0003,0x0021,0x0006,0xfff4,0xffde,0xffff,0xffec,0xfffd,
+0xffee,0x0004,0x0011,0x0017,0xfffc,0x0014,0xfffc,0x001a,
+0xfffd,0x0024,0x0000,0x0020,0x0002,0x000c,0xffe3,0xffde,
+0xfff0,0xffe8,0xfff6,0xfffa,0x0000,0xfff4,0xfff8,0x0004,
+0xfff8,0xfff3,0x0000,0xfffa,0x0010,0xfff4,0x0005,0x000d,
+0x0003,0x0007,0x000d,0x0003,0xfff8,0xfffe,0x000e,0x0000,
+0x0003,0xfff9,0x0005,0x0005,0xfff8,0x0002,0xffef,0x0009,
+0xffee,0x0018,0x0002,0x0013,0x000a,0x0004,0xffe4,0xffef,
+0xfffb,0xffe4,0x0002,0xfff9,0x0004,0xfff1,0xfff9,0xfff8,
+0xfffa,0xffe9,0xfff3,0xffeb,0x000e,0xffec,0x0011,0x0012,
+0x000b,0x0021,0x001e,0x000b,0xffe9,0x000d,0x0005,0x0009,
+0x003c,0x000a,0x0007,0xffff,0x0009,0xfff8,0x0006,0xfff3,
+0x0002,0xfff1,0xffff,0xfff6,0xfff3,0xfff5,0x000f,0x0000,
+0x0006,0x0009,0xffff,0x0000,0xfff3,0x0001,0xfff5,0xfffd,
+0xfff3,0x0015,0x000d,0x001a,0xfff9,0x001f,0xfff6,0xfff9,
+0xfff0,0xffdf,0xffe1,0xfff6,0x0016,0xfff8,0x0001,0xfffe,
+0x0052,0xffff,0xfffc,0xffed,0x0006,0xffe5,0xfffa,0xffe3,
+0xfff4,0xffe6,0x0001,0xffe8,0xfffb,0xffee,0x0011,0x0011,
+0x0011,0x0006,0x000a,0x0000,0xfff9,0xfffe,0xfff7,0xfff0,
+0xfff4,0x000b,0x0000,0x000b,0xfff7,0x0017,0x0000,0xfffd,
+0xfff8,0xfff8,0xfff3,0xffff,0x0008,0x0007,0xfff9,0x0006,
+0x0031,0x0002,0x0015,0x0000,0x0001,0xfffe,0x0009,0x0008,
+0xfffa,0xfffa,0xfff8,0xfff6,0xfff8,0x0009,0xfffe,0x0000,
+0xfffc,0xfffe,0xfff3,0xfff4,0xffe9,0xfff1,0xfff4,0xfff0,
+0xffe6,0x0015,0x0002,0x0015,0xfff5,0x0017,0xfffc,0xffdf,
+0xfff9,0xffdf,0xfffa,0x000d,0x0022,0x0005,0x001b,0x000a,
+0x0047,0xfff6,0x0009,0xffef,0xffff,0xffec,0xfffd,0xfff8,
+0xffeb,0xffee,0xfffa,0xffe8,0x0000,0x0001,0x0000,0x0010,
+0x0006,0xfffb,0x0000,0xfff3,0xffef,0xffed,0xfff5,0xffe3,
+0xffe7,0x000b,0xfff5,0x0006,0xfff3,0x000f,0x0007,0xffe3,
+0x0000,0xfff8,0x000b,0x0016,0x0014,0x0015,0x0011,0x0012,
+0x0043,0x0008,0x000e,0x000b,0xfff9,0xfff5,0xfff5,0xfff7,
+0xfff9,0xfffd,0x000d,0x0010,0x0008,0x0009,0x0018,0xfff4,
+0x000a,0xfff3,0xfffb,0xffef,0xfffe,0xfffc,0x0003,0xfff6,
+0x0006,0x0011,0x0004,0x0013,0x0000,0x000b,0xfffa,0x000d,
+0xfff7,0xffdf,0xfff2,0xfff6,0x0010,0xffef,0xfff6,0xfffc,
+0x005a,0xfffd,0x0002,0xfffa,0xfff6,0xffe3,0xffe8,0xffe6,
+0xffeb,0xfff1,0x000f,0x0002,0x0010,0x0001,0x0019,0x0004,
+0x0015,0xfff0,0x0006,0xffee,0x0003,0xfff8,0x0005,0xffe8,
+0x0008,0x0007,0xfff7,0x0004,0xffff,0x0003,0x0005,0x0012,
+0xffff,0xfff9,0x0002,0xffff,0x0002,0xffff,0xffed,0x0003,
+0x0039,0x0000,0x001b,0x000d,0xfff2,0xfffb,0xfff9,0x000b,
+0xfff1,0x0004,0x0005,0x0010,0x000d,0x001d,0x0006,0xfff3,
+0x0000,0xffe7,0xfff0,0xffe1,0xfff4,0xffea,0x0002,0xffe9,
+0xfffa,0x0010,0xfff9,0x000e,0xfffe,0x0003,0x0000,0xfff4,
+0x0000,0xffdf,0x0009,0x000d,0x001c,0xfffd,0x000e,0x0007,
+0x004f,0xfff5,0x000f,0xfffc,0xffee,0xffe9,0xffec,0xfffb,
+0xffe2,0xfff9,0x0007,0x0002,0x0015,0x0015,0x0008,0x0003,
+0x000a,0xffe4,0xfffc,0xffe1,0xfffa,0xffe7,0x0003,0xffdb,
+0xfffc,0x0007,0xffec,0x0000,0xfffc,0xfffc,0x000b,0xfff9,
+0x0006,0xfff8,0x001b,0x0016,0x000e,0x000c,0x0005,0x0010,
+0x002f,0x001e,0x000f,0x000e,0x000e,0x0009,0x0009,0xffe9,
+0x000d,0xfff6,0xfff4,0xfff9,0xfff0,0xfff1,0xfffd,0xfffd,
+0xffff,0x000e,0x0009,0x000c,0x0009,0x0008,0x0000,0x000a,
+0xfff2,0x0004,0xfff7,0x0002,0xfffb,0x0008,0xfff3,0xfffd,
+0xffee,0xfff6,0xffd3,0xfffd,0x0010,0xfffc,0x0004,0x0000,
+0x0045,0x0011,0x0003,0xfffd,0x000a,0xfff8,0xfffd,0xffd8,
+0xffff,0xffeb,0xfff6,0xffeb,0xfff8,0xffe9,0xffff,0x000d,
+0x0008,0x000b,0x0015,0x000b,0x000f,0x0004,0x0000,0xfffe,
+0xfff3,0xfffb,0xffe9,0xfff4,0xfff9,0x0000,0xffff,0x0000,
+0xfff6,0x000e,0xffe4,0x0005,0x0001,0x000b,0xfffb,0x0007,
+0x0024,0x0015,0x001c,0x0010,0x0006,0x0010,0x000c,0xfffe,
+0x0004,0xfffe,0xffec,0xfff9,0xfff5,0x0004,0xffec,0xfffc,
+0xfff4,0x0002,0xffff,0x0000,0x0000,0xfff8,0xfffe,0xfffe,
+0xffe5,0x0004,0xffeb,0xfffe,0xfff7,0x0000,0xfffa,0xffe3,
+0xfff7,0xfff6,0xffeb,0x0015,0x001c,0x000a,0x001d,0x000b,
+0x003a,0x0009,0x0010,0xffff,0x0002,0xfffe,0x0000,0xffed,
+0xfff6,0xfff3,0xffef,0xffeb,0xfffd,0xfffd,0xffed,0x000c,
+0xfffe,0x0000,0x000a,0xffff,0x0005,0xfff4,0x0000,0xfff1,
+0xffe6,0xfffb,0xffde,0xfff0,0xfff5,0xfff9,0x0004,0xffe7,
+0xfffe,0x000e,0xfffd,0x001d,0x000d,0x0019,0x0014,0x0014,
+0x0037,0x001c,0x0015,0x001b,0xfffe,0x0007,0xfff8,0xffec,
+0x0004,0x0001,0x0001,0x0012,0x0005,0x0004,0x0005,0xfff0,
+0x0002,0xfff8,0x0005,0xfffb,0x0013,0x0002,0x000e,0x0003,
+0x0006,0x0000,0xffee,0xfffc,0x0002,0xfff5,0xfff8,0x0012,
+0xfff5,0xfff6,0xffe3,0xfffd,0x000a,0xfff3,0xfff8,0xfffd,
+0x004d,0x0010,0x0009,0x0009,0xfffa,0xfff5,0xffeb,0xffdb,
+0xfff6,0xfff6,0x0004,0x0005,0x000d,0xfffd,0x0007,0x0000,
+0x000d,0xfff5,0x0011,0xfffa,0x0019,0xffff,0x000f,0xfff7,
+0x0007,0xfff7,0xffe0,0xffed,0x0000,0xffee,0x0002,0x0016,
+0xfffd,0x000f,0xfff4,0x0005,0xfffc,0x0002,0xffef,0x0005,
+0x002c,0x0014,0x0022,0x001d,0xfff6,0x000d,0xfffc,0x0000,
+0xfffc,0x0009,0xfffb,0x0013,0x000a,0x0018,0xfff5,0xffef,
+0xfff8,0xffec,0xfffb,0xffed,0x0009,0xfff2,0x000c,0xfff7,
+0xfffa,0x0000,0xffe2,0xfff7,0x0000,0xffed,0xfffe,0xfff9,
+0xfffe,0xfff6,0xfffb,0x0014,0x0015,0x0001,0x0011,0x0009,
+0x0042,0x0008,0x0017,0x000b,0xfff2,0xfffb,0xffef,0xfff0,
+0xffed,0xfffe,0xfffd,0x0005,0x0012,0x0011,0xfff6,0x0000,
+0x0001,0xffe9,0x0006,0xffec,0x000f,0xffee,0x000e,0xffea,
+0xfffb,0xfff6,0xffd4,0xffe9,0xfffe,0xffe6,0x0009,0xfffd,
+0x0004,0x000e,0x000c,0x001d,0x0007,0x0010,0x0007,0x0012,
+0x0012,0x0009,0xffef,0xfffc,0x000b,0x0003,0x0000,0x000b,
+0x0007,0x0004,0x000a,0x0003,0x000a,0xffee,0x0018,0xfffd,
+0x000e,0x0007,0x0004,0x000a,0xfff0,0x0001,0xffe5,0xfffc,
+0xffe5,0x0011,0x000c,0x001e,0x0000,0x0023,0xfff7,0xfffd,
+0xfff4,0xffdc,0xffdd,0xffe2,0xfffe,0xfff3,0x0002,0xfff5,
+0x0028,0xfffe,0xffe3,0xffea,0x0007,0xfff2,0xfff4,0xfffb,
+0xfff9,0xfff9,0x000c,0xfff7,0x0012,0xffe6,0x001a,0x000e,
+0x0018,0x0004,0x0010,0x0009,0xfff6,0xfffe,0xffe6,0xffee,
+0xffe6,0x0007,0xffff,0x000f,0xffff,0x001b,0x0002,0x0000,
+0xfffc,0xfff5,0xffef,0xffeb,0xfff0,0x0001,0xfff9,0xfffd,
+0x0008,0x0001,0xfffd,0xfffe,0x0003,0x000a,0x0003,0x0020,
+0xffff,0x000c,0x0002,0x0004,0x000f,0x0001,0x0007,0xfffd,
+0x0002,0xfffc,0xfffa,0xfffd,0xffe6,0xfff1,0xffe3,0xffef,
+0xffd8,0x0011,0x0000,0x001a,0xfffe,0x001b,0xfffe,0xffe3,
+0xfffc,0xffdc,0xfff6,0xfffa,0x0009,0x0000,0x001b,0x0000,
+0x001e,0xfff5,0xfff1,0xffec,0x0000,0xfff8,0xfff7,0x000f,
+0xfff1,0x0000,0x0005,0xfff7,0x0017,0xfffa,0x0008,0x000d,
+0x000d,0xfff9,0x0005,0xfffd,0xffec,0xffed,0xffe5,0xffe1,
+0xffd9,0x0007,0xfff3,0x000b,0xfffc,0x0013,0x0008,0xffe7,
+0x0003,0xfff5,0x0007,0x0002,0xfffc,0x0010,0x0012,0x0009,
+0x001a,0x0007,0xfff5,0x0008,0xfffb,0x0001,0xffef,0x000e,
+0xffff,0x000f,0x0018,0x001e,0x0020,0x0001,0x0021,0xfff0,
+0x0012,0xfff2,0x0000,0xfff8,0xfffa,0xfffc,0xfff4,0xfff4,
+0xfffa,0x000d,0x0002,0x0017,0x0008,0x000f,0xfffc,0x0011,
+0xfffb,0xffdc,0xffee,0xffe2,0xfff8,0xffea,0xfff6,0xfff2,
+0x0030,0xfffc,0xffe9,0xfff7,0xfff7,0xffef,0xffe2,0xfffe,
+0xfff0,0x0003,0x001a,0x0010,0x0028,0xfffa,0x0023,0x0001,
+0x001c,0xffef,0x000c,0xfff7,0x0000,0xfff8,0xfff5,0xffe7,
+0xfffb,0x0003,0xfff6,0x0008,0x0006,0x0007,0x0006,0x0016,
+0x0001,0xfff5,0xffff,0xffeb,0xffea,0xfff9,0xffed,0xfffb,
+0x000f,0x0000,0x0002,0x000a,0xfff3,0x0007,0xfff2,0x0023,
+0xfff6,0x0017,0x0010,0x001f,0x0025,0x0015,0x0010,0xffef,
+0x0006,0xffe6,0xfff6,0xffeb,0xfff0,0xffeb,0xfff3,0xffe7,
+0xffed,0x000d,0xfff8,0x0013,0x0005,0x0007,0x0001,0xfff8,
+0x0002,0xffdc,0x0005,0xfffa,0x0003,0xfff8,0x000f,0xffff,
+0x0025,0xfff4,0xfff7,0xfff9,0xffef,0xfff5,0xffe6,0x0012,
+0xffe7,0x000c,0x0013,0x0011,0x002d,0x000e,0x0011,0x0000,
+0x0011,0xffe2,0x0001,0xffea,0xfff6,0xffe7,0xfff4,0xffda,
+0xffee,0x0003,0xffea,0x0004,0x0003,0x0000,0x000d,0xfffd,
+0x000a,0xfff5,0x0017,0x0002,0xfff6,0x0007,0x0005,0x0007,
+0x0005,0x001d,0xfff7,0x000b,0x000f,0x0016,0x0003,0x0000,
+0x0012,0x0008,0xffff,0x0006,0x0007,0xffe9,0x0006,0xfffa,
+0x0005,0x000c,0x000f,0x0015,0x0005,0x0008,0xffef,0x0009,
+0xffe4,0x0000,0xfff5,0x0006,0x0002,0x000c,0xfff5,0x0000,
+0xfff2,0xfff3,0xffcf,0xffea,0xfff8,0xfff7,0x0004,0xfff7,
+0x001b,0x0010,0xffeb,0xfffa,0x000c,0x0003,0xfff7,0xfff0,
+0x0003,0xfffe,0x0001,0xfff9,0x000f,0xffe1,0x0007,0x000a,
+0x0010,0x0009,0x001b,0x0015,0x000b,0x0005,0xfff0,0xfffd,
+0xffe6,0xfff7,0xffe8,0xfff9,0x0000,0x0004,0x0000,0x0004,
+0xfffa,0x000b,0xffe0,0xfff2,0xffe9,0x0006,0xfffb,0xffff,
+0xfffc,0x0014,0x0003,0x000d,0x0008,0x001c,0x0006,0x0015,
+0x000a,0x0010,0xfff8,0x0007,0x000c,0xfffd,0xfff5,0xfff9,
+0xfffb,0x0000,0x0004,0x0008,0xfffc,0xfff8,0xffee,0xfffd,
+0xffd7,0x0000,0xffea,0x0002,0x0000,0x0004,0xfffb,0xffe7,
+0xfffa,0xfff2,0xffe7,0x0001,0x0002,0x0004,0x001d,0x0002,
+0x0011,0x0008,0xfff8,0xfffc,0x0004,0x000a,0xfffa,0x0005,
+0xfffc,0x0005,0xfffa,0xfffa,0x0014,0xfff6,0xfff7,0x0009,
+0x0004,0xfffe,0x0010,0x0007,0x0001,0xfff4,0xffef,0xfff0,
+0xffd9,0xfff7,0xffdc,0xfff4,0xfffe,0xfffd,0x0006,0xffeb,
+0x0001,0x000b,0xfff9,0x000a,0xfff5,0x0014,0x0014,0x000b,
+0x000d,0x001b,0xfffd,0x0018,0xffff,0x0013,0xfff2,0x0003,
+0x0009,0x0014,0x000c,0x0021,0x001d,0xfffd,0x000f,0xffec,
+0x0009,0xfff7,0x000b,0x0003,0x0010,0x0002,0xfffe,0x0002,
+0xfff9,0xfffd,0xffec,0x0000,0x000a,0xfff9,0xfff9,0x0016,
+0xfff9,0xfff3,0xffdf,0xffe9,0xfff2,0xffee,0xfff9,0xfff4,
+0x0023,0x000f,0xfff1,0x0006,0xfffc,0x0001,0xffe5,0xfff4,
+0xfffb,0x0008,0x000f,0x0013,0x0025,0xfff5,0x0010,0xfffe,
+0x0014,0xfff4,0x0017,0x0002,0x0016,0xffff,0xffff,0xfff5,
+0xfffb,0xfff3,0xffde,0xfff2,0x0008,0xfff2,0x0004,0x001a,
+0x0000,0x000b,0xfff0,0xfff2,0xffe3,0xfffe,0xffef,0xfffd,
+0x0003,0x0013,0x0009,0x001a,0xfff8,0x001a,0xfff6,0x0018,
+0x0000,0x001c,0x0005,0x0021,0x0022,0x0011,0xfffe,0xffec,
+0xffff,0xffea,0x0000,0xfff6,0x0006,0xfff2,0xfffd,0xfff6,
+0xffec,0xfffc,0xffe0,0xfffc,0x0007,0xfff1,0x0000,0xfffd,
+0x0000,0xfff3,0xfff7,0x0000,0xfffd,0xfffc,0x0011,0x0000,
+0x0019,0x0007,0xfffe,0x0008,0xfff4,0x0007,0xffe9,0x0008,
+0xfff3,0x0010,0x0007,0x0014,0x002a,0x0009,0x0000,0xfffd,
+0x0009,0xffe7,0x000c,0xfff6,0x000c,0xffee,0xfffe,0xffe8,
+0xffed,0xfff3,0xffd2,0xffed,0x0005,0xffea,0x000a,0x0000,
+0x0008,0x000b,0x0008,0x0009,0xffef,0x000b,0x0007,0x0008,
+0xffe7,0xfff9,0x0002,0xfff8,0x000c,0xfff9,0x0017,0xfff8,
+0x000d,0xfff0,0xfff9,0xffec,0xffd6,0xfff7,0x0000,0x0003,
+0xfff7,0x0019,0xfff4,0x000a,0xfff4,0x0012,0x0002,0x0018,
+0x0013,0x000d,0x002e,0x0013,0xfffb,0x0016,0xfff6,0x0000,
+0xfff8,0xfff5,0xfff8,0xfff7,0x0011,0xfff5,0xfff9,0xfff8,
+0xfffd,0xffed,0xfff7,0xffe6,0x0008,0xffe6,0x000a,0xffe8,
+0x0000,0xffe4,0xfffb,0xffdf,0xffde,0xffef,0x0002,0x0014,
+0x0001,0x0016,0x0000,0x000a,0xfffa,0x000e,0x0003,0x000a,
+0x0014,0x0004,0x0020,0x0004,0xfff9,0x000f,0x0000,0x0003,
+0x0000,0x000d,0x0009,0x0000,0x0003,0x0004,0xffef,0x0000,
+0xffdd,0xfff1,0x000f,0xfffa,0x0004,0xffff,0x001b,0x000c,
+0x0005,0xfff8,0xfff1,0xffed,0xffdb,0x000b,0xfff0,0x0002,
+0xffec,0x000c,0xffe9,0xfffe,0xffea,0x0001,0x0001,0x000b,
+0x0005,0x000d,0x0022,0x000e,0xfff8,0x000e,0xfffc,0xffe6,
+0x0000,0xfff5,0x0010,0x000e,0x001d,0x0002,0x0011,0x0003,
+0xfff3,0xffe5,0x0003,0xffe8,0x0001,0xffed,0x000e,0xfffd,
+0xfff7,0xffec,0xfff4,0xffdf,0xffe3,0x0003,0xfff1,0x0014,
+0xfff7,0x0009,0xfff5,0xfffd,0xfff0,0xfffe,0x0002,0xfffe,
+0x0007,0x0003,0x0014,0x0000,0xfff6,0x0007,0x0007,0xffea,
+0x0007,0x000d,0x0021,0x0017,0x000e,0x0012,0x0007,0x000c,
+0xffef,0xfff8,0x0008,0x0004,0xfffc,0xfff6,0x0006,0xfffb,
+0x0004,0xfffb,0x0006,0x0006,0xffec,0x000a,0x0009,0xfff7,
+0xfffc,0x0002,0xfff0,0xfff9,0xffff,0x000c,0x0011,0x0010,
+0x0027,0x0009,0x0024,0x000c,0x0002,0x0003,0xfffa,0x0015,
+0xffff,0xfff5,0x0007,0xfff6,0x000b,0xffec,0xffec,0xfff5,
+0x0004,0xffec,0xfffd,0xfff3,0xfff8,0xffe4,0xfffa,0xffeb,
+0xfff6,0xfff0,0x0008,0xfff9,0xfff4,0x0003,0x000b,0x0007,
+0x0005,0x0000,0xfffc,0xfff8,0x0004,0x0008,0x0012,0x0003,
+0x0029,0x0000,0x0016,0xfffe,0x0000,0xfffc,0x0005,0x0019,
+0x0006,0x000e,0x0019,0xffff,0xfffe,0xfffc,0xffe3,0xfffe,
+0xffe5,0xfff0,0x0015,0x0006,0xfff4,0xfffd,0x0009,0x0010,
+0xfffd,0x0002,0xffff,0x0007,0xfff1,0x001f,0xfff9,0xfff6,
+0xfff0,0xfff7,0xffe5,0xffeb,0xfff5,0xfffb,0x0010,0x0003,
+0x001a,0x0009,0x0018,0x0007,0x0000,0xfffc,0x0000,0xfffc,
+0x0006,0xfff5,0x0020,0x000e,0x0017,0xfffa,0x0005,0x0001,
+0xfffb,0xffe3,0x0009,0xfff5,0xfff1,0xffea,0xfffd,0x0000,
+0xffee,0xfff8,0x0001,0xfffa,0xfff9,0x0017,0xfffa,0x0006,
+0xfffb,0xfff4,0xfff1,0xffeb,0xfffb,0xfff8,0x0011,0xfff7,
+0x001c,0x0000,0x000b,0xfffa,0xfffe,0xfff4,0x000b,0x0000,
+0x000e,0x000d,0x0031,0x0016,0x0008,0x0009,0xfffc,0x0009,
+0xffdb,0x000c,0x0009,0x0007,0x0011,0x000b,0x001a,0xffee,
+0x0019,0xfff4,0xffed,0xffef,0xffd3,0xfff2,0xffef,0x0000,
+0xffef,0x001e,0xffff,0x0016,0x000a,0x0019,0x000c,0x0026,
+0x0012,0xfffd,0x0016,0xfffc,0xfffd,0x0000,0xfff3,0x0003,
+0xfff6,0x000b,0xffe9,0xfffe,0x000a,0xfff9,0xfffb,0xfff9,
+0xfff1,0x0000,0xfffe,0xfff6,0x000d,0xfff9,0x000e,0xffdd,
+0x000a,0xffe9,0xfff0,0xffe1,0xffdb,0xffeb,0xfff0,0x0011,
+0xfffa,0x001a,0x000a,0x0015,0x0010,0x0015,0x000d,0x0019,
+0x0013,0xfff3,0x0008,0xffed,0xfffb,0xfff9,0xffff,0x0008,
+0xfffe,0x0024,0xfffb,0x0006,0xfffd,0x0008,0xfff1,0x0001,
+0xffd0,0x0004,0x0017,0x0009,0x0009,0x0011,0x001e,0x0002,
+0x0010,0xfffd,0xffe6,0xfff0,0xffd8,0x0006,0xffdd,0xffff,
+0xffe4,0x0011,0xfff4,0x0009,0x0000,0x0008,0x000b,0x0019,
+0x0005,0xfffd,0x000a,0xfff8,0xfffa,0xfff9,0xfffa,0xffea,
+0xffff,0x000b,0x0001,0x0015,0x0016,0x0007,0x0013,0x0005,
+0xffe6,0xfff9,0x000b,0xfff8,0x0005,0xffff,0x0011,0xfff2,
+0x0001,0xfff1,0xffe8,0xffe2,0xffe0,0xffff,0xffdf,0x0010,
+0xffee,0x000e,0x0000,0x0008,0x0006,0x0004,0x000c,0x000c,
+0x0006,0xfff3,0xfffe,0xffe9,0xfff8,0xfff1,0x0004,0xffef,
+0x0005,0x0024,0x0012,0x001e,0x0008,0x0016,0x000a,0x000e,
+0xffe2,0x000b,0x000f,0x0014,0x0000,0x0008,0x0009,0xfff1,
+0x000f,0x0000,0xfffb,0x0009,0xffe9,0x0006,0xfff8,0xfff3,
+0xfff3,0x0007,0xfffb,0x0003,0x0014,0x0013,0x001b,0x001f,
+0x0027,0xfff9,0x000d,0xfff5,0x0004,0xffed,0xfff8,0x0019,
+0xfffd,0x000b,0xfff9,0xfffe,0x0004,0xfff0,0xffee,0xfff7,
+0xfff8,0xffff,0x0003,0x0002,0xfffd,0xfff6,0xfffd,0xffe0,
+0x0001,0xfff4,0xfffe,0xfffc,0xfff1,0xffff,0xfff9,0x0003,
+0xfffe,0x0004,0x0006,0x0003,0x001a,0x000f,0x001d,0x0011,
+0x0028,0xffef,0x0000,0xffe6,0x0002,0xffe5,0x0002,0x001d,
+0x0004,0x0024,0x000a,0x0006,0xfff7,0x0000,0xffe5,0x0000,
+0xffd8,0x0002,0x001d,0x0016,0xfff9,0x000e,0x000c,0x0005,
+0x0007,0x0007,0xfff4,0x0009,0xffee,0x001a,0xffe6,0xfff2,
+0xffe8,0xfffc,0xfff0,0xfff7,0x000a,0x0002,0x001a,0x0012,
+0x001a,0xfff9,0x0001,0xfff1,0x0001,0xffe5,0xfffe,0x0000,
+0x0004,0x000b,0x0011,0x0015,0x0010,0xffff,0x0007,0x0003,
+0xffee,0xfff7,0x0011,0x0004,0xfff5,0xfffd,0x0000,0xfff5,
+0xfff9,0xfffc,0xfff6,0xfffd,0xfff6,0x0012,0xffe8,0x0003,
+0xfff2,0xfff9,0xfffc,0xfff6,0x0010,0xffff,0x001b,0x0004,
+0x001b,0xffef,0xfff4,0xffe2,0x0000,0xffdd,0x0009,0x0003,
+0x000c,0x0024,0x0023,0x001e,0x0002,0x000d,0xfffe,0x000b,
+0xffbe,0xfff8,0xffe9,0xfff5,0x000e,0x0005,0x0011,0x0010,
+0x0013,0x0002,0x0003,0xfffb,0xffee,0xffef,0x000a,0x0000,
+0xffff,0x0017,0xfffa,0x0014,0xfff1,0x0012,0xfff2,0x0016,
+0x0005,0x000a,0x002c,0x0017,0x0002,0x001a,0xfff7,0x0003,
+0xfffc,0xfff2,0xfff4,0xffe3,0xfff9,0xfff0,0xfff9,0xffee,
+0xffd4,0xffec,0xffde,0xffe3,0x000a,0xfff3,0x0004,0x0000,
+0x0004,0xfff7,0x0005,0xffed,0xfff6,0xffe8,0x000b,0x0011,
+0x0008,0x0014,0x0005,0x0013,0xfff7,0x000e,0xfff4,0x0009,
+0x0006,0x0000,0x001e,0x0009,0x0000,0x0013,0x0002,0x0007,
+0x0002,0x000a,0x0005,0xffec,0xffeb,0xffff,0xffef,0xfff7,
+0xffb3,0xfff0,0xfff7,0xfff7,0x0006,0x000b,0x0015,0x0025,
+0x000a,0x000a,0xfffc,0xfffb,0xfff3,0x0003,0xfff9,0x0000,
+0xfff3,0x000b,0xffef,0x0006,0xffe7,0x0001,0xfff1,0x0009,
+0xfff9,0x0009,0x0020,0x0013,0x0000,0x0012,0xfffe,0xffea,
+0x0003,0xfff1,0x000c,0xfffb,0x0004,0xfffe,0x0011,0xfffb,
+0xffc9,0xffe4,0xffeb,0xffe5,0x0002,0xfff9,0x0008,0x0014,
+0xfffc,0xffff,0xffff,0xffee,0xfffb,0xfffc,0xfffb,0x0010,
+0xfffe,0x0008,0xfffb,0x0005,0xffed,0xfffe,0xfff2,0xfffd,
+0xfffa,0x0000,0x0012,0x0004,0xfffe,0x000b,0x0008,0xffee,
+0x000b,0x000a,0x001d,0x0003,0xfff6,0x000d,0x0008,0x0003,
+0xffc6,0xfff7,0xfff0,0x0001,0xfffe,0x0002,0x0000,0x0013,
+0x000a,0x000d,0x0011,0x0015,0x0003,0x0003,0x0013,0xfff4,
+0x0002,0x0000,0xfff6,0x0001,0xfffb,0x000c,0x0000,0x000f,
+0x001a,0x0005,0x0022,0x0010,0x000b,0x0007,0xfffc,0x0019,
+0x0002,0xfff2,0x0003,0xffe3,0xfff3,0xffe7,0xffec,0xffec,
+0xffdc,0xffeb,0xffe4,0xfff0,0xfffa,0xfff0,0xfff4,0x0002,
+0xfffc,0x0002,0x0014,0x0007,0x000b,0xfffc,0x0014,0x0004,
+0x000c,0xfffe,0x0001,0x0000,0x0000,0x0008,0x0002,0x0002,
+0x001b,0xfffc,0x0015,0x0002,0x0009,0x0000,0x0006,0x001d,
+0x0009,0x000a,0x0015,0xffeb,0xffe4,0xfff6,0xffe3,0xfff5,
+0xffbb,0xffef,0xfffd,0x0003,0xfff6,0x0008,0x0003,0x0028,
+0x0001,0x0015,0x000a,0x0015,0x0008,0x0017,0x0001,0xfff3,
+0xfff8,0xfff5,0xffeb,0xfff5,0xfff1,0xfffc,0x0000,0x0002,
+0x000d,0x0005,0x0017,0x000c,0x0007,0x0000,0x0001,0x0000,
+0x000a,0xfff2,0x001c,0xfffb,0xffff,0xfff5,0x0005,0xfff9,
+0xffd1,0xffe2,0xfff1,0xfff2,0xfff2,0xfff7,0xfff7,0x0017,
+0xfff3,0x000a,0x000c,0x0007,0x0010,0x000f,0x0003,0x0003,
+0x0001,0xfff2,0xfff7,0xfff4,0xfff7,0xfff8,0x0000,0xfff6,
+0x000e,0xfffc,0x0009,0xfffe,0x0005,0xfff8,0x000d,0x0003,
+0x0012,0x000a,0x002d,0x0003,0xfff0,0x0004,0xfffc,0x0000,
+0xffb1,0x000b,0xfff1,0x0004,0x0012,0x0017,0x0014,0x0005,
+0x001e,0x0007,0xfff9,0xfffe,0xffeb,0xffeb,0xfff8,0xfffd,
+0xfff6,0x001c,0x0004,0x001f,0x0006,0x0019,0xfffd,0x0025,
+0x0004,0xfff9,0x0014,0x0000,0x0004,0x0004,0xfff5,0x0007,
+0xfffa,0x0008,0xffe5,0xffea,0xfff2,0xfff4,0xfffb,0xfff0,
+0xffc7,0x0000,0xffe5,0xfff3,0x000e,0x0005,0x0007,0xfff5,
+0x000f,0xfffc,0xfffb,0xfff0,0xfff3,0xffe3,0xfffa,0x000d,
+0x0000,0x0019,0x0010,0x001f,0x000c,0x0016,0xfffe,0x0017,
+0x0006,0xfff0,0x0007,0xfff2,0x0002,0xfffd,0x0000,0x000c,
+0x0000,0x0021,0xfff7,0xfff3,0xffe4,0x0003,0xfff2,0xfff9,
+0xffa6,0x0003,0xfffe,0x0006,0x000a,0x001d,0x0018,0x001a,
+0x0015,0x000f,0xfff1,0xfffe,0xfff0,0xffff,0xffe7,0xfffc,
+0xffeb,0x0010,0xfffa,0x0012,0xfffd,0x0008,0xfffb,0x0018,
+0xfff8,0xfff9,0x0009,0xfffc,0x0001,0xfffd,0xfffb,0xffee,
+0x0001,0x0007,0xfffe,0x0001,0xfffe,0x0001,0x0013,0xfffd,
+0xffbd,0xfff8,0xfff2,0xfff5,0x0007,0x000b,0x000b,0x0009,
+0x0007,0x0003,0xfff3,0xfff0,0xfff8,0xfff7,0xffe8,0x000c,
+0xfff6,0x000d,0x0005,0x0011,0x0002,0x0004,0xfffd,0x000a,
+0xfffa,0xffef,0xfffc,0xffed,0x0000,0xfff5,0x0006,0xfff3,
+0x0009,0x0021,0x000e,0x000a,0xfff0,0x0011,0x000a,0x0004,
+0xffb9,0x000a,0xfff7,0x0011,0x0001,0x0014,0x0003,0x0008,
+0x0015,0x0012,0x0006,0x0018,0x0000,0xffff,0x0000,0xfff0,
+0xfffa,0x0005,0x0000,0x000d,0x0011,0x0013,0x000b,0x001d,
+0x0019,0xfff5,0x000b,0xfffa,0x000d,0xfff1,0xfff9,0x001d,
+0x0000,0x0008,0xfff5,0xffea,0xffec,0xffeb,0xffef,0xffee,
+0xffcf,0xfffe,0xffeb,0x0000,0xffff,0x0002,0xfff7,0xfff8,
+0x0006,0x0006,0x0008,0x000a,0x0008,0xfff7,0x0002,0x0000,
+0x0004,0x0002,0x000d,0x000c,0x0017,0x000f,0x000c,0x0010,
+0x001a,0xffeb,0xfffe,0xffeb,0x000b,0xffe9,0x0004,0x0021,
+0x0007,0x0021,0x0006,0xfff3,0xffde,0xfffb,0xffe5,0xfff6,
+0xffae,0x0001,0x0004,0x0013,0xfffa,0x001b,0x0006,0x001d,
+0x000c,0x001a,0xffff,0x0018,0x0005,0x0012,0xffef,0xffef,
+0xffef,0xfffa,0xfff6,0x0000,0x0007,0x0002,0x0009,0x0010,
+0x000c,0xfff5,0x0000,0xfff5,0x0009,0xffe9,0x0000,0x0003,
+0x0008,0x0008,0x000d,0x0001,0xfff8,0xfff9,0x0007,0xfffa,
+0xffc4,0xfff6,0xfff9,0x0001,0xfff7,0x0008,0xfffa,0x000d,
+0xfffe,0x000f,0x0001,0x000a,0x000d,0x000b,0xfff1,0x0000,
+0xfffa,0xfff7,0x0001,0x0000,0x000d,0xffff,0x000b,0x0003,
+0x000d,0xffeb,0xfff3,0xffe6,0x0007,0xffe1,0x000a,0x0007,
+0x0010,0x0021,0x001f,0x000a,0xffea,0x0008,0xffff,0x0002,
+0xfffd,0x0007,0xfffb,0xfffb,0x0008,0xfffe,0x0011,0xfff7,
+0x0012,0xffe8,0xfffe,0xffed,0xfff6,0xfffc,0x001c,0x0011,
+0x0005,0x001c,0xfffe,0x0007,0xfffc,0x000f,0x0007,0x0008,
+0x0006,0x0017,0x000d,0x0015,0xfff2,0x0014,0xffef,0xffee,
+0xfff5,0xffdf,0xffe2,0xfff5,0x0017,0xfff3,0xfffb,0xfff7,
+0x0012,0xfffc,0xffef,0xffe9,0x0004,0xffec,0x0004,0xffe6,
+0x0003,0xffdc,0x0000,0xffe0,0xfffe,0xfff4,0x001d,0x0022,
+0x0010,0x0018,0x000a,0x0006,0x0000,0x000c,0x0008,0xfffc,
+0x0008,0x000d,0x0000,0x0006,0xfff0,0x000c,0xfffb,0xfff3,
+0xfffd,0xfff9,0xfff3,0xfffd,0x0008,0x0002,0xfff2,0x0000,
+0xfff3,0x0000,0x0008,0xfffd,0x0000,0x0004,0x0015,0x000b,
+0x0009,0xfff0,0xfff6,0xffee,0xfffb,0x0010,0x000a,0x0010,
+0xfffb,0x000f,0xfff3,0xfffb,0xfff1,0xffff,0x0006,0xfffc,
+0xfffa,0x0017,0x0002,0x0010,0xffef,0x000c,0xfff6,0xffd4,
+0xfffd,0xffdf,0xfffa,0x000c,0x0022,0x0001,0x0014,0x0003,
+0x0008,0xfff4,0xfffd,0xffeb,0xfffd,0xfff2,0x0008,0xfffb,
+0xfffc,0xffe4,0xfff9,0xffe0,0x0002,0x0008,0x000c,0x0022,
+0x0004,0x000c,0xffff,0xfffa,0xfff7,0xfffc,0x0007,0xffef,
+0xfffc,0x000d,0xfff5,0x0001,0xffed,0x0004,0x0000,0xffd9,
+0x0004,0xfff9,0x000b,0x0015,0x0014,0x0010,0x000a,0x000b,
+0x0004,0x0006,0x0000,0x0007,0xfff8,0xfffc,0x0000,0xfffa,
+0x0009,0xfff3,0x000b,0x0007,0x000b,0x000f,0x0025,0x0004,
+0x0009,0x0005,0xfffb,0xfff5,0x0005,0x0009,0x0016,0x0001,
+0x001b,0x0012,0x0004,0x000e,0xfffb,0x0000,0xfff4,0x0003,
+0xfffc,0xffe0,0xfff2,0xfff4,0x0011,0xffea,0xffef,0xfff5,
+0x001a,0xfffa,0xfff5,0xfff6,0xfff4,0xffe9,0xfff4,0xffe9,
+0xfffb,0xffe8,0x000d,0xfffb,0x0013,0x0008,0x0026,0x0015,
+0x0014,0x0002,0x0006,0xfff4,0x000b,0x0005,0x0017,0xfff5,
+0x001d,0x0009,0xfff7,0x0000,0xfff9,0xfffa,0xffff,0x0007,
+0x0002,0xfff9,0x0003,0xfffd,0x0002,0xfffa,0xffe5,0xfffd,
+0xfffa,0xfffe,0x000e,0x0009,0xfff0,0x0001,0x0003,0x000e,
+0x0000,0xfffb,0x0003,0x0008,0x0010,0x0024,0x0013,0x0003,
+0xffff,0xfffa,0xffef,0xffe8,0xfffc,0xfff9,0x0015,0xfff5,
+0x000e,0x0012,0xfff9,0x0009,0xfff7,0xfff9,0xfffa,0xffea,
+0x0003,0xffdf,0x000a,0x000b,0x001c,0xfff9,0x0007,0x0000,
+0x0010,0xfff2,0x0002,0xfff8,0xffec,0xffef,0xfff7,0xfffe,
+0xfff2,0xfff0,0x0006,0xfffb,0x0018,0x001c,0x0015,0x0014,
+0x0008,0xfff7,0xfffc,0xffe7,0x0001,0xfff5,0x0016,0xffe8,
+0x000f,0x0008,0xffeb,0xfffb,0xfff5,0xfff2,0x0005,0xffee,
+0x000b,0xfff9,0x001b,0x0014,0x000e,0x0007,0xffff,0x0009,
+0xfff0,0x001b,0x0002,0x000a,0x000d,0x0010,0x0014,0xffec,
+0x001d,0xffec,0xfff2,0xfff0,0xfff3,0xfff8,0x0009,0x000d,
+0xfffe,0x0021,0x0009,0x0013,0x0011,0x0017,0x0011,0x0016,
+0x0006,0x0006,0xfff7,0xfffe,0xfff4,0xfffe,0xffec,0xfff3,
+0xfff3,0xfff6,0xffd3,0xfffc,0x0010,0xfff8,0xfffe,0xfff9,
+0x0005,0x000f,0xfff7,0xfff9,0x0009,0xfffe,0x0008,0xffdb,
+0x000e,0xffe1,0xfff5,0xffe3,0xfffb,0xfff0,0x000b,0x001e,
+0x0007,0x001d,0x0015,0x0012,0x0017,0x0013,0x0012,0x0009,
+0x0007,0xfffd,0xffe9,0xffef,0xfff2,0xfff7,0xfff8,0xfff7,
+0xfffa,0x000f,0xffe5,0x0004,0x0002,0x0006,0xfff4,0x0001,
+0xffe6,0x0013,0x000f,0x000c,0x0005,0x0016,0x0018,0x0000,
+0x0015,0xfff4,0xffeb,0xfff1,0xfff8,0x000b,0xfff9,0x000c,
+0xfff2,0x0014,0xfffe,0x0006,0x0007,0x0006,0x0010,0x0009,
+0xfffa,0x0005,0xffeb,0xfff9,0xfff1,0xfff6,0xfff3,0xffd9,
+0xfffb,0xfff6,0xffec,0x0013,0x001c,0x0005,0x0016,0x0005,
+0xfffc,0x0006,0x0003,0xfffb,0x0001,0x0004,0x000b,0xfff0,
+0x0006,0xffe9,0xffed,0xffe3,0x0000,0x0003,0xfffa,0x001e,
+0xfffd,0x0011,0x000a,0x0005,0x000d,0x0002,0x0011,0xfffd,
+0xfffb,0xfffd,0xffdd,0xffeb,0xffef,0xffef,0xfffe,0xffdd,
+0x0002,0x000f,0xfffd,0x001c,0x000d,0x0015,0x000d,0x000d,
+0xfff8,0x0019,0x0008,0x0017,0xfffd,0x000d,0x0003,0xffef,
+0x0014,0xfff8,0x0000,0x000a,0x0008,0x000b,0x0012,0x0000,
+0x0001,0x000a,0x0005,0x0000,0x001c,0x0011,0x0020,0x000f,
+0x001a,0x0001,0xffed,0xfff7,0xfffd,0xffeb,0xfff1,0x0007,
+0xfffa,0xfff7,0xffe3,0xfffb,0x000a,0xffef,0xfff1,0xfff7,
+0x000d,0x000d,0xfffd,0x0005,0xfff9,0xfffc,0xfff7,0xffde,
+0x0005,0xffec,0x0002,0xfffd,0x0010,0x0003,0x0014,0x0011,
+0x000b,0x0007,0x0011,0x0000,0x0022,0x000d,0x0021,0x0002,
+0x001c,0xfff8,0xffe0,0xffe8,0xfffb,0xffe3,0xfffd,0x000c,
+0x0000,0x000f,0xfff5,0x0003,0xfffd,0xfffe,0xffe8,0xffff,
+0xffee,0x0011,0x0015,0x0019,0xfff5,0x0013,0x0006,0x0003,
+0x000b,0x0000,0xfff9,0x000b,0x000d,0x001f,0x0001,0x0000,
+0xfff7,0xffff,0xfffb,0xfff4,0x0012,0x0000,0x001f,0x0002,
+0x000d,0x0001,0xffe2,0xfff2,0xfff9,0xffe3,0xfff7,0xffee,
+0x0001,0xfff6,0xfffc,0x0012,0x0016,0xfffd,0x000a,0x0002,
+0x0003,0x0005,0x0009,0x0007,0xfff1,0x0001,0xfffb,0xfff3,
+0xfffe,0xfff4,0xfffb,0xfffe,0x0015,0x0017,0x0002,0x0010,
+0x0000,0xfffb,0x0006,0xfff3,0x0017,0xfffd,0x0020,0xfff6,
+0x000f,0xfff8,0xffd4,0xffe4,0xfff7,0xffdb,0x0002,0xfff3,
+0x0009,0x000f,0x000c,0x001b,0x0007,0x000c,0x0000,0x000b,
+0xffd4,0x0006,0xffe2,0xfff8,0x0009,0x000a,0x000b,0x000e,
+0x0017,0xfffb,0x0008,0xfffc,0x000e,0xfff4,0x0025,0x000e,
+0x000c,0x001a,0x0004,0x0010,0xfff8,0x0010,0xfff7,0x0007,
+0xfffa,0x0013,0x000c,0x0019,0xfffb,0x0018,0xfff1,0xfff3,
+0xfff8,0xffdc,0xffde,0xffe1,0xffff,0xffee,0xfffc,0xffee,
+0xffea,0xfffb,0xffd6,0xffe6,0x0006,0xfff8,0xffff,0xfffe,
+0x0009,0xffef,0x000a,0xffee,0x0015,0xffed,0x0027,0x001f,
+0x0017,0x0017,0x0010,0x000f,0xfffe,0x000c,0xfff9,0xfffa,
+0xfffb,0x0009,0xffff,0x000a,0xfff9,0x0010,0xfffc,0xfff7,
+0x0000,0xfff6,0xffef,0xffea,0xfff0,0xfffe,0xfff2,0xfff7,
+0xffc9,0xffff,0xffef,0xfffa,0x0001,0x0010,0x000f,0x0023,
+0x000f,0x0002,0x0000,0xfffc,0x0013,0x0008,0x0014,0x000d,
+0x0001,0x000e,0xfff9,0x0003,0xffee,0x0000,0xfff6,0xfffb,
+0xffed,0x0013,0x0000,0x0015,0xfff8,0x0010,0xfff7,0xffd9,
+0x0000,0xffdc,0xfff6,0xfff9,0x0009,0xfffc,0x0014,0xfffb,
+0xffdf,0xfff3,0xffe3,0xffe8,0xffff,0xffff,0x0002,0x0012,
+0x0000,0xfff7,0x0003,0xffef,0x001b,0x0000,0x0015,0x001e,
+0x000c,0x000b,0x0005,0x0002,0xfff4,0xfffc,0xfff7,0xffed,
+0xffee,0x0009,0xfff3,0x0006,0xfff5,0x0008,0x0002,0xffdd,
+0x0008,0xfff6,0x0007,0x0001,0xfffc,0x000b,0x000a,0x0002,
+0xffdc,0x0005,0xffe8,0x0004,0xfff9,0x0007,0xfffa,0x0011,
+0x000e,0x0005,0x0016,0x0016,0x0023,0x0008,0x002e,0x0001,
+0x0011,0x0003,0x0000,0xfffe,0x0002,0x000a,0x0005,0x0000,
+0x000e,0x000f,0x0002,0x0012,0x0002,0x0004,0xfff5,0x0007,
+0xffff,0xffdc,0xffee,0xffe0,0xfff9,0xffe5,0xffef,0xffec,
+0xfff2,0xfff9,0xffdc,0xfff3,0xfff6,0xfff6,0xffee,0x0000,
+0x0000,0xfffb,0x0019,0x0008,0x002b,0x0000,0x0030,0x0012,
+0x001b,0x0000,0x000c,0xfffd,0x0007,0x0006,0x0007,0xfff3,
+0x000f,0x0005,0xfff5,0x0003,0x0000,0xfffe,0x0000,0x000c,
+0x0006,0xfff6,0x0000,0xffe9,0xffea,0xfff5,0xffe6,0xfff4,
+0xffd1,0xfffd,0xfff5,0x0006,0xfff1,0x000d,0xfffe,0x0026,
+0x0006,0x000d,0x000f,0x0016,0x0028,0x001c,0x001c,0x0000,
+0x0005,0xfff8,0xfff6,0xfff1,0xfff9,0xfff9,0x0004,0xfff3,
+0x0001,0x000e,0xfff7,0x000e,0x0000,0xfffe,0xfffc,0xffee,
+0x0007,0xffdc,0x0006,0xfff8,0x0003,0xfff3,0x0007,0xfff8,
+0xffe7,0xfff1,0xffea,0xfff5,0xffee,0xfffc,0xfff1,0x0016,
+0xfff8,0x0002,0x0011,0x0009,0x0030,0x0014,0x001e,0x0011,
+0x0010,0xfff5,0x0001,0xfff0,0xfffe,0xfff6,0x0005,0xffe6,
+0x0002,0x0004,0xffea,0x0000,0xfffe,0xfff6,0x0006,0xfff3,
+0x000e,0xfff6,0x0017,0x0000,0xfff6,0x0002,0xffff,0x0000,
+0xffc7,0x001a,0xffea,0x0007,0x000e,0x001c,0x000e,0x0003,
+0x0023,0x0000,0xfffd,0xffff,0x000b,0xfff0,0x0012,0x000a,
+0x0004,0x001f,0x000f,0x001c,0x000e,0x0017,0x0001,0x0015,
+0xfff9,0x0002,0xfff5,0x0001,0xfffd,0x0001,0xffee,0xfff7,
+0xfff6,0xfff3,0xffcf,0xffe8,0xfff8,0xfff2,0xfffe,0xfff0,
+0xffdd,0x000e,0xffde,0xfff6,0x000a,0x000a,0x0001,0xfff4,
+0x0014,0xfff4,0x0000,0xfff1,0x0012,0xffe8,0x0014,0x001b,
+0x000e,0x001c,0x001b,0x001b,0x0014,0x0013,0x0002,0x0008,
+0xfffb,0xfff9,0xffe7,0xfff3,0xfffb,0xfffb,0xfffa,0xfffb,
+0xfffe,0x000c,0xffe1,0xfff1,0xffe9,0x0001,0xfff4,0xfff8,
+0xffbc,0x0012,0xfff7,0x0009,0x0006,0x0023,0x0012,0x0019,
+0x001a,0x0007,0xfff6,0xffff,0x0010,0x0003,0x0001,0x0009,
+0xfffa,0x0013,0x0004,0x000f,0x0004,0x0006,0x0000,0x0008,
+0xffec,0x0002,0xffe9,0xfffe,0xfff9,0xfffb,0xfff4,0xffdd,
+0xffff,0xfff3,0xffe8,0x0000,0x0003,0x0000,0x0016,0xfffc,
+0xffd2,0x0006,0xffeb,0xfff8,0x0002,0x0010,0x0005,0x0008,
+0x000b,0xfffc,0xfff8,0xfff1,0x0018,0xfffc,0x0002,0x001a,
+0x0003,0x0010,0x0010,0x000e,0x0009,0x0002,0x0001,0xfffc,
+0xffed,0xfff9,0xffdc,0xffef,0xfff7,0xfff3,0x0000,0xffe1,
+0x0005,0x000c,0xfff9,0x0008,0xfff5,0x000f,0x000d,0x0004,
+0xffcf,0x0018,0xfff0,0x0014,0xfffe,0x001a,0xfffe,0x0007,
+0x0019,0x000a,0x000b,0x0019,0x0020,0x0003,0x001b,0xfffe,
+0x0008,0x0008,0x000b,0x0009,0x0018,0x0011,0x0010,0x000e,
+0x000d,0xfffe,0xffec,0xfffb,0x0004,0xffef,0xfff2,0x000c,
+0xfffd,0xfff3,0xffdf,0xffe7,0xfff2,0xffe9,0xfff1,0xffed,
+0xffe5,0x000c,0xffe4,0x0002,0xfffa,0x0007,0xfff1,0xfff7,
+0x000b,0xffff,0x000d,0x000b,0x0028,0xfffc,0x001d,0x000e,
+0x0013,0x0005,0x0017,0x0008,0x001e,0x000d,0x0011,0x0000,
+0x000e,0xfff4,0xffde,0xffec,0x0002,0xffe7,0xfffe,0x0010,
+0x0004,0x000c,0xfff1,0xfff0,0xffe3,0xfff9,0xffe8,0xfff6,
+0xffc4,0x0010,0xfffd,0x0016,0xfff6,0x0020,0x0000,0x001c,
+0x0011,0x0012,0x0003,0x0019,0x0025,0x0017,0x000a,0xfffd,
+0xfffe,0xfffd,0x0000,0xfffd,0x000e,0x0000,0x000e,0x0001,
+0x0000,0xfffe,0xffe0,0xfff7,0x0001,0xffe7,0xfff9,0xfff3,
+0x0005,0xfff3,0xfff8,0xffff,0xfffe,0xfff8,0x000a,0xfffa,
+0xffda,0x0004,0xfff1,0x0004,0xfff2,0x000d,0xfff4,0x000b,
+0x0002,0x0006,0x0006,0x000b,0x002d,0x0010,0x000b,0x000d,
+0x0007,0xfffa,0x000c,0xfffc,0x0014,0xfffd,0x0010,0xfff4,
+0x0001,0xfff4,0xffd2,0xffe8,0x0000,0xffdf,0x0003,0xfff7,
+0x000c,0x000c,0x0008,0x0007,0xffef,0x0006,0x0000,0x0002};
+
+static const signed short etable2[5120]={
+0x0049,0xffe0,0xffc4,0xfff1,0xffe6,0x003b,0x0002,0xffdf,
+0x001e,0xfff6,0xfffd,0xffef,0x0008,0x001e,0xffff,0xffe6,
+0xfffc,0xffea,0x000a,0x0010,0xffdc,0xfffb,0xfff5,0x0038,
+0x0025,0x0006,0xfff6,0xfffb,0xfff3,0xfffd,0x0006,0xfffb,
+0x000b,0x0004,0xffed,0xfffb,0xfff0,0x0029,0x0018,0x000d,
+0x0004,0xfff5,0xffdb,0x0017,0xfffb,0x002e,0xfffe,0xffe3,
+0xfffb,0xffd9,0xffeb,0xfff7,0x0000,0x0031,0x000c,0xfff7,
+0xfff0,0xffe6,0x0016,0x000f,0xffd3,0xffec,0xfffb,0x0028,
+0x0016,0x0011,0xffe6,0x001f,0xfff2,0x0002,0xfff2,0x000a,
+0x001e,0x0014,0xffe5,0xfff7,0xffd9,0x0027,0x0012,0x0005,
+0x0022,0xffe7,0xffd0,0xffe4,0xfff5,0x0022,0xfffe,0xffd7,
+0x0009,0xfff9,0xffef,0x0015,0x0014,0x0018,0xffef,0xffdf,
+0x0000,0xffe8,0x000a,0x002a,0x0003,0xfffb,0x000a,0x002a,
+0x000b,0x0008,0xfffd,0x0003,0x0010,0x0009,0x0016,0xfffe,
+0x0000,0xffdf,0xfff6,0x0012,0x0007,0x003a,0x000a,0x001c,
+0xffde,0xfffc,0xffe7,0x000a,0x0009,0x0015,0xfff9,0xffdc,
+0xffe6,0xffdc,0xffdd,0x001c,0x000c,0x002a,0xfffd,0xfff0,
+0xfff4,0xffe4,0x0015,0x002a,0xfffb,0xffeb,0x0010,0x001a,
+0xfffc,0x0013,0xffed,0x0027,0x000f,0x000f,0x0001,0x000d,
+0x0013,0xffef,0xffef,0x000e,0xfff1,0x0037,0x0004,0x0013,
+0x001c,0xffec,0xffcd,0xfff2,0xfffa,0x0007,0x0000,0xffe6,
+0x001b,0xfffc,0x0012,0xffd8,0xfffa,0x0010,0xffff,0xfff1,
+0x0000,0xffc9,0xfffb,0xfff0,0xffed,0x000e,0xfffd,0x0031,
+0x000e,0x0001,0xffea,0xffe2,0xfff4,0x0000,0x0018,0x000f,
+0x0009,0xffef,0xffd3,0xffe3,0x0004,0x001c,0x0033,0x0023,
+0xffd8,0x0000,0xffe4,0x0018,0x000e,0xfffb,0xfffc,0xffeb,
+0xfff9,0xffdf,0x0000,0xffe0,0xfff1,0x0023,0x000c,0x0001,
+0xfff5,0xffc6,0x0005,0xfff0,0xffe4,0x0000,0x0001,0x0021,
+0x0000,0x000b,0xffd9,0x0005,0xfff2,0x0006,0x0003,0x001f,
+0x001c,0xffff,0xffcb,0xffdf,0xffed,0x0019,0x002e,0x001a,
+0xfff5,0xfff2,0xffd9,0xffe5,0x0009,0xffef,0xfffc,0xffdf,
+0x0006,0x0000,0x0004,0xffff,0x0005,0x000a,0xffef,0xffea,
+0x0005,0xffc7,0xfffb,0x0009,0x0014,0x000d,0x0012,0x0023,
+0xfff5,0x0003,0xfff0,0xffea,0x0011,0x000d,0x0028,0x0013,
+0xffff,0xffc9,0xffdd,0xfffb,0x001b,0x002c,0x0025,0x0031,
+0xffb0,0x0006,0xfff0,0x000b,0x001e,0xffe2,0xfff7,0xffe4,
+0xffe4,0xffe3,0xfff3,0x0006,0xfffe,0x001c,0xfffd,0xfffb,
+0xfff9,0xffc4,0x0005,0x0009,0x000b,0xffff,0x0018,0x0013,
+0xffe5,0x000d,0xffe0,0x000d,0x000f,0x0013,0x0013,0x0023,
+0x0011,0xffd9,0xffd5,0xfff7,0x0004,0x002a,0x0020,0x0029,
+0x004e,0xffeb,0xffd5,0x0004,0xffda,0x0011,0x0011,0xfffb,
+0x0037,0x0018,0xfff1,0xffdc,0x000e,0x0004,0x0018,0xffe8,
+0x000c,0x0005,0x0011,0x001f,0xffca,0xfffb,0xfffe,0x001b,
+0x002b,0xfff4,0x0002,0x0009,0xfff7,0xfff1,0x0016,0xfffd,
+0x001c,0x0015,0xffec,0x0003,0x0014,0x001c,0x0009,0xfffb,
+0x0009,0xffff,0xffec,0x002b,0xffef,0x0003,0x000c,0x0000,
+0x0014,0xfffc,0xffdf,0xffe3,0x0006,0x0016,0x0026,0xfff9,
+0x0000,0x0001,0x001d,0x001e,0xffc1,0xffeb,0x0003,0x000b,
+0x001b,0xffff,0xfff2,0x002d,0xfff6,0xfff7,0x0001,0x000c,
+0x002f,0x0025,0xffe4,0x0000,0xfffe,0x001a,0x0004,0xfff3,
+0x0027,0xfff2,0xffe2,0xfff8,0xffea,0xfff8,0x000c,0xfff4,
+0x0022,0x001b,0xffe3,0x0002,0x001a,0xfffe,0x0008,0xffe1,
+0x0010,0x0003,0x0011,0x0039,0xfff2,0xfffa,0x0013,0x000d,
+0x0010,0xfff6,0x0008,0x0011,0x0014,0xfffe,0x0026,0x0000,
+0x0011,0xfff0,0xfff5,0x001b,0x002c,0x002d,0xfffc,0x0008,
+0xffe3,0x0005,0xfff9,0x001e,0xffff,0xffeb,0x0007,0xfff9,
+0x0000,0x0000,0xffd1,0x0009,0x0012,0x000f,0x0016,0xfff2,
+0x0004,0x0000,0x001c,0x0039,0xffe9,0xffeb,0x0019,0xfffe,
+0x0001,0x0000,0xfff9,0x0035,0x0013,0x0003,0x0011,0x000f,
+0x0024,0x0000,0xffed,0x0018,0x0015,0x002b,0xfff7,0x0000,
+0x0021,0xfff6,0xffde,0x0005,0xffef,0xffdd,0x000f,0x0001,
+0x0035,0x001e,0x0006,0xffc5,0x0000,0xfff6,0x0018,0xfff3,
+0x0011,0xffe5,0x0001,0xffff,0xffdb,0x000d,0x0004,0x0014,
+0x0014,0xffee,0xfff6,0xfff0,0xfff8,0xfff5,0x0027,0x0012,
+0x001a,0x0000,0xffd2,0xffec,0x0029,0x000f,0x0025,0x000f,
+0xffdd,0x000a,0xfff5,0x002c,0x0003,0xffd0,0x000a,0x0006,
+0x0011,0x0002,0xfff5,0xffcd,0xfff8,0x0008,0x0026,0x0003,
+0x0004,0xffe1,0x000c,0xfffe,0xffd2,0xffff,0x000a,0x0004,
+0x0005,0xfff9,0xffe6,0x0013,0xfff6,0xfffb,0x0012,0x0022,
+0x002d,0x000f,0xffca,0xffe8,0x0012,0x000d,0x001f,0x0007,
+0xfffb,0xfffd,0xffeb,0xfff9,0xfffe,0xffc4,0x000a,0xfffb,
+0x0020,0x0022,0xfff9,0xffec,0x000b,0xfff0,0x0008,0xffec,
+0x0015,0xffe3,0x0001,0x0018,0x0002,0x000d,0x001b,0x0006,
+0xfffb,0xfff1,0xfffd,0xfff8,0x0015,0x0001,0x0037,0x0015,
+0x000f,0xffda,0xffdb,0x0003,0x0041,0x0020,0x0017,0x001e,
+0xffb6,0x0011,0x0000,0x001f,0x0012,0xffb7,0x0005,0x0000,
+0xfffd,0x0005,0xffe7,0xfff4,0x0003,0x0001,0x0016,0xfffd,
+0x0009,0xffdf,0x000c,0x0018,0xfffa,0xfffe,0x0021,0xfff7,
+0xffeb,0xfffb,0xffec,0x001b,0x0013,0x0007,0x0022,0x0025,
+0x0022,0xffea,0xffd4,0x0000,0x0029,0x001d,0x0011,0x0015,
+0x004c,0xffdd,0xffe1,0xffe4,0xffcf,0x002b,0xffd8,0x0000,
+0x001d,0xfff2,0x0008,0x0005,0x000a,0x0012,0xffe6,0xffd2,
+0x0000,0x0007,0x0006,0x0003,0xffe7,0xfff9,0xfffe,0x0028,
+0x001c,0x000e,0x0012,0xfffd,0xffe5,0xffe4,0xfff8,0xffd3,
+0xfff3,0x0022,0xfff3,0xffe5,0xfff1,0x001f,0x000c,0x0003,
+0x0007,0xfff1,0xfff7,0x0009,0xffe4,0x001d,0xffd3,0x0005,
+0xfffa,0xffd5,0xfff7,0x000c,0x0002,0x0024,0xfff4,0xffe2,
+0xfff5,0x0003,0x0011,0x0003,0xffde,0xffea,0x0003,0x0018,
+0x000c,0x0018,0x0002,0x0020,0xffe4,0xffea,0xffe3,0xffe3,
+0x0005,0x0032,0xffeb,0xffe1,0xffda,0x001d,0x0007,0xfffb,
+0x0024,0xffe3,0xffed,0xffd7,0xffde,0x0012,0xffd3,0xfffa,
+0x0008,0xfff6,0xfffb,0x002b,0x0017,0x000b,0xffd6,0xffcb,
+0x0005,0x0005,0x0006,0x001e,0x000e,0xfff8,0x0014,0x001a,
+0x0001,0x0010,0x0019,0x0004,0x0003,0xfff1,0x0007,0xffd7,
+0xffe9,0xfffd,0xfffc,0xfffd,0x0008,0x0030,0xffff,0x0011,
+0xffe0,0xfff8,0x0003,0xfffe,0xfff3,0x0004,0xffce,0xffff,
+0xffe5,0xffd9,0xffe9,0x0033,0x000f,0x001e,0xffe5,0xffdb,
+0xfff9,0x0001,0x0011,0x001d,0x0005,0xffe9,0x0019,0x000a,
+0xfff2,0x001a,0x0008,0x0029,0x0001,0xfff7,0xfff3,0xffe6,
+0xfffb,0x000c,0xfff4,0xfff9,0xfff2,0x002d,0xfffa,0x0009,
+0x001f,0xffe8,0xffe9,0xffe5,0xffe3,0xfff7,0xffd5,0x0008,
+0x001a,0xfff9,0x001e,0xffef,0xfffc,0x0003,0xffe6,0xffdd,
+0x0005,0xffe8,0xfff6,0xffe4,0xfff7,0x000c,0x0005,0x0021,
+0x0005,0x0008,0x0005,0xffe3,0xffe6,0xffe8,0x0009,0xffe9,
+0xfff2,0x000c,0xffd9,0xffcc,0x0005,0x0012,0x0027,0x0018,
+0xffdb,0xfffd,0x0000,0x000a,0xfff9,0xffea,0xffd0,0x000c,
+0xfff8,0xffdc,0x000c,0xfff7,0xfff4,0x0016,0xfff4,0xffed,
+0xfffa,0xffe4,0x0000,0xffe3,0xffee,0xfffd,0x000b,0x0011,
+0xfff6,0x0012,0xfff6,0x0007,0xffe5,0xffee,0xfff5,0xfff9,
+0x0003,0x001c,0xffd1,0xffc9,0xffee,0x000f,0x0022,0x0010,
+0xfff8,0xffef,0xfff6,0xffd8,0xfff3,0xffde,0xffd1,0x0000,
+0x0005,0xfffc,0x0010,0x0015,0x0008,0xfffe,0xffd6,0xffd5,
+0x000a,0xffe6,0xfff6,0xfffe,0x001f,0x000b,0x001b,0x0013,
+0xffeb,0x000a,0x000c,0xffec,0x0003,0xfff5,0x0019,0xffec,
+0xffe7,0xffe7,0xffe3,0xffe4,0x001c,0x0022,0x0019,0x0026,
+0xffb3,0x0002,0x000b,0xffff,0x0007,0xffd1,0xffcc,0x0005,
+0xffe3,0xffdf,0xffff,0x001c,0x0000,0x000f,0xffe4,0xffe6,
+0xfffe,0xffe2,0x0000,0xfffe,0x0016,0xfffc,0x0021,0x0003,
+0xffdc,0x0015,0xfffd,0x000f,0x0002,0xfffb,0x0004,0xfffc,
+0xfffa,0xfff7,0xffdb,0xffe1,0x0005,0x0020,0x0014,0x001e,
+0x0051,0xffe7,0xfff2,0xfff8,0xffc3,0x0000,0xffe7,0x001c,
+0x0036,0x0014,0xfffd,0xfff2,0x0011,0xfff8,0x0000,0xffd4,
+0x0010,0x0023,0x000d,0x0012,0xffd5,0xfff9,0x0006,0x000b,
+0x0021,0xfffc,0x001e,0x000b,0xffea,0xffd8,0x0006,0xffd5,
+0x0003,0x0032,0xfff2,0xffee,0x0016,0x0012,0xffff,0xfff0,
+0x000c,0xfffc,0x0008,0x001d,0xffd9,0xfff4,0xffe2,0x0021,
+0x0013,0xfff8,0xffeb,0xfffa,0x0008,0x0009,0x000d,0xffe4,
+0x0004,0x001f,0x0018,0x0012,0xffcc,0xffe9,0x000c,0xfffc,
+0x0012,0x0005,0x000e,0x002f,0xffe8,0xffde,0xfff2,0xffe5,
+0x0016,0x0042,0xffea,0xffea,0xffff,0x0010,0xfffa,0xffe8,
+0x0029,0xffee,0xfffe,0xffeb,0xffd3,0xffe8,0xffe2,0x0015,
+0x0021,0x0018,0xffef,0x0018,0x001d,0xfff1,0xfff0,0xffcd,
+0x0015,0x0021,0x000d,0x002d,0xfffd,0xfff8,0x001c,0xfffe,
+0x0007,0xfffe,0x0025,0x0013,0x0007,0xffe5,0x0016,0xffd9,
+0xfff9,0x000c,0xfffb,0x0005,0x002d,0x0023,0xfff1,0xffff,
+0xffe5,0x0001,0x0014,0x0011,0xffe8,0xffda,0xffdd,0x001a,
+0xffff,0xfffc,0xffdd,0x0020,0x0015,0x0003,0xfffe,0xffdd,
+0x0008,0x001d,0x0018,0x002c,0xfff4,0xffe8,0x0022,0xffee,
+0xfff8,0x0007,0x0015,0x0037,0x0005,0xffeb,0x0002,0xffe9,
+0x000b,0x001c,0xfff3,0x0001,0x0016,0x0021,0xffeb,0xfff6,
+0x0024,0xfff3,0xfffb,0xfff9,0xffd8,0xffcd,0xffe4,0x0024,
+0x0034,0x001b,0x0012,0xffdc,0x0002,0xffea,0x0000,0xffdf,
+0x0015,0x0002,0xfffd,0xfff3,0xffe6,0x000b,0x000e,0x0004,
+0x000a,0xfff6,0x0012,0xfff2,0xffea,0xffdc,0x0018,0xffeb,
+0x0001,0x001c,0xffd8,0xffd6,0x002a,0x0005,0x0019,0x0005,
+0xffe0,0x0006,0x0011,0x001f,0xffed,0xffbf,0xffdf,0x0029,
+0x0010,0xffff,0x0000,0xffe3,0xfffa,0xfffc,0x000d,0xffef,
+0x0009,0xffff,0x0008,0xfff2,0xffdd,0xfffd,0x0013,0xfff5,
+0xfffc,0x0000,0x0001,0x0015,0xffe9,0xffe2,0x0003,0xfffb,
+0x0014,0x002c,0xffd0,0xffd2,0x0013,0x0003,0x0014,0xfffd,
+0xfffd,0xfff9,0x0006,0xffec,0xffe7,0xffb3,0xffe0,0x001d,
+0x001f,0x001e,0x0004,0x0002,0x000e,0xffe3,0xfff0,0xffd8,
+0x001a,0x0000,0xfffd,0x000c,0x000d,0x000a,0x0024,0xfff7,
+0xfff1,0xfff8,0x0018,0xfffa,0x0007,0xffea,0x0028,0xffef,
+0xfff8,0xfff7,0xffe1,0xffee,0x0042,0x0016,0x000b,0x0013,
+0xffb8,0x000d,0x001d,0x0012,0xfffc,0xffa6,0xffdb,0x0022,
+0xfffc,0x0001,0xfff3,0x0009,0x0006,0xfff5,0xfffe,0xffe8,
+0x000d,0xfffd,0x0007,0x000b,0x0004,0xfffc,0x002a,0xffe7,
+0xffe1,0x0001,0x0008,0x001d,0x0006,0xffef,0x0013,0xfffe,
+0x000a,0x0006,0xffda,0xffea,0x002a,0x0013,0x0006,0x000b,
+0x0074,0xffec,0xffbc,0xffe2,0xffe4,0x0053,0x001c,0xffee,
+0x0020,0xffea,0xfff3,0xffeb,0x0005,0x001c,0x0005,0xfff9,
+0xffe8,0xfff8,0xffea,0x0011,0xffe9,0x001e,0xffe7,0x002d,
+0x000f,0xfff7,0xfff5,0xffee,0x0016,0xfff6,0x0004,0xfffe,
+0x0013,0xfff4,0x0017,0x0003,0xffd5,0x0002,0x000c,0xfffc,
+0x002f,0x0000,0xffd3,0x0007,0xfff9,0x0045,0x0017,0xfff3,
+0xfffe,0xffcd,0xffe0,0xfff2,0xfffd,0x002f,0x0013,0x0008,
+0xffdb,0xfff5,0xfff6,0x0010,0xffe0,0x000f,0xffed,0x001d,
+0x0000,0x0001,0xffe4,0x0012,0x0014,0xfffc,0xfff0,0x000d,
+0x0026,0x0003,0x000f,0x0000,0xffbe,0x0000,0x0007,0xfff3,
+0x004d,0xfff3,0xffc8,0xffd5,0xfff3,0x0039,0x0017,0xffe6,
+0x000b,0xffed,0xffe5,0x0010,0x0011,0x0016,0xfff6,0xfff1,
+0xffed,0xfff6,0xffea,0x002b,0x0010,0x001e,0xfffe,0x001f,
+0xfff5,0xfffa,0xfffb,0xfff7,0x0034,0x0002,0x0014,0x0000,
+0x0008,0xffce,0x0021,0x001b,0xffed,0x0013,0xffff,0x0009,
+0x0008,0x0006,0xffdf,0xfffc,0x0007,0x002c,0x0012,0xffeb,
+0xffe9,0xffd0,0xffd2,0x0018,0x0009,0x0028,0x0003,0x0001,
+0xffe0,0xfff3,0xfff5,0x002b,0x0007,0x000e,0x0003,0x000f,
+0xffe6,0x0003,0xffeb,0x001a,0x0032,0x0008,0x0000,0x0010,
+0x001b,0xffde,0x0019,0x0017,0xffd5,0x0011,0xfffa,0x0001,
+0x0047,0xfff7,0xffc5,0xffe3,0xfff8,0x001e,0x001a,0xfff5,
+0x001e,0xfff0,0x0008,0xffd4,0xfff7,0x000e,0x0005,0x0002,
+0xffed,0xffd8,0xffda,0xfff1,0xfff9,0x0032,0xffef,0x0026,
+0xfff9,0xfff2,0xffe8,0xffd5,0x0016,0xfffa,0x0016,0x0013,
+0x0011,0xffde,0xfffe,0xffec,0xffe9,0xfff6,0x0027,0x0010,
+0x0002,0x000b,0xffdc,0x0009,0x000d,0x0011,0x0015,0xfffa,
+0xfffb,0xffd3,0xfff6,0xffdc,0xffee,0x0021,0x0013,0x0013,
+0xffe1,0xffd4,0xffe5,0xfff1,0xfff0,0x0022,0xfff5,0x0016,
+0xffea,0xfffc,0xffd8,0xfff9,0x0015,0x0000,0x0001,0x0023,
+0x0024,0xffee,0xfff6,0xffe8,0xffd2,0xfff4,0x0022,0x0008,
+0x0020,0xfffe,0xffd1,0xffd6,0x0007,0x0005,0x0015,0xffee,
+0x0009,0xfff4,0xfffb,0xfffb,0x0002,0x0008,0xfff6,0xfffc,
+0xfff2,0xffd6,0xffda,0x000a,0x0021,0x0031,0x0005,0x0018,
+0xffdf,0xfff4,0xffef,0xffdd,0x0034,0x0006,0x0026,0x0016,
+0x0007,0xffb8,0x0007,0x0003,0x0000,0x0006,0x0019,0x001e,
+0xffdc,0x0012,0xffe8,0xfffd,0x001c,0xfff9,0x0010,0xfff3,
+0xffe6,0xffd7,0xffe8,0x0001,0xfffb,0x001a,0x0003,0x000c,
+0xffe5,0xffd2,0xffe5,0x000a,0x0018,0x0022,0x000a,0x0008,
+0xffcf,0xfffe,0xffde,0x0000,0x0033,0x000c,0x0011,0x0026,
+0x0019,0xffc8,0x0000,0x0000,0xffea,0x0003,0x0014,0x0016,
+0x0079,0xfff7,0xffce,0xfff6,0xffd8,0x0028,0x002b,0x0009,
+0x003a,0x000c,0xffe7,0xffd7,0x000b,0x0002,0x001f,0xfffb,
+0xfff8,0x0013,0xfff1,0x0020,0xffd7,0x001e,0xfff0,0x0010,
+0x0014,0xffe4,0x0000,0xfffd,0x001a,0xffea,0x0013,0x0000,
+0x0024,0x0004,0x0016,0x000c,0xfffa,0xfff7,0xffff,0xffe8,
+0x0034,0x000a,0xffe5,0x001b,0xffee,0x001a,0x0026,0x000e,
+0x0017,0xfff0,0xffd4,0xffdf,0x0003,0x0014,0x002d,0x000a,
+0xffec,0x000f,0xfffd,0x001f,0xffce,0x000e,0xfff6,0x0000,
+0x0005,0xffef,0xfff1,0x0020,0x0018,0xfff0,0xffff,0x000f,
+0x0037,0x0014,0x000e,0x0008,0xffe3,0xfff4,0xfff9,0xffe0,
+0x0052,0xfffd,0xffda,0xffe9,0xffe8,0x000f,0x0026,0x0002,
+0x0025,0x000f,0xffd9,0xfffe,0x0017,0xfffc,0x000f,0xfff4,
+0xfffd,0x0011,0xfff1,0x003a,0xffff,0x001d,0x0006,0x0002,
+0xfffb,0xffe6,0x0007,0x0004,0x0038,0xfff7,0x0023,0x0003,
+0x0019,0xffdf,0x0020,0x0024,0x0011,0x0007,0xfff1,0xfff7,
+0x000d,0x0011,0xfff1,0x000f,0xfffd,0x0001,0x0021,0x0007,
+0x0001,0xfff4,0xffc6,0x0005,0x000f,0x000d,0x001d,0x0003,
+0xfff0,0x000d,0xfffc,0x0039,0xfff6,0x000d,0x000b,0xfff3,
+0xffeb,0xfff1,0xfff7,0x0028,0x0037,0xfffd,0x000e,0x0013,
+0x002c,0xffef,0x0018,0x0020,0xfffb,0x0004,0xffeb,0xffee,
+0x004c,0x0001,0xffd7,0xfff7,0xffed,0xfff4,0x0029,0x0011,
+0x0037,0x0012,0xfffd,0xffc1,0xfffd,0xfff4,0x001e,0x0005,
+0xfffd,0xfff4,0xffe1,0x0000,0xffe8,0x0031,0xfff8,0x0009,
+0xffff,0xffdf,0xfff4,0xffe3,0x001b,0xffee,0x0025,0x0015,
+0x0022,0xffef,0xfffd,0xfff5,0x000e,0xffe9,0x0019,0xfffe,
+0x0007,0x0016,0xffee,0x001d,0x0001,0xffe7,0x0024,0x0015,
+0x0014,0xfff7,0xffea,0xffc8,0xfff5,0x0006,0x002d,0x0015,
+0xfff1,0xfff0,0xffec,0xffff,0xffdf,0x0022,0xfffe,0xfffa,
+0xffef,0xffe9,0xffe4,0x0006,0x0019,0xfff4,0x0010,0x0025,
+0x0035,0xffff,0xfff5,0xfff1,0xfff8,0xffe7,0x0014,0xfff5,
+0x0025,0x0008,0xffe3,0xffea,0xfffc,0xffdb,0x0024,0x0009,
+0x0022,0x0016,0xffef,0xffe8,0x0008,0xffee,0x000f,0xfffe,
+0x0001,0xfff2,0xffe1,0x0019,0x000f,0x0030,0x000d,0xfffc,
+0xffe4,0xffe1,0xfffb,0xffeb,0x0039,0xfffc,0x0035,0x0018,
+0x0017,0xffc9,0x0006,0x000c,0x0025,0xfffa,0x000b,0x000b,
+0xffe1,0x001c,0xfffa,0x0010,0x0010,0xffce,0x001f,0x000e,
+0x0000,0xfffa,0xffdc,0xffef,0x0000,0x0000,0x001d,0x000e,
+0xfff5,0xffee,0xffec,0x0019,0x0006,0x0021,0x0013,0xffec,
+0xffd5,0xffeb,0xffeb,0x000e,0x0037,0x0000,0x0020,0x0028,
+0x002a,0xffd9,0xffff,0x0008,0x000e,0xfff8,0x0006,0x0003,
+0x0077,0xffe8,0xffd9,0xffd4,0xffcd,0x0042,0xfff2,0x000f,
+0x001f,0xffe6,0xffff,0x0000,0x0007,0x0010,0xffed,0xffe4,
+0xffed,0x0016,0xffe6,0x0004,0xfff3,0x001c,0xfff0,0x001d,
+0x0005,0xffff,0x0010,0xfff0,0x0008,0xffdd,0xfff6,0xffd6,
+0xfffc,0x0011,0x001d,0xffed,0xffd6,0xfff9,0x0000,0xfff1,
+0x0032,0xfffd,0xfff0,0xfffb,0xffe2,0x0035,0xffed,0x0014,
+0xfffd,0xffc9,0xffed,0x0008,0x0000,0x0022,0xfffb,0xfff5,
+0xffe0,0x0012,0xfff1,0x0004,0xffea,0x000d,0xfff6,0x000d,
+0xfff7,0x0008,0x0000,0x0013,0x0007,0xffe3,0xffe1,0xffe6,
+0x000d,0x0021,0x0015,0xffea,0xffbf,0xfff7,0xfffc,0xffe9,
+0x004f,0xffef,0xffe5,0xffc8,0xffdc,0x0029,0xffed,0x0008,
+0x000a,0xffea,0xfff1,0x0027,0x0014,0x0009,0xffdd,0xffdd,
+0xfff1,0x0014,0xffe6,0x001f,0x001a,0x001b,0x0006,0x000f,
+0xffec,0x0000,0x0017,0xfff8,0x0026,0xffea,0x0005,0xffda,
+0xfff1,0xffec,0x0027,0x0004,0xffee,0x0009,0xfff3,0xffff,
+0x000a,0x0003,0xfffc,0xffee,0xfff1,0x001b,0xffe8,0x000d,
+0xffe8,0xffcd,0xffde,0x002f,0x000c,0x001c,0xffeb,0xffed,
+0xffe5,0x0010,0xfff1,0x001e,0x0011,0x000c,0x000c,0x0000,
+0xffdc,0x000a,0x0007,0x001b,0x0025,0xfff0,0xfff1,0xffea,
+0x0003,0xfffc,0x001f,0x0001,0xffd6,0x0007,0xffee,0xfff7,
+0x004a,0xfff4,0xffe2,0xffd6,0xffe2,0x000e,0xfff0,0x0017,
+0x001d,0xffed,0x0014,0xffeb,0xfff9,0x0001,0xffed,0xffef,
+0xfff2,0xfff6,0xffd5,0xffe5,0x0003,0x0030,0xfff8,0x0016,
+0xfff0,0xfff9,0x0004,0xffd6,0x0009,0xffe1,0x0006,0xffec,
+0xfffa,0xfffc,0x0003,0xffd5,0xffea,0xffec,0x001c,0x0005,
+0x0005,0x0007,0xfff9,0xfffc,0xfff7,0x0000,0xffeb,0x001c,
+0xfffa,0xffd0,0x0002,0xfff2,0xfff1,0x0014,0xfffb,0x0000,
+0xffe5,0xfff2,0xffe0,0xffe4,0xfffb,0x0020,0xfffe,0x0006,
+0xffe0,0x0003,0xfff4,0xfffb,0x0008,0xffe7,0xfff2,0xfffc,
+0x000c,0x000b,0xfffc,0xffd1,0xffd3,0xffea,0x0016,0xfffe,
+0x0022,0xfffa,0xffee,0xffc9,0xfff1,0xfff5,0xffeb,0x0010,
+0x0008,0xfff0,0x0006,0x0010,0x0005,0xfffc,0xffdd,0xffe8,
+0xfff6,0xfff4,0xffd5,0xffff,0x002b,0x002f,0x000e,0x0008,
+0xffd5,0xfffb,0x000a,0xffde,0x0027,0xffee,0x0016,0xfff0,
+0xffef,0xffd6,0x000d,0xffed,0x0001,0xfffd,0x000e,0x0014,
+0xffde,0x000e,0x0004,0xffef,0x0005,0xffe8,0xffe6,0x0014,
+0xffe5,0xffd3,0xfff4,0x0018,0xfffe,0x000d,0xffeb,0xfff8,
+0xffea,0xfff0,0xffe0,0xfffe,0x0022,0x001f,0x0014,0xfff9,
+0xffc6,0x0005,0xfffb,0x0002,0x0026,0xfff4,0x0002,0xffff,
+0x0001,0xffe6,0x0005,0xffe9,0xffeb,0xfffa,0x0008,0x000b,
+0x007c,0xfff3,0xffeb,0xffe9,0xffc2,0x0017,0x0000,0x002b,
+0x0039,0x0008,0xfff3,0xffee,0x000e,0xfff6,0x0006,0xffe6,
+0xfffd,0x0031,0xffed,0x0013,0xffe1,0x001b,0xfff9,0x0000,
+0x000b,0xffec,0x001d,0xffff,0x000c,0xffd1,0x0004,0xffd9,
+0x000b,0x0022,0x001c,0xfff7,0xfffb,0xffed,0xfff3,0xffde,
+0x0037,0x0006,0x0001,0x000e,0xffd7,0x000a,0xfffc,0x0030,
+0x0016,0xffec,0xffe1,0xfff6,0x0005,0x0007,0x0014,0xfff7,
+0xfff0,0x002d,0xfff8,0x0013,0xffd8,0x000c,0xffff,0xfff1,
+0xfffc,0xfff6,0x000c,0x0022,0x000b,0xffd7,0xfff0,0xffe8,
+0x001e,0x0031,0x0014,0xfff3,0xffe4,0xffea,0xffee,0xffd5,
+0x0054,0xfffa,0xfff7,0xffdc,0xffd1,0xffff,0xfffc,0x0024,
+0x0024,0x000c,0xffe5,0x0014,0x001a,0xffef,0xfff7,0xffdf,
+0x0001,0x002f,0xffed,0x002e,0x0009,0x001b,0x000f,0xfff3,
+0xfff1,0xffee,0x0023,0x0006,0x002a,0xffdf,0x0014,0xffdc,
+0x0001,0xfffc,0x0026,0x000e,0x0012,0xfffe,0xffe5,0xffec,
+0x000f,0x000d,0x000d,0x0001,0xffe6,0xfff2,0xfff7,0x0029,
+0x0001,0xfff0,0xffd2,0x001b,0x0012,0x0001,0x0004,0xfff0,
+0xfff5,0x002b,0xfff8,0x002d,0x0000,0x000b,0x0015,0xffe3,
+0xffe2,0xfff8,0x0013,0x002a,0x0029,0xffe4,0x0000,0xffec,
+0x0014,0x000b,0x001e,0x000a,0xfffc,0xfffb,0xffe0,0xffe4,
+0x004f,0xfffe,0xfff4,0xffea,0xffd6,0xffe4,0xffff,0x0033,
+0x0036,0x000f,0x0008,0xffd7,0x0000,0xffe8,0x0006,0xfff1,
+0x0001,0x0011,0xffdc,0xfff4,0xfff2,0x002f,0x0000,0xfffa,
+0xfff5,0xffe6,0x0010,0xffe5,0x000d,0xffd5,0x0016,0xffee,
+0x000a,0x000c,0x0002,0xffde,0x000f,0xffdf,0x000d,0xfff3,
+0x000a,0x0012,0x000a,0x000f,0xffeb,0xffd7,0xfffa,0x0038,
+0x0013,0xfff3,0xfff7,0xffdf,0xfff7,0xfffa,0x0014,0x0001,
+0xfff5,0x000d,0xffe8,0xfff3,0xffe9,0x0020,0x0006,0xffea,
+0xffe6,0xfff1,0x0000,0x0008,0x000c,0xffdb,0x0001,0xfffe,
+0x001c,0x001b,0xfffb,0xffdb,0xfff9,0xffdd,0x0008,0xffeb,
+0x0027,0x0004,0x0000,0xffdd,0xffe5,0xffcb,0xfffa,0x002c,
+0x0021,0x0012,0xfffb,0xfffe,0x000b,0xffe1,0xfff7,0xffea,
+0x0006,0x000f,0xffdc,0x000d,0x0019,0x002e,0x0017,0xffec,
+0xffdb,0xffe8,0x0017,0xffed,0x002b,0xffe3,0x0026,0xfff2,
+0x0000,0xffe6,0x000c,0xfff6,0x0026,0xfff0,0x0000,0x0000,
+0xffe3,0x0019,0x0016,0x0002,0xfffa,0xffbd,0xfff5,0x0031,
+0xffff,0xfff6,0xffe8,0x0005,0x0003,0xfff3,0x0004,0xfffb,
+0xfffa,0x000b,0xffe7,0x000c,0x0010,0x001f,0x001c,0xffdc,
+0xffcb,0xfff3,0x0006,0x0010,0x002a,0xffe8,0x0011,0x0001,
+0x0012,0xfff6,0x0004,0xfff3,0x000f,0xffee,0xfffb,0xfff9,
+0x001d,0xffe7,0xffea,0xfffe,0x0006,0x0043,0x000b,0xffcf,
+0x0001,0x000a,0x0018,0xfffb,0xfffd,0x000d,0xfffc,0x0005,
+0x0006,0xfff5,0x0019,0xfff4,0xfff0,0xffe1,0xffe4,0x0024,
+0x0035,0x000d,0xfffa,0xfff0,0xffd6,0x0018,0xffef,0xffff,
+0xffee,0x000a,0xfffc,0x000d,0xfff1,0x0012,0x0005,0x0007,
+0xffd9,0xfffc,0x0000,0x0023,0x001b,0x0035,0x0006,0xffd4,
+0xffdf,0xffee,0x0005,0x0002,0xfff5,0x001f,0x0009,0x0016,
+0xfffa,0xfff1,0x0024,0xfff3,0xffe7,0xffd2,0xffe9,0x0014,
+0x0025,0x0018,0xffe9,0x0013,0xffd5,0x001d,0xffda,0x000e,
+0x0000,0x001a,0xfff4,0x000a,0xffda,0x0010,0x0000,0x0000,
+0xfff6,0xffee,0xfff6,0xfff1,0x0015,0x0029,0x0006,0xffc8,
+0xffed,0x000d,0x0009,0x0021,0x0009,0x0006,0xffec,0xffff,
+0x000b,0xfff3,0x0018,0x000d,0x0017,0xffe0,0xfffa,0x0016,
+0x001a,0x000f,0x0000,0xfff8,0xfff4,0x0025,0xffff,0x0002,
+0xffe4,0xffe5,0x0005,0x0025,0x0007,0x0023,0xfff8,0x0015,
+0xffb1,0x0002,0x000c,0x0016,0x002a,0x001c,0x0001,0xffcd,
+0xffca,0xfff1,0xfff8,0x0029,0x0000,0x0018,0xfffa,0x000f,
+0xffff,0xffef,0x0024,0x000c,0x000e,0xffd1,0x0000,0x0006,
+0x000b,0x001a,0xfff0,0x001b,0xfff3,0x002b,0xffea,0x0012,
+0xfff6,0xfff4,0xfffe,0x0022,0xfff1,0x0021,0xfff3,0x000d,
+0xfff1,0xfff3,0xfff3,0xffff,0x001a,0x000e,0x0009,0xffd7,
+0xffff,0x0010,0x002e,0xffe5,0xffee,0xffff,0xfffc,0x0010,
+0x000b,0xffd5,0x0008,0xffd3,0x0000,0xfff5,0xffeb,0x001d,
+0x001e,0x0008,0xffed,0xffd6,0xffd7,0x001c,0x0000,0x0014,
+0xffec,0xfff5,0xffe2,0xfff6,0x0004,0x0005,0x0020,0x001c,
+0xffac,0x0006,0x0009,0x0024,0x002f,0x0001,0x0004,0xffdc,
+0xffdc,0xfff4,0x001b,0xffec,0xffe6,0x0011,0x0009,0x0021,
+0xffff,0xffd1,0x0013,0xffd2,0xfff7,0xffe5,0xfff1,0x000d,
+0x000f,0x0012,0xffdd,0xfffa,0xffd6,0x0021,0xffec,0x0024,
+0xffff,0x0004,0xffda,0xfff2,0xffee,0x0002,0x001b,0x0014,
+0xffc9,0xfffa,0xffff,0xfff2,0x0029,0xfff6,0x0004,0xffd0,
+0xffea,0x0014,0x001f,0x000a,0xfffb,0xfff9,0xffec,0x0009,
+0x0010,0xffd3,0x0008,0xffed,0x0028,0xfff4,0x0001,0x000f,
+0x0004,0x000a,0xfff4,0xffde,0xfff5,0x0029,0x0010,0x0018,
+0xffe2,0xffcf,0xffec,0x000d,0x001c,0x0016,0x0012,0x002b,
+0xff84,0x000d,0x0015,0x0017,0x003e,0xffe9,0x0000,0xffd5,
+0xffc7,0xfff8,0x000d,0x0012,0xfff2,0x000a,0xfffa,0x001a,
+0x0003,0xffcf,0x0013,0xffed,0x001f,0xffe5,0x0007,0x0000,
+0xfff5,0x0014,0xffe3,0x0001,0xfff4,0x002f,0xfffc,0x0027,
+0xfff5,0xffde,0xffe4,0x0009,0x0005,0x0013,0x000d,0x0022,
+0x0022,0xfff2,0xfffc,0x0011,0xfffb,0x0018,0x001a,0xffec,
+0x001b,0x002d,0x000c,0xffe8,0x0002,0xfff3,0x0015,0x0008,
+0x0016,0x0010,0x0020,0x0002,0xffde,0xffe1,0xffec,0x0007,
+0x003a,0xfffb,0x0005,0xfffe,0xffda,0x000c,0xfffe,0x0001,
+0xffff,0x001a,0xfffb,0x0017,0x0015,0x0006,0xfff8,0xfff5,
+0xffde,0x0006,0x0012,0x0037,0x000f,0x000b,0x0015,0xfff0,
+0xfff8,0x0010,0xfffa,0xfff0,0xfffb,0x0004,0x0023,0x0018,
+0x000a,0x000c,0x002b,0x0001,0xffd5,0xffd1,0xfff2,0xfff8,
+0x002b,0x0005,0xfff6,0x0022,0xffd9,0x0012,0xffea,0x0010,
+0x0011,0x002a,0xfff3,0x0013,0xffff,0x0003,0xfff2,0xffec,
+0xfffb,0xfff9,0x0007,0x0004,0x0009,0x0000,0x0015,0xffe4,
+0x0006,0x0030,0xfffe,0x000e,0x000f,0xffec,0x0005,0x0000,
+0x001b,0x000e,0x0020,0x001c,0x0005,0xffe0,0x0002,0xfffa,
+0x0020,0xfffd,0x000c,0x0005,0xfff8,0x0019,0x000e,0x0004,
+0xfff4,0xfff5,0x0004,0x002f,0x002d,0x0016,0xffea,0x0002,
+0xffb6,0x000c,0x001e,0x002a,0x001e,0xfff2,0x0010,0xffe9,
+0xffe3,0x0013,0xffec,0x0015,0x0007,0xffff,0x0013,0x0011,
+0x000e,0x000a,0x002b,0x001b,0xfffd,0xffd0,0x0008,0xffea,
+0x0010,0x0007,0xfffc,0x002a,0xfff7,0x001f,0xfffa,0x0014,
+0x0006,0x0004,0xfffd,0x002b,0x0016,0x0014,0xffe4,0xfffb,
+0xfff6,0xfffd,0x0004,0x0012,0x000f,0xffe5,0x0018,0xfff3,
+0x0018,0x0033,0x0022,0xffd1,0xfff4,0xffe4,0x0015,0x0013,
+0x001b,0xfff0,0x000f,0xffe2,0xffef,0xfff4,0xfff4,0x0000,
+0x0024,0xfff6,0xfff9,0xffe5,0xffdb,0x0010,0x000f,0x0016,
+0xfffd,0x0004,0xffe1,0xffff,0x002a,0xfff9,0x0012,0x0009,
+0xffb1,0x0011,0x001b,0x0038,0x0024,0xffd7,0x0013,0xfff8,
+0xfff6,0x0016,0x000f,0xffd9,0xffec,0xfff7,0x0023,0x0023,
+0x000f,0xffec,0x001a,0xffe1,0xffe6,0xffe5,0xfffa,0xfff1,
+0x0014,0x0000,0xffe9,0x0008,0xffda,0x0016,0xfffb,0x0026,
+0x000f,0x0014,0xffd9,0xfffc,0x0012,0xfff7,0x000d,0x0001,
+0xffce,0x0003,0x0010,0x0005,0x001e,0xffcb,0x0013,0xffec,
+0x0003,0x0037,0x0013,0xfff8,0x0000,0xffde,0x0005,0x000b,
+0x0020,0xffee,0x000f,0xfffc,0x0016,0xfff3,0x000a,0xfff3,
+0x0009,0xfff8,0x0000,0xffed,0xfff9,0x001d,0x001f,0x001a,
+0xfff3,0xffdf,0xffeb,0x0016,0x0041,0x0009,0x0004,0x0017,
+0xff89,0x0018,0x0027,0x002c,0x0033,0xffbe,0x000e,0xfff1,
+0xffe1,0x001a,0x0001,0x0000,0xfff9,0xfff0,0x0013,0x001c,
+0x0013,0xffea,0x001a,0xfffc,0x000d,0xffe4,0x0010,0xffe3,
+0xfffb,0x0001,0xfff0,0x0010,0xfff8,0x0023,0x000a,0x002a,
+0x0004,0xffef,0xffe3,0x0013,0x002a,0x0007,0x0000,0x000f,
+0x001f,0xffe4,0x0006,0xfff0,0xfff0,0x0032,0xffe1,0xfff2,
+0x0000,0x0006,0x0024,0x0011,0x0000,0x0000,0xffe3,0xfff2,
+0x000b,0x0012,0x0014,0xffe7,0xfffa,0xffdf,0xffed,0x0014,
+0x002b,0x0015,0x0015,0xfff2,0xffc9,0x0000,0xffe0,0xffd8,
+0xffd6,0x0027,0x0001,0xfff8,0xfff2,0x0008,0xfffa,0xfffd,
+0xffdb,0xfff8,0x001d,0x0016,0x0004,0x0025,0xffdc,0xfff7,
+0xffde,0xffea,0x0011,0x0018,0xfff8,0x0012,0xfff1,0x0002,
+0xffff,0x000e,0x001f,0xffe7,0xfff1,0xffd0,0xfff3,0x0004,
+0x001c,0x001f,0x0005,0x0015,0xffc7,0x0004,0xffcb,0xffe8,
+0xffe9,0x0037,0xfffa,0xfff4,0xffdb,0x0006,0xfff5,0xfff5,
+0xfff9,0xffea,0x0012,0xffe3,0xffff,0x0019,0xffdc,0xffeb,
+0xffec,0x0009,0x0016,0x0038,0x000b,0xfffa,0xffd3,0xffeb,
+0x000f,0x0010,0x0014,0x0001,0x0021,0xffde,0x0002,0x0006,
+0x0011,0x0017,0x001c,0xfffa,0xffe7,0x000c,0xfff0,0xffdb,
+0xffcb,0x0001,0x000b,0x000f,0x0008,0x0019,0xffec,0x000b,
+0xffb4,0xffff,0x0029,0x0009,0x0013,0x000c,0xffd7,0xffef,
+0xffc9,0xffee,0x0003,0x003f,0x0003,0x000c,0xffe2,0xfffb,
+0x0003,0x000c,0x001f,0x0000,0x0018,0xffcf,0x0008,0xfff7,
+0x0001,0x0021,0x000c,0x001d,0xffe5,0x0012,0xffdb,0xffeb,
+0xffde,0x0011,0x0003,0x000b,0xfff2,0x0017,0xffe7,0x0002,
+0xfff3,0xffef,0x000f,0xfff1,0x0003,0xffff,0xffdf,0xfff9,
+0xffff,0x000c,0x003a,0xfffb,0xfff1,0xfff3,0xffe3,0xfffd,
+0x0010,0xfff3,0x0004,0xffc7,0x000a,0xfff3,0xfff5,0x000d,
+0x0015,0x000f,0x0009,0xffd8,0xffc9,0x0003,0xfff2,0xffed,
+0xffd4,0x0011,0xffe8,0xffe0,0x0005,0xfffc,0x0015,0x0012,
+0xffae,0x0003,0x0026,0x0017,0x0018,0xfff1,0xffda,0xfffe,
+0xffdb,0xfff1,0x0027,0x0002,0xffe9,0x0004,0xfff1,0x000c,
+0x0003,0xffef,0x000f,0xffc6,0x0001,0xffe3,0xfffa,0xfffe,
+0x0005,0x001a,0xfff9,0xfffc,0xffc8,0x0009,0xffdd,0xfffd,
+0xffe7,0x0021,0xffe0,0xffdc,0xffef,0xfff9,0x000f,0x0009,
+0xffcc,0xfff6,0x001b,0xffe5,0x0012,0xffe6,0xffda,0xfff2,
+0xffe9,0x0010,0x002c,0x0021,0xfffd,0xffec,0xffd3,0xfff6,
+0x0014,0xfff1,0x0003,0xffe1,0x0032,0xfff2,0x000a,0x0000,
+0xfffb,0x0011,0x000f,0xffe0,0xffe8,0x0010,0x0001,0xfff1,
+0xffc9,0xffec,0xfff2,0xfff8,0x001d,0x000c,0x0007,0x0020,
+0xff87,0x0009,0x0032,0x000a,0x0028,0xffd8,0xffd5,0xfff7,
+0xffc6,0xfff4,0x0019,0x0029,0xfff5,0xfffe,0xffe1,0x0005,
+0x0008,0xffed,0x000f,0xffe0,0x0029,0xffe2,0x0010,0xfff0,
+0xffec,0x001c,0x0000,0x0003,0xffe6,0x0016,0xffed,0x0000,
+0xffdc,0xfffc,0xffea,0xfff4,0x0006,0x0009,0x0001,0x0018,
+0x0024,0xffee,0x0018,0x0003,0xffe4,0x0007,0xfff0,0x000d,
+0x001a,0x0029,0x0018,0xffff,0x0005,0xffe6,0xfffd,0xfff4,
+0x001b,0x002e,0x001b,0xfff6,0xffe8,0xffde,0xfff6,0xfff8,
+0x0031,0x0002,0x0022,0x0000,0xffcd,0xfff4,0xffef,0xffda,
+0xffe7,0x0038,0x0000,0x0000,0x0016,0xfffd,0xffec,0xffea,
+0xffe0,0x0002,0x002f,0x002a,0xfff9,0xfffb,0xffeb,0x0012,
+0xfff7,0x000c,0x0005,0x0005,0xfffe,0xfff8,0x000a,0x0004,
+0x000e,0x002a,0x0026,0xfff6,0xffdf,0xffcf,0xfffb,0xffe8,
+0x0021,0x000c,0x0011,0x0023,0xffcc,0xfffa,0xffda,0xffea,
+0xfff9,0x0048,0xfff9,0xfffd,0x0000,0xfffa,0xffe7,0xffe2,
+0xfffe,0xfff5,0x0024,0xfff7,0xfff3,0xffef,0xffeb,0x0006,
+0x0005,0x002d,0x000a,0x0024,0x0012,0xffdf,0xffed,0xffed,
+0x001f,0x002c,0x001b,0x000f,0x0010,0xffde,0x000b,0xffea,
+0x0016,0x0004,0x0028,0x0007,0xffeb,0x0000,0xffff,0xffdd,
+0xffdc,0x0012,0x000a,0x0018,0x002e,0x000c,0xffde,0xfff8,
+0xffb9,0x0009,0x003b,0x001d,0x0008,0xffe2,0xffe6,0x000b,
+0xffe2,0x0010,0xfff8,0x002c,0x0009,0xfff2,0xfffb,0xfffe,
+0x0013,0x0028,0x0026,0x000f,0x0007,0xffce,0x0011,0xffda,
+0x0007,0x000e,0x0018,0x002b,0xffea,0x0006,0xffea,0xffed,
+0xffef,0x0022,0x0002,0x0014,0x0017,0x000a,0xffd9,0xfff0,
+0xfff8,0xfffa,0x0021,0x0004,0xfff9,0xffd4,0xffee,0x0015,
+0x0017,0x0030,0x002e,0xffe8,0xfff7,0xffd8,0xfffd,0xffff,
+0x0020,0x000d,0x000b,0xffd5,0xfff9,0xfff2,0xfffd,0xfff1,
+0x001a,0xfffd,0x0015,0xffe6,0xffce,0xfff8,0x0000,0xfff0,
+0xffe5,0x0022,0xffe7,0xffe9,0x002b,0xffef,0x0006,0xffff,
+0xffb3,0x000d,0x0038,0x002b,0x000d,0xffc7,0xffe9,0x001a,
+0xfff5,0x0013,0x001b,0xfff0,0xffef,0xffea,0x000a,0x000f,
+0x0013,0x000a,0x0016,0xffd5,0xfff0,0xffe2,0x0002,0xffe1,
+0x000b,0x0006,0x0005,0x0009,0xffcc,0xfffe,0xffec,0x0000,
+0xfff8,0x0032,0xffdf,0xffe5,0x0013,0xffed,0x0001,0xfff7,
+0xffd1,0x0000,0x002d,0xfff9,0x0007,0xffbb,0xffe9,0x000d,
+0x0002,0x0033,0x0020,0x000e,0x0003,0xffd1,0xffed,0xfff8,
+0x0025,0x000b,0x000a,0xfff0,0x0020,0xfff1,0x0013,0xffe3,
+0x0000,0xffff,0x001c,0xffee,0xffec,0x0004,0x0010,0xfff3,
+0xffda,0xfffd,0xfff1,0x0000,0x0042,0x0000,0xfff9,0x000d,
+0xff8c,0x0014,0x0044,0x001e,0x001c,0xffad,0xffe4,0x0012,
+0xffe0,0x0016,0x000d,0x0015,0xfffb,0xffe4,0xfffb,0x0007,
+0x0018,0x0008,0x0016,0xffef,0x0017,0xffe2,0x0019,0xffd3,
+0xfff1,0x0009,0x000b,0x0012,0xffea,0x000a,0xfffc,0x0002,
+0xffed,0x000c,0xffe9,0xfffd,0x002b,0xfffe,0xfff4,0x0004,
+0x0048,0xfff3,0xffe3,0xffee,0x0004,0x005a,0x0025,0xffde,
+0x0004,0xffff,0x000d,0xfff7,0xfffa,0x000b,0x0002,0x0018,
+0xfff3,0x0003,0xfff9,0xfff5,0xfffc,0x0004,0xffd6,0x0019,
+0x001f,0xffff,0xfff8,0xffe3,0xfffa,0x0011,0xffed,0x0002,
+0xfff6,0xfffa,0x0026,0x0016,0xffd6,0xffed,0xfffa,0xfff5,
+0x0003,0x0007,0xfffa,0x0014,0x0019,0x004d,0x0020,0xffe3,
+0xffe1,0xffe2,0xfffc,0xfffe,0xfff2,0x001d,0x0010,0x0028,
+0xffe6,0x0000,0x0003,0xfff4,0xfff3,0xfff6,0xffdc,0x0009,
+0x000f,0x0008,0xffe8,0x0006,0xfff9,0x0016,0xffd8,0x0011,
+0x0008,0x0009,0x001f,0x0012,0xffbe,0xffea,0xfff5,0xffed,
+0x0020,0xfffa,0xffef,0xffe1,0x0013,0x0041,0x0021,0xffd7,
+0xfff0,0x0001,0x0000,0x001d,0x0006,0x0004,0xfff3,0x0011,
+0xfff7,0x0001,0xfff8,0x000e,0x0023,0x0003,0xffed,0x000b,
+0x0004,0x0000,0xffff,0xffeb,0x0017,0x001e,0xfffd,0x0005,
+0xffec,0xffd4,0x0030,0x002e,0xffed,0xfffd,0xffec,0x0003,
+0xffdc,0x000d,0x0005,0x0007,0x0028,0x0033,0x001c,0xffdc,
+0xffcc,0xffe5,0xffee,0x0024,0xfffe,0x0016,0x0000,0x0021,
+0xffeb,0xfffe,0x0003,0x000d,0x001a,0xfff5,0xfff2,0xfffc,
+0xfff6,0x000a,0xffee,0x000e,0x0016,0x0024,0xffe8,0x0015,
+0xffff,0xffe4,0x0028,0x002a,0xffd6,0xfffb,0xffe7,0xfffb,
+0x001b,0xffff,0xffec,0xffef,0x0018,0x0026,0x0023,0xffe6,
+0x0001,0x0004,0x0023,0xffe0,0xffeb,0xfffd,0x0002,0x0023,
+0xfff8,0xffe3,0xffe8,0xffd4,0x000c,0x0018,0xffde,0x0012,
+0x0008,0xfff9,0xffeb,0xffc9,0xfffb,0x0015,0xfffe,0x0017,
+0xfff5,0xffe4,0x000d,0xffff,0xffea,0xffdf,0x0015,0x000a,
+0xffd7,0x0012,0x0002,0x0015,0x002d,0x0018,0x001e,0xffeb,
+0xffdf,0xffe8,0x0011,0xffe8,0xffe3,0x000f,0x0010,0x0033,
+0xffeb,0xffdf,0xfff3,0xffd3,0x0003,0x0008,0xffe4,0x0002,
+0xfff9,0x0002,0xffdb,0xffed,0xfff9,0x001b,0xffea,0x0027,
+0x0007,0xfff4,0x0005,0xfffb,0xffd3,0xffdd,0x000f,0x0001,
+0xfff4,0x0004,0xfff8,0xffe3,0x0027,0x000c,0x001e,0xffdf,
+0xffed,0x0008,0x0015,0x0006,0xfff8,0xfff7,0xfff3,0x001c,
+0xfffc,0xffe1,0xffe8,0xffee,0x0034,0x0017,0xfff4,0x0004,
+0xffee,0xfffb,0xfff2,0xffd1,0x0018,0x0022,0x000e,0x001b,
+0xffea,0xffbe,0x0016,0x0016,0x0001,0xfff0,0x0006,0x0018,
+0xffaf,0x0019,0x000e,0x0008,0x003d,0x0000,0x0019,0xffe4,
+0xffca,0xffec,0x0003,0x000e,0xffef,0x0008,0x0000,0x002c,
+0xfff0,0xffdd,0xfff3,0xffee,0x002b,0x0007,0xfffa,0xfff5,
+0xffdf,0x0004,0xffe2,0xfff5,0x0016,0x0028,0xfffa,0x002b,
+0xfffd,0xffce,0x000e,0x0012,0xffea,0xffee,0x0001,0x0010,
+0x004d,0xfffe,0xfff5,0x0001,0xfff9,0x002f,0x0034,0xfffb,
+0x001d,0x0021,0x0001,0xffe4,0x0000,0xfff1,0x001c,0x001a,
+0x0002,0x001e,0x0000,0x0002,0xffea,0x0004,0xffdf,0xfffd,
+0x0024,0xffeb,0x0003,0xfff1,0xfffe,0x0005,0xfffc,0x0004,
+0x0006,0x0009,0x0025,0x001f,0xfffb,0xffe0,0xffec,0xffe2,
+0x0008,0x0011,0x000a,0x0028,0x000d,0x0022,0x002f,0x0000,
+0xfffb,0x0004,0xfff0,0xffeb,0xfff8,0x0002,0x002a,0x002b,
+0xfff6,0x001a,0x000a,0x0002,0xffe1,0xfff5,0xffe5,0xffed,
+0x0015,0xfff6,0xfff4,0x0014,0xfffd,0x000b,0xffe7,0x0014,
+0x0019,0x0019,0x001d,0x001c,0xffe4,0xffde,0xffe7,0xffda,
+0x0025,0x0003,0x0000,0xfff6,0x0007,0x0016,0x0030,0xfff4,
+0x0008,0x0024,0xfff4,0x0009,0x000c,0xffea,0x000c,0x0013,
+0x0006,0x001c,0x0000,0x001d,0x0012,0x0003,0xfff5,0xffef,
+0x000a,0xffee,0x000a,0xfff9,0x001b,0x0012,0x000b,0x0007,
+0xfffd,0xffe4,0x002f,0x0037,0x0012,0xfff1,0xffde,0xfff0,
+0xffe1,0x0018,0x0017,0x001b,0x001d,0x0009,0x002b,0xfff8,
+0xffe6,0x0007,0xffe2,0x0011,0x0004,0xfffd,0x001a,0x0023,
+0xfffb,0x0018,0x000a,0x001c,0x0009,0xfff4,0xfffb,0xffdf,
+0xfffb,0xfff8,0xfffb,0x001d,0x001a,0x0018,0xfff7,0x0017,
+0x000e,0xfff4,0x0027,0x0034,0xfffb,0xffee,0xffd9,0xffe8,
+0x0020,0x0008,0xfffd,0x0002,0x000d,0xfffc,0x0032,0x0001,
+0x001b,0x0027,0x0017,0xffcd,0xfff1,0xffe2,0x001b,0x0025,
+0x0007,0xffff,0xffef,0xffe3,0xfffb,0x0017,0xffe7,0xfff6,
+0x000e,0xffe6,0xfff8,0xffd7,0xffff,0x0009,0x000d,0x001a,
+0x0005,0xfff4,0x000c,0x0007,0x000e,0xffd3,0x0006,0xfff7,
+0xffdc,0x001d,0x0013,0x0029,0x0022,0xffee,0x002d,0x0006,
+0xfff8,0x000a,0x0005,0xffd5,0xffe9,0xfff5,0x002a,0x0035,
+0xfffb,0xfffb,0xfffa,0xffe2,0xfff2,0x0008,0xffec,0xffe6,
+0xffff,0xfff0,0xffe7,0xfffc,0xfffd,0x000f,0xfff9,0x0029,
+0x0017,0x0003,0x0004,0x0003,0xfff8,0xffd0,0x0001,0xffef,
+0xfff9,0x000f,0x0009,0xfff7,0x001c,0xffe3,0x002d,0xfffb,
+0x0006,0x002b,0x0009,0xfff4,0xfffe,0xffdc,0x000c,0x001e,
+0x000b,0xfffd,0xffef,0xfffd,0x0022,0x0016,0xfffd,0xffe8,
+0xfff4,0xffe8,0xfffe,0xffe0,0x001c,0x0016,0x001d,0x001d,
+0xfffb,0xffce,0x0015,0x001f,0x0026,0xffe3,0xfff9,0x0005,
+0xffb4,0x0023,0x001f,0x001c,0x0031,0xffd5,0x0028,0x0000,
+0xffe3,0x000e,0xfff8,0xfffb,0xfff6,0xffee,0x001a,0x002e,
+0x0000,0xfff9,0xfffa,0xfffd,0x0019,0x0007,0x0002,0xffd8,
+0xffe4,0xfff2,0xffee,0x0003,0x001b,0x001c,0x0008,0x002d,
+0x000d,0xffde,0x000d,0x001b,0x000f,0xffe1,0xfff4,0xfffd,
+0x004a,0xffef,0x0000,0xffe1,0xffee,0x0049,0xfffb,0x0000,
+0x0003,0xfffb,0x0019,0x000c,0xfffd,0xffff,0xffea,0x0003,
+0xfff7,0x0021,0xfff4,0xffe8,0x0006,0x0002,0xffdf,0x0009,
+0x0015,0x0005,0x0014,0xffe5,0xffed,0xfff9,0xffde,0xffdb,
+0xffde,0x0016,0x002c,0x0000,0xffd7,0xffe3,0xffef,0xffeb,
+0x0005,0x0003,0x0015,0x0007,0x0002,0x003c,0xfff6,0x0005,
+0xffe0,0xffde,0x0007,0x0014,0xfff5,0x0010,0xfff8,0x0014,
+0xffeb,0x001d,0xffff,0xffe8,0xfffe,0xfff3,0xffe5,0xfffa,
+0x0005,0x000f,0x0003,0x0008,0xffeb,0xffff,0xffc9,0xffeb,
+0xfff1,0x0026,0x0025,0xfffd,0xffbf,0xffe0,0xffe9,0xffe2,
+0x0023,0xfff6,0x000b,0xffd4,0xfffd,0x0030,0xfff6,0xfffa,
+0xffef,0xfffe,0x000b,0x0033,0x0008,0xfff8,0xffda,0xfffd,
+0xfffc,0x001f,0xfff4,0x0002,0x002e,0x0001,0xfff6,0xfffc,
+0xfffb,0x0007,0x001a,0xffed,0x000a,0x0005,0xffee,0xffde,
+0xffd3,0xfff1,0x0036,0x0018,0xffee,0xfff3,0xffe1,0xfff9,
+0xffdf,0x000a,0x0022,0xfffb,0x0011,0x0023,0xfff1,0xffff,
+0xffcb,0xffe2,0xfffa,0x003b,0x0000,0x000a,0xffe8,0x000d,
+0xffef,0x001b,0xffff,0x0001,0x0025,0xfff3,0xfffc,0xffec,
+0xffec,0x0012,0x000a,0x0010,0x0008,0x000b,0xffd9,0xffee,
+0xffe6,0x0000,0x002e,0x0014,0xffd7,0xfff1,0xffdb,0xfff1,
+0x001d,0xfffb,0x0007,0xffe2,0x0001,0x0015,0xfff9,0x0007,
+0x0000,0x0000,0x002f,0xfff7,0xffee,0xfff1,0xffea,0x000e,
+0xfffc,0x0000,0xffe4,0xffc7,0x0017,0x0015,0xffe7,0x0002,
+0xffff,0x0000,0x0007,0xffcb,0xffed,0xfffd,0xffef,0xfff1,
+0xffdc,0x0000,0x0013,0xffe8,0xffeb,0xffd5,0x0009,0x0000,
+0xffd9,0x000e,0x001e,0x0008,0x0016,0x0008,0xfff4,0x000c,
+0xffde,0xffe5,0x001d,0xfffe,0xffe6,0x0002,0xfff8,0x001f,
+0xfff0,0xfffd,0xffef,0xffc7,0x000e,0x0006,0xffed,0xfff3,
+0xfff0,0x000a,0xfff8,0xffef,0xffec,0x0002,0xffda,0x0000,
+0xffef,0x0010,0x000b,0xffe5,0xffd4,0xffd3,0x0004,0xfff8,
+0xfff7,0x0001,0x0014,0xffd5,0x0011,0xfffd,0xfff4,0x0000,
+0xffec,0x0004,0x0021,0x001d,0xfffa,0xffea,0xffda,0x0007,
+0x0000,0xffff,0xffe3,0xffe2,0x003f,0x0015,0xfffd,0xfff5,
+0xffe5,0x0001,0x000e,0xffd3,0x000a,0x0009,0xffff,0xfff4,
+0xffd1,0xffdb,0x001c,0x0000,0x0002,0xffe6,0xfffc,0x000d,
+0xffb2,0x0015,0x002b,0xfffc,0x0026,0xffef,0xffef,0x0005,
+0xffc9,0xffe8,0x000f,0x0024,0xfff2,0xfffc,0xffe8,0x0018,
+0xfff4,0xfffb,0xffef,0xffe1,0x0036,0x0005,0x0002,0xffe5,
+0xffd5,0x000c,0xfffe,0xfff7,0x0009,0x000f,0xffea,0x0003,
+0xffe4,0xffeb,0x0014,0xfffd,0xffec,0xffe4,0xfff7,0x0005,
+0x0050,0xfffa,0x0010,0xfff5,0xffe2,0x001e,0x0009,0x001c,
+0x001c,0x001d,0x000d,0xfffa,0x0002,0xffe4,0x0003,0x0005,
+0x0007,0x003c,0xfffb,0xfff7,0xfff5,0x0001,0xffe8,0xffed,
+0x001b,0xfff3,0x0020,0xfff3,0xfff1,0xffed,0xffed,0xffdd,
+0xffef,0x0027,0x002b,0x0009,0xfffc,0xffd6,0xffe0,0xffd7,
+0x000b,0x000e,0x0027,0x001b,0xfff7,0x0011,0x0004,0x0021,
+0xfffa,0x0000,0xfffc,0x0001,0xfffb,0xfff6,0x0011,0x0016,
+0xfffb,0x0039,0x0005,0xfff7,0xffec,0xfff3,0xffee,0xffdd,
+0x000b,0xfffd,0x0010,0x0016,0xffef,0xfff3,0xffd8,0xffed,
+0x0001,0x0037,0x0023,0x0005,0xffe5,0xffd4,0xffdb,0xffcf,
+0x0028,0x0000,0x001c,0xffe8,0xfff2,0x0005,0x0004,0x0015,
+0x0007,0x0021,0x0000,0x0020,0x000f,0xffdd,0xfff4,0xffff,
+0x000b,0x003a,0xfffb,0x0010,0x001c,0x0000,0xffff,0xffdf,
+0x0000,0xfff5,0x0027,0xfffb,0x000e,0xfffa,0xfffd,0xffe1,
+0xffe4,0x0001,0x0035,0x0021,0x0013,0xffe7,0xffd2,0xffe6,
+0xffe4,0x0014,0x0033,0x000e,0x0006,0xfff9,0x0000,0x001a,
+0xffe5,0x0004,0xffee,0x0028,0x0006,0xfff0,0x0001,0x000f,
+0x0000,0x0037,0x0005,0x0010,0x0013,0xfff2,0x0003,0xffcf,
+0xfff2,0xffff,0x0016,0x001e,0x000c,0x0000,0xffe8,0xfff1,
+0xfff7,0x0011,0x002d,0x001d,0xfffc,0xffe4,0xffcd,0xffdd,
+0x0022,0x0004,0x0019,0xfff6,0xfff7,0xffeb,0x0007,0x0024,
+0x001a,0x0024,0x0023,0xffe4,0xfff4,0xffd6,0x0003,0x0010,
+0x000c,0x001c,0xffeb,0xffd6,0x0005,0x0015,0xfff0,0xffe6,
+0x0004,0xffed,0x0013,0xffd9,0xfff1,0xfff1,0xffff,0xfff3,
+0xffed,0x0011,0x0011,0xfff2,0x000f,0xffc9,0xfffc,0xffed,
+0xffde,0x0019,0x0030,0x001c,0x000b,0xffde,0x0002,0x0029,
+0xfff7,0x0007,0x0011,0xffeb,0xffec,0xffe8,0x0011,0x0021,
+0x0000,0x0018,0xfff6,0xffd6,0xfffd,0x0005,0xfff6,0xffd6,
+0xfff5,0xfff8,0x0003,0xfffd,0xfff0,0xfff7,0xffea,0x0002,
+0x0000,0x0021,0x000a,0xffee,0xfff9,0xffc6,0xfff6,0xffe4,
+0xfffc,0x000b,0x0025,0xffe9,0x0005,0xffd2,0x0002,0x001d,
+0x0005,0x0027,0x0015,0x0009,0x0000,0xffcf,0xfff4,0x0009,
+0x0010,0x001a,0xffea,0xfff1,0x002d,0x0014,0x0005,0xffd8,
+0xffea,0xffef,0x001a,0xffe1,0x000e,0xfffe,0x000e,0xfff6,
+0xffe2,0xffec,0x001b,0x0009,0x0027,0xffd9,0xffee,0xfffb,
+0xffb7,0x0020,0x003c,0x000f,0x001a,0xffc5,0xfffe,0x0021,
+0xffe2,0x000a,0x0003,0x0011,0xfff8,0xffe2,0x0001,0x001a,
+0x0004,0x0016,0xfff6,0xfff0,0x0024,0x0005,0x000b,0xffc8,
+0xffdb,0xfffa,0x000a,0x0005,0x000d,0x0003,0xfffa,0x0005,
+0xfff5,0xfffc,0x0013,0x0005,0x0010,0xffd7,0xffe8,0xfff3};
+
+static const unsigned long ftable1[128]={
+0x4cc9f,0x482af,0x47bd8,0x44700,0x5238b,0x47377,0x4898a,0x411fd,
+0x4f8b7,0x4a7d5,0x4bd1b,0x47feb,0x5554b,0x49414,0x4c9d4,0x4403c,
+0x4fa57,0x550c5,0x4ea1c,0x54c6e,0x51f27,0x4ff8f,0x4cab5,0x4bbb6,
+0x53e58,0x5a5e8,0x5551f,0x5d6ff,0x56499,0x540f7,0x528e8,0x51740,
+0x4cdba,0x459db,0x4a644,0x4445b,0x4e123,0x423bc,0x480ed,0x3ed60,
+0x5292b,0x49d33,0x51d8c,0x49f1d,0x53cc4,0x45a9d,0x4e88c,0x43073,
+0x4d3a1,0x4e583,0x4f702,0x5182e,0x4b81f,0x47291,0x49dc4,0x46541,
+0x5434b,0x55bc1,0x5a27e,0x5d510,0x51ba0,0x4c587,0x51f83,0x4d45f,
+0x4d45f,0x51f83,0x4c587,0x51ba0,0x5d510,0x5a27e,0x55bc1,0x5434b,
+0x46541,0x49dc4,0x47291,0x4b81f,0x5182e,0x4f702,0x4e583,0x4d3a1,
+0x43073,0x4e88c,0x45a9d,0x53cc4,0x49f1d,0x51d8c,0x49d33,0x5292b,
+0x3ed60,0x480ed,0x423bc,0x4e123,0x4445b,0x4a644,0x459db,0x4cdba,
+0x51740,0x528e8,0x540f7,0x56499,0x5d6ff,0x5551f,0x5a5e8,0x53e58,
+0x4bbb6,0x4cab5,0x4ff8f,0x51f27,0x54c6e,0x4ea1c,0x550c5,0x4fa57,
+0x4403c,0x4c9d4,0x49414,0x5554b,0x47feb,0x4bd1b,0x4a7d5,0x4f8b7,
+0x411fd,0x4898a,0x47377,0x5238b,0x44700,0x47bd8,0x482af,0x4cc9f};
+
+static const unsigned long ftable2[128]={
+0x2f8e9,0x3444d,0x3637b,0x3608d,0x3372e,0x33ab9,0x33946,0x2f157,
+0x307a1,0x36a4d,0x380c9,0x39279,0x332c0,0x34673,0x33a8a,0x2fec4,
+0x2ecea,0x36011,0x34bb0,0x37829,0x36353,0x39c4a,0x35f32,0x33352,
+0x2cb99,0x33db5,0x3221f,0x3595c,0x31b84,0x35804,0x31d79,0x3075f,
+0x27f6f,0x2f99d,0x2d6c9,0x3369b,0x2df25,0x34928,0x3018c,0x32129,
+0x28cb2,0x31e5c,0x2eebd,0x36c4e,0x2e2a3,0x3601f,0x30a58,0x33aa8,
+0x28cee,0x336a2,0x2e69d,0x37dcd,0x323b6,0x3fa08,0x34b2d,0x3acd3,
+0x27abc,0x3214b,0x2cff6,0x3695f,0x2f09d,0x3ae92,0x315ba,0x37761,
+0x37761,0x315ba,0x3ae92,0x2f09d,0x3695f,0x2cff6,0x3214b,0x27abc,
+0x3acd3,0x34b2d,0x3fa08,0x323b6,0x37dcd,0x2e69d,0x336a2,0x28cee,
+0x33aa8,0x30a58,0x3601f,0x2e2a3,0x36c4e,0x2eebd,0x31e5c,0x28cb2,
+0x32129,0x3018c,0x34928,0x2df25,0x3369b,0x2d6c9,0x2f99d,0x27f6f,
+0x3075f,0x31d79,0x35804,0x31b84,0x3595c,0x3221f,0x33db5,0x2cb99,
+0x33352,0x35f32,0x39c4a,0x36353,0x37829,0x34bb0,0x36011,0x2ecea,
+0x2fec4,0x33a8a,0x34673,0x332c0,0x39279,0x380c9,0x36a4d,0x307a1,
+0x2f157,0x33946,0x33ab9,0x3372e,0x3608d,0x3637b,0x3444d,0x2f8e9};
+
+static const short decodetable1[63]={
+0x0000,0x0006,0x0010,0x0012,0x0014,0x0017,0x0019,0x001d,
+0x0020,0x0024,0x0029,0x002e,0x0033,0x0039,0x0041,0x0049,
+0x0051,0x005b,0x0067,0x0073,0x0081,0x0091,0x00a3,0x00b7,
+0x00cd,0x00e6,0x0103,0x0122,0x0146,0x016d,0x019a,0x01cc,
+0x0204,0x0243,0x028a,0x02da,0x0333,0x0397,0x0407,0x0485,
+0x0512,0x05b0,0x0662,0x0729,0x0809,0x0904,0x0a1e,0x0b5a,
+0x0cbd,0x0e4b,0x1009,0x11fe,0x1430,0x16a7,0x196b,0x1c85,
+0x2000,0x23e7,0x2849,0x2d33,0x32b7,0x38e7,0x3fd9};
+
+static const short decodetable2[127]={
+0xf037,0xf043,0xf04e,0xf058,0xf062,0xf06c,0xf077,0xf082,
+0xf08e,0xf09a,0xf0a6,0xf0b2,0xf0be,0xf0cc,0xf0d9,0xf0e6,
+0xf0f4,0xf101,0xf10f,0xf11e,0xf12d,0xf13d,0xf14c,0xf15c,
+0xf16d,0xf17d,0xf18d,0xf19d,0xf1ae,0xf1bf,0xf1d1,0xf1e3,
+0xf1f6,0xf20a,0xf21d,0xf232,0xf247,0xf25d,0xf274,0xf28c,
+0xf2a4,0xf2bc,0xf2d4,0xf2ee,0xf307,0xf321,0xf33c,0xf358,
+0xf374,0xf391,0xf3af,0xf3ce,0xf3ee,0xf410,0xf433,0xf459,
+0xf480,0xf4a9,0xf4d1,0xf4ff,0xf52c,0xf55c,0xf58c,0xf5c4,
+0xf5fb,0xf637,0xf673,0xf6ad,0xf6e8,0xf727,0xf766,0xf7a7,
+0xf7e8,0xf82a,0xf86b,0xf8ae,0xf8f0,0xf93a,0xf984,0xf9d0,
+0xfa1c,0xfa6c,0xfabc,0xfb0d,0xfb5f,0xfbbe,0xfc1e,0xfc80,
+0xfce2,0xfd45,0xfda8,0xfe19,0xfe8a,0xff0e,0xff92,0x0015,
+0x0098,0x012c,0x01bf,0x0248,0x02d0,0x0353,0x03d6,0x0451,
+0x04cd,0x053e,0x05b0,0x0621,0x0692,0x0707,0x077c,0x07e7,
+0x0852,0x08c2,0x0931,0x09aa,0x0a23,0x0aa4,0x0b25,0x0baa,
+0x0c2e,0x0ca8,0x0d23,0x0d93,0x0e04,0x0e75,0x0ee6};
+
+static const short decodetable3[63]={
+0xf3ed,0xf54d,0xf6ae,0xf7af,0xf8b1,0xf990,0xfa6f,0xfb39,
+0xfc03,0xfcbd,0xfd77,0xfe1d,0xfec4,0xff58,0xffec,0x007b,
+0x010b,0x0196,0x0220,0x02a5,0x032a,0x03a9,0x0429,0x04a1,
+0x0519,0x058c,0x05fe,0x066d,0x06dc,0x0747,0x07b2,0x0817,
+0x087b,0x08d9,0x0937,0x0990,0x09e8,0x0a3a,0x0a8c,0x0ad9,
+0x0b26,0x0b6d,0x0bb4,0x0bf8,0x0c3d,0x0c7e,0x0cbf,0x0cfc,
+0x0d3a,0x0d72,0x0dab,0x0ddf,0x0e13,0x0e44,0x0e75,0x0ea1,
+0x0ecd,0x0ef5,0x0f1e,0x0f44,0x0f6b,0x0f8f,0x0fb4};
+
+static const short decodetable4[63]={
+0xf23b,0xf2ae,0xf321,0xf38c,0xf3f7,0xf457,0xf4b6,0xf517,
+0xf578,0xf5d5,0xf631,0xf68d,0xf6e8,0xf742,0xf79b,0xf7f0,
+0xf845,0xf896,0xf8e7,0xf937,0xf986,0xf9d6,0xfa26,0xfa74,
+0xfac3,0xfb0f,0xfb5c,0xfbaa,0xfbf8,0xfc46,0xfc94,0xfce2,
+0xfd2f,0xfd7f,0xfdcf,0xfe22,0xfe76,0xfec9,0xff1c,0xff73,
+0xffca,0x0020,0x0077,0x00d0,0x0128,0x0186,0x01e4,0x0248,
+0x02ab,0x0315,0x037f,0x03f1,0x0463,0x04e0,0x055d,0x05e8,
+0x0673,0x0710,0x07ad,0x0872,0x0938,0x0a2f,0x0b26};
+
+static const short decodetable5[31]={
+0xf8cb,0xfa55,0xfbdf,0xfcea,0xfdf6,0xfed4,0xffb3,0x006f,
+0x012d,0x01da,0x0287,0x032b,0x03cf,0x046a,0x0505,0x059a,
+0x062e,0x06bf,0x0751,0x07e2,0x0873,0x0904,0x0994,0x0a22,
+0x0aaf,0x0b3c,0x0bc9,0x0c56,0x0ce3,0x0d6a,0x0df1};
+
+static const short decodetable6[31]={
+0xf57d,0xf679,0xf775,0xf83d,0xf904,0xf9b5,0xfa65,0xfb04,
+0xfba2,0xfc2f,0xfcbb,0xfd40,0xfdc5,0xfe44,0xfec4,0xff44,
+0xffc5,0x0047,0x00c9,0x0150,0x01d6,0x0266,0x02f7,0x0396,
+0x0435,0x04f3,0x05b1,0x0692,0x0774,0x0899,0x09bf};
+
+static const short decodetable7[15]={
+0xfaa4,0xfc65,0xfe26,0xff56,0x0085,0x017f,0x0278,0x0362,
+0x044c,0x0537,0x0623,0x071f,0x081b,0x0946,0x0a70};
+
+static const short decodetable8[15]={
+0xf6ab,0xf7d8,0xf905,0xfa1b,0xfb31,0xfc32,0xfd33,0xfe22,
+0xff11,0xfffe,0x00ea,0x01f6,0x0302,0x0462,0x05c2};
+
+static const short decodetable9[15]={
+0xf9df,0xfb3f,0xfca0,0xfdbc,0xfed8,0xffd1,0x00c8,0x01b3,
+0x029e,0x038f,0x047f,0x0596,0x06ad,0x07ff,0x0951};
+
+static const short decodetable10[15]={
+0xf768,0xf890,0xf9b8,0xfac9,0xfbda,0xfcd0,0xfdc7,0xfea8,
+0xff88,0x006c,0x0152,0x0258,0x035f,0x04da,0x0655};
+
+static const short decodetable11[7]={
+0xfd97,0xff2b,0x00be,0x01f0,0x0322,0x0476,0x05cb};
+
+static const short *decodetable[11]={
+decodetable1,decodetable2,decodetable3,decodetable4,decodetable5,
+decodetable6,decodetable7,decodetable8,decodetable9,decodetable10,
+decodetable11};
+
+#endif /* RA144TABLES_H */
diff --git a/contrib/ffmpeg/libavcodec/ra288.c b/contrib/ffmpeg/libavcodec/ra288.c
new file mode 100644
index 000000000..9ba5209ab
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ra288.c
@@ -0,0 +1,258 @@
+/*
+ * RealAudio 2.0 (28.8K)
+ * Copyright (c) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "ra288.h"
+
+typedef struct {
+        float   history[8];
+        float   output[40];
+        float   pr1[36];
+        float   pr2[10];
+        int     phase, phasep;
+
+        float   st1a[111],st1b[37],st1[37];
+        float   st2a[38],st2b[11],st2[11];
+        float   sb[41];
+        float   lhist[10];
+} Real288_internal;
+
+static int ra288_decode_init(AVCodecContext * avctx)
+{
+        Real288_internal *glob=avctx->priv_data;
+        memset(glob,0,sizeof(Real288_internal));
+        return 0;
+}
+
+static void prodsum(float *tgt, float *src, int len, int n);
+static void co(int n, int i, int j, float *in, float *out, float *st1, float *st2, const float *table);
+static int pred(float *in, float *tgt, int n);
+static void colmult(float *tgt, float *m1, const float *m2, int n);
+
+
+/* initial decode */
+static void unpack(unsigned short *tgt, unsigned char *src, unsigned int len)
+{
+  int x,y,z;
+  int n,temp;
+  int buffer[len];
+
+  for (x=0;x<len;tgt[x++]=0)
+    buffer[x]=9+(x&1);
+
+  for (x=y=z=0;x<len/*was 38*/;x++) {
+    n=buffer[y]-z;
+    temp=src[x];
+    if (n<8) temp&=255>>(8-n);
+    tgt[y]+=temp<<z;
+    if (n<=8) {
+      tgt[++y]+=src[x]>>n;
+      z=8-n;
+    } else z+=8;
+  }
+}
+
+static void update(Real288_internal *glob)
+{
+  int x,y;
+  float buffer1[40],temp1[37];
+  float buffer2[8],temp2[11];
+
+  for (x=0,y=glob->phasep+5;x<40;buffer1[x++]=glob->output[(y++)%40]);
+  co(36,40,35,buffer1,temp1,glob->st1a,glob->st1b,table1);
+  if (pred(temp1,glob->st1,36))
+    colmult(glob->pr1,glob->st1,table1a,36);
+
+  for (x=0,y=glob->phase+1;x<8;buffer2[x++]=glob->history[(y++)%8]);
+  co(10,8,20,buffer2,temp2,glob->st2a,glob->st2b,table2);
+  if (pred(temp2,glob->st2,10))
+    colmult(glob->pr2,glob->st2,table2a,10);
+}
+
+/* Decode and produce output */
+static void decode(Real288_internal *glob, unsigned int input)
+{
+  unsigned int x,y;
+  float f;
+  double sum,sumsum;
+  float *p1,*p2;
+  float buffer[5];
+  const float *table;
+
+  for (x=36;x--;glob->sb[x+5]=glob->sb[x]);
+  for (x=5;x--;) {
+    p1=glob->sb+x;p2=glob->pr1;
+    for (sum=0,y=36;y--;sum-=(*(++p1))*(*(p2++)));
+    glob->sb[x]=sum;
+  }
+
+  f=amptable[input&7];
+  table=codetable+(input>>3)*5;
+
+  /* convert log and do rms */
+  for (sum=32,x=10;x--;sum-=glob->pr2[x]*glob->lhist[x]);
+  if (sum<0) sum=0; else if (sum>60) sum=60;
+
+  sumsum=exp(sum*0.1151292546497)*f;    /* pow(10.0,sum/20)*f */
+  for (sum=0,x=5;x--;) { buffer[x]=table[x]*sumsum; sum+=buffer[x]*buffer[x]; }
+  if ((sum/=5)<1) sum=1;
+
+  /* shift and store */
+  for (x=10;--x;glob->lhist[x]=glob->lhist[x-1]);
+  *glob->lhist=glob->history[glob->phase]=10*log10(sum)-32;
+
+  for (x=1;x<5;x++) for (y=x;y--;buffer[x]-=glob->pr1[x-y-1]*buffer[y]);
+
+  /* output */
+  for (x=0;x<5;x++) {
+    f=glob->sb[4-x]+buffer[x];
+    if (f>4095) f=4095; else if (f<-4095) f=-4095;
+    glob->output[glob->phasep+x]=glob->sb[4-x]=f;
+  }
+}
+
+/* column multiply */
+static void colmult(float *tgt, float *m1, const float *m2, int n)
+{
+  while (n--)
+    *(tgt++)=(*(m1++))*(*(m2++));
+}
+
+static int pred(float *in, float *tgt, int n)
+{
+  int x,y;
+  float *p1,*p2;
+  double f0,f1,f2;
+  float temp;
+
+  if (in[n]==0) return 0;
+  if ((f0=*in)<=0) return 0;
+
+  for (x=1;;x++) {
+    if (n<x) return 1;
+
+    p1=in+x;
+    p2=tgt;
+    f1=*(p1--);
+    for (y=x;--y;f1+=(*(p1--))*(*(p2++)));
+
+    p1=tgt+x-1;
+    p2=tgt;
+    *(p1--)=f2=-f1/f0;
+    for (y=x>>1;y--;) {
+      temp=*p2+*p1*f2;
+      *(p1--)+=*p2*f2;
+      *(p2++)=temp;
+    }
+    if ((f0+=f1*f2)<0) return 0;
+  }
+}
+
+static void co(int n, int i, int j, float *in, float *out, float *st1, float *st2, const float *table)
+{
+  int a,b,c;
+  unsigned int x;
+  float *fp;
+  float buffer1[37];
+  float buffer2[37];
+  float work[111];
+
+  /* rotate and multiply */
+  c=(b=(a=n+i)+j)-i;
+  fp=st1+i;
+  for (x=0;x<b;x++) {
+    if (x==c) fp=in;
+    work[x]=*(table++)*(*(st1++)=*(fp++));
+  }
+
+  prodsum(buffer1,work+n,i,n);
+  prodsum(buffer2,work+a,j,n);
+
+  for (x=0;x<=n;x++) {
+    *st2=*st2*(0.5625)+buffer1[x];
+    out[x]=*(st2++)+buffer2[x];
+  }
+  *out*=1.00390625; /* to prevent clipping */
+}
+
+/* product sum (lsf) */
+static void prodsum(float *tgt, float *src, int len, int n)
+{
+  unsigned int x;
+  float *p1,*p2;
+  double sum;
+
+  while (n>=0)
+  {
+    p1=(p2=src)-n;
+    for (sum=0,x=len;x--;sum+=(*p1++)*(*p2++));
+    tgt[n--]=sum;
+  }
+}
+
+static void * decode_block(AVCodecContext * avctx, unsigned char *in, signed short int *out,unsigned len)
+{
+  int x,y;
+  Real288_internal *glob=avctx->priv_data;
+  unsigned short int buffer[len];
+
+  unpack(buffer,in,len);
+  for (x=0;x<32;x++)
+  {
+    glob->phasep=(glob->phase=x&7)*5;
+    decode(glob,buffer[x]);
+    for (y=0;y<5;*(out++)=8*glob->output[glob->phasep+(y++)]);
+    if (glob->phase==3) update(glob);
+  }
+  return out;
+}
+
+/* Decode a block (celp) */
+static int ra288_decode_frame(AVCodecContext * avctx,
+            void *data, int *data_size,
+            uint8_t * buf, int buf_size)
+{
+    void *datao;
+
+    if (buf_size < avctx->block_align)
+    {
+        av_log(avctx, AV_LOG_ERROR, "ffra288: Error! Input buffer is too small [%d<%d]\n",buf_size,avctx->block_align);
+        return 0;
+    }
+
+    datao = data;
+    data = decode_block(avctx, buf, (signed short *)data, avctx->block_align);
+
+    *data_size = (char *)data - (char *)datao;
+    return avctx->block_align;
+}
+
+AVCodec ra_288_decoder =
+{
+    "real_288",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_RA_288,
+    sizeof(Real288_internal),
+    ra288_decode_init,
+    NULL,
+    NULL,
+    ra288_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/ra288.h b/contrib/ffmpeg/libavcodec/ra288.h
new file mode 100644
index 000000000..8cc290397
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ra288.h
@@ -0,0 +1,205 @@
+/*
+ * RealAudio 2.0 (28.8K)
+ * Copyright (c) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef RA288TABLES_H
+#define RA288TABLES_H
+
+static const float amptable[8]={ 0.515625, 0.90234375, 1.57910156, 2.76342773,
+                         -0.515625,-0.90234375,-1.57910156,-2.76342773 };
+
+static const float codetable[640]={
+         0.326171875,        -1.4404296875,        -0.6123046875,        -0.8740234375,        -1.24658203125,
+        -2.45703125,        -2.23486328125,        -0.51025390625,         1.419921875,         1.6201171875,
+        -1.37646484375,        -1.30712890625,        -0.462890625,        -1.37939453125,        -2.1728515625,
+        -3.26123046875,        -0.166015625,         0.7236328125,        -0.623046875,         0.6162109375,
+        -0.2744140625,        -3.29931640625,         0.62548828125,         0.08740234375,        -0.6220703125,
+        -1.2265625,        -3.4814453125,        -2.40478515625,         3.37548828125,         1.17724609375,
+        -1.2099609375,        -0.076171875,         2.28662109375,        -1.89111328125,         0,
+        -4.0078125,         1.044921875,        -0.2333984375,        -1.35986328125,         0.26025390625,
+         0.92236328125,         1.34716796875,         0.67431640625,        -3.39599609375,        -2.88720703125,
+         2.4814453125,        -1.201171875,        -2.8212890625,         0.87744140625,         0.27734375,
+        -1.078125,        -1.61572265625,        -2.20849609375,        -3.044921875,        -3.66455078125,
+        -1.32763671875,         2.1279296875,        -1.458984375,        -0.56103515625,         1.30078125,
+         0.61474609375,         0.48583984375,         1.32373046875,        -1.203125,        -5.0732421875,
+         0.8408203125,        -3.69580078125,        -1.3388671875,         1.06005859375,        -1.13720703125,
+         0.50390625,         0.36474609375,        -0.4189453125,        -3.8798828125,        -6.27099609375,
+         1.5166015625,         2.37109375,        -2.04736328125,        -1.24072265625,         0.50537109375,
+         0.9091796875,        -0.46875,        -3.236328125,         0.2001953125,         2.8720703125,
+        -1.21728515625,        -1.283203125,        -1.953125,        -0.029296875,         3.5166015625,
+        -1.3046875,         0.7060546875,         0.75,                -1.87060546875,         0.60205078125,
+        -2.5888671875,         3.375,                 0.77587890625,        -2.04443359375,         1.78955078125,
+        -1.6875,        -3.9892578125,        -3.76416015625,         0.67578125,         2.2939453125,
+        -2.29443359375,        -3.03173828125,        -5.45703125,         3.95703125,         8.2177734375,
+         0.4541015625,         3.419921875,         0.61962890625,        -4.38330078125,         1.25341796875,
+         2.27001953125,         5.763671875,         1.68017578125,        -2.76220703125,         0.58544921875,
+         1.2412109375,        -0.08935546875,        -4.32568359375,        -3.89453125,         1.5771484375,
+        -1.40234375,        -0.98193359375,        -4.74267578125,        -4.09423828125,         6.33935546875,
+         1.5068359375,         1.044921875,        -1.796875,        -4.70849609375,        -1.4140625,
+        -3.71533203125,         3.18115234375,        -1.11474609375,        -1.2314453125,         3.091796875,
+        -1.62744140625,        -2.744140625,        -4.4580078125,        -5.43505859375,         2.70654296875,
+        -0.19873046875,        -3.28173828125,        -8.5283203125,        -1.41064453125,         5.6484375,
+         1.802734375,         3.318359375,        -0.1279296875,        -5.2958984375,        -0.90625,
+         3.55224609375,         6.544921875,        -1.45947265625,        -5.17333984375,         2.41015625,
+         0.119140625,        -1.08349609375,         1.296875,         1.84375,        -2.642578125,
+        -1.97412109375,        -2.8974609375,         1.04052734375,         0.42138671875,        -1.3994140625,
+        -1.6123046875,         0.85107421875,        -0.9794921875,        -0.0625,        -1.001953125,
+        -3.10595703125,         1.6318359375,        -0.77294921875,        -0.01025390625,         0.5576171875,
+        -1.87353515625,        -0.89404296875,         3.12353515625,         1.24267578125,        -1.390625,
+        -4.556640625,        -3.1875,         2.59228515625,         0.9697265625,        -1.09619140625,
+        -2.1923828125,         0.365234375,         0.94482421875,        -1.47802734375,        -0.24072265625,
+        -4.51904296875,         2.6201171875,         1.55908203125,        -2.19384765625,         0.87109375,
+         2.3359375,        -0.1806640625,         0.9111328125,         0.51611328125,        -0.92236328125,
+         3.5849609375,        -1.3134765625,        -1.25830078125,         0.330078125,        -0.29833984375,
+        -0.2451171875,         1.09130859375,        -0.9033203125,        -0.86767578125,        -1.00048828125,
+         0.49365234375,         1.89453125,        -1.20361328125,         1.07861328125,        -0.07421875,
+         1.265625,         1.38134765625,         2.728515625,         1.38623046875,        -3.5673828125,
+        -1.48876953125,        -2.4013671875,         2.90771484375,         4.49267578125,        -2.17138671875,
+         0.34033203125,         1.908203125,         2.8310546875,        -2.17333984375,        -2.267578125,
+        -1.03564453125,         2.658203125,        -1.2548828125,         0.15673828125,        -0.5869140625,
+         1.3896484375,        -1.0185546875,         1.724609375,         0.2763671875,        -0.345703125,
+        -2.08935546875,         0.4638671875,         2.431640625,         1.83056640625,         0.220703125,
+        -1.212890625,         1.7099609375,         0.83935546875,        -0.0830078125,         0.1162109375,
+        -1.67724609375,         0.12841796875,         1.0322265625,        -0.97900390625,         1.15283203125,
+        -3.5830078125,        -0.58984375,         4.56396484375,        -0.59375,        -1.95947265625,
+        -6.5908203125,        -0.21435546875,         3.919921875,        -2.06640625,         0.17626953125,
+        -1.82080078125,         2.65283203125,         0.978515625,        -2.30810546875,        -0.61474609375,
+        -1.9462890625,         3.78076171875,         4.11572265625,        -1.80224609375,        -0.48193359375,
+         2.5380859375,        -0.20654296875,         0.5615234375,        -0.62548828125,         0.3984375,
+         3.61767578125,         2.00634765625,        -1.92822265625,         1.3134765625,         0.0146484384313,
+         0.6083984375,         1.49169921875,        -0.01708984375,        -0.6689453125,        -0.1201171875,
+        -0.72705078125,         2.75146484375,        -0.3310546875,        -1.28271484375,         1.5478515625,
+         2.3583984375,        -2.23876953125,         0.98046875,        -0.5185546875,         0.39013671875,
+        -0.06298828125,         0.35009765625,         2.2431640625,         7.29345703125,         5.2275390625,
+         0.20361328125,         1.34716796875,         0.9033203125,        -2.46923828125,        -0.56298828125,
+        -1.89794921875,         3.59423828125,        -2.81640625,         2.09228515625,         0.3251953125,
+         0.70458984375,        -0.4580078125,         0.009765625,        -1.03466796875,        -0.82861328125,
+        -1.8125,        -1.6611328125,        -1.080078125,         0.0537109375,         1.04296875,
+        -1.44140625,         0.005859375,        -0.765625,        -1.708984375,        -0.90576171875,
+        -0.64208984375,        -0.84521484375,         0.56640625,        -0.2724609375,         0.83447265625,
+         0.04296875,        -2.23095703125,         0.0947265625,        -0.2216796875,        -1.44384765625,
+        -1.38623046875,        -0.8134765625,        -0.13330078125,         1.017578125,        -0.07568359375,
+        -0.09228515625,        -1.16015625,         0.81201171875,        -0.5078125,        -1.19580078125,
+        -1.3876953125,        -0.66845703125,         0.310546875,        -0.12109375,        -1.30712890625,
+         0.74072265625,         0.03857421875,        -1.47119140625,        -1.79150390625,        -0.47509765625,
+         0.93408203125,        -1.21728515625,        -2.59375,        -0.36572265625,         0.62060546875,
+        -1.41748046875,        -1.623046875,        -1.833984375,        -1.8017578125,        -0.89306640625,
+        -1.42236328125,        -0.75537109375,        -1.34765625,        -0.6865234375,         0.548828125,
+         0.900390625,        -0.8955078125,         0.22265625,         0.3447265625,        -2.0859375,
+         0.22802734375,        -2.078125,        -0.93212890625,         0.74267578125,         0.5537109375,
+        -0.06201171875,        -0.4853515625,        -0.31103515625,        -0.72802734375,        -3.1708984375,
+         0.42626953125,        -0.99853515625,        -1.869140625,        -1.36328125,        -0.2822265625,
+         1.12841796875,        -0.88720703125,         1.28515625,        -1.490234375,         0.9609375,
+         0.31298828125,         0.5830078125,         0.92431640625,         2.00537109375,         3.0966796875,
+        -0.02197265625,         0.5849609375,         1.0546875,        -0.70751953125,         1.07568359375,
+        -0.978515625,         0.83642578125,         1.7177734375,         1.294921875,         2.07568359375,
+         1.43359375,        -1.9375,         0.625,                 0.06396484375,        -0.720703125,
+         1.38037109375,         0.00390625,        -0.94140625,         1.2978515625,         1.71533203125,
+         1.56201171875,        -0.3984375,         1.31201171875,        -0.85009765625,        -0.68701171875,
+         1.439453125,         1.96728515625,         0.1923828125,        -0.12353515625,         0.6337890625,
+         2.0927734375,         0.02490234375,        -2.20068359375,        -0.015625,        -0.32177734375,
+         1.90576171875,         2.7568359375,        -2.728515625,        -1.265625,         2.78662109375,
+        -0.2958984375,         0.6025390625,        -0.78466796875,        -2.53271484375,         0.32421875,
+        -0.25634765625,         1.767578125,        -1.0703125,        -1.23388671875,         0.83349609375,
+         2.09814453125,        -1.58740234375,        -1.11474609375,         0.396484375,        -1.10546875,
+         2.81494140625,         0.2578125,        -1.60498046875,         0.66015625,         0.81640625,
+         1.33544921875,         0.60595703125,        -0.53857421875,        -1.59814453125,        -1.66357421875,
+         1.96923828125,         0.8046875,        -1.44775390625,        -0.5732421875,         0.705078125,
+         0.0361328125,         0.4482421875,         0.97607421875,         0.44677734375,        -0.5009765625,
+        -1.21875,        -0.78369140625,         0.9931640625,         1.4404296875,         0.11181640625,
+        -1.05859375,         0.99462890625,         0.00732421921566,-0.6171875,        -0.1015625,
+        -1.734375,         0.7470703125,         0.28369140625,         0.72802734375,         0.4697265625,
+        -1.27587890625,        -1.1416015625,         1.76806640625,        -0.7265625,        -1.06689453125,
+        -0.85302734375,         0.03955078125,         2.7041015625,         0.69921875,        -1.10205078125,
+        -0.49755859375,         0.42333984375,         0.1044921875,        -1.115234375,        -0.7373046875,
+        -0.822265625,         1.375,                -0.11181640625,         1.24560546875,        -0.67822265625,
+         1.32177734375,         0.24609375,         0.23388671875,         1.35888671875,        -0.49267578125,
+         1.22900390625,        -0.72607421875,        -0.779296875,         0.30322265625,         0.94189453125,
+        -0.072265625,         1.0771484375,        -2.09375,         0.630859375,        -0.68408203125,
+        -0.25732421875,         0.60693359375,        -1.33349609375,         0.93212890625,         0.625,
+         1.04931640625,        -0.73291015625,         1.80078125,         0.2978515625,        -2.24169921875,
+         1.6142578125,        -1.64501953125,         0.91552734375,         1.775390625,        -0.59423828125,
+         1.2568359375,         1.22705078125,         0.70751953125,        -1.5009765625,        -2.43115234375,
+         0.3974609375,         0.8916015625,        -1.21923828125,         2.0673828125,        -1.99072265625,
+         0.8125,        -0.107421875,         1.6689453125,         0.4892578125,         0.54443359375,
+         0.38134765625,         0.8095703125,         1.91357421875,         2.9931640625,         1.533203125,
+         0.560546875,         1.98486328125,         0.740234375,         0.39794921875,         0.09716796875,
+         0.58154296875,         1.21533203125,         1.25048828125,         1.18212890625,         1.19287109375,
+         0.3759765625,        -2.88818359375,         2.69287109375,        -0.1796875,        -1.56201171875,
+         0.5810546875,         0.51123046875,         1.8271484375,         3.38232421875,        -1.02001953125,
+         0.142578125,         1.51318359375,         2.103515625,        -0.3701171875,        -1.19873046875,
+         0.25537109375,         1.91455078125,         1.974609375,         0.6767578125,         0.04150390625,
+         2.13232421875,         0.4912109375,        -0.611328125,        -0.7158203125,        -0.67529296875,
+         1.880859375,         0.77099609375,        -0.03759765625,         1.0078125,         0.423828125,
+         2.49462890625,         1.42529296875,        -0.0986328125,         0.17529296875,        -0.24853515625,
+         1.7822265625,         1.5654296875,         1.12451171875,         0.82666015625,         0.6328125,
+         1.41845703125,        -1.90771484375,         0.11181640625,        -0.583984375,        -1.138671875,
+         2.91845703125,        -1.75048828125,         0.39306640625,         1.86767578125,        -1.5322265625,
+         1.8291015625,        -0.2958984375,         0.02587890625,        -0.13134765625,        -1.61181640625,
+         0.2958984375,         0.9853515625,        -0.642578125,         1.984375,         0.1943359375
+};
+
+static const float table1[111]={
+        0.576690972,        0.580838025,        0.585013986,        0.589219987,        0.59345597,        0.597723007,
+        0.602020264,        0.606384277,        0.610748291,        0.615142822,        0.619598389,        0.624084473,
+        0.628570557,        0.633117676,        0.637695313,        0.642272949,        0.646911621,        0.651580811,
+        0.656280518,        0.66104126,        0.665802002,        0.670593262,        0.675445557,        0.680328369,
+        0.685241699,        0.690185547,        0.695159912,        0.700164795,        0.705230713,        0.710327148,
+        0.715454102,        0.720611572,        0.725830078,        0.731048584,        0.736328125,        0.741638184,
+        0.747009277,        0.752380371,        0.7578125,        0.763305664,        0.768798828,        0.774353027,
+        0.779937744,        0.785583496,        0.791229248,        0.796936035,        0.802703857,        0.808502197,
+        0.814331055,        0.820220947,        0.826141357,        0.832092285,        0.838104248,        0.844146729,
+        0.850250244,        0.856384277,        0.862548828,        0.868774414,        0.875061035,        0.881378174,
+        0.88772583,        0.894134521,        0.900604248,        0.907104492,        0.913635254,        0.920227051,
+        0.926879883,        0.933563232,        0.940307617,        0.94708252,        0.953918457,        0.96081543,
+        0.96774292,        0.974731445,        0.981781006,        0.988861084,        0.994842529,        0.998565674,
+        0.999969482,        0.99911499,        0.996002197,        0.990600586,        0.982910156,        0.973022461,
+        0.960876465,        0.946533203,        0.930053711,        0.911437988,        0.89074707,        0.868041992,
+        0.843322754,        0.816680908,        0.788208008,        0.757904053,        0.725891113,        0.692199707,
+        0.656921387,        0.620178223,        0.582000732,        0.542480469,        0.501739502,        0.459838867,
+        0.416900635,        0.373016357,        0.328277588,        0.282775879,        0.236663818,        0.189971924,
+        0.142852783,        0.0954284668,        0.0477600098
+};
+
+static const float table2[38]={
+        0.505699992,        0.524200022,        0.54339999,        0.563300014,        0.583953857,        0.60534668,
+        0.627502441,        0.650482178,        0.674316406,        0.699005127,        0.724578857,        0.75112915,
+        0.778625488,        0.807128906,        0.836669922,        0.86730957,        0.899078369,        0.932006836,
+        0.961486816,        0.982757568,        0.995635986,        1,                0.995819092,        0.983154297,
+        0.96206665,        0.932769775,        0.895507813,        0.850585938,        0.798400879,        0.739379883,
+        0.674072266,        0.602996826,        0.526763916,        0.446014404,        0.361480713,        0.273834229,
+        0.183868408,        0.0923461914
+};
+
+static const float table1a[36]={
+        0.98828125,        0.976699829,        0.965254128,        0.953942537,        0.942763507,        0.931715488,
+        0.920796931,        0.910006344,        0.899342179,        0.888803005,        0.878387332,        0.868093729,
+        0.857920766,        0.847867012,        0.837931097,        0.828111589,        0.818407178,        0.808816493,
+        0.799338162,        0.789970934,        0.780713439,        0.771564424,        0.762522638,        0.753586829,
+        0.744755745,        0.736028135,        0.727402806,        0.718878567,        0.710454226,        0.702128589,
+        0.693900526,        0.685768902,        0.677732527,        0.669790328,        0.66194123,        0.654184103
+};
+
+static const float table2a[10]={
+        0.90625,        0.821289063,        0.74432373,        0.674499512,        0.61126709,
+        0.553955078,        0.50201416,        0.454956055,        0.41229248,        0.373657227
+};
+
+#endif /* RA288TABLES_H */
diff --git a/contrib/ffmpeg/libavcodec/rangecoder.c b/contrib/ffmpeg/libavcodec/rangecoder.c
new file mode 100644
index 000000000..1f35d0852
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/rangecoder.c
@@ -0,0 +1,181 @@
+/*
+ * Range coder
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file rangecoder.c
+ * Range coder.
+ * based upon
+ *    "Range encoding: an algorithm for removing redundancy from a digitised
+ *                     message.
+ *     G. N. N. Martin                  Presented in March 1979 to the Video &
+ *                                      Data Recording Conference,
+ *     IBM UK Scientific Center         held in Southampton July 24-27 1979."
+ *
+ */
+
+#include <string.h>
+
+#include "avcodec.h"
+#include "common.h"
+#include "rangecoder.h"
+
+
+void ff_init_range_encoder(RangeCoder *c, uint8_t *buf, int buf_size){
+    c->bytestream_start=
+    c->bytestream= buf;
+    c->bytestream_end= buf + buf_size;
+
+    c->low= 0;
+    c->range= 0xFF00;
+    c->outstanding_count= 0;
+    c->outstanding_byte= -1;
+}
+
+void ff_init_range_decoder(RangeCoder *c, const uint8_t *buf, int buf_size){
+    /* cast to avoid compiler warning */
+    ff_init_range_encoder(c, (uint8_t *) buf, buf_size);
+
+    c->low =(*c->bytestream++)<<8;
+    c->low+= *c->bytestream++;
+}
+
+void ff_build_rac_states(RangeCoder *c, int factor, int max_p){
+    const int64_t one= 1LL<<32;
+    int64_t p;
+    int last_p8, p8, i;
+
+    memset(c->zero_state, 0, sizeof(c->zero_state));
+    memset(c-> one_state, 0, sizeof(c-> one_state));
+
+#if 0
+    for(i=1; i<256; i++){
+        if(c->one_state[i])
+            continue;
+
+        p= (i*one + 128) >> 8;
+        last_p8= i;
+        for(;;){
+            p+= ((one-p)*factor + one/2) >> 32;
+            p8= (256*p + one/2) >> 32; //FIXME try without the one
+            if(p8 <= last_p8) p8= last_p8+1;
+            if(p8 > max_p) p8= max_p;
+            if(p8 < last_p8)
+                break;
+            c->one_state[last_p8]=     p8;
+            if(p8 == last_p8)
+                break;
+            last_p8= p8;
+        }
+    }
+#endif
+#if 1
+    last_p8= 0;
+    p= one/2;
+    for(i=0; i<128; i++){
+        p8= (256*p + one/2) >> 32; //FIXME try without the one
+        if(p8 <= last_p8) p8= last_p8+1;
+        if(last_p8 && last_p8<256 && p8<=max_p)
+            c->one_state[last_p8]= p8;
+
+        p+= ((one-p)*factor + one/2) >> 32;
+        last_p8= p8;
+    }
+#endif
+    for(i=256-max_p; i<=max_p; i++){
+        if(c->one_state[i])
+            continue;
+
+        p= (i*one + 128) >> 8;
+        p+= ((one-p)*factor + one/2) >> 32;
+        p8= (256*p + one/2) >> 32; //FIXME try without the one
+        if(p8 <= i) p8= i+1;
+        if(p8 > max_p) p8= max_p;
+        c->one_state[    i]=     p8;
+    }
+
+    for(i=1; i<255; i++)
+        c->zero_state[i]= 256-c->one_state[256-i];
+#if 0
+    for(i=0; i<256; i++)
+        av_log(NULL, AV_LOG_DEBUG, "%3d %3d\n", i, c->one_state[i]);
+#endif
+}
+
+/**
+ *
+ * @return the number of bytes written
+ */
+int ff_rac_terminate(RangeCoder *c){
+    c->range=0xFF;
+    c->low +=0xFF;
+    renorm_encoder(c);
+    c->range=0xFF;
+    renorm_encoder(c);
+
+    assert(c->low   == 0);
+    assert(c->range >= 0x100);
+
+    return c->bytestream - c->bytestream_start;
+}
+
+#if 0 //selftest
+#define SIZE 10240
+int main(){
+    RangeCoder c;
+    uint8_t b[9*SIZE];
+    uint8_t r[9*SIZE];
+    int i;
+    uint8_t state[10]= {0};
+
+    ff_init_range_encoder(&c, b, SIZE);
+    ff_build_rac_states(&c, 0.05*(1LL<<32), 128+64+32+16);
+
+    memset(state, 128, sizeof(state));
+
+    for(i=0; i<SIZE; i++){
+        r[i]= random()%7;
+    }
+
+
+    for(i=0; i<SIZE; i++){
+START_TIMER
+        put_rac(&c, state, r[i]&1);
+STOP_TIMER("put_rac")
+    }
+
+    ff_put_rac_terminate(&c);
+
+    ff_init_range_decoder(&c, b, SIZE);
+
+    memset(state, 128, sizeof(state));
+
+    for(i=0; i<SIZE; i++){
+START_TIMER
+        if( (r[i]&1) != get_rac(&c, state) )
+            av_log(NULL, AV_LOG_DEBUG, "rac failure at %d\n", i);
+STOP_TIMER("get_rac")
+    }
+
+    return 0;
+}
+
+#endif
diff --git a/contrib/ffmpeg/libavcodec/rangecoder.h b/contrib/ffmpeg/libavcodec/rangecoder.h
new file mode 100644
index 000000000..68bd3b60e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/rangecoder.h
@@ -0,0 +1,127 @@
+/*
+ * Range coder
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file rangecoder.h
+ * Range coder.
+ */
+
+typedef struct RangeCoder{
+    int low;
+    int range;
+    int outstanding_count;
+    int outstanding_byte;
+    uint8_t zero_state[256];
+    uint8_t  one_state[256];
+    uint8_t *bytestream_start;
+    uint8_t *bytestream;
+    uint8_t *bytestream_end;
+}RangeCoder;
+
+void ff_init_range_encoder(RangeCoder *c, uint8_t *buf, int buf_size);
+void ff_init_range_decoder(RangeCoder *c, const uint8_t *buf, int buf_size);
+int ff_rac_terminate(RangeCoder *c);
+void ff_build_rac_states(RangeCoder *c, int factor, int max_p);
+
+static inline void renorm_encoder(RangeCoder *c){
+    //FIXME optimize
+    while(c->range < 0x100){
+        if(c->outstanding_byte < 0){
+            c->outstanding_byte= c->low>>8;
+        }else if(c->low <= 0xFF00){
+            *c->bytestream++ = c->outstanding_byte;
+            for(;c->outstanding_count; c->outstanding_count--)
+                *c->bytestream++ = 0xFF;
+            c->outstanding_byte= c->low>>8;
+        }else if(c->low >= 0x10000){
+            *c->bytestream++ = c->outstanding_byte + 1;
+            for(;c->outstanding_count; c->outstanding_count--)
+                *c->bytestream++ = 0x00;
+            c->outstanding_byte= (c->low>>8) & 0xFF;
+        }else{
+            c->outstanding_count++;
+        }
+
+        c->low = (c->low & 0xFF)<<8;
+        c->range <<= 8;
+    }
+}
+
+static inline void put_rac(RangeCoder *c, uint8_t * const state, int bit){
+    int range1= (c->range * (*state)) >> 8;
+
+    assert(*state);
+    assert(range1 < c->range);
+    assert(range1 > 0);
+    if(!bit){
+        c->range -= range1;
+        *state= c->zero_state[*state];
+    }else{
+        c->low += c->range - range1;
+        c->range = range1;
+        *state= c->one_state[*state];
+    }
+
+    renorm_encoder(c);
+}
+
+static inline void refill(RangeCoder *c){
+    if(c->range < 0x100){
+        c->range <<= 8;
+        c->low <<= 8;
+        if(c->bytestream < c->bytestream_end)
+            c->low+= c->bytestream[0];
+        c->bytestream++;
+    }
+}
+
+static inline int get_rac(RangeCoder *c, uint8_t * const state){
+    int range1= (c->range * (*state)) >> 8;
+    int attribute_unused one_mask;
+
+    c->range -= range1;
+#if 1
+    if(c->low < c->range){
+        *state= c->zero_state[*state];
+        refill(c);
+        return 0;
+    }else{
+        c->low -= c->range;
+        *state= c->one_state[*state];
+        c->range = range1;
+        refill(c);
+        return 1;
+    }
+#else
+    one_mask= (c->range - c->low-1)>>31;
+
+    c->low -= c->range & one_mask;
+    c->range += (range1 - c->range) & one_mask;
+
+    *state= c->zero_state[(*state) + (256&one_mask)];
+
+    refill(c);
+
+    return one_mask&1;
+#endif
+}
+
diff --git a/contrib/ffmpeg/libavcodec/ratecontrol.c b/contrib/ffmpeg/libavcodec/ratecontrol.c
new file mode 100644
index 000000000..d96c837e6
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ratecontrol.c
@@ -0,0 +1,944 @@
+/*
+ * Rate control for video encoders
+ *
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file ratecontrol.c
+ * Rate control for video encoders.
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "ratecontrol.h"
+#include "mpegvideo.h"
+#include "eval.h"
+
+#undef NDEBUG // allways check asserts, the speed effect is far too small to disable them
+#include <assert.h>
+
+#ifndef M_E
+#define M_E 2.718281828
+#endif
+
+static int init_pass2(MpegEncContext *s);
+static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_factor, int frame_num);
+
+void ff_write_pass1_stats(MpegEncContext *s){
+    snprintf(s->avctx->stats_out, 256, "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d skipcount:%d hbits:%d;\n",
+            s->current_picture_ptr->display_picture_number, s->current_picture_ptr->coded_picture_number, s->pict_type,
+            s->current_picture.quality, s->i_tex_bits, s->p_tex_bits, s->mv_bits, s->misc_bits,
+            s->f_code, s->b_code, s->current_picture.mc_mb_var_sum, s->current_picture.mb_var_sum, s->i_count, s->skip_count, s->header_bits);
+}
+
+static inline double qp2bits(RateControlEntry *rce, double qp){
+    if(qp<=0.0){
+        av_log(NULL, AV_LOG_ERROR, "qp<=0.0\n");
+    }
+    return rce->qscale * (double)(rce->i_tex_bits + rce->p_tex_bits+1)/ qp;
+}
+
+static inline double bits2qp(RateControlEntry *rce, double bits){
+    if(bits<0.9){
+        av_log(NULL, AV_LOG_ERROR, "bits<0.9\n");
+    }
+    return rce->qscale * (double)(rce->i_tex_bits + rce->p_tex_bits+1)/ bits;
+}
+
+int ff_rate_control_init(MpegEncContext *s)
+{
+    RateControlContext *rcc= &s->rc_context;
+    int i;
+    char *error = NULL;
+    static const char *const_names[]={
+        "PI",
+        "E",
+        "iTex",
+        "pTex",
+        "tex",
+        "mv",
+        "fCode",
+        "iCount",
+        "mcVar",
+        "var",
+        "isI",
+        "isP",
+        "isB",
+        "avgQP",
+        "qComp",
+/*        "lastIQP",
+        "lastPQP",
+        "lastBQP",
+        "nextNonBQP",*/
+        "avgIITex",
+        "avgPITex",
+        "avgPPTex",
+        "avgBPTex",
+        "avgTex",
+        NULL
+    };
+    static double (*func1[])(void *, double)={
+        (void *)bits2qp,
+        (void *)qp2bits,
+        NULL
+    };
+    static const char *func1_names[]={
+        "bits2qp",
+        "qp2bits",
+        NULL
+    };
+    emms_c();
+
+    rcc->rc_eq_eval = ff_parse(s->avctx->rc_eq, const_names, func1, func1_names, NULL, NULL, &error);
+    if (!rcc->rc_eq_eval) {
+        av_log(s->avctx, AV_LOG_ERROR, "Error parsing rc_eq \"%s\": %s\n", s->avctx->rc_eq, error? error : "");
+        return -1;
+    }
+
+    for(i=0; i<5; i++){
+        rcc->pred[i].coeff= FF_QP2LAMBDA * 7.0;
+        rcc->pred[i].count= 1.0;
+
+        rcc->pred[i].decay= 0.4;
+        rcc->i_cplx_sum [i]=
+        rcc->p_cplx_sum [i]=
+        rcc->mv_bits_sum[i]=
+        rcc->qscale_sum [i]=
+        rcc->frame_count[i]= 1; // 1 is better cuz of 1/0 and such
+        rcc->last_qscale_for[i]=FF_QP2LAMBDA * 5;
+    }
+    rcc->buffer_index= s->avctx->rc_initial_buffer_occupancy;
+
+    if(s->flags&CODEC_FLAG_PASS2){
+        int i;
+        char *p;
+
+        /* find number of pics */
+        p= s->avctx->stats_in;
+        for(i=-1; p; i++){
+            p= strchr(p+1, ';');
+        }
+        i+= s->max_b_frames;
+        if(i<=0 || i>=INT_MAX / sizeof(RateControlEntry))
+            return -1;
+        rcc->entry = (RateControlEntry*)av_mallocz(i*sizeof(RateControlEntry));
+        rcc->num_entries= i;
+
+        /* init all to skipped p frames (with b frames we might have a not encoded frame at the end FIXME) */
+        for(i=0; i<rcc->num_entries; i++){
+            RateControlEntry *rce= &rcc->entry[i];
+            rce->pict_type= rce->new_pict_type=P_TYPE;
+            rce->qscale= rce->new_qscale=FF_QP2LAMBDA * 2;
+            rce->misc_bits= s->mb_num + 10;
+            rce->mb_var_sum= s->mb_num*100;
+        }
+
+        /* read stats */
+        p= s->avctx->stats_in;
+        for(i=0; i<rcc->num_entries - s->max_b_frames; i++){
+            RateControlEntry *rce;
+            int picture_number;
+            int e;
+            char *next;
+
+            next= strchr(p, ';');
+            if(next){
+                (*next)=0; //sscanf in unbelieavle slow on looong strings //FIXME copy / dont write
+                next++;
+            }
+            e= sscanf(p, " in:%d ", &picture_number);
+
+            assert(picture_number >= 0);
+            assert(picture_number < rcc->num_entries);
+            rce= &rcc->entry[picture_number];
+
+            e+=sscanf(p, " in:%*d out:%*d type:%d q:%f itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d skipcount:%d hbits:%d",
+                   &rce->pict_type, &rce->qscale, &rce->i_tex_bits, &rce->p_tex_bits, &rce->mv_bits, &rce->misc_bits,
+                   &rce->f_code, &rce->b_code, &rce->mc_mb_var_sum, &rce->mb_var_sum, &rce->i_count, &rce->skip_count, &rce->header_bits);
+            if(e!=14){
+                av_log(s->avctx, AV_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
+                return -1;
+            }
+
+            p= next;
+        }
+
+        if(init_pass2(s) < 0) return -1;
+
+        //FIXME maybe move to end
+        if((s->flags&CODEC_FLAG_PASS2) && s->avctx->rc_strategy == FF_RC_STRATEGY_XVID) {
+#ifdef CONFIG_XVID
+            return ff_xvid_rate_control_init(s);
+#else
+            av_log(s->avctx, AV_LOG_ERROR, "XviD ratecontrol requires libavcodec compiled with XviD support\n");
+            return -1;
+#endif
+        }
+    }
+
+    if(!(s->flags&CODEC_FLAG_PASS2)){
+
+        rcc->short_term_qsum=0.001;
+        rcc->short_term_qcount=0.001;
+
+        rcc->pass1_rc_eq_output_sum= 0.001;
+        rcc->pass1_wanted_bits=0.001;
+
+        /* init stuff with the user specified complexity */
+        if(s->avctx->rc_initial_cplx){
+            for(i=0; i<60*30; i++){
+                double bits= s->avctx->rc_initial_cplx * (i/10000.0 + 1.0)*s->mb_num;
+                RateControlEntry rce;
+                double q;
+
+                if     (i%((s->gop_size+3)/4)==0) rce.pict_type= I_TYPE;
+                else if(i%(s->max_b_frames+1))    rce.pict_type= B_TYPE;
+                else                              rce.pict_type= P_TYPE;
+
+                rce.new_pict_type= rce.pict_type;
+                rce.mc_mb_var_sum= bits*s->mb_num/100000;
+                rce.mb_var_sum   = s->mb_num;
+                rce.qscale   = FF_QP2LAMBDA * 2;
+                rce.f_code   = 2;
+                rce.b_code   = 1;
+                rce.misc_bits= 1;
+
+                if(s->pict_type== I_TYPE){
+                    rce.i_count   = s->mb_num;
+                    rce.i_tex_bits= bits;
+                    rce.p_tex_bits= 0;
+                    rce.mv_bits= 0;
+                }else{
+                    rce.i_count   = 0; //FIXME we do know this approx
+                    rce.i_tex_bits= 0;
+                    rce.p_tex_bits= bits*0.9;
+                    rce.mv_bits= bits*0.1;
+                }
+                rcc->i_cplx_sum [rce.pict_type] += rce.i_tex_bits*rce.qscale;
+                rcc->p_cplx_sum [rce.pict_type] += rce.p_tex_bits*rce.qscale;
+                rcc->mv_bits_sum[rce.pict_type] += rce.mv_bits;
+                rcc->frame_count[rce.pict_type] ++;
+
+                bits= rce.i_tex_bits + rce.p_tex_bits;
+
+                q= get_qscale(s, &rce, rcc->pass1_wanted_bits/rcc->pass1_rc_eq_output_sum, i);
+                rcc->pass1_wanted_bits+= s->bit_rate/(1/av_q2d(s->avctx->time_base)); //FIXME missbehaves a little for variable fps
+            }
+        }
+
+    }
+
+    return 0;
+}
+
+void ff_rate_control_uninit(MpegEncContext *s)
+{
+    RateControlContext *rcc= &s->rc_context;
+    emms_c();
+
+    ff_eval_free(rcc->rc_eq_eval);
+    av_freep(&rcc->entry);
+
+#ifdef CONFIG_XVID
+    if((s->flags&CODEC_FLAG_PASS2) && s->avctx->rc_strategy == FF_RC_STRATEGY_XVID)
+        ff_xvid_rate_control_uninit(s);
+#endif
+}
+
+int ff_vbv_update(MpegEncContext *s, int frame_size){
+    RateControlContext *rcc= &s->rc_context;
+    const double fps= 1/av_q2d(s->avctx->time_base);
+    const int buffer_size= s->avctx->rc_buffer_size;
+    const double min_rate= s->avctx->rc_min_rate/fps;
+    const double max_rate= s->avctx->rc_max_rate/fps;
+
+//printf("%d %f %d %f %f\n", buffer_size, rcc->buffer_index, frame_size, min_rate, max_rate);
+    if(buffer_size){
+        int left;
+
+        rcc->buffer_index-= frame_size;
+        if(rcc->buffer_index < 0){
+            av_log(s->avctx, AV_LOG_ERROR, "rc buffer underflow\n");
+            rcc->buffer_index= 0;
+        }
+
+        left= buffer_size - rcc->buffer_index - 1;
+        rcc->buffer_index += clip(left, min_rate, max_rate);
+
+        if(rcc->buffer_index > buffer_size){
+            int stuffing= ceil((rcc->buffer_index - buffer_size)/8);
+
+            if(stuffing < 4 && s->codec_id == CODEC_ID_MPEG4)
+                stuffing=4;
+            rcc->buffer_index -= 8*stuffing;
+
+            if(s->avctx->debug & FF_DEBUG_RC)
+                av_log(s->avctx, AV_LOG_DEBUG, "stuffing %d bytes\n", stuffing);
+
+            return stuffing;
+        }
+    }
+    return 0;
+}
+
+/**
+ * modifies the bitrate curve from pass1 for one frame
+ */
+static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_factor, int frame_num){
+    RateControlContext *rcc= &s->rc_context;
+    AVCodecContext *a= s->avctx;
+    double q, bits;
+    const int pict_type= rce->new_pict_type;
+    const double mb_num= s->mb_num;
+    int i;
+
+    double const_values[]={
+        M_PI,
+        M_E,
+        rce->i_tex_bits*rce->qscale,
+        rce->p_tex_bits*rce->qscale,
+        (rce->i_tex_bits + rce->p_tex_bits)*(double)rce->qscale,
+        rce->mv_bits/mb_num,
+        rce->pict_type == B_TYPE ? (rce->f_code + rce->b_code)*0.5 : rce->f_code,
+        rce->i_count/mb_num,
+        rce->mc_mb_var_sum/mb_num,
+        rce->mb_var_sum/mb_num,
+        rce->pict_type == I_TYPE,
+        rce->pict_type == P_TYPE,
+        rce->pict_type == B_TYPE,
+        rcc->qscale_sum[pict_type] / (double)rcc->frame_count[pict_type],
+        a->qcompress,
+/*        rcc->last_qscale_for[I_TYPE],
+        rcc->last_qscale_for[P_TYPE],
+        rcc->last_qscale_for[B_TYPE],
+        rcc->next_non_b_qscale,*/
+        rcc->i_cplx_sum[I_TYPE] / (double)rcc->frame_count[I_TYPE],
+        rcc->i_cplx_sum[P_TYPE] / (double)rcc->frame_count[P_TYPE],
+        rcc->p_cplx_sum[P_TYPE] / (double)rcc->frame_count[P_TYPE],
+        rcc->p_cplx_sum[B_TYPE] / (double)rcc->frame_count[B_TYPE],
+        (rcc->i_cplx_sum[pict_type] + rcc->p_cplx_sum[pict_type]) / (double)rcc->frame_count[pict_type],
+        0
+    };
+
+    bits= ff_parse_eval(rcc->rc_eq_eval, const_values, rce);
+    if (isnan(bits)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Error evaluating rc_eq \"%s\"\n", s->avctx->rc_eq);
+        return -1;
+    }
+
+    rcc->pass1_rc_eq_output_sum+= bits;
+    bits*=rate_factor;
+    if(bits<0.0) bits=0.0;
+    bits+= 1.0; //avoid 1/0 issues
+
+    /* user override */
+    for(i=0; i<s->avctx->rc_override_count; i++){
+        RcOverride *rco= s->avctx->rc_override;
+        if(rco[i].start_frame > frame_num) continue;
+        if(rco[i].end_frame   < frame_num) continue;
+
+        if(rco[i].qscale)
+            bits= qp2bits(rce, rco[i].qscale); //FIXME move at end to really force it?
+        else
+            bits*= rco[i].quality_factor;
+    }
+
+    q= bits2qp(rce, bits);
+
+    /* I/B difference */
+    if     (pict_type==I_TYPE && s->avctx->i_quant_factor<0.0)
+        q= -q*s->avctx->i_quant_factor + s->avctx->i_quant_offset;
+    else if(pict_type==B_TYPE && s->avctx->b_quant_factor<0.0)
+        q= -q*s->avctx->b_quant_factor + s->avctx->b_quant_offset;
+
+    return q;
+}
+
+static double get_diff_limited_q(MpegEncContext *s, RateControlEntry *rce, double q){
+    RateControlContext *rcc= &s->rc_context;
+    AVCodecContext *a= s->avctx;
+    const int pict_type= rce->new_pict_type;
+    const double last_p_q    = rcc->last_qscale_for[P_TYPE];
+    const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
+
+    if     (pict_type==I_TYPE && (a->i_quant_factor>0.0 || rcc->last_non_b_pict_type==P_TYPE))
+        q= last_p_q    *FFABS(a->i_quant_factor) + a->i_quant_offset;
+    else if(pict_type==B_TYPE && a->b_quant_factor>0.0)
+        q= last_non_b_q*    a->b_quant_factor  + a->b_quant_offset;
+
+    /* last qscale / qdiff stuff */
+    if(rcc->last_non_b_pict_type==pict_type || pict_type!=I_TYPE){
+        double last_q= rcc->last_qscale_for[pict_type];
+        const int maxdiff= FF_QP2LAMBDA * a->max_qdiff;
+
+        if     (q > last_q + maxdiff) q= last_q + maxdiff;
+        else if(q < last_q - maxdiff) q= last_q - maxdiff;
+    }
+
+    rcc->last_qscale_for[pict_type]= q; //Note we cant do that after blurring
+
+    if(pict_type!=B_TYPE)
+        rcc->last_non_b_pict_type= pict_type;
+
+    return q;
+}
+
+/**
+ * gets the qmin & qmax for pict_type
+ */
+static void get_qminmax(int *qmin_ret, int *qmax_ret, MpegEncContext *s, int pict_type){
+    int qmin= s->avctx->lmin;
+    int qmax= s->avctx->lmax;
+
+    assert(qmin <= qmax);
+
+    if(pict_type==B_TYPE){
+        qmin= (int)(qmin*FFABS(s->avctx->b_quant_factor)+s->avctx->b_quant_offset + 0.5);
+        qmax= (int)(qmax*FFABS(s->avctx->b_quant_factor)+s->avctx->b_quant_offset + 0.5);
+    }else if(pict_type==I_TYPE){
+        qmin= (int)(qmin*FFABS(s->avctx->i_quant_factor)+s->avctx->i_quant_offset + 0.5);
+        qmax= (int)(qmax*FFABS(s->avctx->i_quant_factor)+s->avctx->i_quant_offset + 0.5);
+    }
+
+    qmin= clip(qmin, 1, FF_LAMBDA_MAX);
+    qmax= clip(qmax, 1, FF_LAMBDA_MAX);
+
+    if(qmax<qmin) qmax= qmin;
+
+    *qmin_ret= qmin;
+    *qmax_ret= qmax;
+}
+
+static double modify_qscale(MpegEncContext *s, RateControlEntry *rce, double q, int frame_num){
+    RateControlContext *rcc= &s->rc_context;
+    int qmin, qmax;
+    double bits;
+    const int pict_type= rce->new_pict_type;
+    const double buffer_size= s->avctx->rc_buffer_size;
+    const double fps= 1/av_q2d(s->avctx->time_base);
+    const double min_rate= s->avctx->rc_min_rate / fps;
+    const double max_rate= s->avctx->rc_max_rate / fps;
+
+    get_qminmax(&qmin, &qmax, s, pict_type);
+
+    /* modulation */
+    if(s->avctx->rc_qmod_freq && frame_num%s->avctx->rc_qmod_freq==0 && pict_type==P_TYPE)
+        q*= s->avctx->rc_qmod_amp;
+
+    bits= qp2bits(rce, q);
+//printf("q:%f\n", q);
+    /* buffer overflow/underflow protection */
+    if(buffer_size){
+        double expected_size= rcc->buffer_index;
+        double q_limit;
+
+        if(min_rate){
+            double d= 2*(buffer_size - expected_size)/buffer_size;
+            if(d>1.0) d=1.0;
+            else if(d<0.0001) d=0.0001;
+            q*= pow(d, 1.0/s->avctx->rc_buffer_aggressivity);
+
+            q_limit= bits2qp(rce, FFMAX((min_rate - buffer_size + rcc->buffer_index)*3, 1));
+            if(q > q_limit){
+                if(s->avctx->debug&FF_DEBUG_RC){
+                    av_log(s->avctx, AV_LOG_DEBUG, "limiting QP %f -> %f\n", q, q_limit);
+                }
+                q= q_limit;
+            }
+        }
+
+        if(max_rate){
+            double d= 2*expected_size/buffer_size;
+            if(d>1.0) d=1.0;
+            else if(d<0.0001) d=0.0001;
+            q/= pow(d, 1.0/s->avctx->rc_buffer_aggressivity);
+
+            q_limit= bits2qp(rce, FFMAX(rcc->buffer_index/3, 1));
+            if(q < q_limit){
+                if(s->avctx->debug&FF_DEBUG_RC){
+                    av_log(s->avctx, AV_LOG_DEBUG, "limiting QP %f -> %f\n", q, q_limit);
+                }
+                q= q_limit;
+            }
+        }
+    }
+//printf("q:%f max:%f min:%f size:%f index:%d bits:%f agr:%f\n", q,max_rate, min_rate, buffer_size, rcc->buffer_index, bits, s->avctx->rc_buffer_aggressivity);
+    if(s->avctx->rc_qsquish==0.0 || qmin==qmax){
+        if     (q<qmin) q=qmin;
+        else if(q>qmax) q=qmax;
+    }else{
+        double min2= log(qmin);
+        double max2= log(qmax);
+
+        q= log(q);
+        q= (q - min2)/(max2-min2) - 0.5;
+        q*= -4.0;
+        q= 1.0/(1.0 + exp(q));
+        q= q*(max2-min2) + min2;
+
+        q= exp(q);
+    }
+
+    return q;
+}
+
+//----------------------------------
+// 1 Pass Code
+
+static double predict_size(Predictor *p, double q, double var)
+{
+     return p->coeff*var / (q*p->count);
+}
+
+/*
+static double predict_qp(Predictor *p, double size, double var)
+{
+//printf("coeff:%f, count:%f, var:%f, size:%f//\n", p->coeff, p->count, var, size);
+     return p->coeff*var / (size*p->count);
+}
+*/
+
+static void update_predictor(Predictor *p, double q, double var, double size)
+{
+    double new_coeff= size*q / (var + 1);
+    if(var<10) return;
+
+    p->count*= p->decay;
+    p->coeff*= p->decay;
+    p->count++;
+    p->coeff+= new_coeff;
+}
+
+static void adaptive_quantization(MpegEncContext *s, double q){
+    int i;
+    const float lumi_masking= s->avctx->lumi_masking / (128.0*128.0);
+    const float dark_masking= s->avctx->dark_masking / (128.0*128.0);
+    const float temp_cplx_masking= s->avctx->temporal_cplx_masking;
+    const float spatial_cplx_masking = s->avctx->spatial_cplx_masking;
+    const float p_masking = s->avctx->p_masking;
+    const float border_masking = s->avctx->border_masking;
+    float bits_sum= 0.0;
+    float cplx_sum= 0.0;
+    float cplx_tab[s->mb_num];
+    float bits_tab[s->mb_num];
+    const int qmin= s->avctx->mb_lmin;
+    const int qmax= s->avctx->mb_lmax;
+    Picture * const pic= &s->current_picture;
+    const int mb_width = s->mb_width;
+    const int mb_height = s->mb_height;
+
+    for(i=0; i<s->mb_num; i++){
+        const int mb_xy= s->mb_index2xy[i];
+        float temp_cplx= sqrt(pic->mc_mb_var[mb_xy]); //FIXME merge in pow()
+        float spat_cplx= sqrt(pic->mb_var[mb_xy]);
+        const int lumi= pic->mb_mean[mb_xy];
+        float bits, cplx, factor;
+        int mb_x = mb_xy % s->mb_stride;
+        int mb_y = mb_xy / s->mb_stride;
+        int mb_distance;
+        float mb_factor = 0.0;
+#if 0
+        if(spat_cplx < q/3) spat_cplx= q/3; //FIXME finetune
+        if(temp_cplx < q/3) temp_cplx= q/3; //FIXME finetune
+#endif
+        if(spat_cplx < 4) spat_cplx= 4; //FIXME finetune
+        if(temp_cplx < 4) temp_cplx= 4; //FIXME finetune
+
+        if((s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTRA)){//FIXME hq mode
+            cplx= spat_cplx;
+            factor= 1.0 + p_masking;
+        }else{
+            cplx= temp_cplx;
+            factor= pow(temp_cplx, - temp_cplx_masking);
+        }
+        factor*=pow(spat_cplx, - spatial_cplx_masking);
+
+        if(lumi>127)
+            factor*= (1.0 - (lumi-128)*(lumi-128)*lumi_masking);
+        else
+            factor*= (1.0 - (lumi-128)*(lumi-128)*dark_masking);
+
+        if(mb_x < mb_width/5){
+            mb_distance = mb_width/5 - mb_x;
+            mb_factor = (float)mb_distance / (float)(mb_width/5);
+        }else if(mb_x > 4*mb_width/5){
+            mb_distance = mb_x - 4*mb_width/5;
+            mb_factor = (float)mb_distance / (float)(mb_width/5);
+        }
+        if(mb_y < mb_height/5){
+            mb_distance = mb_height/5 - mb_y;
+            mb_factor = FFMAX(mb_factor, (float)mb_distance / (float)(mb_height/5));
+        }else if(mb_y > 4*mb_height/5){
+            mb_distance = mb_y - 4*mb_height/5;
+            mb_factor = FFMAX(mb_factor, (float)mb_distance / (float)(mb_height/5));
+        }
+
+        factor*= 1.0 - border_masking*mb_factor;
+
+        if(factor<0.00001) factor= 0.00001;
+
+        bits= cplx*factor;
+        cplx_sum+= cplx;
+        bits_sum+= bits;
+        cplx_tab[i]= cplx;
+        bits_tab[i]= bits;
+    }
+
+    /* handle qmin/qmax cliping */
+    if(s->flags&CODEC_FLAG_NORMALIZE_AQP){
+        float factor= bits_sum/cplx_sum;
+        for(i=0; i<s->mb_num; i++){
+            float newq= q*cplx_tab[i]/bits_tab[i];
+            newq*= factor;
+
+            if     (newq > qmax){
+                bits_sum -= bits_tab[i];
+                cplx_sum -= cplx_tab[i]*q/qmax;
+            }
+            else if(newq < qmin){
+                bits_sum -= bits_tab[i];
+                cplx_sum -= cplx_tab[i]*q/qmin;
+            }
+        }
+        if(bits_sum < 0.001) bits_sum= 0.001;
+        if(cplx_sum < 0.001) cplx_sum= 0.001;
+    }
+
+    for(i=0; i<s->mb_num; i++){
+        const int mb_xy= s->mb_index2xy[i];
+        float newq= q*cplx_tab[i]/bits_tab[i];
+        int intq;
+
+        if(s->flags&CODEC_FLAG_NORMALIZE_AQP){
+            newq*= bits_sum/cplx_sum;
+        }
+
+        intq= (int)(newq + 0.5);
+
+        if     (intq > qmax) intq= qmax;
+        else if(intq < qmin) intq= qmin;
+//if(i%s->mb_width==0) printf("\n");
+//printf("%2d%3d ", intq, ff_sqrt(s->mc_mb_var[i]));
+        s->lambda_table[mb_xy]= intq;
+    }
+}
+
+void ff_get_2pass_fcode(MpegEncContext *s){
+    RateControlContext *rcc= &s->rc_context;
+    int picture_number= s->picture_number;
+    RateControlEntry *rce;
+
+    rce= &rcc->entry[picture_number];
+    s->f_code= rce->f_code;
+    s->b_code= rce->b_code;
+}
+
+//FIXME rd or at least approx for dquant
+
+float ff_rate_estimate_qscale(MpegEncContext *s, int dry_run)
+{
+    float q;
+    int qmin, qmax;
+    float br_compensation;
+    double diff;
+    double short_term_q;
+    double fps;
+    int picture_number= s->picture_number;
+    int64_t wanted_bits;
+    RateControlContext *rcc= &s->rc_context;
+    AVCodecContext *a= s->avctx;
+    RateControlEntry local_rce, *rce;
+    double bits;
+    double rate_factor;
+    int var;
+    const int pict_type= s->pict_type;
+    Picture * const pic= &s->current_picture;
+    emms_c();
+
+#ifdef CONFIG_XVID
+    if((s->flags&CODEC_FLAG_PASS2) && s->avctx->rc_strategy == FF_RC_STRATEGY_XVID)
+        return ff_xvid_rate_estimate_qscale(s, dry_run);
+#endif
+
+    get_qminmax(&qmin, &qmax, s, pict_type);
+
+    fps= 1/av_q2d(s->avctx->time_base);
+//printf("input_pic_num:%d pic_num:%d frame_rate:%d\n", s->input_picture_number, s->picture_number, s->frame_rate);
+        /* update predictors */
+    if(picture_number>2 && !dry_run){
+        const int last_var= s->last_pict_type == I_TYPE ? rcc->last_mb_var_sum : rcc->last_mc_mb_var_sum;
+        update_predictor(&rcc->pred[s->last_pict_type], rcc->last_qscale, sqrt(last_var), s->frame_bits);
+    }
+
+    if(s->flags&CODEC_FLAG_PASS2){
+        assert(picture_number>=0);
+        assert(picture_number<rcc->num_entries);
+        rce= &rcc->entry[picture_number];
+        wanted_bits= rce->expected_bits;
+    }else{
+        rce= &local_rce;
+        wanted_bits= (uint64_t)(s->bit_rate*(double)picture_number/fps);
+    }
+
+    diff= s->total_bits - wanted_bits;
+    br_compensation= (a->bit_rate_tolerance - diff)/a->bit_rate_tolerance;
+    if(br_compensation<=0.0) br_compensation=0.001;
+
+    var= pict_type == I_TYPE ? pic->mb_var_sum : pic->mc_mb_var_sum;
+
+    short_term_q = 0; /* avoid warning */
+    if(s->flags&CODEC_FLAG_PASS2){
+        if(pict_type!=I_TYPE)
+            assert(pict_type == rce->new_pict_type);
+
+        q= rce->new_qscale / br_compensation;
+//printf("%f %f %f last:%d var:%d type:%d//\n", q, rce->new_qscale, br_compensation, s->frame_bits, var, pict_type);
+    }else{
+        rce->pict_type=
+        rce->new_pict_type= pict_type;
+        rce->mc_mb_var_sum= pic->mc_mb_var_sum;
+        rce->mb_var_sum   = pic->   mb_var_sum;
+        rce->qscale   = FF_QP2LAMBDA * 2;
+        rce->f_code   = s->f_code;
+        rce->b_code   = s->b_code;
+        rce->misc_bits= 1;
+
+        bits= predict_size(&rcc->pred[pict_type], rce->qscale, sqrt(var));
+        if(pict_type== I_TYPE){
+            rce->i_count   = s->mb_num;
+            rce->i_tex_bits= bits;
+            rce->p_tex_bits= 0;
+            rce->mv_bits= 0;
+        }else{
+            rce->i_count   = 0; //FIXME we do know this approx
+            rce->i_tex_bits= 0;
+            rce->p_tex_bits= bits*0.9;
+
+            rce->mv_bits= bits*0.1;
+        }
+        rcc->i_cplx_sum [pict_type] += rce->i_tex_bits*rce->qscale;
+        rcc->p_cplx_sum [pict_type] += rce->p_tex_bits*rce->qscale;
+        rcc->mv_bits_sum[pict_type] += rce->mv_bits;
+        rcc->frame_count[pict_type] ++;
+
+        bits= rce->i_tex_bits + rce->p_tex_bits;
+        rate_factor= rcc->pass1_wanted_bits/rcc->pass1_rc_eq_output_sum * br_compensation;
+
+        q= get_qscale(s, rce, rate_factor, picture_number);
+        if (q < 0)
+            return -1;
+
+        assert(q>0.0);
+//printf("%f ", q);
+        q= get_diff_limited_q(s, rce, q);
+//printf("%f ", q);
+        assert(q>0.0);
+
+        if(pict_type==P_TYPE || s->intra_only){ //FIXME type dependant blur like in 2-pass
+            rcc->short_term_qsum*=a->qblur;
+            rcc->short_term_qcount*=a->qblur;
+
+            rcc->short_term_qsum+= q;
+            rcc->short_term_qcount++;
+//printf("%f ", q);
+            q= short_term_q= rcc->short_term_qsum/rcc->short_term_qcount;
+//printf("%f ", q);
+        }
+        assert(q>0.0);
+
+        q= modify_qscale(s, rce, q, picture_number);
+
+        rcc->pass1_wanted_bits+= s->bit_rate/fps;
+
+        assert(q>0.0);
+    }
+
+    if(s->avctx->debug&FF_DEBUG_RC){
+        av_log(s->avctx, AV_LOG_DEBUG, "%c qp:%d<%2.1f<%d %d want:%d total:%d comp:%f st_q:%2.2f size:%d var:%d/%d br:%d fps:%d\n",
+        av_get_pict_type_char(pict_type), qmin, q, qmax, picture_number, (int)wanted_bits/1000, (int)s->total_bits/1000,
+        br_compensation, short_term_q, s->frame_bits, pic->mb_var_sum, pic->mc_mb_var_sum, s->bit_rate/1000, (int)fps
+        );
+    }
+
+    if     (q<qmin) q=qmin;
+    else if(q>qmax) q=qmax;
+
+    if(s->adaptive_quant)
+        adaptive_quantization(s, q);
+    else
+        q= (int)(q + 0.5);
+
+    if(!dry_run){
+        rcc->last_qscale= q;
+        rcc->last_mc_mb_var_sum= pic->mc_mb_var_sum;
+        rcc->last_mb_var_sum= pic->mb_var_sum;
+    }
+#if 0
+{
+    static int mvsum=0, texsum=0;
+    mvsum += s->mv_bits;
+    texsum += s->i_tex_bits + s->p_tex_bits;
+    printf("%d %d//\n\n", mvsum, texsum);
+}
+#endif
+    return q;
+}
+
+//----------------------------------------------
+// 2-Pass code
+
+static int init_pass2(MpegEncContext *s)
+{
+    RateControlContext *rcc= &s->rc_context;
+    AVCodecContext *a= s->avctx;
+    int i, toobig;
+    double fps= 1/av_q2d(s->avctx->time_base);
+    double complexity[5]={0,0,0,0,0};   // aproximate bits at quant=1
+    uint64_t const_bits[5]={0,0,0,0,0}; // quantizer idependant bits
+    uint64_t all_const_bits;
+    uint64_t all_available_bits= (uint64_t)(s->bit_rate*(double)rcc->num_entries/fps);
+    double rate_factor=0;
+    double step;
+    //int last_i_frame=-10000000;
+    const int filter_size= (int)(a->qblur*4) | 1;
+    double expected_bits;
+    double *qscale, *blured_qscale, qscale_sum;
+
+    /* find complexity & const_bits & decide the pict_types */
+    for(i=0; i<rcc->num_entries; i++){
+        RateControlEntry *rce= &rcc->entry[i];
+
+        rce->new_pict_type= rce->pict_type;
+        rcc->i_cplx_sum [rce->pict_type] += rce->i_tex_bits*rce->qscale;
+        rcc->p_cplx_sum [rce->pict_type] += rce->p_tex_bits*rce->qscale;
+        rcc->mv_bits_sum[rce->pict_type] += rce->mv_bits;
+        rcc->frame_count[rce->pict_type] ++;
+
+        complexity[rce->new_pict_type]+= (rce->i_tex_bits+ rce->p_tex_bits)*(double)rce->qscale;
+        const_bits[rce->new_pict_type]+= rce->mv_bits + rce->misc_bits;
+    }
+    all_const_bits= const_bits[I_TYPE] + const_bits[P_TYPE] + const_bits[B_TYPE];
+
+    if(all_available_bits < all_const_bits){
+        av_log(s->avctx, AV_LOG_ERROR, "requested bitrate is too low\n");
+        return -1;
+    }
+
+    qscale= av_malloc(sizeof(double)*rcc->num_entries);
+    blured_qscale= av_malloc(sizeof(double)*rcc->num_entries);
+    toobig = 0;
+
+    for(step=256*256; step>0.0000001; step*=0.5){
+        expected_bits=0;
+        rate_factor+= step;
+
+        rcc->buffer_index= s->avctx->rc_buffer_size/2;
+
+        /* find qscale */
+        for(i=0; i<rcc->num_entries; i++){
+            qscale[i]= get_qscale(s, &rcc->entry[i], rate_factor, i);
+        }
+        assert(filter_size%2==1);
+
+        /* fixed I/B QP relative to P mode */
+        for(i=rcc->num_entries-1; i>=0; i--){
+            RateControlEntry *rce= &rcc->entry[i];
+
+            qscale[i]= get_diff_limited_q(s, rce, qscale[i]);
+        }
+
+        /* smooth curve */
+        for(i=0; i<rcc->num_entries; i++){
+            RateControlEntry *rce= &rcc->entry[i];
+            const int pict_type= rce->new_pict_type;
+            int j;
+            double q=0.0, sum=0.0;
+
+            for(j=0; j<filter_size; j++){
+                int index= i+j-filter_size/2;
+                double d= index-i;
+                double coeff= a->qblur==0 ? 1.0 : exp(-d*d/(a->qblur * a->qblur));
+
+                if(index < 0 || index >= rcc->num_entries) continue;
+                if(pict_type != rcc->entry[index].new_pict_type) continue;
+                q+= qscale[index] * coeff;
+                sum+= coeff;
+            }
+            blured_qscale[i]= q/sum;
+        }
+
+        /* find expected bits */
+        for(i=0; i<rcc->num_entries; i++){
+            RateControlEntry *rce= &rcc->entry[i];
+            double bits;
+            rce->new_qscale= modify_qscale(s, rce, blured_qscale[i], i);
+            bits= qp2bits(rce, rce->new_qscale) + rce->mv_bits + rce->misc_bits;
+//printf("%d %f\n", rce->new_bits, blured_qscale[i]);
+            bits += 8*ff_vbv_update(s, bits);
+
+            rce->expected_bits= expected_bits;
+            expected_bits += bits;
+        }
+
+        /*
+        av_log(s->avctx, AV_LOG_INFO,
+            "expected_bits: %f all_available_bits: %d rate_factor: %f\n",
+            expected_bits, (int)all_available_bits, rate_factor);
+        */
+        if(expected_bits > all_available_bits) {
+            rate_factor-= step;
+            ++toobig;
+        }
+    }
+    av_free(qscale);
+    av_free(blured_qscale);
+
+    /* check bitrate calculations and print info */
+    qscale_sum = 0.0;
+    for(i=0; i<rcc->num_entries; i++){
+        /* av_log(s->avctx, AV_LOG_DEBUG, "[lavc rc] entry[%d].new_qscale = %.3f  qp = %.3f\n",
+            i, rcc->entry[i].new_qscale, rcc->entry[i].new_qscale / FF_QP2LAMBDA); */
+        qscale_sum += clip(rcc->entry[i].new_qscale / FF_QP2LAMBDA, s->avctx->qmin, s->avctx->qmax);
+    }
+    assert(toobig <= 40);
+    av_log(s->avctx, AV_LOG_DEBUG,
+        "[lavc rc] requested bitrate: %d bps  expected bitrate: %d bps\n",
+        s->bit_rate,
+        (int)(expected_bits / ((double)all_available_bits/s->bit_rate)));
+    av_log(s->avctx, AV_LOG_DEBUG,
+        "[lavc rc] estimated target average qp: %.3f\n",
+        (float)qscale_sum / rcc->num_entries);
+    if (toobig == 0) {
+        av_log(s->avctx, AV_LOG_INFO,
+            "[lavc rc] Using all of requested bitrate is not "
+            "necessary for this video with these parameters.\n");
+    } else if (toobig == 40) {
+        av_log(s->avctx, AV_LOG_ERROR,
+            "[lavc rc] Error: bitrate too low for this video "
+            "with these parameters.\n");
+        return -1;
+    } else if (fabs(expected_bits/all_available_bits - 1.0) > 0.01) {
+        av_log(s->avctx, AV_LOG_ERROR,
+            "[lavc rc] Error: 2pass curve failed to converge\n");
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/contrib/ffmpeg/libavcodec/ratecontrol.h b/contrib/ffmpeg/libavcodec/ratecontrol.h
new file mode 100644
index 000000000..c428923a5
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ratecontrol.h
@@ -0,0 +1,103 @@
+/*
+ * Ratecontrol
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_RATECONTROL_H
+#define AVCODEC_RATECONTROL_H
+
+/**
+ * @file ratecontrol.h
+ * ratecontrol header.
+ */
+
+#include "eval.h"
+
+typedef struct Predictor{
+    double coeff;
+    double count;
+    double decay;
+} Predictor;
+
+typedef struct RateControlEntry{
+    int pict_type;
+    float qscale;
+    int mv_bits;
+    int i_tex_bits;
+    int p_tex_bits;
+    int misc_bits;
+    int header_bits;
+    uint64_t expected_bits;
+    int new_pict_type;
+    float new_qscale;
+    int mc_mb_var_sum;
+    int mb_var_sum;
+    int i_count;
+    int skip_count;
+    int f_code;
+    int b_code;
+}RateControlEntry;
+
+/**
+ * rate control context.
+ */
+typedef struct RateControlContext{
+    FILE *stats_file;
+    int num_entries;              ///< number of RateControlEntries
+    RateControlEntry *entry;
+    double buffer_index;          ///< amount of bits in the video/audio buffer
+    Predictor pred[5];
+    double short_term_qsum;       ///< sum of recent qscales
+    double short_term_qcount;     ///< count of recent qscales
+    double pass1_rc_eq_output_sum;///< sum of the output of the rc equation, this is used for normalization
+    double pass1_wanted_bits;     ///< bits which should have been outputed by the pass1 code (including complexity init)
+    double last_qscale;
+    double last_qscale_for[5];    ///< last qscale for a specific pict type, used for max_diff & ipb factor stuff
+    int last_mc_mb_var_sum;
+    int last_mb_var_sum;
+    uint64_t i_cplx_sum[5];
+    uint64_t p_cplx_sum[5];
+    uint64_t mv_bits_sum[5];
+    uint64_t qscale_sum[5];
+    int frame_count[5];
+    int last_non_b_pict_type;
+
+    void *non_lavc_opaque;        ///< context for non lavc rc code (for example xvid)
+    float dry_run_qscale;         ///< for xvid rc
+    int last_picture_number;      ///< for xvid rc
+    AVEvalExpr * rc_eq_eval;
+}RateControlContext;
+
+struct MpegEncContext;
+
+/* rate control */
+int ff_rate_control_init(struct MpegEncContext *s);
+float ff_rate_estimate_qscale(struct MpegEncContext *s, int dry_run);
+void ff_write_pass1_stats(struct MpegEncContext *s);
+void ff_rate_control_uninit(struct MpegEncContext *s);
+int ff_vbv_update(struct MpegEncContext *s, int frame_size);
+void ff_get_2pass_fcode(struct MpegEncContext *s);
+
+int ff_xvid_rate_control_init(struct MpegEncContext *s);
+void ff_xvid_rate_control_uninit(struct MpegEncContext *s);
+float ff_xvid_rate_estimate_qscale(struct MpegEncContext *s, int dry_run);
+
+#endif /* AVCODEC_RATECONTROL_H */
+
diff --git a/contrib/ffmpeg/libavcodec/raw.c b/contrib/ffmpeg/libavcodec/raw.c
new file mode 100644
index 000000000..f4fddf73c
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/raw.c
@@ -0,0 +1,210 @@
+/*
+ * Raw Video Codec
+ * Copyright (c) 2001 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file raw.c
+ * Raw Video Codec
+ */
+
+#include "avcodec.h"
+
+typedef struct RawVideoContext {
+    unsigned char * buffer;  /* block of memory for holding one frame */
+    int             length;  /* number of bytes in buffer */
+    AVFrame pic;             ///< AVCodecContext.coded_frame
+} RawVideoContext;
+
+typedef struct PixelFormatTag {
+    int pix_fmt;
+    unsigned int fourcc;
+} PixelFormatTag;
+
+static const PixelFormatTag pixelFormatTags[] = {
+    { PIX_FMT_YUV420P, MKTAG('I', '4', '2', '0') }, /* Planar formats */
+    { PIX_FMT_YUV420P, MKTAG('I', 'Y', 'U', 'V') },
+    { PIX_FMT_YUV420P, MKTAG('Y', 'V', '1', '2') },
+    { PIX_FMT_YUV410P, MKTAG('Y', 'U', 'V', '9') },
+    { PIX_FMT_YUV411P, MKTAG('Y', '4', '1', 'B') },
+    { PIX_FMT_YUV422P, MKTAG('Y', '4', '2', 'B') },
+    { PIX_FMT_GRAY8,   MKTAG('Y', '8', '0', '0') },
+    { PIX_FMT_GRAY8,   MKTAG(' ', ' ', 'Y', '8') },
+
+
+    { PIX_FMT_YUV422,  MKTAG('Y', 'U', 'Y', '2') }, /* Packed formats */
+    { PIX_FMT_YUV422,  MKTAG('Y', '4', '2', '2') },
+    { PIX_FMT_UYVY422, MKTAG('U', 'Y', 'V', 'Y') },
+    { PIX_FMT_GRAY8,   MKTAG('G', 'R', 'E', 'Y') },
+    { PIX_FMT_RGB555,  MKTAG('R', 'G', 'B', 15) },
+    { PIX_FMT_BGR555,  MKTAG('B', 'G', 'R', 15) },
+    { PIX_FMT_RGB565,  MKTAG('R', 'G', 'B', 16) },
+    { PIX_FMT_BGR565,  MKTAG('B', 'G', 'R', 16) },
+
+    /* quicktime */
+    { PIX_FMT_UYVY422, MKTAG('2', 'v', 'u', 'y') },
+
+    { -1, 0 },
+};
+
+static int findPixelFormat(unsigned int fourcc)
+{
+    const PixelFormatTag * tags = pixelFormatTags;
+    while (tags->pix_fmt >= 0) {
+        if (tags->fourcc == fourcc)
+            return tags->pix_fmt;
+        tags++;
+    }
+    return PIX_FMT_YUV420P;
+}
+
+unsigned int avcodec_pix_fmt_to_codec_tag(enum PixelFormat fmt)
+{
+    const PixelFormatTag * tags = pixelFormatTags;
+    while (tags->pix_fmt >= 0) {
+        if (tags->pix_fmt == fmt)
+            return tags->fourcc;
+        tags++;
+    }
+    return 0;
+}
+
+/* RAW Decoder Implementation */
+
+static int raw_init_decoder(AVCodecContext *avctx)
+{
+    RawVideoContext *context = avctx->priv_data;
+
+    if (avctx->codec_tag)
+        avctx->pix_fmt = findPixelFormat(avctx->codec_tag);
+    else if (avctx->bits_per_sample){
+        switch(avctx->bits_per_sample){
+        case  8: avctx->pix_fmt= PIX_FMT_PAL8  ; break;
+        case 15: avctx->pix_fmt= PIX_FMT_RGB555; break;
+        case 16: avctx->pix_fmt= PIX_FMT_RGB555; break;
+        case 24: avctx->pix_fmt= PIX_FMT_BGR24 ; break;
+        case 32: avctx->pix_fmt= PIX_FMT_RGBA32; break;
+        }
+    }
+
+    context->length = avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height);
+    context->buffer = av_malloc(context->length);
+    context->pic.pict_type = FF_I_TYPE;
+    context->pic.key_frame = 1;
+
+    avctx->coded_frame= &context->pic;
+
+    if (!context->buffer)
+        return -1;
+
+    return 0;
+}
+
+static void flip(AVCodecContext *avctx, AVPicture * picture){
+    if(!avctx->codec_tag && avctx->bits_per_sample && picture->linesize[2]==0){
+        picture->data[0] += picture->linesize[0] * (avctx->height-1);
+        picture->linesize[0] *= -1;
+    }
+}
+
+static int raw_decode(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    RawVideoContext *context = avctx->priv_data;
+
+    AVFrame * frame = (AVFrame *) data;
+    AVPicture * picture = (AVPicture *) data;
+
+    frame->interlaced_frame = avctx->coded_frame->interlaced_frame;
+    frame->top_field_first = avctx->coded_frame->top_field_first;
+
+    if(buf_size < context->length - (avctx->pix_fmt==PIX_FMT_PAL8 ? 256*4 : 0))
+        return -1;
+
+    avpicture_fill(picture, buf, avctx->pix_fmt, avctx->width, avctx->height);
+    if(avctx->pix_fmt==PIX_FMT_PAL8 && buf_size < context->length){
+        frame->data[1]= context->buffer;
+    }
+    if (avctx->palctrl && avctx->palctrl->palette_changed) {
+        memcpy(frame->data[1], avctx->palctrl->palette, AVPALETTE_SIZE);
+        avctx->palctrl->palette_changed = 0;
+    }
+
+    flip(avctx, picture);
+
+    if (avctx->codec_tag == MKTAG('Y', 'V', '1', '2'))
+    {
+        // swap fields
+        unsigned char *tmp = picture->data[1];
+        picture->data[1] = picture->data[2];
+        picture->data[2] = tmp;
+    }
+
+    *data_size = sizeof(AVPicture);
+    return buf_size;
+}
+
+static int raw_close_decoder(AVCodecContext *avctx)
+{
+    RawVideoContext *context = avctx->priv_data;
+
+    av_freep(&context->buffer);
+    return 0;
+}
+
+/* RAW Encoder Implementation */
+#ifdef CONFIG_RAWVIDEO_ENCODER
+static int raw_init_encoder(AVCodecContext *avctx)
+{
+    avctx->coded_frame = (AVFrame *)avctx->priv_data;
+    avctx->coded_frame->pict_type = FF_I_TYPE;
+    avctx->coded_frame->key_frame = 1;
+    if(!avctx->codec_tag)
+        avctx->codec_tag = avcodec_pix_fmt_to_codec_tag(avctx->pix_fmt);
+    return 0;
+}
+
+static int raw_encode(AVCodecContext *avctx,
+                            unsigned char *frame, int buf_size, void *data)
+{
+    return avpicture_layout((AVPicture *)data, avctx->pix_fmt, avctx->width,
+                                               avctx->height, frame, buf_size);
+}
+
+AVCodec rawvideo_encoder = {
+    "rawvideo",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_RAWVIDEO,
+    sizeof(AVFrame),
+    raw_init_encoder,
+    raw_encode,
+};
+#endif // CONFIG_RAWVIDEO_ENCODER
+
+AVCodec rawvideo_decoder = {
+    "rawvideo",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_RAWVIDEO,
+    sizeof(RawVideoContext),
+    raw_init_decoder,
+    NULL,
+    raw_close_decoder,
+    raw_decode,
+};
diff --git a/contrib/ffmpeg/libavcodec/resample.c b/contrib/ffmpeg/libavcodec/resample.c
new file mode 100644
index 000000000..043e812c8
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/resample.c
@@ -0,0 +1,249 @@
+/*
+ * Sample rate convertion for both audio and video
+ * Copyright (c) 2000 Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file resample.c
+ * Sample rate convertion for both audio and video.
+ */
+
+#include "avcodec.h"
+
+struct AVResampleContext;
+
+struct ReSampleContext {
+    struct AVResampleContext *resample_context;
+    short *temp[2];
+    int temp_len;
+    float ratio;
+    /* channel convert */
+    int input_channels, output_channels, filter_channels;
+};
+
+/* n1: number of samples */
+static void stereo_to_mono(short *output, short *input, int n1)
+{
+    short *p, *q;
+    int n = n1;
+
+    p = input;
+    q = output;
+    while (n >= 4) {
+        q[0] = (p[0] + p[1]) >> 1;
+        q[1] = (p[2] + p[3]) >> 1;
+        q[2] = (p[4] + p[5]) >> 1;
+        q[3] = (p[6] + p[7]) >> 1;
+        q += 4;
+        p += 8;
+        n -= 4;
+    }
+    while (n > 0) {
+        q[0] = (p[0] + p[1]) >> 1;
+        q++;
+        p += 2;
+        n--;
+    }
+}
+
+/* n1: number of samples */
+static void mono_to_stereo(short *output, short *input, int n1)
+{
+    short *p, *q;
+    int n = n1;
+    int v;
+
+    p = input;
+    q = output;
+    while (n >= 4) {
+        v = p[0]; q[0] = v; q[1] = v;
+        v = p[1]; q[2] = v; q[3] = v;
+        v = p[2]; q[4] = v; q[5] = v;
+        v = p[3]; q[6] = v; q[7] = v;
+        q += 8;
+        p += 4;
+        n -= 4;
+    }
+    while (n > 0) {
+        v = p[0]; q[0] = v; q[1] = v;
+        q += 2;
+        p += 1;
+        n--;
+    }
+}
+
+/* XXX: should use more abstract 'N' channels system */
+static void stereo_split(short *output1, short *output2, short *input, int n)
+{
+    int i;
+
+    for(i=0;i<n;i++) {
+        *output1++ = *input++;
+        *output2++ = *input++;
+    }
+}
+
+static void stereo_mux(short *output, short *input1, short *input2, int n)
+{
+    int i;
+
+    for(i=0;i<n;i++) {
+        *output++ = *input1++;
+        *output++ = *input2++;
+    }
+}
+
+static void ac3_5p1_mux(short *output, short *input1, short *input2, int n)
+{
+    int i;
+    short l,r;
+
+    for(i=0;i<n;i++) {
+      l=*input1++;
+      r=*input2++;
+      *output++ = l;           /* left */
+      *output++ = (l/2)+(r/2); /* center */
+      *output++ = r;           /* right */
+      *output++ = 0;           /* left surround */
+      *output++ = 0;           /* right surroud */
+      *output++ = 0;           /* low freq */
+    }
+}
+
+ReSampleContext *audio_resample_init(int output_channels, int input_channels,
+                                      int output_rate, int input_rate)
+{
+    ReSampleContext *s;
+
+    if ( input_channels > 2)
+      {
+        av_log(NULL, AV_LOG_ERROR, "Resampling with input channels greater than 2 unsupported.");
+        return NULL;
+      }
+
+    s = av_mallocz(sizeof(ReSampleContext));
+    if (!s)
+      {
+        av_log(NULL, AV_LOG_ERROR, "Can't allocate memory for resample context.");
+        return NULL;
+      }
+
+    s->ratio = (float)output_rate / (float)input_rate;
+
+    s->input_channels = input_channels;
+    s->output_channels = output_channels;
+
+    s->filter_channels = s->input_channels;
+    if (s->output_channels < s->filter_channels)
+        s->filter_channels = s->output_channels;
+
+/*
+ * ac3 output is the only case where filter_channels could be greater than 2.
+ * input channels can't be greater than 2, so resample the 2 channels and then
+ * expand to 6 channels after the resampling.
+ */
+    if(s->filter_channels>2)
+      s->filter_channels = 2;
+
+    s->resample_context= av_resample_init(output_rate, input_rate, 16, 10, 0, 1.0);
+
+    return s;
+}
+
+/* resample audio. 'nb_samples' is the number of input samples */
+/* XXX: optimize it ! */
+int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples)
+{
+    int i, nb_samples1;
+    short *bufin[2];
+    short *bufout[2];
+    short *buftmp2[2], *buftmp3[2];
+    int lenout;
+
+    if (s->input_channels == s->output_channels && s->ratio == 1.0 && 0) {
+        /* nothing to do */
+        memcpy(output, input, nb_samples * s->input_channels * sizeof(short));
+        return nb_samples;
+    }
+
+    /* XXX: move those malloc to resample init code */
+    for(i=0; i<s->filter_channels; i++){
+        bufin[i]= (short*) av_malloc( (nb_samples + s->temp_len) * sizeof(short) );
+        memcpy(bufin[i], s->temp[i], s->temp_len * sizeof(short));
+        buftmp2[i] = bufin[i] + s->temp_len;
+    }
+
+    /* make some zoom to avoid round pb */
+    lenout= (int)(nb_samples * s->ratio) + 16;
+    bufout[0]= (short*) av_malloc( lenout * sizeof(short) );
+    bufout[1]= (short*) av_malloc( lenout * sizeof(short) );
+
+    if (s->input_channels == 2 &&
+        s->output_channels == 1) {
+        buftmp3[0] = output;
+        stereo_to_mono(buftmp2[0], input, nb_samples);
+    } else if (s->output_channels >= 2 && s->input_channels == 1) {
+        buftmp3[0] = bufout[0];
+        memcpy(buftmp2[0], input, nb_samples*sizeof(short));
+    } else if (s->output_channels >= 2) {
+        buftmp3[0] = bufout[0];
+        buftmp3[1] = bufout[1];
+        stereo_split(buftmp2[0], buftmp2[1], input, nb_samples);
+    } else {
+        buftmp3[0] = output;
+        memcpy(buftmp2[0], input, nb_samples*sizeof(short));
+    }
+
+    nb_samples += s->temp_len;
+
+    /* resample each channel */
+    nb_samples1 = 0; /* avoid warning */
+    for(i=0;i<s->filter_channels;i++) {
+        int consumed;
+        int is_last= i+1 == s->filter_channels;
+
+        nb_samples1 = av_resample(s->resample_context, buftmp3[i], bufin[i], &consumed, nb_samples, lenout, is_last);
+        s->temp_len= nb_samples - consumed;
+        s->temp[i]= av_realloc(s->temp[i], s->temp_len*sizeof(short));
+        memcpy(s->temp[i], bufin[i] + consumed, s->temp_len*sizeof(short));
+    }
+
+    if (s->output_channels == 2 && s->input_channels == 1) {
+        mono_to_stereo(output, buftmp3[0], nb_samples1);
+    } else if (s->output_channels == 2) {
+        stereo_mux(output, buftmp3[0], buftmp3[1], nb_samples1);
+    } else if (s->output_channels == 6) {
+        ac3_5p1_mux(output, buftmp3[0], buftmp3[1], nb_samples1);
+    }
+
+    for(i=0; i<s->filter_channels; i++)
+        av_free(bufin[i]);
+
+    av_free(bufout[0]);
+    av_free(bufout[1]);
+    return nb_samples1;
+}
+
+void audio_resample_close(ReSampleContext *s)
+{
+    av_resample_close(s->resample_context);
+    av_freep(&s->temp[0]);
+    av_freep(&s->temp[1]);
+    av_free(s);
+}
diff --git a/contrib/ffmpeg/libavcodec/resample2.c b/contrib/ffmpeg/libavcodec/resample2.c
new file mode 100644
index 000000000..3ae0ba855
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/resample2.c
@@ -0,0 +1,274 @@
+/*
+ * audio resampling
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file resample2.c
+ * audio resampling
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "avcodec.h"
+#include "common.h"
+#include "dsputil.h"
+
+#if 1
+#define FILTER_SHIFT 15
+
+#define FELEM int16_t
+#define FELEM2 int32_t
+#define FELEM_MAX INT16_MAX
+#define FELEM_MIN INT16_MIN
+#else
+#define FILTER_SHIFT 22
+
+#define FELEM int32_t
+#define FELEM2 int64_t
+#define FELEM_MAX INT32_MAX
+#define FELEM_MIN INT32_MIN
+#endif
+
+
+typedef struct AVResampleContext{
+    FELEM *filter_bank;
+    int filter_length;
+    int ideal_dst_incr;
+    int dst_incr;
+    int index;
+    int frac;
+    int src_incr;
+    int compensation_distance;
+    int phase_shift;
+    int phase_mask;
+    int linear;
+}AVResampleContext;
+
+/**
+ * 0th order modified bessel function of the first kind.
+ */
+static double bessel(double x){
+    double v=1;
+    double t=1;
+    int i;
+
+    for(i=1; i<50; i++){
+        t *= i;
+        v += pow(x*x/4, i)/(t*t);
+    }
+    return v;
+}
+
+/**
+ * builds a polyphase filterbank.
+ * @param factor resampling factor
+ * @param scale wanted sum of coefficients for each filter
+ * @param type 0->cubic, 1->blackman nuttall windowed sinc, 2->kaiser windowed sinc beta=16
+ */
+void av_build_filter(FELEM *filter, double factor, int tap_count, int phase_count, int scale, int type){
+    int ph, i, v;
+    double x, y, w, tab[tap_count];
+    const int center= (tap_count-1)/2;
+
+    /* if upsampling, only need to interpolate, no filter */
+    if (factor > 1.0)
+        factor = 1.0;
+
+    for(ph=0;ph<phase_count;ph++) {
+        double norm = 0;
+        double e= 0;
+        for(i=0;i<tap_count;i++) {
+            x = M_PI * ((double)(i - center) - (double)ph / phase_count) * factor;
+            if (x == 0) y = 1.0;
+            else        y = sin(x) / x;
+            switch(type){
+            case 0:{
+                const float d= -0.5; //first order derivative = -0.5
+                x = fabs(((double)(i - center) - (double)ph / phase_count) * factor);
+                if(x<1.0) y= 1 - 3*x*x + 2*x*x*x + d*(            -x*x + x*x*x);
+                else      y=                       d*(-4 + 8*x - 5*x*x + x*x*x);
+                break;}
+            case 1:
+                w = 2.0*x / (factor*tap_count) + M_PI;
+                y *= 0.3635819 - 0.4891775 * cos(w) + 0.1365995 * cos(2*w) - 0.0106411 * cos(3*w);
+                break;
+            case 2:
+                w = 2.0*x / (factor*tap_count*M_PI);
+                y *= bessel(16*sqrt(FFMAX(1-w*w, 0)));
+                break;
+            }
+
+            tab[i] = y;
+            norm += y;
+        }
+
+        /* normalize so that an uniform color remains the same */
+        for(i=0;i<tap_count;i++) {
+            v = clip(lrintf(tab[i] * scale / norm + e), FELEM_MIN, FELEM_MAX);
+            filter[ph * tap_count + i] = v;
+            e += tab[i] * scale / norm - v;
+        }
+    }
+}
+
+/**
+ * initalizes a audio resampler.
+ * note, if either rate is not a integer then simply scale both rates up so they are
+ */
+AVResampleContext *av_resample_init(int out_rate, int in_rate, int filter_size, int phase_shift, int linear, double cutoff){
+    AVResampleContext *c= av_mallocz(sizeof(AVResampleContext));
+    double factor= FFMIN(out_rate * cutoff / in_rate, 1.0);
+    int phase_count= 1<<phase_shift;
+
+    c->phase_shift= phase_shift;
+    c->phase_mask= phase_count-1;
+    c->linear= linear;
+
+    c->filter_length= FFMAX((int)ceil(filter_size/factor), 1);
+    c->filter_bank= av_mallocz(c->filter_length*(phase_count+1)*sizeof(FELEM));
+    av_build_filter(c->filter_bank, factor, c->filter_length, phase_count, 1<<FILTER_SHIFT, 1);
+    memcpy(&c->filter_bank[c->filter_length*phase_count+1], c->filter_bank, (c->filter_length-1)*sizeof(FELEM));
+    c->filter_bank[c->filter_length*phase_count]= c->filter_bank[c->filter_length - 1];
+
+    c->src_incr= out_rate;
+    c->ideal_dst_incr= c->dst_incr= in_rate * phase_count;
+    c->index= -phase_count*((c->filter_length-1)/2);
+
+    return c;
+}
+
+void av_resample_close(AVResampleContext *c){
+    av_freep(&c->filter_bank);
+    av_freep(&c);
+}
+
+/**
+ * Compensates samplerate/timestamp drift. The compensation is done by changing
+ * the resampler parameters, so no audible clicks or similar distortions ocur
+ * @param compensation_distance distance in output samples over which the compensation should be performed
+ * @param sample_delta number of output samples which should be output less
+ *
+ * example: av_resample_compensate(c, 10, 500)
+ * here instead of 510 samples only 500 samples would be output
+ *
+ * note, due to rounding the actual compensation might be slightly different,
+ * especially if the compensation_distance is large and the in_rate used during init is small
+ */
+void av_resample_compensate(AVResampleContext *c, int sample_delta, int compensation_distance){
+//    sample_delta += (c->ideal_dst_incr - c->dst_incr)*(int64_t)c->compensation_distance / c->ideal_dst_incr;
+    c->compensation_distance= compensation_distance;
+    c->dst_incr = c->ideal_dst_incr - c->ideal_dst_incr * (int64_t)sample_delta / compensation_distance;
+}
+
+/**
+ * resamples.
+ * @param src an array of unconsumed samples
+ * @param consumed the number of samples of src which have been consumed are returned here
+ * @param src_size the number of unconsumed samples available
+ * @param dst_size the amount of space in samples available in dst
+ * @param update_ctx if this is 0 then the context wont be modified, that way several channels can be resampled with the same context
+ * @return the number of samples written in dst or -1 if an error occured
+ */
+int av_resample(AVResampleContext *c, short *dst, short *src, int *consumed, int src_size, int dst_size, int update_ctx){
+    int dst_index, i;
+    int index= c->index;
+    int frac= c->frac;
+    int dst_incr_frac= c->dst_incr % c->src_incr;
+    int dst_incr=      c->dst_incr / c->src_incr;
+    int compensation_distance= c->compensation_distance;
+
+  if(compensation_distance == 0 && c->filter_length == 1 && c->phase_shift==0){
+        int64_t index2= ((int64_t)index)<<32;
+        int64_t incr= (1LL<<32) * c->dst_incr / c->src_incr;
+        dst_size= FFMIN(dst_size, (src_size-1-index) * (int64_t)c->src_incr / c->dst_incr);
+
+        for(dst_index=0; dst_index < dst_size; dst_index++){
+            dst[dst_index] = src[index2>>32];
+            index2 += incr;
+        }
+        frac += dst_index * dst_incr_frac;
+        index += dst_index * dst_incr;
+        index += frac / c->src_incr;
+        frac %= c->src_incr;
+  }else{
+    for(dst_index=0; dst_index < dst_size; dst_index++){
+        FELEM *filter= c->filter_bank + c->filter_length*(index & c->phase_mask);
+        int sample_index= index >> c->phase_shift;
+        FELEM2 val=0;
+
+        if(sample_index < 0){
+            for(i=0; i<c->filter_length; i++)
+                val += src[FFABS(sample_index + i) % src_size] * filter[i];
+        }else if(sample_index + c->filter_length > src_size){
+            break;
+        }else if(c->linear){
+            int64_t v=0;
+            int sub_phase= (frac<<8) / c->src_incr;
+            for(i=0; i<c->filter_length; i++){
+                int64_t coeff= filter[i]*(256 - sub_phase) + filter[i + c->filter_length]*sub_phase;
+                v += src[sample_index + i] * coeff;
+            }
+            val= v>>8;
+        }else{
+            for(i=0; i<c->filter_length; i++){
+                val += src[sample_index + i] * (FELEM2)filter[i];
+            }
+        }
+
+        val = (val + (1<<(FILTER_SHIFT-1)))>>FILTER_SHIFT;
+        dst[dst_index] = (unsigned)(val + 32768) > 65535 ? (val>>31) ^ 32767 : val;
+
+        frac += dst_incr_frac;
+        index += dst_incr;
+        if(frac >= c->src_incr){
+            frac -= c->src_incr;
+            index++;
+        }
+
+        if(dst_index + 1 == compensation_distance){
+            compensation_distance= 0;
+            dst_incr_frac= c->ideal_dst_incr % c->src_incr;
+            dst_incr=      c->ideal_dst_incr / c->src_incr;
+        }
+    }
+  }
+    *consumed= FFMAX(index, 0) >> c->phase_shift;
+    if(index>=0) index &= c->phase_mask;
+
+    if(compensation_distance){
+        compensation_distance -= dst_index;
+        assert(compensation_distance > 0);
+    }
+    if(update_ctx){
+        c->frac= frac;
+        c->index= index;
+        c->dst_incr= dst_incr_frac + c->src_incr*dst_incr;
+        c->compensation_distance= compensation_distance;
+    }
+#if 0
+    if(update_ctx && !c->compensation_distance){
+#undef rand
+        av_resample_compensate(c, rand() % (8000*2) - 8000, 8000*2);
+av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", c->dst_incr, c->ideal_dst_incr, c->compensation_distance);
+    }
+#endif
+
+    return dst_index;
+}
diff --git a/contrib/ffmpeg/libavcodec/roqvideo.c b/contrib/ffmpeg/libavcodec/roqvideo.c
new file mode 100644
index 000000000..4595b047c
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/roqvideo.c
@@ -0,0 +1,474 @@
+/*
+ * Copyright (C) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file roqvideo.c
+ * Id RoQ Video Decoder by Dr. Tim Ferguson
+ * For more information about the Id RoQ format, visit:
+ *   http://www.csse.monash.edu.au/~timf/
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+
+typedef struct {
+  unsigned char y0, y1, y2, y3, u, v;
+} roq_cell;
+
+typedef struct {
+  int idx[4];
+} roq_qcell;
+
+static int uiclip[1024], *uiclp;  /* clipping table */
+#define avg2(a,b) uiclp[(((int)(a)+(int)(b)+1)>>1)]
+#define avg4(a,b,c,d) uiclp[(((int)(a)+(int)(b)+(int)(c)+(int)(d)+2)>>2)]
+
+typedef struct RoqContext {
+
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    AVFrame last_frame;
+    AVFrame current_frame;
+    int first_frame;
+    int y_stride;
+    int c_stride;
+
+    roq_cell cells[256];
+    roq_qcell qcells[256];
+
+    unsigned char *buf;
+    int size;
+
+} RoqContext;
+
+#define RoQ_INFO              0x1001
+#define RoQ_QUAD_CODEBOOK     0x1002
+#define RoQ_QUAD_VQ           0x1011
+#define RoQ_SOUND_MONO        0x1020
+#define RoQ_SOUND_STEREO      0x1021
+
+#define RoQ_ID_MOT              0x00
+#define RoQ_ID_FCC              0x01
+#define RoQ_ID_SLD              0x02
+#define RoQ_ID_CCC              0x03
+
+#define get_byte(in_buffer) *(in_buffer++)
+#define get_word(in_buffer) ((unsigned short)(in_buffer += 2, \
+  (in_buffer[-1] << 8 | in_buffer[-2])))
+#define get_long(in_buffer) ((unsigned long)(in_buffer += 4, \
+  (in_buffer[-1] << 24 | in_buffer[-2] << 16 | in_buffer[-3] << 8 | in_buffer[-4])))
+
+
+static void apply_vector_2x2(RoqContext *ri, int x, int y, roq_cell *cell)
+{
+    unsigned char *yptr;
+
+    yptr = ri->current_frame.data[0] + (y * ri->y_stride) + x;
+    *yptr++ = cell->y0;
+    *yptr++ = cell->y1;
+    yptr += (ri->y_stride - 2);
+    *yptr++ = cell->y2;
+    *yptr++ = cell->y3;
+    ri->current_frame.data[1][(y/2) * (ri->c_stride) + x/2] = cell->u;
+    ri->current_frame.data[2][(y/2) * (ri->c_stride) + x/2] = cell->v;
+}
+
+static void apply_vector_4x4(RoqContext *ri, int x, int y, roq_cell *cell)
+{
+    unsigned long row_inc, c_row_inc;
+    register unsigned char y0, y1, u, v;
+    unsigned char *yptr, *uptr, *vptr;
+
+    yptr = ri->current_frame.data[0] + (y * ri->y_stride) + x;
+    uptr = ri->current_frame.data[1] + (y/2) * (ri->c_stride) + x/2;
+    vptr = ri->current_frame.data[2] + (y/2) * (ri->c_stride) + x/2;
+
+    row_inc = ri->y_stride - 4;
+    c_row_inc = (ri->c_stride) - 2;
+    *yptr++ = y0 = cell->y0; *uptr++ = u = cell->u; *vptr++ = v = cell->v;
+    *yptr++ = y0;
+    *yptr++ = y1 = cell->y1; *uptr++ = u; *vptr++ = v;
+    *yptr++ = y1;
+
+    yptr += row_inc;
+
+    *yptr++ = y0;
+    *yptr++ = y0;
+    *yptr++ = y1;
+    *yptr++ = y1;
+
+    yptr += row_inc; uptr += c_row_inc; vptr += c_row_inc;
+
+    *yptr++ = y0 = cell->y2; *uptr++ = u; *vptr++ = v;
+    *yptr++ = y0;
+    *yptr++ = y1 = cell->y3; *uptr++ = u; *vptr++ = v;
+    *yptr++ = y1;
+
+    yptr += row_inc;
+
+    *yptr++ = y0;
+    *yptr++ = y0;
+    *yptr++ = y1;
+    *yptr++ = y1;
+}
+
+static void apply_motion_4x4(RoqContext *ri, int x, int y, unsigned char mv,
+    signed char mean_x, signed char mean_y)
+{
+    int i, hw, mx, my;
+    unsigned char *pa, *pb;
+
+    mx = x + 8 - (mv >> 4) - mean_x;
+    my = y + 8 - (mv & 0xf) - mean_y;
+
+    /* check MV against frame boundaries */
+    if ((mx < 0) || (mx > ri->avctx->width - 4) ||
+        (my < 0) || (my > ri->avctx->height - 4)) {
+        av_log(ri->avctx, AV_LOG_ERROR, "motion vector out of bounds: MV = (%d, %d), boundaries = (0, 0, %d, %d)\n",
+            mx, my, ri->avctx->width, ri->avctx->height);
+        return;
+    }
+
+    pa = ri->current_frame.data[0] + (y * ri->y_stride) + x;
+    pb = ri->last_frame.data[0] + (my * ri->y_stride) + mx;
+    for(i = 0; i < 4; i++) {
+        pa[0] = pb[0];
+        pa[1] = pb[1];
+        pa[2] = pb[2];
+        pa[3] = pb[3];
+        pa += ri->y_stride;
+        pb += ri->y_stride;
+    }
+
+    hw = ri->y_stride/2;
+    pa = ri->current_frame.data[1] + (y * ri->y_stride)/4 + x/2;
+    pb = ri->last_frame.data[1] + (my/2) * (ri->y_stride/2) + (mx + 1)/2;
+
+    for(i = 0; i < 2; i++) {
+        switch(((my & 0x01) << 1) | (mx & 0x01)) {
+
+        case 0:
+            pa[0] = pb[0];
+            pa[1] = pb[1];
+            pa[hw] = pb[hw];
+            pa[hw+1] = pb[hw+1];
+            break;
+
+        case 1:
+            pa[0] = avg2(pb[0], pb[1]);
+            pa[1] = avg2(pb[1], pb[2]);
+            pa[hw] = avg2(pb[hw], pb[hw+1]);
+            pa[hw+1] = avg2(pb[hw+1], pb[hw+2]);
+            break;
+
+        case 2:
+            pa[0] = avg2(pb[0], pb[hw]);
+            pa[1] = avg2(pb[1], pb[hw+1]);
+            pa[hw] = avg2(pb[hw], pb[hw*2]);
+            pa[hw+1] = avg2(pb[hw+1], pb[(hw*2)+1]);
+            break;
+
+        case 3:
+            pa[0] = avg4(pb[0], pb[1], pb[hw], pb[hw+1]);
+            pa[1] = avg4(pb[1], pb[2], pb[hw+1], pb[hw+2]);
+            pa[hw] = avg4(pb[hw], pb[hw+1], pb[hw*2], pb[(hw*2)+1]);
+            pa[hw+1] = avg4(pb[hw+1], pb[hw+2], pb[(hw*2)+1], pb[(hw*2)+1]);
+            break;
+        }
+
+        pa = ri->current_frame.data[2] + (y * ri->y_stride)/4 + x/2;
+        pb = ri->last_frame.data[2] + (my/2) * (ri->y_stride/2) + (mx + 1)/2;
+    }
+}
+
+static void apply_motion_8x8(RoqContext *ri, int x, int y,
+    unsigned char mv, signed char mean_x, signed char mean_y)
+{
+    int mx, my, i, j, hw;
+    unsigned char *pa, *pb;
+
+    mx = x + 8 - (mv >> 4) - mean_x;
+    my = y + 8 - (mv & 0xf) - mean_y;
+
+    /* check MV against frame boundaries */
+    if ((mx < 0) || (mx > ri->avctx->width - 8) ||
+        (my < 0) || (my > ri->avctx->height - 8)) {
+        av_log(ri->avctx, AV_LOG_ERROR, "motion vector out of bounds: MV = (%d, %d), boundaries = (0, 0, %d, %d)\n",
+            mx, my, ri->avctx->width, ri->avctx->height);
+        return;
+    }
+
+    pa = ri->current_frame.data[0] + (y * ri->y_stride) + x;
+    pb = ri->last_frame.data[0] + (my * ri->y_stride) + mx;
+    for(i = 0; i < 8; i++) {
+        pa[0] = pb[0];
+        pa[1] = pb[1];
+        pa[2] = pb[2];
+        pa[3] = pb[3];
+        pa[4] = pb[4];
+        pa[5] = pb[5];
+        pa[6] = pb[6];
+        pa[7] = pb[7];
+        pa += ri->y_stride;
+        pb += ri->y_stride;
+    }
+
+    hw = ri->c_stride;
+    pa = ri->current_frame.data[1] + (y * ri->y_stride)/4 + x/2;
+    pb = ri->last_frame.data[1] + (my/2) * (ri->y_stride/2) + (mx + 1)/2;
+    for(j = 0; j < 2; j++) {
+        for(i = 0; i < 4; i++) {
+            switch(((my & 0x01) << 1) | (mx & 0x01)) {
+
+            case 0:
+                pa[0] = pb[0];
+                pa[1] = pb[1];
+                pa[2] = pb[2];
+                pa[3] = pb[3];
+                break;
+
+            case 1:
+                pa[0] = avg2(pb[0], pb[1]);
+                pa[1] = avg2(pb[1], pb[2]);
+                pa[2] = avg2(pb[2], pb[3]);
+                pa[3] = avg2(pb[3], pb[4]);
+                break;
+
+            case 2:
+                pa[0] = avg2(pb[0], pb[hw]);
+                pa[1] = avg2(pb[1], pb[hw+1]);
+                pa[2] = avg2(pb[2], pb[hw+2]);
+                pa[3] = avg2(pb[3], pb[hw+3]);
+                break;
+
+            case 3:
+                pa[0] = avg4(pb[0], pb[1], pb[hw], pb[hw+1]);
+                pa[1] = avg4(pb[1], pb[2], pb[hw+1], pb[hw+2]);
+                pa[2] = avg4(pb[2], pb[3], pb[hw+2], pb[hw+3]);
+                pa[3] = avg4(pb[3], pb[4], pb[hw+3], pb[hw+4]);
+                break;
+            }
+            pa += ri->c_stride;
+            pb += ri->c_stride;
+        }
+
+        pa = ri->current_frame.data[2] + (y * ri->y_stride)/4 + x/2;
+        pb = ri->last_frame.data[2] + (my/2) * (ri->y_stride/2) + (mx + 1)/2;
+    }
+}
+
+static void roqvideo_decode_frame(RoqContext *ri)
+{
+    unsigned int chunk_id = 0, chunk_arg = 0;
+    unsigned long chunk_size = 0;
+    int i, j, k, nv1, nv2, vqflg = 0, vqflg_pos = -1;
+    int vqid, bpos, xpos, ypos, xp, yp, x, y;
+    int frame_stats[2][4] = {{0},{0}};
+    roq_qcell *qcell;
+    unsigned char *buf = ri->buf;
+    unsigned char *buf_end = ri->buf + ri->size;
+
+    while (buf < buf_end) {
+        chunk_id = get_word(buf);
+        chunk_size = get_long(buf);
+        chunk_arg = get_word(buf);
+
+        if(chunk_id == RoQ_QUAD_VQ)
+            break;
+        if(chunk_id == RoQ_QUAD_CODEBOOK) {
+            if((nv1 = chunk_arg >> 8) == 0)
+                nv1 = 256;
+            if((nv2 = chunk_arg & 0xff) == 0 && nv1 * 6 < chunk_size)
+                nv2 = 256;
+            for(i = 0; i < nv1; i++) {
+                ri->cells[i].y0 = get_byte(buf);
+                ri->cells[i].y1 = get_byte(buf);
+                ri->cells[i].y2 = get_byte(buf);
+                ri->cells[i].y3 = get_byte(buf);
+                ri->cells[i].u = get_byte(buf);
+                ri->cells[i].v = get_byte(buf);
+            }
+            for(i = 0; i < nv2; i++)
+                for(j = 0; j < 4; j++)
+                    ri->qcells[i].idx[j] = get_byte(buf);
+        }
+    }
+
+    bpos = xpos = ypos = 0;
+    while(bpos < chunk_size) {
+        for (yp = ypos; yp < ypos + 16; yp += 8)
+            for (xp = xpos; xp < xpos + 16; xp += 8) {
+                if (vqflg_pos < 0) {
+                    vqflg = buf[bpos++]; vqflg |= (buf[bpos++] << 8);
+                    vqflg_pos = 7;
+                }
+                vqid = (vqflg >> (vqflg_pos * 2)) & 0x3;
+                frame_stats[0][vqid]++;
+                vqflg_pos--;
+
+                switch(vqid) {
+                case RoQ_ID_MOT:
+                    apply_motion_8x8(ri, xp, yp, 0, 8, 8);
+                    break;
+                case RoQ_ID_FCC:
+                    apply_motion_8x8(ri, xp, yp, buf[bpos++], chunk_arg >> 8,
+                        chunk_arg & 0xff);
+                    break;
+                case RoQ_ID_SLD:
+                    qcell = ri->qcells + buf[bpos++];
+                    apply_vector_4x4(ri, xp, yp, ri->cells + qcell->idx[0]);
+                    apply_vector_4x4(ri, xp+4, yp, ri->cells + qcell->idx[1]);
+                    apply_vector_4x4(ri, xp, yp+4, ri->cells + qcell->idx[2]);
+                    apply_vector_4x4(ri, xp+4, yp+4, ri->cells + qcell->idx[3]);
+                    break;
+                case RoQ_ID_CCC:
+                    for (k = 0; k < 4; k++) {
+                        x = xp; y = yp;
+                        if(k & 0x01) x += 4;
+                        if(k & 0x02) y += 4;
+
+                        if (vqflg_pos < 0) {
+                            vqflg = buf[bpos++];
+                            vqflg |= (buf[bpos++] << 8);
+                            vqflg_pos = 7;
+                        }
+                        vqid = (vqflg >> (vqflg_pos * 2)) & 0x3;
+                        frame_stats[1][vqid]++;
+                        vqflg_pos--;
+                        switch(vqid) {
+                        case RoQ_ID_MOT:
+                            apply_motion_4x4(ri, x, y, 0, 8, 8);
+                            break;
+                        case RoQ_ID_FCC:
+                            apply_motion_4x4(ri, x, y, buf[bpos++],
+                                chunk_arg >> 8, chunk_arg & 0xff);
+                            break;
+                        case RoQ_ID_SLD:
+                            qcell = ri->qcells + buf[bpos++];
+                            apply_vector_2x2(ri, x, y, ri->cells + qcell->idx[0]);
+                            apply_vector_2x2(ri, x+2, y, ri->cells + qcell->idx[1]);
+                            apply_vector_2x2(ri, x, y+2, ri->cells + qcell->idx[2]);
+                            apply_vector_2x2(ri, x+2, y+2, ri->cells + qcell->idx[3]);
+                            break;
+                        case RoQ_ID_CCC:
+                            apply_vector_2x2(ri, x, y, ri->cells + buf[bpos]);
+                            apply_vector_2x2(ri, x+2, y, ri->cells + buf[bpos+1]);
+                            apply_vector_2x2(ri, x, y+2, ri->cells + buf[bpos+2]);
+                            apply_vector_2x2(ri, x+2, y+2, ri->cells + buf[bpos+3]);
+                            bpos += 4;
+                            break;
+                        }
+                    }
+                    break;
+                default:
+                    av_log(ri->avctx, AV_LOG_ERROR, "Unknown vq code: %d\n", vqid);
+            }
+        }
+
+        xpos += 16;
+        if (xpos >= ri->avctx->width) {
+            xpos -= ri->avctx->width;
+            ypos += 16;
+        }
+        if(ypos >= ri->avctx->height)
+            break;
+    }
+}
+
+
+static int roq_decode_init(AVCodecContext *avctx)
+{
+    RoqContext *s = avctx->priv_data;
+    int i;
+
+    s->avctx = avctx;
+    s->first_frame = 1;
+    avctx->pix_fmt = PIX_FMT_YUV420P;
+    avctx->has_b_frames = 0;
+    dsputil_init(&s->dsp, avctx);
+
+    uiclp = uiclip+512;
+    for(i = -512; i < 512; i++)
+        uiclp[i] = (i < 0 ? 0 : (i > 255 ? 255 : i));
+
+    return 0;
+}
+
+static int roq_decode_frame(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    RoqContext *s = avctx->priv_data;
+
+    if (avctx->get_buffer(avctx, &s->current_frame)) {
+        av_log(avctx, AV_LOG_ERROR, "  RoQ: get_buffer() failed\n");
+        return -1;
+    }
+    s->y_stride = s->current_frame.linesize[0];
+    s->c_stride = s->current_frame.linesize[1];
+
+    s->buf = buf;
+    s->size = buf_size;
+    roqvideo_decode_frame(s);
+
+    /* release the last frame if it is allocated */
+    if (s->first_frame)
+        s->first_frame = 0;
+    else
+        avctx->release_buffer(avctx, &s->last_frame);
+
+    /* shuffle frames */
+    s->last_frame = s->current_frame;
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->current_frame;
+
+    return buf_size;
+}
+
+static int roq_decode_end(AVCodecContext *avctx)
+{
+    RoqContext *s = avctx->priv_data;
+
+    /* release the last frame */
+    if (s->last_frame.data[0])
+        avctx->release_buffer(avctx, &s->last_frame);
+
+    return 0;
+}
+
+AVCodec roq_decoder = {
+    "roqvideo",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_ROQ,
+    sizeof(RoqContext),
+    roq_decode_init,
+    NULL,
+    roq_decode_end,
+    roq_decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/rpza.c b/contrib/ffmpeg/libavcodec/rpza.c
new file mode 100644
index 000000000..9a996da37
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/rpza.c
@@ -0,0 +1,292 @@
+/*
+ * Quicktime Video (RPZA) Video Decoder
+ * Copyright (C) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file rpza.c
+ * QT RPZA Video Decoder by Roberto Togni <rtogni@bresciaonline.it>
+ * For more information about the RPZA format, visit:
+ *   http://www.pcisys.net/~melanson/codecs/
+ *
+ * The RPZA decoder outputs RGB555 colorspace data.
+ *
+ * Note that this decoder reads big endian RGB555 pixel values from the
+ * bytestream, arranges them in the host's endian order, and outputs
+ * them to the final rendered map in the same host endian order. This is
+ * intended behavior as the ffmpeg documentation states that RGB555 pixels
+ * shall be stored in native CPU endianness.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+
+typedef struct RpzaContext {
+
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    AVFrame frame;
+
+    unsigned char *buf;
+    int size;
+
+} RpzaContext;
+
+#define ADVANCE_BLOCK() \
+{ \
+    pixel_ptr += 4; \
+    if (pixel_ptr >= width) \
+    { \
+        pixel_ptr = 0; \
+        row_ptr += stride * 4; \
+    } \
+    total_blocks--; \
+    if (total_blocks < 0) \
+    { \
+        av_log(s->avctx, AV_LOG_ERROR, "warning: block counter just went negative (this should not happen)\n"); \
+        return; \
+    } \
+}
+
+static void rpza_decode_stream(RpzaContext *s)
+{
+    int width = s->avctx->width;
+    int stride = s->frame.linesize[0] / 2;
+    int row_inc = stride - 4;
+    int stream_ptr = 0;
+    int chunk_size;
+    unsigned char opcode;
+    int n_blocks;
+    unsigned short colorA = 0, colorB;
+    unsigned short color4[4];
+    unsigned char index, idx;
+    unsigned short ta, tb;
+    unsigned short *pixels = (unsigned short *)s->frame.data[0];
+
+    int row_ptr = 0;
+    int pixel_ptr = 0;
+    int block_ptr;
+    int pixel_x, pixel_y;
+    int total_blocks;
+
+    /* First byte is always 0xe1. Warn if it's different */
+    if (s->buf[stream_ptr] != 0xe1)
+        av_log(s->avctx, AV_LOG_ERROR, "First chunk byte is 0x%02x instead of 0xe1\n",
+            s->buf[stream_ptr]);
+
+    /* Get chunk size, ingnoring first byte */
+    chunk_size = BE_32(&s->buf[stream_ptr]) & 0x00FFFFFF;
+    stream_ptr += 4;
+
+    /* If length mismatch use size from MOV file and try to decode anyway */
+    if (chunk_size != s->size)
+        av_log(s->avctx, AV_LOG_ERROR, "MOV chunk size != encoded chunk size; using MOV chunk size\n");
+
+    chunk_size = s->size;
+
+    /* Number of 4x4 blocks in frame. */
+    total_blocks = ((s->avctx->width + 3) / 4) * ((s->avctx->height + 3) / 4);
+
+    /* Process chunk data */
+    while (stream_ptr < chunk_size) {
+        opcode = s->buf[stream_ptr++]; /* Get opcode */
+
+        n_blocks = (opcode & 0x1f) + 1; /* Extract block counter from opcode */
+
+        /* If opcode MSbit is 0, we need more data to decide what to do */
+        if ((opcode & 0x80) == 0) {
+            colorA = (opcode << 8) | (s->buf[stream_ptr++]);
+            opcode = 0;
+            if ((s->buf[stream_ptr] & 0x80) != 0) {
+                /* Must behave as opcode 110xxxxx, using colorA computed
+                 * above. Use fake opcode 0x20 to enter switch block at
+                 * the right place */
+                opcode = 0x20;
+                n_blocks = 1;
+            }
+        }
+
+        switch (opcode & 0xe0) {
+
+        /* Skip blocks */
+        case 0x80:
+            while (n_blocks--) {
+              ADVANCE_BLOCK();
+            }
+            break;
+
+        /* Fill blocks with one color */
+        case 0xa0:
+            colorA = BE_16 (&s->buf[stream_ptr]);
+            stream_ptr += 2;
+            while (n_blocks--) {
+                block_ptr = row_ptr + pixel_ptr;
+                for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                    for (pixel_x = 0; pixel_x < 4; pixel_x++){
+                        pixels[block_ptr] = colorA;
+                        block_ptr++;
+                    }
+                    block_ptr += row_inc;
+                }
+                ADVANCE_BLOCK();
+            }
+            break;
+
+        /* Fill blocks with 4 colors */
+        case 0xc0:
+            colorA = BE_16 (&s->buf[stream_ptr]);
+            stream_ptr += 2;
+        case 0x20:
+            colorB = BE_16 (&s->buf[stream_ptr]);
+            stream_ptr += 2;
+
+            /* sort out the colors */
+            color4[0] = colorB;
+            color4[1] = 0;
+            color4[2] = 0;
+            color4[3] = colorA;
+
+            /* red components */
+            ta = (colorA >> 10) & 0x1F;
+            tb = (colorB >> 10) & 0x1F;
+            color4[1] |= ((11 * ta + 21 * tb) >> 5) << 10;
+            color4[2] |= ((21 * ta + 11 * tb) >> 5) << 10;
+
+            /* green components */
+            ta = (colorA >> 5) & 0x1F;
+            tb = (colorB >> 5) & 0x1F;
+            color4[1] |= ((11 * ta + 21 * tb) >> 5) << 5;
+            color4[2] |= ((21 * ta + 11 * tb) >> 5) << 5;
+
+            /* blue components */
+            ta = colorA & 0x1F;
+            tb = colorB & 0x1F;
+            color4[1] |= ((11 * ta + 21 * tb) >> 5);
+            color4[2] |= ((21 * ta + 11 * tb) >> 5);
+
+            while (n_blocks--) {
+                block_ptr = row_ptr + pixel_ptr;
+                for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                    index = s->buf[stream_ptr++];
+                    for (pixel_x = 0; pixel_x < 4; pixel_x++){
+                        idx = (index >> (2 * (3 - pixel_x))) & 0x03;
+                        pixels[block_ptr] = color4[idx];
+                        block_ptr++;
+                    }
+                    block_ptr += row_inc;
+                }
+                ADVANCE_BLOCK();
+            }
+            break;
+
+        /* Fill block with 16 colors */
+        case 0x00:
+            block_ptr = row_ptr + pixel_ptr;
+            for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                for (pixel_x = 0; pixel_x < 4; pixel_x++){
+                    /* We already have color of upper left pixel */
+                    if ((pixel_y != 0) || (pixel_x !=0)) {
+                        colorA = BE_16 (&s->buf[stream_ptr]);
+                        stream_ptr += 2;
+                    }
+                    pixels[block_ptr] = colorA;
+                    block_ptr++;
+                }
+                block_ptr += row_inc;
+            }
+            ADVANCE_BLOCK();
+            break;
+
+        /* Unknown opcode */
+        default:
+            av_log(s->avctx, AV_LOG_ERROR, "Unknown opcode %d in rpza chunk."
+                 " Skip remaining %d bytes of chunk data.\n", opcode,
+                 chunk_size - stream_ptr);
+            return;
+        } /* Opcode switch */
+    }
+}
+
+static int rpza_decode_init(AVCodecContext *avctx)
+{
+    RpzaContext *s = (RpzaContext *)avctx->priv_data;
+
+    s->avctx = avctx;
+    avctx->pix_fmt = PIX_FMT_RGB555;
+    avctx->has_b_frames = 0;
+    dsputil_init(&s->dsp, avctx);
+
+    s->frame.data[0] = NULL;
+
+    return 0;
+}
+
+static int rpza_decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             uint8_t *buf, int buf_size)
+{
+    RpzaContext *s = (RpzaContext *)avctx->priv_data;
+
+    s->buf = buf;
+    s->size = buf_size;
+
+    s->frame.reference = 1;
+    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if (avctx->reget_buffer(avctx, &s->frame)) {
+        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+        return -1;
+    }
+
+    rpza_decode_stream(s);
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->frame;
+
+    /* always report that the buffer was completely consumed */
+    return buf_size;
+}
+
+static int rpza_decode_end(AVCodecContext *avctx)
+{
+    RpzaContext *s = (RpzaContext *)avctx->priv_data;
+
+    if (s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    return 0;
+}
+
+AVCodec rpza_decoder = {
+    "rpza",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_RPZA,
+    sizeof(RpzaContext),
+    rpza_decode_init,
+    NULL,
+    rpza_decode_end,
+    rpza_decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/rtjpeg.c b/contrib/ffmpeg/libavcodec/rtjpeg.c
new file mode 100644
index 000000000..dbc6cfd88
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/rtjpeg.c
@@ -0,0 +1,165 @@
+/*
+ * RTJpeg decoding functions
+ * Copyright (c) 2006 Reimar Doeffinger
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "common.h"
+#include "bitstream.h"
+#include "dsputil.h"
+#include "rtjpeg.h"
+
+#define PUT_COEFF(c) \
+    i = scan[coeff--]; \
+    block[i] = (c) * quant[i];
+
+//! aligns the bitstream to the give power of two
+#define ALIGN(a) \
+    n = (-get_bits_count(gb)) & (a - 1); \
+    if (n) {skip_bits(gb, n);}
+
+/**
+ * \brief read one block from stream
+ * \param gb contains stream data
+ * \param block where data is written to
+ * \param scan array containing the mapping stream address -> block position
+ * \param quant quantization factors
+ *
+ * Note: GetBitContext is used to make the code simpler, since all data is
+ * aligned this could be done faster in a different way, e.g. as it is done
+ * in MPlayer libmpcodecs/native/RTjpegN.c
+ */
+static inline int get_block(GetBitContext *gb, DCTELEM *block, uint8_t *scan,
+                            uint32_t *quant) {
+    int coeff, i, n;
+    int8_t ac;
+    uint8_t dc = get_bits(gb, 8);
+
+    // block not coded
+    if (dc == 255)
+       return 0;
+
+    // number of non-zero coefficients
+    coeff = get_bits(gb, 6);
+    // normally we would only need to clear the (63 - coeff) last values,
+    // but since we do not know where they are we just clear the whole block
+    memset(block, 0, 64 * sizeof(DCTELEM));
+
+    // 2 bits per coefficient
+    while (coeff) {
+        ac = get_sbits(gb, 2);
+        if (ac == -2)
+            break; // continue with more bits
+        PUT_COEFF(ac);
+    }
+
+    // 4 bits per coefficient
+    ALIGN(4);
+    while (coeff) {
+        ac = get_sbits(gb, 4);
+        if (ac == -8)
+            break; // continue with more bits
+        PUT_COEFF(ac);
+    }
+
+    // 8 bits per coefficient
+    ALIGN(8);
+    while (coeff) {
+        ac = get_sbits(gb, 8);
+        PUT_COEFF(ac);
+    }
+
+    PUT_COEFF(dc);
+    return 1;
+}
+
+/**
+ * \brief decode one rtjpeg YUV420 frame
+ * \param c context, must be initialized via rtjpeg_decode_init
+ * \param f AVFrame to place decoded frame into. If parts of the frame
+ *          are not coded they are left unchanged, so consider initializing it
+ * \param buf buffer containing input data
+ * \param buf_size length of input data in bytes
+ * \return number of bytes consumed from the input buffer
+ */
+int rtjpeg_decode_frame_yuv420(RTJpegContext *c, AVFrame *f,
+                               uint8_t *buf, int buf_size) {
+    DECLARE_ALIGNED_16(DCTELEM, block[64]);
+    GetBitContext gb;
+    int w = c->w / 16, h = c->h / 16;
+    int x, y;
+    uint8_t *y1 = f->data[0], *y2 = f->data[0] + 8 * f->linesize[0];
+    uint8_t *u = f->data[1], *v = f->data[2];
+    init_get_bits(&gb, buf, buf_size * 8);
+    for (y = 0; y < h; y++) {
+        for (x = 0; x < w; x++) {
+            if (get_block(&gb, block, c->scan, c->lquant))
+                c->dsp->idct_put(y1, f->linesize[0], block);
+            y1 += 8;
+            if (get_block(&gb, block, c->scan, c->lquant))
+                c->dsp->idct_put(y1, f->linesize[0], block);
+            y1 += 8;
+            if (get_block(&gb, block, c->scan, c->lquant))
+                c->dsp->idct_put(y2, f->linesize[0], block);
+            y2 += 8;
+            if (get_block(&gb, block, c->scan, c->lquant))
+                c->dsp->idct_put(y2, f->linesize[0], block);
+            y2 += 8;
+            if (get_block(&gb, block, c->scan, c->cquant))
+                c->dsp->idct_put(u, f->linesize[1], block);
+            u += 8;
+            if (get_block(&gb, block, c->scan, c->cquant))
+                c->dsp->idct_put(v, f->linesize[2], block);
+            v += 8;
+        }
+        y1 += 2 * 8 * (f->linesize[0] - w);
+        y2 += 2 * 8 * (f->linesize[0] - w);
+        u += 8 * (f->linesize[1] - w);
+        v += 8 * (f->linesize[2] - w);
+    }
+    return get_bits_count(&gb) / 8;
+}
+
+/**
+ * \brief initialize an RTJpegContext, may be called multiple times
+ * \param c context to initialize
+ * \param dsp specifies the idct to use for decoding
+ * \param width width of image, will be rounded down to the nearest multiple
+ *              of 16 for decoding
+ * \param height height of image, will be rounded down to the nearest multiple
+ *              of 16 for decoding
+ * \param lquant luma quantization table to use
+ * \param cquant chroma quantization table to use
+ */
+void rtjpeg_decode_init(RTJpegContext *c, DSPContext *dsp,
+                        int width, int height,
+                        uint32_t *lquant, uint32_t *cquant) {
+    int i;
+    c->dsp = dsp;
+    for (i = 0; i < 64; i++) {
+        int z = ff_zigzag_direct[i];
+        int p = c->dsp->idct_permutation[i];
+        z = ((z << 3) | (z >> 3)) & 63; // rtjpeg uses a transposed variant
+
+        // permute the scan and quantization tables for the chosen idct
+        c->scan[i] = c->dsp->idct_permutation[z];
+        c->lquant[p] = lquant[i];
+        c->cquant[p] = cquant[i];
+    }
+    c->w = width;
+    c->h = height;
+}
diff --git a/contrib/ffmpeg/libavcodec/rtjpeg.h b/contrib/ffmpeg/libavcodec/rtjpeg.h
new file mode 100644
index 000000000..daecc8a75
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/rtjpeg.h
@@ -0,0 +1,39 @@
+/*
+ * RTJpeg decoding functions
+ * copyright (c) 2006 Reimar Doeffinger
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef RTJPEG_H
+#define RTJPEG_H
+
+typedef struct {
+    int w, h;
+    DSPContext *dsp;
+    uint8_t scan[64];
+    uint32_t lquant[64];
+    uint32_t cquant[64];
+} RTJpegContext;
+
+void rtjpeg_decode_init(RTJpegContext *c, DSPContext *dsp,
+                        int width, int height,
+                        uint32_t *lquant, uint32_t *cquant);
+
+int rtjpeg_decode_frame_yuv420(RTJpegContext *c, AVFrame *f,
+                               uint8_t *buf, int buf_size);
+#endif
diff --git a/contrib/ffmpeg/libavcodec/rv10.c b/contrib/ffmpeg/libavcodec/rv10.c
new file mode 100644
index 000000000..4b50609c1
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/rv10.c
@@ -0,0 +1,803 @@
+/*
+ * RV10 codec
+ * Copyright (c) 2000,2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file rv10.c
+ * RV10 codec.
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+//#define DEBUG
+
+#define DC_VLC_BITS 14 //FIXME find a better solution
+
+static const uint16_t rv_lum_code[256] =
+{
+ 0x3e7f, 0x0f00, 0x0f01, 0x0f02, 0x0f03, 0x0f04, 0x0f05, 0x0f06,
+ 0x0f07, 0x0f08, 0x0f09, 0x0f0a, 0x0f0b, 0x0f0c, 0x0f0d, 0x0f0e,
+ 0x0f0f, 0x0f10, 0x0f11, 0x0f12, 0x0f13, 0x0f14, 0x0f15, 0x0f16,
+ 0x0f17, 0x0f18, 0x0f19, 0x0f1a, 0x0f1b, 0x0f1c, 0x0f1d, 0x0f1e,
+ 0x0f1f, 0x0f20, 0x0f21, 0x0f22, 0x0f23, 0x0f24, 0x0f25, 0x0f26,
+ 0x0f27, 0x0f28, 0x0f29, 0x0f2a, 0x0f2b, 0x0f2c, 0x0f2d, 0x0f2e,
+ 0x0f2f, 0x0f30, 0x0f31, 0x0f32, 0x0f33, 0x0f34, 0x0f35, 0x0f36,
+ 0x0f37, 0x0f38, 0x0f39, 0x0f3a, 0x0f3b, 0x0f3c, 0x0f3d, 0x0f3e,
+ 0x0f3f, 0x0380, 0x0381, 0x0382, 0x0383, 0x0384, 0x0385, 0x0386,
+ 0x0387, 0x0388, 0x0389, 0x038a, 0x038b, 0x038c, 0x038d, 0x038e,
+ 0x038f, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396,
+ 0x0397, 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e,
+ 0x039f, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6,
+ 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce,
+ 0x00cf, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056,
+ 0x0057, 0x0020, 0x0021, 0x0022, 0x0023, 0x000c, 0x000d, 0x0004,
+ 0x0000, 0x0005, 0x000e, 0x000f, 0x0024, 0x0025, 0x0026, 0x0027,
+ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
+ 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
+ 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
+ 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
+ 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
+ 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
+ 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
+ 0x0f40, 0x0f41, 0x0f42, 0x0f43, 0x0f44, 0x0f45, 0x0f46, 0x0f47,
+ 0x0f48, 0x0f49, 0x0f4a, 0x0f4b, 0x0f4c, 0x0f4d, 0x0f4e, 0x0f4f,
+ 0x0f50, 0x0f51, 0x0f52, 0x0f53, 0x0f54, 0x0f55, 0x0f56, 0x0f57,
+ 0x0f58, 0x0f59, 0x0f5a, 0x0f5b, 0x0f5c, 0x0f5d, 0x0f5e, 0x0f5f,
+ 0x0f60, 0x0f61, 0x0f62, 0x0f63, 0x0f64, 0x0f65, 0x0f66, 0x0f67,
+ 0x0f68, 0x0f69, 0x0f6a, 0x0f6b, 0x0f6c, 0x0f6d, 0x0f6e, 0x0f6f,
+ 0x0f70, 0x0f71, 0x0f72, 0x0f73, 0x0f74, 0x0f75, 0x0f76, 0x0f77,
+ 0x0f78, 0x0f79, 0x0f7a, 0x0f7b, 0x0f7c, 0x0f7d, 0x0f7e, 0x0f7f,
+};
+
+static const uint8_t rv_lum_bits[256] =
+{
+ 14, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10,  8,  8,  8,  8,  8,  8,  8,
+  8,  8,  8,  8,  8,  8,  8,  8,
+  8,  7,  7,  7,  7,  7,  7,  7,
+  7,  6,  6,  6,  6,  5,  5,  4,
+  2,  4,  5,  5,  6,  6,  6,  6,
+  7,  7,  7,  7,  7,  7,  7,  7,
+  8,  8,  8,  8,  8,  8,  8,  8,
+  8,  8,  8,  8,  8,  8,  8,  8,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+};
+
+static const uint16_t rv_chrom_code[256] =
+{
+ 0xfe7f, 0x3f00, 0x3f01, 0x3f02, 0x3f03, 0x3f04, 0x3f05, 0x3f06,
+ 0x3f07, 0x3f08, 0x3f09, 0x3f0a, 0x3f0b, 0x3f0c, 0x3f0d, 0x3f0e,
+ 0x3f0f, 0x3f10, 0x3f11, 0x3f12, 0x3f13, 0x3f14, 0x3f15, 0x3f16,
+ 0x3f17, 0x3f18, 0x3f19, 0x3f1a, 0x3f1b, 0x3f1c, 0x3f1d, 0x3f1e,
+ 0x3f1f, 0x3f20, 0x3f21, 0x3f22, 0x3f23, 0x3f24, 0x3f25, 0x3f26,
+ 0x3f27, 0x3f28, 0x3f29, 0x3f2a, 0x3f2b, 0x3f2c, 0x3f2d, 0x3f2e,
+ 0x3f2f, 0x3f30, 0x3f31, 0x3f32, 0x3f33, 0x3f34, 0x3f35, 0x3f36,
+ 0x3f37, 0x3f38, 0x3f39, 0x3f3a, 0x3f3b, 0x3f3c, 0x3f3d, 0x3f3e,
+ 0x3f3f, 0x0f80, 0x0f81, 0x0f82, 0x0f83, 0x0f84, 0x0f85, 0x0f86,
+ 0x0f87, 0x0f88, 0x0f89, 0x0f8a, 0x0f8b, 0x0f8c, 0x0f8d, 0x0f8e,
+ 0x0f8f, 0x0f90, 0x0f91, 0x0f92, 0x0f93, 0x0f94, 0x0f95, 0x0f96,
+ 0x0f97, 0x0f98, 0x0f99, 0x0f9a, 0x0f9b, 0x0f9c, 0x0f9d, 0x0f9e,
+ 0x0f9f, 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6,
+ 0x03c7, 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce,
+ 0x03cf, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6,
+ 0x00e7, 0x0030, 0x0031, 0x0032, 0x0033, 0x0008, 0x0009, 0x0002,
+ 0x0000, 0x0003, 0x000a, 0x000b, 0x0034, 0x0035, 0x0036, 0x0037,
+ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
+ 0x03d0, 0x03d1, 0x03d2, 0x03d3, 0x03d4, 0x03d5, 0x03d6, 0x03d7,
+ 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df,
+ 0x0fa0, 0x0fa1, 0x0fa2, 0x0fa3, 0x0fa4, 0x0fa5, 0x0fa6, 0x0fa7,
+ 0x0fa8, 0x0fa9, 0x0faa, 0x0fab, 0x0fac, 0x0fad, 0x0fae, 0x0faf,
+ 0x0fb0, 0x0fb1, 0x0fb2, 0x0fb3, 0x0fb4, 0x0fb5, 0x0fb6, 0x0fb7,
+ 0x0fb8, 0x0fb9, 0x0fba, 0x0fbb, 0x0fbc, 0x0fbd, 0x0fbe, 0x0fbf,
+ 0x3f40, 0x3f41, 0x3f42, 0x3f43, 0x3f44, 0x3f45, 0x3f46, 0x3f47,
+ 0x3f48, 0x3f49, 0x3f4a, 0x3f4b, 0x3f4c, 0x3f4d, 0x3f4e, 0x3f4f,
+ 0x3f50, 0x3f51, 0x3f52, 0x3f53, 0x3f54, 0x3f55, 0x3f56, 0x3f57,
+ 0x3f58, 0x3f59, 0x3f5a, 0x3f5b, 0x3f5c, 0x3f5d, 0x3f5e, 0x3f5f,
+ 0x3f60, 0x3f61, 0x3f62, 0x3f63, 0x3f64, 0x3f65, 0x3f66, 0x3f67,
+ 0x3f68, 0x3f69, 0x3f6a, 0x3f6b, 0x3f6c, 0x3f6d, 0x3f6e, 0x3f6f,
+ 0x3f70, 0x3f71, 0x3f72, 0x3f73, 0x3f74, 0x3f75, 0x3f76, 0x3f77,
+ 0x3f78, 0x3f79, 0x3f7a, 0x3f7b, 0x3f7c, 0x3f7d, 0x3f7e, 0x3f7f,
+};
+
+static const uint8_t rv_chrom_bits[256] =
+{
+ 16, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10,  8,  8,  8,  8,  8,  8,  8,
+  8,  6,  6,  6,  6,  4,  4,  3,
+  2,  3,  4,  4,  6,  6,  6,  6,
+  8,  8,  8,  8,  8,  8,  8,  8,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+};
+
+static VLC rv_dc_lum, rv_dc_chrom;
+
+int rv_decode_dc(MpegEncContext *s, int n)
+{
+    int code;
+
+    if (n < 4) {
+        code = get_vlc2(&s->gb, rv_dc_lum.table, DC_VLC_BITS, 2);
+        if (code < 0) {
+            /* XXX: I don't understand why they use LONGER codes than
+               necessary. The following code would be completely useless
+               if they had thought about it !!! */
+            code = get_bits(&s->gb, 7);
+            if (code == 0x7c) {
+                code = (int8_t)(get_bits(&s->gb, 7) + 1);
+            } else if (code == 0x7d) {
+                code = -128 + get_bits(&s->gb, 7);
+            } else if (code == 0x7e) {
+                if (get_bits(&s->gb, 1) == 0)
+                    code = (int8_t)(get_bits(&s->gb, 8) + 1);
+                else
+                    code = (int8_t)(get_bits(&s->gb, 8));
+            } else if (code == 0x7f) {
+                get_bits(&s->gb, 11);
+                code = 1;
+            }
+        } else {
+            code -= 128;
+        }
+    } else {
+        code = get_vlc2(&s->gb, rv_dc_chrom.table, DC_VLC_BITS, 2);
+        /* same remark */
+        if (code < 0) {
+            code = get_bits(&s->gb, 9);
+            if (code == 0x1fc) {
+                code = (int8_t)(get_bits(&s->gb, 7) + 1);
+            } else if (code == 0x1fd) {
+                code = -128 + get_bits(&s->gb, 7);
+            } else if (code == 0x1fe) {
+                get_bits(&s->gb, 9);
+                code = 1;
+            } else {
+                av_log(s->avctx, AV_LOG_ERROR, "chroma dc error\n");
+                return 0xffff;
+            }
+        } else {
+            code -= 128;
+        }
+    }
+    return -code;
+}
+
+#ifdef CONFIG_ENCODERS
+
+/* write RV 1.0 compatible frame header */
+void rv10_encode_picture_header(MpegEncContext *s, int picture_number)
+{
+    int full_frame= 0;
+
+    align_put_bits(&s->pb);
+
+    put_bits(&s->pb, 1, 1);     /* marker */
+
+    put_bits(&s->pb, 1, (s->pict_type == P_TYPE));
+
+    put_bits(&s->pb, 1, 0);     /* not PB frame */
+
+    put_bits(&s->pb, 5, s->qscale);
+
+    if (s->pict_type == I_TYPE) {
+        /* specific MPEG like DC coding not used */
+    }
+    /* if multiple packets per frame are sent, the position at which
+       to display the macro blocks is coded here */
+    if(!full_frame){
+        put_bits(&s->pb, 6, 0); /* mb_x */
+        put_bits(&s->pb, 6, 0); /* mb_y */
+        put_bits(&s->pb, 12, s->mb_width * s->mb_height);
+    }
+
+    put_bits(&s->pb, 3, 0);     /* ignored */
+}
+
+void rv20_encode_picture_header(MpegEncContext *s, int picture_number){
+    put_bits(&s->pb, 2, s->pict_type); //I 0 vs. 1 ?
+    put_bits(&s->pb, 1, 0);     /* unknown bit */
+    put_bits(&s->pb, 5, s->qscale);
+
+    put_bits(&s->pb, 8, picture_number&0xFF); //FIXME wrong, but correct is not known
+    s->mb_x= s->mb_y= 0;
+    ff_h263_encode_mba(s);
+
+    put_bits(&s->pb, 1, s->no_rounding);
+
+    assert(s->f_code == 1);
+    assert(s->unrestricted_mv == 1);
+//    assert(s->h263_aic== (s->pict_type == I_TYPE));
+    assert(s->alt_inter_vlc == 0);
+    assert(s->umvplus == 0);
+    assert(s->modified_quant==1);
+    assert(s->loop_filter==1);
+
+    s->h263_aic= s->pict_type == I_TYPE;
+    if(s->h263_aic){
+        s->y_dc_scale_table=
+        s->c_dc_scale_table= ff_aic_dc_scale_table;
+    }else{
+        s->y_dc_scale_table=
+        s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+    }
+}
+
+#if 0 /* unused, remove? */
+static int get_num(GetBitContext *gb)
+{
+    int n, n1;
+
+    n = get_bits(gb, 16);
+    if (n >= 0x4000) {
+        return n - 0x4000;
+    } else {
+        n1 = get_bits(gb, 16);
+        return (n << 16) | n1;
+    }
+}
+#endif
+
+#endif //CONFIG_ENCODERS
+
+/* read RV 1.0 compatible frame header */
+static int rv10_decode_picture_header(MpegEncContext *s)
+{
+    int mb_count, pb_frame, marker, unk, mb_xy;
+
+//printf("ff:%d\n", full_frame);
+    marker = get_bits(&s->gb, 1);
+
+    if (get_bits(&s->gb, 1))
+        s->pict_type = P_TYPE;
+    else
+        s->pict_type = I_TYPE;
+//printf("h:%X ver:%d\n",h,s->rv10_version);
+    if(!marker) av_log(s->avctx, AV_LOG_ERROR, "marker missing\n");
+    pb_frame = get_bits(&s->gb, 1);
+
+#ifdef DEBUG
+    av_log(s->avctx, AV_LOG_DEBUG, "pict_type=%d pb_frame=%d\n", s->pict_type, pb_frame);
+#endif
+
+    if (pb_frame){
+        av_log(s->avctx, AV_LOG_ERROR, "pb frame not supported\n");
+        return -1;
+    }
+
+    s->qscale = get_bits(&s->gb, 5);
+    if(s->qscale==0){
+        av_log(s->avctx, AV_LOG_ERROR, "error, qscale:0\n");
+        return -1;
+    }
+
+    if (s->pict_type == I_TYPE) {
+        if (s->rv10_version == 3) {
+            /* specific MPEG like DC coding not used */
+            s->last_dc[0] = get_bits(&s->gb, 8);
+            s->last_dc[1] = get_bits(&s->gb, 8);
+            s->last_dc[2] = get_bits(&s->gb, 8);
+#ifdef DEBUG
+            av_log(s->avctx, AV_LOG_DEBUG, "DC:%d %d %d\n",
+                   s->last_dc[0],
+                   s->last_dc[1],
+                   s->last_dc[2]);
+#endif
+        }
+    }
+    /* if multiple packets per frame are sent, the position at which
+       to display the macro blocks is coded here */
+
+    mb_xy= s->mb_x + s->mb_y*s->mb_width;
+    if(show_bits(&s->gb, 12)==0 || (mb_xy && mb_xy < s->mb_num)){
+        s->mb_x = get_bits(&s->gb, 6); /* mb_x */
+        s->mb_y = get_bits(&s->gb, 6); /* mb_y */
+        mb_count = get_bits(&s->gb, 12);
+    } else {
+        s->mb_x = 0;
+        s->mb_y = 0;
+        mb_count = s->mb_width * s->mb_height;
+    }
+    unk= get_bits(&s->gb, 3);   /* ignored */
+//printf("%d\n", unk);
+    s->f_code = 1;
+    s->unrestricted_mv = 1;
+
+    return mb_count;
+}
+
+static int rv20_decode_picture_header(MpegEncContext *s)
+{
+    int seq, mb_pos, i;
+
+#if 0
+    GetBitContext gb= s->gb;
+    for(i=0; i<64; i++){
+        av_log(s->avctx, AV_LOG_DEBUG, "%d", get_bits1(&gb));
+        if(i%4==3) av_log(s->avctx, AV_LOG_DEBUG, " ");
+    }
+    av_log(s->avctx, AV_LOG_DEBUG, "\n");
+#endif
+#if 0
+    av_log(s->avctx, AV_LOG_DEBUG, "%3dx%03d/%02Xx%02X ", s->width, s->height, s->width/4, s->height/4);
+    for(i=0; i<s->avctx->extradata_size; i++){
+        av_log(s->avctx, AV_LOG_DEBUG, "%02X ", ((uint8_t*)s->avctx->extradata)[i]);
+        if(i%4==3) av_log(s->avctx, AV_LOG_DEBUG, " ");
+    }
+    av_log(s->avctx, AV_LOG_DEBUG, "\n");
+#endif
+
+    if(s->avctx->sub_id == 0x30202002 || s->avctx->sub_id == 0x30203002){
+        if (get_bits(&s->gb, 3)){
+            av_log(s->avctx, AV_LOG_ERROR, "unknown triplet set\n");
+            return -1;
+        }
+    }
+
+    i= get_bits(&s->gb, 2);
+    switch(i){
+    case 0: s->pict_type= I_TYPE; break;
+    case 1: s->pict_type= I_TYPE; break; //hmm ...
+    case 2: s->pict_type= P_TYPE; break;
+    case 3: s->pict_type= B_TYPE; break;
+    default:
+        av_log(s->avctx, AV_LOG_ERROR, "unknown frame type\n");
+        return -1;
+    }
+
+    if(s->last_picture_ptr==NULL && s->pict_type==B_TYPE){
+        av_log(s->avctx, AV_LOG_ERROR, "early B pix\n");
+        return -1;
+    }
+
+    if (get_bits(&s->gb, 1)){
+        av_log(s->avctx, AV_LOG_ERROR, "unknown bit set\n");
+        return -1;
+    }
+
+    s->qscale = get_bits(&s->gb, 5);
+    if(s->qscale==0){
+        av_log(s->avctx, AV_LOG_ERROR, "error, qscale:0\n");
+        return -1;
+    }
+    if(s->avctx->sub_id == 0x30203002){
+        if (get_bits(&s->gb, 1)){
+            av_log(s->avctx, AV_LOG_ERROR, "unknown bit2 set\n");
+            return -1;
+        }
+    }
+
+    if(s->avctx->has_b_frames){
+        int f, new_w, new_h;
+        int v= s->avctx->extradata_size >= 4 ? 7&((uint8_t*)s->avctx->extradata)[1] : 0;
+
+        if (get_bits(&s->gb, 1)){
+            av_log(s->avctx, AV_LOG_ERROR, "unknown bit3 set\n");
+//            return -1;
+        }
+        seq= get_bits(&s->gb, 13)<<2;
+
+        f= get_bits(&s->gb, av_log2(v)+1);
+
+        if(f){
+            new_w= 4*((uint8_t*)s->avctx->extradata)[6+2*f];
+            new_h= 4*((uint8_t*)s->avctx->extradata)[7+2*f];
+        }else{
+            new_w= s->width; //FIXME wrong we of course must save the original in the context
+            new_h= s->height;
+        }
+        if(new_w != s->width || new_h != s->height){
+            av_log(s->avctx, AV_LOG_DEBUG, "attempting to change resolution to %dx%d\n", new_w, new_h);
+            MPV_common_end(s);
+            s->width  = s->avctx->width = new_w;
+            s->height = s->avctx->height= new_h;
+            if (MPV_common_init(s) < 0)
+                return -1;
+        }
+
+        if(s->avctx->debug & FF_DEBUG_PICT_INFO){
+            av_log(s->avctx, AV_LOG_DEBUG, "F %d/%d\n", f, v);
+        }
+    }else{
+        seq= get_bits(&s->gb, 8)*128;
+    }
+
+//     if(s->avctx->sub_id <= 0x20201002){ //0x20201002 definitely needs this
+    mb_pos= ff_h263_decode_mba(s);
+/*    }else{
+        mb_pos= get_bits(&s->gb, av_log2(s->mb_num-1)+1);
+        s->mb_x= mb_pos % s->mb_width;
+        s->mb_y= mb_pos / s->mb_width;
+    }*/
+//av_log(s->avctx, AV_LOG_DEBUG, "%d\n", seq);
+    seq |= s->time &~0x7FFF;
+    if(seq - s->time >  0x4000) seq -= 0x8000;
+    if(seq - s->time < -0x4000) seq += 0x8000;
+    if(seq != s->time){
+        if(s->pict_type!=B_TYPE){
+            s->time= seq;
+            s->pp_time= s->time - s->last_non_b_time;
+            s->last_non_b_time= s->time;
+        }else{
+            s->time= seq;
+            s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
+            if(s->pp_time <=s->pb_time || s->pp_time <= s->pp_time - s->pb_time || s->pp_time<=0){
+                av_log(s->avctx, AV_LOG_DEBUG, "messed up order, possible from seeking? skipping current b frame\n");
+                return FRAME_SKIPPED;
+            }
+            ff_mpeg4_init_direct_mv(s);
+        }
+    }
+//    printf("%d %d %d %d %d\n", seq, (int)s->time, (int)s->last_non_b_time, s->pp_time, s->pb_time);
+/*for(i=0; i<32; i++){
+    av_log(s->avctx, AV_LOG_DEBUG, "%d", get_bits1(&s->gb));
+}
+av_log(s->avctx, AV_LOG_DEBUG, "\n");*/
+    s->no_rounding= get_bits1(&s->gb);
+
+    s->f_code = 1;
+    s->unrestricted_mv = 1;
+    s->h263_aic= s->pict_type == I_TYPE;
+//    s->alt_inter_vlc=1;
+//    s->obmc=1;
+//    s->umvplus=1;
+    s->modified_quant=1;
+    s->loop_filter=1;
+
+    if(s->avctx->debug & FF_DEBUG_PICT_INFO){
+            av_log(s->avctx, AV_LOG_INFO, "num:%5d x:%2d y:%2d type:%d qscale:%2d rnd:%d\n",
+                   seq, s->mb_x, s->mb_y, s->pict_type, s->qscale, s->no_rounding);
+    }
+
+    assert(s->pict_type != B_TYPE || !s->low_delay);
+
+    return s->mb_width*s->mb_height - mb_pos;
+}
+
+static int rv10_decode_init(AVCodecContext *avctx)
+{
+    MpegEncContext *s = avctx->priv_data;
+    static int done=0;
+
+    MPV_decode_defaults(s);
+
+    s->avctx= avctx;
+    s->out_format = FMT_H263;
+    s->codec_id= avctx->codec_id;
+
+    s->width = avctx->width;
+    s->height = avctx->height;
+
+    s->h263_long_vectors= ((uint8_t*)avctx->extradata)[3] & 1;
+    avctx->sub_id= BE_32((uint8_t*)avctx->extradata + 4);
+
+    switch(avctx->sub_id){
+    case 0x10000000:
+        s->rv10_version= 0;
+        s->low_delay=1;
+        break;
+    case 0x10002000:
+        s->rv10_version= 3;
+        s->low_delay=1;
+        s->obmc=1;
+        break;
+    case 0x10003000:
+        s->rv10_version= 3;
+        s->low_delay=1;
+        break;
+    case 0x10003001:
+        s->rv10_version= 3;
+        s->low_delay=1;
+        break;
+    case 0x20001000: /* real rv20 decoder fail on this id */
+    /*case 0x20100001:
+    case 0x20101001:
+    case 0x20103001:*/
+    case 0x20100000 ... 0x2019ffff:
+        s->low_delay=1;
+        break;
+    /*case 0x20200002:
+    case 0x20201002:
+    case 0x20203002:*/
+    case 0x20200002 ... 0x202fffff:
+    case 0x30202002:
+    case 0x30203002:
+        s->low_delay=0;
+        s->avctx->has_b_frames=1;
+        break;
+    default:
+        av_log(s->avctx, AV_LOG_ERROR, "unknown header %X\n", avctx->sub_id);
+    }
+
+    if(avctx->debug & FF_DEBUG_PICT_INFO){
+        av_log(avctx, AV_LOG_DEBUG, "ver:%X ver0:%X\n", avctx->sub_id, avctx->extradata_size >= 4 ? ((uint32_t*)avctx->extradata)[0] : -1);
+    }
+
+    avctx->pix_fmt = PIX_FMT_YUV420P;
+
+    if (MPV_common_init(s) < 0)
+        return -1;
+
+    h263_decode_init_vlc(s);
+
+    /* init rv vlc */
+    if (!done) {
+        init_vlc(&rv_dc_lum, DC_VLC_BITS, 256,
+                 rv_lum_bits, 1, 1,
+                 rv_lum_code, 2, 2, 1);
+        init_vlc(&rv_dc_chrom, DC_VLC_BITS, 256,
+                 rv_chrom_bits, 1, 1,
+                 rv_chrom_code, 2, 2, 1);
+        done = 1;
+    }
+
+    return 0;
+}
+
+static int rv10_decode_end(AVCodecContext *avctx)
+{
+    MpegEncContext *s = avctx->priv_data;
+
+    MPV_common_end(s);
+    return 0;
+}
+
+static int rv10_decode_packet(AVCodecContext *avctx,
+                             uint8_t *buf, int buf_size)
+{
+    MpegEncContext *s = avctx->priv_data;
+    int mb_count, mb_pos, left, start_mb_x;
+
+    init_get_bits(&s->gb, buf, buf_size*8);
+    if(s->codec_id ==CODEC_ID_RV10)
+        mb_count = rv10_decode_picture_header(s);
+    else
+        mb_count = rv20_decode_picture_header(s);
+    if (mb_count < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "HEADER ERROR\n");
+        return -1;
+    }
+
+    if (s->mb_x >= s->mb_width ||
+        s->mb_y >= s->mb_height) {
+        av_log(s->avctx, AV_LOG_ERROR, "POS ERROR %d %d\n", s->mb_x, s->mb_y);
+        return -1;
+    }
+    mb_pos = s->mb_y * s->mb_width + s->mb_x;
+    left = s->mb_width * s->mb_height - mb_pos;
+    if (mb_count > left) {
+        av_log(s->avctx, AV_LOG_ERROR, "COUNT ERROR\n");
+        return -1;
+    }
+//if(s->pict_type == P_TYPE) return 0;
+
+    if ((s->mb_x == 0 && s->mb_y == 0) || s->current_picture_ptr==NULL) {
+        if(s->current_picture_ptr){ //FIXME write parser so we always have complete frames?
+            ff_er_frame_end(s);
+            MPV_frame_end(s);
+            s->mb_x= s->mb_y = s->resync_mb_x = s->resync_mb_y= 0;
+        }
+        if(MPV_frame_start(s, avctx) < 0)
+            return -1;
+        ff_er_frame_start(s);
+    }
+
+#ifdef DEBUG
+    av_log(avctx, AV_LOG_DEBUG, "qscale=%d\n", s->qscale);
+#endif
+
+    /* default quantization values */
+    if(s->codec_id== CODEC_ID_RV10){
+        if(s->mb_y==0) s->first_slice_line=1;
+    }else{
+        s->first_slice_line=1;
+        s->resync_mb_x= s->mb_x;
+    }
+    start_mb_x= s->mb_x;
+    s->resync_mb_y= s->mb_y;
+    if(s->h263_aic){
+        s->y_dc_scale_table=
+        s->c_dc_scale_table= ff_aic_dc_scale_table;
+    }else{
+        s->y_dc_scale_table=
+        s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+    }
+
+    if(s->modified_quant)
+        s->chroma_qscale_table= ff_h263_chroma_qscale_table;
+
+    ff_set_qscale(s, s->qscale);
+
+    s->rv10_first_dc_coded[0] = 0;
+    s->rv10_first_dc_coded[1] = 0;
+    s->rv10_first_dc_coded[2] = 0;
+//printf("%d %X %X\n", s->pict_type, s->current_picture.motion_val[0], s->current_picture.motion_val[1]);
+    s->block_wrap[0]=
+    s->block_wrap[1]=
+    s->block_wrap[2]=
+    s->block_wrap[3]= s->b8_stride;
+    s->block_wrap[4]=
+    s->block_wrap[5]= s->mb_stride;
+    ff_init_block_index(s);
+    /* decode each macroblock */
+
+    for(s->mb_num_left= mb_count; s->mb_num_left>0; s->mb_num_left--) {
+        int ret;
+        ff_update_block_index(s);
+#ifdef DEBUG
+        av_log(avctx, AV_LOG_DEBUG, "**mb x=%d y=%d\n", s->mb_x, s->mb_y);
+#endif
+
+        s->mv_dir = MV_DIR_FORWARD;
+        s->mv_type = MV_TYPE_16X16;
+        ret=ff_h263_decode_mb(s, s->block);
+
+        if (ret == SLICE_ERROR || s->gb.size_in_bits < get_bits_count(&s->gb)) {
+            av_log(s->avctx, AV_LOG_ERROR, "ERROR at MB %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+        if(s->pict_type != B_TYPE)
+            ff_h263_update_motion_val(s);
+        MPV_decode_mb(s, s->block);
+        if(s->loop_filter)
+            ff_h263_loop_filter(s);
+
+        if (++s->mb_x == s->mb_width) {
+            s->mb_x = 0;
+            s->mb_y++;
+            ff_init_block_index(s);
+        }
+        if(s->mb_x == s->resync_mb_x)
+            s->first_slice_line=0;
+        if(ret == SLICE_END) break;
+    }
+
+    ff_er_add_slice(s, start_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, AC_END|DC_END|MV_END);
+
+    return buf_size;
+}
+
+static int rv10_decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             uint8_t *buf, int buf_size)
+{
+    MpegEncContext *s = avctx->priv_data;
+    int i;
+    AVFrame *pict = data;
+
+#ifdef DEBUG
+    av_log(avctx, AV_LOG_DEBUG, "*****frame %d size=%d\n", avctx->frame_number, buf_size);
+#endif
+
+    /* no supplementary picture */
+    if (buf_size == 0) {
+        return 0;
+    }
+
+    if(avctx->slice_count){
+        for(i=0; i<avctx->slice_count; i++){
+            int offset= avctx->slice_offset[i];
+            int size;
+
+            if(i+1 == avctx->slice_count)
+                size= buf_size - offset;
+            else
+                size= avctx->slice_offset[i+1] - offset;
+
+            rv10_decode_packet(avctx, buf+offset, size);
+        }
+    }else{
+        rv10_decode_packet(avctx, buf, buf_size);
+    }
+
+    if(s->current_picture_ptr != NULL && s->mb_y>=s->mb_height){
+        ff_er_frame_end(s);
+        MPV_frame_end(s);
+
+        if (s->pict_type == B_TYPE || s->low_delay) {
+            *pict= *(AVFrame*)s->current_picture_ptr;
+        } else if (s->last_picture_ptr != NULL) {
+            *pict= *(AVFrame*)s->last_picture_ptr;
+        }
+
+        if(s->last_picture_ptr || s->low_delay){
+            *data_size = sizeof(AVFrame);
+            ff_print_debug_info(s, pict);
+        }
+        s->current_picture_ptr= NULL; //so we can detect if frame_end wasnt called (find some nicer solution...)
+    }
+
+    return buf_size;
+}
+
+AVCodec rv10_decoder = {
+    "rv10",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_RV10,
+    sizeof(MpegEncContext),
+    rv10_decode_init,
+    NULL,
+    rv10_decode_end,
+    rv10_decode_frame,
+    CODEC_CAP_DR1
+};
+
+AVCodec rv20_decoder = {
+    "rv20",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_RV20,
+    sizeof(MpegEncContext),
+    rv10_decode_init,
+    NULL,
+    rv10_decode_end,
+    rv10_decode_frame,
+    CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .flush= ff_mpeg_flush,
+};
+
diff --git a/contrib/ffmpeg/libavcodec/sh4/dsputil_align.c b/contrib/ffmpeg/libavcodec/sh4/dsputil_align.c
new file mode 100644
index 000000000..7e7e3304b
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/sh4/dsputil_align.c
@@ -0,0 +1,430 @@
+/*
+ * aligned/packed access motion
+ *
+ * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#include "../avcodec.h"
+#include "../dsputil.h"
+
+
+#define         LP(p)           *(uint32_t*)(p)
+
+
+#define         UNPACK(ph,pl,tt0,tt1) do { \
+        uint32_t t0,t1; t0=tt0;t1=tt1; \
+        ph = ( (t0 & ~BYTE_VEC32(0x03))>>2) + ( (t1 & ~BYTE_VEC32(0x03))>>2); \
+        pl = (t0 & BYTE_VEC32(0x03)) + (t1 & BYTE_VEC32(0x03)); } while(0)
+
+#define         rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x02))>>2) & BYTE_VEC32(0x03))
+#define         no_rnd_PACK(ph,pl,nph,npl)      ph + nph + (((pl + npl + BYTE_VEC32(0x01))>>2) & BYTE_VEC32(0x03))
+
+/* little endian */
+#define         MERGE1(a,b,ofs) (ofs==0)?a:( ((a)>>(8*ofs))|((b)<<(32-8*ofs)) )
+#define         MERGE2(a,b,ofs) (ofs==3)?b:( ((a)>>(8*(ofs+1)))|((b)<<(32-8*(ofs+1))) )
+/* big
+#define         MERGE1(a,b,ofs) (ofs==0)?a:( ((a)<<(8*ofs))|((b)>>(32-8*ofs)) )
+#define         MERGE2(a,b,ofs) (ofs==3)?b:( ((a)<<(8+8*ofs))|((b)>>(32-8-8*ofs)) )
+*/
+
+
+#define         put(d,s)        d = s
+#define         avg(d,s)        d = rnd_avg32(s,d)
+
+#define         OP_C4(ofs) \
+        ref-=ofs; \
+        do { \
+                OP(LP(dest),MERGE1(LP(ref),LP(ref+4),ofs)); \
+                ref+=stride; \
+                dest+=stride; \
+        } while(--height)
+
+#define        OP_C40() \
+        do { \
+                OP(LP(dest),LP(ref)); \
+                ref+=stride; \
+                dest+=stride; \
+        } while(--height)
+
+
+#define         OP      put
+
+static void put_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height)
+{
+        switch((int)ref&3){
+        case 0: OP_C40(); return;
+        case 1: OP_C4(1); return;
+        case 2: OP_C4(2); return;
+        case 3: OP_C4(3); return;
+        }
+}
+
+#undef          OP
+#define         OP      avg
+
+static void avg_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height)
+{
+        switch((int)ref&3){
+        case 0: OP_C40(); return;
+        case 1: OP_C4(1); return;
+        case 2: OP_C4(2); return;
+        case 3: OP_C4(3); return;
+        }
+}
+
+#undef          OP
+
+#define         OP_C(ofs,sz,avg2) \
+{ \
+        ref-=ofs; \
+        do { \
+                uint32_t        t0,t1; \
+                t0 = LP(ref+0); \
+                t1 = LP(ref+4); \
+                OP(LP(dest+0), MERGE1(t0,t1,ofs)); \
+                t0 = LP(ref+8); \
+                OP(LP(dest+4), MERGE1(t1,t0,ofs)); \
+if (sz==16) { \
+                t1 = LP(ref+12); \
+                OP(LP(dest+8), MERGE1(t0,t1,ofs)); \
+                t0 = LP(ref+16); \
+                OP(LP(dest+12), MERGE1(t1,t0,ofs)); \
+} \
+                ref+=stride; \
+                dest+= stride; \
+        } while(--height); \
+}
+
+/* aligned */
+#define         OP_C0(sz,avg2) \
+{ \
+        do { \
+                OP(LP(dest+0), LP(ref+0)); \
+                OP(LP(dest+4), LP(ref+4)); \
+if (sz==16) { \
+                OP(LP(dest+8), LP(ref+8)); \
+                OP(LP(dest+12), LP(ref+12)); \
+} \
+                ref+=stride; \
+                dest+= stride; \
+        } while(--height); \
+}
+
+#define         OP_X(ofs,sz,avg2) \
+{ \
+        ref-=ofs; \
+        do { \
+                uint32_t        t0,t1; \
+                t0 = LP(ref+0); \
+                t1 = LP(ref+4); \
+                OP(LP(dest+0), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
+                t0 = LP(ref+8); \
+                OP(LP(dest+4), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
+if (sz==16) { \
+                t1 = LP(ref+12); \
+                OP(LP(dest+8), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
+                t0 = LP(ref+16); \
+                OP(LP(dest+12), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
+} \
+                ref+=stride; \
+                dest+= stride; \
+        } while(--height); \
+}
+
+/* aligned */
+#define         OP_Y0(sz,avg2) \
+{ \
+        uint32_t t0,t1,t2,t3,t; \
+\
+        t0 = LP(ref+0); \
+        t1 = LP(ref+4); \
+if (sz==16) { \
+        t2 = LP(ref+8); \
+        t3 = LP(ref+12); \
+} \
+        do { \
+                ref += stride; \
+\
+                t = LP(ref+0); \
+                OP(LP(dest+0), avg2(t0,t)); t0 = t; \
+                t = LP(ref+4); \
+                OP(LP(dest+4), avg2(t1,t)); t1 = t; \
+if (sz==16) { \
+                t = LP(ref+8); \
+                OP(LP(dest+8), avg2(t2,t)); t2 = t; \
+                t = LP(ref+12); \
+                OP(LP(dest+12), avg2(t3,t)); t3 = t; \
+} \
+                dest+= stride; \
+        } while(--height); \
+}
+
+#define         OP_Y(ofs,sz,avg2) \
+{ \
+        uint32_t t0,t1,t2,t3,t,w0,w1; \
+\
+        ref-=ofs; \
+        w0 = LP(ref+0); \
+        w1 = LP(ref+4); \
+        t0 = MERGE1(w0,w1,ofs); \
+        w0 = LP(ref+8); \
+        t1 = MERGE1(w1,w0,ofs); \
+if (sz==16) { \
+        w1 = LP(ref+12); \
+        t2 = MERGE1(w0,w1,ofs); \
+        w0 = LP(ref+16); \
+        t3 = MERGE1(w1,w0,ofs); \
+} \
+        do { \
+                ref += stride; \
+\
+                w0 = LP(ref+0); \
+                w1 = LP(ref+4); \
+                t = MERGE1(w0,w1,ofs); \
+                OP(LP(dest+0), avg2(t0,t)); t0 = t; \
+                w0 = LP(ref+8); \
+                t = MERGE1(w1,w0,ofs); \
+                OP(LP(dest+4), avg2(t1,t)); t1 = t; \
+if (sz==16) { \
+                w1 = LP(ref+12); \
+                t = MERGE1(w0,w1,ofs); \
+                OP(LP(dest+8), avg2(t2,t)); t2 = t; \
+                w0 = LP(ref+16); \
+                t = MERGE1(w1,w0,ofs); \
+                OP(LP(dest+12), avg2(t3,t)); t3 = t; \
+} \
+                dest+=stride; \
+        } while(--height); \
+}
+
+#define OP_X0(sz,avg2) OP_X(0,sz,avg2)
+#define OP_XY0(sz,PACK) OP_XY(0,sz,PACK)
+#define         OP_XY(ofs,sz,PACK) \
+{ \
+        uint32_t        t2,t3,w0,w1; \
+        uint32_t        a0,a1,a2,a3,a4,a5,a6,a7; \
+\
+        ref -= ofs; \
+        w0 = LP(ref+0); \
+        w1 = LP(ref+4); \
+        UNPACK(a0,a1,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
+        w0 = LP(ref+8); \
+        UNPACK(a2,a3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
+if (sz==16) { \
+        w1 = LP(ref+12); \
+        UNPACK(a4,a5,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
+        w0 = LP(ref+16); \
+        UNPACK(a6,a7,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
+} \
+        do { \
+                ref+=stride; \
+                w0 = LP(ref+0); \
+                w1 = LP(ref+4); \
+                UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
+                OP(LP(dest+0),PACK(a0,a1,t2,t3)); \
+                a0 = t2; a1 = t3; \
+                w0 = LP(ref+8); \
+                UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
+                OP(LP(dest+4),PACK(a2,a3,t2,t3)); \
+                a2 = t2; a3 = t3; \
+if (sz==16) { \
+                w1 = LP(ref+12); \
+                UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
+                OP(LP(dest+8),PACK(a4,a5,t2,t3)); \
+                a4 = t2; a5 = t3; \
+                w0 = LP(ref+16); \
+                UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
+                OP(LP(dest+12),PACK(a6,a7,t2,t3)); \
+                a6 = t2; a7 = t3; \
+} \
+                dest+=stride; \
+        } while(--height); \
+}
+
+#define         DEFFUNC(op,rnd,xy,sz,OP_N,avgfunc) \
+static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref, \
+                                const int stride, int height) \
+{ \
+        switch((int)ref&3) { \
+        case 0:OP_N##0(sz,rnd##_##avgfunc); return; \
+        case 1:OP_N(1,sz,rnd##_##avgfunc); return; \
+        case 2:OP_N(2,sz,rnd##_##avgfunc); return; \
+        case 3:OP_N(3,sz,rnd##_##avgfunc); return; \
+        } \
+}
+
+#define OP put
+
+DEFFUNC(put,   rnd,o,8,OP_C,avg2)
+DEFFUNC(put,   rnd,x,8,OP_X,avg2)
+DEFFUNC(put,no_rnd,x,8,OP_X,avg2)
+DEFFUNC(put,   rnd,y,8,OP_Y,avg2)
+DEFFUNC(put,no_rnd,y,8,OP_Y,avg2)
+DEFFUNC(put,   rnd,xy,8,OP_XY,PACK)
+DEFFUNC(put,no_rnd,xy,8,OP_XY,PACK)
+DEFFUNC(put,   rnd,o,16,OP_C,avg2)
+DEFFUNC(put,   rnd,x,16,OP_X,avg2)
+DEFFUNC(put,no_rnd,x,16,OP_X,avg2)
+DEFFUNC(put,   rnd,y,16,OP_Y,avg2)
+DEFFUNC(put,no_rnd,y,16,OP_Y,avg2)
+DEFFUNC(put,   rnd,xy,16,OP_XY,PACK)
+DEFFUNC(put,no_rnd,xy,16,OP_XY,PACK)
+
+#undef OP
+#define OP avg
+
+DEFFUNC(avg,   rnd,o,8,OP_C,avg2)
+DEFFUNC(avg,   rnd,x,8,OP_X,avg2)
+DEFFUNC(avg,no_rnd,x,8,OP_X,avg2)
+DEFFUNC(avg,   rnd,y,8,OP_Y,avg2)
+DEFFUNC(avg,no_rnd,y,8,OP_Y,avg2)
+DEFFUNC(avg,   rnd,xy,8,OP_XY,PACK)
+DEFFUNC(avg,no_rnd,xy,8,OP_XY,PACK)
+DEFFUNC(avg,   rnd,o,16,OP_C,avg2)
+DEFFUNC(avg,   rnd,x,16,OP_X,avg2)
+DEFFUNC(avg,no_rnd,x,16,OP_X,avg2)
+DEFFUNC(avg,   rnd,y,16,OP_Y,avg2)
+DEFFUNC(avg,no_rnd,y,16,OP_Y,avg2)
+DEFFUNC(avg,   rnd,xy,16,OP_XY,PACK)
+DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)
+
+#undef OP
+
+#define         put_no_rnd_pixels8_o     put_rnd_pixels8_o
+#define         put_no_rnd_pixels16_o    put_rnd_pixels16_o
+#define         avg_no_rnd_pixels8_o     avg_rnd_pixels8_o
+#define         avg_no_rnd_pixels16_o    avg_rnd_pixels16_o
+
+#define         put_pixels8_c            put_rnd_pixels8_o
+#define         put_pixels16_c           put_rnd_pixels16_o
+#define         avg_pixels8_c            avg_rnd_pixels8_o
+#define         avg_pixels16_c           avg_rnd_pixels16_o
+#define         put_no_rnd_pixels8_c     put_rnd_pixels8_o
+#define         put_no_rnd_pixels16_c    put_rnd_pixels16_o
+#define         avg_no_rnd_pixels8_c     avg_rnd_pixels8_o
+#define         avg_no_rnd_pixels16_c    avg_rnd_pixels16_o
+
+#define         QPEL
+
+#ifdef QPEL
+
+#include "qpel.c"
+
+#endif
+
+void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
+{
+        c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
+        c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
+        c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
+        c->put_pixels_tab[0][3] = put_rnd_pixels16_xy;
+        c->put_pixels_tab[1][0] = put_rnd_pixels8_o;
+        c->put_pixels_tab[1][1] = put_rnd_pixels8_x;
+        c->put_pixels_tab[1][2] = put_rnd_pixels8_y;
+        c->put_pixels_tab[1][3] = put_rnd_pixels8_xy;
+
+        c->put_no_rnd_pixels_tab[0][0] = put_no_rnd_pixels16_o;
+        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x;
+        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y;
+        c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy;
+        c->put_no_rnd_pixels_tab[1][0] = put_no_rnd_pixels8_o;
+        c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x;
+        c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y;
+        c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy;
+
+        c->avg_pixels_tab[0][0] = avg_rnd_pixels16_o;
+        c->avg_pixels_tab[0][1] = avg_rnd_pixels16_x;
+        c->avg_pixels_tab[0][2] = avg_rnd_pixels16_y;
+        c->avg_pixels_tab[0][3] = avg_rnd_pixels16_xy;
+        c->avg_pixels_tab[1][0] = avg_rnd_pixels8_o;
+        c->avg_pixels_tab[1][1] = avg_rnd_pixels8_x;
+        c->avg_pixels_tab[1][2] = avg_rnd_pixels8_y;
+        c->avg_pixels_tab[1][3] = avg_rnd_pixels8_xy;
+
+        c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_o;
+        c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x;
+        c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y;
+        c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy;
+        c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_o;
+        c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x;
+        c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y;
+        c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy;
+
+#ifdef QPEL
+
+#define dspfunc(PFX, IDX, NUM) \
+    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
+    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
+    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
+    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
+    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
+    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
+    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
+    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
+    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
+    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
+    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
+    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
+    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
+    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
+    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
+    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
+
+    dspfunc(put_qpel, 0, 16);
+    dspfunc(put_no_rnd_qpel, 0, 16);
+
+    dspfunc(avg_qpel, 0, 16);
+    /* dspfunc(avg_no_rnd_qpel, 0, 16); */
+
+    dspfunc(put_qpel, 1, 8);
+    dspfunc(put_no_rnd_qpel, 1, 8);
+
+    dspfunc(avg_qpel, 1, 8);
+    /* dspfunc(avg_no_rnd_qpel, 1, 8); */
+
+    dspfunc(put_h264_qpel, 0, 16);
+    dspfunc(put_h264_qpel, 1, 8);
+    dspfunc(put_h264_qpel, 2, 4);
+    dspfunc(avg_h264_qpel, 0, 16);
+    dspfunc(avg_h264_qpel, 1, 8);
+    dspfunc(avg_h264_qpel, 2, 4);
+
+#undef dspfunc
+    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
+    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
+    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
+    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
+    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
+    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
+
+    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
+    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
+    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
+    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
+    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
+    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
+    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
+    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
+
+    c->gmc1 = gmc1_c;
+    c->gmc = gmc_c;
+
+#endif
+}
diff --git a/contrib/ffmpeg/libavcodec/sh4/dsputil_sh4.c b/contrib/ffmpeg/libavcodec/sh4/dsputil_sh4.c
new file mode 100644
index 000000000..b38eb2551
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/sh4/dsputil_sh4.c
@@ -0,0 +1,120 @@
+/*
+ *  sh4 dsputil
+ *
+ * Copyright (c) 2003 BERO <bero@geocities.co.jp>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../avcodec.h"
+#include "../dsputil.h"
+
+static void memzero_align8(void *dst,size_t size)
+{
+#if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
+        (char*)dst+=size;
+        size/=8*4;
+        asm(
+#if defined(__SH4__)
+        " fschg\n"  //single float mode
+#endif
+        " fldi0 fr0\n"
+        " fldi0 fr1\n"
+        " fschg\n"  // double
+        "1: \n" \
+        " dt %1\n"
+        " fmov  dr0,@-%0\n"
+        " fmov  dr0,@-%0\n"
+        " fmov  dr0,@-%0\n"
+        " bf.s 1b\n"
+        " fmov  dr0,@-%0\n"
+#if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
+        " fschg" //back to single
+#endif
+        : : "r"(dst),"r"(size): "memory" );
+#else
+        double *d = dst;
+        size/=8*4;
+        do {
+                d[0] = 0.0;
+                d[1] = 0.0;
+                d[2] = 0.0;
+                d[3] = 0.0;
+                d+=4;
+        } while(--size);
+#endif
+}
+
+static void clear_blocks_sh4(DCTELEM *blocks)
+{
+//        if (((int)blocks&7)==0)
+        memzero_align8(blocks,sizeof(DCTELEM)*6*64);
+}
+
+extern void idct_sh4(DCTELEM *block);
+static void idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+        idct_sh4(block);
+        int i;
+        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+        for(i=0;i<8;i++) {
+                dest[0] = cm[block[0]];
+                dest[1] = cm[block[1]];
+                dest[2] = cm[block[2]];
+                dest[3] = cm[block[3]];
+                dest[4] = cm[block[4]];
+                dest[5] = cm[block[5]];
+                dest[6] = cm[block[6]];
+                dest[7] = cm[block[7]];
+                dest+=line_size;
+                block+=8;
+        }
+}
+static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+        idct_sh4(block);
+        int i;
+        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+        for(i=0;i<8;i++) {
+                dest[0] = cm[dest[0]+block[0]];
+                dest[1] = cm[dest[1]+block[1]];
+                dest[2] = cm[dest[2]+block[2]];
+                dest[3] = cm[dest[3]+block[3]];
+                dest[4] = cm[dest[4]+block[4]];
+                dest[5] = cm[dest[5]+block[5]];
+                dest[6] = cm[dest[6]+block[6]];
+                dest[7] = cm[dest[7]+block[7]];
+                dest+=line_size;
+                block+=8;
+        }
+}
+
+extern void dsputil_init_align(DSPContext* c, AVCodecContext *avctx);
+
+void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx)
+{
+        const int idct_algo= avctx->idct_algo;
+        dsputil_init_align(c,avctx);
+
+        c->clear_blocks = clear_blocks_sh4;
+        if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4){
+                c->idct_put = idct_put;
+                c->idct_add = idct_add;
+               c->idct     = idct_sh4;
+                c->idct_permutation_type= FF_NO_IDCT_PERM; //FF_SIMPLE_IDCT_PERM; //FF_LIBMPEG2_IDCT_PERM;
+        }
+}
diff --git a/contrib/ffmpeg/libavcodec/sh4/idct_sh4.c b/contrib/ffmpeg/libavcodec/sh4/idct_sh4.c
new file mode 100644
index 000000000..3b8428c3c
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/sh4/idct_sh4.c
@@ -0,0 +1,366 @@
+/*
+ * idct for sh4
+ *
+ * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../dsputil.h"
+#define c1      1.38703984532214752434  /* sqrt(2)*cos(1*pi/16) */
+#define c2      1.30656296487637657577  /* sqrt(2)*cos(2*pi/16) */
+#define c3      1.17587560241935884520  /* sqrt(2)*cos(3*pi/16) */
+#define c4      1.00000000000000000000  /* sqrt(2)*cos(4*pi/16) */
+#define c5      0.78569495838710234903  /* sqrt(2)*cos(5*pi/16) */
+#define c6      0.54119610014619712324  /* sqrt(2)*cos(6*pi/16) */
+#define c7      0.27589937928294311353  /* sqrt(2)*cos(7*pi/16) */
+
+static const float even_table[] __attribute__ ((aligned(8))) = {
+        c4, c4, c4, c4,
+        c2, c6,-c6,-c2,
+        c4,-c4,-c4, c4,
+        c6,-c2, c2,-c6
+};
+
+static const float odd_table[] __attribute__ ((aligned(8))) = {
+        c1, c3, c5, c7,
+        c3,-c7,-c1,-c5,
+        c5,-c1, c7, c3,
+        c7,-c5, c3,-c1
+};
+
+#undef  c1
+#undef  c2
+#undef  c3
+#undef  c4
+#undef  c5
+#undef  c6
+#undef  c7
+
+#if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
+
+#define         load_matrix(table) \
+        __asm__ volatile( \
+        "       fschg\n" \
+        "       fmov   @%0+,xd0\n" \
+        "       fmov   @%0+,xd2\n" \
+        "       fmov   @%0+,xd4\n" \
+        "       fmov   @%0+,xd6\n" \
+        "       fmov   @%0+,xd8\n" \
+        "       fmov   @%0+,xd10\n" \
+        "       fmov   @%0+,xd12\n" \
+        "       fmov   @%0+,xd14\n" \
+        "       fschg\n" \
+        :\
+        : "r"(table)\
+        : "0" \
+        )
+
+#define         ftrv() \
+                __asm__ volatile("ftrv xmtrx,fv0" \
+                : "=f"(fr0),"=f"(fr1),"=f"(fr2),"=f"(fr3) \
+                :  "0"(fr0), "1"(fr1), "2"(fr2), "3"(fr3) );
+
+#define         DEFREG        \
+        register float fr0 __asm__("fr0"); \
+        register float fr1 __asm__("fr1"); \
+        register float fr2 __asm__("fr2"); \
+        register float fr3 __asm__("fr3")
+
+#else
+
+/* generic C code for check */
+
+static void ftrv_(const float xf[],float fv[])
+{
+        float f0,f1,f2,f3;
+        f0 = fv[0];
+        f1 = fv[1];
+        f2 = fv[2];
+        f3 = fv[3];
+        fv[0] = xf[0]*f0 + xf[4]*f1 + xf[ 8]*f2 + xf[12]*f3;
+        fv[1] = xf[1]*f0 + xf[5]*f1 + xf[ 9]*f2 + xf[13]*f3;
+        fv[2] = xf[2]*f0 + xf[6]*f1 + xf[10]*f2 + xf[14]*f3;
+        fv[3] = xf[3]*f0 + xf[7]*f1 + xf[11]*f2 + xf[15]*f3;
+}
+
+static void load_matrix_(float xf[],const float table[])
+{
+        int i;
+        for(i=0;i<16;i++) xf[i]=table[i];
+}
+
+#define         ftrv()                  ftrv_(xf,fv)
+#define         load_matrix(table)      load_matrix_(xf,table)
+
+#define         DEFREG \
+        float fv[4],xf[16]
+
+#define         fr0     fv[0]
+#define         fr1     fv[1]
+#define         fr2     fv[2]
+#define         fr3     fv[3]
+
+#endif
+
+#if 1
+#define         DESCALE(x,n)    (x)*(1.0f/(1<<(n)))
+#else
+#define         DESCALE(x,n)    (((int)(x)+(1<<(n-1)))>>(n))
+#endif
+
+/* this code work worse on gcc cvs. 3.2.3 work fine */
+
+
+#if 1
+//optimized
+
+void idct_sh4(DCTELEM *block)
+{
+        DEFREG;
+
+        int i;
+        float        tblock[8*8],*fblock;
+        int ofs1,ofs2,ofs3;
+
+#if defined(__SH4__)
+#error  "FIXME!! change to single float"
+#endif
+
+        /* row */
+
+        /* even part */
+        load_matrix(even_table);
+
+        fblock = tblock+4;
+        i = 8;
+        do {
+                fr0 = block[0];
+                fr1 = block[2];
+                fr2 = block[4];
+                fr3 = block[6];
+                block+=8;
+                ftrv();
+                *--fblock = fr3;
+                *--fblock = fr2;
+                *--fblock = fr1;
+                *--fblock = fr0;
+                fblock+=8+4;
+        } while(--i);
+        block-=8*8;
+        fblock-=8*8+4;
+
+        load_matrix(odd_table);
+
+        i = 8;
+
+//        ofs1 = sizeof(float)*1;
+//        ofs2 = sizeof(float)*2;
+//        ofs3 = sizeof(float)*3;
+
+        do {
+                float t0,t1,t2,t3;
+                fr0 = block[1];
+                fr1 = block[3];
+                fr2 = block[5];
+                fr3 = block[7];
+                block+=8;
+                ftrv();
+                t0 = *fblock++;
+                t1 = *fblock++;
+                t2 = *fblock++;
+                t3 = *fblock++;
+                fblock+=4;
+                *--fblock = t0 - fr0;
+                *--fblock = t1 - fr1;
+                *--fblock = t2 - fr2;
+                *--fblock = t3 - fr3;
+                *--fblock = t3 + fr3;
+                *--fblock = t2 + fr2;
+                *--fblock = t1 + fr1;
+                *--fblock = t0 + fr0;
+                fblock+=8;
+        } while(--i);
+        block-=8*8;
+        fblock-=8*8;
+
+        /* col */
+
+        /* even part */
+        load_matrix(even_table);
+
+        ofs1 = sizeof(float)*2*8;
+        ofs2 = sizeof(float)*4*8;
+        ofs3 = sizeof(float)*6*8;
+
+        i = 8;
+
+#define        OA(fblock,ofs)   *(float*)((char*)fblock + ofs)
+
+        do {
+                fr0 = OA(fblock,   0);
+                fr1 = OA(fblock,ofs1);
+                fr2 = OA(fblock,ofs2);
+                fr3 = OA(fblock,ofs3);
+                ftrv();
+                OA(fblock,0   ) = fr0;
+                OA(fblock,ofs1) = fr1;
+                OA(fblock,ofs2) = fr2;
+                OA(fblock,ofs3) = fr3;
+                fblock++;
+        } while(--i);
+        fblock-=8;
+
+        load_matrix(odd_table);
+
+        i=8;
+        do {
+                float t0,t1,t2,t3;
+                t0 = OA(fblock,   0); /* [8*0] */
+                t1 = OA(fblock,ofs1); /* [8*2] */
+                t2 = OA(fblock,ofs2); /* [8*4] */
+                t3 = OA(fblock,ofs3); /* [8*6] */
+                fblock+=8;
+                fr0 = OA(fblock,   0); /* [8*1] */
+                fr1 = OA(fblock,ofs1); /* [8*3] */
+                fr2 = OA(fblock,ofs2); /* [8*5] */
+                fr3 = OA(fblock,ofs3); /* [8*7] */
+                fblock+=-8+1;
+                ftrv();
+                block[8*0] = DESCALE(t0 + fr0,3);
+                block[8*7] = DESCALE(t0 - fr0,3);
+                block[8*1] = DESCALE(t1 + fr1,3);
+                block[8*6] = DESCALE(t1 - fr1,3);
+                block[8*2] = DESCALE(t2 + fr2,3);
+                block[8*5] = DESCALE(t2 - fr2,3);
+                block[8*3] = DESCALE(t3 + fr3,3);
+                block[8*4] = DESCALE(t3 - fr3,3);
+                block++;
+        } while(--i);
+
+#if defined(__SH4__)
+#error  "FIXME!! change to double"
+#endif
+}
+#else
+void idct_sh4(DCTELEM *block)
+{
+        DEFREG;
+
+        int i;
+        float   tblock[8*8],*fblock;
+
+        /* row */
+
+        /* even part */
+        load_matrix(even_table);
+
+        fblock = tblock;
+        i = 8;
+        do {
+                fr0 = block[0];
+                fr1 = block[2];
+                fr2 = block[4];
+                fr3 = block[6];
+                block+=8;
+                ftrv();
+                fblock[0] = fr0;
+                fblock[2] = fr1;
+                fblock[4] = fr2;
+                fblock[6] = fr3;
+                fblock+=8;
+        } while(--i);
+        block-=8*8;
+        fblock-=8*8;
+
+        load_matrix(odd_table);
+
+        i = 8;
+
+        do {
+                float t0,t1,t2,t3;
+                fr0 = block[1];
+                fr1 = block[3];
+                fr2 = block[5];
+                fr3 = block[7];
+                block+=8;
+                ftrv();
+                t0 = fblock[0];
+                t1 = fblock[2];
+                t2 = fblock[4];
+                t3 = fblock[6];
+                fblock[0] = t0 + fr0;
+                fblock[7] = t0 - fr0;
+                fblock[1] = t1 + fr1;
+                fblock[6] = t1 - fr1;
+                fblock[2] = t2 + fr2;
+                fblock[5] = t2 - fr2;
+                fblock[3] = t3 + fr3;
+                fblock[4] = t3 - fr3;
+                fblock+=8;
+        } while(--i);
+        block-=8*8;
+        fblock-=8*8;
+
+        /* col */
+
+        /* even part */
+        load_matrix(even_table);
+
+        i = 8;
+
+        do {
+                fr0 = fblock[8*0];
+                fr1 = fblock[8*2];
+                fr2 = fblock[8*4];
+                fr3 = fblock[8*6];
+                ftrv();
+                fblock[8*0] = fr0;
+                fblock[8*2] = fr1;
+                fblock[8*4] = fr2;
+                fblock[8*6] = fr3;
+                fblock++;
+        } while(--i);
+        fblock-=8;
+
+        load_matrix(odd_table);
+
+        i=8;
+        do {
+                float t0,t1,t2,t3;
+                fr0 = fblock[8*1];
+                fr1 = fblock[8*3];
+                fr2 = fblock[8*5];
+                fr3 = fblock[8*7];
+                ftrv();
+                t0 = fblock[8*0];
+                t1 = fblock[8*2];
+                t2 = fblock[8*4];
+                t3 = fblock[8*6];
+                fblock++;
+                block[8*0] = DESCALE(t0 + fr0,3);
+                block[8*7] = DESCALE(t0 - fr0,3);
+                block[8*1] = DESCALE(t1 + fr1,3);
+                block[8*6] = DESCALE(t1 - fr1,3);
+                block[8*2] = DESCALE(t2 + fr2,3);
+                block[8*5] = DESCALE(t2 - fr2,3);
+                block[8*3] = DESCALE(t3 + fr3,3);
+                block[8*4] = DESCALE(t3 - fr3,3);
+                block++;
+        } while(--i);
+}
+#endif
diff --git a/contrib/ffmpeg/libavcodec/sh4/qpel.c b/contrib/ffmpeg/libavcodec/sh4/qpel.c
new file mode 100644
index 000000000..7a73ac50d
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/sh4/qpel.c
@@ -0,0 +1,1600 @@
+/*
+ * This is optimized for sh, which have post increment addressing (*p++).
+ * Some CPU may be index (p[n]) faster than post increment (*p++).
+ *
+ * copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define         LD(adr) *(uint32_t*)(adr)
+
+#define PIXOP2(OPNAME, OP) \
+/*static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{\
+        do {\
+                OP(LP(dst  ),no_rnd_avg32(LD32(src1  ),LD32(src2  )) ); \
+                OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
+                src1+=src_stride1; \
+                src2+=src_stride2; \
+                dst+=dst_stride; \
+        } while(--h); \
+}\
+\
+static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{\
+        do {\
+                OP(LP(dst  ),rnd_avg32(LD32(src1  ),LD32(src2  )) ); \
+                OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
+                src1+=src_stride1; \
+                src2+=src_stride2; \
+                dst+=dst_stride; \
+        } while(--h); \
+}\
+\
+static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{\
+        do {\
+                OP(LP(dst  ),rnd_avg32(LD32(src1  ),LD32(src2  )) ); \
+                src1+=src_stride1; \
+                src2+=src_stride2; \
+                dst+=dst_stride; \
+        } while(--h); \
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{\
+        do {\
+                OP(LP(dst  ),no_rnd_avg32(LD32(src1  ),LD32(src2  )) ); \
+                OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
+                OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \
+                OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
+                src1+=src_stride1; \
+                src2+=src_stride2; \
+                dst+=dst_stride; \
+        } while(--h); \
+}\
+\
+static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{\
+        do {\
+                OP(LP(dst  ),rnd_avg32(LD32(src1  ),LD32(src2  )) ); \
+                OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
+                OP(LP(dst+8),rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \
+                OP(LP(dst+12),rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
+                src1+=src_stride1; \
+                src2+=src_stride2; \
+                dst+=dst_stride; \
+        } while(--h); \
+}*/\
+\
+static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{\
+        do {\
+                OP(LP(dst  ),rnd_avg32(LP(src1  ),LP(src2  )) ); \
+                src1+=src_stride1; \
+                src2+=src_stride2; \
+                dst+=dst_stride; \
+        } while(--h); \
+}\
+\
+static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{\
+        do {\
+                OP(LP(dst  ),rnd_avg32(LD32(src1  ),LP(src2  )) ); \
+                src1+=src_stride1; \
+                src2+=src_stride2; \
+                dst+=dst_stride; \
+        } while(--h); \
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{\
+        do {\
+                OP(LP(dst  ),no_rnd_avg32(LD32(src1  ),LP(src2  )) ); \
+                OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
+                OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
+                OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
+                src1+=src_stride1; \
+                src2+=src_stride2; \
+                dst+=dst_stride; \
+        } while(--h); \
+}\
+\
+static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{\
+        do {\
+                OP(LP(dst  ),rnd_avg32(LD32(src1  ),LP(src2  )) ); \
+                OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
+                OP(LP(dst+8),rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
+                OP(LP(dst+12),rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
+                src1+=src_stride1; \
+                src2+=src_stride2; \
+                dst+=dst_stride; \
+        } while(--h); \
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{\
+        do { /* onlye src2 aligned */\
+                OP(LP(dst  ),no_rnd_avg32(LD32(src1  ),LP(src2  )) ); \
+                OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
+                src1+=src_stride1; \
+                src2+=src_stride2; \
+                dst+=dst_stride; \
+        } while(--h); \
+}\
+\
+static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{\
+        do {\
+                OP(LP(dst  ),rnd_avg32(LD32(src1  ),LP(src2  )) ); \
+                OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
+                src1+=src_stride1; \
+                src2+=src_stride2; \
+                dst+=dst_stride; \
+        } while(--h); \
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{\
+        do {\
+                OP(LP(dst  ),no_rnd_avg32(LP(src1  ),LP(src2  )) ); \
+                OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \
+                src1+=src_stride1; \
+                src2+=src_stride2; \
+                dst+=dst_stride; \
+        } while(--h); \
+}\
+\
+static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{\
+        do {\
+                OP(LP(dst  ),rnd_avg32(LP(src1  ),LP(src2  )) ); \
+                OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \
+                src1+=src_stride1; \
+                src2+=src_stride2; \
+                dst+=dst_stride; \
+        } while(--h); \
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{\
+        do {\
+                OP(LP(dst  ),no_rnd_avg32(LP(src1  ),LP(src2  )) ); \
+                OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \
+                OP(LP(dst+8),no_rnd_avg32(LP(src1+8),LP(src2+8)) ); \
+                OP(LP(dst+12),no_rnd_avg32(LP(src1+12),LP(src2+12)) ); \
+                src1+=src_stride1; \
+                src2+=src_stride2; \
+                dst+=dst_stride; \
+        } while(--h); \
+}\
+\
+static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{\
+        do {\
+                OP(LP(dst  ),rnd_avg32(LP(src1  ),LP(src2  )) ); \
+                OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \
+                OP(LP(dst+8),rnd_avg32(LP(src1+8),LP(src2+8)) ); \
+                OP(LP(dst+12),rnd_avg32(LP(src1+12),LP(src2+12)) ); \
+                src1+=src_stride1; \
+                src2+=src_stride2; \
+                dst+=dst_stride; \
+        } while(--h); \
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{ OPNAME ## _no_rnd_pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
+\
+static inline void OPNAME ## _pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{ OPNAME ## _pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
+\
+static inline void OPNAME ## _no_rnd_pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{ OPNAME ## _no_rnd_pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
+\
+static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
+{ OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
+\
+static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+        do { \
+                uint32_t a0,a1,a2,a3; \
+                UNPACK(a0,a1,LP(src1),LP(src2)); \
+                UNPACK(a2,a3,LP(src3),LP(src4)); \
+                OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
+                UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+                OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
+                src1+=src_stride1;\
+                src2+=src_stride2;\
+                src3+=src_stride3;\
+                src4+=src_stride4;\
+                dst+=dst_stride;\
+        } while(--h); \
+} \
+\
+static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+        do { \
+                uint32_t a0,a1,a2,a3; \
+                UNPACK(a0,a1,LP(src1),LP(src2)); \
+                UNPACK(a2,a3,LP(src3),LP(src4)); \
+                OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
+                UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+                OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
+                src1+=src_stride1;\
+                src2+=src_stride2;\
+                src3+=src_stride3;\
+                src4+=src_stride4;\
+                dst+=dst_stride;\
+        } while(--h); \
+} \
+\
+static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+        do { \
+                uint32_t a0,a1,a2,a3; /* src1 only not aligned */\
+                UNPACK(a0,a1,LD32(src1),LP(src2)); \
+                UNPACK(a2,a3,LP(src3),LP(src4)); \
+                OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+                UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+                OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
+                src1+=src_stride1;\
+                src2+=src_stride2;\
+                src3+=src_stride3;\
+                src4+=src_stride4;\
+                dst+=dst_stride;\
+        } while(--h); \
+} \
+\
+static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+        do { \
+                uint32_t a0,a1,a2,a3; \
+                UNPACK(a0,a1,LD32(src1),LP(src2)); \
+                UNPACK(a2,a3,LP(src3),LP(src4)); \
+                OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+                UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+                OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
+                src1+=src_stride1;\
+                src2+=src_stride2;\
+                src3+=src_stride3;\
+                src4+=src_stride4;\
+                dst+=dst_stride;\
+        } while(--h); \
+} \
+\
+static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+        do { \
+                uint32_t a0,a1,a2,a3; \
+                UNPACK(a0,a1,LP(src1),LP(src2)); \
+                UNPACK(a2,a3,LP(src3),LP(src4)); \
+                OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
+                UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+                OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
+                UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
+                OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
+                UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
+                OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
+                src1+=src_stride1;\
+                src2+=src_stride2;\
+                src3+=src_stride3;\
+                src4+=src_stride4;\
+                dst+=dst_stride;\
+        } while(--h); \
+} \
+\
+static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+        do { \
+                uint32_t a0,a1,a2,a3; \
+                UNPACK(a0,a1,LP(src1),LP(src2)); \
+                UNPACK(a2,a3,LP(src3),LP(src4)); \
+                OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
+                UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+                OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
+                UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
+                OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
+                UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
+                OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
+                src1+=src_stride1;\
+                src2+=src_stride2;\
+                src3+=src_stride3;\
+                src4+=src_stride4;\
+                dst+=dst_stride;\
+        } while(--h); \
+} \
+\
+static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+        do { /* src1 is unaligned */\
+                uint32_t a0,a1,a2,a3; \
+                UNPACK(a0,a1,LD32(src1),LP(src2)); \
+                UNPACK(a2,a3,LP(src3),LP(src4)); \
+                OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+                UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+                OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
+                UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
+                OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
+                UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
+                OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
+                src1+=src_stride1;\
+                src2+=src_stride2;\
+                src3+=src_stride3;\
+                src4+=src_stride4;\
+                dst+=dst_stride;\
+        } while(--h); \
+} \
+\
+static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+        do { \
+                uint32_t a0,a1,a2,a3; \
+                UNPACK(a0,a1,LD32(src1),LP(src2)); \
+                UNPACK(a2,a3,LP(src3),LP(src4)); \
+                OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+                UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
+                OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
+                UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
+                OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
+                UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
+                UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
+                OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
+                src1+=src_stride1;\
+                src2+=src_stride2;\
+                src3+=src_stride3;\
+                src4+=src_stride4;\
+                dst+=dst_stride;\
+        } while(--h); \
+} \
+\
+
+#define op_avg(a, b) a = rnd_avg32(a,b)
+#define op_put(a, b) a = b
+
+PIXOP2(avg, op_avg)
+PIXOP2(put, op_put)
+#undef op_avg
+#undef op_put
+
+#define avg2(a,b) ((a+b+1)>>1)
+#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
+
+
+static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
+{
+    const int A=(16-x16)*(16-y16);
+    const int B=(   x16)*(16-y16);
+    const int C=(16-x16)*(   y16);
+    const int D=(   x16)*(   y16);
+
+    do {
+        int t0,t1,t2,t3;
+        uint8_t *s0 = src;
+        uint8_t *s1 = src+stride;
+        t0 = *s0++; t2 = *s1++;
+        t1 = *s0++; t3 = *s1++;
+        dst[0]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;
+        t0 = *s0++; t2 = *s1++;
+        dst[1]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;
+        t1 = *s0++; t3 = *s1++;
+        dst[2]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;
+        t0 = *s0++; t2 = *s1++;
+        dst[3]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;
+        t1 = *s0++; t3 = *s1++;
+        dst[4]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;
+        t0 = *s0++; t2 = *s1++;
+        dst[5]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;
+        t1 = *s0++; t3 = *s1++;
+        dst[6]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;
+        t0 = *s0++; t2 = *s1++;
+        dst[7]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;
+        dst+= stride;
+        src+= stride;
+    }while(--h);
+}
+
+static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
+                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
+{
+    int y, vx, vy;
+    const int s= 1<<shift;
+
+    width--;
+    height--;
+
+    for(y=0; y<h; y++){
+        int x;
+
+        vx= ox;
+        vy= oy;
+        for(x=0; x<8; x++){ //XXX FIXME optimize
+            int src_x, src_y, frac_x, frac_y, index;
+
+            src_x= vx>>16;
+            src_y= vy>>16;
+            frac_x= src_x&(s-1);
+            frac_y= src_y&(s-1);
+            src_x>>=shift;
+            src_y>>=shift;
+
+            if((unsigned)src_x < width){
+                if((unsigned)src_y < height){
+                    index= src_x + src_y*stride;
+                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
+                                           + src[index       +1]*   frac_x )*(s-frac_y)
+                                        + (  src[index+stride  ]*(s-frac_x)
+                                           + src[index+stride+1]*   frac_x )*   frac_y
+                                        + r)>>(shift*2);
+                }else{
+                    index= src_x + clip(src_y, 0, height)*stride;
+                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
+                                          + src[index       +1]*   frac_x )*s
+                                        + r)>>(shift*2);
+                }
+            }else{
+                if((unsigned)src_y < height){
+                    index= clip(src_x, 0, width) + src_y*stride;
+                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)
+                                           + src[index+stride  ]*   frac_y )*s
+                                        + r)>>(shift*2);
+                }else{
+                    index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
+                    dst[y*stride + x]=    src[index         ];
+                }
+            }
+
+            vx+= dxx;
+            vy+= dyx;
+        }
+        ox += dxy;
+        oy += dyy;
+    }
+}
+#define H264_CHROMA_MC(OPNAME, OP)\
+static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    \
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    do {\
+        int t0,t1,t2,t3; \
+        uint8_t *s0 = src; \
+        uint8_t *s1 = src+stride; \
+        t0 = *s0++; t2 = *s1++; \
+        t1 = *s0++; t3 = *s1++; \
+        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
+        t0 = *s0++; t2 = *s1++; \
+        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
+        dst+= stride;\
+        src+= stride;\
+    }while(--h);\
+}\
+\
+static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    \
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    do {\
+        int t0,t1,t2,t3; \
+        uint8_t *s0 = src; \
+        uint8_t *s1 = src+stride; \
+        t0 = *s0++; t2 = *s1++; \
+        t1 = *s0++; t3 = *s1++; \
+        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
+        t0 = *s0++; t2 = *s1++; \
+        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
+        t1 = *s0++; t3 = *s1++; \
+        OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
+        t0 = *s0++; t2 = *s1++; \
+        OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
+        dst+= stride;\
+        src+= stride;\
+    }while(--h);\
+}\
+\
+static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    \
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    do {\
+        int t0,t1,t2,t3; \
+        uint8_t *s0 = src; \
+        uint8_t *s1 = src+stride; \
+        t0 = *s0++; t2 = *s1++; \
+        t1 = *s0++; t3 = *s1++; \
+        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
+        t0 = *s0++; t2 = *s1++; \
+        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
+        t1 = *s0++; t3 = *s1++; \
+        OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
+        t0 = *s0++; t2 = *s1++; \
+        OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
+        t1 = *s0++; t3 = *s1++; \
+        OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));\
+        t0 = *s0++; t2 = *s1++; \
+        OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));\
+        t1 = *s0++; t3 = *s1++; \
+        OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));\
+        t0 = *s0++; t2 = *s1++; \
+        OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));\
+        dst+= stride;\
+        src+= stride;\
+    }while(--h);\
+}
+
+#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
+#define op_put(a, b) a = (((b) + 32)>>6)
+
+H264_CHROMA_MC(put_       , op_put)
+H264_CHROMA_MC(avg_       , op_avg)
+#undef op_avg
+#undef op_put
+
+#define QPEL_MC(r, OPNAME, RND, OP) \
+static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    do {\
+        uint8_t *s = src; \
+        int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
+        src0= *s++;\
+        src1= *s++;\
+        src2= *s++;\
+        src3= *s++;\
+        src4= *s++;\
+        OP(dst[0], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
+        src5= *s++;\
+        OP(dst[1], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
+        src6= *s++;\
+        OP(dst[2], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
+        src7= *s++;\
+        OP(dst[3], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
+        src8= *s++;\
+        OP(dst[4], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
+        OP(dst[5], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
+        OP(dst[6], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
+        OP(dst[7], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }while(--h);\
+}\
+\
+static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int w=8;\
+    do{\
+        uint8_t *s = src, *d=dst;\
+        int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
+        src0 = *s; s+=srcStride; \
+        src1 = *s; s+=srcStride; \
+        src2 = *s; s+=srcStride; \
+        src3 = *s; s+=srcStride; \
+        src4 = *s; s+=srcStride; \
+        OP(*d, (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));d+=dstStride;\
+        src5 = *s; s+=srcStride; \
+        OP(*d, (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));d+=dstStride;\
+        src6 = *s; s+=srcStride; \
+        OP(*d, (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));d+=dstStride;\
+        src7 = *s; s+=srcStride; \
+        OP(*d, (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));d+=dstStride;\
+        src8 = *s; \
+        OP(*d, (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));d+=dstStride;\
+        OP(*d, (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));d+=dstStride;\
+        OP(*d, (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));d+=dstStride;\
+        OP(*d, (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
+        dst++;\
+        src++;\
+    }while(--w);\
+}\
+\
+static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    do {\
+        uint8_t *s = src;\
+        int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
+        int src9,src10,src11,src12,src13,src14,src15,src16;\
+        src0= *s++;\
+        src1= *s++;\
+        src2= *s++;\
+        src3= *s++;\
+        src4= *s++;\
+        OP(dst[ 0], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
+        src5= *s++;\
+        OP(dst[ 1], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
+        src6= *s++;\
+        OP(dst[ 2], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
+        src7= *s++;\
+        OP(dst[ 3], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
+        src8= *s++;\
+        OP(dst[ 4], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
+        src9= *s++;\
+        OP(dst[ 5], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
+        src10= *s++;\
+        OP(dst[ 6], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
+        src11= *s++;\
+        OP(dst[ 7], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
+        src12= *s++;\
+        OP(dst[ 8], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
+        src13= *s++;\
+        OP(dst[ 9], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
+        src14= *s++;\
+        OP(dst[10], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
+        src15= *s++;\
+        OP(dst[11], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
+        src16= *s++;\
+        OP(dst[12], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
+        OP(dst[13], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
+        OP(dst[14], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
+        OP(dst[15], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }while(--h);\
+}\
+\
+static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int w=16;\
+    do {\
+        uint8_t *s = src, *d=dst;\
+        int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
+        int src9,src10,src11,src12,src13,src14,src15,src16;\
+        src0 = *s; s+=srcStride; \
+        src1 = *s; s+=srcStride; \
+        src2 = *s; s+=srcStride; \
+        src3 = *s; s+=srcStride; \
+        src4 = *s; s+=srcStride; \
+        OP(*d, (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));d+=dstStride;\
+        src5 = *s; s+=srcStride; \
+        OP(*d, (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));d+=dstStride;\
+        src6 = *s; s+=srcStride; \
+        OP(*d, (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));d+=dstStride;\
+        src7 = *s; s+=srcStride; \
+        OP(*d, (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));d+=dstStride;\
+        src8 = *s; s+=srcStride; \
+        OP(*d, (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));d+=dstStride;\
+        src9 = *s; s+=srcStride; \
+        OP(*d, (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));d+=dstStride;\
+        src10 = *s; s+=srcStride; \
+        OP(*d, (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));d+=dstStride;\
+        src11 = *s; s+=srcStride; \
+        OP(*d, (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));d+=dstStride;\
+        src12 = *s; s+=srcStride; \
+        OP(*d, (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));d+=dstStride;\
+        src13 = *s; s+=srcStride; \
+        OP(*d, (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));d+=dstStride;\
+        src14 = *s; s+=srcStride; \
+        OP(*d, (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));d+=dstStride;\
+        src15 = *s; s+=srcStride; \
+        OP(*d, (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));d+=dstStride;\
+        src16 = *s; \
+        OP(*d, (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));d+=dstStride;\
+        OP(*d, (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));d+=dstStride;\
+        OP(*d, (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));d+=dstStride;\
+        OP(*d, (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
+        dst++;\
+        src++;\
+    }while(--w);\
+}\
+\
+static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels8_c(dst, src, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
+    OPNAME ## pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
+    OPNAME ## pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t half[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
+    OPNAME ## pixels8_l2_aligned(dst, full, half, stride, 16, 8, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    copy_block9(full, src, 16, stride, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
+}\
+\
+static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t half[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
+    OPNAME ## pixels8_l2_aligned(dst, full+16, half, stride, 16, 8, 8);\
+}\
+static void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4_aligned(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+static void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4_aligned0(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+static void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4_aligned(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+static void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4_aligned0(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+static void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+static void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[72];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels16_c(dst, src, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
+    OPNAME ## pixels16_l2_aligned2(dst, src, half, stride, stride, 16, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
+    OPNAME ## pixels16_l2_aligned2(dst, src+1, half, stride, stride, 16, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t half[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
+    OPNAME ## pixels16_l2_aligned(dst, full, half, stride, 24, 16, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    copy_block17(full, src, 24, stride, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
+}\
+\
+static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t half[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
+    OPNAME ## pixels16_l2_aligned(dst, full+24, half, stride, 24, 16, 16);\
+}\
+static void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4_aligned(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+static void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4_aligned0(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+static void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4_aligned(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+static void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4_aligned0(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+static void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2_aligned(dst, halfV, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}\
+static void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2_aligned(dst, halfV, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}\
+static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[272];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}
+
+#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
+#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
+#define op_put(a, b) a = cm[((b) + 16)>>5]
+#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
+
+QPEL_MC(0, put_       , _       , op_put)
+QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
+QPEL_MC(0, avg_       , _       , op_avg)
+//QPEL_MC(1, avg_no_rnd , _       , op_avg)
+#undef op_avg
+#undef op_avg_no_rnd
+#undef op_put
+#undef op_put_no_rnd
+
+#if 1
+#define H264_LOWPASS(OPNAME, OP, OP2) \
+static inline void OPNAME ## h264_qpel_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    do {\
+        int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
+        uint8_t *s = src-2;\
+        srcB = *s++;\
+        srcA = *s++;\
+        src0 = *s++;\
+        src1 = *s++;\
+        src2 = *s++;\
+        src3 = *s++;\
+        OP(dst[0], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        src4 = *s++;\
+        OP(dst[1], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        src5 = *s++;\
+        OP(dst[2], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
+        src6 = *s++;\
+        OP(dst[3], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
+      if (w>4) { /* it optimized */ \
+        int src7,src8,src9,src10; \
+        src7 = *s++;\
+        OP(dst[4], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
+        src8 = *s++;\
+        OP(dst[5], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
+        src9 = *s++;\
+        OP(dst[6], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
+        src10 = *s++;\
+        OP(dst[7], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
+       if (w>8) { \
+        int src11,src12,src13,src14,src15,src16,src17,src18; \
+        src11 = *s++;\
+        OP(dst[8] , (src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));\
+        src12 = *s++;\
+        OP(dst[9] , (src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));\
+        src13 = *s++;\
+        OP(dst[10], (src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));\
+        src14 = *s++;\
+        OP(dst[11], (src11+src12)*20 - (src10+src13)*5 + (src9 +src14));\
+        src15 = *s++;\
+        OP(dst[12], (src12+src13)*20 - (src11+src14)*5 + (src10+src15));\
+        src16 = *s++;\
+        OP(dst[13], (src13+src14)*20 - (src12+src15)*5 + (src11+src16));\
+        src17 = *s++;\
+        OP(dst[14], (src14+src15)*20 - (src13+src16)*5 + (src12+src17));\
+        src18 = *s++;\
+        OP(dst[15], (src15+src16)*20 - (src14+src17)*5 + (src13+src18));\
+       } \
+      } \
+        dst+=dstStride;\
+        src+=srcStride;\
+    }while(--h);\
+}\
+\
+static inline void OPNAME ## h264_qpel_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    do{\
+        int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
+        uint8_t *s = src-2*srcStride,*d=dst;\
+        srcB = *s; s+=srcStride;\
+        srcA = *s; s+=srcStride;\
+        src0 = *s; s+=srcStride;\
+        src1 = *s; s+=srcStride;\
+        src2 = *s; s+=srcStride;\
+        src3 = *s; s+=srcStride;\
+        OP(*d, (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));d+=dstStride;\
+        src4 = *s; s+=srcStride;\
+        OP(*d, (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));d+=dstStride;\
+        src5 = *s; s+=srcStride;\
+        OP(*d, (src2+src3)*20 - (src1+src4)*5 + (src0+src5));d+=dstStride;\
+        src6 = *s; s+=srcStride;\
+        OP(*d, (src3+src4)*20 - (src2+src5)*5 + (src1+src6));d+=dstStride;\
+      if (h>4) { \
+        int src7,src8,src9,src10; \
+        src7 = *s; s+=srcStride;\
+        OP(*d, (src4+src5)*20 - (src3+src6)*5 + (src2+src7));d+=dstStride;\
+        src8 = *s; s+=srcStride;\
+        OP(*d, (src5+src6)*20 - (src4+src7)*5 + (src3+src8));d+=dstStride;\
+        src9 = *s; s+=srcStride;\
+        OP(*d, (src6+src7)*20 - (src5+src8)*5 + (src4+src9));d+=dstStride;\
+        src10 = *s; s+=srcStride;\
+        OP(*d, (src7+src8)*20 - (src6+src9)*5 + (src5+src10));d+=dstStride;\
+       if (h>8) { \
+        int src11,src12,src13,src14,src15,src16,src17,src18; \
+        src11 = *s; s+=srcStride;\
+        OP(*d , (src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));d+=dstStride;\
+        src12 = *s; s+=srcStride;\
+        OP(*d , (src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));d+=dstStride;\
+        src13 = *s; s+=srcStride;\
+        OP(*d, (src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));d+=dstStride;\
+        src14 = *s; s+=srcStride;\
+        OP(*d, (src11+src12)*20 - (src10+src13)*5 + (src9 +src14));d+=dstStride;\
+        src15 = *s; s+=srcStride;\
+        OP(*d, (src12+src13)*20 - (src11+src14)*5 + (src10+src15));d+=dstStride;\
+        src16 = *s; s+=srcStride;\
+        OP(*d, (src13+src14)*20 - (src12+src15)*5 + (src11+src16));d+=dstStride;\
+        src17 = *s; s+=srcStride;\
+        OP(*d, (src14+src15)*20 - (src13+src16)*5 + (src12+src17));d+=dstStride;\
+        src18 = *s; s+=srcStride;\
+        OP(*d, (src15+src16)*20 - (src14+src17)*5 + (src13+src18));d+=dstStride;\
+       } \
+      } \
+        dst++;\
+        src++;\
+    }while(--w);\
+}\
+\
+static inline void OPNAME ## h264_qpel_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride,int w,int h){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    i= h+5; \
+    do {\
+        int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
+        uint8_t *s = src-2;\
+        srcB = *s++;\
+        srcA = *s++;\
+        src0 = *s++;\
+        src1 = *s++;\
+        src2 = *s++;\
+        src3 = *s++;\
+        tmp[0] = ((src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        src4 = *s++;\
+        tmp[1] = ((src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        src5 = *s++;\
+        tmp[2] = ((src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
+        src6 = *s++;\
+        tmp[3] = ((src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
+      if (w>4) { /* it optimized */ \
+        int src7,src8,src9,src10; \
+        src7 = *s++;\
+        tmp[4] = ((src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
+        src8 = *s++;\
+        tmp[5] = ((src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
+        src9 = *s++;\
+        tmp[6] = ((src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
+        src10 = *s++;\
+        tmp[7] = ((src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
+       if (w>8) { \
+        int src11,src12,src13,src14,src15,src16,src17,src18; \
+        src11 = *s++;\
+        tmp[8] = ((src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));\
+        src12 = *s++;\
+        tmp[9] = ((src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));\
+        src13 = *s++;\
+        tmp[10] = ((src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));\
+        src14 = *s++;\
+        tmp[11] = ((src11+src12)*20 - (src10+src13)*5 + (src9 +src14));\
+        src15 = *s++;\
+        tmp[12] = ((src12+src13)*20 - (src11+src14)*5 + (src10+src15));\
+        src16 = *s++;\
+        tmp[13] = ((src13+src14)*20 - (src12+src15)*5 + (src11+src16));\
+        src17 = *s++;\
+        tmp[14] = ((src14+src15)*20 - (src13+src16)*5 + (src12+src17));\
+        src18 = *s++;\
+        tmp[15] = ((src15+src16)*20 - (src14+src17)*5 + (src13+src18));\
+       } \
+      } \
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }while(--i);\
+    tmp -= tmpStride*(h+5-2);\
+    i = w; \
+    do {\
+        int tmpB,tmpA,tmp0,tmp1,tmp2,tmp3,tmp4,tmp5,tmp6;\
+        int16_t *s = tmp-2*tmpStride; \
+        uint8_t *d=dst;\
+        tmpB = *s; s+=tmpStride;\
+        tmpA = *s; s+=tmpStride;\
+        tmp0 = *s; s+=tmpStride;\
+        tmp1 = *s; s+=tmpStride;\
+        tmp2 = *s; s+=tmpStride;\
+        tmp3 = *s; s+=tmpStride;\
+        OP2(*d, (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));d+=dstStride;\
+        tmp4 = *s; s+=tmpStride;\
+        OP2(*d, (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));d+=dstStride;\
+        tmp5 = *s; s+=tmpStride;\
+        OP2(*d, (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));d+=dstStride;\
+        tmp6 = *s; s+=tmpStride;\
+        OP2(*d, (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));d+=dstStride;\
+      if (h>4) { \
+        int tmp7,tmp8,tmp9,tmp10; \
+        tmp7 = *s; s+=tmpStride;\
+        OP2(*d, (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));d+=dstStride;\
+        tmp8 = *s; s+=tmpStride;\
+        OP2(*d, (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));d+=dstStride;\
+        tmp9 = *s; s+=tmpStride;\
+        OP2(*d, (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));d+=dstStride;\
+        tmp10 = *s; s+=tmpStride;\
+        OP2(*d, (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));d+=dstStride;\
+       if (h>8) { \
+        int tmp11,tmp12,tmp13,tmp14,tmp15,tmp16,tmp17,tmp18; \
+        tmp11 = *s; s+=tmpStride;\
+        OP2(*d , (tmp8 +tmp9 )*20 - (tmp7 +tmp10)*5 + (tmp6 +tmp11));d+=dstStride;\
+        tmp12 = *s; s+=tmpStride;\
+        OP2(*d , (tmp9 +tmp10)*20 - (tmp8 +tmp11)*5 + (tmp7 +tmp12));d+=dstStride;\
+        tmp13 = *s; s+=tmpStride;\
+        OP2(*d, (tmp10+tmp11)*20 - (tmp9 +tmp12)*5 + (tmp8 +tmp13));d+=dstStride;\
+        tmp14 = *s; s+=tmpStride;\
+        OP2(*d, (tmp11+tmp12)*20 - (tmp10+tmp13)*5 + (tmp9 +tmp14));d+=dstStride;\
+        tmp15 = *s; s+=tmpStride;\
+        OP2(*d, (tmp12+tmp13)*20 - (tmp11+tmp14)*5 + (tmp10+tmp15));d+=dstStride;\
+        tmp16 = *s; s+=tmpStride;\
+        OP2(*d, (tmp13+tmp14)*20 - (tmp12+tmp15)*5 + (tmp11+tmp16));d+=dstStride;\
+        tmp17 = *s; s+=tmpStride;\
+        OP2(*d, (tmp14+tmp15)*20 - (tmp13+tmp16)*5 + (tmp12+tmp17));d+=dstStride;\
+        tmp18 = *s; s+=tmpStride;\
+        OP2(*d, (tmp15+tmp16)*20 - (tmp14+tmp17)*5 + (tmp13+tmp18));d+=dstStride;\
+       } \
+      } \
+        dst++;\
+        tmp++;\
+    }while(--i);\
+}\
+\
+static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,4,4); \
+}\
+static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+   OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,8,8); \
+}\
+static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+   OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,16,16); \
+}\
+\
+static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+   OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,4,4); \
+}\
+static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+   OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,8,8); \
+}\
+static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+   OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,16,16); \
+}\
+static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+   OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,4,4); \
+}\
+static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+   OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,8,8); \
+}\
+static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+   OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,16,16); \
+}\
+
+#define H264_MC(OPNAME, SIZE) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src, half, stride, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src+1, half, stride, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t half[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t half[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfV[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfV[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+
+#define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
+//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
+#define op_put(a, b)  a = cm[((b) + 16)>>5]
+#define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
+#define op2_put(a, b)  a = cm[((b) + 512)>>10]
+
+H264_LOWPASS(put_       , op_put, op2_put)
+H264_LOWPASS(avg_       , op_avg, op2_avg)
+H264_MC(put_, 4)
+H264_MC(put_, 8)
+H264_MC(put_, 16)
+H264_MC(avg_, 4)
+H264_MC(avg_, 8)
+H264_MC(avg_, 16)
+
+#undef op_avg
+#undef op_put
+#undef op2_avg
+#undef op2_put
+#endif
+
+static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    do{
+        int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
+        uint8_t *s = src;
+        src_1 = s[-1];
+        src0 = *s++;
+        src1 = *s++;
+        src2 = *s++;
+        dst[0]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
+        src3 = *s++;
+        dst[1]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
+        src4 = *s++;
+        dst[2]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
+        src5 = *s++;
+        dst[3]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
+        src6 = *s++;
+        dst[4]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
+        src7 = *s++;
+        dst[5]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
+        src8 = *s++;
+        dst[6]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
+        src9 = *s++;
+        dst[7]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
+        dst+=dstStride;
+        src+=srcStride;
+    }while(--h);
+}
+
+static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    do{
+        int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
+        uint8_t *s = src,*d = dst;
+        src_1 = *(s-srcStride);
+        src0 = *s; s+=srcStride;
+        src1 = *s; s+=srcStride;
+        src2 = *s; s+=srcStride;
+        *d= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; d+=dstStride;
+        src3 = *s; s+=srcStride;
+        *d= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4]; d+=dstStride;
+        src4 = *s; s+=srcStride;
+        *d= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4]; d+=dstStride;
+        src5 = *s; s+=srcStride;
+        *d= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4]; d+=dstStride;
+        src6 = *s; s+=srcStride;
+        *d= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4]; d+=dstStride;
+        src7 = *s; s+=srcStride;
+        *d= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4]; d+=dstStride;
+        src8 = *s; s+=srcStride;
+        *d= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4]; d+=dstStride;
+        src9 = *s;
+        *d= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4]; d+=dstStride;
+        src++;
+        dst++;
+    }while(--w);
+}
+
+static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
+    put_pixels8_c(dst, src, stride, 8);
+}
+
+static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t half[64];
+    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
+    put_pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);
+}
+
+static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
+    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
+}
+
+static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t half[64];
+    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
+    put_pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);
+}
+
+static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
+    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
+}
+
+static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t halfH[88];
+    uint8_t halfV[64];
+    uint8_t halfHV[64];
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
+    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
+    put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);
+}
+static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t halfH[88];
+    uint8_t halfV[64];
+    uint8_t halfHV[64];
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
+    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
+    put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);
+}
+static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t halfH[88];
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
+}
diff --git a/contrib/ffmpeg/libavcodec/shorten.c b/contrib/ffmpeg/libavcodec/shorten.c
new file mode 100644
index 000000000..358ecf23f
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/shorten.c
@@ -0,0 +1,533 @@
+/*
+ * Shorten decoder
+ * Copyright (c) 2005 Jeff Muizelaar
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file shorten.c
+ * Shorten decoder
+ * @author Jeff Muizelaar
+ *
+ */
+
+#define DEBUG
+#include <limits.h>
+#include "avcodec.h"
+#include "bitstream.h"
+#include "golomb.h"
+
+#define MAX_CHANNELS 8
+#define MAX_BLOCKSIZE 65535
+
+#define OUT_BUFFER_SIZE 16384
+
+#define ULONGSIZE 2
+
+#define WAVE_FORMAT_PCM 0x0001
+
+#define DEFAULT_BLOCK_SIZE 256
+
+#define TYPESIZE 4
+#define CHANSIZE 0
+#define LPCQSIZE 2
+#define ENERGYSIZE 3
+#define BITSHIFTSIZE 2
+
+#define TYPE_S16HL 3
+#define TYPE_S16LH 5
+
+#define NWRAP 3
+#define NSKIPSIZE 1
+
+#define LPCQUANT 5
+#define V2LPCQOFFSET (1 << LPCQUANT)
+
+#define FNSIZE 2
+#define FN_DIFF0        0
+#define FN_DIFF1        1
+#define FN_DIFF2        2
+#define FN_DIFF3        3
+#define FN_QUIT         4
+#define FN_BLOCKSIZE    5
+#define FN_BITSHIFT     6
+#define FN_QLPC         7
+#define FN_ZERO         8
+#define FN_VERBATIM     9
+
+#define VERBATIM_CKSIZE_SIZE 5
+#define VERBATIM_BYTE_SIZE 8
+#define CANONICAL_HEADER_SIZE 44
+
+typedef struct ShortenContext {
+    AVCodecContext *avctx;
+    GetBitContext gb;
+
+    int min_framesize, max_framesize;
+    int channels;
+
+    int32_t *decoded[MAX_CHANNELS];
+    int32_t *offset[MAX_CHANNELS];
+    uint8_t *bitstream;
+    int bitstream_size;
+    int bitstream_index;
+    unsigned int allocated_bitstream_size;
+    int header_size;
+    uint8_t header[OUT_BUFFER_SIZE];
+    int version;
+    int cur_chan;
+    int bitshift;
+    int nmean;
+    int internal_ftype;
+    int nwrap;
+    int blocksize;
+    int bitindex;
+    int32_t lpcqoffset;
+} ShortenContext;
+
+static int shorten_decode_init(AVCodecContext * avctx)
+{
+    ShortenContext *s = avctx->priv_data;
+    s->avctx = avctx;
+
+    return 0;
+}
+
+static int allocate_buffers(ShortenContext *s)
+{
+    int i, chan;
+    for (chan=0; chan<s->channels; chan++) {
+        if(FFMAX(1, s->nmean) >= UINT_MAX/sizeof(int32_t)){
+            av_log(s->avctx, AV_LOG_ERROR, "nmean too large\n");
+            return -1;
+        }
+        if(s->blocksize + s->nwrap >= UINT_MAX/sizeof(int32_t) || s->blocksize + s->nwrap <= (unsigned)s->nwrap){
+            av_log(s->avctx, AV_LOG_ERROR, "s->blocksize + s->nwrap too large\n");
+            return -1;
+        }
+
+        s->offset[chan] = av_realloc(s->offset[chan], sizeof(int32_t)*FFMAX(1, s->nmean));
+
+        s->decoded[chan] = av_realloc(s->decoded[chan], sizeof(int32_t)*(s->blocksize + s->nwrap));
+        for (i=0; i<s->nwrap; i++)
+            s->decoded[chan][i] = 0;
+        s->decoded[chan] += s->nwrap;
+    }
+    return 0;
+}
+
+
+static inline unsigned int get_uint(ShortenContext *s, int k)
+{
+    if (s->version != 0)
+        k = get_ur_golomb_shorten(&s->gb, ULONGSIZE);
+    return get_ur_golomb_shorten(&s->gb, k);
+}
+
+
+static void fix_bitshift(ShortenContext *s, int32_t *buffer)
+{
+    int i;
+
+    if (s->bitshift != 0)
+        for (i = 0; i < s->blocksize; i++)
+            buffer[s->nwrap + i] <<= s->bitshift;
+}
+
+
+static void init_offset(ShortenContext *s)
+{
+    int32_t mean = 0;
+    int  chan, i;
+    int nblock = FFMAX(1, s->nmean);
+    /* initialise offset */
+    switch (s->internal_ftype)
+    {
+        case TYPE_S16HL:
+        case TYPE_S16LH:
+            mean = 0;
+            break;
+        default:
+            av_log(s->avctx, AV_LOG_ERROR, "unknown audio type");
+            abort();
+    }
+
+    for (chan = 0; chan < s->channels; chan++)
+        for (i = 0; i < nblock; i++)
+            s->offset[chan][i] = mean;
+}
+
+static int inline get_le32(GetBitContext *gb)
+{
+    return bswap_32(get_bits_long(gb, 32));
+}
+
+static short inline get_le16(GetBitContext *gb)
+{
+    return bswap_16(get_bits_long(gb, 16));
+}
+
+static int decode_wave_header(AVCodecContext *avctx, uint8_t *header, int header_size)
+{
+    GetBitContext hb;
+    int len;
+    int chunk_size;
+    short wave_format;
+
+    init_get_bits(&hb, header, header_size*8);
+    if (get_le32(&hb) != MKTAG('R','I','F','F')) {
+        av_log(avctx, AV_LOG_ERROR, "missing RIFF tag\n");
+        return -1;
+    }
+
+    chunk_size = get_le32(&hb);
+
+    if (get_le32(&hb) != MKTAG('W','A','V','E')) {
+        av_log(avctx, AV_LOG_ERROR, "missing WAVE tag\n");
+        return -1;
+    }
+
+    while (get_le32(&hb) != MKTAG('f','m','t',' ')) {
+        len = get_le32(&hb);
+        skip_bits(&hb, 8*len);
+    }
+    len = get_le32(&hb);
+
+    if (len < 16) {
+        av_log(avctx, AV_LOG_ERROR, "fmt chunk was too short\n");
+        return -1;
+    }
+
+    wave_format = get_le16(&hb);
+
+    switch (wave_format) {
+        case WAVE_FORMAT_PCM:
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "unsupported wave format\n");
+            return -1;
+    }
+
+    avctx->channels = get_le16(&hb);
+    avctx->sample_rate = get_le32(&hb);
+    avctx->bit_rate = get_le32(&hb) * 8;
+    avctx->block_align = get_le16(&hb);
+    avctx->bits_per_sample = get_le16(&hb);
+
+    if (avctx->bits_per_sample != 16) {
+        av_log(avctx, AV_LOG_ERROR, "unsupported number of bits per sample\n");
+        return -1;
+    }
+
+    len -= 16;
+    if (len > 0)
+        av_log(avctx, AV_LOG_INFO, "%d header bytes unparsed\n", len);
+
+    return 0;
+}
+
+static int16_t * interleave_buffer(int16_t *samples, int nchan, int blocksize, int32_t **buffer) {
+    int i, chan;
+    for (i=0; i<blocksize; i++)
+        for (chan=0; chan < nchan; chan++)
+            *samples++ = FFMIN(buffer[chan][i], 32768);
+    return samples;
+}
+
+static void decode_subframe_lpc(ShortenContext *s, int channel, int residual_size, int pred_order)
+{
+    int sum, i, j;
+    int coeffs[pred_order];
+
+    for (i=0; i<pred_order; i++)
+        coeffs[i] = get_sr_golomb_shorten(&s->gb, LPCQUANT);
+
+    for (i=0; i < s->blocksize; i++) {
+        sum = s->lpcqoffset;
+        for (j=0; j<pred_order; j++)
+            sum += coeffs[j] * s->decoded[channel][i-j-1];
+        s->decoded[channel][i] = get_sr_golomb_shorten(&s->gb, residual_size) + (sum >> LPCQUANT);
+    }
+}
+
+
+static int shorten_decode_frame(AVCodecContext *avctx,
+        void *data, int *data_size,
+        uint8_t *buf, int buf_size)
+{
+    ShortenContext *s = avctx->priv_data;
+    int i, input_buf_size = 0;
+    int16_t *samples = data;
+    if(s->max_framesize == 0){
+        s->max_framesize= 1024; // should hopefully be enough for the first header
+        s->bitstream= av_fast_realloc(s->bitstream, &s->allocated_bitstream_size, s->max_framesize);
+    }
+
+    if(1 && s->max_framesize){//FIXME truncated
+        buf_size= FFMIN(buf_size, s->max_framesize - s->bitstream_size);
+        input_buf_size= buf_size;
+
+        if(s->bitstream_index + s->bitstream_size + buf_size > s->allocated_bitstream_size){
+            //                printf("memmove\n");
+            memmove(s->bitstream, &s->bitstream[s->bitstream_index], s->bitstream_size);
+            s->bitstream_index=0;
+        }
+        memcpy(&s->bitstream[s->bitstream_index + s->bitstream_size], buf, buf_size);
+        buf= &s->bitstream[s->bitstream_index];
+        buf_size += s->bitstream_size;
+        s->bitstream_size= buf_size;
+
+        if(buf_size < s->max_framesize){
+            //dprintf("wanna more data ... %d\n", buf_size);
+            return input_buf_size;
+        }
+    }
+    init_get_bits(&s->gb, buf, buf_size*8);
+    get_bits(&s->gb, s->bitindex);
+    if (!s->blocksize)
+    {
+        int maxnlpc = 0;
+        /* shorten signature */
+        if (get_bits_long(&s->gb, 32) != bswap_32(ff_get_fourcc("ajkg"))) {
+            av_log(s->avctx, AV_LOG_ERROR, "missing shorten magic 'ajkg'\n");
+            return -1;
+        }
+
+        s->lpcqoffset = 0;
+        s->blocksize = DEFAULT_BLOCK_SIZE;
+        s->channels = 1;
+        s->nmean = -1;
+        s->version = get_bits(&s->gb, 8);
+        s->internal_ftype = get_uint(s, TYPESIZE);
+
+        s->channels = get_uint(s, CHANSIZE);
+        if (s->channels > MAX_CHANNELS) {
+            av_log(s->avctx, AV_LOG_ERROR, "too many channels: %d\n", s->channels);
+            return -1;
+        }
+
+        /* get blocksize if version > 0 */
+        if (s->version > 0) {
+            int skip_bytes;
+            s->blocksize = get_uint(s, av_log2(DEFAULT_BLOCK_SIZE));
+            maxnlpc = get_uint(s, LPCQSIZE);
+            s->nmean = get_uint(s, 0);
+
+            skip_bytes = get_uint(s, NSKIPSIZE);
+            for (i=0; i<skip_bytes; i++) {
+                skip_bits(&s->gb, 8);
+            }
+        }
+        s->nwrap = FFMAX(NWRAP, maxnlpc);
+
+        if (allocate_buffers(s))
+            return -1;
+
+        init_offset(s);
+
+        if (s->version > 1)
+            s->lpcqoffset = V2LPCQOFFSET;
+
+        if (get_ur_golomb_shorten(&s->gb, FNSIZE) != FN_VERBATIM) {
+            av_log(s->avctx, AV_LOG_ERROR, "missing verbatim section at begining of stream\n");
+            return -1;
+        }
+
+        s->header_size = get_ur_golomb_shorten(&s->gb, VERBATIM_CKSIZE_SIZE);
+        if (s->header_size >= OUT_BUFFER_SIZE || s->header_size < CANONICAL_HEADER_SIZE) {
+            av_log(s->avctx, AV_LOG_ERROR, "header is wrong size: %d\n", s->header_size);
+            return -1;
+        }
+
+        for (i=0; i<s->header_size; i++)
+            s->header[i] = (char)get_ur_golomb_shorten(&s->gb, VERBATIM_BYTE_SIZE);
+
+        if (decode_wave_header(avctx, s->header, s->header_size) < 0)
+            return -1;
+
+        s->cur_chan = 0;
+        s->bitshift = 0;
+    }
+    else
+    {
+        int cmd;
+        int len;
+        cmd = get_ur_golomb_shorten(&s->gb, FNSIZE);
+        switch (cmd) {
+            case FN_ZERO:
+            case FN_DIFF0:
+            case FN_DIFF1:
+            case FN_DIFF2:
+            case FN_DIFF3:
+            case FN_QLPC:
+                {
+                    int residual_size = 0;
+                    int channel = s->cur_chan;
+                    int32_t coffset;
+                    if (cmd != FN_ZERO) {
+                        residual_size = get_ur_golomb_shorten(&s->gb, ENERGYSIZE);
+                        /* this is a hack as version 0 differed in defintion of get_sr_golomb_shorten */
+                        if (s->version == 0)
+                            residual_size--;
+                    }
+
+                    if (s->nmean == 0)
+                        coffset = s->offset[channel][0];
+                    else {
+                        int32_t sum = (s->version < 2) ? 0 : s->nmean / 2;
+                        for (i=0; i<s->nmean; i++)
+                            sum += s->offset[channel][i];
+                        coffset = sum / s->nmean;
+                        if (s->version >= 2)
+                            coffset >>= FFMIN(1, s->bitshift);
+                    }
+                    switch (cmd) {
+                        case FN_ZERO:
+                            for (i=0; i<s->blocksize; i++)
+                                s->decoded[channel][i] = 0;
+                            break;
+                        case FN_DIFF0:
+                            for (i=0; i<s->blocksize; i++)
+                                s->decoded[channel][i] = get_sr_golomb_shorten(&s->gb, residual_size) + coffset;
+                            break;
+                        case FN_DIFF1:
+                            for (i=0; i<s->blocksize; i++)
+                                s->decoded[channel][i] = get_sr_golomb_shorten(&s->gb, residual_size) + s->decoded[channel][i - 1];
+                            break;
+                        case FN_DIFF2:
+                            for (i=0; i<s->blocksize; i++)
+                                s->decoded[channel][i] = get_sr_golomb_shorten(&s->gb, residual_size) + 2*s->decoded[channel][i-1]
+                                                                                                      -   s->decoded[channel][i-2];
+                            break;
+                        case FN_DIFF3:
+                            for (i=0; i<s->blocksize; i++)
+                                s->decoded[channel][i] = get_sr_golomb_shorten(&s->gb, residual_size) + 3*s->decoded[channel][i-1]
+                                                                                                      - 3*s->decoded[channel][i-2]
+                                                                                                      +   s->decoded[channel][i-3];
+                            break;
+                        case FN_QLPC:
+                            {
+                                int pred_order = get_ur_golomb_shorten(&s->gb, LPCQSIZE);
+                                for (i=0; i<pred_order; i++)
+                                    s->decoded[channel][i - pred_order] -= coffset;
+                                decode_subframe_lpc(s, channel, residual_size, pred_order);
+                                if (coffset != 0)
+                                    for (i=0; i < s->blocksize; i++)
+                                        s->decoded[channel][i] += coffset;
+                            }
+                    }
+                    if (s->nmean > 0) {
+                        int32_t sum = (s->version < 2) ? 0 : s->blocksize / 2;
+                        for (i=0; i<s->blocksize; i++)
+                            sum += s->decoded[channel][i];
+
+                        for (i=1; i<s->nmean; i++)
+                            s->offset[channel][i-1] = s->offset[channel][i];
+
+                        if (s->version < 2)
+                            s->offset[channel][s->nmean - 1] = sum / s->blocksize;
+                        else
+                            s->offset[channel][s->nmean - 1] = (sum / s->blocksize) << s->bitshift;
+                    }
+                    for (i=-s->nwrap; i<0; i++)
+                        s->decoded[channel][i] = s->decoded[channel][i + s->blocksize];
+
+                    fix_bitshift(s, s->decoded[channel]);
+
+                    s->cur_chan++;
+                    if (s->cur_chan == s->channels) {
+                        samples = interleave_buffer(samples, s->channels, s->blocksize, s->decoded);
+                        s->cur_chan = 0;
+                        goto frame_done;
+                    }
+                    break;
+                }
+                break;
+            case FN_VERBATIM:
+                len = get_ur_golomb_shorten(&s->gb, VERBATIM_CKSIZE_SIZE);
+                while (len--) {
+                    get_ur_golomb_shorten(&s->gb, VERBATIM_BYTE_SIZE);
+                }
+                break;
+            case FN_BITSHIFT:
+                s->bitshift = get_ur_golomb_shorten(&s->gb, BITSHIFTSIZE);
+                break;
+            case FN_BLOCKSIZE:
+                s->blocksize = get_uint(s, av_log2(s->blocksize));
+                break;
+            case FN_QUIT:
+                return buf_size;
+                break;
+            default:
+                av_log(avctx, AV_LOG_ERROR, "unknown shorten function %d\n", cmd);
+                return -1;
+                break;
+        }
+    }
+frame_done:
+    *data_size = (int8_t *)samples - (int8_t *)data;
+
+    //    s->last_blocksize = s->blocksize;
+    s->bitindex = get_bits_count(&s->gb) - 8*((get_bits_count(&s->gb))/8);
+    i= (get_bits_count(&s->gb))/8;
+    if (i > buf_size) {
+        av_log(s->avctx, AV_LOG_ERROR, "overread: %d\n", i - buf_size);
+        s->bitstream_size=0;
+        s->bitstream_index=0;
+        return -1;
+    }
+    if (s->bitstream_size) {
+        s->bitstream_index += i;
+        s->bitstream_size  -= i;
+        return input_buf_size;
+    } else
+        return i;
+}
+
+static int shorten_decode_close(AVCodecContext *avctx)
+{
+    ShortenContext *s = avctx->priv_data;
+    int i;
+
+    for (i = 0; i < s->channels; i++) {
+        s->decoded[i] -= s->nwrap;
+        av_freep(&s->decoded[i]);
+        av_freep(&s->offset[i]);
+    }
+    av_freep(&s->bitstream);
+    return 0;
+}
+
+static void shorten_flush(AVCodecContext *avctx){
+    ShortenContext *s = avctx->priv_data;
+
+    s->bitstream_size=
+        s->bitstream_index= 0;
+}
+
+AVCodec shorten_decoder = {
+    "shorten",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_SHORTEN,
+    sizeof(ShortenContext),
+    shorten_decode_init,
+    NULL,
+    shorten_decode_close,
+    shorten_decode_frame,
+    .flush= shorten_flush,
+};
diff --git a/contrib/ffmpeg/libavcodec/simple_idct.c b/contrib/ffmpeg/libavcodec/simple_idct.c
new file mode 100644
index 000000000..2c026f08f
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/simple_idct.c
@@ -0,0 +1,587 @@
+/*
+ * Simple IDCT
+ *
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file simple_idct.c
+ * simpleidct in C.
+ */
+
+/*
+  based upon some outcommented c code from mpeg2dec (idct_mmx.c
+  written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
+ */
+#include "avcodec.h"
+#include "dsputil.h"
+#include "simple_idct.h"
+
+#if 0
+#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
+#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
+#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
+#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
+#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
+#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
+#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
+#define ROW_SHIFT 8
+#define COL_SHIFT 17
+#else
+#define W1  22725  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W2  21407  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W3  19266  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W4  16383  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W5  12873  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W6  8867   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W7  4520   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define ROW_SHIFT 11
+#define COL_SHIFT 20 // 6
+#endif
+
+#if defined(ARCH_POWERPC_405)
+
+/* signed 16x16 -> 32 multiply add accumulate */
+#define MAC16(rt, ra, rb) \
+    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
+
+/* signed 16x16 -> 32 multiply */
+#define MUL16(rt, ra, rb) \
+    asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb));
+
+#else
+
+/* signed 16x16 -> 32 multiply add accumulate */
+#define MAC16(rt, ra, rb) rt += (ra) * (rb)
+
+/* signed 16x16 -> 32 multiply */
+#define MUL16(rt, ra, rb) rt = (ra) * (rb)
+
+#endif
+
+static inline void idctRowCondDC (DCTELEM * row)
+{
+        int a0, a1, a2, a3, b0, b1, b2, b3;
+#ifdef FAST_64BIT
+        uint64_t temp;
+#else
+        uint32_t temp;
+#endif
+
+#ifdef FAST_64BIT
+#ifdef WORDS_BIGENDIAN
+#define ROW0_MASK 0xffff000000000000LL
+#else
+#define ROW0_MASK 0xffffLL
+#endif
+        if(sizeof(DCTELEM)==2){
+            if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
+                  ((uint64_t *)row)[1]) == 0) {
+                temp = (row[0] << 3) & 0xffff;
+                temp += temp << 16;
+                temp += temp << 32;
+                ((uint64_t *)row)[0] = temp;
+                ((uint64_t *)row)[1] = temp;
+                return;
+            }
+        }else{
+            if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
+                row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
+                return;
+            }
+        }
+#else
+        if(sizeof(DCTELEM)==2){
+            if (!(((uint32_t*)row)[1] |
+                  ((uint32_t*)row)[2] |
+                  ((uint32_t*)row)[3] |
+                  row[1])) {
+                temp = (row[0] << 3) & 0xffff;
+                temp += temp << 16;
+                ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
+                ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
+                return;
+            }
+        }else{
+            if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
+                row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
+                return;
+            }
+        }
+#endif
+
+        a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
+        a1 = a0;
+        a2 = a0;
+        a3 = a0;
+
+        /* no need to optimize : gcc does it */
+        a0 += W2 * row[2];
+        a1 += W6 * row[2];
+        a2 -= W6 * row[2];
+        a3 -= W2 * row[2];
+
+        MUL16(b0, W1, row[1]);
+        MAC16(b0, W3, row[3]);
+        MUL16(b1, W3, row[1]);
+        MAC16(b1, -W7, row[3]);
+        MUL16(b2, W5, row[1]);
+        MAC16(b2, -W1, row[3]);
+        MUL16(b3, W7, row[1]);
+        MAC16(b3, -W5, row[3]);
+
+#ifdef FAST_64BIT
+        temp = ((uint64_t*)row)[1];
+#else
+        temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
+#endif
+        if (temp != 0) {
+            a0 += W4*row[4] + W6*row[6];
+            a1 += - W4*row[4] - W2*row[6];
+            a2 += - W4*row[4] + W2*row[6];
+            a3 += W4*row[4] - W6*row[6];
+
+            MAC16(b0, W5, row[5]);
+            MAC16(b0, W7, row[7]);
+
+            MAC16(b1, -W1, row[5]);
+            MAC16(b1, -W5, row[7]);
+
+            MAC16(b2, W7, row[5]);
+            MAC16(b2, W3, row[7]);
+
+            MAC16(b3, W3, row[5]);
+            MAC16(b3, -W1, row[7]);
+        }
+
+        row[0] = (a0 + b0) >> ROW_SHIFT;
+        row[7] = (a0 - b0) >> ROW_SHIFT;
+        row[1] = (a1 + b1) >> ROW_SHIFT;
+        row[6] = (a1 - b1) >> ROW_SHIFT;
+        row[2] = (a2 + b2) >> ROW_SHIFT;
+        row[5] = (a2 - b2) >> ROW_SHIFT;
+        row[3] = (a3 + b3) >> ROW_SHIFT;
+        row[4] = (a3 - b3) >> ROW_SHIFT;
+}
+
+static inline void idctSparseColPut (uint8_t *dest, int line_size,
+                                     DCTELEM * col)
+{
+        int a0, a1, a2, a3, b0, b1, b2, b3;
+        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+        /* XXX: I did that only to give same values as previous code */
+        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
+        a1 = a0;
+        a2 = a0;
+        a3 = a0;
+
+        a0 +=  + W2*col[8*2];
+        a1 +=  + W6*col[8*2];
+        a2 +=  - W6*col[8*2];
+        a3 +=  - W2*col[8*2];
+
+        MUL16(b0, W1, col[8*1]);
+        MUL16(b1, W3, col[8*1]);
+        MUL16(b2, W5, col[8*1]);
+        MUL16(b3, W7, col[8*1]);
+
+        MAC16(b0, + W3, col[8*3]);
+        MAC16(b1, - W7, col[8*3]);
+        MAC16(b2, - W1, col[8*3]);
+        MAC16(b3, - W5, col[8*3]);
+
+        if(col[8*4]){
+            a0 += + W4*col[8*4];
+            a1 += - W4*col[8*4];
+            a2 += - W4*col[8*4];
+            a3 += + W4*col[8*4];
+        }
+
+        if (col[8*5]) {
+            MAC16(b0, + W5, col[8*5]);
+            MAC16(b1, - W1, col[8*5]);
+            MAC16(b2, + W7, col[8*5]);
+            MAC16(b3, + W3, col[8*5]);
+        }
+
+        if(col[8*6]){
+            a0 += + W6*col[8*6];
+            a1 += - W2*col[8*6];
+            a2 += + W2*col[8*6];
+            a3 += - W6*col[8*6];
+        }
+
+        if (col[8*7]) {
+            MAC16(b0, + W7, col[8*7]);
+            MAC16(b1, - W5, col[8*7]);
+            MAC16(b2, + W3, col[8*7]);
+            MAC16(b3, - W1, col[8*7]);
+        }
+
+        dest[0] = cm[(a0 + b0) >> COL_SHIFT];
+        dest += line_size;
+        dest[0] = cm[(a1 + b1) >> COL_SHIFT];
+        dest += line_size;
+        dest[0] = cm[(a2 + b2) >> COL_SHIFT];
+        dest += line_size;
+        dest[0] = cm[(a3 + b3) >> COL_SHIFT];
+        dest += line_size;
+        dest[0] = cm[(a3 - b3) >> COL_SHIFT];
+        dest += line_size;
+        dest[0] = cm[(a2 - b2) >> COL_SHIFT];
+        dest += line_size;
+        dest[0] = cm[(a1 - b1) >> COL_SHIFT];
+        dest += line_size;
+        dest[0] = cm[(a0 - b0) >> COL_SHIFT];
+}
+
+static inline void idctSparseColAdd (uint8_t *dest, int line_size,
+                                     DCTELEM * col)
+{
+        int a0, a1, a2, a3, b0, b1, b2, b3;
+        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+        /* XXX: I did that only to give same values as previous code */
+        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
+        a1 = a0;
+        a2 = a0;
+        a3 = a0;
+
+        a0 +=  + W2*col[8*2];
+        a1 +=  + W6*col[8*2];
+        a2 +=  - W6*col[8*2];
+        a3 +=  - W2*col[8*2];
+
+        MUL16(b0, W1, col[8*1]);
+        MUL16(b1, W3, col[8*1]);
+        MUL16(b2, W5, col[8*1]);
+        MUL16(b3, W7, col[8*1]);
+
+        MAC16(b0, + W3, col[8*3]);
+        MAC16(b1, - W7, col[8*3]);
+        MAC16(b2, - W1, col[8*3]);
+        MAC16(b3, - W5, col[8*3]);
+
+        if(col[8*4]){
+            a0 += + W4*col[8*4];
+            a1 += - W4*col[8*4];
+            a2 += - W4*col[8*4];
+            a3 += + W4*col[8*4];
+        }
+
+        if (col[8*5]) {
+            MAC16(b0, + W5, col[8*5]);
+            MAC16(b1, - W1, col[8*5]);
+            MAC16(b2, + W7, col[8*5]);
+            MAC16(b3, + W3, col[8*5]);
+        }
+
+        if(col[8*6]){
+            a0 += + W6*col[8*6];
+            a1 += - W2*col[8*6];
+            a2 += + W2*col[8*6];
+            a3 += - W6*col[8*6];
+        }
+
+        if (col[8*7]) {
+            MAC16(b0, + W7, col[8*7]);
+            MAC16(b1, - W5, col[8*7]);
+            MAC16(b2, + W3, col[8*7]);
+            MAC16(b3, - W1, col[8*7]);
+        }
+
+        dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)];
+        dest += line_size;
+        dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)];
+        dest += line_size;
+        dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)];
+        dest += line_size;
+        dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)];
+        dest += line_size;
+        dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)];
+        dest += line_size;
+        dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)];
+        dest += line_size;
+        dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)];
+        dest += line_size;
+        dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)];
+}
+
+static inline void idctSparseCol (DCTELEM * col)
+{
+        int a0, a1, a2, a3, b0, b1, b2, b3;
+
+        /* XXX: I did that only to give same values as previous code */
+        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
+        a1 = a0;
+        a2 = a0;
+        a3 = a0;
+
+        a0 +=  + W2*col[8*2];
+        a1 +=  + W6*col[8*2];
+        a2 +=  - W6*col[8*2];
+        a3 +=  - W2*col[8*2];
+
+        MUL16(b0, W1, col[8*1]);
+        MUL16(b1, W3, col[8*1]);
+        MUL16(b2, W5, col[8*1]);
+        MUL16(b3, W7, col[8*1]);
+
+        MAC16(b0, + W3, col[8*3]);
+        MAC16(b1, - W7, col[8*3]);
+        MAC16(b2, - W1, col[8*3]);
+        MAC16(b3, - W5, col[8*3]);
+
+        if(col[8*4]){
+            a0 += + W4*col[8*4];
+            a1 += - W4*col[8*4];
+            a2 += - W4*col[8*4];
+            a3 += + W4*col[8*4];
+        }
+
+        if (col[8*5]) {
+            MAC16(b0, + W5, col[8*5]);
+            MAC16(b1, - W1, col[8*5]);
+            MAC16(b2, + W7, col[8*5]);
+            MAC16(b3, + W3, col[8*5]);
+        }
+
+        if(col[8*6]){
+            a0 += + W6*col[8*6];
+            a1 += - W2*col[8*6];
+            a2 += + W2*col[8*6];
+            a3 += - W6*col[8*6];
+        }
+
+        if (col[8*7]) {
+            MAC16(b0, + W7, col[8*7]);
+            MAC16(b1, - W5, col[8*7]);
+            MAC16(b2, + W3, col[8*7]);
+            MAC16(b3, - W1, col[8*7]);
+        }
+
+        col[0 ] = ((a0 + b0) >> COL_SHIFT);
+        col[8 ] = ((a1 + b1) >> COL_SHIFT);
+        col[16] = ((a2 + b2) >> COL_SHIFT);
+        col[24] = ((a3 + b3) >> COL_SHIFT);
+        col[32] = ((a3 - b3) >> COL_SHIFT);
+        col[40] = ((a2 - b2) >> COL_SHIFT);
+        col[48] = ((a1 - b1) >> COL_SHIFT);
+        col[56] = ((a0 - b0) >> COL_SHIFT);
+}
+
+void simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    int i;
+    for(i=0; i<8; i++)
+        idctRowCondDC(block + i*8);
+
+    for(i=0; i<8; i++)
+        idctSparseColPut(dest + i, line_size, block + i);
+}
+
+void simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    int i;
+    for(i=0; i<8; i++)
+        idctRowCondDC(block + i*8);
+
+    for(i=0; i<8; i++)
+        idctSparseColAdd(dest + i, line_size, block + i);
+}
+
+void simple_idct(DCTELEM *block)
+{
+    int i;
+    for(i=0; i<8; i++)
+        idctRowCondDC(block + i*8);
+
+    for(i=0; i<8; i++)
+        idctSparseCol(block + i);
+}
+
+/* 2x4x8 idct */
+
+#define CN_SHIFT 12
+#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
+#define C1 C_FIX(0.6532814824)
+#define C2 C_FIX(0.2705980501)
+
+/* row idct is multiple by 16 * sqrt(2.0), col idct4 is normalized,
+   and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
+#define C_SHIFT (4+1+12)
+
+static inline void idct4col(uint8_t *dest, int line_size, const DCTELEM *col)
+{
+    int c0, c1, c2, c3, a0, a1, a2, a3;
+    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    a0 = col[8*0];
+    a1 = col[8*2];
+    a2 = col[8*4];
+    a3 = col[8*6];
+    c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
+    c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
+    c1 = a1 * C1 + a3 * C2;
+    c3 = a1 * C2 - a3 * C1;
+    dest[0] = cm[(c0 + c1) >> C_SHIFT];
+    dest += line_size;
+    dest[0] = cm[(c2 + c3) >> C_SHIFT];
+    dest += line_size;
+    dest[0] = cm[(c2 - c3) >> C_SHIFT];
+    dest += line_size;
+    dest[0] = cm[(c0 - c1) >> C_SHIFT];
+}
+
+#define BF(k) \
+{\
+    int a0, a1;\
+    a0 = ptr[k];\
+    a1 = ptr[8 + k];\
+    ptr[k] = a0 + a1;\
+    ptr[8 + k] = a0 - a1;\
+}
+
+/* only used by DV codec. The input must be interlaced. 128 is added
+   to the pixels before clamping to avoid systematic error
+   (1024*sqrt(2)) offset would be needed otherwise. */
+/* XXX: I think a 1.0/sqrt(2) normalization should be needed to
+   compensate the extra butterfly stage - I don't have the full DV
+   specification */
+void simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    int i;
+    DCTELEM *ptr;
+
+    /* butterfly */
+    ptr = block;
+    for(i=0;i<4;i++) {
+        BF(0);
+        BF(1);
+        BF(2);
+        BF(3);
+        BF(4);
+        BF(5);
+        BF(6);
+        BF(7);
+        ptr += 2 * 8;
+    }
+
+    /* IDCT8 on each line */
+    for(i=0; i<8; i++) {
+        idctRowCondDC(block + i*8);
+    }
+
+    /* IDCT4 and store */
+    for(i=0;i<8;i++) {
+        idct4col(dest + i, 2 * line_size, block + i);
+        idct4col(dest + line_size + i, 2 * line_size, block + 8 + i);
+    }
+}
+
+/* 8x4 & 4x8 WMV2 IDCT */
+#undef CN_SHIFT
+#undef C_SHIFT
+#undef C_FIX
+#undef C1
+#undef C2
+#define CN_SHIFT 12
+#define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5))
+#define C1 C_FIX(0.6532814824)
+#define C2 C_FIX(0.2705980501)
+#define C3 C_FIX(0.5)
+#define C_SHIFT (4+1+12)
+static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col)
+{
+    int c0, c1, c2, c3, a0, a1, a2, a3;
+    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    a0 = col[8*0];
+    a1 = col[8*1];
+    a2 = col[8*2];
+    a3 = col[8*3];
+    c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1));
+    c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1));
+    c1 = a1 * C1 + a3 * C2;
+    c3 = a1 * C2 - a3 * C1;
+    dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)];
+    dest += line_size;
+    dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)];
+    dest += line_size;
+    dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)];
+    dest += line_size;
+    dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)];
+}
+
+#define RN_SHIFT 15
+#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))
+#define R1 R_FIX(0.6532814824)
+#define R2 R_FIX(0.2705980501)
+#define R3 R_FIX(0.5)
+#define R_SHIFT 11
+static inline void idct4row(DCTELEM *row)
+{
+    int c0, c1, c2, c3, a0, a1, a2, a3;
+    //const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    a0 = row[0];
+    a1 = row[1];
+    a2 = row[2];
+    a3 = row[3];
+    c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1));
+    c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1));
+    c1 = a1 * R1 + a3 * R2;
+    c3 = a1 * R2 - a3 * R1;
+    row[0]= (c0 + c1) >> R_SHIFT;
+    row[1]= (c2 + c3) >> R_SHIFT;
+    row[2]= (c2 - c3) >> R_SHIFT;
+    row[3]= (c0 - c1) >> R_SHIFT;
+}
+
+void simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    int i;
+
+    /* IDCT8 on each line */
+    for(i=0; i<4; i++) {
+        idctRowCondDC(block + i*8);
+    }
+
+    /* IDCT4 and store */
+    for(i=0;i<8;i++) {
+        idct4col_add(dest + i, line_size, block + i);
+    }
+}
+
+void simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    int i;
+
+    /* IDCT4 on each line */
+    for(i=0; i<8; i++) {
+        idct4row(block + i*8);
+    }
+
+    /* IDCT8 and store */
+    for(i=0; i<4; i++){
+        idctSparseColAdd(dest + i, line_size, block + i);
+    }
+}
+
diff --git a/contrib/ffmpeg/libavcodec/simple_idct.h b/contrib/ffmpeg/libavcodec/simple_idct.h
new file mode 100644
index 000000000..c4b453329
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/simple_idct.h
@@ -0,0 +1,38 @@
+/*
+ * Simple IDCT
+ *
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file simple_idct.h
+ * simple idct header.
+ */
+
+void simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block);
+void simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_simple_idct_mmx(int16_t *block);
+void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block);
+void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, int16_t *block);
+void simple_idct(DCTELEM *block);
+
+void simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block);
+
+void simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block);
+void simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block);
diff --git a/contrib/ffmpeg/libavcodec/smacker.c b/contrib/ffmpeg/libavcodec/smacker.c
new file mode 100644
index 000000000..2f2185848
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/smacker.c
@@ -0,0 +1,723 @@
+/*
+ * Smacker decoder
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file smacker.c
+ * Smacker decoder
+ */
+
+/*
+ * Based on http://wiki.multimedia.cx/index.php?title=Smacker
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "common.h"
+#include "avcodec.h"
+
+#define ALT_BITSTREAM_READER_LE
+#include "bitstream.h"
+
+#define SMKTREE_BITS 9
+#define SMK_NODE 0x80000000
+
+/*
+ * Decoder context
+ */
+typedef struct SmackVContext {
+    AVCodecContext *avctx;
+    AVFrame pic;
+
+    int *mmap_tbl, *mclr_tbl, *full_tbl, *type_tbl;
+    int mmap_last[3], mclr_last[3], full_last[3], type_last[3];
+} SmackVContext;
+
+/**
+ * Context used for code reconstructing
+ */
+typedef struct HuffContext {
+    int length;
+    int maxlength;
+    int current;
+    uint32_t *bits;
+    int *lengths;
+    int *values;
+} HuffContext;
+
+/* common parameters used for decode_bigtree */
+typedef struct DBCtx {
+    VLC *v1, *v2;
+    int *recode1, *recode2;
+    int escapes[3];
+    int *last;
+    int lcur;
+} DBCtx;
+
+/* possible runs of blocks */
+static const int block_runs[64] = {
+      1,    2,    3,    4,    5,    6,    7,    8,
+      9,   10,   11,   12,   13,   14,   15,   16,
+     17,   18,   19,   20,   21,   22,   23,   24,
+     25,   26,   27,   28,   29,   30,   31,   32,
+     33,   34,   35,   36,   37,   38,   39,   40,
+     41,   42,   43,   44,   45,   46,   47,   48,
+     49,   50,   51,   52,   53,   54,   55,   56,
+     57,   58,   59,  128,  256,  512, 1024, 2048 };
+
+enum SmkBlockTypes {
+    SMK_BLK_MONO = 0,
+    SMK_BLK_FULL = 1,
+    SMK_BLK_SKIP = 2,
+    SMK_BLK_FILL = 3 };
+
+/**
+ * Decode local frame tree
+ */
+static int smacker_decode_tree(GetBitContext *gb, HuffContext *hc, uint32_t prefix, int length)
+{
+    if(!get_bits1(gb)){ //Leaf
+        if(hc->current >= 256){
+            av_log(NULL, AV_LOG_ERROR, "Tree size exceeded!\n");
+            return -1;
+        }
+        if(length){
+            hc->bits[hc->current] = prefix;
+            hc->lengths[hc->current] = length;
+        } else {
+            hc->bits[hc->current] = 0;
+            hc->lengths[hc->current] = 0;
+        }
+        hc->values[hc->current] = get_bits(gb, 8);
+        hc->current++;
+        if(hc->maxlength < length)
+            hc->maxlength = length;
+        return 0;
+    } else { //Node
+        int r;
+        length++;
+        r = smacker_decode_tree(gb, hc, prefix, length);
+        if(r)
+            return r;
+        return smacker_decode_tree(gb, hc, prefix | (1 << (length - 1)), length);
+    }
+}
+
+/**
+ * Decode header tree
+ */
+static int smacker_decode_bigtree(GetBitContext *gb, HuffContext *hc, DBCtx *ctx)
+{
+    if(!get_bits1(gb)){ //Leaf
+        int val, i1, i2, b1, b2;
+        if(hc->current >= hc->length){
+            av_log(NULL, AV_LOG_ERROR, "Tree size exceeded!\n");
+            return -1;
+        }
+        b1 = get_bits_count(gb);
+        i1 = get_vlc2(gb, ctx->v1->table, SMKTREE_BITS, 3);
+        b1 = get_bits_count(gb) - b1;
+        b2 = get_bits_count(gb);
+        i2 = get_vlc2(gb, ctx->v2->table, SMKTREE_BITS, 3);
+        b2 = get_bits_count(gb) - b2;
+        val = ctx->recode1[i1] | (ctx->recode2[i2] << 8);
+        if(val == ctx->escapes[0]) {
+            ctx->last[0] = hc->current;
+            val = 0;
+        } else if(val == ctx->escapes[1]) {
+            ctx->last[1] = hc->current;
+            val = 0;
+        } else if(val == ctx->escapes[2]) {
+            ctx->last[2] = hc->current;
+            val = 0;
+        }
+
+        hc->values[hc->current++] = val;
+        return 1;
+    } else { //Node
+        int r = 0, t;
+
+        t = hc->current++;
+        r = smacker_decode_bigtree(gb, hc, ctx);
+        if(r < 0)
+            return r;
+        hc->values[t] = SMK_NODE | r;
+        r++;
+        r += smacker_decode_bigtree(gb, hc, ctx);
+        return r;
+    }
+}
+
+/**
+ * Store large tree as FFmpeg's vlc codes
+ */
+static int smacker_decode_header_tree(SmackVContext *smk, GetBitContext *gb, int **recodes, int *last, int size)
+{
+    int res;
+    HuffContext huff;
+    HuffContext tmp1, tmp2;
+    VLC vlc[2];
+    int escapes[3];
+    DBCtx ctx;
+
+    if(size >= UINT_MAX>>4){ // (((size + 3) >> 2) + 3) << 2 must not overflow
+        av_log(smk->avctx, AV_LOG_ERROR, "size too large\n");
+        return -1;
+    }
+
+    tmp1.length = 256;
+    tmp1.maxlength = 0;
+    tmp1.current = 0;
+    tmp1.bits = av_mallocz(256 * 4);
+    tmp1.lengths = av_mallocz(256 * sizeof(int));
+    tmp1.values = av_mallocz(256 * sizeof(int));
+
+    tmp2.length = 256;
+    tmp2.maxlength = 0;
+    tmp2.current = 0;
+    tmp2.bits = av_mallocz(256 * 4);
+    tmp2.lengths = av_mallocz(256 * sizeof(int));
+    tmp2.values = av_mallocz(256 * sizeof(int));
+
+    memset(&vlc[0], 0, sizeof(VLC));
+    memset(&vlc[1], 0, sizeof(VLC));
+
+    if(get_bits1(gb)) {
+        smacker_decode_tree(gb, &tmp1, 0, 0);
+        get_bits1(gb);
+        res = init_vlc(&vlc[0], SMKTREE_BITS, tmp1.length,
+                    tmp1.lengths, sizeof(int), sizeof(int),
+                    tmp1.bits, sizeof(uint32_t), sizeof(uint32_t), INIT_VLC_LE);
+        if(res < 0) {
+            av_log(smk->avctx, AV_LOG_ERROR, "Cannot build VLC table\n");
+            return -1;
+        }
+    } else {
+        av_log(smk->avctx, AV_LOG_ERROR, "Skipping low bytes tree\n");
+    }
+    if(get_bits1(gb)){
+        smacker_decode_tree(gb, &tmp2, 0, 0);
+        get_bits1(gb);
+        res = init_vlc(&vlc[1], SMKTREE_BITS, tmp2.length,
+                    tmp2.lengths, sizeof(int), sizeof(int),
+                    tmp2.bits, sizeof(uint32_t), sizeof(uint32_t), INIT_VLC_LE);
+        if(res < 0) {
+            av_log(smk->avctx, AV_LOG_ERROR, "Cannot build VLC table\n");
+            return -1;
+        }
+    } else {
+        av_log(smk->avctx, AV_LOG_ERROR, "Skipping high bytes tree\n");
+    }
+
+    escapes[0]  = get_bits(gb, 8);
+    escapes[0] |= get_bits(gb, 8) << 8;
+    escapes[1]  = get_bits(gb, 8);
+    escapes[1] |= get_bits(gb, 8) << 8;
+    escapes[2]  = get_bits(gb, 8);
+    escapes[2] |= get_bits(gb, 8) << 8;
+
+    last[0] = last[1] = last[2] = -1;
+
+    ctx.escapes[0] = escapes[0];
+    ctx.escapes[1] = escapes[1];
+    ctx.escapes[2] = escapes[2];
+    ctx.v1 = &vlc[0];
+    ctx.v2 = &vlc[1];
+    ctx.recode1 = tmp1.values;
+    ctx.recode2 = tmp2.values;
+    ctx.last = last;
+
+    huff.length = ((size + 3) >> 2) + 3;
+    huff.maxlength = 0;
+    huff.current = 0;
+    huff.values = av_mallocz(huff.length * sizeof(int));
+
+    smacker_decode_bigtree(gb, &huff, &ctx);
+    get_bits1(gb);
+    if(ctx.last[0] == -1) ctx.last[0] = huff.current++;
+    if(ctx.last[1] == -1) ctx.last[1] = huff.current++;
+    if(ctx.last[2] == -1) ctx.last[2] = huff.current++;
+
+    *recodes = huff.values;
+
+    if(vlc[0].table)
+        free_vlc(&vlc[0]);
+    if(vlc[1].table)
+        free_vlc(&vlc[1]);
+    av_free(tmp1.bits);
+    av_free(tmp1.lengths);
+    av_free(tmp1.values);
+    av_free(tmp2.bits);
+    av_free(tmp2.lengths);
+    av_free(tmp2.values);
+
+    return 0;
+}
+
+static int decode_header_trees(SmackVContext *smk) {
+    GetBitContext gb;
+    int mmap_size, mclr_size, full_size, type_size;
+
+    mmap_size = LE_32(smk->avctx->extradata);
+    mclr_size = LE_32(smk->avctx->extradata + 4);
+    full_size = LE_32(smk->avctx->extradata + 8);
+    type_size = LE_32(smk->avctx->extradata + 12);
+
+    init_get_bits(&gb, smk->avctx->extradata + 16, (smk->avctx->extradata_size - 16) * 8);
+
+    if(!get_bits1(&gb)) {
+        av_log(smk->avctx, AV_LOG_INFO, "Skipping MMAP tree\n");
+        smk->mmap_tbl = av_malloc(sizeof(int) * 2);
+        smk->mmap_tbl[0] = 0;
+        smk->mmap_last[0] = smk->mmap_last[1] = smk->mmap_last[2] = 1;
+    } else {
+        smacker_decode_header_tree(smk, &gb, &smk->mmap_tbl, smk->mmap_last, mmap_size);
+    }
+    if(!get_bits(&gb, 1)) {
+        av_log(smk->avctx, AV_LOG_INFO, "Skipping MCLR tree\n");
+        smk->mclr_tbl = av_malloc(sizeof(int) * 2);
+        smk->mclr_tbl[0] = 0;
+        smk->mclr_last[0] = smk->mclr_last[1] = smk->mclr_last[2] = 1;
+    } else {
+        smacker_decode_header_tree(smk, &gb, &smk->mclr_tbl, smk->mclr_last, mclr_size);
+    }
+    if(!get_bits(&gb, 1)) {
+        av_log(smk->avctx, AV_LOG_INFO, "Skipping FULL tree\n");
+        smk->full_tbl = av_malloc(sizeof(int) * 2);
+        smk->full_tbl[0] = 0;
+        smk->full_last[0] = smk->full_last[1] = smk->full_last[2] = 1;
+    } else {
+        smacker_decode_header_tree(smk, &gb, &smk->full_tbl, smk->full_last, full_size);
+    }
+    if(!get_bits(&gb, 1)) {
+        av_log(smk->avctx, AV_LOG_INFO, "Skipping TYPE tree\n");
+        smk->type_tbl = av_malloc(sizeof(int) * 2);
+        smk->type_tbl[0] = 0;
+        smk->type_last[0] = smk->type_last[1] = smk->type_last[2] = 1;
+    } else {
+        smacker_decode_header_tree(smk, &gb, &smk->type_tbl, smk->type_last, type_size);
+    }
+
+    return 0;
+}
+
+static always_inline void last_reset(int *recode, int *last) {
+    recode[last[0]] = recode[last[1]] = recode[last[2]] = 0;
+}
+
+/* get code and update history */
+static always_inline int smk_get_code(GetBitContext *gb, int *recode, int *last) {
+    register int *table = recode;
+    int v, b;
+
+    b = get_bits_count(gb);
+    while(*table & SMK_NODE) {
+        if(get_bits1(gb))
+            table += (*table) & (~SMK_NODE);
+        table++;
+    }
+    v = *table;
+    b = get_bits_count(gb) - b;
+
+    if(v != recode[last[0]]) {
+        recode[last[2]] = recode[last[1]];
+        recode[last[1]] = recode[last[0]];
+        recode[last[0]] = v;
+    }
+    return v;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
+{
+    SmackVContext * const smk = (SmackVContext *)avctx->priv_data;
+    uint8_t *out;
+    uint32_t *pal;
+    GetBitContext gb;
+    int blocks, blk, bw, bh;
+    int i;
+    int stride;
+
+    if(buf_size == 769)
+        return 0;
+    if(smk->pic.data[0])
+            avctx->release_buffer(avctx, &smk->pic);
+
+    smk->pic.reference = 1;
+    smk->pic.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if(avctx->reget_buffer(avctx, &smk->pic) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    /* make the palette available on the way out */
+    out = buf + 1;
+    pal = (uint32_t*)smk->pic.data[1];
+    smk->pic.palette_has_changed = buf[0] & 1;
+    smk->pic.key_frame = !!(buf[0] & 2);
+    if(smk->pic.key_frame)
+        smk->pic.pict_type = FF_I_TYPE;
+    else
+        smk->pic.pict_type = FF_P_TYPE;
+
+    for(i = 0; i < 256; i++) {
+        int r, g, b;
+        r = *out++;
+        g = *out++;
+        b = *out++;
+        *pal++ = (r << 16) | (g << 8) | b;
+    }
+
+    last_reset(smk->mmap_tbl, smk->mmap_last);
+    last_reset(smk->mclr_tbl, smk->mclr_last);
+    last_reset(smk->full_tbl, smk->full_last);
+    last_reset(smk->type_tbl, smk->type_last);
+    init_get_bits(&gb, buf + 769, (buf_size - 769) * 8);
+
+    blk = 0;
+    bw = avctx->width >> 2;
+    bh = avctx->height >> 2;
+    blocks = bw * bh;
+    out = smk->pic.data[0];
+    stride = smk->pic.linesize[0];
+    while(blk < blocks) {
+        int type, run, mode;
+        uint16_t pix;
+
+        type = smk_get_code(&gb, smk->type_tbl, smk->type_last);
+        run = block_runs[(type >> 2) & 0x3F];
+        switch(type & 3){
+        case SMK_BLK_MONO:
+            while(run-- && blk < blocks){
+                int clr, map;
+                int hi, lo;
+                clr = smk_get_code(&gb, smk->mclr_tbl, smk->mclr_last);
+                map = smk_get_code(&gb, smk->mmap_tbl, smk->mmap_last);
+                out = smk->pic.data[0] + (blk / bw) * (stride * 4) + (blk % bw) * 4;
+                hi = clr >> 8;
+                lo = clr & 0xFF;
+                for(i = 0; i < 4; i++) {
+                    if(map & 1) out[0] = hi; else out[0] = lo;
+                    if(map & 2) out[1] = hi; else out[1] = lo;
+                    if(map & 4) out[2] = hi; else out[2] = lo;
+                    if(map & 8) out[3] = hi; else out[3] = lo;
+                    map >>= 4;
+                    out += stride;
+                }
+                blk++;
+            }
+            break;
+        case SMK_BLK_FULL:
+            mode = 0;
+            if(avctx->codec_tag == MKTAG('S', 'M', 'K', '4')) { // In case of Smacker v4 we have three modes
+                if(get_bits1(&gb)) mode = 1;
+                else if(get_bits1(&gb)) mode = 2;
+            }
+            while(run-- && blk < blocks){
+                out = smk->pic.data[0] + (blk / bw) * (stride * 4) + (blk % bw) * 4;
+                switch(mode){
+                case 0:
+                    for(i = 0; i < 4; i++) {
+                        pix = smk_get_code(&gb, smk->full_tbl, smk->full_last);
+                        out[2] = pix & 0xFF;
+                        out[3] = pix >> 8;
+                        pix = smk_get_code(&gb, smk->full_tbl, smk->full_last);
+                        out[0] = pix & 0xFF;
+                        out[1] = pix >> 8;
+                        out += stride;
+                    }
+                    break;
+                case 1:
+                    pix = smk_get_code(&gb, smk->full_tbl, smk->full_last);
+                    out[0] = out[1] = pix & 0xFF;
+                    out[2] = out[3] = pix >> 8;
+                    out += stride;
+                    out[0] = out[1] = pix & 0xFF;
+                    out[2] = out[3] = pix >> 8;
+                    out += stride;
+                    pix = smk_get_code(&gb, smk->full_tbl, smk->full_last);
+                    out[0] = out[1] = pix & 0xFF;
+                    out[2] = out[3] = pix >> 8;
+                    out += stride;
+                    out[0] = out[1] = pix & 0xFF;
+                    out[2] = out[3] = pix >> 8;
+                    out += stride;
+                    break;
+                case 2:
+                    for(i = 0; i < 2; i++) {
+                        uint16_t pix1, pix2;
+                        pix1 = smk_get_code(&gb, smk->full_tbl, smk->full_last);
+                        pix2 = smk_get_code(&gb, smk->full_tbl, smk->full_last);
+                        out[0] = pix1 & 0xFF; out[1] = pix1 >> 8;
+                        out[2] = pix2 & 0xFF; out[3] = pix2 >> 8;
+                        out += stride;
+                        out[0] = pix1 & 0xFF; out[1] = pix1 >> 8;
+                        out[2] = pix2 & 0xFF; out[3] = pix2 >> 8;
+                        out += stride;
+                    }
+                    break;
+                }
+                blk++;
+            }
+            break;
+        case SMK_BLK_SKIP:
+            while(run-- && blk < blocks)
+                blk++;
+            break;
+        case SMK_BLK_FILL:
+            mode = type >> 8;
+            while(run-- && blk < blocks){
+                uint32_t col;
+                out = smk->pic.data[0] + (blk / bw) * (stride * 4) + (blk % bw) * 4;
+                col = mode * 0x01010101;
+                for(i = 0; i < 4; i++) {
+                    *((uint32_t*)out) = col;
+                    out += stride;
+                }
+                blk++;
+            }
+            break;
+        }
+
+    }
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = smk->pic;
+
+    /* always report that the buffer was completely consumed */
+    return buf_size;
+}
+
+
+
+/*
+ *
+ * Init smacker decoder
+ *
+ */
+static int decode_init(AVCodecContext *avctx)
+{
+    SmackVContext * const c = (SmackVContext *)avctx->priv_data;
+
+    c->avctx = avctx;
+    avctx->has_b_frames = 0;
+
+    c->pic.data[0] = NULL;
+
+    if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
+        return 1;
+    }
+
+    avctx->pix_fmt = PIX_FMT_PAL8;
+
+
+    /* decode huffman trees from extradata */
+    if(avctx->extradata_size < 16){
+        av_log(avctx, AV_LOG_ERROR, "Extradata missing!\n");
+        return -1;
+    }
+
+    decode_header_trees(c);
+
+
+    return 0;
+}
+
+
+
+/*
+ *
+ * Uninit smacker decoder
+ *
+ */
+static int decode_end(AVCodecContext *avctx)
+{
+    SmackVContext * const smk = (SmackVContext *)avctx->priv_data;
+
+    av_freep(&smk->mmap_tbl);
+    av_freep(&smk->mclr_tbl);
+    av_freep(&smk->full_tbl);
+    av_freep(&smk->type_tbl);
+
+    if (smk->pic.data[0])
+        avctx->release_buffer(avctx, &smk->pic);
+
+    return 0;
+}
+
+
+static int smka_decode_init(AVCodecContext *avctx)
+{
+    return 0;
+}
+
+/**
+ * Decode Smacker audio data
+ */
+static int smka_decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
+{
+    GetBitContext gb;
+    HuffContext h[4];
+    VLC vlc[4];
+    int16_t *samples = data;
+    int val;
+    int i, res;
+    int unp_size;
+    int bits, stereo;
+    int pred[2] = {0, 0};
+
+    unp_size = LE_32(buf);
+
+    init_get_bits(&gb, buf + 4, (buf_size - 4) * 8);
+
+    if(!get_bits1(&gb)){
+        av_log(avctx, AV_LOG_INFO, "Sound: no data\n");
+        *data_size = 0;
+        return 1;
+    }
+    stereo = get_bits1(&gb);
+    bits = get_bits1(&gb);
+
+    memset(vlc, 0, sizeof(VLC) * 4);
+    memset(h, 0, sizeof(HuffContext) * 4);
+    // Initialize
+    for(i = 0; i < (1 << (bits + stereo)); i++) {
+        h[i].length = 256;
+        h[i].maxlength = 0;
+        h[i].current = 0;
+        h[i].bits = av_mallocz(256 * 4);
+        h[i].lengths = av_mallocz(256 * sizeof(int));
+        h[i].values = av_mallocz(256 * sizeof(int));
+        get_bits1(&gb);
+        smacker_decode_tree(&gb, &h[i], 0, 0);
+        get_bits1(&gb);
+        if(h[i].current > 1) {
+            res = init_vlc(&vlc[i], SMKTREE_BITS, h[i].length,
+                    h[i].lengths, sizeof(int), sizeof(int),
+                    h[i].bits, sizeof(uint32_t), sizeof(uint32_t), INIT_VLC_LE);
+            if(res < 0) {
+                av_log(avctx, AV_LOG_ERROR, "Cannot build VLC table\n");
+                return -1;
+            }
+        }
+    }
+    if(bits) { //decode 16-bit data
+        pred[0]  = get_bits(&gb, 8);
+        pred[0] |= get_bits(&gb, 8);
+        *samples++ = pred[0];
+        if(stereo) {
+            pred[1]  = get_bits(&gb, 8);
+            pred[1] |= get_bits(&gb, 8);
+            *samples++ = pred[1];
+        }
+        for(i = 0; i < unp_size / 2; i++) {
+            if(i & stereo) {
+                if(vlc[2].table)
+                    res = get_vlc2(&gb, vlc[2].table, SMKTREE_BITS, 3);
+                else
+                    res = 0;
+                val  = h[2].values[res];
+                if(vlc[3].table)
+                    res = get_vlc2(&gb, vlc[3].table, SMKTREE_BITS, 3);
+                else
+                    res = 0;
+                val |= h[3].values[res] << 8;
+                pred[1] += (int16_t)val;
+                *samples++ = pred[1];
+            } else {
+                if(vlc[0].table)
+                    res = get_vlc2(&gb, vlc[0].table, SMKTREE_BITS, 3);
+                else
+                    res = 0;
+                val  = h[0].values[res];
+                if(vlc[1].table)
+                    res = get_vlc2(&gb, vlc[1].table, SMKTREE_BITS, 3);
+                else
+                    res = 0;
+                val |= h[1].values[res] << 8;
+                pred[0] += val;
+                *samples++ = pred[0];
+            }
+        }
+    } else { //8-bit data
+        pred[0] = get_bits(&gb, 8);
+        *samples++ = (pred[0] - 0x80) << 8;
+        if(stereo) {
+            pred[1] = get_bits(&gb, 8);
+            *samples++ = (pred[1] - 0x80) << 8;
+        }
+        for(i = 0; i < unp_size; i++) {
+            if(i & stereo){
+                if(vlc[1].table)
+                    res = get_vlc2(&gb, vlc[1].table, SMKTREE_BITS, 3);
+                else
+                    res = 0;
+                pred[1] += (int8_t)h[1].values[res];
+                *samples++ = (pred[1] - 0x80) << 8;
+            } else {
+                if(vlc[0].table)
+                    res = get_vlc2(&gb, vlc[0].table, SMKTREE_BITS, 3);
+                else
+                    res = 0;
+                pred[0] += (int8_t)h[0].values[res];
+                *samples++ = (pred[0] - 0x80) << 8;
+            }
+        }
+        unp_size *= 2;
+    }
+
+    for(i = 0; i < 4; i++) {
+        if(vlc[i].table)
+            free_vlc(&vlc[i]);
+        if(h[i].bits)
+            av_free(h[i].bits);
+        if(h[i].lengths)
+            av_free(h[i].lengths);
+        if(h[i].values)
+            av_free(h[i].values);
+    }
+
+    *data_size = unp_size;
+    return buf_size;
+}
+
+AVCodec smacker_decoder = {
+    "smackvid",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_SMACKVIDEO,
+    sizeof(SmackVContext),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame
+};
+
+AVCodec smackaud_decoder = {
+    "smackaud",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_SMACKAUDIO,
+    0,
+    smka_decode_init,
+    NULL,
+    NULL,
+    smka_decode_frame
+};
+
diff --git a/contrib/ffmpeg/libavcodec/smc.c b/contrib/ffmpeg/libavcodec/smc.c
new file mode 100644
index 000000000..77fae328b
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/smc.c
@@ -0,0 +1,493 @@
+/*
+ * Quicktime Graphics (SMC) Video Decoder
+ * Copyright (C) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file smc.c
+ * QT SMC Video Decoder by Mike Melanson (melanson@pcisys.net)
+ * For more information about the SMC format, visit:
+ *   http://www.pcisys.net/~melanson/codecs/
+ *
+ * The SMC decoder outputs PAL8 colorspace data.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+
+#define CPAIR 2
+#define CQUAD 4
+#define COCTET 8
+
+#define COLORS_PER_TABLE 256
+
+typedef struct SmcContext {
+
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    AVFrame frame;
+
+    unsigned char *buf;
+    int size;
+
+    /* SMC color tables */
+    unsigned char color_pairs[COLORS_PER_TABLE * CPAIR];
+    unsigned char color_quads[COLORS_PER_TABLE * CQUAD];
+    unsigned char color_octets[COLORS_PER_TABLE * COCTET];
+
+} SmcContext;
+
+#define GET_BLOCK_COUNT() \
+  (opcode & 0x10) ? (1 + s->buf[stream_ptr++]) : 1 + (opcode & 0x0F);
+
+#define ADVANCE_BLOCK() \
+{ \
+    pixel_ptr += 4; \
+    if (pixel_ptr >= width) \
+    { \
+        pixel_ptr = 0; \
+        row_ptr += stride * 4; \
+    } \
+    total_blocks--; \
+    if (total_blocks < 0) \
+    { \
+        av_log(s->avctx, AV_LOG_INFO, "warning: block counter just went negative (this should not happen)\n"); \
+        return; \
+    } \
+}
+
+static void smc_decode_stream(SmcContext *s)
+{
+    int width = s->avctx->width;
+    int height = s->avctx->height;
+    int stride = s->frame.linesize[0];
+    int i;
+    int stream_ptr = 0;
+    int chunk_size;
+    unsigned char opcode;
+    int n_blocks;
+    unsigned int color_flags;
+    unsigned int color_flags_a;
+    unsigned int color_flags_b;
+    unsigned int flag_mask;
+
+    unsigned char *pixels = s->frame.data[0];
+
+    int image_size = height * s->frame.linesize[0];
+    int row_ptr = 0;
+    int pixel_ptr = 0;
+    int pixel_x, pixel_y;
+    int row_inc = stride - 4;
+    int block_ptr;
+    int prev_block_ptr;
+    int prev_block_ptr1, prev_block_ptr2;
+    int prev_block_flag;
+    int total_blocks;
+    int color_table_index;  /* indexes to color pair, quad, or octet tables */
+    int pixel;
+
+    int color_pair_index = 0;
+    int color_quad_index = 0;
+    int color_octet_index = 0;
+
+    /* make the palette available */
+    memcpy(s->frame.data[1], s->avctx->palctrl->palette, AVPALETTE_SIZE);
+    if (s->avctx->palctrl->palette_changed) {
+        s->frame.palette_has_changed = 1;
+        s->avctx->palctrl->palette_changed = 0;
+    }
+
+    chunk_size = BE_32(&s->buf[stream_ptr]) & 0x00FFFFFF;
+    stream_ptr += 4;
+    if (chunk_size != s->size)
+        av_log(s->avctx, AV_LOG_INFO, "warning: MOV chunk size != encoded chunk size (%d != %d); using MOV chunk size\n",
+            chunk_size, s->size);
+
+    chunk_size = s->size;
+    total_blocks = ((s->avctx->width + 3) / 4) * ((s->avctx->height + 3) / 4);
+
+    /* traverse through the blocks */
+    while (total_blocks) {
+        /* sanity checks */
+        /* make sure stream ptr hasn't gone out of bounds */
+        if (stream_ptr > chunk_size) {
+            av_log(s->avctx, AV_LOG_INFO, "SMC decoder just went out of bounds (stream ptr = %d, chunk size = %d)\n",
+                stream_ptr, chunk_size);
+            return;
+        }
+        /* make sure the row pointer hasn't gone wild */
+        if (row_ptr >= image_size) {
+            av_log(s->avctx, AV_LOG_INFO, "SMC decoder just went out of bounds (row ptr = %d, height = %d)\n",
+                row_ptr, image_size);
+            return;
+        }
+
+        opcode = s->buf[stream_ptr++];
+        switch (opcode & 0xF0) {
+        /* skip n blocks */
+        case 0x00:
+        case 0x10:
+            n_blocks = GET_BLOCK_COUNT();
+            while (n_blocks--) {
+                ADVANCE_BLOCK();
+            }
+            break;
+
+        /* repeat last block n times */
+        case 0x20:
+        case 0x30:
+            n_blocks = GET_BLOCK_COUNT();
+
+            /* sanity check */
+            if ((row_ptr == 0) && (pixel_ptr == 0)) {
+                av_log(s->avctx, AV_LOG_INFO, "encountered repeat block opcode (%02X) but no blocks rendered yet\n",
+                    opcode & 0xF0);
+                break;
+            }
+
+            /* figure out where the previous block started */
+            if (pixel_ptr == 0)
+                prev_block_ptr1 =
+                    (row_ptr - s->avctx->width * 4) + s->avctx->width - 4;
+            else
+                prev_block_ptr1 = row_ptr + pixel_ptr - 4;
+
+            while (n_blocks--) {
+                block_ptr = row_ptr + pixel_ptr;
+                prev_block_ptr = prev_block_ptr1;
+                for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                    for (pixel_x = 0; pixel_x < 4; pixel_x++) {
+                        pixels[block_ptr++] = pixels[prev_block_ptr++];
+                    }
+                    block_ptr += row_inc;
+                    prev_block_ptr += row_inc;
+                }
+                ADVANCE_BLOCK();
+            }
+            break;
+
+        /* repeat previous pair of blocks n times */
+        case 0x40:
+        case 0x50:
+            n_blocks = GET_BLOCK_COUNT();
+            n_blocks *= 2;
+
+            /* sanity check */
+            if ((row_ptr == 0) && (pixel_ptr < 2 * 4)) {
+                av_log(s->avctx, AV_LOG_INFO, "encountered repeat block opcode (%02X) but not enough blocks rendered yet\n",
+                    opcode & 0xF0);
+                break;
+            }
+
+            /* figure out where the previous 2 blocks started */
+            if (pixel_ptr == 0)
+                prev_block_ptr1 = (row_ptr - s->avctx->width * 4) +
+                    s->avctx->width - 4 * 2;
+            else if (pixel_ptr == 4)
+                prev_block_ptr1 = (row_ptr - s->avctx->width * 4) + row_inc;
+            else
+                prev_block_ptr1 = row_ptr + pixel_ptr - 4 * 2;
+
+            if (pixel_ptr == 0)
+                prev_block_ptr2 = (row_ptr - s->avctx->width * 4) + row_inc;
+            else
+                prev_block_ptr2 = row_ptr + pixel_ptr - 4;
+
+            prev_block_flag = 0;
+            while (n_blocks--) {
+                block_ptr = row_ptr + pixel_ptr;
+                if (prev_block_flag)
+                    prev_block_ptr = prev_block_ptr2;
+                else
+                    prev_block_ptr = prev_block_ptr1;
+                prev_block_flag = !prev_block_flag;
+
+                for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                    for (pixel_x = 0; pixel_x < 4; pixel_x++) {
+                        pixels[block_ptr++] = pixels[prev_block_ptr++];
+                    }
+                    block_ptr += row_inc;
+                    prev_block_ptr += row_inc;
+                }
+                ADVANCE_BLOCK();
+            }
+            break;
+
+        /* 1-color block encoding */
+        case 0x60:
+        case 0x70:
+            n_blocks = GET_BLOCK_COUNT();
+            pixel = s->buf[stream_ptr++];
+
+            while (n_blocks--) {
+                block_ptr = row_ptr + pixel_ptr;
+                for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                    for (pixel_x = 0; pixel_x < 4; pixel_x++) {
+                        pixels[block_ptr++] = pixel;
+                    }
+                    block_ptr += row_inc;
+                }
+                ADVANCE_BLOCK();
+            }
+            break;
+
+        /* 2-color block encoding */
+        case 0x80:
+        case 0x90:
+            n_blocks = (opcode & 0x0F) + 1;
+
+            /* figure out which color pair to use to paint the 2-color block */
+            if ((opcode & 0xF0) == 0x80) {
+                /* fetch the next 2 colors from bytestream and store in next
+                 * available entry in the color pair table */
+                for (i = 0; i < CPAIR; i++) {
+                    pixel = s->buf[stream_ptr++];
+                    color_table_index = CPAIR * color_pair_index + i;
+                    s->color_pairs[color_table_index] = pixel;
+                }
+                /* this is the base index to use for this block */
+                color_table_index = CPAIR * color_pair_index;
+                color_pair_index++;
+                /* wraparound */
+                if (color_pair_index == COLORS_PER_TABLE)
+                    color_pair_index = 0;
+            } else
+                color_table_index = CPAIR * s->buf[stream_ptr++];
+
+            while (n_blocks--) {
+                color_flags = BE_16(&s->buf[stream_ptr]);
+                stream_ptr += 2;
+                flag_mask = 0x8000;
+                block_ptr = row_ptr + pixel_ptr;
+                for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                    for (pixel_x = 0; pixel_x < 4; pixel_x++) {
+                        if (color_flags & flag_mask)
+                            pixel = color_table_index + 1;
+                        else
+                            pixel = color_table_index;
+                        flag_mask >>= 1;
+                        pixels[block_ptr++] = s->color_pairs[pixel];
+                    }
+                    block_ptr += row_inc;
+                }
+                ADVANCE_BLOCK();
+            }
+            break;
+
+        /* 4-color block encoding */
+        case 0xA0:
+        case 0xB0:
+            n_blocks = (opcode & 0x0F) + 1;
+
+            /* figure out which color quad to use to paint the 4-color block */
+            if ((opcode & 0xF0) == 0xA0) {
+                /* fetch the next 4 colors from bytestream and store in next
+                 * available entry in the color quad table */
+                for (i = 0; i < CQUAD; i++) {
+                    pixel = s->buf[stream_ptr++];
+                    color_table_index = CQUAD * color_quad_index + i;
+                    s->color_quads[color_table_index] = pixel;
+                }
+                /* this is the base index to use for this block */
+                color_table_index = CQUAD * color_quad_index;
+                color_quad_index++;
+                /* wraparound */
+                if (color_quad_index == COLORS_PER_TABLE)
+                    color_quad_index = 0;
+            } else
+                color_table_index = CQUAD * s->buf[stream_ptr++];
+
+            while (n_blocks--) {
+                color_flags = BE_32(&s->buf[stream_ptr]);
+                stream_ptr += 4;
+                /* flag mask actually acts as a bit shift count here */
+                flag_mask = 30;
+                block_ptr = row_ptr + pixel_ptr;
+                for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                    for (pixel_x = 0; pixel_x < 4; pixel_x++) {
+                        pixel = color_table_index +
+                            ((color_flags >> flag_mask) & 0x03);
+                        flag_mask -= 2;
+                        pixels[block_ptr++] = s->color_quads[pixel];
+                    }
+                    block_ptr += row_inc;
+                }
+                ADVANCE_BLOCK();
+            }
+            break;
+
+        /* 8-color block encoding */
+        case 0xC0:
+        case 0xD0:
+            n_blocks = (opcode & 0x0F) + 1;
+
+            /* figure out which color octet to use to paint the 8-color block */
+            if ((opcode & 0xF0) == 0xC0) {
+                /* fetch the next 8 colors from bytestream and store in next
+                 * available entry in the color octet table */
+                for (i = 0; i < COCTET; i++) {
+                    pixel = s->buf[stream_ptr++];
+                    color_table_index = COCTET * color_octet_index + i;
+                    s->color_octets[color_table_index] = pixel;
+                }
+                /* this is the base index to use for this block */
+                color_table_index = COCTET * color_octet_index;
+                color_octet_index++;
+                /* wraparound */
+                if (color_octet_index == COLORS_PER_TABLE)
+                    color_octet_index = 0;
+            } else
+                color_table_index = COCTET * s->buf[stream_ptr++];
+
+            while (n_blocks--) {
+                /*
+                  For this input of 6 hex bytes:
+                    01 23 45 67 89 AB
+                  Mangle it to this output:
+                    flags_a = xx012456, flags_b = xx89A37B
+                */
+                /* build the color flags */
+                color_flags_a = color_flags_b = 0;
+                color_flags_a =
+                    (s->buf[stream_ptr + 0] << 16) |
+                    ((s->buf[stream_ptr + 1] & 0xF0) << 8) |
+                    ((s->buf[stream_ptr + 2] & 0xF0) << 4) |
+                    ((s->buf[stream_ptr + 2] & 0x0F) << 4) |
+                    ((s->buf[stream_ptr + 3] & 0xF0) >> 4);
+                color_flags_b =
+                    (s->buf[stream_ptr + 4] << 16) |
+                    ((s->buf[stream_ptr + 5] & 0xF0) << 8) |
+                    ((s->buf[stream_ptr + 1] & 0x0F) << 8) |
+                    ((s->buf[stream_ptr + 3] & 0x0F) << 4) |
+                    (s->buf[stream_ptr + 5] & 0x0F);
+                stream_ptr += 6;
+
+                color_flags = color_flags_a;
+                /* flag mask actually acts as a bit shift count here */
+                flag_mask = 21;
+                block_ptr = row_ptr + pixel_ptr;
+                for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                    /* reload flags at third row (iteration pixel_y == 2) */
+                    if (pixel_y == 2) {
+                        color_flags = color_flags_b;
+                        flag_mask = 21;
+                    }
+                    for (pixel_x = 0; pixel_x < 4; pixel_x++) {
+                        pixel = color_table_index +
+                            ((color_flags >> flag_mask) & 0x07);
+                        flag_mask -= 3;
+                        pixels[block_ptr++] = s->color_octets[pixel];
+                    }
+                    block_ptr += row_inc;
+                }
+                ADVANCE_BLOCK();
+            }
+            break;
+
+        /* 16-color block encoding (every pixel is a different color) */
+        case 0xE0:
+            n_blocks = (opcode & 0x0F) + 1;
+
+            while (n_blocks--) {
+                block_ptr = row_ptr + pixel_ptr;
+                for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+                    for (pixel_x = 0; pixel_x < 4; pixel_x++) {
+                        pixels[block_ptr++] = s->buf[stream_ptr++];
+                    }
+                    block_ptr += row_inc;
+                }
+                ADVANCE_BLOCK();
+            }
+            break;
+
+        case 0xF0:
+            av_log(s->avctx, AV_LOG_INFO, "0xF0 opcode seen in SMC chunk (contact the developers)\n");
+            break;
+        }
+    }
+}
+
+static int smc_decode_init(AVCodecContext *avctx)
+{
+    SmcContext *s = (SmcContext *)avctx->priv_data;
+
+    s->avctx = avctx;
+    avctx->pix_fmt = PIX_FMT_PAL8;
+    avctx->has_b_frames = 0;
+    dsputil_init(&s->dsp, avctx);
+
+    s->frame.data[0] = NULL;
+
+    return 0;
+}
+
+static int smc_decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             uint8_t *buf, int buf_size)
+{
+    SmcContext *s = (SmcContext *)avctx->priv_data;
+
+    s->buf = buf;
+    s->size = buf_size;
+
+    s->frame.reference = 1;
+    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE |
+                            FF_BUFFER_HINTS_REUSABLE | FF_BUFFER_HINTS_READABLE;
+    if (avctx->reget_buffer(avctx, &s->frame)) {
+        av_log(s->avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+        return -1;
+    }
+
+    smc_decode_stream(s);
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->frame;
+
+    /* always report that the buffer was completely consumed */
+    return buf_size;
+}
+
+static int smc_decode_end(AVCodecContext *avctx)
+{
+    SmcContext *s = (SmcContext *)avctx->priv_data;
+
+    if (s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    return 0;
+}
+
+AVCodec smc_decoder = {
+    "smc",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_SMC,
+    sizeof(SmcContext),
+    smc_decode_init,
+    NULL,
+    smc_decode_end,
+    smc_decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/snow.c b/contrib/ffmpeg/libavcodec/snow.c
new file mode 100644
index 000000000..6bc9a8f1a
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/snow.c
@@ -0,0 +1,4686 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "common.h"
+#include "dsputil.h"
+#include "snow.h"
+
+#include "rangecoder.h"
+
+#include "mpegvideo.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+static const int8_t quant3[256]={
+ 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
+};
+static const int8_t quant3b[256]={
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+};
+static const int8_t quant3bA[256]={
+ 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+};
+static const int8_t quant5[256]={
+ 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
+};
+static const int8_t quant7[256]={
+ 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
+-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
+-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
+-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
+-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
+-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
+};
+static const int8_t quant9[256]={
+ 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
+-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
+};
+static const int8_t quant11[256]={
+ 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
+-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
+};
+static const int8_t quant13[256]={
+ 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
+-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
+-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
+-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
+-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
+-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
+};
+
+#if 0 //64*cubic
+static const uint8_t obmc32[1024]={
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
+ 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
+ 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
+ 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
+ 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
+ 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
+ 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
+ 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
+ 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
+ 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
+ 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
+ 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
+ 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
+ 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
+ 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
+ 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
+ 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
+ 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
+ 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
+ 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
+ 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
+ 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
+ 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
+ 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
+ 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
+ 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
+ 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
+ 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+//error:0.000022
+};
+static const uint8_t obmc16[256]={
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+ 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
+ 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
+ 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
+ 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
+ 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
+ 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
+ 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
+ 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
+ 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
+ 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
+ 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
+ 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
+ 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
+ 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+//error:0.000033
+};
+#elif 1 // 64*linear
+static const uint8_t obmc32[1024]={
+  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
+  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
+  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
+  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
+  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
+  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
+  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
+  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
+  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
+  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
+  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
+  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
+  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
+  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
+  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
+  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
+  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
+  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
+  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
+  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
+  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
+  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
+  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
+  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
+  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
+  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
+  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
+  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
+  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
+  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
+  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
+  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
+ //error:0.000020
+};
+static const uint8_t obmc16[256]={
+  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
+  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
+  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
+  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
+  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
+ 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
+ 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
+ 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
+ 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
+ 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
+ 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
+  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
+  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
+  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
+  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
+  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
+//error:0.000015
+};
+#else //64*cos
+static const uint8_t obmc32[1024]={
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
+ 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
+ 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
+ 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
+ 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
+ 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
+ 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
+ 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
+ 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
+ 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
+ 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
+ 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
+ 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
+ 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
+ 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
+ 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
+ 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
+ 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
+ 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
+ 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
+ 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
+ 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
+ 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
+ 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
+ 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
+ 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
+ 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
+ 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+//error:0.000022
+};
+static const uint8_t obmc16[256]={
+ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+ 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
+ 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
+ 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
+ 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
+ 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
+ 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
+ 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
+ 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
+ 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
+ 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
+ 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
+ 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
+ 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
+ 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
+ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+//error:0.000022
+};
+#endif
+
+//linear *64
+static const uint8_t obmc8[64]={
+  4, 12, 20, 28, 28, 20, 12,  4,
+ 12, 36, 60, 84, 84, 60, 36, 12,
+ 20, 60,100,140,140,100, 60, 20,
+ 28, 84,140,196,196,140, 84, 28,
+ 28, 84,140,196,196,140, 84, 28,
+ 20, 60,100,140,140,100, 60, 20,
+ 12, 36, 60, 84, 84, 60, 36, 12,
+  4, 12, 20, 28, 28, 20, 12,  4,
+//error:0.000000
+};
+
+//linear *64
+static const uint8_t obmc4[16]={
+ 16, 48, 48, 16,
+ 48,144,144, 48,
+ 48,144,144, 48,
+ 16, 48, 48, 16,
+//error:0.000000
+};
+
+static const uint8_t *obmc_tab[4]={
+    obmc32, obmc16, obmc8, obmc4
+};
+
+static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
+
+typedef struct BlockNode{
+    int16_t mx;
+    int16_t my;
+    uint8_t ref;
+    uint8_t color[3];
+    uint8_t type;
+//#define TYPE_SPLIT    1
+#define BLOCK_INTRA   1
+#define BLOCK_OPT     2
+//#define TYPE_NOCOLOR  4
+    uint8_t level; //FIXME merge into type?
+}BlockNode;
+
+static const BlockNode null_block= { //FIXME add border maybe
+    .color= {128,128,128},
+    .mx= 0,
+    .my= 0,
+    .ref= 0,
+    .type= 0,
+    .level= 0,
+};
+
+#define LOG2_MB_SIZE 4
+#define MB_SIZE (1<<LOG2_MB_SIZE)
+
+typedef struct x_and_coeff{
+    int16_t x;
+    uint16_t coeff;
+} x_and_coeff;
+
+typedef struct SubBand{
+    int level;
+    int stride;
+    int width;
+    int height;
+    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
+    DWTELEM *buf;
+    int buf_x_offset;
+    int buf_y_offset;
+    int stride_line; ///< Stride measured in lines, not pixels.
+    x_and_coeff * x_coeff;
+    struct SubBand *parent;
+    uint8_t state[/*7*2*/ 7 + 512][32];
+}SubBand;
+
+typedef struct Plane{
+    int width;
+    int height;
+    SubBand band[MAX_DECOMPOSITIONS][4];
+}Plane;
+
+typedef struct SnowContext{
+//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
+
+    AVCodecContext *avctx;
+    RangeCoder c;
+    DSPContext dsp;
+    AVFrame new_picture;
+    AVFrame input_picture;              ///< new_picture with the internal linesizes
+    AVFrame current_picture;
+    AVFrame last_picture[MAX_REF_FRAMES];
+    AVFrame mconly_picture;
+//     uint8_t q_context[16];
+    uint8_t header_state[32];
+    uint8_t block_state[128 + 32*128];
+    int keyframe;
+    int always_reset;
+    int version;
+    int spatial_decomposition_type;
+    int temporal_decomposition_type;
+    int spatial_decomposition_count;
+    int temporal_decomposition_count;
+    int max_ref_frames;
+    int ref_frames;
+    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
+    uint32_t *ref_scores[MAX_REF_FRAMES];
+    DWTELEM *spatial_dwt_buffer;
+    int colorspace_type;
+    int chroma_h_shift;
+    int chroma_v_shift;
+    int spatial_scalability;
+    int qlog;
+    int lambda;
+    int lambda2;
+    int pass1_rc;
+    int mv_scale;
+    int qbias;
+#define QBIAS_SHIFT 3
+    int b_width;
+    int b_height;
+    int block_max_depth;
+    Plane plane[MAX_PLANES];
+    BlockNode *block;
+#define ME_CACHE_SIZE 1024
+    int me_cache[ME_CACHE_SIZE];
+    int me_cache_generation;
+    slice_buffer sb;
+
+    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
+}SnowContext;
+
+typedef struct {
+    DWTELEM *b0;
+    DWTELEM *b1;
+    DWTELEM *b2;
+    DWTELEM *b3;
+    int y;
+} dwt_compose_t;
+
+#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
+//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
+
+static void iterative_me(SnowContext *s);
+
+static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
+{
+    int i;
+
+    buf->base_buffer = base_buffer;
+    buf->line_count = line_count;
+    buf->line_width = line_width;
+    buf->data_count = max_allocated_lines;
+    buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
+    buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
+
+    for (i = 0; i < max_allocated_lines; i++)
+    {
+      buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
+    }
+
+    buf->data_stack_top = max_allocated_lines - 1;
+}
+
+static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
+{
+    int offset;
+    DWTELEM * buffer;
+
+//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
+
+    assert(buf->data_stack_top >= 0);
+//  assert(!buf->line[line]);
+    if (buf->line[line])
+        return buf->line[line];
+
+    offset = buf->line_width * line;
+    buffer = buf->data_stack[buf->data_stack_top];
+    buf->data_stack_top--;
+    buf->line[line] = buffer;
+
+//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
+
+    return buffer;
+}
+
+static void slice_buffer_release(slice_buffer * buf, int line)
+{
+    int offset;
+    DWTELEM * buffer;
+
+    assert(line >= 0 && line < buf->line_count);
+    assert(buf->line[line]);
+
+    offset = buf->line_width * line;
+    buffer = buf->line[line];
+    buf->data_stack_top++;
+    buf->data_stack[buf->data_stack_top] = buffer;
+    buf->line[line] = NULL;
+
+//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
+}
+
+static void slice_buffer_flush(slice_buffer * buf)
+{
+    int i;
+    for (i = 0; i < buf->line_count; i++)
+    {
+        if (buf->line[i])
+        {
+//      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
+            slice_buffer_release(buf, i);
+        }
+    }
+}
+
+static void slice_buffer_destroy(slice_buffer * buf)
+{
+    int i;
+    slice_buffer_flush(buf);
+
+    for (i = buf->data_count - 1; i >= 0; i--)
+    {
+        assert(buf->data_stack[i]);
+        av_freep(&buf->data_stack[i]);
+    }
+    assert(buf->data_stack);
+    av_freep(&buf->data_stack);
+    assert(buf->line);
+    av_freep(&buf->line);
+}
+
+#ifdef __sgi
+// Avoid a name clash on SGI IRIX
+#undef qexp
+#endif
+#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
+static uint8_t qexp[QROOT];
+
+static inline int mirror(int v, int m){
+    while((unsigned)v > (unsigned)m){
+        v=-v;
+        if(v<0) v+= 2*m;
+    }
+    return v;
+}
+
+static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
+    int i;
+
+    if(v){
+        const int a= FFABS(v);
+        const int e= av_log2(a);
+#if 1
+        const int el= FFMIN(e, 10);
+        put_rac(c, state+0, 0);
+
+        for(i=0; i<el; i++){
+            put_rac(c, state+1+i, 1);  //1..10
+        }
+        for(; i<e; i++){
+            put_rac(c, state+1+9, 1);  //1..10
+        }
+        put_rac(c, state+1+FFMIN(i,9), 0);
+
+        for(i=e-1; i>=el; i--){
+            put_rac(c, state+22+9, (a>>i)&1); //22..31
+        }
+        for(; i>=0; i--){
+            put_rac(c, state+22+i, (a>>i)&1); //22..31
+        }
+
+        if(is_signed)
+            put_rac(c, state+11 + el, v < 0); //11..21
+#else
+
+        put_rac(c, state+0, 0);
+        if(e<=9){
+            for(i=0; i<e; i++){
+                put_rac(c, state+1+i, 1);  //1..10
+            }
+            put_rac(c, state+1+i, 0);
+
+            for(i=e-1; i>=0; i--){
+                put_rac(c, state+22+i, (a>>i)&1); //22..31
+            }
+
+            if(is_signed)
+                put_rac(c, state+11 + e, v < 0); //11..21
+        }else{
+            for(i=0; i<e; i++){
+                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
+            }
+            put_rac(c, state+1+FFMIN(i,9), 0);
+
+            for(i=e-1; i>=0; i--){
+                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
+            }
+
+            if(is_signed)
+                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
+        }
+#endif
+    }else{
+        put_rac(c, state+0, 1);
+    }
+}
+
+static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
+    if(get_rac(c, state+0))
+        return 0;
+    else{
+        int i, e, a;
+        e= 0;
+        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
+            e++;
+        }
+
+        a= 1;
+        for(i=e-1; i>=0; i--){
+            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
+        }
+
+        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
+            return -a;
+        else
+            return a;
+    }
+}
+
+static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
+    int i;
+    int r= log2>=0 ? 1<<log2 : 1;
+
+    assert(v>=0);
+    assert(log2>=-4);
+
+    while(v >= r){
+        put_rac(c, state+4+log2, 1);
+        v -= r;
+        log2++;
+        if(log2>0) r+=r;
+    }
+    put_rac(c, state+4+log2, 0);
+
+    for(i=log2-1; i>=0; i--){
+        put_rac(c, state+31-i, (v>>i)&1);
+    }
+}
+
+static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
+    int i;
+    int r= log2>=0 ? 1<<log2 : 1;
+    int v=0;
+
+    assert(log2>=-4);
+
+    while(get_rac(c, state+4+log2)){
+        v+= r;
+        log2++;
+        if(log2>0) r+=r;
+    }
+
+    for(i=log2-1; i>=0; i--){
+        v+= get_rac(c, state+31-i)<<i;
+    }
+
+    return v;
+}
+
+static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
+    const int mirror_left= !highpass;
+    const int mirror_right= (width&1) ^ highpass;
+    const int w= (width>>1) - 1 + (highpass & width);
+    int i;
+
+#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
+    if(mirror_left){
+        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
+        dst += dst_step;
+        src += src_step;
+    }
+
+    for(i=0; i<w; i++){
+        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
+    }
+
+    if(mirror_right){
+        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
+    }
+}
+
+#ifndef lift5
+static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
+    const int mirror_left= !highpass;
+    const int mirror_right= (width&1) ^ highpass;
+    const int w= (width>>1) - 1 + (highpass & width);
+    int i;
+
+    if(mirror_left){
+        int r= 3*2*ref[0];
+        r += r>>4;
+        r += r>>8;
+        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
+        dst += dst_step;
+        src += src_step;
+    }
+
+    for(i=0; i<w; i++){
+        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
+        r += r>>4;
+        r += r>>8;
+        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
+    }
+
+    if(mirror_right){
+        int r= 3*2*ref[w*ref_step];
+        r += r>>4;
+        r += r>>8;
+        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
+    }
+}
+#endif
+
+#ifndef liftS
+static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
+    const int mirror_left= !highpass;
+    const int mirror_right= (width&1) ^ highpass;
+    const int w= (width>>1) - 1 + (highpass & width);
+    int i;
+
+    assert(shift == 4);
+#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
+    if(mirror_left){
+        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
+        dst += dst_step;
+        src += src_step;
+    }
+
+    for(i=0; i<w; i++){
+        dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
+    }
+
+    if(mirror_right){
+        dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
+    }
+}
+#endif
+
+
+static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
+    int x, i;
+
+    for(x=start; x<width; x+=2){
+        int64_t sum=0;
+
+        for(i=0; i<n; i++){
+            int x2= x + 2*i - n + 1;
+            if     (x2<     0) x2= -x2;
+            else if(x2>=width) x2= 2*width-x2-2;
+            sum += coeffs[i]*(int64_t)dst[x2];
+        }
+        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
+        else        dst[x] += (sum + (1<<shift)/2)>>shift;
+    }
+}
+
+static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
+    int x, y, i;
+    for(y=start; y<height; y+=2){
+        for(x=0; x<width; x++){
+            int64_t sum=0;
+
+            for(i=0; i<n; i++){
+                int y2= y + 2*i - n + 1;
+                if     (y2<      0) y2= -y2;
+                else if(y2>=height) y2= 2*height-y2-2;
+                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
+            }
+            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
+            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
+        }
+    }
+}
+
+#define SCALEX 1
+#define LX0 0
+#define LX1 1
+
+#if 0 // more accurate 9/7
+#define N1 2
+#define SHIFT1 14
+#define COEFFS1 (int[]){-25987,-25987}
+#define N2 2
+#define SHIFT2 19
+#define COEFFS2 (int[]){-27777,-27777}
+#define N3 2
+#define SHIFT3 15
+#define COEFFS3 (int[]){28931,28931}
+#define N4 2
+#define SHIFT4 15
+#define COEFFS4 (int[]){14533,14533}
+#elif 1 // 13/7 CRF
+#define N1 4
+#define SHIFT1 4
+#define COEFFS1 (int[]){1,-9,-9,1}
+#define N2 4
+#define SHIFT2 4
+#define COEFFS2 (int[]){-1,5,5,-1}
+#define N3 0
+#define SHIFT3 1
+#define COEFFS3 NULL
+#define N4 0
+#define SHIFT4 1
+#define COEFFS4 NULL
+#elif 1 // 3/5
+#define LX0 1
+#define LX1 0
+#define SCALEX 0.5
+#define N1 2
+#define SHIFT1 1
+#define COEFFS1 (int[]){1,1}
+#define N2 2
+#define SHIFT2 2
+#define COEFFS2 (int[]){-1,-1}
+#define N3 0
+#define SHIFT3 0
+#define COEFFS3 NULL
+#define N4 0
+#define SHIFT4 0
+#define COEFFS4 NULL
+#elif 1 // 11/5
+#define N1 0
+#define SHIFT1 1
+#define COEFFS1 NULL
+#define N2 2
+#define SHIFT2 2
+#define COEFFS2 (int[]){-1,-1}
+#define N3 2
+#define SHIFT3 0
+#define COEFFS3 (int[]){-1,-1}
+#define N4 4
+#define SHIFT4 7
+#define COEFFS4 (int[]){-5,29,29,-5}
+#define SCALEX 4
+#elif 1 // 9/7 CDF
+#define N1 2
+#define SHIFT1 7
+#define COEFFS1 (int[]){-203,-203}
+#define N2 2
+#define SHIFT2 12
+#define COEFFS2 (int[]){-217,-217}
+#define N3 2
+#define SHIFT3 7
+#define COEFFS3 (int[]){113,113}
+#define N4 2
+#define SHIFT4 9
+#define COEFFS4 (int[]){227,227}
+#define SCALEX 1
+#elif 1 // 7/5 CDF
+#define N1 0
+#define SHIFT1 1
+#define COEFFS1 NULL
+#define N2 2
+#define SHIFT2 2
+#define COEFFS2 (int[]){-1,-1}
+#define N3 2
+#define SHIFT3 0
+#define COEFFS3 (int[]){-1,-1}
+#define N4 2
+#define SHIFT4 4
+#define COEFFS4 (int[]){3,3}
+#elif 1 // 9/7 MN
+#define N1 4
+#define SHIFT1 4
+#define COEFFS1 (int[]){1,-9,-9,1}
+#define N2 2
+#define SHIFT2 2
+#define COEFFS2 (int[]){1,1}
+#define N3 0
+#define SHIFT3 1
+#define COEFFS3 NULL
+#define N4 0
+#define SHIFT4 1
+#define COEFFS4 NULL
+#else // 13/7 CRF
+#define N1 4
+#define SHIFT1 4
+#define COEFFS1 (int[]){1,-9,-9,1}
+#define N2 4
+#define SHIFT2 4
+#define COEFFS2 (int[]){-1,5,5,-1}
+#define N3 0
+#define SHIFT3 1
+#define COEFFS3 NULL
+#define N4 0
+#define SHIFT4 1
+#define COEFFS4 NULL
+#endif
+static void horizontal_decomposeX(DWTELEM *b, int width){
+    DWTELEM temp[width];
+    const int width2= width>>1;
+    const int w2= (width+1)>>1;
+    int x;
+
+    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
+    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
+    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
+    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
+
+    for(x=0; x<width2; x++){
+        temp[x   ]= b[2*x    ];
+        temp[x+w2]= b[2*x + 1];
+    }
+    if(width&1)
+        temp[x   ]= b[2*x    ];
+    memcpy(b, temp, width*sizeof(int));
+}
+
+static void horizontal_composeX(DWTELEM *b, int width){
+    DWTELEM temp[width];
+    const int width2= width>>1;
+    int x;
+    const int w2= (width+1)>>1;
+
+    memcpy(temp, b, width*sizeof(int));
+    for(x=0; x<width2; x++){
+        b[2*x    ]= temp[x   ];
+        b[2*x + 1]= temp[x+w2];
+    }
+    if(width&1)
+        b[2*x    ]= temp[x   ];
+
+    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
+    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
+    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
+    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
+}
+
+static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
+    int x, y;
+
+    for(y=0; y<height; y++){
+        for(x=0; x<width; x++){
+            buffer[y*stride + x] *= SCALEX;
+        }
+    }
+
+    for(y=0; y<height; y++){
+        horizontal_decomposeX(buffer + y*stride, width);
+    }
+
+    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
+    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
+    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
+    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
+}
+
+static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
+    int x, y;
+
+    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
+    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
+    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
+    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
+
+    for(y=0; y<height; y++){
+        horizontal_composeX(buffer + y*stride, width);
+    }
+
+    for(y=0; y<height; y++){
+        for(x=0; x<width; x++){
+            buffer[y*stride + x] /= SCALEX;
+        }
+    }
+}
+
+static void horizontal_decompose53i(DWTELEM *b, int width){
+    DWTELEM temp[width];
+    const int width2= width>>1;
+    int x;
+    const int w2= (width+1)>>1;
+
+    for(x=0; x<width2; x++){
+        temp[x   ]= b[2*x    ];
+        temp[x+w2]= b[2*x + 1];
+    }
+    if(width&1)
+        temp[x   ]= b[2*x    ];
+#if 0
+    {
+    int A1,A2,A3,A4;
+    A2= temp[1       ];
+    A4= temp[0       ];
+    A1= temp[0+width2];
+    A1 -= (A2 + A4)>>1;
+    A4 += (A1 + 1)>>1;
+    b[0+width2] = A1;
+    b[0       ] = A4;
+    for(x=1; x+1<width2; x+=2){
+        A3= temp[x+width2];
+        A4= temp[x+1     ];
+        A3 -= (A2 + A4)>>1;
+        A2 += (A1 + A3 + 2)>>2;
+        b[x+width2] = A3;
+        b[x       ] = A2;
+
+        A1= temp[x+1+width2];
+        A2= temp[x+2       ];
+        A1 -= (A2 + A4)>>1;
+        A4 += (A1 + A3 + 2)>>2;
+        b[x+1+width2] = A1;
+        b[x+1       ] = A4;
+    }
+    A3= temp[width-1];
+    A3 -= A2;
+    A2 += (A1 + A3 + 2)>>2;
+    b[width -1] = A3;
+    b[width2-1] = A2;
+    }
+#else
+    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
+    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
+#endif
+}
+
+static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+    int i;
+
+    for(i=0; i<width; i++){
+        b1[i] -= (b0[i] + b2[i])>>1;
+    }
+}
+
+static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+    int i;
+
+    for(i=0; i<width; i++){
+        b1[i] += (b0[i] + b2[i] + 2)>>2;
+    }
+}
+
+static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
+    int y;
+    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
+    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
+
+    for(y=-2; y<height; y+=2){
+        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
+        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
+
+{START_TIMER
+        if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
+        if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
+STOP_TIMER("horizontal_decompose53i")}
+
+{START_TIMER
+        if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
+        if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
+STOP_TIMER("vertical_decompose53i*")}
+
+        b0=b2;
+        b1=b3;
+    }
+}
+
+static void horizontal_decompose97i(DWTELEM *b, int width){
+    DWTELEM temp[width];
+    const int w2= (width+1)>>1;
+
+    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
+    liftS(temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
+    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
+    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
+}
+
+
+static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+    int i;
+
+    for(i=0; i<width; i++){
+        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
+    }
+}
+
+static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+    int i;
+
+    for(i=0; i<width; i++){
+#ifdef lift5
+        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
+#else
+        int r= 3*(b0[i] + b2[i]);
+        r+= r>>4;
+        r+= r>>8;
+        b1[i] += (r+W_CO)>>W_CS;
+#endif
+    }
+}
+
+static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+    int i;
+
+    for(i=0; i<width; i++){
+#ifdef liftS
+        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
+#else
+        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
+#endif
+    }
+}
+
+static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+    int i;
+
+    for(i=0; i<width; i++){
+        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
+    }
+}
+
+static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
+    int y;
+    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
+    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
+    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
+    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
+
+    for(y=-4; y<height; y+=2){
+        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
+        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
+
+{START_TIMER
+        if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
+        if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
+if(width>400){
+STOP_TIMER("horizontal_decompose97i")
+}}
+
+{START_TIMER
+        if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
+        if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
+        if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
+        if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
+
+if(width>400){
+STOP_TIMER("vertical_decompose97i")
+}}
+
+        b0=b2;
+        b1=b3;
+        b2=b4;
+        b3=b5;
+    }
+}
+
+void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
+    int level;
+
+    for(level=0; level<decomposition_count; level++){
+        switch(type){
+        case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
+        case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
+        case DWT_X: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
+        }
+    }
+}
+
+static void horizontal_compose53i(DWTELEM *b, int width){
+    DWTELEM temp[width];
+    const int width2= width>>1;
+    const int w2= (width+1)>>1;
+    int x;
+
+#if 0
+    int A1,A2,A3,A4;
+    A2= temp[1       ];
+    A4= temp[0       ];
+    A1= temp[0+width2];
+    A1 -= (A2 + A4)>>1;
+    A4 += (A1 + 1)>>1;
+    b[0+width2] = A1;
+    b[0       ] = A4;
+    for(x=1; x+1<width2; x+=2){
+        A3= temp[x+width2];
+        A4= temp[x+1     ];
+        A3 -= (A2 + A4)>>1;
+        A2 += (A1 + A3 + 2)>>2;
+        b[x+width2] = A3;
+        b[x       ] = A2;
+
+        A1= temp[x+1+width2];
+        A2= temp[x+2       ];
+        A1 -= (A2 + A4)>>1;
+        A4 += (A1 + A3 + 2)>>2;
+        b[x+1+width2] = A1;
+        b[x+1       ] = A4;
+    }
+    A3= temp[width-1];
+    A3 -= A2;
+    A2 += (A1 + A3 + 2)>>2;
+    b[width -1] = A3;
+    b[width2-1] = A2;
+#else
+    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
+    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
+#endif
+    for(x=0; x<width2; x++){
+        b[2*x    ]= temp[x   ];
+        b[2*x + 1]= temp[x+w2];
+    }
+    if(width&1)
+        b[2*x    ]= temp[x   ];
+}
+
+static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+    int i;
+
+    for(i=0; i<width; i++){
+        b1[i] += (b0[i] + b2[i])>>1;
+    }
+}
+
+static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+    int i;
+
+    for(i=0; i<width; i++){
+        b1[i] -= (b0[i] + b2[i] + 2)>>2;
+    }
+}
+
+static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
+    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
+    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
+    cs->y = -1;
+}
+
+static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
+    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
+    cs->b1 = buffer + mirror(-1  , height-1)*stride;
+    cs->y = -1;
+}
+
+static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
+    int y= cs->y;
+
+    DWTELEM *b0= cs->b0;
+    DWTELEM *b1= cs->b1;
+    DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
+    DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
+
+{START_TIMER
+        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
+        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
+STOP_TIMER("vertical_compose53i*")}
+
+{START_TIMER
+        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
+        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
+STOP_TIMER("horizontal_compose53i")}
+
+    cs->b0 = b2;
+    cs->b1 = b3;
+    cs->y += 2;
+}
+
+static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
+    int y= cs->y;
+    DWTELEM *b0= cs->b0;
+    DWTELEM *b1= cs->b1;
+    DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
+    DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
+
+{START_TIMER
+        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
+        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
+STOP_TIMER("vertical_compose53i*")}
+
+{START_TIMER
+        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
+        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
+STOP_TIMER("horizontal_compose53i")}
+
+    cs->b0 = b2;
+    cs->b1 = b3;
+    cs->y += 2;
+}
+
+static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
+    dwt_compose_t cs;
+    spatial_compose53i_init(&cs, buffer, height, stride);
+    while(cs.y <= height)
+        spatial_compose53i_dy(&cs, buffer, width, height, stride);
+}
+
+
+void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
+    DWTELEM temp[width];
+    const int w2= (width+1)>>1;
+
+    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
+    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
+    liftS(b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
+    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
+}
+
+static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+    int i;
+
+    for(i=0; i<width; i++){
+        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
+    }
+}
+
+static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+    int i;
+
+    for(i=0; i<width; i++){
+#ifdef lift5
+        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
+#else
+        int r= 3*(b0[i] + b2[i]);
+        r+= r>>4;
+        r+= r>>8;
+        b1[i] -= (r+W_CO)>>W_CS;
+#endif
+    }
+}
+
+static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+    int i;
+
+    for(i=0; i<width; i++){
+#ifdef liftS
+        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
+#else
+        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
+#endif
+    }
+}
+
+static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
+    int i;
+
+    for(i=0; i<width; i++){
+        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
+    }
+}
+
+void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
+    int i;
+
+    for(i=0; i<width; i++){
+#ifndef lift5
+        int r;
+#endif
+        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
+#ifdef lift5
+        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
+#else
+        r= 3*(b2[i] + b4[i]);
+        r+= r>>4;
+        r+= r>>8;
+        b3[i] -= (r+W_CO)>>W_CS;
+#endif
+#ifdef liftS
+        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
+#else
+        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
+#endif
+        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
+    }
+}
+
+static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
+    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
+    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
+    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
+    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
+    cs->y = -3;
+}
+
+static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
+    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
+    cs->b1 = buffer + mirror(-3  , height-1)*stride;
+    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
+    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
+    cs->y = -3;
+}
+
+static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
+    int y = cs->y;
+
+    DWTELEM *b0= cs->b0;
+    DWTELEM *b1= cs->b1;
+    DWTELEM *b2= cs->b2;
+    DWTELEM *b3= cs->b3;
+    DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
+    DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
+
+{START_TIMER
+    if(y>0 && y+4<height){
+        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
+    }else{
+        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
+        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
+        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
+        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
+    }
+if(width>400){
+STOP_TIMER("vertical_compose97i")}}
+
+{START_TIMER
+        if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
+        if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
+if(width>400 && y+0<(unsigned)height){
+STOP_TIMER("horizontal_compose97i")}}
+
+    cs->b0=b2;
+    cs->b1=b3;
+    cs->b2=b4;
+    cs->b3=b5;
+    cs->y += 2;
+}
+
+static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
+    int y = cs->y;
+    DWTELEM *b0= cs->b0;
+    DWTELEM *b1= cs->b1;
+    DWTELEM *b2= cs->b2;
+    DWTELEM *b3= cs->b3;
+    DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
+    DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
+
+{START_TIMER
+        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
+        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
+        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
+        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
+if(width>400){
+STOP_TIMER("vertical_compose97i")}}
+
+{START_TIMER
+        if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
+        if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
+if(width>400 && b0 <= b2){
+STOP_TIMER("horizontal_compose97i")}}
+
+    cs->b0=b2;
+    cs->b1=b3;
+    cs->b2=b4;
+    cs->b3=b5;
+    cs->y += 2;
+}
+
+static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
+    dwt_compose_t cs;
+    spatial_compose97i_init(&cs, buffer, height, stride);
+    while(cs.y <= height)
+        spatial_compose97i_dy(&cs, buffer, width, height, stride);
+}
+
+static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
+    int level;
+    for(level=decomposition_count-1; level>=0; level--){
+        switch(type){
+        case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
+        case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
+        /* not slicified yet */
+        case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
+          av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
+        }
+    }
+}
+
+static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
+    int level;
+    for(level=decomposition_count-1; level>=0; level--){
+        switch(type){
+        case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
+        case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
+        /* not slicified yet */
+        case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
+        }
+    }
+}
+
+static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
+    const int support = type==1 ? 3 : 5;
+    int level;
+    if(type==2) return;
+
+    for(level=decomposition_count-1; level>=0; level--){
+        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
+            switch(type){
+            case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
+                    break;
+            case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
+                    break;
+            case DWT_X: break;
+            }
+        }
+    }
+}
+
+static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
+    const int support = type==1 ? 3 : 5;
+    int level;
+    if(type==2) return;
+
+    for(level=decomposition_count-1; level>=0; level--){
+        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
+            switch(type){
+            case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
+                    break;
+            case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
+                    break;
+            case DWT_X: break;
+            }
+        }
+    }
+}
+
+static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
+    if(type==2){
+        int level;
+        for(level=decomposition_count-1; level>=0; level--)
+            spatial_composeX  (buffer, width>>level, height>>level, stride<<level);
+    }else{
+        dwt_compose_t cs[MAX_DECOMPOSITIONS];
+        int y;
+        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
+        for(y=0; y<height; y+=4)
+            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
+    }
+}
+
+static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
+    const int w= b->width;
+    const int h= b->height;
+    int x, y;
+
+    if(1){
+        int run=0;
+        int runs[w*h];
+        int run_index=0;
+        int max_index;
+
+        for(y=0; y<h; y++){
+            for(x=0; x<w; x++){
+                int v, p=0;
+                int /*ll=0, */l=0, lt=0, t=0, rt=0;
+                v= src[x + y*stride];
+
+                if(y){
+                    t= src[x + (y-1)*stride];
+                    if(x){
+                        lt= src[x - 1 + (y-1)*stride];
+                    }
+                    if(x + 1 < w){
+                        rt= src[x + 1 + (y-1)*stride];
+                    }
+                }
+                if(x){
+                    l= src[x - 1 + y*stride];
+                    /*if(x > 1){
+                        if(orientation==1) ll= src[y + (x-2)*stride];
+                        else               ll= src[x - 2 + y*stride];
+                    }*/
+                }
+                if(parent){
+                    int px= x>>1;
+                    int py= y>>1;
+                    if(px<b->parent->width && py<b->parent->height)
+                        p= parent[px + py*2*stride];
+                }
+                if(!(/*ll|*/l|lt|t|rt|p)){
+                    if(v){
+                        runs[run_index++]= run;
+                        run=0;
+                    }else{
+                        run++;
+                    }
+                }
+            }
+        }
+        max_index= run_index;
+        runs[run_index++]= run;
+        run_index=0;
+        run= runs[run_index++];
+
+        put_symbol2(&s->c, b->state[30], max_index, 0);
+        if(run_index <= max_index)
+            put_symbol2(&s->c, b->state[1], run, 3);
+
+        for(y=0; y<h; y++){
+            if(s->c.bytestream_end - s->c.bytestream < w*40){
+                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+                return -1;
+            }
+            for(x=0; x<w; x++){
+                int v, p=0;
+                int /*ll=0, */l=0, lt=0, t=0, rt=0;
+                v= src[x + y*stride];
+
+                if(y){
+                    t= src[x + (y-1)*stride];
+                    if(x){
+                        lt= src[x - 1 + (y-1)*stride];
+                    }
+                    if(x + 1 < w){
+                        rt= src[x + 1 + (y-1)*stride];
+                    }
+                }
+                if(x){
+                    l= src[x - 1 + y*stride];
+                    /*if(x > 1){
+                        if(orientation==1) ll= src[y + (x-2)*stride];
+                        else               ll= src[x - 2 + y*stride];
+                    }*/
+                }
+                if(parent){
+                    int px= x>>1;
+                    int py= y>>1;
+                    if(px<b->parent->width && py<b->parent->height)
+                        p= parent[px + py*2*stride];
+                }
+                if(/*ll|*/l|lt|t|rt|p){
+                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
+
+                    put_rac(&s->c, &b->state[0][context], !!v);
+                }else{
+                    if(!run){
+                        run= runs[run_index++];
+
+                        if(run_index <= max_index)
+                            put_symbol2(&s->c, b->state[1], run, 3);
+                        assert(v);
+                    }else{
+                        run--;
+                        assert(!v);
+                    }
+                }
+                if(v){
+                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
+                    int l2= 2*FFABS(l) + (l<0);
+                    int t2= 2*FFABS(t) + (t<0);
+
+                    put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
+                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
+//    encode_subband_qtree(s, b, src, parent, stride, orientation);
+//    encode_subband_z0run(s, b, src, parent, stride, orientation);
+    return encode_subband_c0run(s, b, src, parent, stride, orientation);
+//    encode_subband_dzr(s, b, src, parent, stride, orientation);
+}
+
+static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
+    const int w= b->width;
+    const int h= b->height;
+    int x,y;
+
+    if(1){
+        int run, runs;
+        x_and_coeff *xc= b->x_coeff;
+        x_and_coeff *prev_xc= NULL;
+        x_and_coeff *prev2_xc= xc;
+        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
+        x_and_coeff *prev_parent_xc= parent_xc;
+
+        runs= get_symbol2(&s->c, b->state[30], 0);
+        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
+        else           run= INT_MAX;
+
+        for(y=0; y<h; y++){
+            int v=0;
+            int lt=0, t=0, rt=0;
+
+            if(y && prev_xc->x == 0){
+                rt= prev_xc->coeff;
+            }
+            for(x=0; x<w; x++){
+                int p=0;
+                const int l= v;
+
+                lt= t; t= rt;
+
+                if(y){
+                    if(prev_xc->x <= x)
+                        prev_xc++;
+                    if(prev_xc->x == x + 1)
+                        rt= prev_xc->coeff;
+                    else
+                        rt=0;
+                }
+                if(parent_xc){
+                    if(x>>1 > parent_xc->x){
+                        parent_xc++;
+                    }
+                    if(x>>1 == parent_xc->x){
+                        p= parent_xc->coeff;
+                    }
+                }
+                if(/*ll|*/l|lt|t|rt|p){
+                    int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
+
+                    v=get_rac(&s->c, &b->state[0][context]);
+                    if(v){
+                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
+                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
+
+                        xc->x=x;
+                        (xc++)->coeff= v;
+                    }
+                }else{
+                    if(!run){
+                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
+                        else           run= INT_MAX;
+                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
+                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
+
+                        xc->x=x;
+                        (xc++)->coeff= v;
+                    }else{
+                        int max_run;
+                        run--;
+                        v=0;
+
+                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
+                        else  max_run= FFMIN(run, w-x-1);
+                        if(parent_xc)
+                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
+                        x+= max_run;
+                        run-= max_run;
+                    }
+                }
+            }
+            (xc++)->x= w+1; //end marker
+            prev_xc= prev2_xc;
+            prev2_xc= xc;
+
+            if(parent_xc){
+                if(y&1){
+                    while(parent_xc->x != parent->width+1)
+                        parent_xc++;
+                    parent_xc++;
+                    prev_parent_xc= parent_xc;
+                }else{
+                    parent_xc= prev_parent_xc;
+                }
+            }
+        }
+
+        (xc++)->x= w+1; //end marker
+    }
+}
+
+static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
+    const int w= b->width;
+    int y;
+    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
+    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
+    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
+    int new_index = 0;
+
+    START_TIMER
+
+    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
+        qadd= 0;
+        qmul= 1<<QEXPSHIFT;
+    }
+
+    /* If we are on the second or later slice, restore our index. */
+    if (start_y != 0)
+        new_index = save_state[0];
+
+
+    for(y=start_y; y<h; y++){
+        int x = 0;
+        int v;
+        DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
+        memset(line, 0, b->width*sizeof(DWTELEM));
+        v = b->x_coeff[new_index].coeff;
+        x = b->x_coeff[new_index++].x;
+        while(x < w)
+        {
+            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
+            register int u= -(v&1);
+            line[x] = (t^u) - u;
+
+            v = b->x_coeff[new_index].coeff;
+            x = b->x_coeff[new_index++].x;
+        }
+    }
+    if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
+        STOP_TIMER("decode_subband")
+    }
+
+    /* Save our variables for the next slice. */
+    save_state[0] = new_index;
+
+    return;
+}
+
+static void reset_contexts(SnowContext *s){
+    int plane_index, level, orientation;
+
+    for(plane_index=0; plane_index<3; plane_index++){
+        for(level=0; level<s->spatial_decomposition_count; level++){
+            for(orientation=level ? 1:0; orientation<4; orientation++){
+                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
+            }
+        }
+    }
+    memset(s->header_state, MID_STATE, sizeof(s->header_state));
+    memset(s->block_state, MID_STATE, sizeof(s->block_state));
+}
+
+static int alloc_blocks(SnowContext *s){
+    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
+    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
+
+    s->b_width = w;
+    s->b_height= h;
+
+    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
+    return 0;
+}
+
+static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
+    uint8_t *bytestream= d->bytestream;
+    uint8_t *bytestream_start= d->bytestream_start;
+    *d= *s;
+    d->bytestream= bytestream;
+    d->bytestream_start= bytestream_start;
+}
+
+//near copy & paste from dsputil, FIXME
+static int pix_sum(uint8_t * pix, int line_size, int w)
+{
+    int s, i, j;
+
+    s = 0;
+    for (i = 0; i < w; i++) {
+        for (j = 0; j < w; j++) {
+            s += pix[0];
+            pix ++;
+        }
+        pix += line_size - w;
+    }
+    return s;
+}
+
+//near copy & paste from dsputil, FIXME
+static int pix_norm1(uint8_t * pix, int line_size, int w)
+{
+    int s, i, j;
+    uint32_t *sq = ff_squareTbl + 256;
+
+    s = 0;
+    for (i = 0; i < w; i++) {
+        for (j = 0; j < w; j ++) {
+            s += sq[pix[0]];
+            pix ++;
+        }
+        pix += line_size - w;
+    }
+    return s;
+}
+
+static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
+    const int w= s->b_width << s->block_max_depth;
+    const int rem_depth= s->block_max_depth - level;
+    const int index= (x + y*w) << rem_depth;
+    const int block_w= 1<<rem_depth;
+    BlockNode block;
+    int i,j;
+
+    block.color[0]= l;
+    block.color[1]= cb;
+    block.color[2]= cr;
+    block.mx= mx;
+    block.my= my;
+    block.ref= ref;
+    block.type= type;
+    block.level= level;
+
+    for(j=0; j<block_w; j++){
+        for(i=0; i<block_w; i++){
+            s->block[index + i + j*w]= block;
+        }
+    }
+}
+
+static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
+    const int offset[3]= {
+          y*c->  stride + x,
+        ((y*c->uvstride + x)>>1),
+        ((y*c->uvstride + x)>>1),
+    };
+    int i;
+    for(i=0; i<3; i++){
+        c->src[0][i]= src [i];
+        c->ref[0][i]= ref [i] + offset[i];
+    }
+    assert(!ref_index);
+}
+
+static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
+                           BlockNode *left, BlockNode *top, BlockNode *tr){
+    if(s->ref_frames == 1){
+        *mx = mid_pred(left->mx, top->mx, tr->mx);
+        *my = mid_pred(left->my, top->my, tr->my);
+    }else{
+        const int *scale = scale_mv_ref[ref];
+        *mx = mid_pred(left->mx * scale[left->ref] + 128 >>8,
+                       top ->mx * scale[top ->ref] + 128 >>8,
+                       tr  ->mx * scale[tr  ->ref] + 128 >>8);
+        *my = mid_pred(left->my * scale[left->ref] + 128 >>8,
+                       top ->my * scale[top ->ref] + 128 >>8,
+                       tr  ->my * scale[tr  ->ref] + 128 >>8);
+    }
+}
+
+//FIXME copy&paste
+#define P_LEFT P[1]
+#define P_TOP P[2]
+#define P_TOPRIGHT P[3]
+#define P_MEDIAN P[4]
+#define P_MV1 P[9]
+#define FLAG_QPEL   1 //must be 1
+
+static int encode_q_branch(SnowContext *s, int level, int x, int y){
+    uint8_t p_buffer[1024];
+    uint8_t i_buffer[1024];
+    uint8_t p_state[sizeof(s->block_state)];
+    uint8_t i_state[sizeof(s->block_state)];
+    RangeCoder pc, ic;
+    uint8_t *pbbak= s->c.bytestream;
+    uint8_t *pbbak_start= s->c.bytestream_start;
+    int score, score2, iscore, i_len, p_len, block_s, sum;
+    const int w= s->b_width  << s->block_max_depth;
+    const int h= s->b_height << s->block_max_depth;
+    const int rem_depth= s->block_max_depth - level;
+    const int index= (x + y*w) << rem_depth;
+    const int block_w= 1<<(LOG2_MB_SIZE - level);
+    int trx= (x+1)<<rem_depth;
+    int try= (y+1)<<rem_depth;
+    BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    BlockNode *top   = y ? &s->block[index-w] : &null_block;
+    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
+    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
+    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
+    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+    int pl = left->color[0];
+    int pcb= left->color[1];
+    int pcr= left->color[2];
+    int pmx, pmy;
+    int mx=0, my=0;
+    int l,cr,cb;
+    const int stride= s->current_picture.linesize[0];
+    const int uvstride= s->current_picture.linesize[1];
+    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
+                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
+                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
+    int P[10][2];
+    int16_t last_mv[3][2];
+    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
+    const int shift= 1+qpel;
+    MotionEstContext *c= &s->m.me;
+    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
+    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
+    int my_context= av_log2(2*FFABS(left->my - top->my));
+    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
+    int ref, best_ref, ref_score, ref_mx, ref_my;
+
+    assert(sizeof(s->block_state) >= 256);
+    if(s->keyframe){
+        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
+        return 0;
+    }
+
+//    clip predictors / edge ?
+
+    P_LEFT[0]= left->mx;
+    P_LEFT[1]= left->my;
+    P_TOP [0]= top->mx;
+    P_TOP [1]= top->my;
+    P_TOPRIGHT[0]= tr->mx;
+    P_TOPRIGHT[1]= tr->my;
+
+    last_mv[0][0]= s->block[index].mx;
+    last_mv[0][1]= s->block[index].my;
+    last_mv[1][0]= right->mx;
+    last_mv[1][1]= right->my;
+    last_mv[2][0]= bottom->mx;
+    last_mv[2][1]= bottom->my;
+
+    s->m.mb_stride=2;
+    s->m.mb_x=
+    s->m.mb_y= 0;
+    s->m.me.skip= 0;
+
+    assert(s->m.me.  stride ==   stride);
+    assert(s->m.me.uvstride == uvstride);
+
+    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
+    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
+    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
+    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
+
+    c->xmin = - x*block_w - 16+2;
+    c->ymin = - y*block_w - 16+2;
+    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
+    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
+
+    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
+    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
+    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
+    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
+    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
+    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
+    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
+
+    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
+    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
+
+    if (!y) {
+        c->pred_x= P_LEFT[0];
+        c->pred_y= P_LEFT[1];
+    } else {
+        c->pred_x = P_MEDIAN[0];
+        c->pred_y = P_MEDIAN[1];
+    }
+
+    score= INT_MAX;
+    best_ref= 0;
+    for(ref=0; ref<s->ref_frames; ref++){
+        init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
+
+        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
+                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
+
+        assert(ref_mx >= c->xmin);
+        assert(ref_mx <= c->xmax);
+        assert(ref_my >= c->ymin);
+        assert(ref_my <= c->ymax);
+
+        ref_score= s->m.me.sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
+        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
+        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
+        if(s->ref_mvs[ref]){
+            s->ref_mvs[ref][index][0]= ref_mx;
+            s->ref_mvs[ref][index][1]= ref_my;
+            s->ref_scores[ref][index]= ref_score;
+        }
+        if(score > ref_score){
+            score= ref_score;
+            best_ref= ref;
+            mx= ref_mx;
+            my= ref_my;
+        }
+    }
+    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
+
+  //  subpel search
+    pc= s->c;
+    pc.bytestream_start=
+    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
+    memcpy(p_state, s->block_state, sizeof(s->block_state));
+
+    if(level!=s->block_max_depth)
+        put_rac(&pc, &p_state[4 + s_context], 1);
+    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
+    if(s->ref_frames > 1)
+        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
+    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
+    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
+    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
+    p_len= pc.bytestream - pc.bytestream_start;
+    score += (s->lambda2*(p_len*8
+              + (pc.outstanding_count - s->c.outstanding_count)*8
+              + (-av_log2(pc.range)    + av_log2(s->c.range))
+             ))>>FF_LAMBDA_SHIFT;
+
+    block_s= block_w*block_w;
+    sum = pix_sum(current_data[0], stride, block_w);
+    l= (sum + block_s/2)/block_s;
+    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
+
+    block_s= block_w*block_w>>2;
+    sum = pix_sum(current_data[1], uvstride, block_w>>1);
+    cb= (sum + block_s/2)/block_s;
+//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
+    sum = pix_sum(current_data[2], uvstride, block_w>>1);
+    cr= (sum + block_s/2)/block_s;
+//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
+
+    ic= s->c;
+    ic.bytestream_start=
+    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
+    memcpy(i_state, s->block_state, sizeof(s->block_state));
+    if(level!=s->block_max_depth)
+        put_rac(&ic, &i_state[4 + s_context], 1);
+    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
+    put_symbol(&ic, &i_state[32],  l-pl , 1);
+    put_symbol(&ic, &i_state[64], cb-pcb, 1);
+    put_symbol(&ic, &i_state[96], cr-pcr, 1);
+    i_len= ic.bytestream - ic.bytestream_start;
+    iscore += (s->lambda2*(i_len*8
+              + (ic.outstanding_count - s->c.outstanding_count)*8
+              + (-av_log2(ic.range)    + av_log2(s->c.range))
+             ))>>FF_LAMBDA_SHIFT;
+
+//    assert(score==256*256*256*64-1);
+    assert(iscore < 255*255*256 + s->lambda2*10);
+    assert(iscore >= 0);
+    assert(l>=0 && l<=255);
+    assert(pl>=0 && pl<=255);
+
+    if(level==0){
+        int varc= iscore >> 8;
+        int vard= score >> 8;
+        if (vard <= 64 || vard < varc)
+            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
+        else
+            c->scene_change_score+= s->m.qscale;
+    }
+
+    if(level!=s->block_max_depth){
+        put_rac(&s->c, &s->block_state[4 + s_context], 0);
+        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
+        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
+        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
+        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
+        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
+
+        if(score2 < score && score2 < iscore)
+            return score2;
+    }
+
+    if(iscore < score){
+        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
+        memcpy(pbbak, i_buffer, i_len);
+        s->c= ic;
+        s->c.bytestream_start= pbbak_start;
+        s->c.bytestream= pbbak + i_len;
+        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
+        memcpy(s->block_state, i_state, sizeof(s->block_state));
+        return iscore;
+    }else{
+        memcpy(pbbak, p_buffer, p_len);
+        s->c= pc;
+        s->c.bytestream_start= pbbak_start;
+        s->c.bytestream= pbbak + p_len;
+        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
+        memcpy(s->block_state, p_state, sizeof(s->block_state));
+        return score;
+    }
+}
+
+static always_inline int same_block(BlockNode *a, BlockNode *b){
+    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
+        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
+    }else{
+        return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
+    }
+}
+
+static void encode_q_branch2(SnowContext *s, int level, int x, int y){
+    const int w= s->b_width  << s->block_max_depth;
+    const int rem_depth= s->block_max_depth - level;
+    const int index= (x + y*w) << rem_depth;
+    int trx= (x+1)<<rem_depth;
+    BlockNode *b= &s->block[index];
+    BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    BlockNode *top   = y ? &s->block[index-w] : &null_block;
+    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
+    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+    int pl = left->color[0];
+    int pcb= left->color[1];
+    int pcr= left->color[2];
+    int pmx, pmy;
+    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
+    int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
+    int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
+    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
+
+    if(s->keyframe){
+        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
+        return;
+    }
+
+    if(level!=s->block_max_depth){
+        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
+            put_rac(&s->c, &s->block_state[4 + s_context], 1);
+        }else{
+            put_rac(&s->c, &s->block_state[4 + s_context], 0);
+            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
+            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
+            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
+            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
+            return;
+        }
+    }
+    if(b->type & BLOCK_INTRA){
+        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
+        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
+        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
+        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
+        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
+        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
+    }else{
+        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
+        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
+        if(s->ref_frames > 1)
+            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
+        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
+        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
+        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
+    }
+}
+
+static void decode_q_branch(SnowContext *s, int level, int x, int y){
+    const int w= s->b_width << s->block_max_depth;
+    const int rem_depth= s->block_max_depth - level;
+    const int index= (x + y*w) << rem_depth;
+    int trx= (x+1)<<rem_depth;
+    BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    BlockNode *top   = y ? &s->block[index-w] : &null_block;
+    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
+    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
+
+    if(s->keyframe){
+        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
+        return;
+    }
+
+    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
+        int type;
+        int l = left->color[0];
+        int cb= left->color[1];
+        int cr= left->color[2];
+        int mx= mid_pred(left->mx, top->mx, tr->mx);
+        int my= mid_pred(left->my, top->my, tr->my);
+        int ref = 0;
+        int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
+        int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
+        int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
+
+        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
+
+        if(type){
+            pred_mv(s, &mx, &my, 0, left, top, tr);
+            l += get_symbol(&s->c, &s->block_state[32], 1);
+            cb+= get_symbol(&s->c, &s->block_state[64], 1);
+            cr+= get_symbol(&s->c, &s->block_state[96], 1);
+        }else{
+            if(s->ref_frames > 1)
+                ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
+            pred_mv(s, &mx, &my, ref, left, top, tr);
+            mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
+            my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
+        }
+        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
+    }else{
+        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
+        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
+        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
+        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
+    }
+}
+
+static void encode_blocks(SnowContext *s, int search){
+    int x, y;
+    int w= s->b_width;
+    int h= s->b_height;
+
+    if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
+        iterative_me(s);
+
+    for(y=0; y<h; y++){
+        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
+            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+            return;
+        }
+        for(x=0; x<w; x++){
+            if(s->avctx->me_method == ME_ITER || !search)
+                encode_q_branch2(s, 0, x, y);
+            else
+                encode_q_branch (s, 0, x, y);
+        }
+    }
+}
+
+static void decode_blocks(SnowContext *s){
+    int x, y;
+    int w= s->b_width;
+    int h= s->b_height;
+
+    for(y=0; y<h; y++){
+        for(x=0; x<w; x++){
+            decode_q_branch(s, 0, x, y);
+        }
+    }
+}
+
+static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
+    int x, y;
+START_TIMER
+    for(y=0; y < b_h+5; y++){
+        for(x=0; x < b_w; x++){
+            int a0= src[x    ];
+            int a1= src[x + 1];
+            int a2= src[x + 2];
+            int a3= src[x + 3];
+            int a4= src[x + 4];
+            int a5= src[x + 5];
+//            int am= 9*(a1+a2) - (a0+a3);
+            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
+//            int am= 18*(a2+a3) - 2*(a1+a4);
+//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
+//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
+
+//            if(b_w==16) am= 8*(a1+a2);
+
+            if(dx<8) am = (32*a2*( 8-dx) +    am* dx    + 128)>>8;
+            else     am = (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
+
+            /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
+            if(am&(~255)) am= ~(am>>31);
+
+            tmp[x] = am;
+
+/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
+            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
+            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
+            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
+        }
+        tmp += stride;
+        src += stride;
+    }
+    tmp -= (b_h+5)*stride;
+
+    for(y=0; y < b_h; y++){
+        for(x=0; x < b_w; x++){
+            int a0= tmp[x + 0*stride];
+            int a1= tmp[x + 1*stride];
+            int a2= tmp[x + 2*stride];
+            int a3= tmp[x + 3*stride];
+            int a4= tmp[x + 4*stride];
+            int a5= tmp[x + 5*stride];
+            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
+//            int am= 18*(a2+a3) - 2*(a1+a4);
+/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
+            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
+
+//            if(b_w==16) am= 8*(a1+a2);
+
+            if(dy<8) am =  (32*a2*( 8-dy) +    am* dy    + 128)>>8;
+            else     am = (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
+
+            if(am&(~255)) am= ~(am>>31);
+
+            dst[x] = am;
+/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
+            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
+            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
+            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
+        }
+        dst += stride;
+        tmp += stride;
+    }
+STOP_TIMER("mc_block")
+}
+
+#define mca(dx,dy,b_w)\
+static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
+    uint8_t tmp[stride*(b_w+5)];\
+    assert(h==b_w);\
+    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
+}
+
+mca( 0, 0,16)
+mca( 8, 0,16)
+mca( 0, 8,16)
+mca( 8, 8,16)
+mca( 0, 0,8)
+mca( 8, 0,8)
+mca( 0, 8,8)
+mca( 8, 8,8)
+
+static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
+    if(block->type & BLOCK_INTRA){
+        int x, y;
+        const int color = block->color[plane_index];
+        const int color4= color*0x01010101;
+        if(b_w==32){
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+                *(uint32_t*)&dst[4 + y*stride]= color4;
+                *(uint32_t*)&dst[8 + y*stride]= color4;
+                *(uint32_t*)&dst[12+ y*stride]= color4;
+                *(uint32_t*)&dst[16+ y*stride]= color4;
+                *(uint32_t*)&dst[20+ y*stride]= color4;
+                *(uint32_t*)&dst[24+ y*stride]= color4;
+                *(uint32_t*)&dst[28+ y*stride]= color4;
+            }
+        }else if(b_w==16){
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+                *(uint32_t*)&dst[4 + y*stride]= color4;
+                *(uint32_t*)&dst[8 + y*stride]= color4;
+                *(uint32_t*)&dst[12+ y*stride]= color4;
+            }
+        }else if(b_w==8){
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+                *(uint32_t*)&dst[4 + y*stride]= color4;
+            }
+        }else if(b_w==4){
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+            }
+        }else{
+            for(y=0; y < b_h; y++){
+                for(x=0; x < b_w; x++){
+                    dst[x + y*stride]= color;
+                }
+            }
+        }
+    }else{
+        uint8_t *src= s->last_picture[block->ref].data[plane_index];
+        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
+        int mx= block->mx*scale;
+        int my= block->my*scale;
+        const int dx= mx&15;
+        const int dy= my&15;
+        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
+        sx += (mx>>4) - 2;
+        sy += (my>>4) - 2;
+        src += sx + sy*stride;
+        if(   (unsigned)sx >= w - b_w - 4
+           || (unsigned)sy >= h - b_h - 4){
+            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
+            src= tmp + MB_SIZE;
+        }
+//        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
+//        assert(!(b_w&(b_w-1)));
+        assert(b_w>1 && b_h>1);
+        assert(tab_index>=0 && tab_index<4 || b_w==32);
+        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
+            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
+        else if(b_w==32){
+            int y;
+            for(y=0; y<b_h; y+=16){
+                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
+                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
+            }
+        }else if(b_w==b_h)
+            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
+        else if(b_w==2*b_h){
+            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 2       + 2*stride,stride);
+            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
+        }else{
+            assert(2*b_w==b_h);
+            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 2 + 2*stride           ,stride);
+            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
+        }
+    }
+}
+
+void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+    int y, x;
+    DWTELEM * dst;
+    for(y=0; y<b_h; y++){
+        //FIXME ugly missue of obmc_stride
+        uint8_t *obmc1= obmc + y*obmc_stride;
+        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+        dst = slice_buffer_get_line(sb, src_y + y);
+        for(x=0; x<b_w; x++){
+            int v=   obmc1[x] * block[3][x + y*src_stride]
+                    +obmc2[x] * block[2][x + y*src_stride]
+                    +obmc3[x] * block[1][x + y*src_stride]
+                    +obmc4[x] * block[0][x + y*src_stride];
+
+            v <<= 8 - LOG2_OBMC_MAX;
+            if(FRAC_BITS != 8){
+                v += 1<<(7 - FRAC_BITS);
+                v >>= 8 - FRAC_BITS;
+            }
+            if(add){
+                v += dst[x + src_x];
+                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
+                if(v&(~255)) v= ~(v>>31);
+                dst8[x + y*src_stride] = v;
+            }else{
+                dst[x + src_x] -= v;
+            }
+        }
+    }
+}
+
+//FIXME name clenup (b_w, block_w, b_width stuff)
+static always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
+    const int b_width = s->b_width  << s->block_max_depth;
+    const int b_height= s->b_height << s->block_max_depth;
+    const int b_stride= b_width;
+    BlockNode *lt= &s->block[b_x + b_y*b_stride];
+    BlockNode *rt= lt+1;
+    BlockNode *lb= lt+b_stride;
+    BlockNode *rb= lb+1;
+    uint8_t *block[4];
+    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
+    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
+    uint8_t *ptmp;
+    int x,y;
+
+    if(b_x<0){
+        lt= rt;
+        lb= rb;
+    }else if(b_x + 1 >= b_width){
+        rt= lt;
+        rb= lb;
+    }
+    if(b_y<0){
+        lt= lb;
+        rt= rb;
+    }else if(b_y + 1 >= b_height){
+        lb= lt;
+        rb= rt;
+    }
+
+    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
+        obmc -= src_x;
+        b_w += src_x;
+        if(!sliced && !offset_dst)
+            dst -= src_x;
+        src_x=0;
+    }else if(src_x + b_w > w){
+        b_w = w - src_x;
+    }
+    if(src_y<0){
+        obmc -= src_y*obmc_stride;
+        b_h += src_y;
+        if(!sliced && !offset_dst)
+            dst -= src_y*dst_stride;
+        src_y=0;
+    }else if(src_y + b_h> h){
+        b_h = h - src_y;
+    }
+
+    if(b_w<=0 || b_h<=0) return;
+
+assert(src_stride > 2*MB_SIZE + 5);
+    if(!sliced && offset_dst)
+        dst += src_x + src_y*dst_stride;
+    dst8+= src_x + src_y*src_stride;
+//    src += src_x + src_y*src_stride;
+
+    ptmp= tmp + 3*tmp_step;
+    block[0]= ptmp;
+    ptmp+=tmp_step;
+    pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
+
+    if(same_block(lt, rt)){
+        block[1]= block[0];
+    }else{
+        block[1]= ptmp;
+        ptmp+=tmp_step;
+        pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
+    }
+
+    if(same_block(lt, lb)){
+        block[2]= block[0];
+    }else if(same_block(rt, lb)){
+        block[2]= block[1];
+    }else{
+        block[2]= ptmp;
+        ptmp+=tmp_step;
+        pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
+    }
+
+    if(same_block(lt, rb) ){
+        block[3]= block[0];
+    }else if(same_block(rt, rb)){
+        block[3]= block[1];
+    }else if(same_block(lb, rb)){
+        block[3]= block[2];
+    }else{
+        block[3]= ptmp;
+        pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
+    }
+#if 0
+    for(y=0; y<b_h; y++){
+        for(x=0; x<b_w; x++){
+            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
+            if(add) dst[x + y*dst_stride] += v;
+            else    dst[x + y*dst_stride] -= v;
+        }
+    }
+    for(y=0; y<b_h; y++){
+        uint8_t *obmc2= obmc + (obmc_stride>>1);
+        for(x=0; x<b_w; x++){
+            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
+            if(add) dst[x + y*dst_stride] += v;
+            else    dst[x + y*dst_stride] -= v;
+        }
+    }
+    for(y=0; y<b_h; y++){
+        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
+        for(x=0; x<b_w; x++){
+            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
+            if(add) dst[x + y*dst_stride] += v;
+            else    dst[x + y*dst_stride] -= v;
+        }
+    }
+    for(y=0; y<b_h; y++){
+        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
+        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+        for(x=0; x<b_w; x++){
+            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
+            if(add) dst[x + y*dst_stride] += v;
+            else    dst[x + y*dst_stride] -= v;
+        }
+    }
+#else
+    if(sliced){
+        START_TIMER
+
+        s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+        STOP_TIMER("inner_add_yblock")
+    }else
+    for(y=0; y<b_h; y++){
+        //FIXME ugly missue of obmc_stride
+        uint8_t *obmc1= obmc + y*obmc_stride;
+        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+        for(x=0; x<b_w; x++){
+            int v=   obmc1[x] * block[3][x + y*src_stride]
+                    +obmc2[x] * block[2][x + y*src_stride]
+                    +obmc3[x] * block[1][x + y*src_stride]
+                    +obmc4[x] * block[0][x + y*src_stride];
+
+            v <<= 8 - LOG2_OBMC_MAX;
+            if(FRAC_BITS != 8){
+                v += 1<<(7 - FRAC_BITS);
+                v >>= 8 - FRAC_BITS;
+            }
+            if(add){
+                v += dst[x + y*dst_stride];
+                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
+                if(v&(~255)) v= ~(v>>31);
+                dst8[x + y*src_stride] = v;
+            }else{
+                dst[x + y*dst_stride] -= v;
+            }
+        }
+    }
+#endif
+}
+
+static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
+    Plane *p= &s->plane[plane_index];
+    const int mb_w= s->b_width  << s->block_max_depth;
+    const int mb_h= s->b_height << s->block_max_depth;
+    int x, y, mb_x;
+    int block_size = MB_SIZE >> s->block_max_depth;
+    int block_w    = plane_index ? block_size/2 : block_size;
+    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
+    int obmc_stride= plane_index ? block_size : 2*block_size;
+    int ref_stride= s->current_picture.linesize[plane_index];
+    uint8_t *dst8= s->current_picture.data[plane_index];
+    int w= p->width;
+    int h= p->height;
+    START_TIMER
+
+    if(s->keyframe || (s->avctx->debug&512)){
+        if(mb_y==mb_h)
+            return;
+
+        if(add){
+            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
+            {
+//                DWTELEM * line = slice_buffer_get_line(sb, y);
+                DWTELEM * line = sb->line[y];
+                for(x=0; x<w; x++)
+                {
+//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
+                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
+                    v >>= FRAC_BITS;
+                    if(v&(~255)) v= ~(v>>31);
+                    dst8[x + y*ref_stride]= v;
+                }
+            }
+        }else{
+            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
+            {
+//                DWTELEM * line = slice_buffer_get_line(sb, y);
+                DWTELEM * line = sb->line[y];
+                for(x=0; x<w; x++)
+                {
+                    line[x] -= 128 << FRAC_BITS;
+//                    buf[x + y*w]-= 128<<FRAC_BITS;
+                }
+            }
+        }
+
+        return;
+    }
+
+        for(mb_x=0; mb_x<=mb_w; mb_x++){
+            START_TIMER
+
+            add_yblock(s, 1, sb, old_buffer, dst8, obmc,
+                       block_w*mb_x - block_w/2,
+                       block_w*mb_y - block_w/2,
+                       block_w, block_w,
+                       w, h,
+                       w, ref_stride, obmc_stride,
+                       mb_x - 1, mb_y - 1,
+                       add, 0, plane_index);
+
+            STOP_TIMER("add_yblock")
+        }
+
+    STOP_TIMER("predict_slice")
+}
+
+static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
+    Plane *p= &s->plane[plane_index];
+    const int mb_w= s->b_width  << s->block_max_depth;
+    const int mb_h= s->b_height << s->block_max_depth;
+    int x, y, mb_x;
+    int block_size = MB_SIZE >> s->block_max_depth;
+    int block_w    = plane_index ? block_size/2 : block_size;
+    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
+    const int obmc_stride= plane_index ? block_size : 2*block_size;
+    int ref_stride= s->current_picture.linesize[plane_index];
+    uint8_t *dst8= s->current_picture.data[plane_index];
+    int w= p->width;
+    int h= p->height;
+    START_TIMER
+
+    if(s->keyframe || (s->avctx->debug&512)){
+        if(mb_y==mb_h)
+            return;
+
+        if(add){
+            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
+                for(x=0; x<w; x++){
+                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
+                    v >>= FRAC_BITS;
+                    if(v&(~255)) v= ~(v>>31);
+                    dst8[x + y*ref_stride]= v;
+                }
+            }
+        }else{
+            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
+                for(x=0; x<w; x++){
+                    buf[x + y*w]-= 128<<FRAC_BITS;
+                }
+            }
+        }
+
+        return;
+    }
+
+        for(mb_x=0; mb_x<=mb_w; mb_x++){
+            START_TIMER
+
+            add_yblock(s, 0, NULL, buf, dst8, obmc,
+                       block_w*mb_x - block_w/2,
+                       block_w*mb_y - block_w/2,
+                       block_w, block_w,
+                       w, h,
+                       w, ref_stride, obmc_stride,
+                       mb_x - 1, mb_y - 1,
+                       add, 1, plane_index);
+
+            STOP_TIMER("add_yblock")
+        }
+
+    STOP_TIMER("predict_slice")
+}
+
+static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
+    const int mb_h= s->b_height << s->block_max_depth;
+    int mb_y;
+    for(mb_y=0; mb_y<=mb_h; mb_y++)
+        predict_slice(s, buf, plane_index, add, mb_y);
+}
+
+static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
+    int i, x2, y2;
+    Plane *p= &s->plane[plane_index];
+    const int block_size = MB_SIZE >> s->block_max_depth;
+    const int block_w    = plane_index ? block_size/2 : block_size;
+    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
+    const int obmc_stride= plane_index ? block_size : 2*block_size;
+    const int ref_stride= s->current_picture.linesize[plane_index];
+    uint8_t *src= s-> input_picture.data[plane_index];
+    DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int w= p->width;
+    const int h= p->height;
+    int index= mb_x + mb_y*b_stride;
+    BlockNode *b= &s->block[index];
+    BlockNode backup= *b;
+    int ab=0;
+    int aa=0;
+
+    b->type|= BLOCK_INTRA;
+    b->color[plane_index]= 0;
+    memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
+
+    for(i=0; i<4; i++){
+        int mb_x2= mb_x + (i &1) - 1;
+        int mb_y2= mb_y + (i>>1) - 1;
+        int x= block_w*mb_x2 + block_w/2;
+        int y= block_w*mb_y2 + block_w/2;
+
+        add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
+                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
+
+        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
+            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
+                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
+                int obmc_v= obmc[index];
+                int d;
+                if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
+                if(x<0) obmc_v += obmc[index + block_w];
+                if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
+                if(x+block_w>w) obmc_v += obmc[index - block_w];
+                //FIXME precalc this or simplify it somehow else
+
+                d = -dst[index] + (1<<(FRAC_BITS-1));
+                dst[index] = d;
+                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
+                aa += obmc_v * obmc_v; //FIXME precalclate this
+            }
+        }
+    }
+    *b= backup;
+
+    return clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
+}
+
+static inline int get_block_bits(SnowContext *s, int x, int y, int w){
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int b_height = s->b_height<< s->block_max_depth;
+    int index= x + y*b_stride;
+    BlockNode *b     = &s->block[index];
+    BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
+    BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
+    BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
+    int dmx, dmy;
+//  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
+//  int my_context= av_log2(2*FFABS(left->my - top->my));
+
+    if(x<0 || x>=b_stride || y>=b_height)
+        return 0;
+/*
+1            0      0
+01X          1-2    1
+001XX        3-6    2-3
+0001XXX      7-14   4-7
+00001XXXX   15-30   8-15
+*/
+//FIXME try accurate rate
+//FIXME intra and inter predictors if surrounding blocks arent the same type
+    if(b->type & BLOCK_INTRA){
+        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
+                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
+                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
+    }else{
+        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
+        dmx-= b->mx;
+        dmy-= b->my;
+        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
+                    + av_log2(2*FFABS(dmy))
+                    + av_log2(2*b->ref));
+    }
+}
+
+static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
+    Plane *p= &s->plane[plane_index];
+    const int block_size = MB_SIZE >> s->block_max_depth;
+    const int block_w    = plane_index ? block_size/2 : block_size;
+    const int obmc_stride= plane_index ? block_size : 2*block_size;
+    const int ref_stride= s->current_picture.linesize[plane_index];
+    uint8_t *dst= s->current_picture.data[plane_index];
+    uint8_t *src= s->  input_picture.data[plane_index];
+    DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
+    uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
+    uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int b_height = s->b_height<< s->block_max_depth;
+    const int w= p->width;
+    const int h= p->height;
+    int distortion;
+    int rate= 0;
+    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
+    int sx= block_w*mb_x - block_w/2;
+    int sy= block_w*mb_y - block_w/2;
+    int x0= FFMAX(0,-sx);
+    int y0= FFMAX(0,-sy);
+    int x1= FFMIN(block_w*2, w-sx);
+    int y1= FFMIN(block_w*2, h-sy);
+    int i,x,y;
+
+    pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
+
+    for(y=y0; y<y1; y++){
+        const uint8_t *obmc1= obmc_edged + y*obmc_stride;
+        const DWTELEM *pred1 = pred + y*obmc_stride;
+        uint8_t *cur1 = cur + y*ref_stride;
+        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
+        for(x=x0; x<x1; x++){
+            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
+            v = (v + pred1[x]) >> FRAC_BITS;
+            if(v&(~255)) v= ~(v>>31);
+            dst1[x] = v;
+        }
+    }
+
+    /* copy the regions where obmc[] = (uint8_t)256 */
+    if(LOG2_OBMC_MAX == 8
+        && (mb_x == 0 || mb_x == b_stride-1)
+        && (mb_y == 0 || mb_y == b_height-1)){
+        if(mb_x == 0)
+            x1 = block_w;
+        else
+            x0 = block_w;
+        if(mb_y == 0)
+            y1 = block_w;
+        else
+            y0 = block_w;
+        for(y=y0; y<y1; y++)
+            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
+    }
+
+    if(block_w==16){
+        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
+        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
+        /* FIXME cmps overlap but don't cover the wavelet's whole support,
+         * so improving the score of one block is not strictly guaranteed to
+         * improve the score of the whole frame, so iterative motion est
+         * doesn't always converge. */
+        if(s->avctx->me_cmp == FF_CMP_W97)
+            distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
+        else if(s->avctx->me_cmp == FF_CMP_W53)
+            distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
+        else{
+            distortion = 0;
+            for(i=0; i<4; i++){
+                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
+                distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
+            }
+        }
+    }else{
+        assert(block_w==8);
+        distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
+    }
+
+    if(plane_index==0){
+        for(i=0; i<4; i++){
+/* ..RRr
+ * .RXx.
+ * rxx..
+ */
+            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
+        }
+        if(mb_x == b_stride-2)
+            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
+    }
+    return distortion + rate*penalty_factor;
+}
+
+static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
+    int i, y2;
+    Plane *p= &s->plane[plane_index];
+    const int block_size = MB_SIZE >> s->block_max_depth;
+    const int block_w    = plane_index ? block_size/2 : block_size;
+    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
+    const int obmc_stride= plane_index ? block_size : 2*block_size;
+    const int ref_stride= s->current_picture.linesize[plane_index];
+    uint8_t *dst= s->current_picture.data[plane_index];
+    uint8_t *src= s-> input_picture.data[plane_index];
+    static const DWTELEM zero_dst[4096]; //FIXME
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int w= p->width;
+    const int h= p->height;
+    int distortion= 0;
+    int rate= 0;
+    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
+
+    for(i=0; i<9; i++){
+        int mb_x2= mb_x + (i%3) - 1;
+        int mb_y2= mb_y + (i/3) - 1;
+        int x= block_w*mb_x2 + block_w/2;
+        int y= block_w*mb_y2 + block_w/2;
+
+        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
+                   x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
+
+        //FIXME find a cleaner/simpler way to skip the outside stuff
+        for(y2= y; y2<0; y2++)
+            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
+        for(y2= h; y2<y+block_w; y2++)
+            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
+        if(x<0){
+            for(y2= y; y2<y+block_w; y2++)
+                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
+        }
+        if(x+block_w > w){
+            for(y2= y; y2<y+block_w; y2++)
+                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
+        }
+
+        assert(block_w== 8 || block_w==16);
+        distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
+    }
+
+    if(plane_index==0){
+        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
+        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
+
+/* ..RRRr
+ * .RXXx.
+ * .RXXx.
+ * rxxx.
+ */
+        if(merged)
+            rate = get_block_bits(s, mb_x, mb_y, 2);
+        for(i=merged?4:0; i<9; i++){
+            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
+            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
+        }
+    }
+    return distortion + rate*penalty_factor;
+}
+
+static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
+    const int b_stride= s->b_width << s->block_max_depth;
+    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
+    BlockNode backup= *block;
+    int rd, index, value;
+
+    assert(mb_x>=0 && mb_y>=0);
+    assert(mb_x<b_stride);
+
+    if(intra){
+        block->color[0] = p[0];
+        block->color[1] = p[1];
+        block->color[2] = p[2];
+        block->type |= BLOCK_INTRA;
+    }else{
+        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
+        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
+        if(s->me_cache[index] == value)
+            return 0;
+        s->me_cache[index]= value;
+
+        block->mx= p[0];
+        block->my= p[1];
+        block->type &= ~BLOCK_INTRA;
+    }
+
+    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
+
+//FIXME chroma
+    if(rd < *best_rd){
+        *best_rd= rd;
+        return 1;
+    }else{
+        *block= backup;
+        return 0;
+    }
+}
+
+/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
+static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
+    int p[2] = {p0, p1};
+    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
+}
+
+static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
+    const int b_stride= s->b_width << s->block_max_depth;
+    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
+    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
+    int rd, index, value;
+
+    assert(mb_x>=0 && mb_y>=0);
+    assert(mb_x<b_stride);
+    assert(((mb_x|mb_y)&1) == 0);
+
+    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
+    value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
+    if(s->me_cache[index] == value)
+        return 0;
+    s->me_cache[index]= value;
+
+    block->mx= p0;
+    block->my= p1;
+    block->ref= ref;
+    block->type &= ~BLOCK_INTRA;
+    block[1]= block[b_stride]= block[b_stride+1]= *block;
+
+    rd= get_4block_rd(s, mb_x, mb_y, 0);
+
+//FIXME chroma
+    if(rd < *best_rd){
+        *best_rd= rd;
+        return 1;
+    }else{
+        block[0]= backup[0];
+        block[1]= backup[1];
+        block[b_stride]= backup[2];
+        block[b_stride+1]= backup[3];
+        return 0;
+    }
+}
+
+static void iterative_me(SnowContext *s){
+    int pass, mb_x, mb_y;
+    const int b_width = s->b_width  << s->block_max_depth;
+    const int b_height= s->b_height << s->block_max_depth;
+    const int b_stride= b_width;
+    int color[3];
+
+    {
+        RangeCoder r = s->c;
+        uint8_t state[sizeof(s->block_state)];
+        memcpy(state, s->block_state, sizeof(s->block_state));
+        for(mb_y= 0; mb_y<s->b_height; mb_y++)
+            for(mb_x= 0; mb_x<s->b_width; mb_x++)
+                encode_q_branch(s, 0, mb_x, mb_y);
+        s->c = r;
+        memcpy(s->block_state, state, sizeof(s->block_state));
+    }
+
+    for(pass=0; pass<25; pass++){
+        int change= 0;
+
+        for(mb_y= 0; mb_y<b_height; mb_y++){
+            for(mb_x= 0; mb_x<b_width; mb_x++){
+                int dia_change, i, j, ref;
+                int best_rd= INT_MAX, ref_rd;
+                BlockNode backup, ref_b;
+                const int index= mb_x + mb_y * b_stride;
+                BlockNode *block= &s->block[index];
+                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
+                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
+                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
+                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
+                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
+                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
+                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
+                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
+                const int b_w= (MB_SIZE >> s->block_max_depth);
+                uint8_t obmc_edged[b_w*2][b_w*2];
+
+                if(pass && (block->type & BLOCK_OPT))
+                    continue;
+                block->type |= BLOCK_OPT;
+
+                backup= *block;
+
+                if(!s->me_cache_generation)
+                    memset(s->me_cache, 0, sizeof(s->me_cache));
+                s->me_cache_generation += 1<<22;
+
+                //FIXME precalc
+                {
+                    int x, y;
+                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
+                    if(mb_x==0)
+                        for(y=0; y<b_w*2; y++)
+                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
+                    if(mb_x==b_stride-1)
+                        for(y=0; y<b_w*2; y++)
+                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
+                    if(mb_y==0){
+                        for(x=0; x<b_w*2; x++)
+                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
+                        for(y=1; y<b_w; y++)
+                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
+                    }
+                    if(mb_y==b_height-1){
+                        for(x=0; x<b_w*2; x++)
+                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
+                        for(y=b_w; y<b_w*2-1; y++)
+                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
+                    }
+                }
+
+                //skip stuff outside the picture
+                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
+                {
+                    uint8_t *src= s->  input_picture.data[0];
+                    uint8_t *dst= s->current_picture.data[0];
+                    const int stride= s->current_picture.linesize[0];
+                    const int block_w= MB_SIZE >> s->block_max_depth;
+                    const int sx= block_w*mb_x - block_w/2;
+                    const int sy= block_w*mb_y - block_w/2;
+                    const int w= s->plane[0].width;
+                    const int h= s->plane[0].height;
+                    int y;
+
+                    for(y=sy; y<0; y++)
+                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
+                    for(y=h; y<sy+block_w*2; y++)
+                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
+                    if(sx<0){
+                        for(y=sy; y<sy+block_w*2; y++)
+                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
+                    }
+                    if(sx+block_w*2 > w){
+                        for(y=sy; y<sy+block_w*2; y++)
+                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
+                    }
+                }
+
+                // intra(black) = neighbors' contribution to the current block
+                for(i=0; i<3; i++)
+                    color[i]= get_dc(s, mb_x, mb_y, i);
+
+                // get previous score (cant be cached due to OBMC)
+                if(pass > 0 && (block->type&BLOCK_INTRA)){
+                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
+                    check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
+                }else
+                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
+
+                ref_b= *block;
+                ref_rd= best_rd;
+                for(ref=0; ref < s->ref_frames; ref++){
+                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
+                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
+                        continue;
+                    block->ref= ref;
+                    best_rd= INT_MAX;
+
+                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
+                    check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
+                    if(tb)
+                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
+                    if(lb)
+                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
+                    if(rb)
+                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
+                    if(bb)
+                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
+
+                    /* fullpel ME */
+                    //FIXME avoid subpel interpol / round to nearest integer
+                    do{
+                        dia_change=0;
+                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
+                            for(j=0; j<i; j++){
+                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
+                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
+                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
+                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
+                            }
+                        }
+                    }while(dia_change);
+                    /* subpel ME */
+                    do{
+                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
+                        dia_change=0;
+                        for(i=0; i<8; i++)
+                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
+                    }while(dia_change);
+                    //FIXME or try the standard 2 pass qpel or similar
+
+                    mvr[0][0]= block->mx;
+                    mvr[0][1]= block->my;
+                    if(ref_rd > best_rd){
+                        ref_rd= best_rd;
+                        ref_b= *block;
+                    }
+                }
+                best_rd= ref_rd;
+                *block= ref_b;
+#if 1
+                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
+                //FIXME RD style color selection
+#endif
+                if(!same_block(block, &backup)){
+                    if(tb ) tb ->type &= ~BLOCK_OPT;
+                    if(lb ) lb ->type &= ~BLOCK_OPT;
+                    if(rb ) rb ->type &= ~BLOCK_OPT;
+                    if(bb ) bb ->type &= ~BLOCK_OPT;
+                    if(tlb) tlb->type &= ~BLOCK_OPT;
+                    if(trb) trb->type &= ~BLOCK_OPT;
+                    if(blb) blb->type &= ~BLOCK_OPT;
+                    if(brb) brb->type &= ~BLOCK_OPT;
+                    change ++;
+                }
+            }
+        }
+        av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
+        if(!change)
+            break;
+    }
+
+    if(s->block_max_depth == 1){
+        int change= 0;
+        for(mb_y= 0; mb_y<b_height; mb_y+=2){
+            for(mb_x= 0; mb_x<b_width; mb_x+=2){
+                int i;
+                int best_rd, init_rd;
+                const int index= mb_x + mb_y * b_stride;
+                BlockNode *b[4];
+
+                b[0]= &s->block[index];
+                b[1]= b[0]+1;
+                b[2]= b[0]+b_stride;
+                b[3]= b[2]+1;
+                if(same_block(b[0], b[1]) &&
+                   same_block(b[0], b[2]) &&
+                   same_block(b[0], b[3]))
+                    continue;
+
+                if(!s->me_cache_generation)
+                    memset(s->me_cache, 0, sizeof(s->me_cache));
+                s->me_cache_generation += 1<<22;
+
+                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
+
+                //FIXME more multiref search?
+                check_4block_inter(s, mb_x, mb_y,
+                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
+                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
+
+                for(i=0; i<4; i++)
+                    if(!(b[i]->type&BLOCK_INTRA))
+                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
+
+                if(init_rd != best_rd)
+                    change++;
+            }
+        }
+        av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
+    }
+}
+
+static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
+    const int level= b->level;
+    const int w= b->width;
+    const int h= b->height;
+    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
+    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
+    int x,y, thres1, thres2;
+//    START_TIMER
+
+    if(s->qlog == LOSSLESS_QLOG) return;
+
+    bias= bias ? 0 : (3*qmul)>>3;
+    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
+    thres2= 2*thres1;
+
+    if(!bias){
+        for(y=0; y<h; y++){
+            for(x=0; x<w; x++){
+                int i= src[x + y*stride];
+
+                if((unsigned)(i+thres1) > thres2){
+                    if(i>=0){
+                        i<<= QEXPSHIFT;
+                        i/= qmul; //FIXME optimize
+                        src[x + y*stride]=  i;
+                    }else{
+                        i= -i;
+                        i<<= QEXPSHIFT;
+                        i/= qmul; //FIXME optimize
+                        src[x + y*stride]= -i;
+                    }
+                }else
+                    src[x + y*stride]= 0;
+            }
+        }
+    }else{
+        for(y=0; y<h; y++){
+            for(x=0; x<w; x++){
+                int i= src[x + y*stride];
+
+                if((unsigned)(i+thres1) > thres2){
+                    if(i>=0){
+                        i<<= QEXPSHIFT;
+                        i= (i + bias) / qmul; //FIXME optimize
+                        src[x + y*stride]=  i;
+                    }else{
+                        i= -i;
+                        i<<= QEXPSHIFT;
+                        i= (i + bias) / qmul; //FIXME optimize
+                        src[x + y*stride]= -i;
+                    }
+                }else
+                    src[x + y*stride]= 0;
+            }
+        }
+    }
+    if(level+1 == s->spatial_decomposition_count){
+//        STOP_TIMER("quantize")
+    }
+}
+
+static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
+    const int w= b->width;
+    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
+    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
+    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
+    int x,y;
+    START_TIMER
+
+    if(s->qlog == LOSSLESS_QLOG) return;
+
+    for(y=start_y; y<end_y; y++){
+//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
+        DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
+        for(x=0; x<w; x++){
+            int i= line[x];
+            if(i<0){
+                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
+            }else if(i>0){
+                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
+            }
+        }
+    }
+    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
+        STOP_TIMER("dquant")
+    }
+}
+
+static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
+    const int w= b->width;
+    const int h= b->height;
+    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
+    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
+    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
+    int x,y;
+    START_TIMER
+
+    if(s->qlog == LOSSLESS_QLOG) return;
+
+    for(y=0; y<h; y++){
+        for(x=0; x<w; x++){
+            int i= src[x + y*stride];
+            if(i<0){
+                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
+            }else if(i>0){
+                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
+            }
+        }
+    }
+    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
+        STOP_TIMER("dquant")
+    }
+}
+
+static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
+    const int w= b->width;
+    const int h= b->height;
+    int x,y;
+
+    for(y=h-1; y>=0; y--){
+        for(x=w-1; x>=0; x--){
+            int i= x + y*stride;
+
+            if(x){
+                if(use_median){
+                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
+                    else  src[i] -= src[i - 1];
+                }else{
+                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
+                    else  src[i] -= src[i - 1];
+                }
+            }else{
+                if(y) src[i] -= src[i - stride];
+            }
+        }
+    }
+}
+
+static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
+    const int w= b->width;
+    int x,y;
+
+//    START_TIMER
+
+    DWTELEM * line;
+    DWTELEM * prev;
+
+    if (start_y != 0)
+        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
+
+    for(y=start_y; y<end_y; y++){
+        prev = line;
+//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
+        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
+        for(x=0; x<w; x++){
+            if(x){
+                if(use_median){
+                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
+                    else  line[x] += line[x - 1];
+                }else{
+                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
+                    else  line[x] += line[x - 1];
+                }
+            }else{
+                if(y) line[x] += prev[x];
+            }
+        }
+    }
+
+//    STOP_TIMER("correlate")
+}
+
+static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
+    const int w= b->width;
+    const int h= b->height;
+    int x,y;
+
+    for(y=0; y<h; y++){
+        for(x=0; x<w; x++){
+            int i= x + y*stride;
+
+            if(x){
+                if(use_median){
+                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
+                    else  src[i] += src[i - 1];
+                }else{
+                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
+                    else  src[i] += src[i - 1];
+                }
+            }else{
+                if(y) src[i] += src[i - stride];
+            }
+        }
+    }
+}
+
+static void encode_header(SnowContext *s){
+    int plane_index, level, orientation;
+    uint8_t kstate[32];
+
+    memset(kstate, MID_STATE, sizeof(kstate));
+
+    put_rac(&s->c, kstate, s->keyframe);
+    if(s->keyframe || s->always_reset)
+        reset_contexts(s);
+    if(s->keyframe){
+        put_symbol(&s->c, s->header_state, s->version, 0);
+        put_rac(&s->c, s->header_state, s->always_reset);
+        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
+        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
+        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
+        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
+        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
+        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
+        put_rac(&s->c, s->header_state, s->spatial_scalability);
+//        put_rac(&s->c, s->header_state, s->rate_scalability);
+        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
+
+        for(plane_index=0; plane_index<2; plane_index++){
+            for(level=0; level<s->spatial_decomposition_count; level++){
+                for(orientation=level ? 1:0; orientation<4; orientation++){
+                    if(orientation==2) continue;
+                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
+                }
+            }
+        }
+    }
+    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
+    put_symbol(&s->c, s->header_state, s->qlog, 1);
+    put_symbol(&s->c, s->header_state, s->mv_scale, 0);
+    put_symbol(&s->c, s->header_state, s->qbias, 1);
+    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
+}
+
+static int decode_header(SnowContext *s){
+    int plane_index, level, orientation;
+    uint8_t kstate[32];
+
+    memset(kstate, MID_STATE, sizeof(kstate));
+
+    s->keyframe= get_rac(&s->c, kstate);
+    if(s->keyframe || s->always_reset)
+        reset_contexts(s);
+    if(s->keyframe){
+        s->version= get_symbol(&s->c, s->header_state, 0);
+        if(s->version>0){
+            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
+            return -1;
+        }
+        s->always_reset= get_rac(&s->c, s->header_state);
+        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
+        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
+        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
+        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
+        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
+        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
+        s->spatial_scalability= get_rac(&s->c, s->header_state);
+//        s->rate_scalability= get_rac(&s->c, s->header_state);
+        s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
+
+        for(plane_index=0; plane_index<3; plane_index++){
+            for(level=0; level<s->spatial_decomposition_count; level++){
+                for(orientation=level ? 1:0; orientation<4; orientation++){
+                    int q;
+                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
+                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
+                    else                    q= get_symbol(&s->c, s->header_state, 1);
+                    s->plane[plane_index].band[level][orientation].qlog= q;
+                }
+            }
+        }
+    }
+
+    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
+    if(s->spatial_decomposition_type > 2){
+        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
+        return -1;
+    }
+
+    s->qlog= get_symbol(&s->c, s->header_state, 1);
+    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
+    s->qbias= get_symbol(&s->c, s->header_state, 1);
+    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
+    if(s->block_max_depth > 1 || s->block_max_depth < 0){
+        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
+        s->block_max_depth= 0;
+        return -1;
+    }
+
+    return 0;
+}
+
+static void init_qexp(void){
+    int i;
+    double v=128;
+
+    for(i=0; i<QROOT; i++){
+        qexp[i]= lrintf(v);
+        v *= pow(2, 1.0 / QROOT);
+    }
+}
+
+static int common_init(AVCodecContext *avctx){
+    SnowContext *s = avctx->priv_data;
+    int width, height;
+    int level, orientation, plane_index, dec;
+    int i, j;
+
+    s->avctx= avctx;
+
+    dsputil_init(&s->dsp, avctx);
+
+#define mcf(dx,dy)\
+    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
+    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
+        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
+    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
+    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
+        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
+
+    mcf( 0, 0)
+    mcf( 4, 0)
+    mcf( 8, 0)
+    mcf(12, 0)
+    mcf( 0, 4)
+    mcf( 4, 4)
+    mcf( 8, 4)
+    mcf(12, 4)
+    mcf( 0, 8)
+    mcf( 4, 8)
+    mcf( 8, 8)
+    mcf(12, 8)
+    mcf( 0,12)
+    mcf( 4,12)
+    mcf( 8,12)
+    mcf(12,12)
+
+#define mcfh(dx,dy)\
+    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
+    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
+        mc_block_hpel ## dx ## dy ## 16;\
+    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
+    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
+        mc_block_hpel ## dx ## dy ## 8;
+
+    mcfh(0, 0)
+    mcfh(8, 0)
+    mcfh(0, 8)
+    mcfh(8, 8)
+
+    if(!qexp[0])
+        init_qexp();
+
+    dec= s->spatial_decomposition_count= 5;
+    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
+
+    s->chroma_h_shift= 1; //FIXME XXX
+    s->chroma_v_shift= 1;
+
+//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
+
+    width= s->avctx->width;
+    height= s->avctx->height;
+
+    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
+
+    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
+    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
+
+    for(plane_index=0; plane_index<3; plane_index++){
+        int w= s->avctx->width;
+        int h= s->avctx->height;
+
+        if(plane_index){
+            w>>= s->chroma_h_shift;
+            h>>= s->chroma_v_shift;
+        }
+        s->plane[plane_index].width = w;
+        s->plane[plane_index].height= h;
+//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
+        for(level=s->spatial_decomposition_count-1; level>=0; level--){
+            for(orientation=level ? 1 : 0; orientation<4; orientation++){
+                SubBand *b= &s->plane[plane_index].band[level][orientation];
+
+                b->buf= s->spatial_dwt_buffer;
+                b->level= level;
+                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
+                b->width = (w + !(orientation&1))>>1;
+                b->height= (h + !(orientation>1))>>1;
+
+                b->stride_line = 1 << (s->spatial_decomposition_count - level);
+                b->buf_x_offset = 0;
+                b->buf_y_offset = 0;
+
+                if(orientation&1){
+                    b->buf += (w+1)>>1;
+                    b->buf_x_offset = (w+1)>>1;
+                }
+                if(orientation>1){
+                    b->buf += b->stride>>1;
+                    b->buf_y_offset = b->stride_line >> 1;
+                }
+
+                if(level)
+                    b->parent= &s->plane[plane_index].band[level-1][orientation];
+                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
+            }
+            w= (w+1)>>1;
+            h= (h+1)>>1;
+        }
+    }
+
+    for(i=0; i<MAX_REF_FRAMES; i++)
+        for(j=0; j<MAX_REF_FRAMES; j++)
+            scale_mv_ref[i][j] = 256*(i+1)/(j+1);
+
+    reset_contexts(s);
+/*
+    width= s->width= avctx->width;
+    height= s->height= avctx->height;
+
+    assert(width && height);
+*/
+    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
+
+    return 0;
+}
+
+static int qscale2qlog(int qscale){
+    return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
+           + 61*QROOT/8; //<64 >60
+}
+
+static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
+{
+    /* estimate the frame's complexity as a sum of weighted dwt coefs.
+     * FIXME we know exact mv bits at this point,
+     * but ratecontrol isn't set up to include them. */
+    uint32_t coef_sum= 0;
+    int level, orientation, delta_qlog;
+
+    for(level=0; level<s->spatial_decomposition_count; level++){
+        for(orientation=level ? 1 : 0; orientation<4; orientation++){
+            SubBand *b= &s->plane[0].band[level][orientation];
+            DWTELEM *buf= b->buf;
+            const int w= b->width;
+            const int h= b->height;
+            const int stride= b->stride;
+            const int qlog= clip(2*QROOT + b->qlog, 0, QROOT*16);
+            const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
+            const int qdiv= (1<<16)/qmul;
+            int x, y;
+            if(orientation==0)
+                decorrelate(s, b, buf, stride, 1, 0);
+            for(y=0; y<h; y++)
+                for(x=0; x<w; x++)
+                    coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
+            if(orientation==0)
+                correlate(s, b, buf, stride, 1, 0);
+        }
+    }
+
+    /* ugly, ratecontrol just takes a sqrt again */
+    coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
+    assert(coef_sum < INT_MAX);
+
+    if(pict->pict_type == I_TYPE){
+        s->m.current_picture.mb_var_sum= coef_sum;
+        s->m.current_picture.mc_mb_var_sum= 0;
+    }else{
+        s->m.current_picture.mc_mb_var_sum= coef_sum;
+        s->m.current_picture.mb_var_sum= 0;
+    }
+
+    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
+    if (pict->quality < 0)
+        return INT_MIN;
+    s->lambda= pict->quality * 3/2;
+    delta_qlog= qscale2qlog(pict->quality) - s->qlog;
+    s->qlog+= delta_qlog;
+    return delta_qlog;
+}
+
+static void calculate_vissual_weight(SnowContext *s, Plane *p){
+    int width = p->width;
+    int height= p->height;
+    int level, orientation, x, y;
+
+    for(level=0; level<s->spatial_decomposition_count; level++){
+        for(orientation=level ? 1 : 0; orientation<4; orientation++){
+            SubBand *b= &p->band[level][orientation];
+            DWTELEM *buf= b->buf;
+            int64_t error=0;
+
+            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
+            buf[b->width/2 + b->height/2*b->stride]= 256*256;
+            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
+            for(y=0; y<height; y++){
+                for(x=0; x<width; x++){
+                    int64_t d= s->spatial_dwt_buffer[x + y*width];
+                    error += d*d;
+                }
+            }
+
+            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
+//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
+        }
+    }
+}
+
+static int encode_init(AVCodecContext *avctx)
+{
+    SnowContext *s = avctx->priv_data;
+    int plane_index;
+
+    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
+        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
+               "use vstrict=-2 / -strict -2 to use it anyway\n");
+        return -1;
+    }
+
+    if(avctx->prediction_method == DWT_97
+       && (avctx->flags & CODEC_FLAG_QSCALE)
+       && avctx->global_quality == 0){
+        av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
+        return -1;
+    }
+
+    common_init(avctx);
+    alloc_blocks(s);
+
+    s->version=0;
+
+    s->m.avctx   = avctx;
+    s->m.flags   = avctx->flags;
+    s->m.bit_rate= avctx->bit_rate;
+
+    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
+    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
+    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
+    s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
+    h263_encode_init(&s->m); //mv_penalty
+
+    s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
+
+    if(avctx->flags&CODEC_FLAG_PASS1){
+        if(!avctx->stats_out)
+            avctx->stats_out = av_mallocz(256);
+    }
+    if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
+        if(ff_rate_control_init(&s->m) < 0)
+            return -1;
+    }
+    s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
+
+    for(plane_index=0; plane_index<3; plane_index++){
+        calculate_vissual_weight(s, &s->plane[plane_index]);
+    }
+
+
+    avctx->coded_frame= &s->current_picture;
+    switch(avctx->pix_fmt){
+//    case PIX_FMT_YUV444P:
+//    case PIX_FMT_YUV422P:
+    case PIX_FMT_YUV420P:
+    case PIX_FMT_GRAY8:
+//    case PIX_FMT_YUV411P:
+//    case PIX_FMT_YUV410P:
+        s->colorspace_type= 0;
+        break;
+/*    case PIX_FMT_RGBA32:
+        s->colorspace= 1;
+        break;*/
+    default:
+        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
+        return -1;
+    }
+//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
+    s->chroma_h_shift= 1;
+    s->chroma_v_shift= 1;
+
+    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
+    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
+
+    s->avctx->get_buffer(s->avctx, &s->input_picture);
+
+    if(s->avctx->me_method == ME_ITER){
+        int i;
+        int size= s->b_width * s->b_height << 2*s->block_max_depth;
+        for(i=0; i<s->max_ref_frames; i++){
+            s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
+            s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
+        }
+    }
+
+    return 0;
+}
+
+static int frame_start(SnowContext *s){
+   AVFrame tmp;
+   int w= s->avctx->width; //FIXME round up to x16 ?
+   int h= s->avctx->height;
+
+    if(s->current_picture.data[0]){
+        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
+        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
+        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
+    }
+
+    tmp= s->last_picture[s->max_ref_frames-1];
+    memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
+    s->last_picture[0]= s->current_picture;
+    s->current_picture= tmp;
+
+    if(s->keyframe){
+        s->ref_frames= 0;
+    }else{
+        int i;
+        for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
+            if(i && s->last_picture[i-1].key_frame)
+                break;
+        s->ref_frames= i;
+    }
+
+    s->current_picture.reference= 1;
+    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
+        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    s->current_picture.key_frame= s->keyframe;
+
+    return 0;
+}
+
+static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
+    SnowContext *s = avctx->priv_data;
+    RangeCoder * const c= &s->c;
+    AVFrame *pict = data;
+    const int width= s->avctx->width;
+    const int height= s->avctx->height;
+    int level, orientation, plane_index, i, y;
+    uint8_t rc_header_bak[sizeof(s->header_state)];
+    uint8_t rc_block_bak[sizeof(s->block_state)];
+
+    ff_init_range_encoder(c, buf, buf_size);
+    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
+
+    for(i=0; i<3; i++){
+        int shift= !!i;
+        for(y=0; y<(height>>shift); y++)
+            memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
+                   &pict->data[i][y * pict->linesize[i]],
+                   width>>shift);
+    }
+    s->new_picture = *pict;
+
+    s->m.picture_number= avctx->frame_number;
+    if(avctx->flags&CODEC_FLAG_PASS2){
+        s->m.pict_type =
+        pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
+        s->keyframe= pict->pict_type==FF_I_TYPE;
+        if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
+            pict->quality= ff_rate_estimate_qscale(&s->m, 0);
+            if (pict->quality < 0)
+                return -1;
+        }
+    }else{
+        s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
+        s->m.pict_type=
+        pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
+    }
+
+    if(s->pass1_rc && avctx->frame_number == 0)
+        pict->quality= 2*FF_QP2LAMBDA;
+    if(pict->quality){
+        s->qlog= qscale2qlog(pict->quality);
+        s->lambda = pict->quality * 3/2;
+    }
+    if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
+        s->qlog= LOSSLESS_QLOG;
+        s->lambda = 0;
+    }//else keep previous frame's qlog until after motion est
+
+    frame_start(s);
+
+    s->m.current_picture_ptr= &s->m.current_picture;
+    if(pict->pict_type == P_TYPE){
+        int block_width = (width +15)>>4;
+        int block_height= (height+15)>>4;
+        int stride= s->current_picture.linesize[0];
+
+        assert(s->current_picture.data[0]);
+        assert(s->last_picture[0].data[0]);
+
+        s->m.avctx= s->avctx;
+        s->m.current_picture.data[0]= s->current_picture.data[0];
+        s->m.   last_picture.data[0]= s->last_picture[0].data[0];
+        s->m.    new_picture.data[0]= s->  input_picture.data[0];
+        s->m.   last_picture_ptr= &s->m.   last_picture;
+        s->m.linesize=
+        s->m.   last_picture.linesize[0]=
+        s->m.    new_picture.linesize[0]=
+        s->m.current_picture.linesize[0]= stride;
+        s->m.uvlinesize= s->current_picture.linesize[1];
+        s->m.width = width;
+        s->m.height= height;
+        s->m.mb_width = block_width;
+        s->m.mb_height= block_height;
+        s->m.mb_stride=   s->m.mb_width+1;
+        s->m.b8_stride= 2*s->m.mb_width+1;
+        s->m.f_code=1;
+        s->m.pict_type= pict->pict_type;
+        s->m.me_method= s->avctx->me_method;
+        s->m.me.scene_change_score=0;
+        s->m.flags= s->avctx->flags;
+        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
+        s->m.out_format= FMT_H263;
+        s->m.unrestricted_mv= 1;
+
+        s->m.lambda = s->lambda;
+        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
+        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
+
+        s->m.dsp= s->dsp; //move
+        ff_init_me(&s->m);
+        s->dsp= s->m.dsp;
+    }
+
+    if(s->pass1_rc){
+        memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
+        memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
+    }
+
+redo_frame:
+
+    s->m.pict_type = pict->pict_type;
+    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
+
+    encode_header(s);
+    s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
+    encode_blocks(s, 1);
+    s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
+
+    for(plane_index=0; plane_index<3; plane_index++){
+        Plane *p= &s->plane[plane_index];
+        int w= p->width;
+        int h= p->height;
+        int x, y;
+//        int bits= put_bits_count(&s->c.pb);
+
+    if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
+        //FIXME optimize
+     if(pict->data[plane_index]) //FIXME gray hack
+        for(y=0; y<h; y++){
+            for(x=0; x<w; x++){
+                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
+            }
+        }
+        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
+
+        if(   plane_index==0
+           && pict->pict_type == P_TYPE
+           && !(avctx->flags&CODEC_FLAG_PASS2)
+           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
+            ff_init_range_encoder(c, buf, buf_size);
+            ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
+            pict->pict_type= FF_I_TYPE;
+            s->keyframe=1;
+            s->current_picture.key_frame=1;
+            reset_contexts(s);
+            goto redo_frame;
+        }
+
+        if(s->qlog == LOSSLESS_QLOG){
+            for(y=0; y<h; y++){
+                for(x=0; x<w; x++){
+                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
+                }
+            }
+        }
+
+        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
+
+        if(s->pass1_rc && plane_index==0){
+            int delta_qlog = ratecontrol_1pass(s, pict);
+            if (delta_qlog <= INT_MIN)
+                return -1;
+            if(delta_qlog){
+                //reordering qlog in the bitstream would eliminate this reset
+                ff_init_range_encoder(c, buf, buf_size);
+                memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
+                memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
+                encode_header(s);
+                encode_blocks(s, 0);
+            }
+        }
+
+        for(level=0; level<s->spatial_decomposition_count; level++){
+            for(orientation=level ? 1 : 0; orientation<4; orientation++){
+                SubBand *b= &p->band[level][orientation];
+
+                quantize(s, b, b->buf, b->stride, s->qbias);
+                if(orientation==0)
+                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
+                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
+                assert(b->parent==NULL || b->parent->stride == b->stride*2);
+                if(orientation==0)
+                    correlate(s, b, b->buf, b->stride, 1, 0);
+            }
+        }
+//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
+
+        for(level=0; level<s->spatial_decomposition_count; level++){
+            for(orientation=level ? 1 : 0; orientation<4; orientation++){
+                SubBand *b= &p->band[level][orientation];
+
+                dequantize(s, b, b->buf, b->stride);
+            }
+        }
+
+        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
+        if(s->qlog == LOSSLESS_QLOG){
+            for(y=0; y<h; y++){
+                for(x=0; x<w; x++){
+                    s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
+                }
+            }
+        }
+{START_TIMER
+        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
+STOP_TIMER("pred-conv")}
+      }else{
+            //ME/MC only
+            if(pict->pict_type == I_TYPE){
+                for(y=0; y<h; y++){
+                    for(x=0; x<w; x++){
+                        s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
+                            pict->data[plane_index][y*pict->linesize[plane_index] + x];
+                    }
+                }
+            }else{
+                memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
+                predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
+            }
+      }
+        if(s->avctx->flags&CODEC_FLAG_PSNR){
+            int64_t error= 0;
+
+    if(pict->data[plane_index]) //FIXME gray hack
+            for(y=0; y<h; y++){
+                for(x=0; x<w; x++){
+                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
+                    error += d*d;
+                }
+            }
+            s->avctx->error[plane_index] += error;
+            s->current_picture.error[plane_index] = error;
+        }
+    }
+
+    if(s->last_picture[s->max_ref_frames-1].data[0])
+        avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
+
+    s->current_picture.coded_picture_number = avctx->frame_number;
+    s->current_picture.pict_type = pict->pict_type;
+    s->current_picture.quality = pict->quality;
+    s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
+    s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
+    s->m.current_picture.display_picture_number =
+    s->m.current_picture.coded_picture_number = avctx->frame_number;
+    s->m.current_picture.quality = pict->quality;
+    s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
+    if(s->pass1_rc)
+        if (ff_rate_estimate_qscale(&s->m, 0) < 0)
+            return -1;
+    if(avctx->flags&CODEC_FLAG_PASS1)
+        ff_write_pass1_stats(&s->m);
+    s->m.last_pict_type = s->m.pict_type;
+    avctx->frame_bits = s->m.frame_bits;
+    avctx->mv_bits = s->m.mv_bits;
+    avctx->misc_bits = s->m.misc_bits;
+    avctx->p_tex_bits = s->m.p_tex_bits;
+
+    emms_c();
+
+    return ff_rac_terminate(c);
+}
+
+static void common_end(SnowContext *s){
+    int plane_index, level, orientation, i;
+
+    av_freep(&s->spatial_dwt_buffer);
+
+    av_freep(&s->m.me.scratchpad);
+    av_freep(&s->m.me.map);
+    av_freep(&s->m.me.score_map);
+    av_freep(&s->m.obmc_scratchpad);
+
+    av_freep(&s->block);
+
+    for(i=0; i<MAX_REF_FRAMES; i++){
+        av_freep(&s->ref_mvs[i]);
+        av_freep(&s->ref_scores[i]);
+        if(s->last_picture[i].data[0])
+            s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
+    }
+
+    for(plane_index=0; plane_index<3; plane_index++){
+        for(level=s->spatial_decomposition_count-1; level>=0; level--){
+            for(orientation=level ? 1 : 0; orientation<4; orientation++){
+                SubBand *b= &s->plane[plane_index].band[level][orientation];
+
+                av_freep(&b->x_coeff);
+            }
+        }
+    }
+}
+
+static int encode_end(AVCodecContext *avctx)
+{
+    SnowContext *s = avctx->priv_data;
+
+    common_end(s);
+    av_free(avctx->stats_out);
+
+    return 0;
+}
+
+static int decode_init(AVCodecContext *avctx)
+{
+    SnowContext *s = avctx->priv_data;
+    int block_size;
+
+    avctx->pix_fmt= PIX_FMT_YUV420P;
+
+    common_init(avctx);
+
+    block_size = MB_SIZE >> s->block_max_depth;
+    slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
+
+    return 0;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
+    SnowContext *s = avctx->priv_data;
+    RangeCoder * const c= &s->c;
+    int bytes_read;
+    AVFrame *picture = data;
+    int level, orientation, plane_index;
+
+    ff_init_range_decoder(c, buf, buf_size);
+    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
+
+    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
+    decode_header(s);
+    if(!s->block) alloc_blocks(s);
+
+    frame_start(s);
+    //keyframe flag dupliaction mess FIXME
+    if(avctx->debug&FF_DEBUG_PICT_INFO)
+        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
+
+    decode_blocks(s);
+
+    for(plane_index=0; plane_index<3; plane_index++){
+        Plane *p= &s->plane[plane_index];
+        int w= p->width;
+        int h= p->height;
+        int x, y;
+        int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
+
+if(s->avctx->debug&2048){
+        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
+        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
+
+        for(y=0; y<h; y++){
+            for(x=0; x<w; x++){
+                int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
+                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
+            }
+        }
+}
+
+{   START_TIMER
+    for(level=0; level<s->spatial_decomposition_count; level++){
+        for(orientation=level ? 1 : 0; orientation<4; orientation++){
+            SubBand *b= &p->band[level][orientation];
+            unpack_coeffs(s, b, b->parent, orientation);
+        }
+    }
+    STOP_TIMER("unpack coeffs");
+}
+
+{START_TIMER
+    const int mb_h= s->b_height << s->block_max_depth;
+    const int block_size = MB_SIZE >> s->block_max_depth;
+    const int block_w    = plane_index ? block_size/2 : block_size;
+    int mb_y;
+    dwt_compose_t cs[MAX_DECOMPOSITIONS];
+    int yd=0, yq=0;
+    int y;
+    int end_y;
+
+    ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
+    for(mb_y=0; mb_y<=mb_h; mb_y++){
+
+        int slice_starty = block_w*mb_y;
+        int slice_h = block_w*(mb_y+1);
+        if (!(s->keyframe || s->avctx->debug&512)){
+            slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
+            slice_h -= (block_w >> 1);
+        }
+
+        {
+        START_TIMER
+        for(level=0; level<s->spatial_decomposition_count; level++){
+            for(orientation=level ? 1 : 0; orientation<4; orientation++){
+                SubBand *b= &p->band[level][orientation];
+                int start_y;
+                int end_y;
+                int our_mb_start = mb_y;
+                int our_mb_end = (mb_y + 1);
+                const int extra= 3;
+                start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
+                end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
+                if (!(s->keyframe || s->avctx->debug&512)){
+                    start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
+                    end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
+                }
+                start_y = FFMIN(b->height, start_y);
+                end_y = FFMIN(b->height, end_y);
+
+                if (start_y != end_y){
+                    if (orientation == 0){
+                        SubBand * correlate_band = &p->band[0][0];
+                        int correlate_end_y = FFMIN(b->height, end_y + 1);
+                        int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
+                        decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
+                        correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
+                        dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
+                    }
+                    else
+                        decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
+                }
+            }
+        }
+        STOP_TIMER("decode_subband_slice");
+        }
+
+{   START_TIMER
+        for(; yd<slice_h; yd+=4){
+            ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
+        }
+    STOP_TIMER("idwt slice");}
+
+
+        if(s->qlog == LOSSLESS_QLOG){
+            for(; yq<slice_h && yq<h; yq++){
+                DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
+                for(x=0; x<w; x++){
+                    line[x] <<= FRAC_BITS;
+                }
+            }
+        }
+
+        predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
+
+        y = FFMIN(p->height, slice_starty);
+        end_y = FFMIN(p->height, slice_h);
+        while(y < end_y)
+            slice_buffer_release(&s->sb, y++);
+    }
+
+    slice_buffer_flush(&s->sb);
+
+STOP_TIMER("idwt + predict_slices")}
+    }
+
+    emms_c();
+
+    if(s->last_picture[s->max_ref_frames-1].data[0])
+        avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
+
+if(!(s->avctx->debug&2048))
+    *picture= s->current_picture;
+else
+    *picture= s->mconly_picture;
+
+    *data_size = sizeof(AVFrame);
+
+    bytes_read= c->bytestream - c->bytestream_start;
+    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
+
+    return bytes_read;
+}
+
+static int decode_end(AVCodecContext *avctx)
+{
+    SnowContext *s = avctx->priv_data;
+
+    slice_buffer_destroy(&s->sb);
+
+    common_end(s);
+
+    return 0;
+}
+
+AVCodec snow_decoder = {
+    "snow",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_SNOW,
+    sizeof(SnowContext),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame,
+    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
+    NULL
+};
+
+#ifdef CONFIG_ENCODERS
+AVCodec snow_encoder = {
+    "snow",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_SNOW,
+    sizeof(SnowContext),
+    encode_init,
+    encode_frame,
+    encode_end,
+};
+#endif
+
+
+#if 0
+#undef malloc
+#undef free
+#undef printf
+
+int main(){
+    int width=256;
+    int height=256;
+    int buffer[2][width*height];
+    SnowContext s;
+    int i;
+    s.spatial_decomposition_count=6;
+    s.spatial_decomposition_type=1;
+
+    printf("testing 5/3 DWT\n");
+    for(i=0; i<width*height; i++)
+        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
+
+    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+
+    for(i=0; i<width*height; i++)
+        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
+
+    printf("testing 9/7 DWT\n");
+    s.spatial_decomposition_type=0;
+    for(i=0; i<width*height; i++)
+        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
+
+    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+
+    for(i=0; i<width*height; i++)
+        if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
+
+#if 0
+    printf("testing AC coder\n");
+    memset(s.header_state, 0, sizeof(s.header_state));
+    ff_init_range_encoder(&s.c, buffer[0], 256*256);
+    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
+
+    for(i=-256; i<256; i++){
+START_TIMER
+        put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
+STOP_TIMER("put_symbol")
+    }
+    ff_rac_terminate(&s.c);
+
+    memset(s.header_state, 0, sizeof(s.header_state));
+    ff_init_range_decoder(&s.c, buffer[0], 256*256);
+    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
+
+    for(i=-256; i<256; i++){
+        int j;
+START_TIMER
+        j= get_symbol(&s.c, s.header_state, 1);
+STOP_TIMER("get_symbol")
+        if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
+    }
+#endif
+{
+int level, orientation, x, y;
+int64_t errors[8][4];
+int64_t g=0;
+
+    memset(errors, 0, sizeof(errors));
+    s.spatial_decomposition_count=3;
+    s.spatial_decomposition_type=0;
+    for(level=0; level<s.spatial_decomposition_count; level++){
+        for(orientation=level ? 1 : 0; orientation<4; orientation++){
+            int w= width  >> (s.spatial_decomposition_count-level);
+            int h= height >> (s.spatial_decomposition_count-level);
+            int stride= width  << (s.spatial_decomposition_count-level);
+            DWTELEM *buf= buffer[0];
+            int64_t error=0;
+
+            if(orientation&1) buf+=w;
+            if(orientation>1) buf+=stride>>1;
+
+            memset(buffer[0], 0, sizeof(int)*width*height);
+            buf[w/2 + h/2*stride]= 256*256;
+            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+            for(y=0; y<height; y++){
+                for(x=0; x<width; x++){
+                    int64_t d= buffer[0][x + y*width];
+                    error += d*d;
+                    if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
+                }
+                if(FFABS(height/2-y)<9 && level==2) printf("\n");
+            }
+            error= (int)(sqrt(error)+0.5);
+            errors[level][orientation]= error;
+            if(g) g=ff_gcd(g, error);
+            else g= error;
+        }
+    }
+    printf("static int const visual_weight[][4]={\n");
+    for(level=0; level<s.spatial_decomposition_count; level++){
+        printf("  {");
+        for(orientation=0; orientation<4; orientation++){
+            printf("%8"PRId64",", errors[level][orientation]/g);
+        }
+        printf("},\n");
+    }
+    printf("};\n");
+    {
+            int level=2;
+            int orientation=3;
+            int w= width  >> (s.spatial_decomposition_count-level);
+            int h= height >> (s.spatial_decomposition_count-level);
+            int stride= width  << (s.spatial_decomposition_count-level);
+            DWTELEM *buf= buffer[0];
+            int64_t error=0;
+
+            buf+=w;
+            buf+=stride>>1;
+
+            memset(buffer[0], 0, sizeof(int)*width*height);
+#if 1
+            for(y=0; y<height; y++){
+                for(x=0; x<width; x++){
+                    int tab[4]={0,2,3,1};
+                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
+                }
+            }
+            ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+#else
+            for(y=0; y<h; y++){
+                for(x=0; x<w; x++){
+                    buf[x + y*stride  ]=169;
+                    buf[x + y*stride-w]=64;
+                }
+            }
+            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+#endif
+            for(y=0; y<height; y++){
+                for(x=0; x<width; x++){
+                    int64_t d= buffer[0][x + y*width];
+                    error += d*d;
+                    if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
+                }
+                if(FFABS(height/2-y)<9) printf("\n");
+            }
+    }
+
+}
+    return 0;
+}
+#endif
+
diff --git a/contrib/ffmpeg/libavcodec/snow.h b/contrib/ffmpeg/libavcodec/snow.h
new file mode 100644
index 000000000..f7cee131a
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/snow.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2006 Robert Edele <yartrebo@earthlink.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _SNOW_H
+#define _SNOW_H
+
+#include "dsputil.h"
+
+#define MID_STATE 128
+
+#define MAX_DECOMPOSITIONS 8
+#define MAX_PLANES 4
+#define QSHIFT 5
+#define QROOT (1<<QSHIFT)
+#define LOSSLESS_QLOG -128
+#define FRAC_BITS 8
+#define MAX_REF_FRAMES 8
+
+#define LOG2_OBMC_MAX 8
+#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
+
+#define DWT_97 0
+#define DWT_53 1
+#define DWT_X  2
+
+/** Used to minimize the amount of memory used in order to optimize cache performance. **/
+struct slice_buffer_s {
+    DWTELEM * * line; ///< For use by idwt and predict_slices.
+    DWTELEM * * data_stack; ///< Used for internal purposes.
+    int data_stack_top;
+    int line_count;
+    int line_width;
+    int data_count;
+    DWTELEM * base_buffer; ///< Buffer that this structure is caching.
+};
+
+#define liftS lift
+#define lift5 lift
+#if 1
+#define W_AM 3
+#define W_AO 0
+#define W_AS 1
+
+#undef liftS
+#define W_BM 1
+#define W_BO 8
+#define W_BS 4
+
+#define W_CM 1
+#define W_CO 0
+#define W_CS 0
+
+#define W_DM 3
+#define W_DO 4
+#define W_DS 3
+#elif 0
+#define W_AM 55
+#define W_AO 16
+#define W_AS 5
+
+#define W_BM 3
+#define W_BO 32
+#define W_BS 6
+
+#define W_CM 127
+#define W_CO 64
+#define W_CS 7
+
+#define W_DM 7
+#define W_DO 8
+#define W_DS 4
+#elif 0
+#define W_AM 97
+#define W_AO 32
+#define W_AS 6
+
+#define W_BM 63
+#define W_BO 512
+#define W_BS 10
+
+#define W_CM 13
+#define W_CO 8
+#define W_CS 4
+
+#define W_DM 15
+#define W_DO 16
+#define W_DS 5
+
+#else
+
+#define W_AM 203
+#define W_AO 64
+#define W_AS 7
+
+#define W_BM 217
+#define W_BO 2048
+#define W_BS 12
+
+#define W_CM 113
+#define W_CO 64
+#define W_CS 7
+
+#define W_DM 227
+#define W_DO 128
+#define W_DS 9
+#endif
+
+extern void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
+extern void ff_snow_horizontal_compose97i(DWTELEM *b, int width);
+extern void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
+
+#ifdef CONFIG_SNOW_ENCODER
+int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
+int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
+#else
+static int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {assert (0);}
+static int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {assert (0);}
+#endif
+
+/* C bits used by mmx/sse2/altivec */
+
+static always_inline void snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){
+    (*i) = (width) - 2;
+
+    if (width & 1){
+        low[(*i)+1] = low[((*i)+1)>>1];
+        (*i)--;
+    }
+}
+
+static always_inline void snow_interleave_line_footer(int * i, DWTELEM * low, DWTELEM * high){
+    for (; (*i)>=0; (*i)-=2){
+        low[(*i)+1] = high[(*i)>>1];
+        low[*i] = low[(*i)>>1];
+    }
+}
+
+static always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){
+    for(; i<w; i++){
+        dst[i] = src[i] - ((mul * (ref[i] + ref[i + 1]) + add) >> shift);
+    }
+
+    if((width^lift_high)&1){
+        dst[w] = src[w] - ((mul * 2 * ref[w] + add) >> shift);
+    }
+}
+
+static always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){
+        for(; i<w; i++){
+            dst[i] = src[i] - (((-(ref[i] + ref[(i+1)])+W_BO) - 4 * src[i]) >> W_BS);
+        }
+
+        if(width&1){
+            dst[w] = src[w] - (((-2 * ref[w] + W_BO) - 4 * src[w]) >> W_BS);
+        }
+}
+
+#endif
diff --git a/contrib/ffmpeg/libavcodec/sonic.c b/contrib/ffmpeg/libavcodec/sonic.c
new file mode 100644
index 000000000..2f798cc03
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/sonic.c
@@ -0,0 +1,981 @@
+/*
+ * Simple free lossless/lossy audio codec
+ * Copyright (c) 2004 Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+#include "bitstream.h"
+#include "golomb.h"
+
+/**
+ * @file sonic.c
+ * Simple free lossless/lossy audio codec
+ * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
+ * Written and designed by Alex Beregszaszi
+ *
+ * TODO:
+ *  - CABAC put/get_symbol
+ *  - independent quantizer for channels
+ *  - >2 channels support
+ *  - more decorrelation types
+ *  - more tap_quant tests
+ *  - selectable intlist writers/readers (bonk-style, golomb, cabac)
+ */
+
+#define MAX_CHANNELS 2
+
+#define MID_SIDE 0
+#define LEFT_SIDE 1
+#define RIGHT_SIDE 2
+
+typedef struct SonicContext {
+    int lossless, decorrelation;
+
+    int num_taps, downsampling;
+    double quantization;
+
+    int channels, samplerate, block_align, frame_size;
+
+    int *tap_quant;
+    int *int_samples;
+    int *coded_samples[MAX_CHANNELS];
+
+    // for encoding
+    int *tail;
+    int tail_size;
+    int *window;
+    int window_size;
+
+    // for decoding
+    int *predictor_k;
+    int *predictor_state[MAX_CHANNELS];
+} SonicContext;
+
+#define LATTICE_SHIFT   10
+#define SAMPLE_SHIFT    4
+#define LATTICE_FACTOR  (1 << LATTICE_SHIFT)
+#define SAMPLE_FACTOR   (1 << SAMPLE_SHIFT)
+
+#define BASE_QUANT      0.6
+#define RATE_VARIATION  3.0
+
+static inline int divide(int a, int b)
+{
+    if (a < 0)
+        return -( (-a + b/2)/b );
+    else
+        return (a + b/2)/b;
+}
+
+static inline int shift(int a,int b)
+{
+    return (a+(1<<(b-1))) >> b;
+}
+
+static inline int shift_down(int a,int b)
+{
+    return (a>>b)+((a<0)?1:0);
+}
+
+#if 1
+static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
+{
+    int i;
+
+    for (i = 0; i < entries; i++)
+        set_se_golomb(pb, buf[i]);
+
+    return 1;
+}
+
+static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
+{
+    int i;
+
+    for (i = 0; i < entries; i++)
+        buf[i] = get_se_golomb(gb);
+
+    return 1;
+}
+
+#else
+
+#define ADAPT_LEVEL 8
+
+static int bits_to_store(uint64_t x)
+{
+    int res = 0;
+
+    while(x)
+    {
+        res++;
+        x >>= 1;
+    }
+    return res;
+}
+
+static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
+{
+    int i, bits;
+
+    if (!max)
+        return;
+
+    bits = bits_to_store(max);
+
+    for (i = 0; i < bits-1; i++)
+        put_bits(pb, 1, value & (1 << i));
+
+    if ( (value | (1 << (bits-1))) <= max)
+        put_bits(pb, 1, value & (1 << (bits-1)));
+}
+
+static unsigned int read_uint_max(GetBitContext *gb, int max)
+{
+    int i, bits, value = 0;
+
+    if (!max)
+        return 0;
+
+    bits = bits_to_store(max);
+
+    for (i = 0; i < bits-1; i++)
+        if (get_bits1(gb))
+            value += 1 << i;
+
+    if ( (value | (1<<(bits-1))) <= max)
+        if (get_bits1(gb))
+            value += 1 << (bits-1);
+
+    return value;
+}
+
+static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
+{
+    int i, j, x = 0, low_bits = 0, max = 0;
+    int step = 256, pos = 0, dominant = 0, any = 0;
+    int *copy, *bits;
+
+    copy = av_mallocz(4* entries);
+    if (!copy)
+        return -1;
+
+    if (base_2_part)
+    {
+        int energy = 0;
+
+        for (i = 0; i < entries; i++)
+            energy += abs(buf[i]);
+
+        low_bits = bits_to_store(energy / (entries * 2));
+        if (low_bits > 15)
+            low_bits = 15;
+
+        put_bits(pb, 4, low_bits);
+    }
+
+    for (i = 0; i < entries; i++)
+    {
+        put_bits(pb, low_bits, abs(buf[i]));
+        copy[i] = abs(buf[i]) >> low_bits;
+        if (copy[i] > max)
+            max = abs(copy[i]);
+    }
+
+    bits = av_mallocz(4* entries*max);
+    if (!bits)
+    {
+//        av_free(copy);
+        return -1;
+    }
+
+    for (i = 0; i <= max; i++)
+    {
+        for (j = 0; j < entries; j++)
+            if (copy[j] >= i)
+                bits[x++] = copy[j] > i;
+    }
+
+    // store bitstream
+    while (pos < x)
+    {
+        int steplet = step >> 8;
+
+        if (pos + steplet > x)
+            steplet = x - pos;
+
+        for (i = 0; i < steplet; i++)
+            if (bits[i+pos] != dominant)
+                any = 1;
+
+        put_bits(pb, 1, any);
+
+        if (!any)
+        {
+            pos += steplet;
+            step += step / ADAPT_LEVEL;
+        }
+        else
+        {
+            int interloper = 0;
+
+            while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
+                interloper++;
+
+            // note change
+            write_uint_max(pb, interloper, (step >> 8) - 1);
+
+            pos += interloper + 1;
+            step -= step / ADAPT_LEVEL;
+        }
+
+        if (step < 256)
+        {
+            step = 65536 / step;
+            dominant = !dominant;
+        }
+    }
+
+    // store signs
+    for (i = 0; i < entries; i++)
+        if (buf[i])
+            put_bits(pb, 1, buf[i] < 0);
+
+//    av_free(bits);
+//    av_free(copy);
+
+    return 0;
+}
+
+static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
+{
+    int i, low_bits = 0, x = 0;
+    int n_zeros = 0, step = 256, dominant = 0;
+    int pos = 0, level = 0;
+    int *bits = av_mallocz(4* entries);
+
+    if (!bits)
+        return -1;
+
+    if (base_2_part)
+    {
+        low_bits = get_bits(gb, 4);
+
+        if (low_bits)
+            for (i = 0; i < entries; i++)
+                buf[i] = get_bits(gb, low_bits);
+    }
+
+//    av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
+
+    while (n_zeros < entries)
+    {
+        int steplet = step >> 8;
+
+        if (!get_bits1(gb))
+        {
+            for (i = 0; i < steplet; i++)
+                bits[x++] = dominant;
+
+            if (!dominant)
+                n_zeros += steplet;
+
+            step += step / ADAPT_LEVEL;
+        }
+        else
+        {
+            int actual_run = read_uint_max(gb, steplet-1);
+
+//            av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
+
+            for (i = 0; i < actual_run; i++)
+                bits[x++] = dominant;
+
+            bits[x++] = !dominant;
+
+            if (!dominant)
+                n_zeros += actual_run;
+            else
+                n_zeros++;
+
+            step -= step / ADAPT_LEVEL;
+        }
+
+        if (step < 256)
+        {
+            step = 65536 / step;
+            dominant = !dominant;
+        }
+    }
+
+    // reconstruct unsigned values
+    n_zeros = 0;
+    for (i = 0; n_zeros < entries; i++)
+    {
+        while(1)
+        {
+            if (pos >= entries)
+            {
+                pos = 0;
+                level += 1 << low_bits;
+            }
+
+            if (buf[pos] >= level)
+                break;
+
+            pos++;
+        }
+
+        if (bits[i])
+            buf[pos] += 1 << low_bits;
+        else
+            n_zeros++;
+
+        pos++;
+    }
+//    av_free(bits);
+
+    // read signs
+    for (i = 0; i < entries; i++)
+        if (buf[i] && get_bits1(gb))
+            buf[i] = -buf[i];
+
+//    av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
+
+    return 0;
+}
+#endif
+
+static void predictor_init_state(int *k, int *state, int order)
+{
+    int i;
+
+    for (i = order-2; i >= 0; i--)
+    {
+        int j, p, x = state[i];
+
+        for (j = 0, p = i+1; p < order; j++,p++)
+            {
+            int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
+            state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
+            x = tmp;
+        }
+    }
+}
+
+static int predictor_calc_error(int *k, int *state, int order, int error)
+{
+    int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
+
+#if 1
+    int *k_ptr = &(k[order-2]),
+        *state_ptr = &(state[order-2]);
+    for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
+    {
+        int k_value = *k_ptr, state_value = *state_ptr;
+        x -= shift_down(k_value * state_value, LATTICE_SHIFT);
+        state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
+    }
+#else
+    for (i = order-2; i >= 0; i--)
+    {
+        x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
+        state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
+    }
+#endif
+
+    // don't drift too far, to avoid overflows
+    if (x >  (SAMPLE_FACTOR<<16)) x =  (SAMPLE_FACTOR<<16);
+    if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
+
+    state[0] = x;
+
+    return x;
+}
+
+#ifdef CONFIG_ENCODERS
+// Heavily modified Levinson-Durbin algorithm which
+// copes better with quantization, and calculates the
+// actual whitened result as it goes.
+
+static void modified_levinson_durbin(int *window, int window_entries,
+        int *out, int out_entries, int channels, int *tap_quant)
+{
+    int i;
+    int *state = av_mallocz(4* window_entries);
+
+    memcpy(state, window, 4* window_entries);
+
+    for (i = 0; i < out_entries; i++)
+    {
+        int step = (i+1)*channels, k, j;
+        double xx = 0.0, xy = 0.0;
+#if 1
+        int *x_ptr = &(window[step]), *state_ptr = &(state[0]);
+        j = window_entries - step;
+        for (;j>=0;j--,x_ptr++,state_ptr++)
+        {
+            double x_value = *x_ptr, state_value = *state_ptr;
+            xx += state_value*state_value;
+            xy += x_value*state_value;
+        }
+#else
+        for (j = 0; j <= (window_entries - step); j++);
+        {
+            double stepval = window[step+j], stateval = window[j];
+//            xx += (double)window[j]*(double)window[j];
+//            xy += (double)window[step+j]*(double)window[j];
+            xx += stateval*stateval;
+            xy += stepval*stateval;
+        }
+#endif
+        if (xx == 0.0)
+            k = 0;
+        else
+            k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
+
+        if (k > (LATTICE_FACTOR/tap_quant[i]))
+            k = LATTICE_FACTOR/tap_quant[i];
+        if (-k > (LATTICE_FACTOR/tap_quant[i]))
+            k = -(LATTICE_FACTOR/tap_quant[i]);
+
+        out[i] = k;
+        k *= tap_quant[i];
+
+#if 1
+        x_ptr = &(window[step]);
+        state_ptr = &(state[0]);
+        j = window_entries - step;
+        for (;j>=0;j--,x_ptr++,state_ptr++)
+        {
+            int x_value = *x_ptr, state_value = *state_ptr;
+            *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
+            *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
+        }
+#else
+        for (j=0; j <= (window_entries - step); j++)
+        {
+            int stepval = window[step+j], stateval=state[j];
+            window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
+            state[j] += shift_down(k * stepval, LATTICE_SHIFT);
+        }
+#endif
+    }
+
+    av_free(state);
+}
+#endif /* CONFIG_ENCODERS */
+
+static int samplerate_table[] =
+    { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 };
+
+#ifdef CONFIG_ENCODERS
+
+static inline int code_samplerate(int samplerate)
+{
+    switch (samplerate)
+    {
+        case 44100: return 0;
+        case 22050: return 1;
+        case 11025: return 2;
+        case 96000: return 3;
+        case 48000: return 4;
+        case 32000: return 5;
+        case 24000: return 6;
+        case 16000: return 7;
+        case 8000: return 8;
+    }
+    return -1;
+}
+
+static int sonic_encode_init(AVCodecContext *avctx)
+{
+    SonicContext *s = avctx->priv_data;
+    PutBitContext pb;
+    int i, version = 0;
+
+    if (avctx->channels > MAX_CHANNELS)
+    {
+        av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
+        return -1; /* only stereo or mono for now */
+    }
+
+    if (avctx->channels == 2)
+        s->decorrelation = MID_SIDE;
+
+    if (avctx->codec->id == CODEC_ID_SONIC_LS)
+    {
+        s->lossless = 1;
+        s->num_taps = 32;
+        s->downsampling = 1;
+        s->quantization = 0.0;
+    }
+    else
+    {
+        s->num_taps = 128;
+        s->downsampling = 2;
+        s->quantization = 1.0;
+    }
+
+    // max tap 2048
+    if ((s->num_taps < 32) || (s->num_taps > 1024) ||
+        ((s->num_taps>>5)<<5 != s->num_taps))
+    {
+        av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
+        return -1;
+    }
+
+    // generate taps
+    s->tap_quant = av_mallocz(4* s->num_taps);
+    for (i = 0; i < s->num_taps; i++)
+        s->tap_quant[i] = (int)(sqrt(i+1));
+
+    s->channels = avctx->channels;
+    s->samplerate = avctx->sample_rate;
+
+    s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
+    s->frame_size = s->channels*s->block_align*s->downsampling;
+
+    s->tail = av_mallocz(4* s->num_taps*s->channels);
+    if (!s->tail)
+        return -1;
+    s->tail_size = s->num_taps*s->channels;
+
+    s->predictor_k = av_mallocz(4 * s->num_taps);
+    if (!s->predictor_k)
+        return -1;
+
+    for (i = 0; i < s->channels; i++)
+    {
+        s->coded_samples[i] = av_mallocz(4* s->block_align);
+        if (!s->coded_samples[i])
+            return -1;
+    }
+
+    s->int_samples = av_mallocz(4* s->frame_size);
+
+    s->window_size = ((2*s->tail_size)+s->frame_size);
+    s->window = av_mallocz(4* s->window_size);
+    if (!s->window)
+        return -1;
+
+    avctx->extradata = av_mallocz(16);
+    if (!avctx->extradata)
+        return -1;
+    init_put_bits(&pb, avctx->extradata, 16*8);
+
+    put_bits(&pb, 2, version); // version
+    if (version == 1)
+    {
+        put_bits(&pb, 2, s->channels);
+        put_bits(&pb, 4, code_samplerate(s->samplerate));
+    }
+    put_bits(&pb, 1, s->lossless);
+    if (!s->lossless)
+        put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
+    put_bits(&pb, 2, s->decorrelation);
+    put_bits(&pb, 2, s->downsampling);
+    put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
+    put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
+
+    flush_put_bits(&pb);
+    avctx->extradata_size = put_bits_count(&pb)/8;
+
+    av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
+        version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
+
+    avctx->coded_frame = avcodec_alloc_frame();
+    if (!avctx->coded_frame)
+        return -ENOMEM;
+    avctx->coded_frame->key_frame = 1;
+    avctx->frame_size = s->block_align*s->downsampling;
+
+    return 0;
+}
+
+static int sonic_encode_close(AVCodecContext *avctx)
+{
+    SonicContext *s = avctx->priv_data;
+    int i;
+
+    av_freep(&avctx->coded_frame);
+
+    for (i = 0; i < s->channels; i++)
+        av_free(s->coded_samples[i]);
+
+    av_free(s->predictor_k);
+    av_free(s->tail);
+    av_free(s->tap_quant);
+    av_free(s->window);
+    av_free(s->int_samples);
+
+    return 0;
+}
+
+static int sonic_encode_frame(AVCodecContext *avctx,
+                            uint8_t *buf, int buf_size, void *data)
+{
+    SonicContext *s = avctx->priv_data;
+    PutBitContext pb;
+    int i, j, ch, quant = 0, x = 0;
+    short *samples = data;
+
+    init_put_bits(&pb, buf, buf_size*8);
+
+    // short -> internal
+    for (i = 0; i < s->frame_size; i++)
+        s->int_samples[i] = samples[i];
+
+    if (!s->lossless)
+        for (i = 0; i < s->frame_size; i++)
+            s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
+
+    switch(s->decorrelation)
+    {
+        case MID_SIDE:
+            for (i = 0; i < s->frame_size; i += s->channels)
+            {
+                s->int_samples[i] += s->int_samples[i+1];
+                s->int_samples[i+1] -= shift(s->int_samples[i], 1);
+            }
+            break;
+        case LEFT_SIDE:
+            for (i = 0; i < s->frame_size; i += s->channels)
+                s->int_samples[i+1] -= s->int_samples[i];
+            break;
+        case RIGHT_SIDE:
+            for (i = 0; i < s->frame_size; i += s->channels)
+                s->int_samples[i] -= s->int_samples[i+1];
+            break;
+    }
+
+    memset(s->window, 0, 4* s->window_size);
+
+    for (i = 0; i < s->tail_size; i++)
+        s->window[x++] = s->tail[i];
+
+    for (i = 0; i < s->frame_size; i++)
+        s->window[x++] = s->int_samples[i];
+
+    for (i = 0; i < s->tail_size; i++)
+        s->window[x++] = 0;
+
+    for (i = 0; i < s->tail_size; i++)
+        s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
+
+    // generate taps
+    modified_levinson_durbin(s->window, s->window_size,
+                s->predictor_k, s->num_taps, s->channels, s->tap_quant);
+    if (intlist_write(&pb, s->predictor_k, s->num_taps, 0) < 0)
+        return -1;
+
+    for (ch = 0; ch < s->channels; ch++)
+    {
+        x = s->tail_size+ch;
+        for (i = 0; i < s->block_align; i++)
+        {
+            int sum = 0;
+            for (j = 0; j < s->downsampling; j++, x += s->channels)
+                sum += s->window[x];
+            s->coded_samples[ch][i] = sum;
+        }
+    }
+
+    // simple rate control code
+    if (!s->lossless)
+    {
+        double energy1 = 0.0, energy2 = 0.0;
+        for (ch = 0; ch < s->channels; ch++)
+        {
+            for (i = 0; i < s->block_align; i++)
+            {
+                double sample = s->coded_samples[ch][i];
+                energy2 += sample*sample;
+                energy1 += fabs(sample);
+            }
+        }
+
+        energy2 = sqrt(energy2/(s->channels*s->block_align));
+        energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align);
+
+        // increase bitrate when samples are like a gaussian distribution
+        // reduce bitrate when samples are like a two-tailed exponential distribution
+
+        if (energy2 > energy1)
+            energy2 += (energy2-energy1)*RATE_VARIATION;
+
+        quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
+//        av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
+
+        if (quant < 1)
+            quant = 1;
+        if (quant > 65535)
+            quant = 65535;
+
+        set_ue_golomb(&pb, quant);
+
+        quant *= SAMPLE_FACTOR;
+    }
+
+    // write out coded samples
+    for (ch = 0; ch < s->channels; ch++)
+    {
+        if (!s->lossless)
+            for (i = 0; i < s->block_align; i++)
+                s->coded_samples[ch][i] = divide(s->coded_samples[ch][i], quant);
+
+        if (intlist_write(&pb, s->coded_samples[ch], s->block_align, 1) < 0)
+            return -1;
+    }
+
+//    av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
+
+    flush_put_bits(&pb);
+    return (put_bits_count(&pb)+7)/8;
+}
+#endif //CONFIG_ENCODERS
+
+#ifdef CONFIG_DECODERS
+static int sonic_decode_init(AVCodecContext *avctx)
+{
+    SonicContext *s = avctx->priv_data;
+    GetBitContext gb;
+    int i, version;
+
+    s->channels = avctx->channels;
+    s->samplerate = avctx->sample_rate;
+
+    if (!avctx->extradata)
+    {
+        av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
+        return -1;
+    }
+
+    init_get_bits(&gb, avctx->extradata, avctx->extradata_size);
+
+    version = get_bits(&gb, 2);
+    if (version > 1)
+    {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
+        return -1;
+    }
+
+    if (version == 1)
+    {
+        s->channels = get_bits(&gb, 2);
+        s->samplerate = samplerate_table[get_bits(&gb, 4)];
+        av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
+            s->channels, s->samplerate);
+    }
+
+    if (s->channels > MAX_CHANNELS)
+    {
+        av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
+        return -1;
+    }
+
+    s->lossless = get_bits1(&gb);
+    if (!s->lossless)
+        skip_bits(&gb, 3); // XXX FIXME
+    s->decorrelation = get_bits(&gb, 2);
+
+    s->downsampling = get_bits(&gb, 2);
+    s->num_taps = (get_bits(&gb, 5)+1)<<5;
+    if (get_bits1(&gb)) // XXX FIXME
+        av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
+
+    s->block_align = (int)(2048.0*(s->samplerate/44100))/s->downsampling;
+    s->frame_size = s->channels*s->block_align*s->downsampling;
+//    avctx->frame_size = s->block_align;
+
+    av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
+        version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
+
+    // generate taps
+    s->tap_quant = av_mallocz(4* s->num_taps);
+    for (i = 0; i < s->num_taps; i++)
+        s->tap_quant[i] = (int)(sqrt(i+1));
+
+    s->predictor_k = av_mallocz(4* s->num_taps);
+
+    for (i = 0; i < s->channels; i++)
+    {
+        s->predictor_state[i] = av_mallocz(4* s->num_taps);
+        if (!s->predictor_state[i])
+            return -1;
+    }
+
+    for (i = 0; i < s->channels; i++)
+    {
+        s->coded_samples[i] = av_mallocz(4* s->block_align);
+        if (!s->coded_samples[i])
+            return -1;
+    }
+    s->int_samples = av_mallocz(4* s->frame_size);
+
+    return 0;
+}
+
+static int sonic_decode_close(AVCodecContext *avctx)
+{
+    SonicContext *s = avctx->priv_data;
+    int i;
+
+    av_free(s->int_samples);
+    av_free(s->tap_quant);
+    av_free(s->predictor_k);
+
+    for (i = 0; i < s->channels; i++)
+    {
+        av_free(s->predictor_state[i]);
+        av_free(s->coded_samples[i]);
+    }
+
+    return 0;
+}
+
+static int sonic_decode_frame(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    SonicContext *s = avctx->priv_data;
+    GetBitContext gb;
+    int i, quant, ch, j;
+    short *samples = data;
+
+    if (buf_size == 0) return 0;
+
+//    av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
+
+    init_get_bits(&gb, buf, buf_size*8);
+
+    intlist_read(&gb, s->predictor_k, s->num_taps, 0);
+
+    // dequantize
+    for (i = 0; i < s->num_taps; i++)
+        s->predictor_k[i] *= s->tap_quant[i];
+
+    if (s->lossless)
+        quant = 1;
+    else
+        quant = get_ue_golomb(&gb) * SAMPLE_FACTOR;
+
+//    av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
+
+    for (ch = 0; ch < s->channels; ch++)
+    {
+        int x = ch;
+
+        predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
+
+        intlist_read(&gb, s->coded_samples[ch], s->block_align, 1);
+
+        for (i = 0; i < s->block_align; i++)
+        {
+            for (j = 0; j < s->downsampling - 1; j++)
+            {
+                s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
+                x += s->channels;
+            }
+
+            s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
+            x += s->channels;
+        }
+
+        for (i = 0; i < s->num_taps; i++)
+            s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
+    }
+
+    switch(s->decorrelation)
+    {
+        case MID_SIDE:
+            for (i = 0; i < s->frame_size; i += s->channels)
+            {
+                s->int_samples[i+1] += shift(s->int_samples[i], 1);
+                s->int_samples[i] -= s->int_samples[i+1];
+            }
+            break;
+        case LEFT_SIDE:
+            for (i = 0; i < s->frame_size; i += s->channels)
+                s->int_samples[i+1] += s->int_samples[i];
+            break;
+        case RIGHT_SIDE:
+            for (i = 0; i < s->frame_size; i += s->channels)
+                s->int_samples[i] += s->int_samples[i+1];
+            break;
+    }
+
+    if (!s->lossless)
+        for (i = 0; i < s->frame_size; i++)
+            s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
+
+    // internal -> short
+    for (i = 0; i < s->frame_size; i++)
+    {
+        if (s->int_samples[i] > 32767)
+            samples[i] = 32767;
+        else if (s->int_samples[i] < -32768)
+            samples[i] = -32768;
+        else
+            samples[i] = s->int_samples[i];
+    }
+
+    align_get_bits(&gb);
+
+    *data_size = s->frame_size * 2;
+
+    return (get_bits_count(&gb)+7)/8;
+}
+#endif
+
+#ifdef CONFIG_ENCODERS
+AVCodec sonic_encoder = {
+    "sonic",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_SONIC,
+    sizeof(SonicContext),
+    sonic_encode_init,
+    sonic_encode_frame,
+    sonic_encode_close,
+    NULL,
+};
+
+AVCodec sonic_ls_encoder = {
+    "sonicls",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_SONIC_LS,
+    sizeof(SonicContext),
+    sonic_encode_init,
+    sonic_encode_frame,
+    sonic_encode_close,
+    NULL,
+};
+#endif
+
+#ifdef CONFIG_DECODERS
+AVCodec sonic_decoder = {
+    "sonic",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_SONIC,
+    sizeof(SonicContext),
+    sonic_decode_init,
+    NULL,
+    sonic_decode_close,
+    sonic_decode_frame,
+};
+#endif
diff --git a/contrib/ffmpeg/libavcodec/sp5x.h b/contrib/ffmpeg/libavcodec/sp5x.h
new file mode 100644
index 000000000..0d0d3551f
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/sp5x.h
@@ -0,0 +1,332 @@
+/*
+ * Sunplus JPEG tables
+ * Copyright (c) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef SP5X_H
+#define SP5X_H
+
+static const uint8_t sp5x_data_sof[] =
+{
+    0xFF, 0xC0,       /* SOF */
+    0x00, 0x11,       /* len */
+    0x08,             /* bits */
+    0x00, 0xf0,       /* height (default: 240) */
+    0x01, 0x40,       /* width (default: 240) */
+    0x03,             /* nb components */
+    0x01, 0x22, 0x00, /* 21 vs 22 ? */
+    0x02, 0x11, 0x01,
+    0x03, 0x11, 0x01
+};
+
+static const uint8_t sp5x_data_sos[] =
+{
+    0xFF, 0xDA,       /* SOS */
+    0x00, 0x0C,       /* len */
+    0x03,             /* nb components */
+    0x01, 0x00,
+    0x02, 0x11,
+    0x03, 0x11,
+    0x00,             /* Ss */
+    0x3F,             /* Se */
+    0x00              /* Ah/Al */
+};
+
+static const uint8_t sp5x_data_dqt[] =
+{
+    0xFF, 0xDB, /* DQT */
+    0x00, 0x84, /* len */
+    0x00,
+    0x05, 0x03, 0x04, 0x04, 0x04, 0x03, 0x05, 0x04,
+    0x04, 0x04, 0x06, 0x05, 0x05, 0x06, 0x08, 0x0D,
+    0x08, 0x08, 0x07, 0x07, 0x08, 0x10, 0x0C, 0x0C,
+    0x0A, 0x0D, 0x14, 0x11, 0x15, 0x14, 0x13, 0x11,
+    0x13, 0x13, 0x16, 0x18, 0x1F, 0x1A, 0x16, 0x17,
+    0x1E, 0x17, 0x13, 0x13, 0x1B, 0x25, 0x1C, 0x1E,
+    0x20, 0x21, 0x23, 0x23, 0x23, 0x15, 0x1A, 0x27,
+    0x29, 0x26, 0x22, 0x29, 0x1F, 0x22, 0x23, 0x22,
+    0x01,
+    0x05, 0x06, 0x06, 0x08, 0x07, 0x08, 0x10, 0x08,
+    0x08, 0x10, 0x22, 0x16, 0x13, 0x16, 0x22, 0x22,
+    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
+    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
+    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
+    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
+    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
+    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22
+};
+
+static const uint8_t sp5x_data_dht[] = {
+    0xFF, 0xC4, /* DHT */
+    0x01, 0xA2, /* len */
+    0x00, 0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01,
+    0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
+    0x07, 0x08, 0x09, 0x0A, 0x0B, 0x01, 0x00, 0x03,
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
+    0x0A, 0x0B, 0x10, 0x00, 0x02, 0x01, 0x03, 0x03,
+    0x02, 0x04, 0x03, 0x05, 0x05, 0x04, 0x04, 0x00,
+    0x00, 0x01, 0x7D, 0x01, 0x02, 0x03, 0x00, 0x04,
+    0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, 0x13,
+    0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81,
+    0x91, 0xA1, 0x08, 0x23, 0x42, 0xB1, 0xC1, 0x15,
+    0x52, 0xD1, 0xF0, 0x24, 0x33, 0x62, 0x72, 0x82,
+    0x09, 0x0A, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x25,
+    0x26, 0x27, 0x28, 0x29, 0x2A, 0x34, 0x35, 0x36,
+    0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46,
+    0x47, 0x48, 0x49, 0x4A, 0x53, 0x54, 0x55, 0x56,
+    0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66,
+    0x67, 0x68, 0x69, 0x6A, 0x73, 0x74, 0x75, 0x76,
+    0x77, 0x78, 0x79, 0x7A, 0x83, 0x84, 0x85, 0x86,
+    0x87, 0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95,
+    0x96, 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4,
+    0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3,
+    0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2,
+    0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA,
+    0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9,
+    0xDA, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+    0xE8, 0xE9, 0xEA, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5,
+    0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0x11, 0x00, 0x02,
+    0x01, 0x02, 0x04, 0x04, 0x03, 0x04, 0x07, 0x05,
+    0x04, 0x04, 0x00, 0x01, 0x02, 0x77, 0x00, 0x01,
+    0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06,
+    0x12, 0x41, 0x51, 0x07, 0x61, 0x71, 0x13, 0x22,
+    0x32, 0x81, 0x08, 0x14, 0x42, 0x91, 0xA1, 0xB1,
+    0xC1, 0x09, 0x23, 0x33, 0x52, 0xF0, 0x15, 0x62,
+    0x72, 0xD1, 0x0A, 0x16, 0x24, 0x34, 0xE1, 0x25,
+    0xF1, 0x17, 0x18, 0x19, 0x1A, 0x26, 0x27, 0x28,
+    0x29, 0x2A, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A,
+    0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A,
+    0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A,
+    0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A,
+    0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A,
+    0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+    0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+    0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
+    0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
+    0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5,
+    0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4,
+    0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE2, 0xE3,
+    0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF2,
+    0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA
+};
+
+
+static const uint8_t sp5x_quant_table[20][64]=
+{
+    /* index 0, Q50 */
+    {  16, 11, 12, 14, 12, 10, 16, 14, 13, 14, 18, 17, 16, 19, 24, 40,
+       26, 24, 22, 22, 24, 49, 35, 37, 29, 40, 58, 51, 61, 60, 57, 51,
+       56, 55, 64, 72, 92, 78, 64, 68, 87, 69, 55, 56, 80,109, 81, 87,
+       95, 98,103,104,103, 62, 77,113,121,112,100,120, 92,101,103, 99 },
+    {  17, 18, 18, 24, 21, 24, 47, 26, 26, 47, 99, 66, 56, 66, 99, 99,
+       99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
+       99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
+       99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99 },
+
+    /* index 1, Q70 */
+    {  10,  7,  7,  8,  7,  6, 10,  8,  8,  8, 11, 10, 10, 11, 14, 24,
+       16, 14, 13, 13, 14, 29, 21, 22, 17, 24, 35, 31, 37, 36, 34, 31,
+       34, 33, 38, 43, 55, 47, 38, 41, 52, 41, 33, 34, 48, 65, 49, 52,
+       57, 59, 62, 62, 62, 37, 46, 68, 73, 67, 60, 72, 55, 61, 62, 59 },
+    {  10, 11, 11, 14, 13, 14, 28, 16, 16, 28, 59, 40, 34, 40, 59, 59,
+       59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+       59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+       59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59 },
+
+    /* index 2, Q80 */
+    {   6,  4,  5,  6,  5,  4,  6,  6,  5,  6,  7,  7,  6,  8, 10, 16,
+       10, 10,  9,  9, 10, 20, 14, 15, 12, 16, 23, 20, 24, 24, 23, 20,
+       22, 22, 26, 29, 37, 31, 26, 27, 35, 28, 22, 22, 32, 44, 32, 35,
+       38, 39, 41, 42, 41, 25, 31, 45, 48, 45, 40, 48, 37, 40, 41, 40 },
+    {   7,  7,  7, 10,  8, 10, 19, 10, 10, 19, 40, 26, 22, 26, 40, 40,
+       40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+       40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+       40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40 },
+
+    /* index 3, Q85 */
+    {   5,  3,  4,  4,  4,  3,  5,  4,  4,  4,  5,  5,  5,  6,  7, 12,
+        8,  7,  7,  7,  7, 15, 11, 11,  9, 12, 17, 15, 18, 18, 17, 15,
+       17, 17, 19, 22, 28, 23, 19, 20, 26, 21, 17, 17, 24, 33, 24, 26,
+       29, 29, 31, 31, 31, 19, 23, 34, 36, 34, 30, 36, 28, 30, 31, 30 },
+    {   5,  5,  5,  7,  6,  7, 14,  8,  8, 14, 30, 20, 17, 20, 30, 30,
+       30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+       30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+       30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30 },
+
+    /* index 4, Q90 */
+    {   3,  2,  2,  3,  2,  2,  3,  3,  3,  3,  4,  3,  3,  4,  5,  8,
+        5,  5,  4,  4,  5, 10,  7,  7,  6,  8, 12, 10, 12, 12, 11, 10,
+       11, 11, 13, 14, 18, 16, 13, 14, 17, 14, 11, 11, 16, 22, 16, 17,
+       19, 20, 21, 21, 21, 12, 15, 23, 24, 22, 20, 24, 18, 20, 21, 20 },
+    {   3,  4,  4,  5,  4,  5,  9,  5,  5,  9, 20, 13, 11, 13, 20, 20,
+       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 },
+
+    /* index 5, Q60 */
+    {  13,  9, 10, 11, 10,  8, 13, 11, 10, 11, 14, 14, 13, 15, 19, 32,
+       21, 19, 18, 18, 19, 39, 28, 30, 23, 32, 46, 41, 49, 48, 46, 41,
+       45, 44, 51, 58, 74, 62, 51, 54, 70, 55, 44, 45, 64, 87, 65, 70,
+       76, 78, 82, 83, 82, 50, 62, 90, 97, 90, 80, 96, 74, 81, 82, 79 },
+    {  14, 14, 14, 19, 17, 19, 38, 21, 21, 38, 79, 53, 45, 53, 79, 79,
+       79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
+       79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
+       79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79 },
+
+    /* index 6, Q25 */
+    {  32, 22, 24, 28, 24, 20, 32, 28, 26, 28, 36, 34, 32, 38, 48, 80,
+       52, 48, 44, 44, 48, 98, 70, 74, 58, 80,116,102,122,120,114,102,
+      112,110,128,144,184,156,128,136,174,138,110,112,160,218,162,174,
+      190,196,206,208,206,124,154,226,242,224,200,240,184,202,206,198 },
+    {  34, 36, 36, 48, 42, 48, 94, 52, 52, 94,198,132,112,132,198,198,
+      198,198,198,198,198,198,198,198,198,198,198,198,198,198,198,198,
+      198,198,198,198,198,198,198,198,198,198,198,198,198,198,198,198,
+      198,198,198,198,198,198,198,198,198,198,198,198,198,198,198,198 },
+
+    /* index 7, Q95 */
+    {   2,  1,  1,  1,  1,  1,  2,  1,  1,  1,  2,  2,  2,  2,  2,  4,
+        3,  2,  2,  2,  2,  5,  4,  4,  3,  4,  6,  5,  6,  6,  6,  5,
+        6,  6,  6,  7,  9,  8,  6,  7,  9,  7,  6,  6,  8, 11,  8,  9,
+       10, 10, 10, 10, 10,  6,  8, 11, 12, 11, 10, 12,  9, 10, 10, 10 },
+    {   2,  2,  2,  2,  2,  2,  5,  3,  3,  5, 10,  7,  6,  7, 10, 10,
+       10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+       10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+       10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 },
+
+    /* index 8, Q93 */
+    {   2,  2,  2,  2,  2,  1,  2,  2,  2,  2,  3,  2,  2,  3,  3,  6,
+        4,  3,  3,  3,  3,  7,  5,  5,  4,  6,  8,  7,  9,  8,  8,  7,
+        8,  8,  9, 10, 13, 11,  9, 10, 12, 10,  8,  8, 11, 15, 11, 12,
+       13, 14, 14, 15, 14,  9, 11, 16, 17, 16, 14, 17, 13, 14, 14, 14 },
+    {   2,  3,  3,  3,  3,  3,  7,  4,  4,  7, 14,  9,  8,  9, 14, 14,
+       14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+       14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+       14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14 },
+
+    /* index 9, Q40 */
+    {  20, 14, 15, 18, 15, 13, 20, 18, 16, 18, 23, 21, 20, 24, 30, 50,
+       33, 30, 28, 28, 30, 61, 44, 46, 36, 50, 73, 64, 76, 75, 71, 64,
+       70, 69, 80, 90,115, 98, 80, 85,109, 86, 69, 70,100,136,101,109,
+      119,123,129,130,129, 78, 96,141,151,140,125,150,115,126,129,124 },
+    {  21, 23, 23, 30, 26, 30, 59, 33, 33, 59,124, 83, 70, 83,124,124,
+      124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,
+      124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,
+      124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124 }
+};
+
+#if 0
+/* 4NF-M, not ZigZag */
+static const uint8_t sp5x_quant_table_orig[18][64] =
+{
+    /* index 0, Q50 */
+    {  16, 11, 10, 16, 24, 40, 51, 61, 12, 12, 14, 19, 26, 58, 60, 55,
+       14, 13, 16, 24, 40, 57, 69, 56, 14, 17, 22, 29, 51, 87, 80, 62,
+       18, 22, 37, 56, 68,109,103, 77, 24, 35, 55, 64, 81,104,113, 92,
+       49, 64, 78, 87,103,121,120,101, 72, 92, 95, 98,112,100,103, 99 },
+    {  17, 18, 24, 47, 99, 99, 99, 99, 18, 21, 26, 66, 99, 99, 99, 99,
+       24, 26, 56, 99, 99, 99, 99, 99, 47, 66, 99, 99, 99, 99, 99, 99,
+       99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
+       99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99 },
+
+    /* index 1, Q70 */
+    {  10,  7,  6, 10, 14, 24, 31, 37,  7,  7,  8, 11, 16, 35, 36, 33,
+        8,  8, 10, 14, 24, 34, 41, 34,  8, 10, 13, 17, 31, 52, 48, 37,
+       11, 13, 22, 34, 41, 65, 62, 46, 14, 21, 33, 38, 49, 62, 68, 55,
+       29, 38, 47, 52, 62, 73, 72, 61, 43, 55, 57, 59, 67, 60, 62, 59 },
+    {  10, 11, 14, 28, 59, 59, 59, 59, 11, 13, 16, 40, 59, 59, 59, 59,
+       14, 16, 34, 59, 59, 59, 59, 59, 28, 40, 59, 59, 59, 59, 59, 59,
+       59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+       59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59 },
+
+    /* index 2, Q80 */
+    {   6,  4,  4,  6, 10, 16, 20, 24,  5,  5,  6,  8, 10, 23, 24, 22,
+        6,  5,  6, 10, 16, 23, 28, 22,  6,  7,  9, 12, 20, 35, 32, 25,
+        7,  9, 15, 22, 27, 44, 41, 31, 10, 14, 22, 26, 32, 42, 45, 37,
+       20, 26, 31, 35, 41, 48, 48, 40, 29, 37, 38, 39, 45, 40, 41, 40 },
+    {   7,  7, 10, 19, 40, 40, 40, 40,  7,  8, 10, 26, 40, 40, 40, 40,
+       10, 10, 22, 40, 40, 40, 40, 40, 19, 26, 40, 40, 40, 40, 40, 40,
+       40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+       40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40 },
+
+    /* index 3, Q85 */
+    {   5,  3,  3,  5,  7, 12, 15, 18,  4,  4,  4,  6,  8, 17, 18, 17,
+        4,  4,  5,  7, 12, 17, 21, 17,  4,  5,  7,  9, 15, 26, 24, 19,
+        5,  7, 11, 17, 20, 33, 31, 23,  7, 11, 17, 19, 24, 31, 34, 28,
+       15, 19, 23, 26, 31, 36, 36, 30, 22, 28, 29, 29, 34, 30, 31, 30 },
+    {   5,  5,  7, 14, 30, 30, 30, 30,  5,  6,  8, 20, 30, 30, 30, 30,
+        7,  8, 17, 30, 30, 30, 30, 30, 14, 20, 30, 30, 30, 30, 30, 30,
+       30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
+       30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30 },
+
+    /* index 4, Q90 */
+    {   3,  2,  2,  3,  5,  8, 10, 12,  2,  2,  3,  4,  5, 12, 12, 11,
+        3,  3,  3,  5,  8, 11, 14, 11,  3,  3,  4,  6, 10, 17, 16, 12,
+        4,  4,  7, 11, 14, 22, 21, 15,  5,  7, 11, 13, 16, 21, 23, 18,
+       10, 13, 16, 17, 21, 24, 24, 20, 14, 18, 19, 20, 22, 20, 21, 20 },
+    {   3,  4,  5,  9, 20, 20, 20, 20,  4,  4,  5, 13, 20, 20, 20, 20,
+        5,  5, 11, 20, 20, 20, 20, 20,  9, 13, 20, 20, 20, 20, 20, 20,
+       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+       20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 },
+
+    /* index 5, Q60 */
+    {  13,  9,  8, 13, 19, 32, 41, 49, 10, 10, 11, 15, 21, 46, 48, 44,
+       11, 10, 13, 19, 32, 46, 55, 45, 11, 14, 18, 23, 41, 70, 64, 50,
+       14, 18, 30, 45, 54, 87, 82, 62, 19, 28, 44, 51, 65, 83, 90, 74,
+       39, 51, 62, 70, 82, 97, 96, 81, 58, 74, 76, 78, 90, 80, 82, 79 },
+    {  14, 14, 19, 38, 79, 79, 79, 79, 14, 17, 21, 53, 79, 79, 79, 79,
+       19, 21, 45, 79, 79, 79, 79, 79, 38, 53, 79, 79, 79, 79, 79, 79,
+       79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
+       79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79 },
+
+    /* index 6, Q25 */
+    {  32, 22, 20, 32, 48, 80,102,122, 24, 24, 28, 38, 52,116,120,110,
+       28, 26, 32, 48, 80,114,138,112, 28, 34, 44, 58,102,174,160,124,
+       36, 44, 74,112,136,218,206,154, 48, 70,110,128,162,208,226,184,
+       98,128,156,174,206,242,240,202,144,184,190,196,224,200,206,198 },
+    {  34, 36, 48, 94,198,198,198,198, 36, 42, 52,132,198,198,198,198,
+       48, 52,112,198,198,198,198,198, 94,132,198,198,198,198,198,198,
+      198,198,198,198,198,198,198,198,198,198,198,198,198,198,198,198,
+      198,198,198,198,198,198,198,198,198,198,198,198,198,198,198,198 },
+
+    /* index 7, Q95 */
+    {   2,  1,  1,  2,  2,  4,  5,  6,  1,  1,  1,  2,  3,  6,  6,  6,
+        1,  1,  2,  2,  4,  6,  7,  6,  1,  2,  2,  3,  5,  9,  8,  6,
+        2,  2,  4,  6,  7, 11, 10,  8,  2,  4,  6,  6,  8, 10, 11,  9,
+        5,  6,  8,  9, 10, 12, 12, 10,  7,  9, 10, 10, 11, 10, 10, 10 },
+    {   2,  2,  2,  5, 10, 10, 10, 10,  2,  2,  3,  7, 10, 10, 10, 10,
+        2,  3,  6, 10, 10, 10, 10, 10,  5,  7, 10, 10, 10, 10, 10, 10,
+       10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+       10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 },
+
+    /* index 8, Q93 */
+    {   2,  2,  1,  2,  3,  6,  7,  9,  2,  2,  2,  3,  4,  8,  8,  8,
+        2,  2,  2,  3,  6,  8, 10,  8,  2,  2,  3,  4,  7, 12, 11,  9,
+        3,  3,  5,  8, 10, 15, 14, 11,  3,  5,  8,  9, 11, 15, 16, 13,
+        7,  9, 11, 12, 14, 17, 17, 14, 10, 13, 13, 14, 16, 14, 14, 14 },
+    {   2,  3,  3,  7, 14, 14, 14, 14,  3,  3,  4,  9, 14, 14, 14, 14,
+        3,  4,  8, 14, 14, 14, 14, 14,  7,  9, 14, 14, 14, 14, 14, 14,
+       14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+       14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14 }
+};
+#endif
+
+#endif /* SP5X_H */
diff --git a/contrib/ffmpeg/libavcodec/sparc/dsputil_vis.c b/contrib/ffmpeg/libavcodec/sparc/dsputil_vis.c
new file mode 100644
index 000000000..5e59ce776
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/sparc/dsputil_vis.c
@@ -0,0 +1,4090 @@
+/*
+ * dsputil_vis.c
+ * Copyright (C) 2003 David S. Miller <davem@redhat.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* The *no_round* functions have been added by James A. Morrison, 2003,2004.
+   The vis code from libmpeg2 was adapted for ffmpeg by James A. Morrison.
+ */
+
+#include "config.h"
+
+#ifdef ARCH_SPARC
+
+#include <inttypes.h>
+#include <signal.h>
+#include <setjmp.h>
+
+#include "../dsputil.h"
+
+#include "vis.h"
+
+/* The trick used in some of this file is the formula from the MMX
+ * motion comp code, which is:
+ *
+ * (x+y+1)>>1 == (x|y)-((x^y)>>1)
+ *
+ * This allows us to average 8 bytes at a time in a 64-bit FPU reg.
+ * We avoid overflows by masking before we do the shift, and we
+ * implement the shift by multiplying by 1/2 using mul8x16.  So in
+ * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask
+ * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and
+ * the value 0x80808080 is in f8):
+ *
+ *      fxor            f0,   f2, f10
+ *      fand            f10,  f4, f10
+ *      fmul8x16        f8,  f10, f10
+ *      fand            f10,  f6, f10
+ *      for             f0,   f2, f12
+ *      fpsub16         f12, f10, f10
+ */
+
+#define ATTR_ALIGN(alignd) __attribute__ ((aligned(alignd)))
+
+#define DUP4(x) {x, x, x, x}
+#define DUP8(x) {x, x, x, x, x, x, x, x}
+static const int16_t constants1[] ATTR_ALIGN(8) = DUP4 (1);
+static const int16_t constants2[] ATTR_ALIGN(8) = DUP4 (2);
+static const int16_t constants3[] ATTR_ALIGN(8) = DUP4 (3);
+static const int16_t constants6[] ATTR_ALIGN(8) = DUP4 (6);
+static const int8_t constants_fe[] ATTR_ALIGN(8) = DUP8 (0xfe);
+static const int8_t constants_7f[] ATTR_ALIGN(8) = DUP8 (0x7f);
+static const int8_t constants128[] ATTR_ALIGN(8) = DUP8 (128);
+static const int16_t constants256_512[] ATTR_ALIGN(8) =
+        {256, 512, 256, 512};
+static const int16_t constants256_1024[] ATTR_ALIGN(8) =
+        {256, 1024, 256, 1024};
+
+#define REF_0           0
+#define REF_0_1         1
+#define REF_2           2
+#define REF_2_1         3
+#define REF_4           4
+#define REF_4_1         5
+#define REF_6           6
+#define REF_6_1         7
+#define REF_S0          8
+#define REF_S0_1        9
+#define REF_S2          10
+#define REF_S2_1        11
+#define REF_S4          12
+#define REF_S4_1        13
+#define REF_S6          14
+#define REF_S6_1        15
+#define DST_0           16
+#define DST_1           17
+#define DST_2           18
+#define DST_3           19
+#define CONST_1         20
+#define CONST_2         20
+#define CONST_3         20
+#define CONST_6         20
+#define MASK_fe         20
+#define CONST_128       22
+#define CONST_256       22
+#define CONST_512       22
+#define CONST_1024      22
+#define TMP0            24
+#define TMP1            25
+#define TMP2            26
+#define TMP3            27
+#define TMP4            28
+#define TMP5            29
+#define ZERO            30
+#define MASK_7f         30
+
+#define TMP6            32
+#define TMP8            34
+#define TMP10           36
+#define TMP12           38
+#define TMP14           40
+#define TMP16           42
+#define TMP18           44
+#define TMP20           46
+#define TMP22           48
+#define TMP24           50
+#define TMP26           52
+#define TMP28           54
+#define TMP30           56
+#define TMP32           58
+
+static void MC_put_o_16_vis (uint8_t * dest, const uint8_t * _ref,
+                             const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+
+        ref = vis_alignaddr(ref);
+        do {    /* 5 cycles */
+                vis_ld64(ref[0], TMP0);
+
+                vis_ld64_2(ref, 8, TMP2);
+
+                vis_ld64_2(ref, 16, TMP4);
+                ref += stride;
+
+                vis_faligndata(TMP0, TMP2, REF_0);
+                vis_st64(REF_0, dest[0]);
+
+                vis_faligndata(TMP2, TMP4, REF_2);
+                vis_st64_2(REF_2, dest, 8);
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_put_o_8_vis (uint8_t * dest, const uint8_t * _ref,
+                            const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+
+        ref = vis_alignaddr(ref);
+        do {    /* 4 cycles */
+                vis_ld64(ref[0], TMP0);
+
+                vis_ld64(ref[8], TMP2);
+                ref += stride;
+
+                /* stall */
+
+                vis_faligndata(TMP0, TMP2, REF_0);
+                vis_st64(REF_0, dest[0]);
+                dest += stride;
+        } while (--height);
+}
+
+
+static void MC_avg_o_16_vis (uint8_t * dest, const uint8_t * _ref,
+                             const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        int stride_8 = stride + 8;
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[0], TMP0);
+
+        vis_ld64(ref[8], TMP2);
+
+        vis_ld64(ref[16], TMP4);
+
+        vis_ld64(dest[0], DST_0);
+
+        vis_ld64(dest[8], DST_2);
+
+        vis_ld64(constants_fe[0], MASK_fe);
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP2, TMP4, REF_2);
+
+        vis_ld64(constants128[0], CONST_128);
+
+        ref += stride;
+        height = (height >> 1) - 1;
+
+        do {    /* 24 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(DST_0, REF_0, TMP6);
+
+                vis_ld64_2(ref, 8, TMP2);
+                vis_and(TMP6, MASK_fe, TMP6);
+
+                vis_ld64_2(ref, 16, TMP4);
+                ref += stride;
+                vis_mul8x16(CONST_128, TMP6, TMP6);
+                vis_xor(DST_2, REF_2, TMP8);
+
+                vis_and(TMP8, MASK_fe, TMP8);
+
+                vis_or(DST_0, REF_0, TMP10);
+                vis_ld64_2(dest, stride, DST_0);
+                vis_mul8x16(CONST_128, TMP8, TMP8);
+
+                vis_or(DST_2, REF_2, TMP12);
+                vis_ld64_2(dest, stride_8, DST_2);
+
+                vis_ld64(ref[0], TMP14);
+                vis_and(TMP6, MASK_7f, TMP6);
+
+                vis_and(TMP8, MASK_7f, TMP8);
+
+                vis_psub16(TMP10, TMP6, TMP6);
+                vis_st64(TMP6, dest[0]);
+
+                vis_psub16(TMP12, TMP8, TMP8);
+                vis_st64_2(TMP8, dest, 8);
+
+                dest += stride;
+                vis_ld64_2(ref, 8, TMP16);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64_2(ref, 16, TMP18);
+                vis_faligndata(TMP2, TMP4, REF_2);
+                ref += stride;
+
+                vis_xor(DST_0, REF_0, TMP20);
+
+                vis_and(TMP20, MASK_fe, TMP20);
+
+                vis_xor(DST_2, REF_2, TMP22);
+                vis_mul8x16(CONST_128, TMP20, TMP20);
+
+                vis_and(TMP22, MASK_fe, TMP22);
+
+                vis_or(DST_0, REF_0, TMP24);
+                vis_mul8x16(CONST_128, TMP22, TMP22);
+
+                vis_or(DST_2, REF_2, TMP26);
+
+                vis_ld64_2(dest, stride, DST_0);
+                vis_faligndata(TMP14, TMP16, REF_0);
+
+                vis_ld64_2(dest, stride_8, DST_2);
+                vis_faligndata(TMP16, TMP18, REF_2);
+
+                vis_and(TMP20, MASK_7f, TMP20);
+
+                vis_and(TMP22, MASK_7f, TMP22);
+
+                vis_psub16(TMP24, TMP20, TMP20);
+                vis_st64(TMP20, dest[0]);
+
+                vis_psub16(TMP26, TMP22, TMP22);
+                vis_st64_2(TMP22, dest, 8);
+                dest += stride;
+        } while (--height);
+
+        vis_ld64(ref[0], TMP0);
+        vis_xor(DST_0, REF_0, TMP6);
+
+        vis_ld64_2(ref, 8, TMP2);
+        vis_and(TMP6, MASK_fe, TMP6);
+
+        vis_ld64_2(ref, 16, TMP4);
+        vis_mul8x16(CONST_128, TMP6, TMP6);
+        vis_xor(DST_2, REF_2, TMP8);
+
+        vis_and(TMP8, MASK_fe, TMP8);
+
+        vis_or(DST_0, REF_0, TMP10);
+        vis_ld64_2(dest, stride, DST_0);
+        vis_mul8x16(CONST_128, TMP8, TMP8);
+
+        vis_or(DST_2, REF_2, TMP12);
+        vis_ld64_2(dest, stride_8, DST_2);
+
+        vis_ld64(ref[0], TMP14);
+        vis_and(TMP6, MASK_7f, TMP6);
+
+        vis_and(TMP8, MASK_7f, TMP8);
+
+        vis_psub16(TMP10, TMP6, TMP6);
+        vis_st64(TMP6, dest[0]);
+
+        vis_psub16(TMP12, TMP8, TMP8);
+        vis_st64_2(TMP8, dest, 8);
+
+        dest += stride;
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_faligndata(TMP2, TMP4, REF_2);
+
+        vis_xor(DST_0, REF_0, TMP20);
+
+        vis_and(TMP20, MASK_fe, TMP20);
+
+        vis_xor(DST_2, REF_2, TMP22);
+        vis_mul8x16(CONST_128, TMP20, TMP20);
+
+        vis_and(TMP22, MASK_fe, TMP22);
+
+        vis_or(DST_0, REF_0, TMP24);
+        vis_mul8x16(CONST_128, TMP22, TMP22);
+
+        vis_or(DST_2, REF_2, TMP26);
+
+        vis_and(TMP20, MASK_7f, TMP20);
+
+        vis_and(TMP22, MASK_7f, TMP22);
+
+        vis_psub16(TMP24, TMP20, TMP20);
+        vis_st64(TMP20, dest[0]);
+
+        vis_psub16(TMP26, TMP22, TMP22);
+        vis_st64_2(TMP22, dest, 8);
+}
+
+static void MC_avg_o_8_vis (uint8_t * dest, const uint8_t * _ref,
+                            const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[0], TMP0);
+
+        vis_ld64(ref[8], TMP2);
+
+        vis_ld64(dest[0], DST_0);
+
+        vis_ld64(constants_fe[0], MASK_fe);
+
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_ld64(constants128[0], CONST_128);
+
+        ref += stride;
+        height = (height >> 1) - 1;
+
+        do {    /* 12 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(DST_0, REF_0, TMP4);
+
+                vis_ld64(ref[8], TMP2);
+                vis_and(TMP4, MASK_fe, TMP4);
+
+                vis_or(DST_0, REF_0, TMP6);
+                vis_ld64_2(dest, stride, DST_0);
+                ref += stride;
+                vis_mul8x16(CONST_128, TMP4, TMP4);
+
+                vis_ld64(ref[0], TMP12);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64(ref[8], TMP2);
+                vis_xor(DST_0, REF_0, TMP0);
+                ref += stride;
+
+                vis_and(TMP0, MASK_fe, TMP0);
+
+                vis_and(TMP4, MASK_7f, TMP4);
+
+                vis_psub16(TMP6, TMP4, TMP4);
+                vis_st64(TMP4, dest[0]);
+                dest += stride;
+                vis_mul8x16(CONST_128, TMP0, TMP0);
+
+                vis_or(DST_0, REF_0, TMP6);
+                vis_ld64_2(dest, stride, DST_0);
+
+                vis_faligndata(TMP12, TMP2, REF_0);
+
+                vis_and(TMP0, MASK_7f, TMP0);
+
+                vis_psub16(TMP6, TMP0, TMP4);
+                vis_st64(TMP4, dest[0]);
+                dest += stride;
+        } while (--height);
+
+        vis_ld64(ref[0], TMP0);
+        vis_xor(DST_0, REF_0, TMP4);
+
+        vis_ld64(ref[8], TMP2);
+        vis_and(TMP4, MASK_fe, TMP4);
+
+        vis_or(DST_0, REF_0, TMP6);
+        vis_ld64_2(dest, stride, DST_0);
+        vis_mul8x16(CONST_128, TMP4, TMP4);
+
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_xor(DST_0, REF_0, TMP0);
+
+        vis_and(TMP0, MASK_fe, TMP0);
+
+        vis_and(TMP4, MASK_7f, TMP4);
+
+        vis_psub16(TMP6, TMP4, TMP4);
+        vis_st64(TMP4, dest[0]);
+        dest += stride;
+        vis_mul8x16(CONST_128, TMP0, TMP0);
+
+        vis_or(DST_0, REF_0, TMP6);
+
+        vis_and(TMP0, MASK_7f, TMP0);
+
+        vis_psub16(TMP6, TMP0, TMP4);
+        vis_st64(TMP4, dest[0]);
+}
+
+static void MC_put_x_16_vis (uint8_t * dest, const uint8_t * _ref,
+                             const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[0],    TMP0);
+
+        vis_ld64_2(ref, 8,  TMP2);
+
+        vis_ld64_2(ref, 16, TMP4);
+
+        vis_ld64(constants_fe[0], MASK_fe);
+
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_ld64(constants128[0], CONST_128);
+        vis_faligndata(TMP2, TMP4, REF_4);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+                vis_faligndata(TMP2, TMP4, REF_6);
+        } else {
+                vis_src1(TMP2, REF_2);
+                vis_src1(TMP4, REF_6);
+        }
+
+        ref += stride;
+        height = (height >> 1) - 1;
+
+        do {    /* 34 cycles */
+                vis_ld64(ref[0],    TMP0);
+                vis_xor(REF_0, REF_2, TMP6);
+
+                vis_ld64_2(ref, 8,  TMP2);
+                vis_xor(REF_4, REF_6, TMP8);
+
+                vis_ld64_2(ref, 16, TMP4);
+                vis_and(TMP6, MASK_fe, TMP6);
+                ref += stride;
+
+                vis_ld64(ref[0],    TMP14);
+                vis_mul8x16(CONST_128, TMP6, TMP6);
+                vis_and(TMP8, MASK_fe, TMP8);
+
+                vis_ld64_2(ref, 8,  TMP16);
+                vis_mul8x16(CONST_128, TMP8, TMP8);
+                vis_or(REF_0, REF_2, TMP10);
+
+                vis_ld64_2(ref, 16, TMP18);
+                ref += stride;
+                vis_or(REF_4, REF_6, TMP12);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_faligndata(TMP2, TMP4, REF_4);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                }
+
+                vis_and(TMP6, MASK_7f, TMP6);
+
+                vis_and(TMP8, MASK_7f, TMP8);
+
+                vis_psub16(TMP10, TMP6, TMP6);
+                vis_st64(TMP6, dest[0]);
+
+                vis_psub16(TMP12, TMP8, TMP8);
+                vis_st64_2(TMP8, dest, 8);
+                dest += stride;
+
+                vis_xor(REF_0, REF_2, TMP6);
+
+                vis_xor(REF_4, REF_6, TMP8);
+
+                vis_and(TMP6, MASK_fe, TMP6);
+
+                vis_mul8x16(CONST_128, TMP6, TMP6);
+                vis_and(TMP8, MASK_fe, TMP8);
+
+                vis_mul8x16(CONST_128, TMP8, TMP8);
+                vis_or(REF_0, REF_2, TMP10);
+
+                vis_or(REF_4, REF_6, TMP12);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_faligndata(TMP14, TMP16, REF_0);
+
+                vis_faligndata(TMP16, TMP18, REF_4);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP14, TMP16, REF_2);
+                        vis_faligndata(TMP16, TMP18, REF_6);
+                } else {
+                        vis_src1(TMP16, REF_2);
+                        vis_src1(TMP18, REF_6);
+                }
+
+                vis_and(TMP6, MASK_7f, TMP6);
+
+                vis_and(TMP8, MASK_7f, TMP8);
+
+                vis_psub16(TMP10, TMP6, TMP6);
+                vis_st64(TMP6, dest[0]);
+
+                vis_psub16(TMP12, TMP8, TMP8);
+                vis_st64_2(TMP8, dest, 8);
+                dest += stride;
+        } while (--height);
+
+        vis_ld64(ref[0],    TMP0);
+        vis_xor(REF_0, REF_2, TMP6);
+
+        vis_ld64_2(ref, 8,  TMP2);
+        vis_xor(REF_4, REF_6, TMP8);
+
+        vis_ld64_2(ref, 16, TMP4);
+        vis_and(TMP6, MASK_fe, TMP6);
+
+        vis_mul8x16(CONST_128, TMP6, TMP6);
+        vis_and(TMP8, MASK_fe, TMP8);
+
+        vis_mul8x16(CONST_128, TMP8, TMP8);
+        vis_or(REF_0, REF_2, TMP10);
+
+        vis_or(REF_4, REF_6, TMP12);
+
+        vis_alignaddr_g0((void *)off);
+
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_faligndata(TMP2, TMP4, REF_4);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+                vis_faligndata(TMP2, TMP4, REF_6);
+        } else {
+                vis_src1(TMP2, REF_2);
+                vis_src1(TMP4, REF_6);
+        }
+
+        vis_and(TMP6, MASK_7f, TMP6);
+
+        vis_and(TMP8, MASK_7f, TMP8);
+
+        vis_psub16(TMP10, TMP6, TMP6);
+        vis_st64(TMP6, dest[0]);
+
+        vis_psub16(TMP12, TMP8, TMP8);
+        vis_st64_2(TMP8, dest, 8);
+        dest += stride;
+
+        vis_xor(REF_0, REF_2, TMP6);
+
+        vis_xor(REF_4, REF_6, TMP8);
+
+        vis_and(TMP6, MASK_fe, TMP6);
+
+        vis_mul8x16(CONST_128, TMP6, TMP6);
+        vis_and(TMP8, MASK_fe, TMP8);
+
+        vis_mul8x16(CONST_128, TMP8, TMP8);
+        vis_or(REF_0, REF_2, TMP10);
+
+        vis_or(REF_4, REF_6, TMP12);
+
+        vis_and(TMP6, MASK_7f, TMP6);
+
+        vis_and(TMP8, MASK_7f, TMP8);
+
+        vis_psub16(TMP10, TMP6, TMP6);
+        vis_st64(TMP6, dest[0]);
+
+        vis_psub16(TMP12, TMP8, TMP8);
+        vis_st64_2(TMP8, dest, 8);
+}
+
+static void MC_put_x_8_vis (uint8_t * dest, const uint8_t * _ref,
+                            const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[0], TMP0);
+
+        vis_ld64(ref[8], TMP2);
+
+        vis_ld64(constants_fe[0], MASK_fe);
+
+        vis_ld64(constants_7f[0], MASK_7f);
+
+        vis_ld64(constants128[0], CONST_128);
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+        } else {
+                vis_src1(TMP2, REF_2);
+        }
+
+        ref += stride;
+        height = (height >> 1) - 1;
+
+        do {    /* 20 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(REF_0, REF_2, TMP4);
+
+                vis_ld64_2(ref, 8, TMP2);
+                vis_and(TMP4, MASK_fe, TMP4);
+                ref += stride;
+
+                vis_ld64(ref[0], TMP8);
+                vis_or(REF_0, REF_2, TMP6);
+                vis_mul8x16(CONST_128, TMP4, TMP4);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_ld64_2(ref, 8, TMP10);
+                ref += stride;
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                }
+
+                vis_and(TMP4, MASK_7f, TMP4);
+
+                vis_psub16(TMP6, TMP4, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+
+                vis_xor(REF_0, REF_2, TMP12);
+
+                vis_and(TMP12, MASK_fe, TMP12);
+
+                vis_or(REF_0, REF_2, TMP14);
+                vis_mul8x16(CONST_128, TMP12, TMP12);
+
+                vis_alignaddr_g0((void *)off);
+                vis_faligndata(TMP8, TMP10, REF_0);
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP8, TMP10, REF_2);
+                } else {
+                        vis_src1(TMP10, REF_2);
+                }
+
+                vis_and(TMP12, MASK_7f, TMP12);
+
+                vis_psub16(TMP14, TMP12, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+        } while (--height);
+
+        vis_ld64(ref[0], TMP0);
+        vis_xor(REF_0, REF_2, TMP4);
+
+        vis_ld64_2(ref, 8, TMP2);
+        vis_and(TMP4, MASK_fe, TMP4);
+
+        vis_or(REF_0, REF_2, TMP6);
+        vis_mul8x16(CONST_128, TMP4, TMP4);
+
+        vis_alignaddr_g0((void *)off);
+
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+        } else {
+                vis_src1(TMP2, REF_2);
+        }
+
+        vis_and(TMP4, MASK_7f, TMP4);
+
+        vis_psub16(TMP6, TMP4, DST_0);
+        vis_st64(DST_0, dest[0]);
+        dest += stride;
+
+        vis_xor(REF_0, REF_2, TMP12);
+
+        vis_and(TMP12, MASK_fe, TMP12);
+
+        vis_or(REF_0, REF_2, TMP14);
+        vis_mul8x16(CONST_128, TMP12, TMP12);
+
+        vis_and(TMP12, MASK_7f, TMP12);
+
+        vis_psub16(TMP14, TMP12, DST_0);
+        vis_st64(DST_0, dest[0]);
+        dest += stride;
+}
+
+static void MC_avg_x_16_vis (uint8_t * dest, const uint8_t * _ref,
+                             const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+        vis_ld64(constants3[0], CONST_3);
+        vis_fzero(ZERO);
+        vis_ld64(constants256_512[0], CONST_256);
+
+        ref = vis_alignaddr(ref);
+        do {    /* 26 cycles */
+                vis_ld64(ref[0], TMP0);
+
+                vis_ld64(ref[8], TMP2);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_ld64(ref[16], TMP4);
+
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64(dest[8], DST_2);
+                vis_faligndata(TMP2, TMP4, REF_4);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                }
+
+                vis_mul8x16au(REF_0,   CONST_256, TMP0);
+
+                vis_pmerge(ZERO,     REF_2,     TMP4);
+                vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+
+                vis_pmerge(ZERO, REF_2_1, TMP6);
+
+                vis_padd16(TMP0, TMP4, TMP0);
+
+                vis_mul8x16al(DST_0,   CONST_512, TMP4);
+                vis_padd16(TMP2, TMP6, TMP2);
+
+                vis_mul8x16al(DST_1,   CONST_512, TMP6);
+
+                vis_mul8x16au(REF_6,   CONST_256, TMP12);
+
+                vis_padd16(TMP0, TMP4, TMP0);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP14);
+
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_4,   CONST_256, TMP16);
+
+                vis_padd16(TMP0, CONST_3, TMP8);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP18);
+
+                vis_padd16(TMP2, CONST_3, TMP10);
+                vis_pack16(TMP8, DST_0);
+
+                vis_pack16(TMP10, DST_1);
+                vis_padd16(TMP16, TMP12, TMP0);
+
+                vis_st64(DST_0, dest[0]);
+                vis_mul8x16al(DST_2,   CONST_512, TMP4);
+                vis_padd16(TMP18, TMP14, TMP2);
+
+                vis_mul8x16al(DST_3,   CONST_512, TMP6);
+                vis_padd16(TMP0, CONST_3, TMP0);
+
+                vis_padd16(TMP2, CONST_3, TMP2);
+
+                vis_padd16(TMP0, TMP4, TMP0);
+
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_pack16(TMP0, DST_2);
+
+                vis_pack16(TMP2, DST_3);
+                vis_st64(DST_2, dest[8]);
+
+                ref += stride;
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_avg_x_8_vis (uint8_t * dest, const uint8_t * _ref,
+                            const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_times_2 = stride << 1;
+
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+        vis_ld64(constants3[0], CONST_3);
+        vis_fzero(ZERO);
+        vis_ld64(constants256_512[0], CONST_256);
+
+        ref = vis_alignaddr(ref);
+        height >>= 2;
+        do {    /* 47 cycles */
+                vis_ld64(ref[0],   TMP0);
+
+                vis_ld64_2(ref, 8, TMP2);
+                ref += stride;
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_ld64(ref[0],   TMP4);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64_2(ref, 8, TMP6);
+                ref += stride;
+
+                vis_ld64(ref[0],   TMP8);
+
+                vis_ld64_2(ref, 8, TMP10);
+                ref += stride;
+                vis_faligndata(TMP4, TMP6, REF_4);
+
+                vis_ld64(ref[0],   TMP12);
+
+                vis_ld64_2(ref, 8, TMP14);
+                ref += stride;
+                vis_faligndata(TMP8, TMP10, REF_S0);
+
+                vis_faligndata(TMP12, TMP14, REF_S4);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+
+                        vis_ld64(dest[0], DST_0);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+
+                        vis_ld64_2(dest, stride, DST_2);
+                        vis_faligndata(TMP4, TMP6, REF_6);
+
+                        vis_faligndata(TMP8, TMP10, REF_S2);
+
+                        vis_faligndata(TMP12, TMP14, REF_S6);
+                } else {
+                        vis_ld64(dest[0], DST_0);
+                        vis_src1(TMP2, REF_2);
+
+                        vis_ld64_2(dest, stride, DST_2);
+                        vis_src1(TMP6, REF_6);
+
+                        vis_src1(TMP10, REF_S2);
+
+                        vis_src1(TMP14, REF_S6);
+                }
+
+                vis_pmerge(ZERO,     REF_0,     TMP0);
+                vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+
+                vis_pmerge(ZERO,     REF_2,     TMP4);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP6);
+
+                vis_padd16(TMP0, CONST_3, TMP0);
+                vis_mul8x16al(DST_0,   CONST_512, TMP16);
+
+                vis_padd16(TMP2, CONST_3, TMP2);
+                vis_mul8x16al(DST_1,   CONST_512, TMP18);
+
+                vis_padd16(TMP0, TMP4, TMP0);
+                vis_mul8x16au(REF_4, CONST_256, TMP8);
+
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP10);
+
+                vis_padd16(TMP0, TMP16, TMP0);
+                vis_mul8x16au(REF_6, CONST_256, TMP12);
+
+                vis_padd16(TMP2, TMP18, TMP2);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP14);
+
+                vis_padd16(TMP8, CONST_3, TMP8);
+                vis_mul8x16al(DST_2, CONST_512, TMP16);
+
+                vis_padd16(TMP8, TMP12, TMP8);
+                vis_mul8x16al(DST_3, CONST_512, TMP18);
+
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_pack16(TMP0, DST_0);
+
+                vis_pack16(TMP2, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+                vis_padd16(TMP10, CONST_3, TMP10);
+
+                vis_ld64_2(dest, stride, DST_0);
+                vis_padd16(TMP8, TMP16, TMP8);
+
+                vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/);
+                vis_padd16(TMP10, TMP18, TMP10);
+                vis_pack16(TMP8, DST_2);
+
+                vis_pack16(TMP10, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+
+                vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
+                vis_pmerge(ZERO,     REF_S0,     TMP0);
+
+                vis_pmerge(ZERO,     REF_S2,     TMP24);
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
+
+                vis_padd16(TMP0, CONST_3, TMP0);
+                vis_mul8x16au(REF_S4, CONST_256, TMP8);
+
+                vis_padd16(TMP2, CONST_3, TMP2);
+                vis_mul8x16au(REF_S4_1, CONST_256, TMP10);
+
+                vis_padd16(TMP0, TMP24, TMP0);
+                vis_mul8x16au(REF_S6, CONST_256, TMP12);
+
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_S6_1, CONST_256, TMP14);
+
+                vis_padd16(TMP8, CONST_3, TMP8);
+                vis_mul8x16al(DST_0,   CONST_512, TMP16);
+
+                vis_padd16(TMP10, CONST_3, TMP10);
+                vis_mul8x16al(DST_1,   CONST_512, TMP18);
+
+                vis_padd16(TMP8, TMP12, TMP8);
+                vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20);
+
+                vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22);
+                vis_padd16(TMP0, TMP16, TMP0);
+
+                vis_padd16(TMP2, TMP18, TMP2);
+                vis_pack16(TMP0, DST_0);
+
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_pack16(TMP2, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+
+                vis_padd16(TMP8, TMP20, TMP8);
+
+                vis_padd16(TMP10, TMP22, TMP10);
+                vis_pack16(TMP8, DST_2);
+
+                vis_pack16(TMP10, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_put_y_16_vis (uint8_t * dest, const uint8_t * _ref,
+                             const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+
+        ref = vis_alignaddr(ref);
+        vis_ld64(ref[0], TMP0);
+
+        vis_ld64_2(ref, 8, TMP2);
+
+        vis_ld64_2(ref, 16, TMP4);
+        ref += stride;
+
+        vis_ld64(ref[0], TMP6);
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_ld64_2(ref, 8, TMP8);
+        vis_faligndata(TMP2, TMP4, REF_4);
+
+        vis_ld64_2(ref, 16, TMP10);
+        ref += stride;
+
+        vis_ld64(constants_fe[0], MASK_fe);
+        vis_faligndata(TMP6, TMP8, REF_2);
+
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP8, TMP10, REF_6);
+
+        vis_ld64(constants128[0], CONST_128);
+        height = (height >> 1) - 1;
+        do {    /* 24 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(REF_0, REF_2, TMP12);
+
+                vis_ld64_2(ref, 8, TMP2);
+                vis_xor(REF_4, REF_6, TMP16);
+
+                vis_ld64_2(ref, 16, TMP4);
+                ref += stride;
+                vis_or(REF_0, REF_2, TMP14);
+
+                vis_ld64(ref[0], TMP6);
+                vis_or(REF_4, REF_6, TMP18);
+
+                vis_ld64_2(ref, 8, TMP8);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64_2(ref, 16, TMP10);
+                ref += stride;
+                vis_faligndata(TMP2, TMP4, REF_4);
+
+                vis_and(TMP12, MASK_fe, TMP12);
+
+                vis_and(TMP16, MASK_fe, TMP16);
+                vis_mul8x16(CONST_128, TMP12, TMP12);
+
+                vis_mul8x16(CONST_128, TMP16, TMP16);
+                vis_xor(REF_0, REF_2, TMP0);
+
+                vis_xor(REF_4, REF_6, TMP2);
+
+                vis_or(REF_0, REF_2, TMP20);
+
+                vis_and(TMP12, MASK_7f, TMP12);
+
+                vis_and(TMP16, MASK_7f, TMP16);
+
+                vis_psub16(TMP14, TMP12, TMP12);
+                vis_st64(TMP12, dest[0]);
+
+                vis_psub16(TMP18, TMP16, TMP16);
+                vis_st64_2(TMP16, dest, 8);
+                dest += stride;
+
+                vis_or(REF_4, REF_6, TMP18);
+
+                vis_and(TMP0, MASK_fe, TMP0);
+
+                vis_and(TMP2, MASK_fe, TMP2);
+                vis_mul8x16(CONST_128, TMP0, TMP0);
+
+                vis_faligndata(TMP6, TMP8, REF_2);
+                vis_mul8x16(CONST_128, TMP2, TMP2);
+
+                vis_faligndata(TMP8, TMP10, REF_6);
+
+                vis_and(TMP0, MASK_7f, TMP0);
+
+                vis_and(TMP2, MASK_7f, TMP2);
+
+                vis_psub16(TMP20, TMP0, TMP0);
+                vis_st64(TMP0, dest[0]);
+
+                vis_psub16(TMP18, TMP2, TMP2);
+                vis_st64_2(TMP2, dest, 8);
+                dest += stride;
+        } while (--height);
+
+        vis_ld64(ref[0], TMP0);
+        vis_xor(REF_0, REF_2, TMP12);
+
+        vis_ld64_2(ref, 8, TMP2);
+        vis_xor(REF_4, REF_6, TMP16);
+
+        vis_ld64_2(ref, 16, TMP4);
+        vis_or(REF_0, REF_2, TMP14);
+
+        vis_or(REF_4, REF_6, TMP18);
+
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_faligndata(TMP2, TMP4, REF_4);
+
+        vis_and(TMP12, MASK_fe, TMP12);
+
+        vis_and(TMP16, MASK_fe, TMP16);
+        vis_mul8x16(CONST_128, TMP12, TMP12);
+
+        vis_mul8x16(CONST_128, TMP16, TMP16);
+        vis_xor(REF_0, REF_2, TMP0);
+
+        vis_xor(REF_4, REF_6, TMP2);
+
+        vis_or(REF_0, REF_2, TMP20);
+
+        vis_and(TMP12, MASK_7f, TMP12);
+
+        vis_and(TMP16, MASK_7f, TMP16);
+
+        vis_psub16(TMP14, TMP12, TMP12);
+        vis_st64(TMP12, dest[0]);
+
+        vis_psub16(TMP18, TMP16, TMP16);
+        vis_st64_2(TMP16, dest, 8);
+        dest += stride;
+
+        vis_or(REF_4, REF_6, TMP18);
+
+        vis_and(TMP0, MASK_fe, TMP0);
+
+        vis_and(TMP2, MASK_fe, TMP2);
+        vis_mul8x16(CONST_128, TMP0, TMP0);
+
+        vis_mul8x16(CONST_128, TMP2, TMP2);
+
+        vis_and(TMP0, MASK_7f, TMP0);
+
+        vis_and(TMP2, MASK_7f, TMP2);
+
+        vis_psub16(TMP20, TMP0, TMP0);
+        vis_st64(TMP0, dest[0]);
+
+        vis_psub16(TMP18, TMP2, TMP2);
+        vis_st64_2(TMP2, dest, 8);
+}
+
+static void MC_put_y_8_vis (uint8_t * dest, const uint8_t * _ref,
+                            const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+
+        ref = vis_alignaddr(ref);
+        vis_ld64(ref[0], TMP0);
+
+        vis_ld64_2(ref, 8, TMP2);
+        ref += stride;
+
+        vis_ld64(ref[0], TMP4);
+
+        vis_ld64_2(ref, 8, TMP6);
+        ref += stride;
+
+        vis_ld64(constants_fe[0], MASK_fe);
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP4, TMP6, REF_2);
+
+        vis_ld64(constants128[0], CONST_128);
+        height = (height >> 1) - 1;
+        do {    /* 12 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(REF_0, REF_2, TMP4);
+
+                vis_ld64_2(ref, 8, TMP2);
+                ref += stride;
+                vis_and(TMP4, MASK_fe, TMP4);
+
+                vis_or(REF_0, REF_2, TMP6);
+                vis_mul8x16(CONST_128, TMP4, TMP4);
+
+                vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64(ref[0], TMP0);
+
+                vis_ld64_2(ref, 8, TMP2);
+                ref += stride;
+                vis_xor(REF_0, REF_2, TMP12);
+
+                vis_and(TMP4, MASK_7f, TMP4);
+
+                vis_and(TMP12, MASK_fe, TMP12);
+
+                vis_mul8x16(CONST_128, TMP12, TMP12);
+                vis_or(REF_0, REF_2, TMP14);
+
+                vis_psub16(TMP6, TMP4, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+
+                vis_faligndata(TMP0, TMP2, REF_2);
+
+                vis_and(TMP12, MASK_7f, TMP12);
+
+                vis_psub16(TMP14, TMP12, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+        } while (--height);
+
+        vis_ld64(ref[0], TMP0);
+        vis_xor(REF_0, REF_2, TMP4);
+
+        vis_ld64_2(ref, 8, TMP2);
+        vis_and(TMP4, MASK_fe, TMP4);
+
+        vis_or(REF_0, REF_2, TMP6);
+        vis_mul8x16(CONST_128, TMP4, TMP4);
+
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_xor(REF_0, REF_2, TMP12);
+
+        vis_and(TMP4, MASK_7f, TMP4);
+
+        vis_and(TMP12, MASK_fe, TMP12);
+
+        vis_mul8x16(CONST_128, TMP12, TMP12);
+        vis_or(REF_0, REF_2, TMP14);
+
+        vis_psub16(TMP6, TMP4, DST_0);
+        vis_st64(DST_0, dest[0]);
+        dest += stride;
+
+        vis_and(TMP12, MASK_7f, TMP12);
+
+        vis_psub16(TMP14, TMP12, DST_0);
+        vis_st64(DST_0, dest[0]);
+}
+
+static void MC_avg_y_16_vis (uint8_t * dest, const uint8_t * _ref,
+                             const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        int stride_8 = stride + 8;
+        int stride_16 = stride + 16;
+
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
+
+        vis_ld64(ref[ 8], TMP2);
+
+        vis_ld64(ref[16], TMP4);
+
+        vis_ld64(constants3[0], CONST_3);
+        vis_faligndata(TMP0, TMP2, REF_2);
+
+        vis_ld64(constants256_512[0], CONST_256);
+        vis_faligndata(TMP2, TMP4, REF_6);
+        height >>= 1;
+
+        do {    /* 31 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_pmerge(ZERO,       REF_2,     TMP12);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP14);
+
+                vis_ld64_2(ref, stride_8, TMP2);
+                vis_pmerge(ZERO,       REF_6,     TMP16);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP18);
+
+                vis_ld64_2(ref, stride_16, TMP4);
+                ref += stride;
+
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64_2(dest, 8, DST_2);
+                vis_faligndata(TMP2, TMP4, REF_4);
+
+                vis_ld64_2(ref, stride, TMP6);
+                vis_pmerge(ZERO,     REF_0,     TMP0);
+                vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+
+                vis_ld64_2(ref, stride_8, TMP8);
+                vis_pmerge(ZERO,     REF_4,     TMP4);
+
+                vis_ld64_2(ref, stride_16, TMP10);
+                ref += stride;
+
+                vis_ld64_2(dest, stride, REF_S0/*DST_4*/);
+                vis_faligndata(TMP6, TMP8, REF_2);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP6);
+
+                vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/);
+                vis_faligndata(TMP8, TMP10, REF_6);
+                vis_mul8x16al(DST_0,   CONST_512, TMP20);
+
+                vis_padd16(TMP0, CONST_3, TMP0);
+                vis_mul8x16al(DST_1,   CONST_512, TMP22);
+
+                vis_padd16(TMP2, CONST_3, TMP2);
+                vis_mul8x16al(DST_2,   CONST_512, TMP24);
+
+                vis_padd16(TMP4, CONST_3, TMP4);
+                vis_mul8x16al(DST_3,   CONST_512, TMP26);
+
+                vis_padd16(TMP6, CONST_3, TMP6);
+
+                vis_padd16(TMP12, TMP20, TMP12);
+                vis_mul8x16al(REF_S0,   CONST_512, TMP20);
+
+                vis_padd16(TMP14, TMP22, TMP14);
+                vis_mul8x16al(REF_S0_1, CONST_512, TMP22);
+
+                vis_padd16(TMP16, TMP24, TMP16);
+                vis_mul8x16al(REF_S2,   CONST_512, TMP24);
+
+                vis_padd16(TMP18, TMP26, TMP18);
+                vis_mul8x16al(REF_S2_1, CONST_512, TMP26);
+
+                vis_padd16(TMP12, TMP0, TMP12);
+                vis_mul8x16au(REF_2,   CONST_256, TMP28);
+
+                vis_padd16(TMP14, TMP2, TMP14);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP30);
+
+                vis_padd16(TMP16, TMP4, TMP16);
+                vis_mul8x16au(REF_6,   CONST_256, REF_S4);
+
+                vis_padd16(TMP18, TMP6, TMP18);
+                vis_mul8x16au(REF_6_1, CONST_256, REF_S6);
+
+                vis_pack16(TMP12, DST_0);
+                vis_padd16(TMP28, TMP0, TMP12);
+
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_padd16(TMP30, TMP2, TMP14);
+
+                vis_pack16(TMP16, DST_2);
+                vis_padd16(REF_S4, TMP4, TMP16);
+
+                vis_pack16(TMP18, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+                vis_padd16(REF_S6, TMP6, TMP18);
+
+                vis_padd16(TMP12, TMP20, TMP12);
+
+                vis_padd16(TMP14, TMP22, TMP14);
+                vis_pack16(TMP12, DST_0);
+
+                vis_padd16(TMP16, TMP24, TMP16);
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
+
+                vis_padd16(TMP18, TMP26, TMP18);
+                vis_pack16(TMP16, DST_2);
+
+                vis_pack16(TMP18, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_avg_y_8_vis (uint8_t * dest, const uint8_t * _ref,
+                            const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        int stride_8 = stride + 8;
+
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
+
+        vis_ld64(ref[ 8], TMP2);
+
+        vis_ld64(constants3[0], CONST_3);
+        vis_faligndata(TMP0, TMP2, REF_2);
+
+        vis_ld64(constants256_512[0], CONST_256);
+
+        height >>= 1;
+        do {    /* 20 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_pmerge(ZERO,       REF_2,     TMP8);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP10);
+
+                vis_ld64_2(ref, stride_8, TMP2);
+                ref += stride;
+
+                vis_ld64(dest[0], DST_0);
+
+                vis_ld64_2(dest, stride, DST_2);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64_2(ref, stride, TMP4);
+                vis_mul8x16al(DST_0,   CONST_512, TMP16);
+                vis_pmerge(ZERO,       REF_0,     TMP12);
+
+                vis_ld64_2(ref, stride_8, TMP6);
+                ref += stride;
+                vis_mul8x16al(DST_1,   CONST_512, TMP18);
+                vis_pmerge(ZERO,       REF_0_1,   TMP14);
+
+                vis_padd16(TMP12, CONST_3, TMP12);
+                vis_mul8x16al(DST_2,   CONST_512, TMP24);
+
+                vis_padd16(TMP14, CONST_3, TMP14);
+                vis_mul8x16al(DST_3,   CONST_512, TMP26);
+
+                vis_faligndata(TMP4, TMP6, REF_2);
+
+                vis_padd16(TMP8, TMP12, TMP8);
+
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_mul8x16au(REF_2,   CONST_256, TMP20);
+
+                vis_padd16(TMP8, TMP16, TMP0);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP22);
+
+                vis_padd16(TMP10, TMP18, TMP2);
+                vis_pack16(TMP0, DST_0);
+
+                vis_pack16(TMP2, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+                vis_padd16(TMP12, TMP20, TMP12);
+
+                vis_padd16(TMP14, TMP22, TMP14);
+
+                vis_padd16(TMP12, TMP24, TMP0);
+
+                vis_padd16(TMP14, TMP26, TMP2);
+                vis_pack16(TMP0, DST_2);
+
+                vis_pack16(TMP2, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_put_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
+                              const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
+        int stride_16 = stride + 16;
+
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
+
+        vis_ld64(ref[ 8], TMP2);
+
+        vis_ld64(ref[16], TMP4);
+
+        vis_ld64(constants2[0], CONST_2);
+        vis_faligndata(TMP0, TMP2, REF_S0);
+
+        vis_ld64(constants256_512[0], CONST_256);
+        vis_faligndata(TMP2, TMP4, REF_S4);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+                vis_faligndata(TMP2, TMP4, REF_S6);
+        } else {
+                vis_src1(TMP2, REF_S2);
+                vis_src1(TMP4, REF_S6);
+        }
+
+        height >>= 1;
+        do {
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0, CONST_256, TMP12);
+                vis_pmerge(ZERO,      REF_S0_1,  TMP14);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_ld64_2(ref, stride_8, TMP2);
+                vis_mul8x16au(REF_S2, CONST_256, TMP16);
+                vis_pmerge(ZERO,      REF_S2_1,  TMP18);
+
+                vis_ld64_2(ref, stride_16, TMP4);
+                ref += stride;
+                vis_mul8x16au(REF_S4, CONST_256, TMP20);
+                vis_pmerge(ZERO,      REF_S4_1,  TMP22);
+
+                vis_ld64_2(ref, stride, TMP6);
+                vis_mul8x16au(REF_S6, CONST_256, TMP24);
+                vis_pmerge(ZERO,      REF_S6_1,  TMP26);
+
+                vis_ld64_2(ref, stride_8, TMP8);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64_2(ref, stride_16, TMP10);
+                ref += stride;
+                vis_faligndata(TMP2, TMP4, REF_4);
+
+                vis_faligndata(TMP6, TMP8, REF_S0);
+
+                vis_faligndata(TMP8, TMP10, REF_S4);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                        vis_faligndata(TMP6, TMP8, REF_S2);
+                        vis_faligndata(TMP8, TMP10, REF_S6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                        vis_src1(TMP8, REF_S2);
+                        vis_src1(TMP10, REF_S6);
+                }
+
+                vis_mul8x16au(REF_0, CONST_256, TMP0);
+                vis_pmerge(ZERO,      REF_0_1,  TMP2);
+
+                vis_mul8x16au(REF_2, CONST_256, TMP4);
+                vis_pmerge(ZERO,      REF_2_1,  TMP6);
+
+                vis_padd16(TMP0, CONST_2, TMP8);
+                vis_mul8x16au(REF_4, CONST_256, TMP0);
+
+                vis_padd16(TMP2, CONST_2, TMP10);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP2);
+
+                vis_padd16(TMP8, TMP4, TMP8);
+                vis_mul8x16au(REF_6, CONST_256, TMP4);
+
+                vis_padd16(TMP10, TMP6, TMP10);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP6);
+
+                vis_padd16(TMP12, TMP8, TMP12);
+
+                vis_padd16(TMP14, TMP10, TMP14);
+
+                vis_padd16(TMP12, TMP16, TMP12);
+
+                vis_padd16(TMP14, TMP18, TMP14);
+                vis_pack16(TMP12, DST_0);
+
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_padd16(TMP0, CONST_2, TMP12);
+
+                vis_mul8x16au(REF_S0, CONST_256, TMP0);
+                vis_padd16(TMP2, CONST_2, TMP14);
+
+                vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
+                vis_padd16(TMP12, TMP4, TMP12);
+
+                vis_mul8x16au(REF_S2, CONST_256, TMP4);
+                vis_padd16(TMP14, TMP6, TMP14);
+
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
+                vis_padd16(TMP20, TMP12, TMP20);
+
+                vis_padd16(TMP22, TMP14, TMP22);
+
+                vis_padd16(TMP20, TMP24, TMP20);
+
+                vis_padd16(TMP22, TMP26, TMP22);
+                vis_pack16(TMP20, DST_2);
+
+                vis_pack16(TMP22, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+                vis_padd16(TMP0, TMP4, TMP24);
+
+                vis_mul8x16au(REF_S4, CONST_256, TMP0);
+                vis_padd16(TMP2, TMP6, TMP26);
+
+                vis_mul8x16au(REF_S4_1, CONST_256, TMP2);
+                vis_padd16(TMP24, TMP8, TMP24);
+
+                vis_padd16(TMP26, TMP10, TMP26);
+                vis_pack16(TMP24, DST_0);
+
+                vis_pack16(TMP26, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_pmerge(ZERO, REF_S6, TMP4);
+
+                vis_pmerge(ZERO,      REF_S6_1,  TMP6);
+
+                vis_padd16(TMP0, TMP4, TMP0);
+
+                vis_padd16(TMP2, TMP6, TMP2);
+
+                vis_padd16(TMP0, TMP12, TMP0);
+
+                vis_padd16(TMP2, TMP14, TMP2);
+                vis_pack16(TMP0, DST_2);
+
+                vis_pack16(TMP2, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_put_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
+                             const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
+
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
+
+        vis_ld64(ref[ 8], TMP2);
+
+        vis_ld64(constants2[0], CONST_2);
+
+        vis_ld64(constants256_512[0], CONST_256);
+        vis_faligndata(TMP0, TMP2, REF_S0);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+        } else {
+                vis_src1(TMP2, REF_S2);
+        }
+
+        height >>= 1;
+        do {    /* 26 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0,   CONST_256, TMP8);
+                vis_pmerge(ZERO,        REF_S2,    TMP12);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_ld64_2(ref, stride_8, TMP2);
+                ref += stride;
+                vis_mul8x16au(REF_S0_1, CONST_256, TMP10);
+                vis_pmerge(ZERO,        REF_S2_1,  TMP14);
+
+                vis_ld64_2(ref, stride, TMP4);
+
+                vis_ld64_2(ref, stride_8, TMP6);
+                ref += stride;
+                vis_faligndata(TMP0, TMP2, REF_S4);
+
+                vis_pmerge(ZERO, REF_S4, TMP18);
+
+                vis_pmerge(ZERO, REF_S4_1, TMP20);
+
+                vis_faligndata(TMP4, TMP6, REF_S0);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_S6);
+                        vis_faligndata(TMP4, TMP6, REF_S2);
+                } else {
+                        vis_src1(TMP2, REF_S6);
+                        vis_src1(TMP6, REF_S2);
+                }
+
+                vis_padd16(TMP18, CONST_2, TMP18);
+                vis_mul8x16au(REF_S6,   CONST_256, TMP22);
+
+                vis_padd16(TMP20, CONST_2, TMP20);
+                vis_mul8x16au(REF_S6_1, CONST_256, TMP24);
+
+                vis_mul8x16au(REF_S0,   CONST_256, TMP26);
+                vis_pmerge(ZERO, REF_S0_1, TMP28);
+
+                vis_mul8x16au(REF_S2,   CONST_256, TMP30);
+                vis_padd16(TMP18, TMP22, TMP18);
+
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP32);
+                vis_padd16(TMP20, TMP24, TMP20);
+
+                vis_padd16(TMP8,  TMP18, TMP8);
+
+                vis_padd16(TMP10, TMP20, TMP10);
+
+                vis_padd16(TMP8,  TMP12, TMP8);
+
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_pack16(TMP8,  DST_0);
+
+                vis_pack16(TMP10, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+                vis_padd16(TMP18, TMP26, TMP18);
+
+                vis_padd16(TMP20, TMP28, TMP20);
+
+                vis_padd16(TMP18, TMP30, TMP18);
+
+                vis_padd16(TMP20, TMP32, TMP20);
+                vis_pack16(TMP18, DST_2);
+
+                vis_pack16(TMP20, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_avg_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
+                              const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
+        int stride_16 = stride + 16;
+
+        vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
+
+        vis_ld64(ref[ 8], TMP2);
+
+        vis_ld64(ref[16], TMP4);
+
+        vis_ld64(constants6[0], CONST_6);
+        vis_faligndata(TMP0, TMP2, REF_S0);
+
+        vis_ld64(constants256_1024[0], CONST_256);
+        vis_faligndata(TMP2, TMP4, REF_S4);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+                vis_faligndata(TMP2, TMP4, REF_S6);
+        } else {
+                vis_src1(TMP2, REF_S2);
+                vis_src1(TMP4, REF_S6);
+        }
+
+        height >>= 1;
+        do {    /* 55 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0, CONST_256, TMP12);
+                vis_pmerge(ZERO,      REF_S0_1,  TMP14);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_ld64_2(ref, stride_8, TMP2);
+                vis_mul8x16au(REF_S2, CONST_256, TMP16);
+                vis_pmerge(ZERO,      REF_S2_1,  TMP18);
+
+                vis_ld64_2(ref, stride_16, TMP4);
+                ref += stride;
+                vis_mul8x16au(REF_S4, CONST_256, TMP20);
+                vis_pmerge(ZERO,      REF_S4_1,  TMP22);
+
+                vis_ld64_2(ref, stride, TMP6);
+                vis_mul8x16au(REF_S6, CONST_256, TMP24);
+                vis_pmerge(ZERO,      REF_S6_1,  TMP26);
+
+                vis_ld64_2(ref, stride_8, TMP8);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64_2(ref, stride_16, TMP10);
+                ref += stride;
+                vis_faligndata(TMP2, TMP4, REF_4);
+
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP6, TMP8, REF_S0);
+
+                vis_ld64_2(dest, 8, DST_2);
+                vis_faligndata(TMP8, TMP10, REF_S4);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                        vis_faligndata(TMP6, TMP8, REF_S2);
+                        vis_faligndata(TMP8, TMP10, REF_S6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                        vis_src1(TMP8, REF_S2);
+                        vis_src1(TMP10, REF_S6);
+                }
+
+                vis_mul8x16al(DST_0,   CONST_1024, TMP30);
+                vis_pmerge(ZERO, REF_0, TMP0);
+
+                vis_mul8x16al(DST_1,   CONST_1024, TMP32);
+                vis_pmerge(ZERO,      REF_0_1,  TMP2);
+
+                vis_mul8x16au(REF_2, CONST_256, TMP4);
+                vis_pmerge(ZERO,      REF_2_1,  TMP6);
+
+                vis_mul8x16al(DST_2,   CONST_1024, REF_0);
+                vis_padd16(TMP0, CONST_6, TMP0);
+
+                vis_mul8x16al(DST_3,   CONST_1024, REF_2);
+                vis_padd16(TMP2, CONST_6, TMP2);
+
+                vis_padd16(TMP0, TMP4, TMP0);
+                vis_mul8x16au(REF_4, CONST_256, TMP4);
+
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP6);
+
+                vis_padd16(TMP12, TMP0, TMP12);
+                vis_mul8x16au(REF_6, CONST_256, TMP8);
+
+                vis_padd16(TMP14, TMP2, TMP14);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP10);
+
+                vis_padd16(TMP12, TMP16, TMP12);
+                vis_mul8x16au(REF_S0, CONST_256, REF_4);
+
+                vis_padd16(TMP14, TMP18, TMP14);
+                vis_mul8x16au(REF_S0_1, CONST_256, REF_6);
+
+                vis_padd16(TMP12, TMP30, TMP12);
+
+                vis_padd16(TMP14, TMP32, TMP14);
+                vis_pack16(TMP12, DST_0);
+
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_padd16(TMP4, CONST_6, TMP4);
+
+                vis_ld64_2(dest, stride, DST_0);
+                vis_padd16(TMP6, CONST_6, TMP6);
+                vis_mul8x16au(REF_S2, CONST_256, TMP12);
+
+                vis_padd16(TMP4, TMP8, TMP4);
+                vis_mul8x16au(REF_S2_1, CONST_256,  TMP14);
+
+                vis_padd16(TMP6, TMP10, TMP6);
+
+                vis_padd16(TMP20, TMP4, TMP20);
+
+                vis_padd16(TMP22, TMP6, TMP22);
+
+                vis_padd16(TMP20, TMP24, TMP20);
+
+                vis_padd16(TMP22, TMP26, TMP22);
+
+                vis_padd16(TMP20, REF_0, TMP20);
+                vis_mul8x16au(REF_S4, CONST_256, REF_0);
+
+                vis_padd16(TMP22, REF_2, TMP22);
+                vis_pack16(TMP20, DST_2);
+
+                vis_pack16(TMP22, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+
+                vis_ld64_2(dest, 8, DST_2);
+                vis_mul8x16al(DST_0,   CONST_1024, TMP30);
+                vis_pmerge(ZERO,      REF_S4_1,  REF_2);
+
+                vis_mul8x16al(DST_1,   CONST_1024, TMP32);
+                vis_padd16(REF_4, TMP0, TMP8);
+
+                vis_mul8x16au(REF_S6, CONST_256, REF_4);
+                vis_padd16(REF_6, TMP2, TMP10);
+
+                vis_mul8x16au(REF_S6_1, CONST_256, REF_6);
+                vis_padd16(TMP8, TMP12, TMP8);
+
+                vis_padd16(TMP10, TMP14, TMP10);
+
+                vis_padd16(TMP8, TMP30, TMP8);
+
+                vis_padd16(TMP10, TMP32, TMP10);
+                vis_pack16(TMP8, DST_0);
+
+                vis_pack16(TMP10, DST_1);
+                vis_st64(DST_0, dest[0]);
+
+                vis_padd16(REF_0, TMP4, REF_0);
+
+                vis_mul8x16al(DST_2,   CONST_1024, TMP30);
+                vis_padd16(REF_2, TMP6, REF_2);
+
+                vis_mul8x16al(DST_3,   CONST_1024, TMP32);
+                vis_padd16(REF_0, REF_4, REF_0);
+
+                vis_padd16(REF_2, REF_6, REF_2);
+
+                vis_padd16(REF_0, TMP30, REF_0);
+
+                /* stall */
+
+                vis_padd16(REF_2, TMP32, REF_2);
+                vis_pack16(REF_0, DST_2);
+
+                vis_pack16(REF_2, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_avg_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
+                             const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
+
+        vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[0], TMP0);
+        vis_fzero(ZERO);
+
+        vis_ld64_2(ref, 8, TMP2);
+
+        vis_ld64(constants6[0], CONST_6);
+
+        vis_ld64(constants256_1024[0], CONST_256);
+        vis_faligndata(TMP0, TMP2, REF_S0);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+        } else {
+                vis_src1(TMP2, REF_S2);
+        }
+
+        height >>= 1;
+        do {    /* 31 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0, CONST_256, TMP8);
+                vis_pmerge(ZERO,      REF_S0_1,  TMP10);
+
+                vis_ld64_2(ref, stride_8, TMP2);
+                ref += stride;
+                vis_mul8x16au(REF_S2, CONST_256, TMP12);
+                vis_pmerge(ZERO,      REF_S2_1,  TMP14);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_ld64_2(ref, stride, TMP4);
+                vis_faligndata(TMP0, TMP2, REF_S4);
+
+                vis_ld64_2(ref, stride_8, TMP6);
+                ref += stride;
+
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP4, TMP6, REF_S0);
+
+                vis_ld64_2(dest, stride, DST_2);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_S6);
+                        vis_faligndata(TMP4, TMP6, REF_S2);
+                } else {
+                        vis_src1(TMP2, REF_S6);
+                        vis_src1(TMP6, REF_S2);
+                }
+
+                vis_mul8x16al(DST_0,   CONST_1024, TMP30);
+                vis_pmerge(ZERO, REF_S4, TMP22);
+
+                vis_mul8x16al(DST_1,   CONST_1024, TMP32);
+                vis_pmerge(ZERO,      REF_S4_1,  TMP24);
+
+                vis_mul8x16au(REF_S6, CONST_256, TMP26);
+                vis_pmerge(ZERO,      REF_S6_1,  TMP28);
+
+                vis_mul8x16au(REF_S0, CONST_256, REF_S4);
+                vis_padd16(TMP22, CONST_6, TMP22);
+
+                vis_mul8x16au(REF_S0_1, CONST_256, REF_S6);
+                vis_padd16(TMP24, CONST_6, TMP24);
+
+                vis_mul8x16al(DST_2,   CONST_1024, REF_0);
+                vis_padd16(TMP22, TMP26, TMP22);
+
+                vis_mul8x16al(DST_3,   CONST_1024, REF_2);
+                vis_padd16(TMP24, TMP28, TMP24);
+
+                vis_mul8x16au(REF_S2, CONST_256, TMP26);
+                vis_padd16(TMP8, TMP22, TMP8);
+
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP28);
+                vis_padd16(TMP10, TMP24, TMP10);
+
+                vis_padd16(TMP8, TMP12, TMP8);
+
+                vis_padd16(TMP10, TMP14, TMP10);
+
+                vis_padd16(TMP8, TMP30, TMP8);
+
+                vis_padd16(TMP10, TMP32, TMP10);
+                vis_pack16(TMP8, DST_0);
+
+                vis_pack16(TMP10, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+
+                vis_padd16(REF_S4, TMP22, TMP12);
+
+                vis_padd16(REF_S6, TMP24, TMP14);
+
+                vis_padd16(TMP12, TMP26, TMP12);
+
+                vis_padd16(TMP14, TMP28, TMP14);
+
+                vis_padd16(TMP12, REF_0, TMP12);
+
+                vis_padd16(TMP14, REF_2, TMP14);
+                vis_pack16(TMP12, DST_2);
+
+                vis_pack16(TMP14, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
+}
+
+/* End of rounding code */
+
+/* Start of no rounding code */
+/* The trick used in some of this file is the formula from the MMX
+ * motion comp code, which is:
+ *
+ * (x+y)>>1 == (x&y)+((x^y)>>1)
+ *
+ * This allows us to average 8 bytes at a time in a 64-bit FPU reg.
+ * We avoid overflows by masking before we do the shift, and we
+ * implement the shift by multiplying by 1/2 using mul8x16.  So in
+ * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask
+ * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and
+ * the value 0x80808080 is in f8):
+ *
+ *      fxor            f0,   f2, f10
+ *      fand            f10,  f4, f10
+ *      fmul8x16        f8,  f10, f10
+ *      fand            f10,  f6, f10
+ *      fand            f0,   f2, f12
+ *      fpadd16         f12, f10, f10
+ */
+
+static void MC_put_no_round_o_16_vis (uint8_t * dest, const uint8_t * _ref,
+                                      const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+
+        ref = vis_alignaddr(ref);
+        do {    /* 5 cycles */
+                vis_ld64(ref[0], TMP0);
+
+                vis_ld64_2(ref, 8, TMP2);
+
+                vis_ld64_2(ref, 16, TMP4);
+                ref += stride;
+
+                vis_faligndata(TMP0, TMP2, REF_0);
+                vis_st64(REF_0, dest[0]);
+
+                vis_faligndata(TMP2, TMP4, REF_2);
+                vis_st64_2(REF_2, dest, 8);
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_put_no_round_o_8_vis (uint8_t * dest, const uint8_t * _ref,
+                            const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+
+        ref = vis_alignaddr(ref);
+        do {    /* 4 cycles */
+                vis_ld64(ref[0], TMP0);
+
+                vis_ld64(ref[8], TMP2);
+                ref += stride;
+
+                /* stall */
+
+                vis_faligndata(TMP0, TMP2, REF_0);
+                vis_st64(REF_0, dest[0]);
+                dest += stride;
+        } while (--height);
+}
+
+
+static void MC_avg_no_round_o_16_vis (uint8_t * dest, const uint8_t * _ref,
+                             const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        int stride_8 = stride + 8;
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[0], TMP0);
+
+        vis_ld64(ref[8], TMP2);
+
+        vis_ld64(ref[16], TMP4);
+
+        vis_ld64(dest[0], DST_0);
+
+        vis_ld64(dest[8], DST_2);
+
+        vis_ld64(constants_fe[0], MASK_fe);
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP2, TMP4, REF_2);
+
+        vis_ld64(constants128[0], CONST_128);
+
+        ref += stride;
+        height = (height >> 1) - 1;
+
+        do {    /* 24 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(DST_0, REF_0, TMP6);
+
+                vis_ld64_2(ref, 8, TMP2);
+                vis_and(TMP6, MASK_fe, TMP6);
+
+                vis_ld64_2(ref, 16, TMP4);
+                ref += stride;
+                vis_mul8x16(CONST_128, TMP6, TMP6);
+                vis_xor(DST_2, REF_2, TMP8);
+
+                vis_and(TMP8, MASK_fe, TMP8);
+
+                vis_and(DST_0, REF_0, TMP10);
+                vis_ld64_2(dest, stride, DST_0);
+                vis_mul8x16(CONST_128, TMP8, TMP8);
+
+                vis_and(DST_2, REF_2, TMP12);
+                vis_ld64_2(dest, stride_8, DST_2);
+
+                vis_ld64(ref[0], TMP14);
+                vis_and(TMP6, MASK_7f, TMP6);
+
+                vis_and(TMP8, MASK_7f, TMP8);
+
+                vis_padd16(TMP10, TMP6, TMP6);
+                vis_st64(TMP6, dest[0]);
+
+                vis_padd16(TMP12, TMP8, TMP8);
+                vis_st64_2(TMP8, dest, 8);
+
+                dest += stride;
+                vis_ld64_2(ref, 8, TMP16);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64_2(ref, 16, TMP18);
+                vis_faligndata(TMP2, TMP4, REF_2);
+                ref += stride;
+
+                vis_xor(DST_0, REF_0, TMP20);
+
+                vis_and(TMP20, MASK_fe, TMP20);
+
+                vis_xor(DST_2, REF_2, TMP22);
+                vis_mul8x16(CONST_128, TMP20, TMP20);
+
+                vis_and(TMP22, MASK_fe, TMP22);
+
+                vis_and(DST_0, REF_0, TMP24);
+                vis_mul8x16(CONST_128, TMP22, TMP22);
+
+                vis_and(DST_2, REF_2, TMP26);
+
+                vis_ld64_2(dest, stride, DST_0);
+                vis_faligndata(TMP14, TMP16, REF_0);
+
+                vis_ld64_2(dest, stride_8, DST_2);
+                vis_faligndata(TMP16, TMP18, REF_2);
+
+                vis_and(TMP20, MASK_7f, TMP20);
+
+                vis_and(TMP22, MASK_7f, TMP22);
+
+                vis_padd16(TMP24, TMP20, TMP20);
+                vis_st64(TMP20, dest[0]);
+
+                vis_padd16(TMP26, TMP22, TMP22);
+                vis_st64_2(TMP22, dest, 8);
+                dest += stride;
+        } while (--height);
+
+        vis_ld64(ref[0], TMP0);
+        vis_xor(DST_0, REF_0, TMP6);
+
+        vis_ld64_2(ref, 8, TMP2);
+        vis_and(TMP6, MASK_fe, TMP6);
+
+        vis_ld64_2(ref, 16, TMP4);
+        vis_mul8x16(CONST_128, TMP6, TMP6);
+        vis_xor(DST_2, REF_2, TMP8);
+
+        vis_and(TMP8, MASK_fe, TMP8);
+
+        vis_and(DST_0, REF_0, TMP10);
+        vis_ld64_2(dest, stride, DST_0);
+        vis_mul8x16(CONST_128, TMP8, TMP8);
+
+        vis_and(DST_2, REF_2, TMP12);
+        vis_ld64_2(dest, stride_8, DST_2);
+
+        vis_ld64(ref[0], TMP14);
+        vis_and(TMP6, MASK_7f, TMP6);
+
+        vis_and(TMP8, MASK_7f, TMP8);
+
+        vis_padd16(TMP10, TMP6, TMP6);
+        vis_st64(TMP6, dest[0]);
+
+        vis_padd16(TMP12, TMP8, TMP8);
+        vis_st64_2(TMP8, dest, 8);
+
+        dest += stride;
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_faligndata(TMP2, TMP4, REF_2);
+
+        vis_xor(DST_0, REF_0, TMP20);
+
+        vis_and(TMP20, MASK_fe, TMP20);
+
+        vis_xor(DST_2, REF_2, TMP22);
+        vis_mul8x16(CONST_128, TMP20, TMP20);
+
+        vis_and(TMP22, MASK_fe, TMP22);
+
+        vis_and(DST_0, REF_0, TMP24);
+        vis_mul8x16(CONST_128, TMP22, TMP22);
+
+        vis_and(DST_2, REF_2, TMP26);
+
+        vis_and(TMP20, MASK_7f, TMP20);
+
+        vis_and(TMP22, MASK_7f, TMP22);
+
+        vis_padd16(TMP24, TMP20, TMP20);
+        vis_st64(TMP20, dest[0]);
+
+        vis_padd16(TMP26, TMP22, TMP22);
+        vis_st64_2(TMP22, dest, 8);
+}
+
+static void MC_avg_no_round_o_8_vis (uint8_t * dest, const uint8_t * _ref,
+                            const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[0], TMP0);
+
+        vis_ld64(ref[8], TMP2);
+
+        vis_ld64(dest[0], DST_0);
+
+        vis_ld64(constants_fe[0], MASK_fe);
+
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_ld64(constants128[0], CONST_128);
+
+        ref += stride;
+        height = (height >> 1) - 1;
+
+        do {    /* 12 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(DST_0, REF_0, TMP4);
+
+                vis_ld64(ref[8], TMP2);
+                vis_and(TMP4, MASK_fe, TMP4);
+
+                vis_and(DST_0, REF_0, TMP6);
+                vis_ld64_2(dest, stride, DST_0);
+                ref += stride;
+                vis_mul8x16(CONST_128, TMP4, TMP4);
+
+                vis_ld64(ref[0], TMP12);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64(ref[8], TMP2);
+                vis_xor(DST_0, REF_0, TMP0);
+                ref += stride;
+
+                vis_and(TMP0, MASK_fe, TMP0);
+
+                vis_and(TMP4, MASK_7f, TMP4);
+
+                vis_padd16(TMP6, TMP4, TMP4);
+                vis_st64(TMP4, dest[0]);
+                dest += stride;
+                vis_mul8x16(CONST_128, TMP0, TMP0);
+
+                vis_and(DST_0, REF_0, TMP6);
+                vis_ld64_2(dest, stride, DST_0);
+
+                vis_faligndata(TMP12, TMP2, REF_0);
+
+                vis_and(TMP0, MASK_7f, TMP0);
+
+                vis_padd16(TMP6, TMP0, TMP4);
+                vis_st64(TMP4, dest[0]);
+                dest += stride;
+        } while (--height);
+
+        vis_ld64(ref[0], TMP0);
+        vis_xor(DST_0, REF_0, TMP4);
+
+        vis_ld64(ref[8], TMP2);
+        vis_and(TMP4, MASK_fe, TMP4);
+
+        vis_and(DST_0, REF_0, TMP6);
+        vis_ld64_2(dest, stride, DST_0);
+        vis_mul8x16(CONST_128, TMP4, TMP4);
+
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_xor(DST_0, REF_0, TMP0);
+
+        vis_and(TMP0, MASK_fe, TMP0);
+
+        vis_and(TMP4, MASK_7f, TMP4);
+
+        vis_padd16(TMP6, TMP4, TMP4);
+        vis_st64(TMP4, dest[0]);
+        dest += stride;
+        vis_mul8x16(CONST_128, TMP0, TMP0);
+
+        vis_and(DST_0, REF_0, TMP6);
+
+        vis_and(TMP0, MASK_7f, TMP0);
+
+        vis_padd16(TMP6, TMP0, TMP4);
+        vis_st64(TMP4, dest[0]);
+}
+
+static void MC_put_no_round_x_16_vis (uint8_t * dest, const uint8_t * _ref,
+                             const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[0],    TMP0);
+
+        vis_ld64_2(ref, 8,  TMP2);
+
+        vis_ld64_2(ref, 16, TMP4);
+
+        vis_ld64(constants_fe[0], MASK_fe);
+
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_ld64(constants128[0], CONST_128);
+        vis_faligndata(TMP2, TMP4, REF_4);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+                vis_faligndata(TMP2, TMP4, REF_6);
+        } else {
+                vis_src1(TMP2, REF_2);
+                vis_src1(TMP4, REF_6);
+        }
+
+        ref += stride;
+        height = (height >> 1) - 1;
+
+        do {    /* 34 cycles */
+                vis_ld64(ref[0],    TMP0);
+                vis_xor(REF_0, REF_2, TMP6);
+
+                vis_ld64_2(ref, 8,  TMP2);
+                vis_xor(REF_4, REF_6, TMP8);
+
+                vis_ld64_2(ref, 16, TMP4);
+                vis_and(TMP6, MASK_fe, TMP6);
+                ref += stride;
+
+                vis_ld64(ref[0],    TMP14);
+                vis_mul8x16(CONST_128, TMP6, TMP6);
+                vis_and(TMP8, MASK_fe, TMP8);
+
+                vis_ld64_2(ref, 8,  TMP16);
+                vis_mul8x16(CONST_128, TMP8, TMP8);
+                vis_and(REF_0, REF_2, TMP10);
+
+                vis_ld64_2(ref, 16, TMP18);
+                ref += stride;
+                vis_and(REF_4, REF_6, TMP12);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_faligndata(TMP2, TMP4, REF_4);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                }
+
+                vis_and(TMP6, MASK_7f, TMP6);
+
+                vis_and(TMP8, MASK_7f, TMP8);
+
+                vis_padd16(TMP10, TMP6, TMP6);
+                vis_st64(TMP6, dest[0]);
+
+                vis_padd16(TMP12, TMP8, TMP8);
+                vis_st64_2(TMP8, dest, 8);
+                dest += stride;
+
+                vis_xor(REF_0, REF_2, TMP6);
+
+                vis_xor(REF_4, REF_6, TMP8);
+
+                vis_and(TMP6, MASK_fe, TMP6);
+
+                vis_mul8x16(CONST_128, TMP6, TMP6);
+                vis_and(TMP8, MASK_fe, TMP8);
+
+                vis_mul8x16(CONST_128, TMP8, TMP8);
+                vis_and(REF_0, REF_2, TMP10);
+
+                vis_and(REF_4, REF_6, TMP12);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_faligndata(TMP14, TMP16, REF_0);
+
+                vis_faligndata(TMP16, TMP18, REF_4);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP14, TMP16, REF_2);
+                        vis_faligndata(TMP16, TMP18, REF_6);
+                } else {
+                        vis_src1(TMP16, REF_2);
+                        vis_src1(TMP18, REF_6);
+                }
+
+                vis_and(TMP6, MASK_7f, TMP6);
+
+                vis_and(TMP8, MASK_7f, TMP8);
+
+                vis_padd16(TMP10, TMP6, TMP6);
+                vis_st64(TMP6, dest[0]);
+
+                vis_padd16(TMP12, TMP8, TMP8);
+                vis_st64_2(TMP8, dest, 8);
+                dest += stride;
+        } while (--height);
+
+        vis_ld64(ref[0],    TMP0);
+        vis_xor(REF_0, REF_2, TMP6);
+
+        vis_ld64_2(ref, 8,  TMP2);
+        vis_xor(REF_4, REF_6, TMP8);
+
+        vis_ld64_2(ref, 16, TMP4);
+        vis_and(TMP6, MASK_fe, TMP6);
+
+        vis_mul8x16(CONST_128, TMP6, TMP6);
+        vis_and(TMP8, MASK_fe, TMP8);
+
+        vis_mul8x16(CONST_128, TMP8, TMP8);
+        vis_and(REF_0, REF_2, TMP10);
+
+        vis_and(REF_4, REF_6, TMP12);
+
+        vis_alignaddr_g0((void *)off);
+
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_faligndata(TMP2, TMP4, REF_4);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+                vis_faligndata(TMP2, TMP4, REF_6);
+        } else {
+                vis_src1(TMP2, REF_2);
+                vis_src1(TMP4, REF_6);
+        }
+
+        vis_and(TMP6, MASK_7f, TMP6);
+
+        vis_and(TMP8, MASK_7f, TMP8);
+
+        vis_padd16(TMP10, TMP6, TMP6);
+        vis_st64(TMP6, dest[0]);
+
+        vis_padd16(TMP12, TMP8, TMP8);
+        vis_st64_2(TMP8, dest, 8);
+        dest += stride;
+
+        vis_xor(REF_0, REF_2, TMP6);
+
+        vis_xor(REF_4, REF_6, TMP8);
+
+        vis_and(TMP6, MASK_fe, TMP6);
+
+        vis_mul8x16(CONST_128, TMP6, TMP6);
+        vis_and(TMP8, MASK_fe, TMP8);
+
+        vis_mul8x16(CONST_128, TMP8, TMP8);
+        vis_and(REF_0, REF_2, TMP10);
+
+        vis_and(REF_4, REF_6, TMP12);
+
+        vis_and(TMP6, MASK_7f, TMP6);
+
+        vis_and(TMP8, MASK_7f, TMP8);
+
+        vis_padd16(TMP10, TMP6, TMP6);
+        vis_st64(TMP6, dest[0]);
+
+        vis_padd16(TMP12, TMP8, TMP8);
+        vis_st64_2(TMP8, dest, 8);
+}
+
+static void MC_put_no_round_x_8_vis (uint8_t * dest, const uint8_t * _ref,
+                            const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[0], TMP0);
+
+        vis_ld64(ref[8], TMP2);
+
+        vis_ld64(constants_fe[0], MASK_fe);
+
+        vis_ld64(constants_7f[0], MASK_7f);
+
+        vis_ld64(constants128[0], CONST_128);
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+        } else {
+                vis_src1(TMP2, REF_2);
+        }
+
+        ref += stride;
+        height = (height >> 1) - 1;
+
+        do {    /* 20 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(REF_0, REF_2, TMP4);
+
+                vis_ld64_2(ref, 8, TMP2);
+                vis_and(TMP4, MASK_fe, TMP4);
+                ref += stride;
+
+                vis_ld64(ref[0], TMP8);
+                vis_and(REF_0, REF_2, TMP6);
+                vis_mul8x16(CONST_128, TMP4, TMP4);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_ld64_2(ref, 8, TMP10);
+                ref += stride;
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                }
+
+                vis_and(TMP4, MASK_7f, TMP4);
+
+                vis_padd16(TMP6, TMP4, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+
+                vis_xor(REF_0, REF_2, TMP12);
+
+                vis_and(TMP12, MASK_fe, TMP12);
+
+                vis_and(REF_0, REF_2, TMP14);
+                vis_mul8x16(CONST_128, TMP12, TMP12);
+
+                vis_alignaddr_g0((void *)off);
+                vis_faligndata(TMP8, TMP10, REF_0);
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP8, TMP10, REF_2);
+                } else {
+                        vis_src1(TMP10, REF_2);
+                }
+
+                vis_and(TMP12, MASK_7f, TMP12);
+
+                vis_padd16(TMP14, TMP12, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+        } while (--height);
+
+        vis_ld64(ref[0], TMP0);
+        vis_xor(REF_0, REF_2, TMP4);
+
+        vis_ld64_2(ref, 8, TMP2);
+        vis_and(TMP4, MASK_fe, TMP4);
+
+        vis_and(REF_0, REF_2, TMP6);
+        vis_mul8x16(CONST_128, TMP4, TMP4);
+
+        vis_alignaddr_g0((void *)off);
+
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+        } else {
+                vis_src1(TMP2, REF_2);
+        }
+
+        vis_and(TMP4, MASK_7f, TMP4);
+
+        vis_padd16(TMP6, TMP4, DST_0);
+        vis_st64(DST_0, dest[0]);
+        dest += stride;
+
+        vis_xor(REF_0, REF_2, TMP12);
+
+        vis_and(TMP12, MASK_fe, TMP12);
+
+        vis_and(REF_0, REF_2, TMP14);
+        vis_mul8x16(CONST_128, TMP12, TMP12);
+
+        vis_and(TMP12, MASK_7f, TMP12);
+
+        vis_padd16(TMP14, TMP12, DST_0);
+        vis_st64(DST_0, dest[0]);
+        dest += stride;
+}
+
+static void MC_avg_no_round_x_16_vis (uint8_t * dest, const uint8_t * _ref,
+                             const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+        vis_ld64(constants3[0], CONST_3);
+        vis_fzero(ZERO);
+        vis_ld64(constants256_512[0], CONST_256);
+
+        ref = vis_alignaddr(ref);
+        do {    /* 26 cycles */
+                vis_ld64(ref[0], TMP0);
+
+                vis_ld64(ref[8], TMP2);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_ld64(ref[16], TMP4);
+
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64(dest[8], DST_2);
+                vis_faligndata(TMP2, TMP4, REF_4);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                }
+
+                vis_mul8x16au(REF_0,   CONST_256, TMP0);
+
+                vis_pmerge(ZERO,     REF_2,     TMP4);
+                vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+
+                vis_pmerge(ZERO, REF_2_1, TMP6);
+
+                vis_padd16(TMP0, TMP4, TMP0);
+
+                vis_mul8x16al(DST_0,   CONST_512, TMP4);
+                vis_padd16(TMP2, TMP6, TMP2);
+
+                vis_mul8x16al(DST_1,   CONST_512, TMP6);
+
+                vis_mul8x16au(REF_6,   CONST_256, TMP12);
+
+                vis_padd16(TMP0, TMP4, TMP0);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP14);
+
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_4,   CONST_256, TMP16);
+
+                vis_padd16(TMP0, CONST_3, TMP8);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP18);
+
+                vis_padd16(TMP2, CONST_3, TMP10);
+                vis_pack16(TMP8, DST_0);
+
+                vis_pack16(TMP10, DST_1);
+                vis_padd16(TMP16, TMP12, TMP0);
+
+                vis_st64(DST_0, dest[0]);
+                vis_mul8x16al(DST_2,   CONST_512, TMP4);
+                vis_padd16(TMP18, TMP14, TMP2);
+
+                vis_mul8x16al(DST_3,   CONST_512, TMP6);
+                vis_padd16(TMP0, CONST_3, TMP0);
+
+                vis_padd16(TMP2, CONST_3, TMP2);
+
+                vis_padd16(TMP0, TMP4, TMP0);
+
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_pack16(TMP0, DST_2);
+
+                vis_pack16(TMP2, DST_3);
+                vis_st64(DST_2, dest[8]);
+
+                ref += stride;
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_avg_no_round_x_8_vis (uint8_t * dest, const uint8_t * _ref,
+                            const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_times_2 = stride << 1;
+
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+        vis_ld64(constants3[0], CONST_3);
+        vis_fzero(ZERO);
+        vis_ld64(constants256_512[0], CONST_256);
+
+        ref = vis_alignaddr(ref);
+        height >>= 2;
+        do {    /* 47 cycles */
+                vis_ld64(ref[0],   TMP0);
+
+                vis_ld64_2(ref, 8, TMP2);
+                ref += stride;
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_ld64(ref[0],   TMP4);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64_2(ref, 8, TMP6);
+                ref += stride;
+
+                vis_ld64(ref[0],   TMP8);
+
+                vis_ld64_2(ref, 8, TMP10);
+                ref += stride;
+                vis_faligndata(TMP4, TMP6, REF_4);
+
+                vis_ld64(ref[0],   TMP12);
+
+                vis_ld64_2(ref, 8, TMP14);
+                ref += stride;
+                vis_faligndata(TMP8, TMP10, REF_S0);
+
+                vis_faligndata(TMP12, TMP14, REF_S4);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+
+                        vis_ld64(dest[0], DST_0);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+
+                        vis_ld64_2(dest, stride, DST_2);
+                        vis_faligndata(TMP4, TMP6, REF_6);
+
+                        vis_faligndata(TMP8, TMP10, REF_S2);
+
+                        vis_faligndata(TMP12, TMP14, REF_S6);
+                } else {
+                        vis_ld64(dest[0], DST_0);
+                        vis_src1(TMP2, REF_2);
+
+                        vis_ld64_2(dest, stride, DST_2);
+                        vis_src1(TMP6, REF_6);
+
+                        vis_src1(TMP10, REF_S2);
+
+                        vis_src1(TMP14, REF_S6);
+                }
+
+                vis_pmerge(ZERO,     REF_0,     TMP0);
+                vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+
+                vis_pmerge(ZERO,     REF_2,     TMP4);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP6);
+
+                vis_padd16(TMP0, CONST_3, TMP0);
+                vis_mul8x16al(DST_0,   CONST_512, TMP16);
+
+                vis_padd16(TMP2, CONST_3, TMP2);
+                vis_mul8x16al(DST_1,   CONST_512, TMP18);
+
+                vis_padd16(TMP0, TMP4, TMP0);
+                vis_mul8x16au(REF_4, CONST_256, TMP8);
+
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP10);
+
+                vis_padd16(TMP0, TMP16, TMP0);
+                vis_mul8x16au(REF_6, CONST_256, TMP12);
+
+                vis_padd16(TMP2, TMP18, TMP2);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP14);
+
+                vis_padd16(TMP8, CONST_3, TMP8);
+                vis_mul8x16al(DST_2, CONST_512, TMP16);
+
+                vis_padd16(TMP8, TMP12, TMP8);
+                vis_mul8x16al(DST_3, CONST_512, TMP18);
+
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_pack16(TMP0, DST_0);
+
+                vis_pack16(TMP2, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+                vis_padd16(TMP10, CONST_3, TMP10);
+
+                vis_ld64_2(dest, stride, DST_0);
+                vis_padd16(TMP8, TMP16, TMP8);
+
+                vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/);
+                vis_padd16(TMP10, TMP18, TMP10);
+                vis_pack16(TMP8, DST_2);
+
+                vis_pack16(TMP10, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+
+                vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
+                vis_pmerge(ZERO,     REF_S0,     TMP0);
+
+                vis_pmerge(ZERO,     REF_S2,     TMP24);
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
+
+                vis_padd16(TMP0, CONST_3, TMP0);
+                vis_mul8x16au(REF_S4, CONST_256, TMP8);
+
+                vis_padd16(TMP2, CONST_3, TMP2);
+                vis_mul8x16au(REF_S4_1, CONST_256, TMP10);
+
+                vis_padd16(TMP0, TMP24, TMP0);
+                vis_mul8x16au(REF_S6, CONST_256, TMP12);
+
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_S6_1, CONST_256, TMP14);
+
+                vis_padd16(TMP8, CONST_3, TMP8);
+                vis_mul8x16al(DST_0,   CONST_512, TMP16);
+
+                vis_padd16(TMP10, CONST_3, TMP10);
+                vis_mul8x16al(DST_1,   CONST_512, TMP18);
+
+                vis_padd16(TMP8, TMP12, TMP8);
+                vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20);
+
+                vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22);
+                vis_padd16(TMP0, TMP16, TMP0);
+
+                vis_padd16(TMP2, TMP18, TMP2);
+                vis_pack16(TMP0, DST_0);
+
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_pack16(TMP2, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+
+                vis_padd16(TMP8, TMP20, TMP8);
+
+                vis_padd16(TMP10, TMP22, TMP10);
+                vis_pack16(TMP8, DST_2);
+
+                vis_pack16(TMP10, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_put_no_round_y_16_vis (uint8_t * dest, const uint8_t * _ref,
+                             const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+
+        ref = vis_alignaddr(ref);
+        vis_ld64(ref[0], TMP0);
+
+        vis_ld64_2(ref, 8, TMP2);
+
+        vis_ld64_2(ref, 16, TMP4);
+        ref += stride;
+
+        vis_ld64(ref[0], TMP6);
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_ld64_2(ref, 8, TMP8);
+        vis_faligndata(TMP2, TMP4, REF_4);
+
+        vis_ld64_2(ref, 16, TMP10);
+        ref += stride;
+
+        vis_ld64(constants_fe[0], MASK_fe);
+        vis_faligndata(TMP6, TMP8, REF_2);
+
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP8, TMP10, REF_6);
+
+        vis_ld64(constants128[0], CONST_128);
+        height = (height >> 1) - 1;
+        do {    /* 24 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(REF_0, REF_2, TMP12);
+
+                vis_ld64_2(ref, 8, TMP2);
+                vis_xor(REF_4, REF_6, TMP16);
+
+                vis_ld64_2(ref, 16, TMP4);
+                ref += stride;
+                vis_and(REF_0, REF_2, TMP14);
+
+                vis_ld64(ref[0], TMP6);
+                vis_and(REF_4, REF_6, TMP18);
+
+                vis_ld64_2(ref, 8, TMP8);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64_2(ref, 16, TMP10);
+                ref += stride;
+                vis_faligndata(TMP2, TMP4, REF_4);
+
+                vis_and(TMP12, MASK_fe, TMP12);
+
+                vis_and(TMP16, MASK_fe, TMP16);
+                vis_mul8x16(CONST_128, TMP12, TMP12);
+
+                vis_mul8x16(CONST_128, TMP16, TMP16);
+                vis_xor(REF_0, REF_2, TMP0);
+
+                vis_xor(REF_4, REF_6, TMP2);
+
+                vis_and(REF_0, REF_2, TMP20);
+
+                vis_and(TMP12, MASK_7f, TMP12);
+
+                vis_and(TMP16, MASK_7f, TMP16);
+
+                vis_padd16(TMP14, TMP12, TMP12);
+                vis_st64(TMP12, dest[0]);
+
+                vis_padd16(TMP18, TMP16, TMP16);
+                vis_st64_2(TMP16, dest, 8);
+                dest += stride;
+
+                vis_and(REF_4, REF_6, TMP18);
+
+                vis_and(TMP0, MASK_fe, TMP0);
+
+                vis_and(TMP2, MASK_fe, TMP2);
+                vis_mul8x16(CONST_128, TMP0, TMP0);
+
+                vis_faligndata(TMP6, TMP8, REF_2);
+                vis_mul8x16(CONST_128, TMP2, TMP2);
+
+                vis_faligndata(TMP8, TMP10, REF_6);
+
+                vis_and(TMP0, MASK_7f, TMP0);
+
+                vis_and(TMP2, MASK_7f, TMP2);
+
+                vis_padd16(TMP20, TMP0, TMP0);
+                vis_st64(TMP0, dest[0]);
+
+                vis_padd16(TMP18, TMP2, TMP2);
+                vis_st64_2(TMP2, dest, 8);
+                dest += stride;
+        } while (--height);
+
+        vis_ld64(ref[0], TMP0);
+        vis_xor(REF_0, REF_2, TMP12);
+
+        vis_ld64_2(ref, 8, TMP2);
+        vis_xor(REF_4, REF_6, TMP16);
+
+        vis_ld64_2(ref, 16, TMP4);
+        vis_and(REF_0, REF_2, TMP14);
+
+        vis_and(REF_4, REF_6, TMP18);
+
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_faligndata(TMP2, TMP4, REF_4);
+
+        vis_and(TMP12, MASK_fe, TMP12);
+
+        vis_and(TMP16, MASK_fe, TMP16);
+        vis_mul8x16(CONST_128, TMP12, TMP12);
+
+        vis_mul8x16(CONST_128, TMP16, TMP16);
+        vis_xor(REF_0, REF_2, TMP0);
+
+        vis_xor(REF_4, REF_6, TMP2);
+
+        vis_and(REF_0, REF_2, TMP20);
+
+        vis_and(TMP12, MASK_7f, TMP12);
+
+        vis_and(TMP16, MASK_7f, TMP16);
+
+        vis_padd16(TMP14, TMP12, TMP12);
+        vis_st64(TMP12, dest[0]);
+
+        vis_padd16(TMP18, TMP16, TMP16);
+        vis_st64_2(TMP16, dest, 8);
+        dest += stride;
+
+        vis_and(REF_4, REF_6, TMP18);
+
+        vis_and(TMP0, MASK_fe, TMP0);
+
+        vis_and(TMP2, MASK_fe, TMP2);
+        vis_mul8x16(CONST_128, TMP0, TMP0);
+
+        vis_mul8x16(CONST_128, TMP2, TMP2);
+
+        vis_and(TMP0, MASK_7f, TMP0);
+
+        vis_and(TMP2, MASK_7f, TMP2);
+
+        vis_padd16(TMP20, TMP0, TMP0);
+        vis_st64(TMP0, dest[0]);
+
+        vis_padd16(TMP18, TMP2, TMP2);
+        vis_st64_2(TMP2, dest, 8);
+}
+
+static void MC_put_no_round_y_8_vis (uint8_t * dest, const uint8_t * _ref,
+                            const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+
+        ref = vis_alignaddr(ref);
+        vis_ld64(ref[0], TMP0);
+
+        vis_ld64_2(ref, 8, TMP2);
+        ref += stride;
+
+        vis_ld64(ref[0], TMP4);
+
+        vis_ld64_2(ref, 8, TMP6);
+        ref += stride;
+
+        vis_ld64(constants_fe[0], MASK_fe);
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP4, TMP6, REF_2);
+
+        vis_ld64(constants128[0], CONST_128);
+        height = (height >> 1) - 1;
+        do {    /* 12 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(REF_0, REF_2, TMP4);
+
+                vis_ld64_2(ref, 8, TMP2);
+                ref += stride;
+                vis_and(TMP4, MASK_fe, TMP4);
+
+                vis_and(REF_0, REF_2, TMP6);
+                vis_mul8x16(CONST_128, TMP4, TMP4);
+
+                vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64(ref[0], TMP0);
+
+                vis_ld64_2(ref, 8, TMP2);
+                ref += stride;
+                vis_xor(REF_0, REF_2, TMP12);
+
+                vis_and(TMP4, MASK_7f, TMP4);
+
+                vis_and(TMP12, MASK_fe, TMP12);
+
+                vis_mul8x16(CONST_128, TMP12, TMP12);
+                vis_and(REF_0, REF_2, TMP14);
+
+                vis_padd16(TMP6, TMP4, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+
+                vis_faligndata(TMP0, TMP2, REF_2);
+
+                vis_and(TMP12, MASK_7f, TMP12);
+
+                vis_padd16(TMP14, TMP12, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+        } while (--height);
+
+        vis_ld64(ref[0], TMP0);
+        vis_xor(REF_0, REF_2, TMP4);
+
+        vis_ld64_2(ref, 8, TMP2);
+        vis_and(TMP4, MASK_fe, TMP4);
+
+        vis_and(REF_0, REF_2, TMP6);
+        vis_mul8x16(CONST_128, TMP4, TMP4);
+
+        vis_faligndata(TMP0, TMP2, REF_0);
+
+        vis_xor(REF_0, REF_2, TMP12);
+
+        vis_and(TMP4, MASK_7f, TMP4);
+
+        vis_and(TMP12, MASK_fe, TMP12);
+
+        vis_mul8x16(CONST_128, TMP12, TMP12);
+        vis_and(REF_0, REF_2, TMP14);
+
+        vis_padd16(TMP6, TMP4, DST_0);
+        vis_st64(DST_0, dest[0]);
+        dest += stride;
+
+        vis_and(TMP12, MASK_7f, TMP12);
+
+        vis_padd16(TMP14, TMP12, DST_0);
+        vis_st64(DST_0, dest[0]);
+}
+
+static void MC_avg_no_round_y_16_vis (uint8_t * dest, const uint8_t * _ref,
+                             const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        int stride_8 = stride + 8;
+        int stride_16 = stride + 16;
+
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
+
+        vis_ld64(ref[ 8], TMP2);
+
+        vis_ld64(ref[16], TMP4);
+
+        vis_ld64(constants3[0], CONST_3);
+        vis_faligndata(TMP0, TMP2, REF_2);
+
+        vis_ld64(constants256_512[0], CONST_256);
+        vis_faligndata(TMP2, TMP4, REF_6);
+        height >>= 1;
+
+        do {    /* 31 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_pmerge(ZERO,       REF_2,     TMP12);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP14);
+
+                vis_ld64_2(ref, stride_8, TMP2);
+                vis_pmerge(ZERO,       REF_6,     TMP16);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP18);
+
+                vis_ld64_2(ref, stride_16, TMP4);
+                ref += stride;
+
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64_2(dest, 8, DST_2);
+                vis_faligndata(TMP2, TMP4, REF_4);
+
+                vis_ld64_2(ref, stride, TMP6);
+                vis_pmerge(ZERO,     REF_0,     TMP0);
+                vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+
+                vis_ld64_2(ref, stride_8, TMP8);
+                vis_pmerge(ZERO,     REF_4,     TMP4);
+
+                vis_ld64_2(ref, stride_16, TMP10);
+                ref += stride;
+
+                vis_ld64_2(dest, stride, REF_S0/*DST_4*/);
+                vis_faligndata(TMP6, TMP8, REF_2);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP6);
+
+                vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/);
+                vis_faligndata(TMP8, TMP10, REF_6);
+                vis_mul8x16al(DST_0,   CONST_512, TMP20);
+
+                vis_padd16(TMP0, CONST_3, TMP0);
+                vis_mul8x16al(DST_1,   CONST_512, TMP22);
+
+                vis_padd16(TMP2, CONST_3, TMP2);
+                vis_mul8x16al(DST_2,   CONST_512, TMP24);
+
+                vis_padd16(TMP4, CONST_3, TMP4);
+                vis_mul8x16al(DST_3,   CONST_512, TMP26);
+
+                vis_padd16(TMP6, CONST_3, TMP6);
+
+                vis_padd16(TMP12, TMP20, TMP12);
+                vis_mul8x16al(REF_S0,   CONST_512, TMP20);
+
+                vis_padd16(TMP14, TMP22, TMP14);
+                vis_mul8x16al(REF_S0_1, CONST_512, TMP22);
+
+                vis_padd16(TMP16, TMP24, TMP16);
+                vis_mul8x16al(REF_S2,   CONST_512, TMP24);
+
+                vis_padd16(TMP18, TMP26, TMP18);
+                vis_mul8x16al(REF_S2_1, CONST_512, TMP26);
+
+                vis_padd16(TMP12, TMP0, TMP12);
+                vis_mul8x16au(REF_2,   CONST_256, TMP28);
+
+                vis_padd16(TMP14, TMP2, TMP14);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP30);
+
+                vis_padd16(TMP16, TMP4, TMP16);
+                vis_mul8x16au(REF_6,   CONST_256, REF_S4);
+
+                vis_padd16(TMP18, TMP6, TMP18);
+                vis_mul8x16au(REF_6_1, CONST_256, REF_S6);
+
+                vis_pack16(TMP12, DST_0);
+                vis_padd16(TMP28, TMP0, TMP12);
+
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_padd16(TMP30, TMP2, TMP14);
+
+                vis_pack16(TMP16, DST_2);
+                vis_padd16(REF_S4, TMP4, TMP16);
+
+                vis_pack16(TMP18, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+                vis_padd16(REF_S6, TMP6, TMP18);
+
+                vis_padd16(TMP12, TMP20, TMP12);
+
+                vis_padd16(TMP14, TMP22, TMP14);
+                vis_pack16(TMP12, DST_0);
+
+                vis_padd16(TMP16, TMP24, TMP16);
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
+
+                vis_padd16(TMP18, TMP26, TMP18);
+                vis_pack16(TMP16, DST_2);
+
+                vis_pack16(TMP18, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_avg_no_round_y_8_vis (uint8_t * dest, const uint8_t * _ref,
+                            const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        int stride_8 = stride + 8;
+
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
+
+        vis_ld64(ref[ 8], TMP2);
+
+        vis_ld64(constants3[0], CONST_3);
+        vis_faligndata(TMP0, TMP2, REF_2);
+
+        vis_ld64(constants256_512[0], CONST_256);
+
+        height >>= 1;
+        do {    /* 20 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_pmerge(ZERO,       REF_2,     TMP8);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP10);
+
+                vis_ld64_2(ref, stride_8, TMP2);
+                ref += stride;
+
+                vis_ld64(dest[0], DST_0);
+
+                vis_ld64_2(dest, stride, DST_2);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64_2(ref, stride, TMP4);
+                vis_mul8x16al(DST_0,   CONST_512, TMP16);
+                vis_pmerge(ZERO,       REF_0,     TMP12);
+
+                vis_ld64_2(ref, stride_8, TMP6);
+                ref += stride;
+                vis_mul8x16al(DST_1,   CONST_512, TMP18);
+                vis_pmerge(ZERO,       REF_0_1,   TMP14);
+
+                vis_padd16(TMP12, CONST_3, TMP12);
+                vis_mul8x16al(DST_2,   CONST_512, TMP24);
+
+                vis_padd16(TMP14, CONST_3, TMP14);
+                vis_mul8x16al(DST_3,   CONST_512, TMP26);
+
+                vis_faligndata(TMP4, TMP6, REF_2);
+
+                vis_padd16(TMP8, TMP12, TMP8);
+
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_mul8x16au(REF_2,   CONST_256, TMP20);
+
+                vis_padd16(TMP8, TMP16, TMP0);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP22);
+
+                vis_padd16(TMP10, TMP18, TMP2);
+                vis_pack16(TMP0, DST_0);
+
+                vis_pack16(TMP2, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+                vis_padd16(TMP12, TMP20, TMP12);
+
+                vis_padd16(TMP14, TMP22, TMP14);
+
+                vis_padd16(TMP12, TMP24, TMP0);
+
+                vis_padd16(TMP14, TMP26, TMP2);
+                vis_pack16(TMP0, DST_2);
+
+                vis_pack16(TMP2, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_put_no_round_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
+                                       const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
+        int stride_16 = stride + 16;
+
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
+
+        vis_ld64(ref[ 8], TMP2);
+
+        vis_ld64(ref[16], TMP4);
+
+        vis_ld64(constants1[0], CONST_1);
+        vis_faligndata(TMP0, TMP2, REF_S0);
+
+        vis_ld64(constants256_512[0], CONST_256);
+        vis_faligndata(TMP2, TMP4, REF_S4);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+                vis_faligndata(TMP2, TMP4, REF_S6);
+        } else {
+                vis_src1(TMP2, REF_S2);
+                vis_src1(TMP4, REF_S6);
+        }
+
+        height >>= 1;
+        do {
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0, CONST_256, TMP12);
+                vis_pmerge(ZERO,      REF_S0_1,  TMP14);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_ld64_2(ref, stride_8, TMP2);
+                vis_mul8x16au(REF_S2, CONST_256, TMP16);
+                vis_pmerge(ZERO,      REF_S2_1,  TMP18);
+
+                vis_ld64_2(ref, stride_16, TMP4);
+                ref += stride;
+                vis_mul8x16au(REF_S4, CONST_256, TMP20);
+                vis_pmerge(ZERO,      REF_S4_1,  TMP22);
+
+                vis_ld64_2(ref, stride, TMP6);
+                vis_mul8x16au(REF_S6, CONST_256, TMP24);
+                vis_pmerge(ZERO,      REF_S6_1,  TMP26);
+
+                vis_ld64_2(ref, stride_8, TMP8);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64_2(ref, stride_16, TMP10);
+                ref += stride;
+                vis_faligndata(TMP2, TMP4, REF_4);
+
+                vis_faligndata(TMP6, TMP8, REF_S0);
+
+                vis_faligndata(TMP8, TMP10, REF_S4);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                        vis_faligndata(TMP6, TMP8, REF_S2);
+                        vis_faligndata(TMP8, TMP10, REF_S6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                        vis_src1(TMP8, REF_S2);
+                        vis_src1(TMP10, REF_S6);
+                }
+
+                vis_mul8x16au(REF_0, CONST_256, TMP0);
+                vis_pmerge(ZERO,      REF_0_1,  TMP2);
+
+                vis_mul8x16au(REF_2, CONST_256, TMP4);
+                vis_pmerge(ZERO,      REF_2_1,  TMP6);
+
+                vis_padd16(TMP0, CONST_2, TMP8);
+                vis_mul8x16au(REF_4, CONST_256, TMP0);
+
+                vis_padd16(TMP2, CONST_1, TMP10);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP2);
+
+                vis_padd16(TMP8, TMP4, TMP8);
+                vis_mul8x16au(REF_6, CONST_256, TMP4);
+
+                vis_padd16(TMP10, TMP6, TMP10);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP6);
+
+                vis_padd16(TMP12, TMP8, TMP12);
+
+                vis_padd16(TMP14, TMP10, TMP14);
+
+                vis_padd16(TMP12, TMP16, TMP12);
+
+                vis_padd16(TMP14, TMP18, TMP14);
+                vis_pack16(TMP12, DST_0);
+
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_padd16(TMP0, CONST_1, TMP12);
+
+                vis_mul8x16au(REF_S0, CONST_256, TMP0);
+                vis_padd16(TMP2, CONST_1, TMP14);
+
+                vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
+                vis_padd16(TMP12, TMP4, TMP12);
+
+                vis_mul8x16au(REF_S2, CONST_256, TMP4);
+                vis_padd16(TMP14, TMP6, TMP14);
+
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
+                vis_padd16(TMP20, TMP12, TMP20);
+
+                vis_padd16(TMP22, TMP14, TMP22);
+
+                vis_padd16(TMP20, TMP24, TMP20);
+
+                vis_padd16(TMP22, TMP26, TMP22);
+                vis_pack16(TMP20, DST_2);
+
+                vis_pack16(TMP22, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+                vis_padd16(TMP0, TMP4, TMP24);
+
+                vis_mul8x16au(REF_S4, CONST_256, TMP0);
+                vis_padd16(TMP2, TMP6, TMP26);
+
+                vis_mul8x16au(REF_S4_1, CONST_256, TMP2);
+                vis_padd16(TMP24, TMP8, TMP24);
+
+                vis_padd16(TMP26, TMP10, TMP26);
+                vis_pack16(TMP24, DST_0);
+
+                vis_pack16(TMP26, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_pmerge(ZERO, REF_S6, TMP4);
+
+                vis_pmerge(ZERO,      REF_S6_1,  TMP6);
+
+                vis_padd16(TMP0, TMP4, TMP0);
+
+                vis_padd16(TMP2, TMP6, TMP2);
+
+                vis_padd16(TMP0, TMP12, TMP0);
+
+                vis_padd16(TMP2, TMP14, TMP2);
+                vis_pack16(TMP0, DST_2);
+
+                vis_pack16(TMP2, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_put_no_round_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
+                                      const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
+
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
+
+        vis_ld64(ref[ 8], TMP2);
+
+        vis_ld64(constants1[0], CONST_1);
+
+        vis_ld64(constants256_512[0], CONST_256);
+        vis_faligndata(TMP0, TMP2, REF_S0);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+        } else {
+                vis_src1(TMP2, REF_S2);
+        }
+
+        height >>= 1;
+        do {    /* 26 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0,   CONST_256, TMP8);
+                vis_pmerge(ZERO,        REF_S2,    TMP12);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_ld64_2(ref, stride_8, TMP2);
+                ref += stride;
+                vis_mul8x16au(REF_S0_1, CONST_256, TMP10);
+                vis_pmerge(ZERO,        REF_S2_1,  TMP14);
+
+                vis_ld64_2(ref, stride, TMP4);
+
+                vis_ld64_2(ref, stride_8, TMP6);
+                ref += stride;
+                vis_faligndata(TMP0, TMP2, REF_S4);
+
+                vis_pmerge(ZERO, REF_S4, TMP18);
+
+                vis_pmerge(ZERO, REF_S4_1, TMP20);
+
+                vis_faligndata(TMP4, TMP6, REF_S0);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_S6);
+                        vis_faligndata(TMP4, TMP6, REF_S2);
+                } else {
+                        vis_src1(TMP2, REF_S6);
+                        vis_src1(TMP6, REF_S2);
+                }
+
+                vis_padd16(TMP18, CONST_1, TMP18);
+                vis_mul8x16au(REF_S6,   CONST_256, TMP22);
+
+                vis_padd16(TMP20, CONST_1, TMP20);
+                vis_mul8x16au(REF_S6_1, CONST_256, TMP24);
+
+                vis_mul8x16au(REF_S0,   CONST_256, TMP26);
+                vis_pmerge(ZERO, REF_S0_1, TMP28);
+
+                vis_mul8x16au(REF_S2,   CONST_256, TMP30);
+                vis_padd16(TMP18, TMP22, TMP18);
+
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP32);
+                vis_padd16(TMP20, TMP24, TMP20);
+
+                vis_padd16(TMP8,  TMP18, TMP8);
+
+                vis_padd16(TMP10, TMP20, TMP10);
+
+                vis_padd16(TMP8,  TMP12, TMP8);
+
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_pack16(TMP8,  DST_0);
+
+                vis_pack16(TMP10, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+                vis_padd16(TMP18, TMP26, TMP18);
+
+                vis_padd16(TMP20, TMP28, TMP20);
+
+                vis_padd16(TMP18, TMP30, TMP18);
+
+                vis_padd16(TMP20, TMP32, TMP20);
+                vis_pack16(TMP18, DST_2);
+
+                vis_pack16(TMP20, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_avg_no_round_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
+                                       const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
+        int stride_16 = stride + 16;
+
+        vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
+
+        vis_ld64(ref[ 8], TMP2);
+
+        vis_ld64(ref[16], TMP4);
+
+        vis_ld64(constants6[0], CONST_6);
+        vis_faligndata(TMP0, TMP2, REF_S0);
+
+        vis_ld64(constants256_1024[0], CONST_256);
+        vis_faligndata(TMP2, TMP4, REF_S4);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+                vis_faligndata(TMP2, TMP4, REF_S6);
+        } else {
+                vis_src1(TMP2, REF_S2);
+                vis_src1(TMP4, REF_S6);
+        }
+
+        height >>= 1;
+        do {    /* 55 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0, CONST_256, TMP12);
+                vis_pmerge(ZERO,      REF_S0_1,  TMP14);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_ld64_2(ref, stride_8, TMP2);
+                vis_mul8x16au(REF_S2, CONST_256, TMP16);
+                vis_pmerge(ZERO,      REF_S2_1,  TMP18);
+
+                vis_ld64_2(ref, stride_16, TMP4);
+                ref += stride;
+                vis_mul8x16au(REF_S4, CONST_256, TMP20);
+                vis_pmerge(ZERO,      REF_S4_1,  TMP22);
+
+                vis_ld64_2(ref, stride, TMP6);
+                vis_mul8x16au(REF_S6, CONST_256, TMP24);
+                vis_pmerge(ZERO,      REF_S6_1,  TMP26);
+
+                vis_ld64_2(ref, stride_8, TMP8);
+                vis_faligndata(TMP0, TMP2, REF_0);
+
+                vis_ld64_2(ref, stride_16, TMP10);
+                ref += stride;
+                vis_faligndata(TMP2, TMP4, REF_4);
+
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP6, TMP8, REF_S0);
+
+                vis_ld64_2(dest, 8, DST_2);
+                vis_faligndata(TMP8, TMP10, REF_S4);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                        vis_faligndata(TMP6, TMP8, REF_S2);
+                        vis_faligndata(TMP8, TMP10, REF_S6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                        vis_src1(TMP8, REF_S2);
+                        vis_src1(TMP10, REF_S6);
+                }
+
+                vis_mul8x16al(DST_0,   CONST_1024, TMP30);
+                vis_pmerge(ZERO, REF_0, TMP0);
+
+                vis_mul8x16al(DST_1,   CONST_1024, TMP32);
+                vis_pmerge(ZERO,      REF_0_1,  TMP2);
+
+                vis_mul8x16au(REF_2, CONST_256, TMP4);
+                vis_pmerge(ZERO,      REF_2_1,  TMP6);
+
+                vis_mul8x16al(DST_2,   CONST_1024, REF_0);
+                vis_padd16(TMP0, CONST_6, TMP0);
+
+                vis_mul8x16al(DST_3,   CONST_1024, REF_2);
+                vis_padd16(TMP2, CONST_6, TMP2);
+
+                vis_padd16(TMP0, TMP4, TMP0);
+                vis_mul8x16au(REF_4, CONST_256, TMP4);
+
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP6);
+
+                vis_padd16(TMP12, TMP0, TMP12);
+                vis_mul8x16au(REF_6, CONST_256, TMP8);
+
+                vis_padd16(TMP14, TMP2, TMP14);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP10);
+
+                vis_padd16(TMP12, TMP16, TMP12);
+                vis_mul8x16au(REF_S0, CONST_256, REF_4);
+
+                vis_padd16(TMP14, TMP18, TMP14);
+                vis_mul8x16au(REF_S0_1, CONST_256, REF_6);
+
+                vis_padd16(TMP12, TMP30, TMP12);
+
+                vis_padd16(TMP14, TMP32, TMP14);
+                vis_pack16(TMP12, DST_0);
+
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_padd16(TMP4, CONST_6, TMP4);
+
+                vis_ld64_2(dest, stride, DST_0);
+                vis_padd16(TMP6, CONST_6, TMP6);
+                vis_mul8x16au(REF_S2, CONST_256, TMP12);
+
+                vis_padd16(TMP4, TMP8, TMP4);
+                vis_mul8x16au(REF_S2_1, CONST_256,  TMP14);
+
+                vis_padd16(TMP6, TMP10, TMP6);
+
+                vis_padd16(TMP20, TMP4, TMP20);
+
+                vis_padd16(TMP22, TMP6, TMP22);
+
+                vis_padd16(TMP20, TMP24, TMP20);
+
+                vis_padd16(TMP22, TMP26, TMP22);
+
+                vis_padd16(TMP20, REF_0, TMP20);
+                vis_mul8x16au(REF_S4, CONST_256, REF_0);
+
+                vis_padd16(TMP22, REF_2, TMP22);
+                vis_pack16(TMP20, DST_2);
+
+                vis_pack16(TMP22, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+
+                vis_ld64_2(dest, 8, DST_2);
+                vis_mul8x16al(DST_0,   CONST_1024, TMP30);
+                vis_pmerge(ZERO,      REF_S4_1,  REF_2);
+
+                vis_mul8x16al(DST_1,   CONST_1024, TMP32);
+                vis_padd16(REF_4, TMP0, TMP8);
+
+                vis_mul8x16au(REF_S6, CONST_256, REF_4);
+                vis_padd16(REF_6, TMP2, TMP10);
+
+                vis_mul8x16au(REF_S6_1, CONST_256, REF_6);
+                vis_padd16(TMP8, TMP12, TMP8);
+
+                vis_padd16(TMP10, TMP14, TMP10);
+
+                vis_padd16(TMP8, TMP30, TMP8);
+
+                vis_padd16(TMP10, TMP32, TMP10);
+                vis_pack16(TMP8, DST_0);
+
+                vis_pack16(TMP10, DST_1);
+                vis_st64(DST_0, dest[0]);
+
+                vis_padd16(REF_0, TMP4, REF_0);
+
+                vis_mul8x16al(DST_2,   CONST_1024, TMP30);
+                vis_padd16(REF_2, TMP6, REF_2);
+
+                vis_mul8x16al(DST_3,   CONST_1024, TMP32);
+                vis_padd16(REF_0, REF_4, REF_0);
+
+                vis_padd16(REF_2, REF_6, REF_2);
+
+                vis_padd16(REF_0, TMP30, REF_0);
+
+                /* stall */
+
+                vis_padd16(REF_2, TMP32, REF_2);
+                vis_pack16(REF_0, DST_2);
+
+                vis_pack16(REF_2, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+        } while (--height);
+}
+
+static void MC_avg_no_round_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
+                                      const int stride, int height)
+{
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
+
+        vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
+
+        ref = vis_alignaddr(ref);
+
+        vis_ld64(ref[0], TMP0);
+        vis_fzero(ZERO);
+
+        vis_ld64_2(ref, 8, TMP2);
+
+        vis_ld64(constants6[0], CONST_6);
+
+        vis_ld64(constants256_1024[0], CONST_256);
+        vis_faligndata(TMP0, TMP2, REF_S0);
+
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+        } else {
+                vis_src1(TMP2, REF_S2);
+        }
+
+        height >>= 1;
+        do {    /* 31 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0, CONST_256, TMP8);
+                vis_pmerge(ZERO,      REF_S0_1,  TMP10);
+
+                vis_ld64_2(ref, stride_8, TMP2);
+                ref += stride;
+                vis_mul8x16au(REF_S2, CONST_256, TMP12);
+                vis_pmerge(ZERO,      REF_S2_1,  TMP14);
+
+                vis_alignaddr_g0((void *)off);
+
+                vis_ld64_2(ref, stride, TMP4);
+                vis_faligndata(TMP0, TMP2, REF_S4);
+
+                vis_ld64_2(ref, stride_8, TMP6);
+                ref += stride;
+
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP4, TMP6, REF_S0);
+
+                vis_ld64_2(dest, stride, DST_2);
+
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_S6);
+                        vis_faligndata(TMP4, TMP6, REF_S2);
+                } else {
+                        vis_src1(TMP2, REF_S6);
+                        vis_src1(TMP6, REF_S2);
+                }
+
+                vis_mul8x16al(DST_0,   CONST_1024, TMP30);
+                vis_pmerge(ZERO, REF_S4, TMP22);
+
+                vis_mul8x16al(DST_1,   CONST_1024, TMP32);
+                vis_pmerge(ZERO,      REF_S4_1,  TMP24);
+
+                vis_mul8x16au(REF_S6, CONST_256, TMP26);
+                vis_pmerge(ZERO,      REF_S6_1,  TMP28);
+
+                vis_mul8x16au(REF_S0, CONST_256, REF_S4);
+                vis_padd16(TMP22, CONST_6, TMP22);
+
+                vis_mul8x16au(REF_S0_1, CONST_256, REF_S6);
+                vis_padd16(TMP24, CONST_6, TMP24);
+
+                vis_mul8x16al(DST_2,   CONST_1024, REF_0);
+                vis_padd16(TMP22, TMP26, TMP22);
+
+                vis_mul8x16al(DST_3,   CONST_1024, REF_2);
+                vis_padd16(TMP24, TMP28, TMP24);
+
+                vis_mul8x16au(REF_S2, CONST_256, TMP26);
+                vis_padd16(TMP8, TMP22, TMP8);
+
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP28);
+                vis_padd16(TMP10, TMP24, TMP10);
+
+                vis_padd16(TMP8, TMP12, TMP8);
+
+                vis_padd16(TMP10, TMP14, TMP10);
+
+                vis_padd16(TMP8, TMP30, TMP8);
+
+                vis_padd16(TMP10, TMP32, TMP10);
+                vis_pack16(TMP8, DST_0);
+
+                vis_pack16(TMP10, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+
+                vis_padd16(REF_S4, TMP22, TMP12);
+
+                vis_padd16(REF_S6, TMP24, TMP14);
+
+                vis_padd16(TMP12, TMP26, TMP12);
+
+                vis_padd16(TMP14, TMP28, TMP14);
+
+                vis_padd16(TMP12, REF_0, TMP12);
+
+                vis_padd16(TMP14, REF_2, TMP14);
+                vis_pack16(TMP12, DST_2);
+
+                vis_pack16(TMP14, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
+}
+
+/* End of no rounding code */
+
+static sigjmp_buf jmpbuf;
+static volatile sig_atomic_t canjump = 0;
+
+static void sigill_handler (int sig)
+{
+    if (!canjump) {
+        signal (sig, SIG_DFL);
+        raise (sig);
+    }
+
+    canjump = 0;
+    siglongjmp (jmpbuf, 1);
+}
+
+#define ACCEL_SPARC_VIS 1
+#define ACCEL_SPARC_VIS2 2
+
+static int vis_level ()
+{
+    int accel = 0;
+
+    signal (SIGILL, sigill_handler);
+    if (sigsetjmp (jmpbuf, 1)) {
+        signal (SIGILL, SIG_DFL);
+        return accel;
+    }
+
+    canjump = 1;
+
+    /* pdist %f0, %f0, %f0 */
+    __asm__ __volatile__(".word\t0x81b007c0");
+
+    canjump = 0;
+    accel |= ACCEL_SPARC_VIS;
+
+    if (sigsetjmp (jmpbuf, 1)) {
+        signal (SIGILL, SIG_DFL);
+        return accel;
+    }
+
+    canjump = 1;
+
+    /* edge8n %g0, %g0, %g0 */
+    __asm__ __volatile__(".word\t0x81b00020");
+
+    canjump = 0;
+    accel |= ACCEL_SPARC_VIS2;
+
+    signal (SIGILL, SIG_DFL);
+
+    return accel;
+}
+
+/* libavcodec initialization code */
+void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
+{
+  /* VIS specific optimisations */
+  int accel = vis_level ();
+
+  if (accel & ACCEL_SPARC_VIS) {
+      c->put_pixels_tab[0][0] = MC_put_o_16_vis;
+      c->put_pixels_tab[0][1] = MC_put_x_16_vis;
+      c->put_pixels_tab[0][2] = MC_put_y_16_vis;
+      c->put_pixels_tab[0][3] = MC_put_xy_16_vis;
+
+      c->put_pixels_tab[1][0] = MC_put_o_8_vis;
+      c->put_pixels_tab[1][1] = MC_put_x_8_vis;
+      c->put_pixels_tab[1][2] = MC_put_y_8_vis;
+      c->put_pixels_tab[1][3] = MC_put_xy_8_vis;
+
+      c->avg_pixels_tab[0][0] = MC_avg_o_16_vis;
+      c->avg_pixels_tab[0][1] = MC_avg_x_16_vis;
+      c->avg_pixels_tab[0][2] = MC_avg_y_16_vis;
+      c->avg_pixels_tab[0][3] = MC_avg_xy_16_vis;
+
+      c->avg_pixels_tab[1][0] = MC_avg_o_8_vis;
+      c->avg_pixels_tab[1][1] = MC_avg_x_8_vis;
+      c->avg_pixels_tab[1][2] = MC_avg_y_8_vis;
+      c->avg_pixels_tab[1][3] = MC_avg_xy_8_vis;
+
+      c->put_no_rnd_pixels_tab[0][0] = MC_put_no_round_o_16_vis;
+      c->put_no_rnd_pixels_tab[0][1] = MC_put_no_round_x_16_vis;
+      c->put_no_rnd_pixels_tab[0][2] = MC_put_no_round_y_16_vis;
+      c->put_no_rnd_pixels_tab[0][3] = MC_put_no_round_xy_16_vis;
+
+      c->put_no_rnd_pixels_tab[1][0] = MC_put_no_round_o_8_vis;
+      c->put_no_rnd_pixels_tab[1][1] = MC_put_no_round_x_8_vis;
+      c->put_no_rnd_pixels_tab[1][2] = MC_put_no_round_y_8_vis;
+      c->put_no_rnd_pixels_tab[1][3] = MC_put_no_round_xy_8_vis;
+
+      c->avg_no_rnd_pixels_tab[0][0] = MC_avg_no_round_o_16_vis;
+      c->avg_no_rnd_pixels_tab[0][1] = MC_avg_no_round_x_16_vis;
+      c->avg_no_rnd_pixels_tab[0][2] = MC_avg_no_round_y_16_vis;
+      c->avg_no_rnd_pixels_tab[0][3] = MC_avg_no_round_xy_16_vis;
+
+      c->avg_no_rnd_pixels_tab[1][0] = MC_avg_no_round_o_8_vis;
+      c->avg_no_rnd_pixels_tab[1][1] = MC_avg_no_round_x_8_vis;
+      c->avg_no_rnd_pixels_tab[1][2] = MC_avg_no_round_y_8_vis;
+      c->avg_no_rnd_pixels_tab[1][3] = MC_avg_no_round_xy_8_vis;
+  }
+}
+
+#endif  /* !(ARCH_SPARC) */
diff --git a/contrib/ffmpeg/libavcodec/sparc/vis.h b/contrib/ffmpeg/libavcodec/sparc/vis.h
new file mode 100644
index 000000000..d4a8ce092
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/sparc/vis.h
@@ -0,0 +1,327 @@
+/*
+ * vis.h
+ * Copyright (C) 2003 David S. Miller <davem@redhat.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* You may be asking why I hard-code the instruction opcodes and don't
+ * use the normal VIS assembler mnenomics for the VIS instructions.
+ *
+ * The reason is that Sun, in their infinite wisdom, decided that a binary
+ * using a VIS instruction will cause it to be marked (in the ELF headers)
+ * as doing so, and this prevents the OS from loading such binaries if the
+ * current cpu doesn't have VIS.  There is no way to easily override this
+ * behavior of the assembler that I am aware of.
+ *
+ * This totally defeats what libmpeg2 is trying to do which is allow a
+ * single binary to be created, and then detect the availability of VIS
+ * at runtime.
+ *
+ * I'm not saying that tainting the binary by default is bad, rather I'm
+ * saying that not providing a way to override this easily unnecessarily
+ * ties people's hands.
+ *
+ * Thus, we do the opcode encoding by hand and output 32-bit words in
+ * the assembler to keep the binary from becoming tainted.
+ */
+
+#define vis_opc_base    ((0x1 << 31) | (0x36 << 19))
+#define vis_opf(X)      ((X) << 5)
+#define vis_sreg(X)     (X)
+#define vis_dreg(X)     (((X)&0x1f)|((X)>>5))
+#define vis_rs1_s(X)    (vis_sreg(X) << 14)
+#define vis_rs1_d(X)    (vis_dreg(X) << 14)
+#define vis_rs2_s(X)    (vis_sreg(X) << 0)
+#define vis_rs2_d(X)    (vis_dreg(X) << 0)
+#define vis_rd_s(X)     (vis_sreg(X) << 25)
+#define vis_rd_d(X)     (vis_dreg(X) << 25)
+
+#define vis_ss2s(opf,rs1,rs2,rd) \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs1_s(rs1) | \
+                                       vis_rs2_s(rs2) | \
+                                       vis_rd_s(rd)))
+
+#define vis_dd2d(opf,rs1,rs2,rd) \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs1_d(rs1) | \
+                                       vis_rs2_d(rs2) | \
+                                       vis_rd_d(rd)))
+
+#define vis_ss2d(opf,rs1,rs2,rd) \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs1_s(rs1) | \
+                                       vis_rs2_s(rs2) | \
+                                       vis_rd_d(rd)))
+
+#define vis_sd2d(opf,rs1,rs2,rd) \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs1_s(rs1) | \
+                                       vis_rs2_d(rs2) | \
+                                       vis_rd_d(rd)))
+
+#define vis_d2s(opf,rs2,rd) \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs2_d(rs2) | \
+                                       vis_rd_s(rd)))
+
+#define vis_s2d(opf,rs2,rd) \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs2_s(rs2) | \
+                                       vis_rd_d(rd)))
+
+#define vis_d12d(opf,rs1,rd) \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs1_d(rs1) | \
+                                       vis_rd_d(rd)))
+
+#define vis_d22d(opf,rs2,rd) \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs2_d(rs2) | \
+                                       vis_rd_d(rd)))
+
+#define vis_s12s(opf,rs1,rd) \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs1_s(rs1) | \
+                                       vis_rd_s(rd)))
+
+#define vis_s22s(opf,rs2,rd) \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rs2_s(rs2) | \
+                                       vis_rd_s(rd)))
+
+#define vis_s(opf,rd) \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rd_s(rd)))
+
+#define vis_d(opf,rd) \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
+                                       vis_rd_d(rd)))
+
+#define vis_r2m(op,rd,mem) \
+        __asm__ __volatile__ (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) )
+
+#define vis_r2m_2(op,rd,mem1,mem2) \
+        __asm__ __volatile__ (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) )
+
+#define vis_m2r(op,mem,rd) \
+        __asm__ __volatile__ (#op "\t[%0], %%f" #rd : : "r" (&(mem)) )
+
+#define vis_m2r_2(op,mem1,mem2,rd) \
+        __asm__ __volatile__ (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) )
+
+static inline void vis_set_gsr(unsigned int _val)
+{
+        register unsigned int val asm("g1");
+
+        val = _val;
+        __asm__ __volatile__(".word 0xa7804000"
+                             : : "r" (val));
+}
+
+#define VIS_GSR_ALIGNADDR_MASK          0x0000007
+#define VIS_GSR_ALIGNADDR_SHIFT         0
+#define VIS_GSR_SCALEFACT_MASK          0x0000078
+#define VIS_GSR_SCALEFACT_SHIFT         3
+
+#define vis_ld32(mem,rs1)               vis_m2r(ld, mem, rs1)
+#define vis_ld32_2(mem1,mem2,rs1)       vis_m2r_2(ld, mem1, mem2, rs1)
+#define vis_st32(rs1,mem)               vis_r2m(st, rs1, mem)
+#define vis_st32_2(rs1,mem1,mem2)       vis_r2m_2(st, rs1, mem1, mem2)
+#define vis_ld64(mem,rs1)               vis_m2r(ldd, mem, rs1)
+#define vis_ld64_2(mem1,mem2,rs1)       vis_m2r_2(ldd, mem1, mem2, rs1)
+#define vis_st64(rs1,mem)               vis_r2m(std, rs1, mem)
+#define vis_st64_2(rs1,mem1,mem2)       vis_r2m_2(std, rs1, mem1, mem2)
+
+#define vis_ldblk(mem, rd) \
+do {        register void *__mem asm("g1"); \
+        __mem = &(mem); \
+        __asm__ __volatile__(".word 0xc1985e00 | %1" \
+                             : \
+                             : "r" (__mem), \
+                               "i" (vis_rd_d(rd)) \
+                             : "memory"); \
+} while (0)
+
+#define vis_stblk(rd, mem) \
+do {        register void *__mem asm("g1"); \
+        __mem = &(mem); \
+        __asm__ __volatile__(".word 0xc1b85e00 | %1" \
+                             : \
+                             : "r" (__mem), \
+                               "i" (vis_rd_d(rd)) \
+                             : "memory"); \
+} while (0)
+
+#define vis_membar_storestore()        \
+        __asm__ __volatile__(".word 0x8143e008" : : : "memory")
+
+#define vis_membar_sync()        \
+        __asm__ __volatile__(".word 0x8143e040" : : : "memory")
+
+/* 16 and 32 bit partitioned addition and subtraction.  The normal
+ * versions perform 4 16-bit or 2 32-bit additions or subtractions.
+ * The 's' versions perform 2 16-bit or 1 32-bit additions or
+ * subtractions.
+ */
+
+#define vis_padd16(rs1,rs2,rd)          vis_dd2d(0x50, rs1, rs2, rd)
+#define vis_padd16s(rs1,rs2,rd)         vis_ss2s(0x51, rs1, rs2, rd)
+#define vis_padd32(rs1,rs2,rd)          vis_dd2d(0x52, rs1, rs2, rd)
+#define vis_padd32s(rs1,rs2,rd)         vis_ss2s(0x53, rs1, rs2, rd)
+#define vis_psub16(rs1,rs2,rd)          vis_dd2d(0x54, rs1, rs2, rd)
+#define vis_psub16s(rs1,rs2,rd)         vis_ss2s(0x55, rs1, rs2, rd)
+#define vis_psub32(rs1,rs2,rd)          vis_dd2d(0x56, rs1, rs2, rd)
+#define vis_psub32s(rs1,rs2,rd)         vis_ss2s(0x57, rs1, rs2, rd)
+
+/* Pixel formatting instructions.  */
+
+#define vis_pack16(rs2,rd)              vis_d2s( 0x3b,      rs2, rd)
+#define vis_pack32(rs1,rs2,rd)          vis_dd2d(0x3a, rs1, rs2, rd)
+#define vis_packfix(rs2,rd)             vis_d2s( 0x3d,      rs2, rd)
+#define vis_expand(rs2,rd)              vis_s2d( 0x4d,      rs2, rd)
+#define vis_pmerge(rs1,rs2,rd)          vis_ss2d(0x4b, rs1, rs2, rd)
+
+/* Partitioned multiply instructions.  */
+
+#define vis_mul8x16(rs1,rs2,rd)         vis_sd2d(0x31, rs1, rs2, rd)
+#define vis_mul8x16au(rs1,rs2,rd)       vis_ss2d(0x33, rs1, rs2, rd)
+#define vis_mul8x16al(rs1,rs2,rd)       vis_ss2d(0x35, rs1, rs2, rd)
+#define vis_mul8sux16(rs1,rs2,rd)       vis_dd2d(0x36, rs1, rs2, rd)
+#define vis_mul8ulx16(rs1,rs2,rd)       vis_dd2d(0x37, rs1, rs2, rd)
+#define vis_muld8sux16(rs1,rs2,rd)      vis_ss2d(0x38, rs1, rs2, rd)
+#define vis_muld8ulx16(rs1,rs2,rd)      vis_ss2d(0x39, rs1, rs2, rd)
+
+/* Alignment instructions.  */
+
+static inline void *vis_alignaddr(void *_ptr)
+{
+        register void *ptr asm("g1");
+
+        ptr = _ptr;
+
+        __asm__ __volatile__(".word %2"
+                             : "=&r" (ptr)
+                             : "0" (ptr),
+                               "i" (vis_opc_base | vis_opf(0x18) |
+                                    vis_rs1_s(1) |
+                                    vis_rs2_s(0) |
+                                    vis_rd_s(1)));
+
+        return ptr;
+}
+
+static inline void vis_alignaddr_g0(void *_ptr)
+{
+        register void *ptr asm("g1");
+
+        ptr = _ptr;
+
+        __asm__ __volatile__(".word %2"
+                             : "=&r" (ptr)
+                             : "0" (ptr),
+                               "i" (vis_opc_base | vis_opf(0x18) |
+                                    vis_rs1_s(1) |
+                                    vis_rs2_s(0) |
+                                    vis_rd_s(0)));
+}
+
+static inline void *vis_alignaddrl(void *_ptr)
+{
+        register void *ptr asm("g1");
+
+        ptr = _ptr;
+
+        __asm__ __volatile__(".word %2"
+                             : "=&r" (ptr)
+                             : "0" (ptr),
+                               "i" (vis_opc_base | vis_opf(0x19) |
+                                    vis_rs1_s(1) |
+                                    vis_rs2_s(0) |
+                                    vis_rd_s(1)));
+
+        return ptr;
+}
+
+static inline void vis_alignaddrl_g0(void *_ptr)
+{
+        register void *ptr asm("g1");
+
+        ptr = _ptr;
+
+        __asm__ __volatile__(".word %2"
+                             : "=&r" (ptr)
+                             : "0" (ptr),
+                               "i" (vis_opc_base | vis_opf(0x19) |
+                                    vis_rs1_s(1) |
+                                    vis_rs2_s(0) |
+                                    vis_rd_s(0)));
+}
+
+#define vis_faligndata(rs1,rs2,rd)        vis_dd2d(0x48, rs1, rs2, rd)
+
+/* Logical operate instructions.  */
+
+#define vis_fzero(rd)                   vis_d(   0x60,           rd)
+#define vis_fzeros(rd)                  vis_s(   0x61,           rd)
+#define vis_fone(rd)                    vis_d(   0x7e,           rd)
+#define vis_fones(rd)                   vis_s(   0x7f,           rd)
+#define vis_src1(rs1,rd)                vis_d12d(0x74, rs1,      rd)
+#define vis_src1s(rs1,rd)               vis_s12s(0x75, rs1,      rd)
+#define vis_src2(rs2,rd)                vis_d22d(0x78,      rs2, rd)
+#define vis_src2s(rs2,rd)               vis_s22s(0x79,      rs2, rd)
+#define vis_not1(rs1,rd)                vis_d12d(0x6a, rs1,      rd)
+#define vis_not1s(rs1,rd)               vis_s12s(0x6b, rs1,      rd)
+#define vis_not2(rs2,rd)                vis_d22d(0x66,      rs2, rd)
+#define vis_not2s(rs2,rd)               vis_s22s(0x67,      rs2, rd)
+#define vis_or(rs1,rs2,rd)              vis_dd2d(0x7c, rs1, rs2, rd)
+#define vis_ors(rs1,rs2,rd)             vis_ss2s(0x7d, rs1, rs2, rd)
+#define vis_nor(rs1,rs2,rd)             vis_dd2d(0x62, rs1, rs2, rd)
+#define vis_nors(rs1,rs2,rd)            vis_ss2s(0x63, rs1, rs2, rd)
+#define vis_and(rs1,rs2,rd)             vis_dd2d(0x70, rs1, rs2, rd)
+#define vis_ands(rs1,rs2,rd)            vis_ss2s(0x71, rs1, rs2, rd)
+#define vis_nand(rs1,rs2,rd)            vis_dd2d(0x6e, rs1, rs2, rd)
+#define vis_nands(rs1,rs2,rd)           vis_ss2s(0x6f, rs1, rs2, rd)
+#define vis_xor(rs1,rs2,rd)             vis_dd2d(0x6c, rs1, rs2, rd)
+#define vis_xors(rs1,rs2,rd)            vis_ss2s(0x6d, rs1, rs2, rd)
+#define vis_xnor(rs1,rs2,rd)            vis_dd2d(0x72, rs1, rs2, rd)
+#define vis_xnors(rs1,rs2,rd)           vis_ss2s(0x73, rs1, rs2, rd)
+#define vis_ornot1(rs1,rs2,rd)          vis_dd2d(0x7a, rs1, rs2, rd)
+#define vis_ornot1s(rs1,rs2,rd)         vis_ss2s(0x7b, rs1, rs2, rd)
+#define vis_ornot2(rs1,rs2,rd)          vis_dd2d(0x76, rs1, rs2, rd)
+#define vis_ornot2s(rs1,rs2,rd)         vis_ss2s(0x77, rs1, rs2, rd)
+#define vis_andnot1(rs1,rs2,rd)         vis_dd2d(0x68, rs1, rs2, rd)
+#define vis_andnot1s(rs1,rs2,rd)        vis_ss2s(0x69, rs1, rs2, rd)
+#define vis_andnot2(rs1,rs2,rd)         vis_dd2d(0x64, rs1, rs2, rd)
+#define vis_andnot2s(rs1,rs2,rd)        vis_ss2s(0x65, rs1, rs2, rd)
+
+/* Pixel component distance.  */
+
+#define vis_pdist(rs1,rs2,rd)           vis_dd2d(0x3e, rs1, rs2, rd)
diff --git a/contrib/ffmpeg/libavcodec/svq1.c b/contrib/ffmpeg/libavcodec/svq1.c
new file mode 100644
index 000000000..5e8616269
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/svq1.c
@@ -0,0 +1,1431 @@
+/*
+ *
+ * Copyright (C) 2002 the xine project
+ * Copyright (C) 2002 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * (SVQ1 Decoder)
+ * Ported to mplayer by Arpi <arpi@thot.banki.hu>
+ * Ported to libavcodec by Nick Kurshev <nickols_k@mail.ru>
+ *
+ * SVQ1 Encoder (c) 2004 Mike Melanson <melanson@pcisys.net>
+ */
+
+/**
+ * @file svq1.c
+ * Sorenson Vector Quantizer #1 (SVQ1) video codec.
+ * For more information of the SVQ1 algorithm, visit:
+ *   http://www.pcisys.net/~melanson/codecs/
+ */
+
+
+//#define DEBUG_SVQ1
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <limits.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+#include "bswap.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+extern const uint8_t mvtab[33][2];
+
+static VLC svq1_block_type;
+static VLC svq1_motion_component;
+static VLC svq1_intra_multistage[6];
+static VLC svq1_inter_multistage[6];
+static VLC svq1_intra_mean;
+static VLC svq1_inter_mean;
+
+#define SVQ1_BLOCK_SKIP         0
+#define SVQ1_BLOCK_INTER        1
+#define SVQ1_BLOCK_INTER_4V     2
+#define SVQ1_BLOCK_INTRA        3
+
+typedef struct SVQ1Context {
+    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    AVFrame picture;
+    AVFrame current_picture;
+    AVFrame last_picture;
+    PutBitContext pb;
+    GetBitContext gb;
+
+    PutBitContext reorder_pb[6]; //why ooh why this sick breadth first order, everything is slower and more complex
+
+    int frame_width;
+    int frame_height;
+
+    /* Y plane block dimensions */
+    int y_block_width;
+    int y_block_height;
+
+    /* U & V plane (C planes) block dimensions */
+    int c_block_width;
+    int c_block_height;
+
+    uint16_t *mb_type;
+    uint32_t *dummy;
+    int16_t (*motion_val8[3])[2];
+    int16_t (*motion_val16[3])[2];
+
+    int64_t rd_total;
+} SVQ1Context;
+
+/* motion vector (prediction) */
+typedef struct svq1_pmv_s {
+  int           x;
+  int           y;
+} svq1_pmv_t;
+
+#include "svq1_cb.h"
+#include "svq1_vlc.h"
+
+static const uint16_t checksum_table[256] = {
+  0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
+  0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
+  0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
+  0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
+  0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
+  0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
+  0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
+  0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
+  0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
+  0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
+  0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
+  0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
+  0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
+  0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
+  0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
+  0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
+  0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
+  0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
+  0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
+  0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
+  0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
+  0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
+  0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
+  0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
+  0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
+  0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
+  0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
+  0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
+  0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
+  0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
+  0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
+  0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
+};
+
+static const uint8_t string_table[256] = {
+  0x00, 0xD5, 0x7F, 0xAA, 0xFE, 0x2B, 0x81, 0x54,
+  0x29, 0xFC, 0x56, 0x83, 0xD7, 0x02, 0xA8, 0x7D,
+  0x52, 0x87, 0x2D, 0xF8, 0xAC, 0x79, 0xD3, 0x06,
+  0x7B, 0xAE, 0x04, 0xD1, 0x85, 0x50, 0xFA, 0x2F,
+  0xA4, 0x71, 0xDB, 0x0E, 0x5A, 0x8F, 0x25, 0xF0,
+  0x8D, 0x58, 0xF2, 0x27, 0x73, 0xA6, 0x0C, 0xD9,
+  0xF6, 0x23, 0x89, 0x5C, 0x08, 0xDD, 0x77, 0xA2,
+  0xDF, 0x0A, 0xA0, 0x75, 0x21, 0xF4, 0x5E, 0x8B,
+  0x9D, 0x48, 0xE2, 0x37, 0x63, 0xB6, 0x1C, 0xC9,
+  0xB4, 0x61, 0xCB, 0x1E, 0x4A, 0x9F, 0x35, 0xE0,
+  0xCF, 0x1A, 0xB0, 0x65, 0x31, 0xE4, 0x4E, 0x9B,
+  0xE6, 0x33, 0x99, 0x4C, 0x18, 0xCD, 0x67, 0xB2,
+  0x39, 0xEC, 0x46, 0x93, 0xC7, 0x12, 0xB8, 0x6D,
+  0x10, 0xC5, 0x6F, 0xBA, 0xEE, 0x3B, 0x91, 0x44,
+  0x6B, 0xBE, 0x14, 0xC1, 0x95, 0x40, 0xEA, 0x3F,
+  0x42, 0x97, 0x3D, 0xE8, 0xBC, 0x69, 0xC3, 0x16,
+  0xEF, 0x3A, 0x90, 0x45, 0x11, 0xC4, 0x6E, 0xBB,
+  0xC6, 0x13, 0xB9, 0x6C, 0x38, 0xED, 0x47, 0x92,
+  0xBD, 0x68, 0xC2, 0x17, 0x43, 0x96, 0x3C, 0xE9,
+  0x94, 0x41, 0xEB, 0x3E, 0x6A, 0xBF, 0x15, 0xC0,
+  0x4B, 0x9E, 0x34, 0xE1, 0xB5, 0x60, 0xCA, 0x1F,
+  0x62, 0xB7, 0x1D, 0xC8, 0x9C, 0x49, 0xE3, 0x36,
+  0x19, 0xCC, 0x66, 0xB3, 0xE7, 0x32, 0x98, 0x4D,
+  0x30, 0xE5, 0x4F, 0x9A, 0xCE, 0x1B, 0xB1, 0x64,
+  0x72, 0xA7, 0x0D, 0xD8, 0x8C, 0x59, 0xF3, 0x26,
+  0x5B, 0x8E, 0x24, 0xF1, 0xA5, 0x70, 0xDA, 0x0F,
+  0x20, 0xF5, 0x5F, 0x8A, 0xDE, 0x0B, 0xA1, 0x74,
+  0x09, 0xDC, 0x76, 0xA3, 0xF7, 0x22, 0x88, 0x5D,
+  0xD6, 0x03, 0xA9, 0x7C, 0x28, 0xFD, 0x57, 0x82,
+  0xFF, 0x2A, 0x80, 0x55, 0x01, 0xD4, 0x7E, 0xAB,
+  0x84, 0x51, 0xFB, 0x2E, 0x7A, 0xAF, 0x05, 0xD0,
+  0xAD, 0x78, 0xD2, 0x07, 0x53, 0x86, 0x2C, 0xF9
+};
+
+#define SVQ1_PROCESS_VECTOR()\
+    for (; level > 0; i++) {\
+      /* process next depth */\
+      if (i == m) {\
+        m = n;\
+        if (--level == 0)\
+          break;\
+      }\
+      /* divide block if next bit set */\
+      if (get_bits (bitbuf, 1) == 0)\
+        break;\
+      /* add child nodes */\
+      list[n++] = list[i];\
+      list[n++] = list[i] + (((level & 1) ? pitch : 1) << ((level / 2) + 1));\
+    }
+
+#define SVQ1_ADD_CODEBOOK()\
+          /* add codebook entries to vector */\
+          for (j=0; j < stages; j++) {\
+            n3  = codebook[entries[j]] ^ 0x80808080;\
+            n1 += ((n3 & 0xFF00FF00) >> 8);\
+            n2 +=  (n3 & 0x00FF00FF);\
+          }\
+\
+          /* clip to [0..255] */\
+          if (n1 & 0xFF00FF00) {\
+            n3  = ((( n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
+            n1 += 0x7F007F00;\
+            n1 |= (((~n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
+            n1 &= (n3 & 0x00FF00FF);\
+          }\
+\
+          if (n2 & 0xFF00FF00) {\
+            n3  = ((( n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
+            n2 += 0x7F007F00;\
+            n2 |= (((~n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
+            n2 &= (n3 & 0x00FF00FF);\
+          }
+
+#define SVQ1_DO_CODEBOOK_INTRA()\
+      for (y=0; y < height; y++) {\
+        for (x=0; x < (width / 4); x++, codebook++) {\
+        n1 = n4;\
+        n2 = n4;\
+        SVQ1_ADD_CODEBOOK()\
+        /* store result */\
+        dst[x] = (n1 << 8) | n2;\
+        }\
+        dst += (pitch / 4);\
+      }
+
+#define SVQ1_DO_CODEBOOK_NONINTRA()\
+      for (y=0; y < height; y++) {\
+        for (x=0; x < (width / 4); x++, codebook++) {\
+        n3 = dst[x];\
+        /* add mean value to vector */\
+        n1 = ((n3 & 0xFF00FF00) >> 8) + n4;\
+        n2 =  (n3 & 0x00FF00FF)          + n4;\
+        SVQ1_ADD_CODEBOOK()\
+        /* store result */\
+        dst[x] = (n1 << 8) | n2;\
+        }\
+        dst += (pitch / 4);\
+      }
+
+#define SVQ1_CALC_CODEBOOK_ENTRIES(cbook)\
+      codebook = (const uint32_t *) cbook[level];\
+      bit_cache = get_bits (bitbuf, 4*stages);\
+      /* calculate codebook entries for this vector */\
+      for (j=0; j < stages; j++) {\
+        entries[j] = (((bit_cache >> (4*(stages - j - 1))) & 0xF) + 16*j) << (level + 1);\
+      }\
+      mean -= (stages * 128);\
+      n4    = ((mean + (mean >> 31)) << 16) | (mean & 0xFFFF);
+
+static int svq1_decode_block_intra (GetBitContext *bitbuf, uint8_t *pixels, int pitch ) {
+  uint32_t    bit_cache;
+  uint8_t    *list[63];
+  uint32_t   *dst;
+  const uint32_t *codebook;
+  int         entries[6];
+  int         i, j, m, n;
+  int         mean, stages;
+  unsigned    x, y, width, height, level;
+  uint32_t    n1, n2, n3, n4;
+
+  /* initialize list for breadth first processing of vectors */
+  list[0] = pixels;
+
+  /* recursively process vector */
+  for (i=0, m=1, n=1, level=5; i < n; i++) {
+    SVQ1_PROCESS_VECTOR();
+
+    /* destination address and vector size */
+    dst = (uint32_t *) list[i];
+    width = 1 << ((4 + level) /2);
+    height = 1 << ((3 + level) /2);
+
+    /* get number of stages (-1 skips vector, 0 for mean only) */
+    stages = get_vlc2(bitbuf, svq1_intra_multistage[level].table, 3, 3) - 1;
+
+    if (stages == -1) {
+        for (y=0; y < height; y++) {
+          memset (&dst[y*(pitch / 4)], 0, width);
+        }
+      continue;                 /* skip vector */
+    }
+
+    if ((stages > 0) && (level >= 4)) {
+#ifdef DEBUG_SVQ1
+    av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_intra): invalid vector: stages=%i level=%i\n",stages,level);
+#endif
+      return -1;        /* invalid vector */
+    }
+
+    mean = get_vlc2(bitbuf, svq1_intra_mean.table, 8, 3);
+
+    if (stages == 0) {
+      for (y=0; y < height; y++) {
+        memset (&dst[y*(pitch / 4)], mean, width);
+      }
+    } else {
+      SVQ1_CALC_CODEBOOK_ENTRIES(svq1_intra_codebooks);
+      SVQ1_DO_CODEBOOK_INTRA()
+    }
+  }
+
+  return 0;
+}
+
+static int svq1_decode_block_non_intra (GetBitContext *bitbuf, uint8_t *pixels, int pitch ) {
+  uint32_t    bit_cache;
+  uint8_t    *list[63];
+  uint32_t   *dst;
+  const uint32_t *codebook;
+  int         entries[6];
+  int         i, j, m, n;
+  int         mean, stages;
+  int         x, y, width, height, level;
+  uint32_t    n1, n2, n3, n4;
+
+  /* initialize list for breadth first processing of vectors */
+  list[0] = pixels;
+
+  /* recursively process vector */
+  for (i=0, m=1, n=1, level=5; i < n; i++) {
+    SVQ1_PROCESS_VECTOR();
+
+    /* destination address and vector size */
+    dst = (uint32_t *) list[i];
+    width = 1 << ((4 + level) /2);
+    height = 1 << ((3 + level) /2);
+
+    /* get number of stages (-1 skips vector, 0 for mean only) */
+    stages = get_vlc2(bitbuf, svq1_inter_multistage[level].table, 3, 2) - 1;
+
+    if (stages == -1) continue; /* skip vector */
+
+    if ((stages > 0) && (level >= 4)) {
+#ifdef DEBUG_SVQ1
+    av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_non_intra): invalid vector: stages=%i level=%i\n",stages,level);
+#endif
+      return -1;        /* invalid vector */
+    }
+
+    mean = get_vlc2(bitbuf, svq1_inter_mean.table, 9, 3) - 256;
+
+    SVQ1_CALC_CODEBOOK_ENTRIES(svq1_inter_codebooks);
+    SVQ1_DO_CODEBOOK_NONINTRA()
+  }
+  return 0;
+}
+
+static int svq1_decode_motion_vector (GetBitContext *bitbuf, svq1_pmv_t *mv, svq1_pmv_t **pmv) {
+  int        diff;
+  int        i;
+
+  for (i=0; i < 2; i++) {
+
+    /* get motion code */
+    diff = get_vlc2(bitbuf, svq1_motion_component.table, 7, 2);
+    if(diff<0)
+        return -1;
+    else if(diff){
+        if(get_bits1(bitbuf)) diff= -diff;
+    }
+
+    /* add median of motion vector predictors and clip result */
+    if (i == 1)
+      mv->y = ((diff + mid_pred(pmv[0]->y, pmv[1]->y, pmv[2]->y)) << 26) >> 26;
+    else
+      mv->x = ((diff + mid_pred(pmv[0]->x, pmv[1]->x, pmv[2]->x)) << 26) >> 26;
+  }
+
+  return 0;
+}
+
+static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int x, int y) {
+  uint8_t *src;
+  uint8_t *dst;
+  int      i;
+
+  src = &previous[x + y*pitch];
+  dst = current;
+
+  for (i=0; i < 16; i++) {
+    memcpy (dst, src, 16);
+    src += pitch;
+    dst += pitch;
+  }
+}
+
+static int svq1_motion_inter_block (MpegEncContext *s, GetBitContext *bitbuf,
+                               uint8_t *current, uint8_t *previous, int pitch,
+                               svq1_pmv_t *motion, int x, int y) {
+  uint8_t    *src;
+  uint8_t    *dst;
+  svq1_pmv_t  mv;
+  svq1_pmv_t *pmv[3];
+  int         result;
+
+  /* predict and decode motion vector */
+  pmv[0] = &motion[0];
+  if (y == 0) {
+    pmv[1] =
+    pmv[2] = pmv[0];
+  }
+  else {
+    pmv[1] = &motion[(x / 8) + 2];
+    pmv[2] = &motion[(x / 8) + 4];
+  }
+
+  result = svq1_decode_motion_vector (bitbuf, &mv, pmv);
+
+  if (result != 0)
+    return result;
+
+  motion[0].x                =
+  motion[(x / 8) + 2].x      =
+  motion[(x / 8) + 3].x      = mv.x;
+  motion[0].y                =
+  motion[(x / 8) + 2].y      =
+  motion[(x / 8) + 3].y      = mv.y;
+
+  if(y + (mv.y >> 1)<0)
+     mv.y= 0;
+  if(x + (mv.x >> 1)<0)
+     mv.x= 0;
+
+#if 0
+  int w= (s->width+15)&~15;
+  int h= (s->height+15)&~15;
+  if(x + (mv.x >> 1)<0 || y + (mv.y >> 1)<0 || x + (mv.x >> 1) + 16 > w || y + (mv.y >> 1) + 16> h)
+      av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mv.x >> 1), y + (mv.y >> 1));
+#endif
+
+  src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch];
+  dst = current;
+
+  s->dsp.put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16);
+
+  return 0;
+}
+
+static int svq1_motion_inter_4v_block (MpegEncContext *s, GetBitContext *bitbuf,
+                                  uint8_t *current, uint8_t *previous, int pitch,
+                                  svq1_pmv_t *motion,int x, int y) {
+  uint8_t    *src;
+  uint8_t    *dst;
+  svq1_pmv_t  mv;
+  svq1_pmv_t *pmv[4];
+  int         i, result;
+
+  /* predict and decode motion vector (0) */
+  pmv[0] = &motion[0];
+  if (y == 0) {
+    pmv[1] =
+    pmv[2] = pmv[0];
+  }
+  else {
+    pmv[1] = &motion[(x / 8) + 2];
+    pmv[2] = &motion[(x / 8) + 4];
+  }
+
+  result = svq1_decode_motion_vector (bitbuf, &mv, pmv);
+
+  if (result != 0)
+    return result;
+
+  /* predict and decode motion vector (1) */
+  pmv[0] = &mv;
+  if (y == 0) {
+    pmv[1] =
+    pmv[2] = pmv[0];
+  }
+  else {
+    pmv[1] = &motion[(x / 8) + 3];
+  }
+  result = svq1_decode_motion_vector (bitbuf, &motion[0], pmv);
+
+  if (result != 0)
+    return result;
+
+  /* predict and decode motion vector (2) */
+  pmv[1] = &motion[0];
+  pmv[2] = &motion[(x / 8) + 1];
+
+  result = svq1_decode_motion_vector (bitbuf, &motion[(x / 8) + 2], pmv);
+
+  if (result != 0)
+    return result;
+
+  /* predict and decode motion vector (3) */
+  pmv[2] = &motion[(x / 8) + 2];
+  pmv[3] = &motion[(x / 8) + 3];
+
+  result = svq1_decode_motion_vector (bitbuf, pmv[3], pmv);
+
+  if (result != 0)
+    return result;
+
+  /* form predictions */
+  for (i=0; i < 4; i++) {
+    int mvx= pmv[i]->x + (i&1)*16;
+    int mvy= pmv[i]->y + (i>>1)*16;
+
+    ///XXX /FIXME cliping or padding?
+    if(y + (mvy >> 1)<0)
+       mvy= 0;
+    if(x + (mvx >> 1)<0)
+       mvx= 0;
+
+#if 0
+  int w= (s->width+15)&~15;
+  int h= (s->height+15)&~15;
+  if(x + (mvx >> 1)<0 || y + (mvy >> 1)<0 || x + (mvx >> 1) + 8 > w || y + (mvy >> 1) + 8> h)
+      av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mvx >> 1), y + (mvy >> 1));
+#endif
+    src = &previous[(x + (mvx >> 1)) + (y + (mvy >> 1))*pitch];
+    dst = current;
+
+    s->dsp.put_pixels_tab[1][((mvy & 1) << 1) | (mvx & 1)](dst,src,pitch,8);
+
+    /* select next block */
+    if (i & 1) {
+      current  += 8*(pitch - 1);
+    } else {
+      current  += 8;
+    }
+  }
+
+  return 0;
+}
+
+static int svq1_decode_delta_block (MpegEncContext *s, GetBitContext *bitbuf,
+                        uint8_t *current, uint8_t *previous, int pitch,
+                        svq1_pmv_t *motion, int x, int y) {
+  uint32_t block_type;
+  int      result = 0;
+
+  /* get block type */
+  block_type = get_vlc2(bitbuf, svq1_block_type.table, 2, 2);
+
+  /* reset motion vectors */
+  if (block_type == SVQ1_BLOCK_SKIP || block_type == SVQ1_BLOCK_INTRA) {
+    motion[0].x                 =
+    motion[0].y                 =
+    motion[(x / 8) + 2].x =
+    motion[(x / 8) + 2].y =
+    motion[(x / 8) + 3].x =
+    motion[(x / 8) + 3].y = 0;
+  }
+
+  switch (block_type) {
+  case SVQ1_BLOCK_SKIP:
+    svq1_skip_block (current, previous, pitch, x, y);
+    break;
+
+  case SVQ1_BLOCK_INTER:
+    result = svq1_motion_inter_block (s, bitbuf, current, previous, pitch, motion, x, y);
+
+    if (result != 0)
+    {
+#ifdef DEBUG_SVQ1
+    av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_block %i\n",result);
+#endif
+      break;
+    }
+    result = svq1_decode_block_non_intra (bitbuf, current, pitch);
+    break;
+
+  case SVQ1_BLOCK_INTER_4V:
+    result = svq1_motion_inter_4v_block (s, bitbuf, current, previous, pitch, motion, x, y);
+
+    if (result != 0)
+    {
+#ifdef DEBUG_SVQ1
+    av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_4v_block %i\n",result);
+#endif
+      break;
+    }
+    result = svq1_decode_block_non_intra (bitbuf, current, pitch);
+    break;
+
+  case SVQ1_BLOCK_INTRA:
+    result = svq1_decode_block_intra (bitbuf, current, pitch);
+    break;
+  }
+
+  return result;
+}
+
+/* standard video sizes */
+static struct { int width; int height; } svq1_frame_size_table[8] = {
+  { 160, 120 }, { 128,  96 }, { 176, 144 }, { 352, 288 },
+  { 704, 576 }, { 240, 180 }, { 320, 240 }, {  -1,  -1 }
+};
+
+static uint16_t svq1_packet_checksum (uint8_t *data, int length, int value) {
+  int i;
+
+  for (i=0; i < length; i++) {
+    value = checksum_table[data[i] ^ (value >> 8)] ^ ((value & 0xFF) << 8);
+  }
+
+  return value;
+}
+
+#if 0 /* unused, remove? */
+static uint16_t svq1_component_checksum (uint16_t *pixels, int pitch,
+                                         int width, int height, int value) {
+  int x, y;
+
+  for (y=0; y < height; y++) {
+    for (x=0; x < width; x++) {
+      value = checksum_table[pixels[x] ^ (value >> 8)] ^ ((value & 0xFF) << 8);
+    }
+
+    pixels += pitch;
+  }
+
+  return value;
+}
+#endif
+
+#ifdef CONFIG_DECODERS
+static void svq1_parse_string (GetBitContext *bitbuf, uint8_t *out) {
+  uint8_t seed;
+  int     i;
+
+  out[0] = get_bits (bitbuf, 8);
+
+  seed = string_table[out[0]];
+
+  for (i=1; i <= out[0]; i++) {
+    out[i] = get_bits (bitbuf, 8) ^ seed;
+    seed   = string_table[out[i] ^ seed];
+  }
+}
+
+static int svq1_decode_frame_header (GetBitContext *bitbuf,MpegEncContext *s) {
+  int frame_size_code;
+  int temporal_reference;
+
+  temporal_reference = get_bits (bitbuf, 8);
+
+  /* frame type */
+  s->pict_type= get_bits (bitbuf, 2)+1;
+  if(s->pict_type==4)
+      return -1;
+
+  if (s->pict_type == I_TYPE) {
+
+    /* unknown fields */
+    if (s->f_code == 0x50 || s->f_code == 0x60) {
+      int csum = get_bits (bitbuf, 16);
+
+      csum = svq1_packet_checksum ((uint8_t *)bitbuf->buffer, bitbuf->size_in_bits>>3, csum);
+
+//      av_log(s->avctx, AV_LOG_INFO, "%s checksum (%02x) for packet data\n",
+//              (csum == 0) ? "correct" : "incorrect", csum);
+    }
+
+    if ((s->f_code ^ 0x10) >= 0x50) {
+      uint8_t msg[256];
+
+      svq1_parse_string (bitbuf, msg);
+
+      av_log(s->avctx, AV_LOG_INFO, "embedded message: \"%s\"\n", (char *) msg);
+    }
+
+    skip_bits (bitbuf, 2);
+    skip_bits (bitbuf, 2);
+    skip_bits1 (bitbuf);
+
+    /* load frame size */
+    frame_size_code = get_bits (bitbuf, 3);
+
+    if (frame_size_code == 7) {
+      /* load width, height (12 bits each) */
+      s->width = get_bits (bitbuf, 12);
+      s->height = get_bits (bitbuf, 12);
+
+      if (!s->width || !s->height)
+        return -1;
+    } else {
+      /* get width, height from table */
+      s->width = svq1_frame_size_table[frame_size_code].width;
+      s->height = svq1_frame_size_table[frame_size_code].height;
+    }
+  }
+
+  /* unknown fields */
+  if (get_bits (bitbuf, 1) == 1) {
+    skip_bits1 (bitbuf);       /* use packet checksum if (1) */
+    skip_bits1 (bitbuf);       /* component checksums after image data if (1) */
+
+    if (get_bits (bitbuf, 2) != 0)
+      return -1;
+  }
+
+  if (get_bits (bitbuf, 1) == 1) {
+    skip_bits1 (bitbuf);
+    skip_bits (bitbuf, 4);
+    skip_bits1 (bitbuf);
+    skip_bits (bitbuf, 2);
+
+    while (get_bits (bitbuf, 1) == 1) {
+      skip_bits (bitbuf, 8);
+    }
+  }
+
+  return 0;
+}
+
+static int svq1_decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             uint8_t *buf, int buf_size)
+{
+  MpegEncContext *s=avctx->priv_data;
+  uint8_t        *current, *previous;
+  int             result, i, x, y, width, height;
+  AVFrame *pict = data;
+
+  /* initialize bit buffer */
+  init_get_bits(&s->gb,buf,buf_size*8);
+
+  /* decode frame header */
+  s->f_code = get_bits (&s->gb, 22);
+
+  if ((s->f_code & ~0x70) || !(s->f_code & 0x60))
+    return -1;
+
+  /* swap some header bytes (why?) */
+  if (s->f_code != 0x20) {
+    uint32_t *src = (uint32_t *) (buf + 4);
+
+    for (i=0; i < 4; i++) {
+      src[i] = ((src[i] << 16) | (src[i] >> 16)) ^ src[7 - i];
+    }
+  }
+
+  result = svq1_decode_frame_header (&s->gb, s);
+
+  if (result != 0)
+  {
+#ifdef DEBUG_SVQ1
+    av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_frame_header %i\n",result);
+#endif
+    return result;
+  }
+
+  //FIXME this avoids some confusion for "B frames" without 2 references
+  //this should be removed after libavcodec can handle more flexible picture types & ordering
+  if(s->pict_type==B_TYPE && s->last_picture_ptr==NULL) return buf_size;
+
+  if(avctx->hurry_up && s->pict_type==B_TYPE) return buf_size;
+  if(  (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==B_TYPE)
+     ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=I_TYPE)
+     || avctx->skip_frame >= AVDISCARD_ALL)
+      return buf_size;
+
+  if(MPV_frame_start(s, avctx) < 0)
+      return -1;
+
+  /* decode y, u and v components */
+  for (i=0; i < 3; i++) {
+    int linesize;
+    if (i == 0) {
+      width  = (s->width+15)&~15;
+      height = (s->height+15)&~15;
+      linesize= s->linesize;
+    } else {
+      if(s->flags&CODEC_FLAG_GRAY) break;
+      width  = (s->width/4+15)&~15;
+      height = (s->height/4+15)&~15;
+      linesize= s->uvlinesize;
+    }
+
+    current  = s->current_picture.data[i];
+
+    if(s->pict_type==B_TYPE){
+        previous = s->next_picture.data[i];
+    }else{
+        previous = s->last_picture.data[i];
+    }
+
+    if (s->pict_type == I_TYPE) {
+      /* keyframe */
+      for (y=0; y < height; y+=16) {
+        for (x=0; x < width; x+=16) {
+          result = svq1_decode_block_intra (&s->gb, &current[x], linesize);
+          if (result != 0)
+          {
+//#ifdef DEBUG_SVQ1
+            av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_block %i (keyframe)\n",result);
+//#endif
+            return result;
+          }
+        }
+        current += 16*linesize;
+      }
+    } else {
+      svq1_pmv_t pmv[width/8+3];
+      /* delta frame */
+      memset (pmv, 0, ((width / 8) + 3) * sizeof(svq1_pmv_t));
+
+      for (y=0; y < height; y+=16) {
+        for (x=0; x < width; x+=16) {
+          result = svq1_decode_delta_block (s, &s->gb, &current[x], previous,
+                                            linesize, pmv, x, y);
+          if (result != 0)
+          {
+#ifdef DEBUG_SVQ1
+    av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_delta_block %i\n",result);
+#endif
+            return result;
+          }
+        }
+
+        pmv[0].x =
+        pmv[0].y = 0;
+
+        current += 16*linesize;
+      }
+    }
+  }
+
+  *pict = *(AVFrame*)&s->current_picture;
+
+
+  MPV_frame_end(s);
+
+  *data_size=sizeof(AVFrame);
+  return buf_size;
+}
+
+static int svq1_decode_init(AVCodecContext *avctx)
+{
+    MpegEncContext *s = avctx->priv_data;
+    int i;
+
+    MPV_decode_defaults(s);
+
+    s->avctx = avctx;
+    s->width = (avctx->width+3)&~3;
+    s->height = (avctx->height+3)&~3;
+    s->codec_id= avctx->codec->id;
+    avctx->pix_fmt = PIX_FMT_YUV410P;
+    avctx->has_b_frames= 1; // not true, but DP frames and these behave like unidirectional b frames
+    s->flags= avctx->flags;
+    if (MPV_common_init(s) < 0) return -1;
+
+    init_vlc(&svq1_block_type, 2, 4,
+        &svq1_block_type_vlc[0][1], 2, 1,
+        &svq1_block_type_vlc[0][0], 2, 1, 1);
+
+    init_vlc(&svq1_motion_component, 7, 33,
+        &mvtab[0][1], 2, 1,
+        &mvtab[0][0], 2, 1, 1);
+
+    for (i = 0; i < 6; i++) {
+        init_vlc(&svq1_intra_multistage[i], 3, 8,
+            &svq1_intra_multistage_vlc[i][0][1], 2, 1,
+            &svq1_intra_multistage_vlc[i][0][0], 2, 1, 1);
+        init_vlc(&svq1_inter_multistage[i], 3, 8,
+            &svq1_inter_multistage_vlc[i][0][1], 2, 1,
+            &svq1_inter_multistage_vlc[i][0][0], 2, 1, 1);
+    }
+
+    init_vlc(&svq1_intra_mean, 8, 256,
+        &svq1_intra_mean_vlc[0][1], 4, 2,
+        &svq1_intra_mean_vlc[0][0], 4, 2, 1);
+
+    init_vlc(&svq1_inter_mean, 9, 512,
+        &svq1_inter_mean_vlc[0][1], 4, 2,
+        &svq1_inter_mean_vlc[0][0], 4, 2, 1);
+
+    return 0;
+}
+
+static int svq1_decode_end(AVCodecContext *avctx)
+{
+    MpegEncContext *s = avctx->priv_data;
+
+    MPV_common_end(s);
+    return 0;
+}
+#endif /* CONFIG_DECODERS */
+
+#ifdef CONFIG_ENCODERS
+static void svq1_write_header(SVQ1Context *s, int frame_type)
+{
+    int i;
+
+    /* frame code */
+    put_bits(&s->pb, 22, 0x20);
+
+    /* temporal reference (sure hope this is a "don't care") */
+    put_bits(&s->pb, 8, 0x00);
+
+    /* frame type */
+    put_bits(&s->pb, 2, frame_type - 1);
+
+    if (frame_type == I_TYPE) {
+
+        /* no checksum since frame code is 0x20 */
+
+        /* no embedded string either */
+
+        /* output 5 unknown bits (2 + 2 + 1) */
+        put_bits(&s->pb, 5, 2); /* 2 needed by quicktime decoder */
+
+        for (i = 0; i < 7; i++)
+        {
+            if ((svq1_frame_size_table[i].width == s->frame_width) &&
+                (svq1_frame_size_table[i].height == s->frame_height))
+            {
+                put_bits(&s->pb, 3, i);
+                break;
+            }
+        }
+
+        if (i == 7)
+        {
+            put_bits(&s->pb, 3, 7);
+                put_bits(&s->pb, 12, s->frame_width);
+                put_bits(&s->pb, 12, s->frame_height);
+        }
+    }
+
+    /* no checksum or extra data (next 2 bits get 0) */
+    put_bits(&s->pb, 2, 0);
+}
+
+
+#define QUALITY_THRESHOLD 100
+#define THRESHOLD_MULTIPLIER 0.6
+
+#if defined(HAVE_ALTIVEC)
+#undef vector
+#endif
+
+static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *decoded, int stride, int level, int threshold, int lambda, int intra){
+    int count, y, x, i, j, split, best_mean, best_score, best_count;
+    int best_vector[6];
+    int block_sum[7]= {0, 0, 0, 0, 0, 0};
+    int w= 2<<((level+2)>>1);
+    int h= 2<<((level+1)>>1);
+    int size=w*h;
+    int16_t block[7][256];
+    const int8_t *codebook_sum, *codebook;
+    const uint16_t (*mean_vlc)[2];
+    const uint8_t (*multistage_vlc)[2];
+
+    best_score=0;
+    //FIXME optimize, this doenst need to be done multiple times
+    if(intra){
+        codebook_sum= svq1_intra_codebook_sum[level];
+        codebook= svq1_intra_codebooks[level];
+        mean_vlc= svq1_intra_mean_vlc;
+        multistage_vlc= svq1_intra_multistage_vlc[level];
+        for(y=0; y<h; y++){
+            for(x=0; x<w; x++){
+                int v= src[x + y*stride];
+                block[0][x + w*y]= v;
+                best_score += v*v;
+                block_sum[0] += v;
+            }
+        }
+    }else{
+        codebook_sum= svq1_inter_codebook_sum[level];
+        codebook= svq1_inter_codebooks[level];
+        mean_vlc= svq1_inter_mean_vlc + 256;
+        multistage_vlc= svq1_inter_multistage_vlc[level];
+        for(y=0; y<h; y++){
+            for(x=0; x<w; x++){
+                int v= src[x + y*stride] - ref[x + y*stride];
+                block[0][x + w*y]= v;
+                best_score += v*v;
+                block_sum[0] += v;
+            }
+        }
+    }
+
+    best_count=0;
+    best_score -= ((block_sum[0]*block_sum[0])>>(level+3));
+    best_mean= (block_sum[0] + (size>>1)) >> (level+3);
+
+    if(level<4){
+        for(count=1; count<7; count++){
+            int best_vector_score= INT_MAX;
+            int best_vector_sum=-999, best_vector_mean=-999;
+            const int stage= count-1;
+            const int8_t *vector;
+
+            for(i=0; i<16; i++){
+                int sum= codebook_sum[stage*16 + i];
+                int sqr=0;
+                int diff, mean, score;
+
+                vector = codebook + stage*size*16 + i*size;
+
+                for(j=0; j<size; j++){
+                    int v= vector[j];
+                    sqr += (v - block[stage][j])*(v - block[stage][j]);
+                }
+                diff= block_sum[stage] - sum;
+                mean= (diff + (size>>1)) >> (level+3);
+                assert(mean >-300 && mean<300);
+                if(intra) mean= clip(mean, 0, 255);
+                else      mean= clip(mean, -256, 255);
+                score= sqr - ((diff*(int64_t)diff)>>(level+3)); //FIXME 64bit slooow
+                if(score < best_vector_score){
+                    best_vector_score= score;
+                    best_vector[stage]= i;
+                    best_vector_sum= sum;
+                    best_vector_mean= mean;
+                }
+            }
+            assert(best_vector_mean != -999);
+            vector= codebook + stage*size*16 + best_vector[stage]*size;
+            for(j=0; j<size; j++){
+                block[stage+1][j] = block[stage][j] - vector[j];
+            }
+            block_sum[stage+1]= block_sum[stage] - best_vector_sum;
+            best_vector_score +=
+                lambda*(+ 1 + 4*count
+                        + multistage_vlc[1+count][1]
+                        + mean_vlc[best_vector_mean][1]);
+
+            if(best_vector_score < best_score){
+                best_score= best_vector_score;
+                best_count= count;
+                best_mean= best_vector_mean;
+            }
+        }
+    }
+
+    split=0;
+    if(best_score > threshold && level){
+        int score=0;
+        int offset= (level&1) ? stride*h/2 : w/2;
+        PutBitContext backup[6];
+
+        for(i=level-1; i>=0; i--){
+            backup[i]= s->reorder_pb[i];
+        }
+        score += encode_block(s, src         , ref         , decoded         , stride, level-1, threshold>>1, lambda, intra);
+        score += encode_block(s, src + offset, ref + offset, decoded + offset, stride, level-1, threshold>>1, lambda, intra);
+        score += lambda;
+
+        if(score < best_score){
+            best_score= score;
+            split=1;
+        }else{
+            for(i=level-1; i>=0; i--){
+                s->reorder_pb[i]= backup[i];
+            }
+        }
+    }
+    if (level > 0)
+        put_bits(&s->reorder_pb[level], 1, split);
+
+    if(!split){
+        assert((best_mean >= 0 && best_mean<256) || !intra);
+        assert(best_mean >= -256 && best_mean<256);
+        assert(best_count >=0 && best_count<7);
+        assert(level<4 || best_count==0);
+
+        /* output the encoding */
+        put_bits(&s->reorder_pb[level],
+            multistage_vlc[1 + best_count][1],
+            multistage_vlc[1 + best_count][0]);
+        put_bits(&s->reorder_pb[level], mean_vlc[best_mean][1],
+            mean_vlc[best_mean][0]);
+
+        for (i = 0; i < best_count; i++){
+            assert(best_vector[i]>=0 && best_vector[i]<16);
+            put_bits(&s->reorder_pb[level], 4, best_vector[i]);
+        }
+
+        for(y=0; y<h; y++){
+            for(x=0; x<w; x++){
+                decoded[x + y*stride]= src[x + y*stride] - block[best_count][x + w*y] + best_mean;
+            }
+        }
+    }
+
+    return best_score;
+}
+
+
+static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane,
+    int width, int height, int src_stride, int stride)
+{
+    int x, y;
+    int i;
+    int block_width, block_height;
+    int level;
+    int threshold[6];
+    const int lambda= (s->picture.quality*s->picture.quality) >> (2*FF_LAMBDA_SHIFT);
+
+    /* figure out the acceptable level thresholds in advance */
+    threshold[5] = QUALITY_THRESHOLD;
+    for (level = 4; level >= 0; level--)
+        threshold[level] = threshold[level + 1] * THRESHOLD_MULTIPLIER;
+
+    block_width = (width + 15) / 16;
+    block_height = (height + 15) / 16;
+
+    if(s->picture.pict_type == P_TYPE){
+        s->m.avctx= s->avctx;
+        s->m.current_picture_ptr= &s->m.current_picture;
+        s->m.last_picture_ptr   = &s->m.last_picture;
+        s->m.last_picture.data[0]= ref_plane;
+        s->m.linesize=
+        s->m.last_picture.linesize[0]=
+        s->m.new_picture.linesize[0]=
+        s->m.current_picture.linesize[0]= stride;
+        s->m.width= width;
+        s->m.height= height;
+        s->m.mb_width= block_width;
+        s->m.mb_height= block_height;
+        s->m.mb_stride= s->m.mb_width+1;
+        s->m.b8_stride= 2*s->m.mb_width+1;
+        s->m.f_code=1;
+        s->m.pict_type= s->picture.pict_type;
+        s->m.me_method= s->avctx->me_method;
+        s->m.me.scene_change_score=0;
+        s->m.flags= s->avctx->flags;
+//        s->m.out_format = FMT_H263;
+//        s->m.unrestricted_mv= 1;
+
+        s->m.lambda= s->picture.quality;
+        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
+        s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
+
+        if(!s->motion_val8[plane]){
+            s->motion_val8 [plane]= av_mallocz((s->m.b8_stride*block_height*2 + 2)*2*sizeof(int16_t));
+            s->motion_val16[plane]= av_mallocz((s->m.mb_stride*(block_height + 2) + 1)*2*sizeof(int16_t));
+        }
+
+        s->m.mb_type= s->mb_type;
+
+        //dummies, to avoid segfaults
+        s->m.current_picture.mb_mean=   (uint8_t *)s->dummy;
+        s->m.current_picture.mb_var=    (uint16_t*)s->dummy;
+        s->m.current_picture.mc_mb_var= (uint16_t*)s->dummy;
+        s->m.current_picture.mb_type= s->dummy;
+
+        s->m.current_picture.motion_val[0]= s->motion_val8[plane] + 2;
+        s->m.p_mv_table= s->motion_val16[plane] + s->m.mb_stride + 1;
+        s->m.dsp= s->dsp; //move
+        ff_init_me(&s->m);
+
+        s->m.me.dia_size= s->avctx->dia_size;
+        s->m.first_slice_line=1;
+        for (y = 0; y < block_height; y++) {
+            uint8_t src[stride*16];
+
+            s->m.new_picture.data[0]= src - y*16*stride; //ugly
+            s->m.mb_y= y;
+
+            for(i=0; i<16 && i + 16*y<height; i++){
+                memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
+                for(x=width; x<16*block_width; x++)
+                    src[i*stride+x]= src[i*stride+x-1];
+            }
+            for(; i<16 && i + 16*y<16*block_height; i++)
+                memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
+
+            for (x = 0; x < block_width; x++) {
+                s->m.mb_x= x;
+                ff_init_block_index(&s->m);
+                ff_update_block_index(&s->m);
+
+                ff_estimate_p_frame_motion(&s->m, x, y);
+            }
+            s->m.first_slice_line=0;
+        }
+
+        ff_fix_long_p_mvs(&s->m);
+        ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code, CANDIDATE_MB_TYPE_INTER, 0);
+    }
+
+    s->m.first_slice_line=1;
+    for (y = 0; y < block_height; y++) {
+        uint8_t src[stride*16];
+
+        for(i=0; i<16 && i + 16*y<height; i++){
+            memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
+            for(x=width; x<16*block_width; x++)
+                src[i*stride+x]= src[i*stride+x-1];
+        }
+        for(; i<16 && i + 16*y<16*block_height; i++)
+            memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
+
+        s->m.mb_y= y;
+        for (x = 0; x < block_width; x++) {
+            uint8_t reorder_buffer[3][6][7*32];
+            int count[3][6];
+            int offset = y * 16 * stride + x * 16;
+            uint8_t *decoded= decoded_plane + offset;
+            uint8_t *ref= ref_plane + offset;
+            int score[4]={0,0,0,0}, best;
+            uint8_t temp[16*stride];
+
+            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3000){ //FIXME check size
+                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+                return -1;
+            }
+
+            s->m.mb_x= x;
+            ff_init_block_index(&s->m);
+            ff_update_block_index(&s->m);
+
+            if(s->picture.pict_type == I_TYPE || (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTRA)){
+                for(i=0; i<6; i++){
+                    init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i], 7*32);
+                }
+                if(s->picture.pict_type == P_TYPE){
+                    const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTRA];
+                    put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
+                    score[0]= vlc[1]*lambda;
+                }
+                score[0]+= encode_block(s, src+16*x, NULL, temp, stride, 5, 64, lambda, 1);
+                for(i=0; i<6; i++){
+                    count[0][i]= put_bits_count(&s->reorder_pb[i]);
+                    flush_put_bits(&s->reorder_pb[i]);
+                }
+            }else
+                score[0]= INT_MAX;
+
+            best=0;
+
+            if(s->picture.pict_type == P_TYPE){
+                const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTER];
+                int mx, my, pred_x, pred_y, dxy;
+                int16_t *motion_ptr;
+
+                motion_ptr= h263_pred_motion(&s->m, 0, 0, &pred_x, &pred_y);
+                if(s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER){
+                    for(i=0; i<6; i++)
+                        init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i], 7*32);
+
+                    put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
+
+                    s->m.pb= s->reorder_pb[5];
+                    mx= motion_ptr[0];
+                    my= motion_ptr[1];
+                    assert(mx>=-32 && mx<=31);
+                    assert(my>=-32 && my<=31);
+                    assert(pred_x>=-32 && pred_x<=31);
+                    assert(pred_y>=-32 && pred_y<=31);
+                    ff_h263_encode_motion(&s->m, mx - pred_x, 1);
+                    ff_h263_encode_motion(&s->m, my - pred_y, 1);
+                    s->reorder_pb[5]= s->m.pb;
+                    score[1] += lambda*put_bits_count(&s->reorder_pb[5]);
+
+                    dxy= (mx&1) + 2*(my&1);
+
+                    s->dsp.put_pixels_tab[0][dxy](temp+16, ref + (mx>>1) + stride*(my>>1), stride, 16);
+
+                    score[1]+= encode_block(s, src+16*x, temp+16, decoded, stride, 5, 64, lambda, 0);
+                    best= score[1] <= score[0];
+
+                    vlc= svq1_block_type_vlc[SVQ1_BLOCK_SKIP];
+                    score[2]= s->dsp.sse[0](NULL, src+16*x, ref, stride, 16);
+                    score[2]+= vlc[1]*lambda;
+                    if(score[2] < score[best] && mx==0 && my==0){
+                        best=2;
+                        s->dsp.put_pixels_tab[0][0](decoded, ref, stride, 16);
+                        for(i=0; i<6; i++){
+                            count[2][i]=0;
+                        }
+                        put_bits(&s->pb, vlc[1], vlc[0]);
+                    }
+                }
+
+                if(best==1){
+                    for(i=0; i<6; i++){
+                        count[1][i]= put_bits_count(&s->reorder_pb[i]);
+                        flush_put_bits(&s->reorder_pb[i]);
+                    }
+                }else{
+                    motion_ptr[0                 ] = motion_ptr[1                 ]=
+                    motion_ptr[2                 ] = motion_ptr[3                 ]=
+                    motion_ptr[0+2*s->m.b8_stride] = motion_ptr[1+2*s->m.b8_stride]=
+                    motion_ptr[2+2*s->m.b8_stride] = motion_ptr[3+2*s->m.b8_stride]=0;
+                }
+            }
+
+            s->rd_total += score[best];
+
+            for(i=5; i>=0; i--){
+                ff_copy_bits(&s->pb, reorder_buffer[best][i], count[best][i]);
+            }
+            if(best==0){
+                s->dsp.put_pixels_tab[0][0](decoded, temp, stride, 16);
+            }
+        }
+        s->m.first_slice_line=0;
+    }
+    return 0;
+}
+
+static int svq1_encode_init(AVCodecContext *avctx)
+{
+    SVQ1Context * const s = avctx->priv_data;
+
+    dsputil_init(&s->dsp, avctx);
+    avctx->coded_frame= (AVFrame*)&s->picture;
+
+    s->frame_width = avctx->width;
+    s->frame_height = avctx->height;
+
+    s->y_block_width = (s->frame_width + 15) / 16;
+    s->y_block_height = (s->frame_height + 15) / 16;
+
+    s->c_block_width = (s->frame_width / 4 + 15) / 16;
+    s->c_block_height = (s->frame_height / 4 + 15) / 16;
+
+    s->avctx= avctx;
+    s->m.avctx= avctx;
+    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
+    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
+    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
+    s->mb_type        = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int16_t));
+    s->dummy          = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int32_t));
+    h263_encode_init(&s->m); //mv_penalty
+
+    return 0;
+}
+
+static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf,
+    int buf_size, void *data)
+{
+    SVQ1Context * const s = avctx->priv_data;
+    AVFrame *pict = data;
+    AVFrame * const p= (AVFrame*)&s->picture;
+    AVFrame temp;
+    int i;
+
+    if(avctx->pix_fmt != PIX_FMT_YUV410P){
+        av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
+        return -1;
+    }
+
+    if(!s->current_picture.data[0]){
+        avctx->get_buffer(avctx, &s->current_picture);
+        avctx->get_buffer(avctx, &s->last_picture);
+    }
+
+    temp= s->current_picture;
+    s->current_picture= s->last_picture;
+    s->last_picture= temp;
+
+    init_put_bits(&s->pb, buf, buf_size);
+
+    *p = *pict;
+    p->pict_type = avctx->gop_size && avctx->frame_number % avctx->gop_size ? P_TYPE : I_TYPE;
+    p->key_frame = p->pict_type == I_TYPE;
+
+    svq1_write_header(s, p->pict_type);
+    for(i=0; i<3; i++){
+        if(svq1_encode_plane(s, i,
+            s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i],
+            s->frame_width / (i?4:1), s->frame_height / (i?4:1),
+            s->picture.linesize[i], s->current_picture.linesize[i]) < 0)
+                return -1;
+    }
+
+//    align_put_bits(&s->pb);
+    while(put_bits_count(&s->pb) & 31)
+        put_bits(&s->pb, 1, 0);
+
+    flush_put_bits(&s->pb);
+
+    return (put_bits_count(&s->pb) / 8);
+}
+
+static int svq1_encode_end(AVCodecContext *avctx)
+{
+    SVQ1Context * const s = avctx->priv_data;
+    int i;
+
+    av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", s->rd_total/(double)(avctx->width*avctx->height*avctx->frame_number));
+
+    av_freep(&s->m.me.scratchpad);
+    av_freep(&s->m.me.map);
+    av_freep(&s->m.me.score_map);
+    av_freep(&s->mb_type);
+    av_freep(&s->dummy);
+
+    for(i=0; i<3; i++){
+        av_freep(&s->motion_val8[i]);
+        av_freep(&s->motion_val16[i]);
+    }
+
+    return 0;
+}
+
+#endif //CONFIG_ENCODERS
+
+#ifdef CONFIG_DECODERS
+AVCodec svq1_decoder = {
+    "svq1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_SVQ1,
+    sizeof(MpegEncContext),
+    svq1_decode_init,
+    NULL,
+    svq1_decode_end,
+    svq1_decode_frame,
+    CODEC_CAP_DR1,
+    .flush= ff_mpeg_flush,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, -1},
+};
+#endif
+
+#ifdef CONFIG_ENCODERS
+
+AVCodec svq1_encoder = {
+    "svq1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_SVQ1,
+    sizeof(SVQ1Context),
+    svq1_encode_init,
+    svq1_encode_frame,
+    svq1_encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, -1},
+};
+
+#endif //CONFIG_ENCODERS
diff --git a/contrib/ffmpeg/libavcodec/svq1_cb.h b/contrib/ffmpeg/libavcodec/svq1_cb.h
new file mode 100644
index 000000000..a0748bd44
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/svq1_cb.h
@@ -0,0 +1,1580 @@
+/*
+ *
+ * Copyright (C) 2002 the xine project
+ * Copyright (C) 2002 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Ported to mplayer by Arpi <arpi@thot.banki.hu>
+ * Ported to libavcodec by Nick Kurshev <nickols_k@mail.ru>
+ *
+ */
+
+/**
+ * @file svq1_cb.h
+ * svq1 code books.
+ */
+
+/* 6x16-entry codebook for inter-coded 4x2 vectors */
+static const int8_t svq1_inter_codebook_4x2[768] = {
+    7,  2, -6, -7,  7,  3, -3, -4, -7, -2,  7,  8, -8, -4,  3,  4,
+   19, 17,  9,  3,-14,-16,-12, -8,-18,-16, -8, -3, 11, 14, 12,  8,
+    7,-16,-10, 20,  7,-17,-10, 20, -6, 18,  8,-21, -7, 18,  9,-20,
+   25,  3,-20,-14, 29,  7,-18,-13,-29, -4, 21, 14,-31, -6, 20, 14,
+  -19,-26,-28,-24, 31, 32, 22, 10, 15, 24, 31, 28,-32,-32,-22,-13,
+    2, -8,-23,-26, -9,  3, 27, 35,  3, 11, 21, 21,  8, -4,-27,-34,
+  -30,-31, 12, 47,-29,-30, 13, 47, 38, 30,-17,-46, 34, 26,-19,-46,
+  -42,-50,-51,-43, 34, 48, 55, 48, 48, 54, 51, 42,-44,-52,-53,-47,
+    4,  5,  0, -6, -2, -2,  0,  1,-11, -6, -1, -2,  1,  8,  9,  1,
+    0,  1, -6,  5,  8,  1,-12,  2,  7,-14, -7,  8,  5, -8,  0,  8,
+    1,  4, 11,  8,-12, -8,  0, -5, -1,  1,  0,  4,-15, -8,  3, 16,
+   17,  8, -4, -6,  9, -4,-13, -8,  2,  6,  1,-18, -1, 11, 11,-12,
+    6,  0,  2,  0, 14,  6, -7,-21,  1, -1,-13,-20,  1,  1, 10, 21,
+  -22, -5,  7, 13,-11, -1,  4, 12, -7,  0, 14, 19, -4,  3, -5,-19,
+  -26,-14, 10, 15, 18,  4, -6, -2, 25, 19, -5,-18,-20, -7,  4,  2,
+  -13, -6, -1, -4, 25, 37, -2,-35,  5,  4,  1,  1,-21,-36,  2, 43,
+    2, -2, -1,  3,  8, -2, -6, -1, -2, -3,  2, 12, -5, -2, -2, -1,
+   -3, -1, -1, -5, -1,  7,  8, -2,  2,  7,  5, -3,  1,  1, -3, -8,
+   -3, -1, -3, -2, -2, -3,  2, 13, 15,  0,-11, -6,  3,  0,  0,  0,
+   -6, -9, -5, -4, 18,  4,  1,  3, 12,  3,  0,  4,-16, -3,  3, -3,
+  -17,  3, 18,  2, -1, -3, -1, -1, -6, 16, -8,  0, -9, 14, -7,  0,
+    3,-13, 14, -5,  3,-13, 14, -4, -7, 20, 14,-23,  8, -7, -8,  4,
+    8,-15,-19, 16,-10, 13, 11, -3,  9, -1,  1, 26,  5,-15,-27,  2,
+  -20,  7, 16, -4,-40,  9, 31,  1, 26,-12,-30, -7, 40, -2,-19,  4,
+    6,  0,  0,  0, -6, -2,  1,  2,  0, -1,  0, -6,  9,  0, -2, -1,
+   -7,  8,  2, -3, -1,  2, -3,  2,  7, -4, -2,  4,  2,  0,  0, -6,
+   -3, -2,  9,  2, -2, -1,  0, -4, -3, -3,  0, -3, -6,  2, 10,  4,
+    3,  0,-10,  8,  0,  0, -4,  4, -1,  1,  4,  2,  3, -7, -9,  7,
+    2,  1, -9, -4, -1, 12,  0,  0,  3, -1,  7, -4,  3,-14,  4,  2,
+  -12, -9,  1, 11,  2,  5,  1,  0,  3,  1,  0,  2,  0,  8,  6,-19,
+   -6,-10, -7, -4,  9,  7,  5,  7,  6, 21,  3, -3,-11, -9, -5, -2,
+   -4, -9,-16, -1, -2, -5,  1, 36,  8, 11, 19,  0,  2,  5, -4,-41,
+   -1, -1, -2, -1, -2, -2,  1,  6,  0,  4,  1, -8,  1,  1,  1,  0,
+   -2, -3,  4,  0,  2, -1,  3, -3,  1,  3, -4,  1, -1,  3,  0, -5,
+    3,  4,  2,  3, -2, -3, -6, -1, -2, -3, -2,  2, -4,  8,  1,  0,
+   -7,  4,  2,  6, -7, -1,  1,  0, -2,  2, -4,  1,  8, -6,  2, -1,
+   -6,  2,  0,  2,  5,  4, -8, -1, -1,-11,  0,  9,  0, -2,  2,  2,
+   17, -5, -4, -1, -1, -4, -2, -2,  0,-13,  9, -3, -1, 12, -7,  2,
+    0, -2, -5,  2, -7, -5, 20, -3,  7,  7, -1,-30,  3,  5,  8,  1,
+   -6,  3, -1, -4,  2, -2,-11, 18,  0, -7,  3, 14, 20, -3,-18, -9,
+    7, -2,  0, -1, -2,  0,  0, -1, -4, -1,  1,  0, -2,  2,  0,  4,
+    1, -3,  2,  1,  3,  1, -5,  1, -3,  0, -1, -2,  7,  1,  0, -3,
+    2,  5,  0, -2,  2, -5, -1,  1, -1, -2,  4, -1,  0, -3,  5,  0,
+    0,  3, -1, -2, -4,  1,  5, -1, -1,  0, -1,  9, -1, -2, -1, -1,
+   -2,  5,  5, -1, -2,  2, -3, -2,  1,  2,-11,  1,  2,  1,  3,  2,
+    2,-10, -1, -2,  4,  2,  4,  1,  4,  5, -5,  1,  0,  6,-11,  1,
+    1,  0,  6,  6,  0,  2,  1,-15,  7,  3,  5,  9,-30,  2,  2,  2,
+  -34,  1,  9,  2,  5,  8,  8,  2,  7,  2,  6,  6,  2,-27,  1,  4
+};
+
+/* 6x16-entry codebook for inter-coded 4x4 vectors */
+static const int8_t svq1_inter_codebook_4x4[1536] = {
+    4,  0, -6, -7, -4, -8,-13, -9, -8, -8, -1,  6, -2,  5, 22, 27,
+  -16, -7, 11, 10,-18, -7, 13, 10,-15, -4, 12,  8, -9, -1,  9,  5,
+   -2,  2, 15,-16, -3,  2, 19,-19, -3,  2, 19,-19, -2,  3, 15,-14,
+   17, 22, 22, 16, -6, -7, -5, -2,-12,-16,-16,-12,  1,  1, -1, -3,
+   11,-17,  0,  8, 14,-21, -1,  9, 14,-21, -2,  8, 11,-16, -2,  6,
+    7, -2,-16, 11,  9, -2,-21, 14, 10, -1,-22, 14,  8, -1,-18, 10,
+  -10, 16,  3, -9,-13, 20,  4,-11,-14, 21,  4,-10,-11, 16,  3, -8,
+   11,  4, -9, -9, 15,  6,-12,-14, 17,  8,-12,-14, 16, 10, -7,-11,
+    4, 10, 14, 13, -1,  7, 15, 16,-12, -7,  3,  8,-20,-23,-18,-10,
+  -10,-18,-26,-25,  4,  1, -6,-11, 13, 15, 11,  3, 12, 15, 13,  8,
+  -16,-19,-16,-11,  7, 12, 15, 11, 11, 16, 16, 11, -6, -9,-11,-10,
+   18, 19, 12,  5, 18, 16,  5, -4,  6,  0,-10,-15, -9,-17,-23,-22,
+  -10,-14, -1, 21,-11,-17,  0, 29,-11,-16,  1, 30,-10,-14,  0, 23,
+  -16,-17,-12, -6,-19,-19,-14, -7, -3, -1,  1,  2, 27, 35, 29, 19,
+  -37, -8, 23, 23,-42, -9, 28, 29,-43,-10, 26, 28,-38,-11, 19, 22,
+   32, 16,-16,-33, 39, 20,-18,-37, 38, 19,-19,-38, 32, 15,-17,-34,
+   24,  9, -6, -4, -1,-10, -6,  3, -8, -9, -1,  3,  3,  7,  2, -6,
+   -1, -3, -1,  0, -1,  4,  2, -7, -3, 11,  3,-16,  1, 20,  9,-18,
+   -3, -8,  6, 12, -5,-10,  7, 13, -6, -9,  5,  7, -5, -5,  2, -1,
+   -8, 12, -3, -1,-10, 15, -3,  1,-11, 13, -4,  1,-11,  8, -3,  2,
+    9,  6, -5,-12,  3,  0, -8,-13, -4, -4, -1, -1, -4,  1, 15, 18,
+    9, 13, 14, 12,  4,  3, -1, -2, -2, -5, -8, -5, -7,-11, -9, -4,
+    7, -5, -7, -4, 14, -2, -7, -4, 17,  0, -8, -5, 15,  1, -7, -5,
+  -10, -1,  6,  4,-15, -9,  2,  4,  2, -1, -3,  0, 25, 13, -8,-10,
+    7, 11, -3,-16,  7, 11, -3,-15,  6,  7, -2, -9,  4,  2, -3, -5,
+   -7, -1, -1,  0, -9, -2,  2,  6,-12, -4,  6, 14,-13, -6,  8, 19,
+  -18,-18,-11, -5, -3,  0,  3,  4,  6,  8,  6,  6,  6,  6,  6,  6,
+   -5,  3, 13,-10, -6,  1, 15, -9, -6, -3, 15, -6, -6, -6, 10, -3,
+    9,  1, -9, -9, 11,  9,  6,  5,  0,  3,  8,  7,-15,-14, -6, -5,
+  -11, -6, 11, 19, -2, -5, -9, -8,  6,  2, -9,-10,  6,  5,  4,  5,
+   -7, -3,  8, 15, -1,  3, 10, 15,  5,  5, -1, -2,  4, -2,-21,-25,
+    6, -6, -6,  5,  8, -9, -7,  9,  8,-12, -7, 13,  4,-14, -7, 14,
+   -4, -3,  1,  1, -3, -5, -2, -3,  7,  0, -2, -4, 20,  7, -4, -4,
+   -3,-20, -6, 10,  6,  0,  0,  1,  5,  8,  5, -1, -3,  0,  0, -2,
+   13,  6, -1,  2,  5,  3,  2,  3, -3,  0,  3,  0,-16, -8, -2, -5,
+   -2, -7, -6,  0, -3, -6, -3,  1, -5, -1,  2, -1, -1, 12, 16,  5,
+   -7,  1,  9,  8,-10, -2,  5,  3, -6,  2,  7,  3, -4,  0, -1, -7,
+    3,  4, -9,-24,  0,  2,  6,  3, -1, -1,  4,  7,  5,  3, -1, -2,
+    3,  6, -9,  2,  1,  6,-13,  1,  1,  8,-10,  2,  1,  8, -7,  1,
+   -3, -3,  2, 22, -2, -3, -5, 12, -2, -3,-10,  2, -3, -1, -4,  2,
+   11, 12,  8,  2, -5, -5, -5, -8, -6, -4,  0, -3, -2, -1,  3,  3,
+   12, -6, -2, -1, 12, -8, -2, -2,  9, -7,  0, -3,  4, -6,  2, -2,
+  -19,  1, 12, -3, -4,  4,  5, -4,  6,  1, -2, -1,  4, -4, -2,  7,
+   -3, -4, -7, -8, -4, -4, -2,  0, -1,  2, 14, 16, -4, -2,  4,  4,
+   -1,  7,  2, -5, -2,  0, -1,  1,  4, -3, -1, 13,  6,-12,-14,  8,
+   -1,  5,  4, -5, -2,  5,  3, -9, -2,  7,  4,-12, -1,  7,  4, -9,
+   -6, -3,  1,  1, 11, 11,  0, -6,  6,  4, -2, -7,-12,-10,  3, 10,
+   -2, -3, -3, -2,  6, 11, 14, 10, -9,-11,-10,-10,  2,  2,  3,  2,
+   -7, -5, -7, -1, -1,  2,  0,  7, -1,  1,  0,  9,  3,  4, -5, -1,
+   10, -1,-15, -1,  4,  1, -5,  2, -3,  1, -1,  1, -3,  1,  4,  4,
+    2, -1,  4, 10,  6,  2, -1,  0,  2,  2, -7,-12, -4,  2,  0, -3,
+   -1, -4, -1, -8,  3, -1,  2, -9,  4,  0,  5, -5,  2,  0,  8,  3,
+    3,  2,  1,  1,  4, -2,  0,  3,  2, -1,  4,  1,  0,  6, -1,-25,
+   -1, -2, -2, -4, -3,  0, -1, -4, -1, -1, -4,  2,  0, -6,  2, 25,
+  -11, -1,  5,  0,  7,  0, -2,  2, 10, -1, -3,  4, -5, -5, -2, -1,
+    0,  6,  3, -1, -2, -1, -1,  1, -1, -7,-12, -5,  8,  6,  2,  4,
+    2,  6, -1, -6,  9, 10, -1, -4,  1,  0, -4,  0,  3, -2, -9, -5,
+   -4,  3,  4,  0, -4,  3,  3,  0,-11,  0,  3,  2,-11,  3,  7,  2,
+    2, -4,  7,  3,  1, -8,  7,  1, -1,-12,  4,  1,  3, -9,  2,  2,
+    2, -2, -2,  9,-17, -3,  3,  1, -4,  7,  1, -6,  5,  4, -1,  3,
+   -1,  2,  0, -4, -7,  8, 12, -1, -2,  5,  4, -5,  3, -5, -8, -2,
+    0,  0, -5, -2, -2, -8,  3, 27, -1, -4, -3,  6, -3,  1, -2, -7,
+    4,  4,  1, -1, -7,-10, -7, -3, 10, 10,  5,  3, -2, -2, -4, -3,
+    0,  1,  5,  7,  4, -2,-16,-20,  0,  4,  7,  8,  2,  0, -2, -1,
+   -2,  1,  3, 17, -3,  1, -2, -1, -1, -2, -1, -2, -1, -5, -1,  0,
+    5, -3,  1,  0,  6, -2,  0,  0, -1, -2,  0, -3,-11,  1,  8, -1,
+    3,  0,  0,  0,  0,  2,  4,  1,  2,  0,  6,  1, -2,-18, -3,  2,
+  -14,  0,  6,  1, -5, -2, -1,  1, -1,  1,  0,  1,  1,  7,  4,  0,
+   -1,  0,  1, -4,  1,  8,  3, -4, -3,  4,  1,  3, -6,  1, -4,  1,
+    1,-12,  3,  3, -1,-10,  0, -1,  2,  0,  2,  1,  3,  2,  2,  4,
+    3,  0,  0,  3,  2,  0, -2,  1,  5,  2, -5,  0,  6, -1,-14, -1,
+   -2, -6, -3, -3,  2, -1,  4,  5,  6, -1, -2,  0,  4,  4, -1, -5,
+   -4,  1,-11,  0, -1,  2, -4,  1,  2, -3,  3, -1,  1, -2, 15,  0,
+    1, -1,  0, -2,  1, -4, -7,  1, -2, -6, -1, 21, -2,  2, -1,  1,
+   21, -1, -2,  0, -1, -3,  1, -2, -9, -2,  2, -1,  2,  1, -4, -1,
+    1,  8,  2, -6,-10, -1,  4,  0, -4, -3,  3,  3,  5,  0, -1, -1,
+    3,  2,  1, -2, -2, -2,  4,  3,  5,  2, -4,-17,  0, -2,  4,  3,
+   -7, -4,  0,  3,  9,  9,  2, -1,-11, -6,  0, -1,  5,  1,  0,  1,
+    0, 17,  5,-11,  3, -2, -6,  0,  2, -2, -4,  1, -4,  1,  2, -1,
+   -5, -1, -5, -3, -3,  5, -3, -2,  4, 16,  2, -5, -2,  5, -1, -1,
+    0,  0, -4,  1, -1,  2,  5, 11, -1, -1, -2,  1, -4, -2, -3, -1,
+   -5, -1, 10,  0,  6,  1,  0, -3,  0, -4,  1,  0, -2, -4,  3, -1,
+    6,  9,  3,  0, -2,  1, -2,  0, -2, -3, -2, -2,  1,  0,  1, -6,
+    1,  0,  2,  1, -1,  3, -2,  1,  0, -1,-15,  0, -1,  5,  2,  6,
+    2,  0,  2,  2,  0,-12, -4,  6,  0,  1,  4, -1,  1,  2,  1, -4,
+    1, -2, -7,  0,  0,  0,  0, -1, -5,  2, 11,  3,  1,  3,  0, -6,
+    0, -3, -9, -4,  1,  3, -1,  0,  4,  1, -2,  0,  7, -3, -1,  6,
+    1, -2,  6,  2,  0, -1,  3, -2, -2,  4,  0,  2, -1,  2,-14,  2,
+    2,  2,  0, -1, -2,  3, -3,-14,  0,  2,  3, -3,  5,  1,  3,  2,
+    1, -3,  4,-14,  1, -2, 11, -1,  0, -1,  3,  0, -1,  1,  0,  2,
+   -2,  3, -3,  2, -4, -1, -4,  3, -1,  2,  1,  3, -6, -2,  2,  7,
+   -2,  1,  2,  0, -2,  0,  0, -1, 12,  5, -1,  2, -8, -1,  1, -7,
+    2, -2, -4,  2, 11,  0,-11, -2,  3,  1, -3, -1,  0,  3,  1, -1,
+    0,  3,  0, -2,  0, -6, -1, -3, 12, -7, -2,  0,  7, -2,  1,  1,
+    1,  2,  2,  2, -1,  2,  0,  2,-23,  0,  4,  0,  3,  2,  1,  3,
+   -4, -5, -1,  5, -3,  5, 10, -1,  0,  0,  3, -4,  1, -1,  2, -5
+};
+
+/* 6x16-entry codebook for inter-coded 8x4 vectors */
+static const int8_t svq1_inter_codebook_8x4[3072] = {
+    9,  8,  4,  0, -3, -4, -4, -3,  9,  8,  4, -1, -4, -5, -5, -3,
+    8,  7,  3, -2, -5, -5, -5, -4,  6,  4,  1, -2, -4, -5, -4, -3,
+  -12,-14,-11, -4,  1,  5,  6,  6, -8,-10, -7, -5, -2,  1,  1,  1,
+    5,  4,  3,  1,  0,  0, -1, -1, 13, 13,  9,  6,  3,  0, -1, -2,
+   -4, -4, -3, -1,  1,  4,  8, 11, -5, -6, -4, -2,  0,  3,  8, 12,
+   -7, -7, -6, -4, -2,  2,  7, 10, -7, -7, -5, -4, -2,  1,  5,  8,
+   -3, -2, -1,  1,  3,  6,  7,  6,  2,  3,  5,  7,  8,  8,  6,  4,
+    4,  5,  4,  3,  1, -2, -6, -7,  1,  0, -2, -7,-10,-14,-17,-16,
+   -5, -4,  1,  8,  9,  3, -3, -7, -7, -6,  1, 11, 12,  5, -3, -8,
+   -8, -7,  0,  9, 11,  5, -3, -7, -8, -6, -1,  5,  8,  4, -2, -6,
+   -4, -5, -7, -8, -9, -9, -8, -6, -4, -5, -6, -7, -7, -6, -4, -2,
+    0,  1,  2,  3,  5,  8, 10,  9,  1,  2,  3,  6,  9, 12, 14, 13,
+    5,  6,  6,  5,  4,  3,  2,  1,  5,  6,  7,  7,  6,  6,  6,  4,
+   -1,  0,  1,  1,  3,  5,  5,  5,-13,-16,-17,-17,-14,-10, -6, -4,
+    9, 11, 13, 16, 15, 13, 12, 10, -4, -5, -6, -7, -7, -7, -6, -5,
+   -6, -6, -7, -7, -7, -7, -6, -5, -2, -1,  0,  0,  0,  0,  0, -1,
+  -11,-13,-15,-16,-16,-14,-12,-10,  2,  3,  4,  5,  4,  3,  3,  3,
+    6,  7,  8,  8,  8,  7,  6,  5,  3,  4,  3,  3,  3,  3,  3,  3,
+    3,  4,  4,  1, -2, -7,-13,-17,  5,  7,  7,  5,  1, -5,-13,-19,
+    6,  8,  9,  8,  5, -1, -9,-16,  6,  8, 10, 10,  7,  2, -4,-11,
+   18,  9, -1,-10,-13, -9, -4,  0, 22, 12, -1,-12,-15,-10, -4,  2,
+   23, 13,  0,-10,-13, -9, -3,  2, 20, 12,  2, -6, -9, -6, -2,  2,
+   -6, -6, -6, -7, -7, -7, -7, -6, -6, -7, -8, -8, -9, -9, -9, -8,
+   -3, -3, -3, -3, -3, -3, -3, -3, 12, 15, 18, 21, 21, 19, 17, 14,
+   14, 16, 18, 18, 18, 16, 15, 13,  5,  6,  6,  5,  5,  4,  4,  3,
+   -6, -7, -9,-10,-10,-10, -9, -7,-10,-11,-13,-14,-14,-13,-12,-10,
+  -27,-17, -4,  5,  9, 10, 10,  7,-32,-19, -3,  7, 11, 12, 11,  8,
+  -30,-16, -2,  8, 12, 12, 10,  7,-23,-12,  0,  7, 10, 11,  9,  6,
+   16, 17, 16, 12,  6, -1, -8,-12, 17, 18, 15, 10,  1, -8,-15,-18,
+   15, 14, 10,  4, -5,-14,-20,-23, 10,  8,  4, -1, -9,-16,-21,-22,
+  -10,-12,-12,-11, -5,  4, 14, 20,-11,-13,-15,-12, -4,  7, 19, 27,
+  -11,-13,-14,-11, -3,  8, 21, 28,-10,-11,-12, -9, -2,  8, 18, 25,
+   -1, -1, -1,  1,  4,  6,  6,  5,  0,  0,  0,  2,  4,  3,  1, -2,
+    0,  0,  2,  4,  4, -1, -7,-10,  0,  0,  3,  5,  3, -3,-11,-15,
+  -14,-13, -8, -1,  3,  3, -1, -4, -5, -4, -1,  4,  8,  8,  3,  0,
+    3,  2,  2,  3,  4,  5,  3,  1,  5,  3,  0, -2, -2, -1, -1, -1,
+    9,  1, -6, -6, -5, -3, -2, -1, 12,  1, -6, -6, -4, -2, -1,  0,
+   14,  4, -4, -4, -2, -2, -1, -1, 14,  6, -1, -1, -1, -1, -1, -1,
+    4,  6,  8, 10, 11,  9,  7,  5, -1, -1, -1,  0,  0, -1, -1, -2,
+   -2, -4, -4, -5, -5, -5, -5, -4, -2, -3, -3, -4, -4, -3, -2, -1,
+    2,  3,  4,  4,  3,  1,  0,  0, -1,  1,  4,  5,  6,  5,  4,  3,
+   -8, -6, -2,  2,  3,  4,  4,  3,-14,-13, -9, -5, -2, -1,  0,  0,
+   -3, -4, -5, -4,  0,  7, 12, 13, -3, -4, -5, -5, -2,  4,  9, 10,
+   -2, -3, -4, -5, -4, -1,  3,  4, -1, -1, -2, -3, -3, -2,  0,  1,
+    9,  5, -2, -8,-11,-10, -7, -4, 12, 10,  6,  2,  0, -1,  0,  0,
+    2,  2,  3,  4,  3,  1,  1,  1, -9, -8, -4,  0,  1,  2,  1,  0,
+    6,  8,  8,  5,  1, -5,-11,-13,  0,  1,  2,  2, -1, -4, -8,-11,
+   -3, -2,  1,  3,  3,  1, -1, -4, -2, -1,  2,  5,  6,  6,  4,  1,
+    3,  4,  5,  5,  4,  1, -3, -6,  5,  6,  4,  2,  2,  2,  0, -3,
+    6,  5,  0, -5, -5, -2, -1, -2,  7,  4, -3,-11,-12, -7, -3, -2,
+    1,  0, -1, -1, -1,  0,  0,  0,  2,  3,  4,  4,  5,  5,  4,  3,
+   -7, -9, -9,-10,-10, -9, -7, -6,  3,  4,  5,  6,  5,  5,  5,  5,
+   -7, -7, -7, -7, -6, -6, -5, -4, -5, -4, -3, -1, -1, -1,  0,  0,
+   -3, -2,  1,  4,  5,  5,  5,  5, -2, -1,  3,  6,  9, 10, 10,  9,
+  -14,  1, 10,  3, -2,  0,  1,  1,-16,  2, 13,  3, -3, -1,  1,  0,
+  -15,  2, 12,  3, -4, -2,  1,  1,-10,  3, 10,  2, -3, -1,  1,  1,
+    0,  1,  4,  2, -5,-10, -3, 11, -1,  1,  4,  2, -6,-13, -2, 15,
+   -1,  0,  3,  1, -6,-12, -1, 15, -1,  1,  2,  1, -4, -8,  0, 11,
+   10,  5, -2, -2,  2,  5,  1, -4,  7,  0, -8, -6,  1,  5,  2, -4,
+    2, -5,-12, -7,  2,  7,  4, -1, -1, -7,-10, -4,  4,  9,  7,  2,
+   -5, -5, -4, -6, -6, -5, -5, -3, -1, -2, -2, -4, -5, -6, -5, -4,
+    6,  7,  7,  4,  0, -2, -3, -3, 13, 14, 13, 10,  5,  1, -1, -2,
+    1,  1,  2,  2,  2,  2,  2,  2, -5, -6, -8, -9, -9, -8, -7, -6,
+    7,  9, 10, 11, 11,  9,  7,  5, -1, -2, -3, -3, -4, -4, -4, -3,
+   -1, -1,  0,  0,  0,  0, -1, -1, -3, -3, -4, -5, -4, -3, -3, -2,
+    2,  1, -1, -3, -3, -2, -1,  0, 12, 12,  8,  3,  1,  0,  0,  1,
+   -6, -8, -8, -6, -2,  2,  6,  8,  1,  1, -1, -2,  0,  3,  5,  7,
+    3,  3,  1, -1, -1,  0,  0,  2,  0,  1,  0, -1, -1, -1, -2, -1,
+    1,  0,  0,  0,  0,  0,  2,  4,  2,  1,  3,  4,  3,  1,  0,  2,
+    2,  1,  0,  0, -1, -1,  0,  3,  5,  1, -6,-12,-13, -8, -1,  4,
+   -2,  0, -1, -2, -1,  0,  2,  3, -6, -3, -2,  0,  1,  1,  1,  1,
+   -9, -5,  0,  4,  5,  3,  1,  0, -8, -3,  3,  7,  8,  4,  1,  0,
+    1,  2,  2,  3,  3,  1, -1, -3,  4,  5,  5,  6,  6,  5,  2,  0,
+    0,  0,  0,  0,  1,  0, -2, -4, -3, -3, -4, -3, -3, -4, -7, -8,
+   14, 12,  6, -1, -3, -3,  0,  0,  7,  5,  1, -3, -5, -4, -2, -1,
+   -2, -2, -2, -2, -2, -2, -1, -1, -6, -4, -1,  1,  1,  1,  0, -1,
+    2,  2,  1, -3, -6, -7, -6, -3,  1,  0, -1, -3, -2,  1,  4,  6,
+    0,  0,  1,  2,  4,  7,  8,  7,  0,  0,  0,  0, -1, -4, -7, -8,
+    0,  2,  1, -2, -3, -3, -2, -1, -1,  1,  0, -3, -5, -2,  0,  2,
+   -2, -1, -2, -5, -4,  1,  6,  9, -3, -2, -3, -4, -2,  5, 11, 13,
+   -4, -2,  2,  6,  4, -3,-10,-14, -2, -1,  1,  4,  4,  1, -1, -2,
+    0,  0, -1, -2, -2,  0,  4,  6,  2,  2,  0, -3, -3,  0,  5,  9,
+   -4, -4, -2,  1,  6,  9,  3, -7, -2, -2, -2, -1,  4,  8,  0,-11,
+    1,  1,  0,  0,  2,  6, -1,-10,  2,  2,  1,  0,  2,  4,  0, -7,
+   -1, -2, -3, -6, -7, -8, -8, -8,  2,  3,  3,  1, -1, -2, -3, -4,
+    5,  5,  5,  4,  3,  2,  0, -1,  3,  3,  3,  3,  2,  2,  1,  1,
+    3,  3,  2, -2, -3,  0,  7, 10,  1,  2,  2, -2, -5, -4,  0,  3,
+    0,  3,  4,  2, -3, -5, -6, -4,  0,  2,  4,  4,  1, -4, -7, -7,
+    2,  4,  5,  5,  5,  5,  6,  6, -4, -4, -3, -5, -5, -3, -3, -2,
+   -3, -4, -4, -5, -4, -2, -2, -2,  1,  1,  0,  0,  2,  4,  5,  4,
+   -2,  0,  3,  4,  4,  3,  2,  2, -9, -7, -4,  0,  3,  6,  6,  6,
+   -5, -5, -3, -2,  0,  1,  3,  4,  5,  5,  2, -2, -4, -6, -5, -3,
+    1, -6, -4,  7,  5, -2, -2,  1,  5, -5, -4,  6,  4, -5, -4,  1,
+    5, -5, -4,  6,  4, -5, -3,  1,  1, -7, -3,  8,  7, -1, -3,  1,
+   -8, -7, -4,  0,  2,  4,  5,  5,  5,  6,  5,  2, -1, -5, -7, -7,
+    5,  6,  4,  1, -3, -5, -6, -5, -7, -7, -5, -2,  1,  6,  9, 10,
+    6,  3,  0,  1,  3,  0, -8,-14,  3,  0, -1,  1,  4,  3,  0, -4,
+    1,  0,  0,  1,  2,  1,  1,  1, -1, -1,  1,  2,  1, -1, -1,  0,
+    1,  1,  1,  1,  0, -2, -3,  0,  1,  2,  1,  0, -2, -8, -9, -4,
+    1,  3,  3,  2,  1, -3, -3,  1,  0,  1,  1,  1,  1,  1,  4,  8,
+    2,  5,  9,  7,  2, -1, -1,  1, -4, -1,  1,  0, -3, -4, -1,  2,
+   -3,  0,  3,  3,  0, -1,  0,  2, -4, -1,  1,  1, -2, -4, -5, -4,
+    1, -1, -2, -2, -1,  2,  4,  5,  2,  1,  1,  0, -1, -1,  0,  0,
+    2,  3,  4,  5,  4,  2,  1,  0, -9, -9, -6, -3, -1, -1, -1, -1,
+   -6, -6,  4,  7,  0, -2, -1, -2, -1, -2,  5,  6, -1, -2,  0, -1,
+    4, -1,  1,  0, -4, -2,  0, -2,  7,  1, -1, -2, -3,  1,  3,  1,
+    4,  2,  1,  3,  3,  1,  1,  2,  2, -2, -4,  0,  3,  1,  0,  0,
+    1, -4, -8, -4,  1,  2,  1,  0,  2, -3, -9, -6,  0,  3,  3,  2,
+   -1, -1,  0, -1, -1,  0,  1,  2,  3,  1, -4, -8, -7, -3,  1,  2,
+    2, -1, -3, -2, -1,  0,  1,  0, -1,  0,  5, 11,  9,  3, -1, -3,
+   -1, -2, -2, -1,  1,  1,  1,  1,  0, -1,  0,  3,  6,  6,  5,  5,
+    2,  1, -1, -1, -2, -5, -6, -4,  2,  2,  2,  1, -1, -4, -5, -5,
+   -1, -3, -6, -7, -6, -4, -1,  1,  5,  5,  3,  4,  4,  3,  4,  5,
+   -1, -2, -3, -2, -2, -2,  0,  1,  0,  0,  0,  0,  0,  1,  2,  3,
+   -6, -6, -4, -1,  2,  2,  2,  2, -6, -7, -5, -2,  0, -1, -1,  0,
+    2,  2,  2,  4,  4,  3,  3,  4,  2,  1,  0, -1,  0,  0,  2,  4,
+   12,  5, -5, -8, -5,  0,  2,  2,  2, -3, -6, -3,  0,  0, -1, -2,
+   -2, -3, -1,  3,  4,  1, -2, -3,  2,  2,  3,  4,  3,  1, -1, -1,
+    3,  2,  1,  0,  1,  4,  3,  0,  4,  3,  0, -5, -6,  0,  3,  3,
+    2,  3,  1, -7,-12, -6,  1,  3,  1,  3,  4, -1, -6, -4,  0,  1,
+   -9, -4,  2,  6,  7,  4,  1,  0, -7, -1,  4,  6,  4,  0, -3, -3,
+   -6,  0,  4,  4,  1, -2, -3, -2, -4,  1,  3,  2,  0, -2, -1,  0,
+    0,  5,  2, -5, -3,  3,  1, -4, -2,  4,  2, -6, -3,  6,  4, -3,
+   -1,  5,  3, -5, -1,  7,  3, -4, -1,  2,  0, -6, -3,  5,  3, -3,
+   -8, -3,  3,  5,  3,  1, -2, -2,  2,  4,  4, -2, -4, -3,  1,  3,
+    2,  1, -3, -5, -3,  3,  4,  3, -5, -6, -5,  3, 10,  8, -1, -5,
+    0,  3,  2, -4, -9, -7,  0,  6, -5, -1,  5,  7,  4, -1, -3, -3,
+   -5, -5, -2,  3,  6,  5, -1, -4,  9,  6,  0, -4, -2,  1,  1, -1,
+   -1, -1, -1,  1,  1,  0, -1,  0, -1,  0,  0,  0,  0, -1, -1,  0,
+    2,  1, -2, -1,  1,  1,  0,  0, 12,  8,  2, -1, -1, -4, -7, -7,
+    2,  1,  3,  6,  7,  4,  2,  0,  1,  0, -1,  0, -1, -4, -7, -8,
+    0,  0, -1,  0,  0,  0, -1, -3,  0,  0,  0,  0,  1,  1,  0, -2,
+   -1,  0,  1,  1,  0,  0, -1, -2,  0,  0, -1, -3, -4, -3, -1,  1,
+   -1,  0,  0,  0,  1,  4, 10, 12, -1,  0, -2, -2, -3, -3, -1,  1,
+   -3, -1, -2, -4,  2,  9,  9,  7, -3,  0, -1, -3,  0,  2, -1,  1,
+   -1,  1, -2, -3,  0, -1, -3,  0,  0,  0, -3, -2,  0, -1, -1,  1,
+   -1, -2, -1, -1, -2, -1, -1, -2,  2, -1, -2, -1,  0,  1,  0, -2,
+    3, -1, -2,  2,  5,  3, -1, -3,  1, -5, -5,  1,  6,  6,  2,  0,
+    1,  2,  0, -1,  0,  1,  0, -2, -5, -3, -1,  0,  1,  2,  1, -2,
+   -7, -5, -2, -2, -2, -2,  0,  1, -1,  0,  1,  1,  0,  3,  9, 12,
+    0,  6,  5,  1, -2, -3,  0,  3,  0,  6,  5,  1,  1,  1,  2,  3,
+   -5, -2, -2, -3,  0,  0,  0,  0, -6, -3, -3, -2,  0,  0, -1, -2,
+    4,  4,  2,  1,  0, -1, -1,  0, -2, -2,  0,  1,  2,  1,  1,  0,
+    2,  2,  1, -1, -3, -5, -9,-10,  2,  1, -1, -1,  1,  4,  4,  1,
+    4,  0, -2, -2, -2, -2, -1,  0,  7,  1, -4, -3, -2,  0,  1,  1,
+   10,  5, -1, -2,  0,  1,  1,  0,  5,  1, -3, -4, -3, -1, -1, -2,
+    2,  1, -1, -3, -3,  1,  1, -1, -2, -1,  3,  0, -1,  1,  1,  0,
+   -3,  1,  7,  2, -3, -2, -1,  0, -2,  4,  8, -1, -8, -5,  0,  2,
+   -4, -1,  1,  2,  1, -3, -4, -2, -5, -3, -2,  1,  4,  4,  4,  6,
+   -3, -2, -4, -3,  0,  1,  1,  2,  2,  2,  2,  1,  2,  1, -1, -1,
+   -4, -1,  0, -1, -3, -3, -1, -1,  1,  4,  4,  2,  0, -1, -2, -3,
+    4,  6,  5,  3,  2,  1, -2, -4,  0,  1,  1,  1,  1, -1, -4, -6,
+    1,  2,  2, -1, -6, -5, -1,  2, -3, -2,  1,  1, -4, -3,  2,  5,
+   -2, -1,  2,  2, -3, -4,  0,  3, -2, -2,  2,  6,  5,  2,  1,  2,
+    2, -3, -3,  0,  0,  2,  3,  1,  3, -1,  1,  3,  1,  2, -1, -5,
+   -5, -7, -4, -2,  1,  8,  8,  1, -1,  0,  2,  0, -3,  0,  1, -3,
+   -2, -5, -5, -2, -3, -1,  0, -2, -1, -4,  0,  4,  0,  2,  4,  0,
+    0,  0,  8, 10,  2,  1,  3, -1, -4, -3,  2,  3, -3, -3,  1, -1,
+    1, -2, -4,  2,  7,  3, -2, -1,  6,  4, -2, -1,  2,  0, -1,  3,
+    1,  1, -2, -2, -2, -5, -3,  4, -6, -2,  1,  1, -1, -4, -2,  4,
+   -2, -1, -2, -2,  0,  1,  0, -2, -1,  1,  0, -1,  0,  0, -1, -3,
+    0,  1, -2, -4, -3, -1,  0,  0,  6,  8,  5,  0,  0,  1,  2,  3,
+   -2, -2,  2,  5,  2,  0,  0,  1,  2, -2, -2, -1, -1,  1,  2,  4,
+    2, -1,  0,  1,  0,  0,  0,  1, -8, -7, -1,  1, -1, -1,  1,  3,
+    0,  3,  6,  2, -2,  1,  2,  0,-10, -7, -1,  0, -3, -1,  2,  1,
+    0,  0,  2,  2,  1,  1,  1, -1,  3,  0, -2, -2,  0,  2,  1,  0,
+    8,  1,  0,  0, -2, -3, -1,  0,  2, -2,  2,  5,  1, -2, -1,  1,
+   -3, -6, -3, -1, -3, -3, -1,  2,  2,  0,  1,  2,  2,  1,  0,  0,
+    1, -1, -1, -2, -1,  0,  1,  0, 15,  9,  2, -1, -2, -3, -3, -3,
+    0, -3, -2,  0,  0, -1, -1, -1,  1,  0,  1,  0,  0, -1, -1, -1,
+    0,  2,  2, -2, -3, -3, -7, -8,  0,  2,  2,  0,  1,  2,  1,  1,
+    1,  2,  2,  2,  3,  1,  0,  3,  1,  0, -1, -2, -1, -2,  0,  5,
+  -11, -6, -1,  1,  2,  3,  1, -3,  1,  4,  3, -1, -2,  1,  2, -1,
+    2,  2,  1, -1, -2,  0,  1, -1,  0,  0, -1, -1,  0,  2,  3,  2,
+    1,  1,  2,  1, -1,  1,  0, -4,  0,  0,  0, -2, -2,  2,  4, -2,
+   -2, -3,  0,  0, -1,  2,  1, -6,  0,  2,  5,  5,  3,  2, -1, -7,
+    4,  2,  0,  0,  3,  3,  1, -1,  0, -1, -1,  3,  6,  4,  1, -1,
+   -2, -2,  0,  2,  2,  0, -2, -2, -1,  0, -1, -5, -7, -5, -1,  1,
+    5, -1, -2,  0,  2,  4,  2, -5,  0, -5, -2,  2,  1,  2,  0, -6,
+    6,  1,  0,  1, -2, -1,  4,  2,  2, -3, -3,  0, -1, -2,  0,  0,
+    1, -1,  0,  2,  0,  0,  6, 11,  2, -1, -1,  0, -3, -2,  3,  5,
+    0, -2, -1,  0, -1,  0,  0, -3,  1, -1, -1, -1, -2, -1, -3, -7,
+    1,  1, -2, -2,  1,  3,  1, -2, -1,  2,  0, -1, -1,  1,  0,  0,
+   -4,  2,  3, -1, -2, -2,  0,  1,-11, -2,  4,  5,  6,  2, -1, -2,
+   -6, -2,  1, -1, -3, -4,  1,  9, -3,  0,  3,  3,  2, -3, -3,  3,
+    1,  1,  0,  0,  1, -1, -2,  3,  2,  0, -3, -3,  0, -1, -1,  3,
+    1, -1, -3,  1,  2, -6, -4,  6,  0, -2, -5, -2,  0, -3, -2,  3,
+    2,  2,  1, -2, -2,  1,  2, -1, -1,  1,  1, -2, -1,  6,  7, -1,
+    1,  0, -4, -2,  1, -2, -3,  1, -4,  0, -3, -2,  2,  0, -3,  0,
+   -3,  4,  3,  1,  8,  7,  0, -1, -3,  4,  1, -4,  2,  3, -2, -3,
+   -3,  6,  1, -4,  1,  1, -1, -1, -2,  4, -3, -3,  3,  0, -1, -1,
+    1,  2, -4,  2,  4, -3, -1,  2,  3, -1, -4,  5,  4, -6, -3,  2
+};
+
+/* 6x16-entry codebook for inter-coded 8x8 vectors */
+static const int8_t svq1_inter_codebook_8x8[6144] = {
+   -4, -3,  4,  5,  2,  1,  1,  0, -5, -3,  5,  5,  2,  1,  0,  0,
+   -6, -4,  5,  5,  2,  1,  0,  0, -7, -4,  4,  5,  2,  1,  0,  0,
+   -8, -5,  3,  4,  2,  1,  0,  0, -8, -6,  3,  4,  1,  1,  1,  0,
+   -8, -6,  2,  4,  2,  1,  1,  0, -8, -6,  2,  4,  1,  1,  1,  1,
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2,
+   -2, -3, -3, -3, -3, -3, -3, -3, -2, -3, -3, -3, -3, -3, -4, -3,
+   -2, -2, -2, -2, -2, -3, -3, -2,  1,  1,  1,  1,  1,  0, -1, -1,
+    4,  5,  5,  5,  4,  3,  3,  2,  7,  7,  8,  8,  8,  7,  6,  5,
+    2,  1,  2,  4,  4,  0, -4, -6,  1,  1,  2,  5,  5,  1, -5, -7,
+    1,  2,  1,  4,  5,  1, -5, -8,  1,  1,  1,  5,  5,  0, -6, -8,
+    0,  1,  1,  5,  6,  1, -6, -9,  0,  0,  1,  4,  5,  0, -5, -8,
+    0,  0,  1,  4,  5,  0, -5, -7,  0,  0,  1,  4,  4,  1, -4, -7,
+    1,  2,  3,  0, -3, -4, -3, -1,  1,  3,  4,  0, -3, -4, -3, -1,
+    2,  4,  5,  1, -3, -4, -3, -2,  2,  5,  6,  1, -3, -5, -4, -2,
+    3,  6,  6,  1, -3, -5, -4, -2,  3,  6,  6,  1, -3, -5, -4, -2,
+    3,  6,  6,  1, -3, -5, -4, -2,  3,  5,  5,  1, -3, -4, -4, -2,
+    2,  2,  2,  2,  1,  0,  0, -1,  4,  4,  4,  3,  2,  1,  1,  0,
+    4,  5,  4,  4,  3,  3,  2,  1,  4,  4,  4,  4,  4,  3,  2,  2,
+    2,  3,  3,  3,  3,  3,  2,  1, -1, -1, -1, -1,  0,  0,  0,  0,
+   -5, -6, -6, -5, -5, -4, -3, -3, -7, -9, -9, -8, -7, -6, -6, -5,
+    6,  6,  6,  6,  6,  5,  5,  4,  4,  4,  4,  3,  3,  3,  3,  2,
+    0, -1, -1, -1, -2, -2, -1, -1, -3, -5, -6, -6, -6, -6, -5, -4,
+   -3, -5, -6, -7, -6, -6, -5, -4, -1, -2, -2, -2, -2, -2, -1, -1,
+    0,  1,  1,  1,  1,  1,  1,  1,  3,  3,  3,  3,  3,  3,  3,  3,
+    2,  1, -2, -5, -4,  0,  2,  5,  2,  1, -2, -6, -5,  0,  3,  5,
+    2,  1, -2, -6, -6, -1,  3,  6,  3,  2, -2, -7, -6,  0,  4,  7,
+    2,  1, -2, -7, -5,  0,  5,  7,  2,  1, -2, -6, -5,  0,  4,  7,
+    2,  1, -2, -6, -4,  0,  4,  6,  1,  1, -2, -5, -4,  0,  3,  6,
+  -10, -9, -6, -4, -1,  2,  3,  2,-10, -9, -5, -3,  0,  4,  4,  3,
+   -9, -7, -3, -1,  2,  5,  5,  3, -7, -5, -2,  0,  3,  5,  5,  3,
+   -6, -3,  0,  1,  4,  6,  5,  3, -4, -2,  1,  2,  3,  5,  4,  2,
+   -2,  0,  1,  2,  2,  4,  3,  1, -1,  1,  2,  2,  2,  3,  3,  1,
+   -4, -5, -5, -6, -6, -6, -6, -5, -3, -3, -4, -4, -4, -4, -4, -4,
+    0,  0,  0,  0, -1, -1, -1, -1,  5,  5,  6,  5,  5,  4,  3,  2,
+    5,  6,  7,  7,  7,  6,  5,  4,  3,  3,  4,  4,  4,  4,  3,  2,
+    0, -1,  0,  0, -1, -1,  0, -1, -3, -3, -4, -4, -4, -4, -3, -3,
+    1, -2, -5,  1,  5,  4,  2,  0,  1, -3, -6,  1,  6,  5,  2,  0,
+    0, -4, -7,  0,  6,  6,  2,  1, -1, -5, -9, -1,  6,  6,  3,  1,
+   -1, -6,-10, -2,  6,  6,  3,  1, -1, -6, -9, -2,  5,  6,  3,  1,
+   -2, -6, -9, -2,  5,  5,  3,  1, -2, -6, -7, -2,  4,  4,  2,  1,
+   -5, -7, -8, -9, -9, -8, -7, -6, -5, -6, -6, -7, -7, -6, -6, -5,
+   -3, -3, -3, -4, -5, -5, -4, -4, -1,  0,  0, -1, -1, -1, -1, -1,
+    0,  1,  2,  2,  2,  2,  2,  1,  2,  3,  4,  5,  5,  5,  5,  4,
+    3,  4,  5,  6,  8,  8,  8,  7,  3,  4,  5,  6,  7,  7,  7,  6,
+    5,  6,  7,  8,  9, 10, 10,  9,  3,  4,  6,  7,  8,  9,  9,  8,
+    0,  1,  2,  3,  4,  5,  5,  5, -1, -2, -1, -1,  0,  1,  2,  2,
+   -2, -3, -3, -3, -3, -2, -1,  0, -3, -4, -5, -5, -5, -5, -5, -4,
+   -4, -5, -5, -6, -7, -7, -6, -5, -3, -4, -5, -6, -7, -7, -6, -6,
+   13,  7,  0, -3, -3, -4, -4, -5, 14,  7,  0, -3, -3, -4, -4, -4,
+   15,  8, -1, -4, -4, -4, -5, -4, 15,  8, -1, -4, -4, -5, -4, -3,
+   15,  7, -1, -4, -5, -5, -5, -4, 14,  7, -1, -4, -4, -4, -4, -3,
+   12,  6, -1, -4, -4, -4, -4, -3, 11,  5, -1, -4, -4, -4, -4, -3,
+  -17, -4,  5,  4,  4,  4,  3,  3,-18, -5,  5,  4,  4,  4,  3,  3,
+  -19, -5,  6,  4,  4,  4,  3,  2,-20, -5,  6,  4,  4,  4,  3,  3,
+  -20, -4,  6,  4,  4,  5,  3,  3,-19, -5,  6,  4,  4,  5,  3,  3,
+  -18, -4,  5,  4,  4,  4,  3,  2,-17, -5,  4,  3,  4,  4,  3,  3,
+   -6, -6, -6, -4, -2,  1,  6, 11, -6, -7, -7, -4, -2,  2,  8, 13,
+   -8, -8, -7, -4, -2,  3,  9, 14, -8, -8, -7, -5, -1,  4, 10, 16,
+   -8, -8, -7, -5, -1,  4, 10, 17, -8, -8, -7, -4,  0,  5, 10, 16,
+   -8, -8, -6, -3,  0,  4,  9, 15, -7, -7, -5, -3,  0,  4,  8, 12,
+    8,  7,  7,  5,  2, -2, -8,-14,  8,  8,  7,  5,  2, -2, -8,-15,
+    8,  8,  7,  5,  1, -3, -9,-16,  8,  8,  7,  5,  1, -3,-10,-17,
+    8,  9,  8,  5,  1, -3,-10,-17,  8,  8,  7,  4,  1, -4,-10,-16,
+    7,  7,  7,  4,  1, -3, -9,-14,  6,  7,  6,  3,  0, -3, -9,-13,
+    5,  1, -4, -4, -3, -1,  0,  0,  7,  2, -3, -3, -2, -1,  1,  0,
+    7,  1, -3, -3, -1,  0,  1,  1,  6,  1, -3, -2, -1,  1,  1,  0,
+    6,  0, -4, -2, -1,  0,  1,  0,  5,  0, -4, -3, -1,  0,  0, -1,
+    5,  0, -3, -1,  0,  0,  0, -2,  4,  1, -2, -1,  0,  1,  0, -1,
+    2,  2,  1,  1, -2, -6, -8, -8,  1,  1,  1,  1, -2, -5, -8, -8,
+    1,  1,  1,  0, -1, -3, -5, -5,  0,  0,  0,  0, -1, -1, -1, -2,
+    0, -1,  0,  0,  0,  0,  1,  0,  1,  0,  0,  0,  1,  2,  3,  2,
+    2,  1,  1,  1,  2,  3,  4,  3,  3,  3,  3,  3,  4,  4,  5,  4,
+   -4, -4, -3, -2,  0,  0,  1,  1, -4, -4, -3, -2, -1,  0,  0,  1,
+   -2, -2, -2, -1, -1, -1,  0,  0,  0,  1,  0,  0,  0,  0,  0, -1,
+    2,  2,  2,  2,  2,  2,  1,  1,  3,  4,  4,  4,  4,  4,  4,  3,
+    1,  1,  1,  3,  3,  4,  3,  3, -5, -6, -5, -4, -3, -3, -2, -2,
+   -4, -2, -1, -1, -1, -1,  0,  1, -4, -2, -1, -1, -1, -1,  0,  1,
+   -3, -2, -1, -1, -1,  0,  1,  2, -4, -3, -2, -1, -1,  1,  3,  3,
+   -4, -3, -3, -1, -1,  1,  4,  5, -4, -3, -2, -2, -1,  1,  4,  7,
+   -2, -2, -1, -1,  0,  2,  6,  8, -1,  0,  0,  1,  1,  4,  7,  8,
+   -3, -3, -3, -2, -2, -1, -1,  0, -1, -1,  0,  1,  2,  2,  3,  3,
+    0,  1,  2,  4,  5,  6,  6,  5, -1,  0,  2,  3,  5,  6,  5,  3,
+   -1, -1,  0,  2,  3,  3,  2,  1, -2, -2, -1,  0, -1, -3, -4, -4,
+    0,  0, -1, -1, -2, -4, -8, -7,  1,  2,  1,  0, -1, -4, -6, -7,
+   -2,  4,  1, -6,  0,  3,  0,  0, -2,  5,  1, -7,  0,  3,  0,  0,
+   -3,  5,  1, -8,  0,  3, -1, -1, -2,  6,  1, -9,  0,  3,  0, -1,
+   -2,  6,  2, -8,  0,  4,  0, -1, -3,  5,  1, -7,  1,  4,  0,  0,
+   -2,  4,  1, -7,  0,  4,  1,  0, -1,  4,  1, -6,  0,  3,  1,  0,
+    0,  0,  0,  3,  4,  5,  4,  1,  1,  1,  1,  2,  3,  3,  2,  0,
+    2,  2,  1,  2,  2,  1, -1, -2,  4,  3,  1,  1,  0, -1, -3, -5,
+    5,  3,  1, -1, -2, -3, -4, -6,  5,  3,  0, -2, -3, -5, -6, -7,
+    4,  3,  0, -2, -3, -4, -5, -5,  4,  3,  0, -1, -2, -2, -3, -3,
+    0,  0,  0,  0, -1, -5, -2,  6,  0,  0,  0,  1, -1, -6, -2,  8,
+    0,  0,  0,  2,  0, -6, -3,  9,  0, -1,  0,  2,  0, -7, -2, 10,
+    0, -1,  0,  2, -1, -8, -3, 10,  0, -1, -1,  2, -1, -7, -3,  9,
+    0, -1,  0,  1, -1, -6, -3,  8,  0,  0,  0,  1,  0, -5, -2,  7,
+    2,  3,  3,  2,  1,  0, -1, -1,  3,  4,  3,  2,  1,  0, -1, -2,
+    3,  4,  4,  2,  1, -1, -2, -3,  2,  3,  3,  2,  0, -1, -2, -3,
+   -1,  0,  1,  1,  0, -1, -2, -2, -5, -4, -3, -1,  0,  1,  1,  1,
+   -8, -8, -5, -1,  1,  3,  4,  3,-10, -9, -5,  0,  3,  5,  6,  5,
+   -5, -1,  4,  5,  3,  1,  0,  0, -6, -1,  4,  5,  2,  0, -1, -2,
+   -6, -1,  5,  4,  2, -1, -2, -2, -7, -1,  4,  4,  1, -2, -3, -3,
+   -6, -1,  5,  4,  1, -2, -3, -3, -5,  0,  4,  4,  1, -1, -2, -2,
+   -4,  0,  5,  4,  1, -1, -1, -2, -3,  1,  4,  3,  1, -1, -1, -2,
+   -2, -3, -2,  1,  4,  6,  5,  3, -3, -4, -4,  0,  3,  5,  4,  2,
+   -3, -5, -5, -1,  2,  4,  3,  1, -4, -6, -4, -1,  2,  4,  2, -1,
+   -2, -4, -3,  1,  2,  4,  2, -1, -2, -4, -2,  1,  3,  3,  1, -2,
+   -2, -3, -2,  1,  3,  3,  1, -2, -2, -2, -1,  1,  3,  3,  0, -2,
+   -4, -4, -3, -2, -1,  2,  5,  7, -4, -4, -3, -3, -2,  1,  5,  7,
+   -2, -3, -2, -3, -3, -1,  3,  5, -1, -1,  0, -2, -3, -2,  2,  4,
+    1,  1,  1, -1, -4, -3,  1,  3,  4,  3,  2, -1, -4, -3, -1,  1,
+    6,  4,  3,  0, -3, -3, -2,  0,  6,  5,  3,  1, -2, -3, -2, -1,
+   12, 11,  8,  4,  0, -2, -2, -1, 10,  9,  6,  2, -1, -2, -1,  0,
+    4,  3,  2,  0, -1, -1,  0,  1, -1, -1, -1, -1, -2,  0,  1,  2,
+   -3, -5, -4, -2, -2,  0,  2,  3, -5, -5, -4, -2, -1,  0,  1,  2,
+   -5, -5, -4, -2, -1,  0,  1,  1, -4, -4, -3, -2, -2, -1,  0,  0,
+    3,  3,  2, -1, -3, -4, -3, -2,  3,  2,  0, -2, -4, -4, -3, -2,
+    2,  2,  1, -1, -3, -5, -4, -3,  3,  3,  3,  1, -2, -3, -3, -3,
+    4,  4,  4,  3,  0, -2, -2, -2,  5,  5,  5,  3,  0, -1, -2, -2,
+    5,  5,  4,  2, -1, -2, -3, -2,  3,  3,  3,  0, -2, -4, -4, -4,
+   -1, -1,  4, -2, -2,  6,  2, -5, -1,  0,  4, -2, -3,  6,  2, -6,
+   -1,  0,  4, -2, -3,  7,  3, -7, -1, -1,  4, -3, -4,  8,  3, -7,
+    0, -1,  4, -3, -4,  7,  3, -6, -1, -1,  4, -3, -4,  7,  3, -6,
+   -1, -1,  3, -3, -4,  6,  3, -6, -1,  0,  3, -2, -3,  6,  3, -5,
+    1, -2, -7,  2,  5, -2, -1,  1,  1, -2, -8,  3,  6, -3, -1,  2,
+    2, -2, -9,  4,  7, -4, -2,  2,  3, -1, -9,  5,  7, -4, -1,  3,
+    3, -1, -9,  4,  7, -4, -2,  2,  3, -1, -7,  4,  6, -4, -2,  1,
+    2,  0, -6,  4,  6, -4, -1,  1,  2,  0, -5,  3,  4, -3, -1,  1,
+   -2,  2,  2,  0,  0, -1, -3, -4, -2,  2,  2,  1,  1,  0, -2, -4,
+   -2,  2,  2,  2,  2,  1, -1, -2, -3,  2,  3,  3,  4,  2,  0, -2,
+   -3,  2,  3,  2,  4,  2,  0, -3, -4,  1,  2,  1,  2,  1, -1, -3,
+   -5,  0,  1,  0,  1,  1, -2, -3, -4,  0,  0,  0,  1,  0, -2, -3,
+    0,  0, -1, -2, -2,  2,  7,  8,  0,  0, -1, -3, -2,  1,  6,  7,
+    0,  1, -1, -3, -3,  0,  4,  5,  0,  1,  0, -1, -1,  0,  1,  3,
+    0,  2,  1,  1,  0, -1,  0,  1, -2,  0,  1,  2,  1,  0, -1, -1,
+   -5, -2,  0,  1,  1,  0, -3, -3, -6, -4, -1,  1,  1, -1, -3, -4,
+   -4, -2,  2,  5,  6,  4,  3,  2, -5, -3,  1,  4,  4,  2,  0,  0,
+   -4, -2,  0,  2,  1, -1, -2, -2, -2, -1,  0,  1,  0, -2, -3, -2,
+   -2,  0,  0,  0, -1, -1, -2, -1, -2, -1, -1,  0,  0,  0,  1,  2,
+   -2, -2, -1, -1,  0,  1,  3,  4, -2, -3, -2, -1,  0,  2,  4,  5,
+    2,  1, -2, -2, -1,  0,  1,  0,  1,  0, -3, -3, -1,  0,  1,  0,
+    0, -1, -3, -3, -1,  1,  1,  1,  0,  0, -3, -1,  1,  2,  3,  3,
+    0, -1, -3, -1,  1,  3,  3,  3, -2, -2, -4, -2,  1,  3,  4,  4,
+   -3, -3, -4, -2,  1,  3,  3,  4, -2, -3, -5, -2,  1,  2,  3,  3,
+    4,  5,  3,  4,  4,  4,  4,  5,  3,  3,  1,  0,  0,  0,  0,  1,
+    1,  1, -1, -2, -3, -4, -3, -2,  2,  2,  0, -2, -2, -4, -3, -2,
+    2,  3,  1, -1, -1, -3, -3, -2,  1,  2,  0,  0, -1, -2, -2, -1,
+    0,  1,  0, -1, -1, -3, -2, -1,  1,  1,  0, -1, -1, -2, -2, -2,
+   -2, -1, -1,  0,  1,  2,  1,  0,  1,  2,  3,  5,  6,  5,  5,  3,
+    1,  2,  3,  4,  5,  5,  4,  3, -2, -2, -3, -3, -2, -1,  0,  0,
+   -3, -3, -4, -5, -4, -3, -2, -1, -1, -1, -2, -2, -2, -1,  0,  0,
+    0,  1,  0, -1, -1,  0,  0,  1, -1,  0, -1, -2, -3, -2, -2, -1,
+    7,  7,  6,  5,  4,  2, -1, -2,  3,  3,  2,  2,  1,  0, -2, -3,
+    0, -1, -1, -1,  0, -1, -2, -2, -1, -3, -2, -1,  0,  0,  0,  1,
+    0, -2, -2, -1, -1,  1,  2,  2,  3,  1, -1, -1, -1,  1,  2,  2,
+    3,  1, -2, -3, -2, -1,  1,  2,  1, -2, -5, -6, -5, -3, -2,  0,
+    0, -1, -2, -3, -1,  0, -2, -2,  0,  0, -1, -1,  0,  1, -1, -2,
+    0,  0, -2, -1,  0,  0,  0, -2, -1, -2, -3, -3, -2, -1, -3, -3,
+   -1, -2, -3, -3, -2, -2, -3, -4,  2,  2,  0,  0,  0,  0, -1, -2,
+    5,  5,  3,  2,  2,  2,  0, -1,  8,  8,  6,  5,  4,  4,  2,  1,
+   -7, -8, -6, -3, -1, -1, -2, -1, -5, -5, -3,  0,  2,  1,  0,  0,
+   -1, -1,  0,  3,  4,  3,  1,  1,  2,  1,  1,  3,  4,  3,  2,  2,
+    3,  2,  0,  2,  3,  2,  1,  2,  4,  2, -1, -1,  0,  1,  1,  1,
+    3,  2, -2, -3, -2, -1,  0,  1,  3,  1, -3, -4, -3, -2,  0,  1,
+   -4, -2, -1,  2,  3,  3,  1,  0, -7, -5, -4, -2,  0,  0, -1, -2,
+   -6, -5, -5, -4, -2, -2, -2, -3, -1,  0, -1, -1,  0,  0,  0, -1,
+    2,  3,  2,  2,  2,  2,  1,  0,  3,  5,  4,  3,  1,  0,  1,  0,
+    3,  4,  3,  2,  0, -1, -1, -1,  5,  5,  3,  1,  0, -1, -1, -1,
+    1,  1,  0, -1, -3, -5, -6, -4,  1,  1,  0,  0,  0, -3, -3, -1,
+    0, -1, -1,  0,  1,  0,  1,  3, -2, -2, -3, -1,  2,  2,  4,  7,
+   -2, -2, -2,  0,  2,  2,  3,  6, -1,  0,  0,  1,  1,  0,  0,  3,
+    0,  3,  3,  3,  1, -2, -3, -1,  1,  3,  4,  3,  0, -3, -5, -4,
+    0,  2,  0, -1, -3, -4, -2, -2,  1,  4,  2,  0, -2, -3, -2, -1,
+    3,  6,  3,  1, -2, -2,  0, -1,  4,  7,  4,  1, -2, -3, -1,  0,
+    3,  6,  3,  0, -3, -3, -1,  0,  1,  3,  0, -1, -3, -2,  1,  1,
+    0,  1, -1, -2, -3, -1,  2,  2, -2, -1, -3, -3, -3, -1,  1,  2,
+    3,  1, -1,  0,  1,  0,  0,  0,  2, -1, -2, -1,  1,  0, -1, -1,
+    1, -1, -2,  0,  1,  0, -2, -3,  0, -2, -1,  1,  3,  1, -3, -5,
+    0, -2, -1,  2,  5,  2, -3, -5,  0, -2, -1,  4,  6,  3, -2, -5,
+    0, -2,  0,  4,  7,  4, -2, -4,  0, -2,  0,  4,  6,  4, -2, -4,
+   -2, -2, -3, -4, -3, -2, -1,  0,  1,  1,  0, -1, -1, -1,  0,  1,
+    3,  3,  2,  2,  1,  1,  1,  1,  2,  2,  2,  2,  1,  0,  0,  1,
+    0,  0,  0,  0, -1, -1, -1, -1, -4, -4, -4, -4, -4, -4, -4, -3,
+   -3, -3, -2, -3, -2, -1, -1,  0,  3,  4,  4,  5,  5,  6,  6,  7,
+   -1, -2,  7, -2, -4, -1, -1,  0, -1, -2,  9, -1, -4, -1, -1,  0,
+   -1, -3, 10, -1, -4, -1, -1,  1, -1, -3, 10, -2, -3, -1, -1,  2,
+   -1, -2, 10, -2, -4, -1, -1,  2, -1, -2,  9, -2, -4, -1, -1,  2,
+   -1, -2,  8, -2, -4,  0, -1,  1,  0, -2,  7, -2, -3, -1,  0,  2,
+    3, -4,  1,  3, -3, -2,  1,  0,  3, -5,  1,  4, -3, -2,  1,  0,
+    3, -6,  2,  5, -3, -1,  3,  0,  3, -6,  2,  5, -3, -1,  2,  0,
+    3, -6,  1,  5, -4, -2,  3,  0,  3, -6,  1,  5, -3, -2,  2,  0,
+    2, -6,  1,  4, -3, -1,  1,  0,  2, -6,  1,  4, -2, -1,  1,  0,
+    0,  0,  1,  1,  1,  0,  0,  2,  0, -1,  1,  1,  1,  0,  0,  2,
+    0, -1,  0,  0,  0,  0,  0,  2,  0, -1,  0,  0,  0,  0, -1,  0,
+    1,  0,  1,  0,  0, -1, -2, -1,  3,  1,  1,  0,  0, -2, -4, -3,
+    5,  3,  2,  1,  0, -3, -5, -4,  5,  4,  2,  0, -1, -4, -5, -5,
+    1,  0, -1, -2, -2, -3, -6, -9,  2,  0, -1, -1,  0,  0, -3, -6,
+    1,  0,  0, -1,  0,  0, -2, -5,  2,  1,  1,  1,  1,  2, -1, -3,
+    1,  1,  2,  1,  2,  2,  1, -1,  1,  1,  2,  1,  1,  1,  1,  1,
+    0,  0,  2,  1,  0,  0,  2,  2,  0,  1,  2,  2,  0,  0,  2,  2,
+   -4, -3,  0,  1,  4,  6,  4,  3, -3, -2,  0,  0,  2,  4,  1,  0,
+   -1, -1,  0,  0,  1,  1, -2, -3,  1,  1,  1,  0,  1,  1, -3, -5,
+    1,  1,  1,  0,  1,  1, -3, -5, -1,  0,  0, -1,  1,  1, -2, -4,
+   -1,  0,  0, -1,  1,  2,  0, -2, -1,  0,  0,  0,  2,  3,  1,  0,
+   -1,  0,  3,  4,  0, -4, -5, -5,  0,  0,  4,  5,  2, -2, -3, -2,
+    0, -1,  2,  4,  2, -1, -1,  0,  0, -2, -1,  1,  0, -2,  0,  1,
+    1, -2, -2,  0,  0, -1, -1,  1,  1, -2, -3,  0,  1,  0, -1,  0,
+    1, -2, -2,  1,  3,  1,  0,  0,  1, -2, -1,  2,  4,  2,  0,  0,
+    1,  2,  3,  2,  0,  2,  2,  1, -1,  0,  1,  0, -3,  1,  1,  1,
+   -1,  0,  0, -2, -4,  0,  2,  1, -1,  2,  2, -1, -5,  0,  2,  1,
+   -1,  3,  4, -1, -5,  0,  2,  1, -2,  2,  4,  0, -4, -1,  0,  0,
+   -4,  0,  2,  0, -4, -2,  0,  0, -5, -1,  2,  1, -2,  1,  3,  2,
+    1,  0,  1,  0,  1,  2, -1, -2,  2,  0, -1, -2,  1,  3,  0, -1,
+    3,  0, -2, -4,  0,  3,  1,  0,  5,  1, -3, -5, -2,  2,  1,  1,
+    6,  1, -2, -5, -2,  1,  0,  1,  5,  1, -1, -5, -2,  0, -1,  0,
+    3,  0, -2, -4, -2,  0, -1,  0,  1, -1,  0, -2,  0,  1,  0,  1,
+    1,  1,  2,  3,  2,  1,  1,  2, -1, -1,  0,  1,  1,  0,  1,  1,
+   -4, -3,  0,  0,  1,  1,  1,  2, -4, -3,  0,  2,  2,  2,  3,  2,
+   -5, -4,  0,  1,  1,  1,  1,  2, -5, -4, -1, -1, -2, -2, -1,  0,
+   -3, -2,  0,  0, -2, -3, -2, -1,  2,  3,  4,  4,  2,  0,  0,  0,
+   -4, -2,  0,  1,  0,  0,  0,  0, -3, -1,  1,  1,  0,  0,  0,  0,
+   -2,  0,  2,  2,  0,  0,  0,  2, -1,  1,  2,  1, -1,  0,  3,  5,
+    0,  2,  1, -1, -2,  0,  5,  6,  0,  1,  0, -3, -3,  0,  4,  6,
+    1,  1, -2, -4, -4, -3,  1,  2,  1,  0, -2, -4, -5, -4, -2,  0,
+   -1, -3, -3, -3, -3, -2, -1, -1,  3,  2,  1,  0,  0,  1,  1,  1,
+    5,  4,  3,  2,  1,  1,  2,  2,  2,  1,  0, -2, -2, -2, -1, -1,
+    0,  0,  0, -1, -2, -2, -2, -2,  0,  1,  3,  3,  2,  1, -1, -1,
+    0,  1,  3,  4,  3,  2,  1, -1, -4, -3, -1,  1,  0, -2, -3, -3,
+   -3, -4, -7, -8, -7, -4, -1,  2,  0, -1, -3, -4, -4, -2,  0,  2,
+    1,  0,  0, -1, -3, -2,  0,  2,  2,  1,  1,  0, -1, -1,  0,  2,
+    1,  1,  1,  1,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+    0,  0,  1,  2,  3,  3,  2,  2,  0,  0,  1,  3,  4,  4,  3,  2,
+    3,  3,  3,  0, -1,  0,  1,  2,  1,  1,  1, -1, -2, -1, -1,  1,
+   -2, -2, -1, -3, -3, -2, -2,  0, -4, -4, -2, -2, -2, -2, -3,  0,
+   -4, -4, -1,  1,  1,  0, -1,  2, -3, -1,  2,  3,  4,  3,  3,  5,
+   -2,  0,  2,  3,  3,  3,  3,  3, -2, -2,  0,  0,  0,  0,  0,  1,
+    0,  2,  1, -1, -3, -1,  3, -2, -1,  0, -1, -1, -3,  0,  4, -2,
+   -2, -2, -2, -2, -2,  1,  5, -2, -3, -2, -3, -1, -2,  1,  4, -3,
+   -2,  0, -1,  0, -1,  0,  3, -5,  1,  2,  1,  2,  0,  0,  2, -5,
+    2,  4,  2,  3,  1,  1,  3, -3,  1,  2,  1,  1,  0,  1,  4, -2,
+    4, -3, -4, -1,  3,  3,  1,  3,  4, -4, -4, -1,  3,  2,  0,  2,
+    4, -3, -4,  0,  2,  2, -1,  1,  4, -3, -2,  1,  2,  1, -2,  0,
+    2, -4, -2,  1,  2,  0, -3,  0,  2, -3, -2,  0,  1,  0, -2,  2,
+    3, -1, -1,  0,  0,  0,  0,  3,  2, -2, -2, -2, -1, -1, -1,  2,
+    2,  2,  3,  4,  3,  1,  0, -1,  1,  0,  1,  2,  1, -1, -2, -2,
+    2,  1,  2,  1,  1,  0, -1, -1,  4,  3,  4,  3,  2,  1,  1,  1,
+    3,  2,  2,  2,  1,  1,  1,  1, -1, -2, -1,  0, -1, -1, -1, -1,
+   -3, -3, -2, -1, -2, -2, -2, -2, -4, -4, -3, -3, -4, -4, -3, -3,
+    2,  1, -1, -3, -4, -2,  3,  4,  2,  2,  1, -1, -3, -2,  1,  2,
+    1,  2,  3,  3,  0, -2, -1, -2, -1,  0,  2,  4,  2,  0, -1, -3,
+   -2, -2,  0,  3,  3,  2,  0, -3,  0, -2, -3, -1,  1,  2,  2, -1,
+    3, -1, -4, -5, -3,  0,  2,  0,  6,  3, -2, -6, -5,  0,  3,  1,
+   -2,  3, -2,  0,  3, -2, -2,  1, -3,  4, -3,  0,  3, -2, -1,  2,
+   -3,  5, -3,  0,  4, -2, -1,  2, -2,  4, -4, -1,  3, -3, -2,  2,
+   -3,  4, -3,  0,  3, -3, -1,  2, -2,  5, -2,  0,  3, -3, -1,  2,
+   -2,  4, -3,  1,  3, -2, -1,  2, -2,  3, -2,  1,  3, -2,  0,  2,
+    1,  0,  0, -1,  1,  2, -4, -1,  2,  0,  0, -1,  1,  2, -4, -2,
+    1,  1,  1, -1,  2,  4, -2,  0,  0, -1,  1, -1,  2,  5, -1,  1,
+    0, -1,  0, -2,  1,  5, -1,  1,  0, -1, -1, -2,  0,  3, -3, -1,
+    1,  1,  0, -2,  0,  3, -3, -1,  1,  1,  0, -3,  0,  3, -2,  0,
+    1,  0, -1,  1,  1,  2,  4,  5,  1,  0, -1,  1,  1,  1,  5,  7,
+    0,  0, -2, -1, -1,  0,  3,  5,  0, -1, -2, -1, -1, -1,  2,  3,
+    0, -1, -3, -1, -1, -1,  1,  2, -1, -2, -4, -2, -2, -2,  0,  0,
+   -1, -2, -2, -1, -2, -2,  0,  0,  0, -1, -1,  0, -1, -1,  0,  0,
+    3,  3,  0, -1, -1,  1,  4,  4,  2,  3,  0, -2, -2,  0,  1,  1,
+    2,  3,  1, -1, -1,  0,  1,  0,  1,  2,  0, -1, -1, -1,  0, -2,
+    0,  1,  0, -1, -2, -1,  0, -2,  0,  1,  0, -1, -2, -1,  1,  0,
+    1,  1, -1, -3, -4, -3,  1,  3,  1,  2, -1, -3, -5, -4,  1,  3,
+   -3, -2,  0,  1,  1,  1,  0, -2,  0,  1,  1,  1,  0,  0, -1, -3,
+    1,  2,  1,  1,  0, -1, -1, -2,  0, -1, -3, -1, -1, -1,  0, -1,
+    0, -3, -6, -3, -2, -1,  1,  1,  2, -1, -4, -3, -2,  0,  2,  2,
+    5,  4,  1,  1,  0,  1,  3,  2,  5,  4,  2,  1,  0, -1,  0,  1,
+   -2,  0, -2, -5, -6, -3,  0,  0, -2,  0,  1,  0, -1,  1,  2,  2,
+   -2,  0,  1,  3,  2,  2,  2,  1, -2,  0,  2,  4,  3,  2,  1,  1,
+   -2,  0,  2,  3,  2,  0, -1,  0, -3, -1,  1,  1,  0, -1, -1,  1,
+   -4, -1,  1,  0, -1, -2,  0,  2, -4, -1,  0, -1, -1, -2,  1,  4,
+   -3,  0,  0, -1,  1,  1,  1,  0, -3,  1,  0, -1,  0,  0, -1, -1,
+   -1,  3,  3,  0,  1,  0,  0,  1, -3,  2,  2, -2, -1,  0,  0,  1,
+   -5,  0,  0, -2, -1,  1,  0,  2, -7, -2,  1,  0,  1,  2,  2,  2,
+   -5,  0,  3,  2,  3,  3,  2,  2, -3,  2,  4,  1,  0,  0, -2, -3,
+    5,  2, -2, -2,  0, -1, -1, -1,  2, -1, -4, -3, -1, -2, -1, -1,
+    0, -2, -2,  1,  2, -1,  0,  1, -1, -2, -1,  3,  3, -1,  0,  2,
+    1,  0,  0,  3,  3, -2, -1,  2,  2,  1,  1,  3,  2, -2, -2,  0,
+    1,  0, -1,  1,  1, -3, -3, -2,  1,  0,  1,  2,  3,  0,  0,  0,
+   -4, -5, -3,  0,  1, -1, -2, -1, -2, -3, -1,  1,  2,  0,  0,  0,
+    1,  1,  2,  1,  2,  1,  1,  1,  3,  4,  3,  1,  0, -2, -1, -1,
+    3,  3,  2,  0, -2, -3, -3, -2,  1,  1,  0, -1, -2, -4, -2, -2,
+    2,  1,  0,  0,  0, -1,  0,  1,  2,  1,  1,  1,  1,  1,  1,  3,
+    0,  0,  0, -1, -2, -1,  1,  0, -2, -1, -1, -2, -3, -2,  0,  0,
+   -1,  0,  0, -1, -2,  0,  1,  1,  1,  1,  0, -1, -1,  1,  3,  1,
+    2,  2,  0, -2, -1,  2,  3,  0,  3,  1, -1, -1,  1,  4,  2, -2,
+    2,  0, -3, -1,  3,  5,  0, -5,  1, -1, -2,  0,  3,  3, -1, -6,
+   -1,  0,  3,  4,  2,  0,  1,  2, -2, -1,  0,  1, -1, -2,  0,  1,
+   -2, -3, -2, -3, -6, -7, -6, -3,  2,  2,  3,  1, -1, -2, -3, -2,
+    2,  2,  3,  1,  0,  0,  0,  0,  2,  1,  1,  0,  1,  1,  0,  1,
+    1,  0,  0,  0,  0,  1,  1,  2,  1,  0, -1,  0,  0,  2,  2,  1,
+    1,  1,  3,  1, -1, -1, -1,  1, -2, -1,  0,  0, -2, -2, -1,  2,
+   -2, -2,  1,  1,  1,  0,  1,  3, -2, -2,  0, -1,  0, -1,  0,  2,
+    0,  0,  1,  0, -1, -1, -2,  1,  3,  2,  2,  1,  0, -2, -2,  1,
+    5,  3,  3,  2,  1,  1,  1,  4,  0, -3, -4, -5, -4, -3, -1,  1,
+   -6, -4, -1,  2,  2,  0,  0, -1, -4, -2,  1,  3,  3,  2,  2,  0,
+   -3, -2, -1,  2,  3,  3,  2,  0, -3, -2, -2,  1,  2,  1,  1, -1,
+   -2, -2, -2,  0,  2,  2,  1, -1, -1, -1, -1,  1,  2,  3,  2,  0,
+   -1, -1, -2,  1,  2,  2,  2, -1,  0, -1, -2,  0,  2,  1,  0, -1,
+    6,  4,  2,  1,  0,  0,  0,  1,  4,  2, -1, -2, -2, -2, -1, -1,
+    2,  1, -1, -2, -2, -2, -2, -1,  2,  2,  0, -2, -2, -2, -1,  0,
+    0,  0, -1, -2, -2, -1,  0,  1, -3, -3, -2, -1, -1, -2, -1,  0,
+   -3, -2,  2,  3,  2,  0, -1, -2, -2,  0,  4,  5,  5,  2,  0, -1,
+    5,  4,  2,  0, -1, -2, -1, -1,  4,  3,  2,  1,  0, -1,  0, -1,
+    1,  1,  0,  1,  1,  0,  1, -1, -2, -1, -1,  0,  0, -2, -2, -3,
+   -1,  0,  0,  0, -1, -3, -3, -5,  0,  1,  1, -1, -1, -2, -2, -3,
+   -1, -1, -1, -2, -1,  1,  3,  1, -1, -2, -2, -1,  2,  5,  6,  5,
+   -3, -3, -2,  1,  1, -2, -1, -1,  1,  2,  3,  4,  1, -3, -1, -3,
+    3,  2,  0,  1, -1, -3, -1, -3,  1,  0, -1,  0, -1, -1,  1,  0,
+    1,  1,  0,  1,  2,  2,  5,  3,  1,  1,  1,  2,  2,  2,  3,  0,
+   -3, -1, -2, -2, -3, -3, -1, -3, -1,  1,  1,  0, -1, -1,  0, -2,
+    2,  0, -2, -2,  2,  4,  1, -2,  1,  0, -2, -1,  3,  5,  2, -1,
+   -1, -2, -3, -2,  1,  3,  1, -2, -1, -2, -1, -1,  0,  2,  1, -1,
+    0,  0,  1,  1,  1,  2,  2,  0,  0,  1,  4,  4,  2,  2,  3,  1,
+   -2, -1,  2,  1, -2, -3, -2, -3, -1,  0,  1,  0, -3, -4, -4, -5,
+    4,  0, -3, -4, -4, -4, -2, -1,  5,  0, -1,  0, -1, -3, -2, -1,
+    4,  0,  0,  1,  1,  0,  0,  0,  0, -3, -2, -1,  0,  0,  1,  0,
+    0, -2,  0,  0,  1,  1,  2,  1,  2,  0,  0,  0,  1,  1,  1,  0,
+    2,  0, -1, -1,  1,  1,  1,  0,  1, -1, -2, -2,  0,  2,  2,  2,
+   -3, -5, -2,  0, -1, -3, -3,  0,  0, -2,  0,  2,  2,  0,  0,  3,
+    2, -1, -2,  0,  0, -1, -1,  2,  5,  2, -1, -1, -1, -1, -1,  2,
+    5,  2,  0, -1, -1,  0, -1,  2,  2,  1,  0,  0,  0,  1,  0,  2,
+   -1, -1,  1,  1,  2,  2,  1,  2, -3, -2,  0,  0,  0,  0, -2, -1,
+    0,  3,  2,  0, -2, -3, -3, -3,  0,  3,  3,  1,  0,  0,  1,  2,
+   -1,  0, -1, -2, -1, -1,  1,  3, -1,  0, -1, -2, -1, -1,  0,  2,
+   -1,  0, -1, -2,  0,  0, -1,  2, -1,  0, -1, -2, -1, -1, -2,  1,
+    0,  1,  0, -3, -1, -1, -1,  2,  5,  5,  2, -1, -1, -1,  1,  3,
+    0,  0,  1, -1, -3, -2,  0,  2,  1,  1,  3,  0, -2, -2,  0,  1,
+    1,  1,  3,  1,  0,  0, -1, -1,  0, -1,  2,  1,  1,  0, -1, -3,
+   -1, -2,  1,  1,  1,  0, -2, -4, -1,  0,  2,  1,  1,  0, -1, -3,
+    1,  1,  3,  2,  1,  0, -2, -3,  2,  2,  4,  2,  1, -1, -2, -4,
+    1,  2,  2,  2,  0, -2,  0,  2, -1, -1, -2, -3, -4, -5, -3,  1,
+    0,  1,  1,  0, -1, -1, -1,  1,  0,  1,  1,  1,  0,  0,  0,  2,
+    0,  1,  1,  2,  1,  1,  1,  2, -1, -1,  0,  2,  2,  2,  2,  3,
+   -2, -4, -4, -1, -2, -2, -2,  0,  1,  0,  0,  1,  0,  0,  0,  1,
+    0, -1, -3, -2,  0,  2,  2,  1,  0, -1, -2, -3,  0,  1,  1,  2,
+    1,  0, -2, -3, -1,  0,  0,  1, -1,  0, -1, -2,  0,  0, -1,  0,
+   -1,  1,  1,  0,  2,  2,  0,  0,  0,  2,  3,  1,  3,  5,  3,  2,
+   -1,  1,  1, -2,  0,  3,  1,  1, -1,  0,  0, -4, -4, -1, -1, -1,
+   -1,  1,  1,  0,  1,  2,  1,  2, -3,  0,  1,  0,  1,  1,  0,  2,
+   -5, -3, -1, -1,  0,  1,  0,  1, -4, -3, -2, -3, -2, -1, -1,  0,
+    0,  0, -1, -2, -2, -2, -2,  0,  3,  4,  2,  0,  0,  0,  0,  1,
+    2,  1,  0,  0,  0,  0, -1,  0,  0,  1,  2,  3,  4,  4,  3,  2,
+   -1,  4,  7,  4,  0,  0,  0,  0, -1,  4,  6,  3,  0,  1,  1,  1,
+    0,  3,  4,  0, -1,  0,  0,  1,  0,  1,  1, -2, -1,  0, -1, -1,
+   -1,  0, -1, -1, -1,  0,  0,  0, -1, -1, -1,  0,  0,  0,  0,  0,
+   -1, -3, -3,  0,  1, -1, -2, -1, -3, -4, -4, -2, -1, -2, -2, -1,
+    2,  2,  1,  0,  1,  1,  0, -3, -2, -1,  0,  0,  1,  1,  0, -3,
+   -2, -1,  0,  1,  2,  1,  1, -2,  1,  2,  2,  2,  3,  3,  2, -1,
+    1,  2,  1,  0,  1,  1,  2, -1,  0,  1, -2, -4, -2,  0,  1, -1,
+    1,  1, -1, -3, -2,  0, -1, -3,  1,  2,  0, -1,  0,  1, -1, -4,
+   -1, -1, -2, -2,  0,  3,  4,  3,  1,  1, -1, -3, -2,  0,  0,  0,
+    2,  2,  2,  2,  2,  1, -1, -1,  1,  1,  1,  3,  3,  0, -2, -2,
+    0, -1, -1, -1,  0, -2, -1, -1, -1, -3, -4, -3, -2, -2,  0,  2,
+   -1, -1,  0,  1,  2,  2,  3,  5, -2, -1, -1,  0,  0,  0,  0,  1,
+   -2, -3,  2,  0,  0,  1,  1, -1, -1, -4,  1, -2, -1,  2,  2,  0,
+    1, -4,  0, -2, -2,  1,  1, -1,  2, -3,  1, -1, -1,  1,  1, -1,
+    3, -2,  3,  1,  0,  1,  1, -1,  1, -3,  2,  1,  0,  1,  0, -1,
+   -1, -5,  1,  0, -1,  0,  1,  1,  0, -3,  3,  3,  1,  2,  3,  3,
+    0, -1, -2,  1,  5,  5,  2, -1,  1, -1, -2, -1,  1,  1, -2, -5,
+    1,  1, -1, -2, -1, -1, -1, -3,  1,  1, -1, -1, -1,  2,  4,  3,
+   -1, -1, -1, -1, -1,  0,  4,  3, -1, -1,  0,  1, -1, -3, -1, -1,
+    0,  0,  0,  2,  2,  0,  0, -1,  0, -2, -3,  0,  1,  1,  3,  2,
+    2,  3,  2,  1,  0,  0, -2, -2,  2,  3,  0,  1,  1,  3,  3,  2,
+    0,  0, -3, -1, -1,  2,  2,  3, -2, -2, -3,  1,  1,  2,  1,  1,
+   -2, -1, -2,  2,  1,  1, -1, -2,  0,  1,  0,  2,  0,  0, -2, -2,
+    0,  1,  0,  2,  0,  0, -2, -2, -3, -2, -2,  0, -1, -2, -2, -3,
+    0,  1, -1,  3, -1,  1,  3, -1,  0,  1, -1,  3, -1, -1,  2, -3,
+    1,  1, -2,  3, -1, -3,  0, -3,  2,  2, -2,  3,  0, -2,  1, -2,
+    1,  1, -3,  3, -1, -2,  1, -3,  1,  1, -3,  3,  0, -1,  1, -2,
+    1,  2, -1,  4,  0, -1,  1, -2,  0,  1, -1,  3, -1, -3,  0, -3,
+   -3, -3, -1,  1,  2,  1, -1, -2, -2, -2,  0,  2,  1,  0, -2, -2,
+   -3, -2,  1,  2,  1, -1, -2, -1, -3, -2,  2,  4,  0, -2, -2,  1,
+   -3, -1,  2,  4,  0, -2, -2,  2, -1,  1,  4,  3, -1, -3, -2,  2,
+    0,  2,  4,  2, -1, -2, -1,  2,  0,  1,  2,  0, -1,  0,  1,  3,
+    3,  0, -5,  1,  4,  0,  0,  1,  1, -2, -5,  2,  5, -1, -2,  1,
+   -1,  0,  0,  3,  3,  1,  0, -1, -2,  3,  4, -2, -3, -1,  0, -2,
+   -3,  3,  5, -3, -3,  0,  0, -2, -1,  3,  2, -2, -2,  2,  2, -1,
+    2,  0,  0, -1,  0,  0,  0,  0,  0, -3, -2,  1,  3,  0, -2, -2
+};
+
+/* list of codebooks for inter-coded vectors */
+static const int8_t* const svq1_inter_codebooks[6] = {
+    svq1_inter_codebook_4x2, svq1_inter_codebook_4x4,
+    svq1_inter_codebook_8x4, svq1_inter_codebook_8x8,
+    NULL, NULL,
+};
+
+static const int8_t svq1_inter_codebook_sum[4][16*6] = {
+ {
+ -1,  1, -2,  0,  1, -1, -1, -1, -2, -1,  1, -1, -1,  0, -1, -1,
+  0, -1, -1, -1, -1,  0, -1,  0,  0,  0, -3,  1, -1,  0,  1, -1,
+  1, -1,  2,  2,  1,  1,  2,  0,  0,  0, -1,  1,  1,  0,  0,  0,
+  1, -1,  0,  1, -1,  1,  1,  0,  1,  0, -1,  1,  1,  0,  0,  0,
+ -2,  0,  0, -2,  0,  0, -2,  0, -2, -1, -2, -1,  0,  0, -1,  0,
+  1,  0,  1, -1,  2,  2,  1,  2,  2,  1,  0,  1,  1,  0,  1,  1,
+ },{
+ -2,  1, -1, -1,  1,  0,  1, -1, -1, -1,  1, -1,  0, -1,  0, -1,
+  0,  0,  0, -2,  0,  1,  0, -1, -1,  0,  2, -3,  1, -2,  3, -1,
+  2,  0,  2,  1,  1, -1,  1,  1,  0,  0,  1,  1,  2, -2,  1,  0,
+ -2, -1,  2, -2, -2,  0, -3,  0, -1,  0, -1,  0, -1,  0, -2, -3,
+  1, -2, -2, -1,  1, -1, -1,  1, -1,  1,  1,  0, -2,  0,  1,  1,
+  1,  1,  2,  1,  0,  0, -1,  0,  0,  1,  0,  1, -1,  1,  0,  2,
+ },{
+  0,  0,  0, -3,  1,  1,  1, -3,  0, -1,  0, -3,  1, -3,  0, -2,
+  1,  2, -1, -3,  0, -3,  1, -1,  0, -1,  0,  0,  1,  2,  1,  1,
+ -1,  2, -3,  3,  1,  0, -5,  1,  0, -1, -3,  1,  0,  2,  0, -3,
+  4,  2,  0, -2,  1, -2,  3, -2,  1,  1,  0, -1,  2,  5,  3,  1,
+ -1,  0,  2, -3, -2,  0,  0, -2,  2, -3, -1, -1,  2,  1,  0, -2,
+  3, -1,  1, -1,  2,  4,  0,  1,  0,  1,  0, -1, -3, -2, -1,  0,
+ },{
+  0,  2, -1, -1,  2, -4, -2,  3,  0, -1, -5,  1,  0,  1,  0,  6,
+ -2,  2,  0,  1,  1, -1, -1, -2,  1, -2, -1,  0,  2, -2, -2, -1,
+ -4,  2, -1, -3, -1, -2,  2, -1,  2, -1,  2,  0,  3, -3, -3,  0,
+ -3,  0,  0, -2,  4, -4,  0, -1,  4,  0, -2, -2,  3, -2,  0,  4,
+  5,  0,  1,  0, -3,  3,  3,  2,  0,  0,  1,  2, -5, -2, -3,  0,
+ -3,  2, -2,  2, -2,  4,  7, -3,  4,  2,  3,  2, -1,  0, -3,  1,
+ }
+};
+
+/* 6x16-entry codebook for intra-coded 4x2 vectors */
+static const int8_t svq1_intra_codebook_4x2[768] = {
+   12, 13, 13, 11, -7,-10,-15,-17,-16,-15,-12,-10, 11, 15, 15, 12,
+    2, 17, 20, 15,-45,-24,  2, 13, 21, 20, -6,-36, 12, 16, -1,-27,
+  -18,-21, 10, 45,-11,-20, -7, 21, 43, -8,-28,  0, 33,-16,-28,  3,
+  -12,-18,-18, -6,-20,-10, 28, 55, -5,-18,-21,-18, 56, 30, -6,-20,
+  -34, 27, 29,-22,-30, 29, 26,-25, 30, 34, 33, 26,-25,-31,-35,-33,
+  -31,-35,-36,-32, 29, 36, 37, 31,-71,-12, 38, 34,-63, -1, 42, 33,
+   58, 37,-31,-60, 55, 34,-33,-61,-57,-57, 22, 93,-57,-58, 21, 93,
+   59, 69, 70, 62,-63,-68,-68,-60,-64,-71,-71,-64, 63, 73, 72, 62,
+   -2,  0,  7, 15,-11,-10, -3,  5, -5, -8,-10,-10,  1,  9, 14,  9,
+   15,  8, -4,-11, 12,  2,-11,-12, -8,  0, 19, 28,  4, -1,-15,-26,
+  -15, 27,  2,-14,-14, 22,  1, -9, -4, -6,-13,-10, -6,-14,  6, 47,
+  -35,-20,  6, 23,  6,  9,  6,  4, -6,  2, 23,-22, -7,  4, 28,-21,
+   20,-22, -2,  6, 22,-28, -5,  8,-10,-18,-16,-12, 36, 19,  2, -1,
+   -3,  0,  4,  8,-45,-10, 23, 23, 40, 15,-20,-35, -4, -1,  4,  1,
+    9, -5,-33, 24,  8,  3,-26, 19, -1,  4,  6, -3, 32, 25,-13,-49,
+   24, 24, 15,  7,-17,-27,-19, -7,-47,  0, 39, 24,-21, -6,  7,  4,
+   -1,  0,-10,-13,  1,  1,  5, 16, 20,  5, -3, -9, -1, -4, -2, -6,
+  -17, -7,  1,  4, 12,  7,  0,  0,  3,  0, 12, 11, -3,  1,  0,-23,
+    4, 17, -6,  0,  6,  3,-25,  0,-17, 10,  8,  5,-14,  4,  1,  4,
+   13, 10,  4,  2,-23, -9,  1,  2,  3, -3,  1,  7,  1,-23, -7, 20,
+   -7,-18,  2, 12, -5, -4, 10,  9,  4, 10,  7,-24,  6,  3,  4,-10,
+   22,-14,-22,  6,  0,  5,  5, -1, -4,  3,-11, -4, -7, 31,  7,-14,
+   -5,-16, -1, 42, -4, -2, -9, -5,  5, -8, -6, -3, 42, -4,-21, -5,
+  -18, 12, 20,-12, 13,-13,-10,  7, -8, -9, -2,-18,-16,  6, 40,  8,
+   10, -1,  0,  4, -3,  4, -1,-13, -2,  6,  1,-15,  5,  3,  1,  2,
+   -4, -2,  1,  3, 15,  0, -9, -4, -3, -4, -4, -4, -3,  5, 16, -3,
+    2, 13,  3,  4, -3, -8,-10,  0, -6, -2, -4, -1, -2, -3, -6, 23,
+    6, -6,  7,  1,  4,-18,  5,  1, -1,  1,-15, 14, -5,  6, -4,  4,
+    2,  2,  2,  6,-24,  2,  7,  3,-26,  0,  3,  3,  5,  7,  1,  6,
+   14, -2,-18, -3,  7,  5, -4,  2, -6,  3, 32,  1, -6, -6, -6,-12,
+    5,-36,  7,  6,  9, -1, 11,  0,  4,  4,  5,  3,  4, 15,  3,-38,
+   10, 23, -5,-42,  0,  4,  4,  4, 23, 17, -6,-13,-13,-37,  1, 29,
+    5,-14, -1,  1,  5,  0,  3,  1,  0,  4, -5,  2,  8,  0,  0,-10,
+    4,  7, -2, -3,-10,  3,  1,  1,-12, -1, 13,  3,  0, -1,  1, -3,
+    0, -1,  3,  1, -6, -9,  3,  9, -6,  1, -4, -6,  8, -1,  0,  8,
+   -3, -3,  0, 18, -5, -1, -4, -1, -8, -2,  3, -4,  0, 17, -1, -5,
+    5, -2,  9,-10,  1, -5,  6, -5,  4,  2,  2,  3, 10,-14, -8,  1,
+   -1, -2,-18, -1, -1, 20,  1,  2, -1,  1, -9,  1, -1, -9, 22, -4,
+    6, -4,  8, -3, -1,  7,-19,  5, -7, 31, -4, -4, -6,  0, -5, -5,
+   -7, -8,-19, -4,  1,  1,  4, 32, 38, -1, -8,  4, -7, -8, -6,-12,
+   -1,  0, -7,  1, -1,  9, -1,  0,  9, -1, -1,  0,  2, -6,  1, -3,
+  -12,  0,  2,  1,  1,  1,  8,  0,  9,  1,  0,  2, -2,  1,-11,  0,
+    0,  8,  2,-10, -1,  2, -1,  0, -2, -4,  0, -5, -2, -1, -1, 14,
+   -3,  7, -1,  5,  0,-10,  1,  1, -1, -5, 14, -1, -2,  1, -3, -2,
+   -6,  0,  0,  6,  2,  3, -9,  4,  4, -5, -1, -1, -7,  3,  8, -1,
+    2, -4, -1,-11, 11,  2,  1,  0, -1,  2,  3,  9,  0,  2,  0,-15,
+    3,  5,-20,  3,  3, -1,  3,  3,  1, -1, 16,  1,  2,-29,  9,  2,
+  -13, -6, -1, -3, 36, -1, -8, -3,  2,  5,  4,  2,-37,  9, 11,  3
+};
+
+/* 6x16-entry codebook for intra-coded 4x4 vectors */
+static const int8_t svq1_intra_codebook_4x4[1536] = {
+  -11, -3,  3,  6,-10, -1,  5,  7, -9, -1,  6,  7, -9, -1,  4,  6,
+    5,  7,  0,-14,  6,  9,  2,-15,  6,  9,  2,-15,  4,  6,  0,-14,
+   16,  3, -5, -6, 16,  1, -8, -8, 14, -1, -9, -9, 12,  0, -8, -8,
+    8, 12, 16, 17, -2,  2,  6,  9,-10, -8, -4,  0,-15,-14,-11, -7,
+   -7,-10, -2, 16, -7,-11, -3, 18, -7,-11, -1, 20, -6, -8,  1, 19,
+   -9,-13,-16,-17,  2, -2, -7, -9, 11,  8,  4, -1, 16, 15, 11,  7,
+  -22, -2, 13, 15,-24, -2, 14, 16,-25, -4, 13, 15,-25, -6, 10, 13,
+   26, 26, 22, 16, 17, 15,  9,  3, -2, -6,-11,-14,-20,-25,-28,-28,
+  -27,-27,-25,-21,-16,-15,-11, -7,  3,  8, 12, 13, 23, 28, 31, 30,
+   20, 16, -7,-33, 22, 19, -6,-35, 22, 19, -6,-34, 20, 17, -6,-32,
+  -20,-20,  2, 38,-21,-22,  2, 40,-21,-22,  2, 40,-20,-20,  3, 38,
+  -47, -4, 24, 26,-50, -3, 26, 27,-50, -3, 26, 27,-47, -4, 24, 26,
+   45,  6,-23,-27, 48,  5,-25,-28, 48,  5,-26,-28, 44,  6,-24,-27,
+  -30,-36,-10, 76,-31,-37,-11, 78,-31,-37,-11, 78,-31,-36,-10, 77,
+  -53,-32, 35, 52,-54,-34, 36, 52,-54,-34, 36, 52,-53,-33, 34, 51,
+  -93,-34, 62, 65,-93,-34, 62, 66,-93,-34, 62, 65,-93,-34, 60, 64,
+   -7,  0,  2,  2, -8, -1,  3,  3, -8,  0,  4,  5, -6,  1,  5,  5,
+    3,  7, 11, 11,  2,  2,  3,  3,  1, -2, -6, -7,  1, -5,-11,-13,
+    3, -2, -4, -3,  7,  0, -5, -5, 12,  4, -5, -7, 14,  6, -4, -7,
+   18, 14,  3, -2,  6,  4,  0, -3, -8, -5, -2,  0,-16,-11, -2,  2,
+   -8, -6,  7, 18, -7, -8,  2, 13, -4, -6, -2,  6,  0, -4, -3,  1,
+    1, -3,-13,-18,  0, -1, -5, -7, -1,  1,  6,  7, -2,  4, 15, 17,
+  -15,-14, -7, -2, -6, -5, -1,  0,  6,  6,  3,  1, 15, 13,  6,  1,
+    2, -2,-11, 10,  2, -1,-12, 11,  3, -1,-12, 11,  2, -2,-11, 11,
+   -9, 14, -1, -5, -9, 15, -2, -5, -8, 16, -2, -5, -7, 15, -1, -4,
+    2,  6,  8,  8, -2,  3,  9, 12,-11, -5,  4, 10,-19,-16, -8,  0,
+   14,  8, -7,-15, 12,  7, -7,-14,  8,  5, -4, -9,  5,  3, -1, -4,
+   12,-14, -2,  2, 13,-15, -1,  3, 14,-15, -1,  3, 13,-14, -1,  3,
+    0,  6, 10,-13,  0,  6, 10,-15,  0,  7,  9,-17,  1,  6,  8,-16,
+   -8, -5, 15, -2, -8, -6, 17, -2, -8, -6, 16, -3, -8, -5, 15, -2,
+   -9,-11,-11,-10,  9, 10,  9,  8,  8, 10, 10,  9, -8, -9, -8, -7,
+    9, 10,  9,  7, -8,-10,-10,-10, -7,-10,-11,-11, 11, 12, 11,  8,
+    0, 10,  7,  0,  0,  7,  0, -6,  0,  2, -5, -6, -2, -1, -4, -1,
+    5,  0, -6, -9,  2,  2,  2,  1, -2,  0,  5,  7, -6, -5,  1,  4,
+    3, -8,  2, -1,  4, -9,  3,  0,  5, -7,  3,  0,  7, -5,  3,  0,
+   -5, -3,  2,  9, -6, -3,  1,  8, -6, -3,  1,  7, -5, -2,  0,  4,
+   13,  8,  3,  1, -3, -5, -4, -1, -8, -7, -3,  0, -1,  1,  3,  2,
+    3,  2, -5,-12,  4,  3, -2, -9,  3,  4,  1, -4,  3,  5,  4, -1,
+   -9, -8, -4,  0,  8,  6,  2,  0, 10,  8,  3,  0, -6, -5, -3, -1,
+   -3, -9,-12, -5,  0, -3, -5,  0,  2,  3,  2,  4,  5,  8,  7,  6,
+   -1, -2,  5, 12, -1, -1,  5,  9,  2,  1, -1, -2,  2, -1,-11,-17,
+   -7,  3,  3, -1, -9,  3,  4, -1,-10,  4,  6, -1, -9,  5,  7,  0,
+  -18, -7,  2,  2, -8,  1,  5,  3,  3,  4,  1,  0,  9,  5, -2, -3,
+   -2,  0,  6,  8, -4, -5, -5, -3,  1, -2, -6, -8, 10,  9,  3, -1,
+    0, -2, -2,  0,  0, -4, -5,  0, -2, -8, -4,  8, -5, -7,  6, 24,
+    9,  1, -7,  1,  9,  1, -8,  1,  8,  0,-10,  1,  8, -1,-11, -1,
+    8,  8,  6,  3,  5,  4,  3,  2, -2, -3, -1,  0,-10,-13, -8, -4,
+    0,  4,  2, -3,  0,  6,  3, -5,  3, 10,  2,-12,  5, 10, -4,-22,
+    0, -4, -1,  3,  1, -4, -1,  5,  1, -5,  0,  8, -1, -6, -2,  7,
+   -1, -1, -2, -4, -1, -2, -4, -6, -1, -1, -1, -2,  1,  5, 10,  9,
+   10,  3,  0, -2,  6, -1, -2, -5,  3, -1, -2, -6,  2,  0,  0, -5,
+    6,  3,  0,  0,  6,  3,  1,  1,  4, -2, -2,  1,  0, -9, -9, -2,
+  -11, -3,  1,  2, -6,  2,  4,  5, -3,  2,  3,  4, -2,  1,  1,  2,
+   -6, -4, -1, -2,  2, -1, -1, -2, 10,  2, -2, -2, 11,  2, -4, -1,
+    6,  0, -2,  2,  3,  3,  0,  0, -6,  3,  3,  0,-17, -1,  5,  0,
+   -1,  4, 10, 11, -3, -2,  0,  1, -3, -4, -5, -3, -1, -2, -2, -1,
+    2, -3, -9,-12,  3,  3,  3,  2,  2,  2,  4,  4,  2,  1, -1, -2,
+   -2,  9,  5,-10, -3,  5,  5, -5, -2,  1,  2,  0, -1, -2, -2,  1,
+   -2, -3,  7, -2, -1, -3,  7, -3, -1, -2,  8, -4, -2, -2,  7, -3,
+    1, -8, -3, 12,  2, -2, -2,  4,  1,  3,  0, -5, -1,  5,  2, -7,
+   -1,  3,  1, -5, -7, -2,  3,  1, -2, -7, -2,  2, 20,  3, -5, -1,
+    5,  0, -3, -2, -7, -7,  0,  6, -6,  0,  7,  6,  2,  6,  0, -7,
+   -2,  6, -7,  1, -2,  7, -8,  3, -2,  7, -7,  3, -1,  7, -6,  2,
+   -5, -2,  5,  7,  4,  1, -4, -8,  6,  3, -2, -5, -7, -5,  3,  7,
+   -1, -1,  6,  5,  0, -1,  1, -4,  2,  1,  0, -7,  1,  0,  0, -4,
+   -8,  0,  3,  1, -2,  1, -1, -1,  1, -1, -3,  1,  1, -2,  1,  9,
+    5,  2, -3, -4, -1,  0, -1, -3, -3,  1,  3,  1, -4,  0,  4,  2,
+    2, -2, -2, 12,  0, -2, -5,  3, -1,  0, -3,  1, -3, -1, -2,  1,
+    1,  5,  3,  0, -6, -4, -2,  1,  0, -2, -2,  2,  6,  1, -4, -1,
+   -3, -5, -5, -1,  3,  5,  5,  4,  0,  3,  1, -1, -2,  1, -2, -3,
+    2, -4, -5, -3,  4, -2, -3, -2,  6,  0, -1, -1,  7,  1,  0,  0,
+   -3, -2, -2,  0, -2, -3, -5, -1, -2,  2,  0, -1, -1, 11,  9, -1,
+    0,  1, -1,-10, -1,  1,  0, -6,  1,  0,  1,  4,  2, -5, -1, 13,
+   -2,  4,  5,  0, -5,  1,  6,  3, -6, -2,  3,  2, -5, -2,  0, -2,
+   -1,  1,  1, -2, -1, -2,  0,  2,  5,  5,  5,  7,  0, -4, -8, -7,
+    0,  2, -1, -5, -1,  2,  2, -3,  0,  5,  3, -5,  3,  8,  2,-12,
+    8,  4,  0, -2, 10, -1, -4, -1,  3, -6, -3,  0, -4, -5,  0,  0,
+    0,-10, -4,  2, -1, -6,  3,  5, -1, -3,  6,  4,  0, -2,  4,  2,
+    0,  8,  1, -1,  0, 11,  1, -3, -1,  6, -2, -4, -3, -2, -7, -4,
+    0, -1, -1, -1,  4,  5,  6,  5, -5, -9, -8, -5,  2,  2,  3,  2,
+    0,  2,  6,  1,  2,  0,  3,  0,  1, -2, -1, -2,  0, -1, -3, -6,
+    0,  0,  2,  0,  4,  0,  2,  1,  5, -2,  0,  0, -2, -9, -1,  2,
+    0,  1,  0,-10, -1,  1,  8,  0, -1, -2,  4,  0,  1, -1,  2, -1,
+   -3, -2,  2, -1, -3, -1,  2, -3,  0, -1,  1,  0,  8,  1, -1,  3,
+    0,  1,  1,  2,  0, -4, -2,  0, -1, -5,  1, -1, -2, -1, 11,  2,
+    1,  5, -2, -2,  0,  2, -4,  0, -2,  1, -5,  1,  0,  5,  0,  1,
+   -5, -3,  0,  6, -4,  2,  0,  0, -3,  5,  1,  0, -3,  3,  0,  0,
+    3, -2, -3,  1,  1, -4,  0,  8, -2, -3, -2,  3,  1,  2, -1, -1,
+    1,  1,  0,  2,  2,  0,  1,  6,  1, -1,  2,  1,  0,  3,  0,-19,
+    1, -3, -2,  2,  6,  5, -2, -7, -3,  1,  3,  1, -1, -1,  0,  2,
+   -8, -1, -1, -4,  1,  1, -1,  2,  4,  3,  2,  3, -5,  1,  3,  0,
+    0,  2, -1,  1, -3,  0,  0,  5, -5, -2,  0,  8, -4, -4, -4,  6,
+    1,  2,  1,  2,  2,  2, -3,  2,  4,  0, -9,  0,  7,  0,-11,  1,
+    0,  0,  0, -2,  3,  3, -1, -6,  4,  3, -3,-10, -1,  2,  6,  2,
+    7, -2, -3,  5, -4,  0,  3, -1, -4,  2,  1, -7,  2, -1, -1,  3,
+    3,  2,  2,  2, -5, -7, -7, -5,  5,  6,  4,  2, -2, -1,  0,  1
+};
+
+/* 6x16-entry codebook for intra-coded 8x4 vectors */
+static const int8_t svq1_intra_codebook_8x4[3072] = {
+    5,  6,  6,  6,  7,  7,  8,  8,  0,  0,  0,  0,  0,  1,  2,  3,
+   -3, -4, -4, -5, -5, -4, -3, -2, -4, -4, -4, -5, -4, -4, -3, -3,
+    1,  2,  2,  2,  2,  3,  3,  3,  2,  3,  3,  4,  4,  5,  5,  5,
+   -1,  0,  1,  1,  2,  3,  4,  4, -9,-10, -9, -9, -8, -7, -6, -5,
+   -4, -4, -5, -6, -6, -7, -7, -7,  0, -1, -2, -2, -3, -3, -4, -4,
+    4,  4,  3,  3,  2,  1,  1,  0,  7,  7,  7,  6,  6,  5,  4,  4,
+    2,  4,  5,  6,  4,  1, -3, -6,  3,  4,  5,  5,  4,  0, -5, -8,
+    2,  3,  4,  4,  2, -2, -7,-10,  2,  2,  2,  1,  0, -4, -9,-12,
+   -9, -7, -3,  1,  4,  4,  3,  3,-10, -7, -2,  3,  5,  5,  3,  3,
+   -9, -6, -2,  3,  6,  5,  4,  3, -8, -6, -1,  3,  4,  4,  3,  2,
+   -5, -5, -5, -5, -3,  1,  4,  7, -5, -5, -5, -4, -2,  1,  6,  8,
+   -4, -5, -4, -3, -1,  3,  8, 10, -3, -4, -3, -2,  1,  5,  9, 11,
+   -2, -2, -2, -2, -2, -2, -2, -2, -4, -5, -5, -5, -5, -5, -5, -4,
+   -3, -4, -4, -4, -4, -4, -4, -3,  9, 10, 10, 11, 11, 11, 10, 10,
+    7,  4,  1, -2, -4, -6, -9,-10,  9,  7,  3,  0, -2, -4, -8, -9,
+   11,  8,  4,  2,  0, -3, -6, -8, 11,  9,  5,  3,  1, -2, -5, -7,
+  -13,-13,-13,-12,-11,-10, -8, -8,  0,  1,  2,  3,  4,  4,  4,  3,
+    3,  4,  5,  6,  6,  6,  5,  4,  3,  4,  4,  4,  3,  3,  3,  2,
+   10, 10, 11, 10,  9,  9,  8,  7,  6,  6,  6,  6,  5,  4,  3,  2,
+    0,  0,  0, -1, -2, -3, -4, -4,-10,-10,-11,-12,-13,-14,-14,-14,
+   16, 16, 17, 16, 15, 13, 12, 11, -1, -2, -3, -4, -4, -4, -4, -3,
+   -4, -5, -6, -6, -6, -6, -6, -6, -5, -6, -6, -6, -6, -6, -5, -5,
+  -13,-13,-13,-12,-11,-10, -8, -6, -9, -8, -7, -6, -4, -2,  0,  1,
+   -2, -1,  1,  3,  5,  7,  8,  9,  5,  7,  9, 11, 13, 14, 15, 15,
+   16, 14, 11,  7,  2, -3, -7, -9, 14, 12,  8,  3, -1, -6, -9,-11,
+   11,  9,  4,  0, -4, -8,-11,-13,  8,  5,  1, -3, -6,-10,-12,-14,
+  -18,-15, -9, -3,  1,  6,  9, 11,-17,-13, -7, -1,  3,  7, 11, 12,
+  -15,-11, -5,  1,  5,  9, 12, 13,-13, -9, -3,  2,  5,  9, 11, 13,
+   22, 21, 19, 15, 10,  3, -4, -9, 20, 18, 15,  9,  2, -5,-12,-17,
+   16, 13,  8,  1, -7,-14,-20,-24, 10,  6, -1, -8,-15,-21,-25,-27,
+  -25,-23,-20,-14, -7,  1,  9, 14,-23,-21,-16, -9,  0,  9, 16, 21,
+  -20,-16,-10, -1,  8, 16, 22, 25,-15,-11, -3,  6, 14, 20, 25, 27,
+   -4, -2,  0,  1,  2,  2,  2,  2, -5, -2,  0,  2,  3,  3,  3,  3,
+   -6, -4, -1,  1,  2,  3,  3,  3, -7, -5, -2,  0,  1,  1,  2,  2,
+    2,  1,  1,  1,  1,  0, -2, -3,  3,  3,  2,  1,  0, -1, -3, -4,
+    4,  3,  2,  1,  0, -2, -4, -6,  5,  4,  3,  1, -1, -3, -5, -6,
+    5,  6,  6,  4,  2,  0, -2, -3,  3,  4,  4,  4,  3,  1,  0, -1,
+   -2, -2, -1, -1, -1, -1, -2, -2, -5, -4, -3, -2, -2, -2, -3, -3,
+   -1, -1, -1, -1, -1, -1, -1, -1, -3, -4, -4, -4, -3, -3, -3, -3,
+   -1, -1, -1, -1, -1, -1, -1, -2,  5,  6,  6,  6,  6,  5,  4,  3,
+    4,  4,  4,  4,  4,  5,  6,  7,  0, -1, -1, -1, -1,  0,  1,  2,
+   -2, -3, -3, -3, -3, -2, -1,  0, -3, -3, -4, -4, -4, -3, -2, -1,
+    0, -2, -4, -4, -2,  0,  2,  3,  0, -2, -3, -3, -1,  2,  4,  5,
+   -1, -2, -4, -3,  0,  3,  5,  6, -2, -3, -4, -3, -1,  2,  4,  5,
+    9,  4,  0, -3, -3, -1,  0,  1,  8,  4, -1, -4, -3, -1,  1,  2,
+    6,  2, -3, -5, -4, -2,  0,  1,  5,  1, -3, -4, -4, -2,  0,  1,
+    5,  3,  1, -1, -4, -8,-10,-10,  3,  3,  2,  1,  0, -2, -3, -4,
+    1,  1,  1,  2,  3,  2,  1,  0, -1,  0,  1,  2,  3,  4,  3,  2,
+    0,  1,  2,  2,  1, -1, -3, -3,  0,  1,  1,  1, -1, -2, -4, -3,
+   -3, -3, -3, -3, -3, -3, -1,  2, -4, -4, -3,  0,  3,  7, 12, 14,
+   -5, -5, -6, -6, -6, -6, -6, -5,  2,  2,  2,  1,  0,  0,  0,  0,
+    4,  4,  3,  2,  1,  0,  0,  0,  6,  6,  5,  4,  2,  2,  1,  1,
+   -7, -7, -6, -3,  0,  4,  7,  8, -1, -2, -3, -3, -2, -1,  1,  2,
+    3,  3,  1, -1, -2, -2, -2, -1,  6,  6,  4,  2,  0, -2, -2, -2,
+   -6, -5, -2,  2,  5,  9, 11, 12, -4, -4, -2,  0,  2,  4,  5,  6,
+   -3, -2, -2, -2, -2, -1,  0,  1, -2, -2, -2, -3, -3, -3, -3, -2,
+   -7, -3,  1,  3,  3,  0, -3, -5, -6, -2,  3,  5,  4,  1, -3, -5,
+   -5, -1,  4,  6,  5,  2, -3, -4, -4,  0,  5,  7,  6,  3, -1, -3,
+    0,  0,  0,  0,  0,  0,  0,  0, -2, -2, -3, -3, -3, -3, -2, -1,
+    6,  7,  8,  9,  9,  8,  7,  6, -4, -4, -5, -5, -6, -6, -5, -4,
+   -9, -8, -6, -4,  0,  3,  6,  6, -5, -4, -1,  3,  5,  6,  5,  3,
+    1,  3,  6,  6,  4,  1, -2, -5,  6,  7,  5,  1, -3, -7,-10,-11,
+   10,  9,  5,  1, -3, -6, -6, -4,  5,  3, -1, -5, -6, -5, -2,  2,
+   -2, -4, -6, -6, -4,  1,  6, 10, -6, -7, -7, -4,  1,  7, 11, 12,
+    6,  5,  3,  2,  0,  0,  0,  0,  2,  1, -1, -2, -3, -2, -1, -1,
+    0, -1, -2, -4, -4, -2, -1,  1,  0,  0, -1, -2, -1,  0,  2,  3,
+    0, -1, -2, -2, -2, -2, -1, -1,  5,  4,  2,  1,  0,  0,  0,  0,
+    6,  5,  3,  1,  0,  0,  0,  0,  2,  0, -2, -4, -4, -3, -2, -2,
+   -7, -4,  0,  2,  2,  2,  2,  1, -7, -3,  0,  0,  0,  0,  0,  0,
+   -4, -1,  1,  1,  0,  0,  0,  1, -1,  1,  2,  2,  2,  2,  3,  3,
+   -2,  0,  2,  2,  1,  1,  1,  1, -1,  1,  2,  2,  1,  0,  0, -1,
+    0,  2,  4,  2,  0, -1, -2, -3,  1,  2,  3,  1, -2, -4, -6, -6,
+    1,  2,  2,  4,  5,  6,  4,  1,  0, -1, -1, -1,  0,  0, -2, -4,
+    0,  0, -1, -2, -2, -2, -4, -6,  2,  1,  0,  0,  1,  1, -1, -3,
+    1,  1,  1,  1,  1,  2,  3,  3,  0,  0,  1,  0,  1,  2,  4,  4,
+   -1, -1, -1, -1,  0,  1,  2,  3, -4, -4, -5, -5, -5, -3, -1,  0,
+   -6, -5, -5, -4, -3, -2, -1, -1, -1,  0,  0,  1,  1,  2,  3,  3,
+    0,  1,  1,  1,  2,  2,  3,  4,  0,  0, -1, -1,  0,  1,  2,  3,
+    0,  1,  1,  1,  0,  0, -1, -1,  1,  3,  3,  2,  1, -1, -2, -2,
+   -2,  0,  2,  2,  2,  2,  1,  1, -9, -8, -4, -2,  1,  3,  3,  3,
+   -1, -1, -1, -2, -3, -3, -3, -4,  0,  0,  0, -1, -2, -2, -3, -3,
+    2,  2,  2,  0, -1, -1, -1, -1,  5,  5,  4,  3,  2,  2,  2,  2,
+    6,  3, -1, -4, -3, -1,  1,  1,  2, -1, -3, -4, -1,  2,  2,  0,
+   -1, -2, -2,  1,  4,  4,  1, -3, -2, -1,  1,  4,  6,  3, -3, -8,
+    3,  3,  2,  1, -1, -2, -2, -2, -4, -4, -2, -1,  1,  3,  4,  4,
+   -4, -5, -5, -4, -2,  0,  2,  2,  7,  7,  4,  1, -1, -2, -3, -2,
+   -1,  1,  3,  0, -4, -6,  0,  6, -2,  1,  4,  1, -4, -6, -1,  7,
+   -3,  1,  4,  2, -3, -6, -1,  6, -2,  0,  3,  2, -2, -5, -1,  4,
+    1, -1, -2,  1,  4,  4, -1, -7,  1, -1, -4, -1,  5,  6,  0, -6,
+    3,  0, -4, -3,  3,  6,  2, -4,  3,  0, -5, -4,  1,  4,  1, -3,
+    2,  2,  3,  3,  3,  3,  2,  2, -4, -5, -6, -7, -7, -7, -7, -6,
+    1,  2,  3,  3,  3,  3,  2,  2,  0,  0,  1,  1,  1,  2,  2,  1,
+    3, -3, -3,  3,  4, -2, -2,  2,  3, -4, -4,  4,  4, -4, -4,  2,
+    4, -4, -4,  4,  4, -4, -3,  3,  3, -3, -4,  3,  3, -3, -3,  3,
+   -2, -2, -2, -2, -2, -2, -1, -1,  6,  7,  8,  8,  8,  7,  6,  5,
+   -5, -6, -7, -7, -8, -7, -6, -5,  1,  1,  2,  2,  2,  2,  1,  1,
+    0,  0,  0,  0,  0, -1,  0,  0, -1,  0,  0,  0,  0, -1,  0,  0,
+   -2, -3, -2, -2, -2, -3, -3, -3,  2,  3,  5,  6,  4,  2,  1,  0,
+    8,  6,  2,  0,  0,  0,  0,  0,  4,  1,  0,  0,  0, -1, -1, -1,
+    1, -1,  0,  0,  0, -1, -2, -3, -2, -2, -1,  0,  0, -2, -4, -5,
+    3,  1, -1, -2, -3, -4, -5, -5,  2,  1,  0,  0,  1,  1,  0,  0,
+    0, -1, -1,  0,  2,  2,  2,  2, -1, -2, -1,  1,  2,  2,  2,  2,
+    0, -1, -2, -1, -1, -1, -1,  0, -1, -2, -2, -1, -1,  0,  0,  1,
+    2,  1,  1,  2,  2,  1,  1,  0,  6,  5,  3,  1,  0, -2, -4, -4,
+   -3, -2, -1,  0,  1,  1,  0, -1,  0,  1,  3,  4,  5,  5,  3,  1,
+   -1, -1, -1,  0,  1,  0, -1, -2, -2, -2, -2, -1,  0, -1, -2, -3,
+    0, -1, -2, -2, -1, -1,  0,  2,  1, -1, -2, -1, -1, -1,  0,  2,
+    1,  0, -2, -2, -2, -2,  1,  5,  1, -1, -2, -2, -2,  0,  5, 10,
+    0,  0,  0,  0,  0, -1, -1, -1, -1, -1, -1,  0,  0,  0,  1,  2,
+    1,  2,  2,  3,  4,  4,  6,  5, -3, -3, -3, -2, -2, -3, -3, -3,
+    1, -1, -2, -2,  0,  3,  5,  7,  2,  0, -2, -3, -2,  0,  2,  3,
+    3,  1, -2, -3, -3, -2, -1, -1,  3,  1,  0, -1, -1, -1, -1, -1,
+    1,  3,  5,  4,  2, -1, -3, -4, -3, -2,  1,  2,  1,  0, -1, -2,
+   -5, -3,  0,  2,  2,  1,  0,  0, -3, -1,  1,  2,  2,  1,  0,  0,
+    0, -1, -1, -1,  1,  2,  3,  4, -3, -4, -4, -3, -1,  0,  0,  1,
+   -2, -3, -2, -1,  1,  1,  1,  1, -2, -2,  0,  3,  4,  4,  3,  2,
+   -4, -4, -3, -2, -1,  1,  2,  3,  0,  1,  1,  1, -1, -2, -3, -3,
+    3,  4,  5,  4,  2, -1, -3, -3, -2, -2,  0,  2,  2,  2,  1,  0,
+   -4,  0,  5,  7,  4, -1, -4, -4, -1,  2,  4,  3,  0, -3, -3, -2,
+    2,  1,  0, -1, -2, -2,  0,  1,  0,  0, -1, -2, -2, -1,  1,  2,
+   -4, -3, -2, -1,  0,  1,  2,  2, 10,  9,  5,  0, -3, -4, -3, -2,
+    1, -1, -2, -2, -1,  0,  0,  0, -2, -2, -1,  1,  1,  1,  0, -1,
+   -5, -3,  0,  3,  4,  2,  0, -2, -2, -1,  0,  1,  1,  0, -1, -1,
+    3,  2, -1, -2, -2, -1,  1,  1,  7,  5, -1, -5, -6, -2,  2,  4,
+   -2,  3,  3, -3, -4,  1,  2, -2, -3,  3,  4, -3, -4,  2,  3, -2,
+   -3,  3,  4, -3, -4,  2,  3, -2, -4,  2,  4, -2, -3,  1,  2, -1,
+    4,  3, -1, -3, -3, -1,  1,  2, -4, -6, -4,  0,  4,  5,  4,  1,
+    0,  2,  5,  6,  2, -3, -5, -4,  1,  1, -1, -3, -5, -2,  2,  4,
+   -1,  0,  1,  2,  2,  3,  3,  4, -1,  0,  1,  1,  0, -1, -1, -1,
+   -1,  0,  1,  2,  2,  1, -1, -2, -3, -2, -1,  0,  0, -1, -2, -3,
+    1,  1,  1,  1,  0,  0,  1,  2,  1,  0, -1,  0,  0,  1,  1,  0,
+    1, -2, -4, -1,  1,  2,  1,  0,  1, -4, -7, -3,  1,  3,  2,  1,
+    1,  1,  1,  1,  1,  1,  0, -1,  1,  1,  1,  0,  1,  2,  2,  0,
+    1,  1,  0,  0,  0,  2,  0, -3,  3,  2,  0, -1, -1, -2, -6, -9,
+    0,  0,  0,  1,  0,  0,  1,  2,  1,  0,  0,  0, -1, -1,  0,  2,
+    0,  1,  1,  1, -1, -3, -2,  0, -7, -5,  1,  6,  6,  2, -1, -1,
+    3,  1, -1, -3, -4, -2,  1,  4,  2,  0, -2, -3, -4, -3, -1,  2,
+    2,  2,  1,  1,  1,  0,  0,  1,  1,  1,  0,  0,  0,  0,  0,  1,
+   -1,  1,  1, -2, -5, -6, -4, -1, -1,  1,  4,  3,  2,  0,  1,  2,
+   -1,  0,  2,  3,  1,  0,  0,  1, -1,  0,  1,  0,  0, -1, -1,  0,
+    0,  1,  2,  2,  0, -2, -1,  1, -2, -1, -1, -2, -1,  2,  6,  8,
+   -1, -1, -2, -3, -2,  0,  1,  2, -1,  0,  0, -1, -1,  0, -1, -1,
+    2,  1,  1,  1,  1,  0,  0,  0,  0,  0,  1,  1,  1, -1, -1,  1,
+   -1,  0,  2,  2, -1, -3, -2,  3,  0,  2,  3,  0, -5, -7, -2,  4,
+   -1,  0,  0,  0, -1, -2, -3, -3, -1,  0, -1, -2, -2, -2, -2, -2,
+    1,  1,  0,  0,  1,  2,  0, -1,  1,  2,  1,  2,  5,  6,  2,  0,
+   -2, -4, -3,  0,  2,  2,  0, -3,  3,  1,  0,  1,  2,  1, -2, -3,
+    3,  1,  0,  0,  0,  0,  0, -1,  1, -1, -2, -2, -1,  1,  3,  3,
+    3,  2,  1,  2,  4,  3,  1, -2, -2, -4, -4, -3, -1,  0, -2, -3,
+    1,  0, -1, -1,  0,  1,  0, -1,  3,  2,  0,  0,  0,  1,  1,  0,
+    1,  1,  0,  0,  0,  0,  0,  0,  2,  3,  3,  2,  2,  2,  1,  1,
+    0, -1, -2, -3, -5, -5, -5, -4,  1,  1,  0, -1,  0,  1,  3,  3,
+   -9, -6, -2,  0,  1,  1,  2,  2, -6, -2,  1,  2,  1,  1,  0,  1,
+   -2,  1,  2,  2,  1,  1,  1,  1,  0,  2,  2,  1,  0,  1,  1,  1,
+    1,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0,  0, -1, -3, -2,  0,
+   -3, -3, -3, -2, -1,  3,  7,  9,  1,  2,  2,  2,  0, -2, -4, -3,
+    2,  0, -2, -1,  3,  4, -1, -6,  1,  0, -2, -3, -1,  3,  3,  0,
+    0,  3,  3,  0, -2, -1,  1,  1, -6, -1,  3,  2, -1, -2,  0,  1,
+    5,  3,  0, -2, -3,  0,  2,  1,  1,  1,  2,  2,  0, -2, -4, -7,
+   -3, -2,  1,  2,  2,  1, -1, -4,  2,  2,  0, -2, -2,  0,  2,  2,
+    0,  0, -2, -3, -2, -1,  0,  0,  0,  0,  0,  0,  0,  0,  2,  2,
+   -2, -1,  0,  1,  0,  1,  2,  3, -4, -2,  0,  0, -1,  0,  2,  3,
+   -2, -2, -2, -1, -1,  0,  2,  4,  0,  0,  0,  0, -1, -1,  0,  1,
+    0, -1, -1, -1, -1, -1,  0,  0,  6,  4,  2,  0, -1, -2, -1, -1,
+    0,  1,  1,  1,  1, -1, -5,-10,  1,  1,  1,  1,  1,  1,  0, -4,
+    1,  0,  1,  1,  1,  1,  1, -1,  2,  1,  1,  1,  0,  0,  0,  0,
+   -3,  1,  4,  3,  3,  1, -1,  0, -4,  0,  1,  0, -1,  0,  0,  0,
+   -5,  0,  2,  1,  1,  1,  0, -1, -1,  2,  1, -2, -2, -1,  0, -1,
+    2,  4,  5,  3,  0, -1,  1,  2,  0,  0,  1,  0, -2, -2, -1, -1,
+   -2, -2, -2, -2, -3, -2, -1,  0,  0,  0,  1,  0,  0,  0,  1,  2,
+    0, -2, -2, -3, -1,  2,  2, -1,  1,  0,  0,  0,  1,  5,  3, -2,
+   -1, -1,  0, -1,  0,  2,  0, -5, -1,  0,  1,  0,  0,  2,  2, -2,
+    3,  1, -1, -1,  0,  1,  1,  2,  1,  0,  0,  1,  1,  1,  1,  1,
+  -10, -8, -2,  1,  2,  1,  1,  1, -1,  1,  2,  1,  0,  0,  0,  0,
+   -1, -1,  0,  1,  2,  2,  2,  1, -1, -1, -1,  0, -1, -3, -5, -4,
+    1,  1,  2,  1,  1,  0,  0,  2, -1, -2, -1, -1, -1,  0,  2,  4,
+   -3, -7, -5,  0,  2,  0,  0,  0,  3, -1, -2,  1,  2,  1,  1,  2,
+    1, -2, -1,  1,  2,  1,  0,  1,  0, -1,  0,  3,  2, -1, -1, -1,
+    2,  1,  1,  0,  0,  0,  0,  0, -9, -7, -2,  3,  3,  2,  1,  1,
+    3,  2,  0, -2, -2, -1,  1,  1,  0, -1,  0,  0,  1,  1,  0,  0,
+   -2, -1,  1,  1,  1,  0,  0,  0,  1,  2,  1, -2, -4, -3,  1,  2,
+    1,  2,  1, -2, -3,  0,  3,  1, -1, -1,  0,  0,  1,  3,  0, -4,
+    2,  0, -1,  1,  2, -2, -2,  3,  2,  0, -1,  2,  3, -2, -4,  1,
+    0,  1,  1,  1,  2, -2, -6, -2, -1,  0,  0,  0,  2,  0, -2, -1,
+   -1, -1,  1,  2,  1, -2, -3, -2,  3, -1, -2, -1, -1,  0,  1,  2,
+   10,  4,  0,  0, -1, -2, -2, -1,  3, -1, -2, -1,  0, -1, -1,  0,
+   -5,  2,  7,  1, -4, -2,  1,  0, -2,  2,  3, -1, -3,  0,  2,  0,
+    2,  1,  0,  0,  1,  1, -1, -2,  1, -2, -2, -1, -1, -2,  0,  0,
+    0,  3, -2, -7, -1,  3,  0,  0,  1,  3, -3, -5,  2,  3, -1,  0,
+    0,  2, -2, -2,  4,  2, -2,  0, -1,  1, -1,  0,  2, -1, -2,  1,
+    4,  0, -3, -4, -2,  1,  2,  1,  0,  0,  3,  5,  3,  1, -1, -2,
+    1,  1,  1, -1, -3, -1,  1,  1,  1, -1, -2, -2,  0,  0, -1, -2
+};
+
+/* 6x16-entry codebook for intra-coded 8x8 vectors */
+static const int8_t svq1_intra_codebook_8x8[6144] = {
+    4,  4,  3,  2,  2,  1,  0, -1,  4,  3,  3,  2,  1,  0, -1, -1,
+    3,  3,  2,  2,  1,  0, -1, -2,  3,  2,  2,  1,  0, -1, -2, -3,
+    2,  2,  1,  0, -1, -1, -2, -3,  2,  1,  0,  0, -1, -2, -3, -4,
+    1,  0,  0, -1, -2, -3, -4, -4,  0,  0, -1, -2, -2, -3, -4, -4,
+    2,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  3,  3,
+    1,  2,  2,  2,  2,  2,  2,  2,  0,  1,  1,  1,  1,  1,  1,  1,
+   -1,  0,  0,  0,  0,  0,  1,  1, -2, -2, -1, -1, -1, -1, -1, -1,
+   -3, -3, -3, -3, -3, -3, -2, -2, -5, -4, -4, -4, -4, -4, -4, -3,
+   -4, -2, -1,  0,  1,  2,  2,  3, -4, -2, -1,  0,  1,  2,  3,  3,
+   -4, -3, -1,  0,  1,  2,  3,  3, -4, -3, -1,  0,  1,  2,  3,  3,
+   -5, -3, -1,  0,  1,  2,  3,  3, -5, -3, -1,  0,  1,  2,  3,  3,
+   -5, -3, -1,  0,  1,  1,  2,  3, -5, -3, -2, -1,  0,  1,  2,  3,
+    4,  4,  5,  5,  6,  6,  7,  7,  2,  2,  2,  3,  3,  4,  4,  4,
+    0,  0,  0,  0,  1,  1,  1,  2, -2, -2, -2, -2, -1, -1, -1,  0,
+   -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+   -1, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -2, -2, -2, -2,
+    5,  3,  1, -1, -2, -3, -3, -3,  5,  3,  1, -1, -2, -3, -3, -3,
+    5,  3,  1, -1, -2, -3, -3, -3,  5,  3,  1, -1, -2, -3, -3, -3,
+    5,  4,  1,  0, -2, -3, -3, -3,  6,  4,  2,  0, -2, -2, -3, -3,
+    6,  4,  2,  0, -1, -2, -2, -3,  6,  4,  2,  1, -1, -2, -2, -2,
+   -1,  1,  3,  3,  2,  0, -3, -6, -1,  1,  3,  4,  3,  0, -3, -6,
+   -1,  1,  4,  4,  3,  1, -3, -6, -1,  1,  3,  4,  3,  1, -3, -6,
+   -2,  1,  3,  4,  3,  1, -3, -6, -2,  1,  3,  4,  3,  1, -3, -7,
+   -2,  1,  3,  3,  2,  0, -3, -7, -2,  0,  2,  3,  2,  0, -3, -6,
+   10,  9,  8,  6,  6,  5,  4,  4,  6,  5,  4,  3,  2,  2,  2,  1,
+    2,  1,  0, -1, -2, -2, -2, -1, -1, -2, -3, -4, -4, -4, -4, -3,
+   -2, -3, -4, -4, -5, -4, -4, -3, -2, -2, -3, -3, -3, -3, -2, -2,
+   -1, -1, -1, -1, -1, -1, -1,  0,  1,  1,  1,  1,  1,  1,  1,  2,
+   -2, -1,  1,  2,  4,  5,  7,  8, -3, -2,  0,  1,  3,  5,  7,  8,
+   -4, -3, -1,  0,  2,  4,  6,  7, -5, -4, -2, -1,  1,  3,  5,  7,
+   -6, -5, -3, -2,  0,  2,  4,  6, -6, -5, -4, -2, -1,  1,  3,  5,
+   -7, -6, -5, -3, -2,  0,  2,  3, -8, -7, -5, -4, -3, -1,  1,  2,
+   11,  9,  7,  5,  3,  1, -1, -1, 10,  8,  6,  3,  1,  0, -2, -2,
+    9,  7,  5,  2,  0, -2, -3, -4,  8,  6,  3,  1, -1, -3, -4, -4,
+    6,  4,  2, -1, -3, -4, -5, -5,  5,  3,  0, -2, -4, -5, -6, -6,
+    3,  1, -1, -3, -5, -6, -7, -7,  2,  0, -2, -4, -6, -6, -7, -7,
+    5,  6,  7,  7,  7,  8,  8,  8,  3,  4,  5,  5,  6,  6,  6,  6,
+    0,  2,  2,  3,  4,  4,  4,  5, -2, -1,  0,  1,  2,  2,  3,  3,
+   -4, -3, -2, -1,  0,  1,  1,  2, -6, -5, -4, -3, -2, -2, -1,  0,
+   -8, -7, -6, -6, -5, -4, -3, -3,-10, -9, -8, -8, -7, -6, -6, -5,
+    6,  5,  3,  1, -1, -3, -6, -8,  6,  5,  4,  2, -1, -3, -6, -8,
+    6,  5,  4,  2,  0, -3, -6, -8,  6,  5,  4,  2,  0, -3, -6, -8,
+    6,  6,  4,  2,  0, -3, -6, -8,  6,  5,  4,  2,  0, -3, -6, -8,
+    6,  5,  4,  2,  0, -3, -6, -8,  6,  5,  4,  2, -1, -3, -5, -8,
+   11, 10,  9,  8,  7,  6,  5,  4,  8,  8,  7,  6,  5,  4,  3,  2,
+    6,  5,  4,  4,  2,  2,  1,  0,  3,  3,  2,  1,  0,  0, -1, -2,
+    1,  1,  0, -1, -2, -2, -3, -3, -1, -1, -2, -3, -4, -4, -5, -5,
+   -3, -4, -4, -5, -6, -6, -7, -7, -5, -5, -6, -7, -8, -8, -8, -8,
+  -14,-13,-12,-11, -9, -7, -6, -4,-12,-11,-10, -9, -7, -5, -3, -1,
+  -10, -9, -7, -6, -3, -2,  0,  2, -8, -6, -4, -2,  0,  2,  4,  5,
+   -5, -3,  0,  2,  4,  5,  7,  8, -2,  0,  2,  4,  6,  8,  9, 10,
+    0,  3,  5,  7,  8, 10, 11, 12,  3,  5,  7,  8, 10, 11, 12, 12,
+  -19,-19,-18,-18,-17,-16,-15,-14,-15,-15,-14,-13,-12,-11,-10, -9,
+  -11,-10, -9, -8, -6, -5, -4, -3, -6, -5, -3, -2, -1,  0,  1,  2,
+   -1,  0,  2,  3,  4,  5,  6,  6,  4,  6,  7,  8,  9, 10, 10, 10,
+    9, 10, 11, 12, 13, 14, 14, 14, 12, 14, 14, 15, 16, 16, 16, 16,
+   22, 21, 19, 17, 14, 11,  9,  5, 20, 19, 17, 14, 11,  8,  4,  1,
+   17, 15, 13, 10,  6,  3,  0, -4, 13, 11,  8,  5,  1, -2, -5, -9,
+    9,  6,  3, -1, -4, -7,-11,-13,  4,  0, -3, -6, -9,-12,-15,-17,
+   -2, -5, -8,-11,-14,-16,-18,-20, -8,-10,-13,-16,-17,-19,-21,-22,
+   17, 18, 18, 18, 17, 16, 16, 14, 16, 16, 15, 15, 14, 13, 12, 11,
+   12, 12, 11, 10,  9,  8,  7,  5,  7,  6,  6,  4,  3,  2,  1, -1,
+    1,  0, -1, -2, -3, -4, -5, -6, -5, -6, -7, -8, -9,-10,-11,-12,
+  -11,-12,-13,-14,-15,-16,-16,-17,-16,-17,-17,-18,-19,-20,-20,-20,
+    0,  0,  0,  0, -1, -1, -2, -3,  1,  0,  0,  0,  0, -1, -2, -3,
+    1,  1,  0,  0, -1, -1, -2, -2,  1,  1,  1,  0,  0, -1, -1, -2,
+    2,  1,  1,  1,  0, -1, -1, -2,  2,  2,  1,  1,  0,  0, -1, -2,
+    2,  2,  1,  1,  1,  0, -1, -1,  2,  2,  1,  1,  1,  0,  0, -2,
+    0, -1, -1,  0,  0,  1,  2,  3,  0, -1, -1,  0,  1,  1,  2,  2,
+   -1, -1, -1, -1,  0,  1,  2,  2, -1, -1, -2, -1,  0,  1,  1,  2,
+   -1, -2, -2, -1,  0,  0,  1,  2, -1, -2, -2, -2, -1,  0,  1,  2,
+   -1, -1, -2, -1,  0,  0,  1,  2, -1, -1, -1, -1,  0,  1,  1,  2,
+    3,  2,  2,  2,  1,  1,  0,  0,  3,  2,  2,  2,  2,  1,  0,  0,
+    2,  2,  2,  1,  1,  1,  0,  0,  2,  2,  1,  1,  1,  0,  0, -1,
+    1,  1,  1,  0,  0,  0, -1, -1,  0,  0, -1, -1, -1, -1, -1, -1,
+   -2, -2, -2, -2, -2, -2, -2, -2, -2, -3, -3, -3, -2, -2, -2, -2,
+    5,  2,  0,  0, -1,  0,  0,  0,  4,  2,  0, -1, -1, -1,  0, -1,
+    4,  1, -1, -1, -2, -1, -1, -1,  4,  1, -1, -1, -2, -1, -1, -1,
+    4,  1, -1, -2, -2, -1, -1, -1,  4,  1, -1, -2, -2, -1, -1, -1,
+    4,  1, -1, -1, -1, -1, -1, -1,  4,  2,  0, -1,  0,  0,  0, -1,
+   -2, -1,  0,  1,  1,  1,  1,  1, -3, -1,  0,  1,  1,  1,  1,  1,
+   -3, -1,  0,  1,  1,  1,  1,  1, -3, -1,  0,  1,  1,  1,  1,  1,
+   -3, -2,  0,  1,  2,  2,  1,  1, -4, -2,  0,  1,  2,  2,  2,  2,
+   -5, -3, -1,  1,  1,  2,  1,  2, -5, -3, -2,  0,  1,  1,  1,  1,
+    3,  3,  1,  0, -2, -4, -4, -5,  3,  3,  2,  0, -1, -2, -3, -4,
+    2,  2,  1,  1,  0, -1, -2, -2,  1,  1,  1,  1,  1,  0,  0,  0,
+    0,  0,  0,  1,  1,  1,  1,  1, -2, -1, -1,  0,  0,  1,  2,  2,
+   -3, -2, -2, -1,  0,  1,  2,  3, -3, -3, -2, -1,  0,  1,  2,  3,
+   -3, -3, -3, -3, -3, -2, -2, -2, -3, -3, -2, -2, -2, -1, -1, -1,
+   -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  1,  1,  1,  1,  0,  0,  1,  1,  2,  2,  2,  2,
+    1,  1,  1,  2,  2,  3,  3,  3,  2,  2,  2,  2,  3,  3,  3,  3,
+   -8, -7, -5, -3, -2, -1,  0, -1, -4, -3, -1,  0,  1,  2,  1,  1,
+   -1,  1,  2,  3,  3,  2,  2,  1,  1,  2,  3,  3,  2,  2,  1,  0,
+    2,  3,  3,  2,  1,  0,  0, -1,  1,  2,  1,  0, -1, -1, -1, -1,
+    1,  1,  0, -1, -1, -2, -2, -1,  1,  1,  0,  0, -1, -1,  0, -1,
+   -4, -3, -2,  0,  1,  2,  3,  3, -4, -3, -2,  0,  1,  2,  2,  2,
+   -3, -3, -2, -1,  0,  1,  1,  1, -2, -2, -2, -1, -1,  0,  0,  0,
+    0, -1, -1, -1, -1, -1, -1, -1,  2,  1,  1,  0,  0, -1, -1, -2,
+    3,  3,  3,  1,  0, -1, -2, -2,  5,  4,  4,  2,  1,  0, -1, -2,
+    0,  0,  0,  0,  1,  2,  3,  3,  0, -1,  0,  0,  1,  2,  3,  3,
+    0, -1,  0,  0,  1,  2,  3,  2,  0,  0,  0,  1,  1,  2,  2,  2,
+    2,  1,  1,  1,  1,  1,  1,  0,  2,  2,  2,  1,  0,  0, -1, -2,
+    2,  1,  0,  0, -2, -3, -5, -6,  0, -1, -1, -3, -5, -6, -8, -9,
+   -2,  0,  1,  2,  2,  1, -1, -4, -2,  0,  2,  2,  2,  1, -1, -4,
+   -2,  0,  2,  2,  2,  1, -1, -3, -2,  0,  2,  2,  2,  1, -1, -3,
+   -2, -1,  2,  2,  2,  1, -1, -3, -2, -1,  1,  2,  2,  1, -1, -3,
+   -3, -1,  1,  2,  2,  1, -1, -3, -2, -1,  1,  2,  2,  1, -1, -3,
+   -1,  1,  1, -1, -3, -3,  0,  4, -1,  1,  1, -1, -3, -3,  0,  4,
+   -1,  1,  1,  0, -3, -3,  0,  4, -1,  1,  2,  0, -3, -3,  0,  5,
+    0,  1,  2,  0, -3, -4,  0,  4,  0,  1,  2,  0, -3, -4,  0,  5,
+    0,  1,  2,  0, -3, -3,  0,  4,  0,  1,  2, -1, -2, -2,  0,  4,
+    6,  6,  5,  6,  5,  5,  5,  5,  2,  2,  2,  2,  2,  2,  2,  2,
+    0,  0,  0,  0,  0,  0,  0,  0, -1, -1, -1, -1, -2, -2, -2, -2,
+   -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+   -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    2,  2,  2,  2,  2,  2,  2,  2,  0,  1,  1,  0,  0,  0,  0,  0,
+   -1, -2, -2, -2, -2, -2, -2, -1, -3, -3, -3, -3, -3, -3, -3, -2,
+   -3, -4, -4, -3, -3, -3, -2, -2, -2, -2, -2, -2, -1, -1,  0,  0,
+    0,  1,  1,  1,  2,  2,  3,  3,  3,  4,  4,  5,  5,  6,  6,  6,
+    4,  1, -2, -3, -3, -1,  1,  3,  4,  1, -2, -4, -3, -1,  1,  3,
+    5,  1, -2, -4, -3, -1,  1,  4,  5,  1, -2, -3, -3, -1,  2,  4,
+    5,  1, -2, -3, -3, -1,  2,  4,  4,  0, -3, -4, -3, -1,  2,  4,
+    4,  0, -3, -3, -3, -1,  1,  3,  3,  0, -2, -3, -2, -1,  1,  3,
+   -3, -4, -4, -4, -4, -4, -4, -4, -1, -1, -1, -1, -1, -1, -2, -2,
+    2,  1,  1,  2,  2,  1,  1,  1,  3,  3,  3,  4,  4,  3,  3,  3,
+    3,  3,  3,  4,  4,  4,  3,  3,  1,  2,  1,  2,  2,  2,  2,  2,
+   -2, -2, -2, -1, -1, -1,  0,  0, -4, -4, -4, -4, -3, -3, -3, -3,
+   -1, -2, -3, -3, -2, -2, -1,  0,  0, -1, -2, -2, -2, -1,  0,  1,
+    2,  1, -1, -1, -1, -1,  0,  1,  3,  1,  0, -1, -1,  0,  0,  1,
+    3,  2,  0, -1,  0,  0,  0,  1,  3,  1,  0, -1,  0,  0,  0,  1,
+    3,  1,  0, -1,  0,  0,  0,  1,  2,  1,  0,  0,  0,  0,  0,  1,
+    0,  0,  0,  1,  1,  2,  3,  4,  0,  0, -1,  0,  0,  0,  2,  3,
+    0, -1, -1, -1, -1, -1,  0,  1,  0, -1, -1, -1, -1, -1, -1,  0,
+    0,  0, -1, -1, -1, -2, -2, -1,  1,  0,  0, -1, -1, -2, -2, -1,
+    2,  2,  1,  0, -1, -1, -1, -1,  3,  3,  2,  1,  0, -1, -1,  0,
+    1,  0,  1,  0,  0, -1, -2, -1,  0,  0,  0,  0, -1, -1, -2, -1,
+    0, -1,  0,  0, -1, -1, -1, -1, -1, -1, -1,  0,  0,  0,  0,  0,
+   -1, -1, -1,  0,  0,  0,  1,  1, -1, -1, -1,  0,  1,  1,  2,  3,
+   -2, -2, -1,  0,  1,  2,  3,  4, -2, -2, -1,  0,  1,  2,  4,  5,
+   -3, -1,  1,  0,  0, -1,  0,  1, -3,  0,  1,  0, -1, -1,  0,  2,
+   -3,  0,  1,  0, -1, -1,  0,  2, -2,  1,  2,  0, -1, -1,  0,  2,
+   -2,  1,  2,  0, -1, -1,  0,  2, -2,  1,  2,  0, -1, -1,  0,  2,
+   -1,  2,  2,  0, -1, -1,  0,  2, -1,  1,  1,  0, -1, -1, -1,  1,
+   -2, -2, -1,  1,  3,  4,  3,  1, -2, -2, -1,  0,  2,  3,  2,  0,
+   -2, -2, -1,  0,  1,  2,  1, -1, -1, -1, -1,  0,  1,  2,  1, -1,
+   -1, -1, -1,  0,  1,  1,  0, -2,  0, -1, -1,  0,  1,  1,  0, -1,
+    0, -1, -1,  0,  1,  1,  1, -1,  0, -1, -1,  0,  0,  1,  0, -1,
+   -2, -1,  0,  1,  1,  1,  1,  1, -2, -1,  0,  0,  0,  0,  0,  0,
+   -2, -1, -1,  0, -1, -1, -2, -2, -2, -1, -1, -1, -1, -2, -2, -3,
+   -1,  0,  1,  1,  0, -1, -2, -2,  1,  2,  3,  3,  2,  1,  0,  0,
+    1,  2,  3,  3,  3,  2,  1,  0,  0,  0,  1,  1,  1,  1,  0,  0,
+    0, -1, -1, -1,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  1,  1,
+    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  1,  1,  1,  1,
+    1,  1,  1,  1,  1,  1,  1,  1, -1,  0,  0,  1,  1,  0,  0,  0,
+   -3, -2, -1, -1, -1, -1,  0, -1, -5, -5, -4, -3, -2, -2, -2, -1,
+    1,  1,  1,  1,  2,  1,  0, -1,  1,  1,  1,  2,  1,  1,  0, -1,
+    1,  1,  1,  1,  1,  1,  0, -2,  2,  1,  1,  1,  1,  1,  0, -2,
+    1,  1,  0,  0,  0,  0, -1, -3,  1,  1,  0,  0,  0, -1, -2, -3,
+    1,  1,  0,  0, -1, -1, -2, -4,  1,  0,  0, -1, -2, -2, -3, -4,
+    8,  7,  5,  3,  2,  1,  1,  1,  2,  1,  0,  0, -1, -1, -2, -1,
+   -1, -1, -1, -2, -2, -2, -2, -1, -1, -1, -1, -1,  0, -1, -1,  0,
+    0,  0,  0,  1,  1,  0,  0,  0,  0,  0,  1,  1,  1,  0,  0,  0,
+   -1,  0,  0,  0,  0,  0, -1, -1, -2, -2, -1, -1, -1, -2, -2, -1,
+    9,  4,  0, -2, -2, -2, -1, -1,  7,  2, -1, -2, -2, -1,  0,  0,
+    4,  0, -2, -2, -1,  0,  1,  1,  1, -2, -2, -2, -1,  0,  1,  1,
+   -1, -2, -2, -1,  0,  1,  1,  1, -1, -2, -1,  0,  1,  1,  1,  0,
+   -1, -1,  0,  1,  1,  1,  0, -1,  0, -1,  0,  1,  0,  0, -1, -1,
+    0,  1,  1,  1,  1,  1,  0,  0,  1,  2,  2,  2,  1,  0,  0,  0,
+    2,  2,  2,  2,  1,  0, -1, -1,  1,  1,  1,  0, -1, -2, -2, -2,
+    0,  0,  0, -1, -2, -3, -2, -2, -1, -1, -1, -2, -2, -2, -1,  0,
+   -1, -1, -1, -1,  0,  0,  1,  2, -1, -1, -1,  0,  1,  2,  3,  4,
+   -1, -1,  0,  0, -1, -2, -3, -3, -1, -1,  0,  0,  0, -1, -1, -1,
+   -2, -2, -1,  0,  1,  1,  1,  1, -2, -2, -2,  0,  1,  2,  3,  3,
+   -1, -1, -1,  0,  1,  3,  3,  3,  1,  0,  0,  0,  1,  1,  2,  2,
+    2,  2,  1,  0,  0, -1, -1, -1,  3,  2,  1,  0, -1, -2, -3, -3,
+   -1, -1, -1, -2, -2, -3, -4, -5,  0,  0,  0, -1, -1, -3, -3, -4,
+    1,  1,  1,  0,  0, -1, -2, -3,  2,  2,  2,  1,  1,  0, -1, -1,
+    2,  2,  2,  2,  1,  1,  0, -1,  2,  2,  2,  2,  2,  1,  0,  0,
+    1,  1,  2,  1,  1,  1,  0,  0,  0,  0,  1,  1,  0,  0,  0, -1,
+   -2,  2,  3,  1, -1,  1,  1, -1, -3,  2,  3,  0, -1,  1,  1, -1,
+   -3,  2,  3,  0, -1,  1,  1, -1, -4,  2,  3,  0, -1,  1,  1, -2,
+   -4,  1,  3,  0, -1,  1,  1, -2, -4,  1,  3, -1, -2,  1,  1, -2,
+   -3,  1,  2,  0, -1,  1,  1, -2, -3,  1,  2,  0, -1,  1,  1, -1,
+   -1, -1, -1, -2, -2, -2, -2, -2,  1,  1,  1,  1,  0,  0,  0,  0,
+    1,  2,  2,  2,  2,  2,  2,  2,  0,  0,  1,  1,  1,  2,  2,  2,
+   -2, -2, -1, -1, -1,  0,  0,  0, -3, -3, -3, -3, -3, -3, -3, -2,
+   -1, -1, -1, -1, -2, -2, -2, -2,  4,  4,  4,  4,  4,  3,  3,  2,
+   -3, -3, -2, -1,  0,  1,  2,  5, -3, -3, -3, -2, -1,  1,  3,  6,
+   -3, -3, -2, -2,  0,  2,  3,  5, -3, -2, -2, -2,  0,  1,  3,  5,
+   -2, -2, -2, -1, -1,  1,  3,  5, -2, -2, -1, -1,  0,  1,  2,  4,
+   -1, -1, -1, -1,  0,  1,  1,  4, -1, -1, -1, -1,  0,  1,  2,  3,
+    0, -1,  0,  1,  1,  0, -1, -1,  0,  0,  0,  1,  2,  0, -1, -1,
+    1,  0, -1,  0,  1,  0,  0,  0,  1, -1, -2, -1,  0,  0,  0,  0,
+    1, -2, -3, -1,  0,  0,  0,  1,  1, -1, -3, -2,  0,  1,  1,  2,
+    1, -1, -2, -1,  0,  1,  1,  2,  2,  0, -1,  0,  1,  1,  2,  2,
+    1,  1,  1,  1,  0,  0,  1,  2, -1,  0,  0, -1,  0,  0,  0,  1,
+   -3, -2, -1, -1, -1,  0,  1,  1, -4, -2, -1,  0,  0,  1,  1,  1,
+   -3, -2,  0,  0,  1,  1,  1,  1, -3, -1,  0,  1,  1,  1,  0,  0,
+   -1,  0,  1,  1,  1,  0,  0, -1,  0,  1,  2,  2,  1,  0,  0, -1,
+   -4, -4, -4, -3, -2, -1, -1, -1, -2, -2, -2, -1,  0,  0,  0,  0,
+   -1,  0,  0,  0,  1,  1,  1,  1,  0,  0,  1,  1,  1,  1,  1,  1,
+    0,  0,  1,  1,  2,  2,  1,  0,  0,  0,  1,  1,  1,  1,  1,  0,
+    0,  0,  0,  1,  1,  1,  1,  0, -1,  0,  0,  1,  1,  1,  0,  0,
+    1,  2,  2,  2,  1, -1, -2, -4,  1,  1,  2,  2,  1,  0, -2, -4,
+    0,  1,  1,  1,  1,  0, -1, -3, -1,  0,  1,  1,  0,  0, -1, -2,
+   -1,  0,  1,  1,  1,  0,  0, -1, -2, -1,  0,  0,  0,  0,  0, -1,
+   -1, -1,  0,  1,  1,  0,  0,  0, -1,  0,  1,  1,  1,  1,  1,  0,
+    2,  2,  0, -1, -2, -1, -1, -2,  1,  1, -1, -2, -2, -1, -1, -2,
+    1,  1, -1, -2, -2,  0,  0, -1,  1,  1,  0, -2, -1,  1,  1,  0,
+    1,  1,  0, -1, -1,  1,  2,  1,  1,  1,  0, -1, -1,  1,  2,  1,
+    1,  1,  0, -1, -1,  1,  1,  1,  1,  1,  0, -1,  0,  1,  1,  1,
+    0,  0, -1, -2, -4, -4, -4, -4,  3,  3,  3,  2,  1,  0,  0,  0,
+    3,  3,  3,  3,  2,  2,  2,  2,  0,  0,  0,  0,  0,  0,  0,  1,
+   -1, -1, -1, -1, -1, -1, -1,  0,  0, -1,  0,  0, -1,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1, -1,  0,
+   -1, -1,  0, -1, -1,  1,  2, -1,  1,  1,  0,  0,  0,  2,  3, -1,
+    1,  1,  0, -1, -1,  1,  3, -1,  1,  1,  0, -2, -2,  0,  1, -2,
+    1,  0,  0, -2, -2,  0,  1, -3,  0,  0,  0,  0, -1,  1,  1, -3,
+    0,  1,  1,  0,  1,  2,  1, -3, -1,  0,  1,  1,  1,  2,  1, -4,
+   -4, -3,  0,  1,  1,  1,  0,  0, -4, -2,  0,  1,  1,  1,  0, -1,
+   -3, -1,  1,  1,  1,  0, -1, -1, -1,  1,  1,  1,  1,  0, -1,  0,
+    1,  2,  2,  1,  0, -1,  0,  0,  2,  2,  1,  0, -1, -1,  0,  1,
+    2,  1,  0, -1, -2, -1,  0,  1,  2,  2,  0, -1, -2, -1,  1,  1,
+    1,  1,  0,  0, -1, -1, -1, -1,  0, -1, -1, -1, -1, -1, -1, -1,
+   -1, -1, -1, -1, -1, -1, -1, -1,  0,  0, -1, -1, -1, -1, -1, -1,
+    1,  0,  0, -1, -1, -1, -1, -1,  2,  1,  0,  0, -1, -1, -1, -1,
+    5,  3,  2,  1,  0,  0,  0,  0,  6,  5,  3,  2,  1,  0,  0,  0,
+    4,  4,  3,  1,  0,  0,  0,  1,  3,  3,  2,  1,  0,  0,  0,  1,
+    2,  2,  1,  0, -1, -1,  0,  1,  0,  0,  0, -1, -1, -1,  0,  1,
+    0,  0, -1, -1, -2, -1,  0,  2,  0, -1, -1, -2, -2, -2,  0,  1,
+    0, -1, -1, -2, -2, -2, -1,  0,  0,  0, -1, -2, -2, -2, -1,  0,
+    0,  0, -1, -1, -1,  0,  2,  3,  0, -1, -2, -2, -1, -1,  1,  2,
+    1,  0, -1, -1, -1,  0,  0,  0,  1,  1,  1,  0,  0,  0, -1, -1,
+    1,  2,  1,  0,  0, -1, -1, -1, -1,  0,  0,  0, -1, -1, -1, -1,
+   -3, -2, -1, -1,  0,  1,  1,  2, -4, -3, -1,  1,  2,  3,  5,  5,
+    0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  1,
+    0,  0,  0, -1,  0,  0,  0,  1, -1, -1, -2, -2, -2, -1, -1,  0,
+    0,  0,  0,  0,  0,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,
+    1,  1,  1,  1,  2,  2,  1,  1, -4, -3, -4, -4, -4, -4, -3, -3,
+   -1,  0,  1,  2,  2,  3,  3,  3, -1, -1, -1, -1,  0,  0,  0,  0,
+    0,  0, -1, -2, -2, -3, -3, -2,  3,  2,  1,  0, -1, -2, -2, -2,
+    4,  3,  2,  1,  1,  0,  0,  0,  2,  2,  1,  1,  0,  1,  1,  1,
+    0, -1, -1, -1, -1,  0,  0,  1, -2, -2, -2, -2, -2, -1,  0,  0,
+    1, -1,  0,  2,  1, -2, -1,  1,  1, -1,  0,  2,  1, -2, -2,  1,
+    1, -1,  0,  3,  2, -2, -1,  1,  0, -2,  0,  3,  2, -2, -2,  1,
+    0, -2,  0,  3,  2, -2, -2,  1,  0, -2,  0,  3,  1, -2, -1,  1,
+    0, -2,  0,  2,  1, -2, -2,  1,  0, -1,  0,  2,  1, -2, -1,  1,
+    0,  1,  2,  2,  3,  3,  2,  2,  0,  1,  1,  2,  3,  3,  2,  1,
+    0,  0,  1,  2,  2,  2,  2,  1, -1,  0,  0,  1,  1,  1,  1,  1,
+   -1, -1,  0,  0,  0,  0,  0,  0, -1, -1, -1, -1, -1, -1, -1, -1,
+   -2, -2, -2, -2, -2, -2, -2, -1, -2, -2, -2, -2, -2, -2, -2, -1,
+    0,  0, -1, -2, -1,  0,  3,  5,  0,  0, -1, -1, -1,  0,  2,  4,
+    1,  1,  0,  0, -1, -1,  1,  2,  1,  2,  1,  1,  0, -1, -1,  0,
+    0,  1,  2,  1,  0, -1, -2, -2, -1,  0,  1,  2,  1,  0, -3, -3,
+   -2, -1,  1,  2,  2,  0, -2, -4, -2, -1,  0,  2,  2,  1, -1, -3,
+    0,  0,  0,  0,  0,  0, -1, -1,  0,  0, -1,  0,  0,  0,  0,  0,
+   -1, -1, -1, -1,  0,  0,  0,  0, -1, -1, -1, -1, -1, -1, -1,  0,
+   -1, -1, -1, -1, -1, -1, -1,  0, -1,  0,  0,  0,  0, -1, -1,  0,
+    0,  0,  1,  1,  0,  0,  0,  1,  3,  3,  3,  4,  3,  3,  3,  3,
+    5,  1, -2, -2,  0,  0,  0, -1,  4, -1, -3, -1,  0,  0,  0, -1,
+    3, -1, -1,  0,  1,  1,  0, -1,  2,  0,  0,  1,  1,  1,  0, -2,
+    1,  0,  0,  1,  1,  1,  0, -2,  0, -1, -1, -1,  0,  0,  0, -1,
+    0, -1, -1, -1, -1,  0,  0, -1,  2,  1,  0,  0,  0,  1,  0,  0,
+    1,  0,  1,  1,  1,  1,  0,  0,  1,  0,  0,  1,  1,  0,  0,  0,
+    1, -1, -1,  0,  0,  0,  0,  0,  2,  0, -1, -1, -1, -1, -1,  0,
+    3,  1, -1, -1, -2, -2, -2, -1,  4,  2,  1,  0, -1, -2, -2, -1,
+    2,  1,  0,  0, -1, -1,  0,  0,  0, -1, -1, -1, -1,  0,  1,  1,
+    0,  1,  2,  2,  2,  1, -1, -3,  0,  0,  1,  1,  1,  0, -1, -2,
+    0,  0,  0,  0,  0,  0, -1, -1,  0,  0, -1,  0,  0,  1,  1,  0,
+    0,  0, -1,  0,  1,  1,  1,  1,  0,  0,  0,  0,  1,  1,  1,  0,
+    0,  0,  1,  1,  2,  1, -1, -3,  0,  0,  0,  1,  1, -1, -4, -5,
+   -2, -2, -2, -1,  0,  2,  2,  2,  0,  0,  0,  0,  1,  1,  1,  0,
+    1,  1,  1,  1,  1,  0, -2, -3,  0,  0,  1,  1,  0, -1, -3, -4,
+   -1, -1,  0,  1,  0,  0, -2, -3, -1, -1,  0,  1,  1,  1,  0, -1,
+    0,  0,  0,  1,  1,  1,  1,  0,  1,  1,  1,  1,  0,  0,  0,  0,
+    0,  1,  0,  0,  1,  1,  1,  2,  1,  2,  0,  0,  0,  0, -1,  1,
+    0,  2,  0, -1,  1,  0, -1,  0,  0,  1,  0,  0,  2,  1,  0,  1,
+    0,  1, -1,  0,  2,  2,  0,  1, -1,  0, -1, -1,  2,  1,  1,  2,
+   -2, -2, -3, -2,  0,  1,  1,  1, -2, -2, -3, -3, -1, -1, -1,  0,
+   -3, -1,  0,  1,  2,  1,  1,  0, -3, -1,  0,  1,  2,  1,  1,  1,
+   -2,  0,  0,  1,  1,  1,  1,  1, -1,  0,  0,  0,  0,  0,  0,  0,
+   -2,  0,  0,  0,  0, -1, -1,  0, -2,  0,  0,  0,  0,  0, -1, -1,
+   -3,  0,  1,  1,  1,  1,  0,  1, -5, -2,  0,  1,  2,  2,  1,  2,
+   -2, -1, -1,  0,  0,  1,  2,  3,  0,  0,  1,  1,  0,  0,  1,  2,
+    0,  0,  1,  0, -1, -1,  0,  1, -1, -1, -1, -1, -2, -2, -1,  0,
+   -2, -2, -2, -2, -2, -1,  0,  1,  0,  0,  0, -1,  0,  1,  2,  2,
+    2,  1,  0,  0,  0,  1,  2,  2,  2,  1,  0, -1, -1, -1,  0,  0,
+    0,  1,  1,  1,  1,  1, -1, -4, -1, -1,  0,  1,  1,  1,  0, -3,
+   -2, -1,  0,  0,  1,  2,  2, -2, -1,  0,  0,  0,  0,  2,  3, -1,
+   -1,  0,  0,  0,  0,  1,  2,  0,  0,  0, -1, -2, -1,  1,  1,  0,
+    0,  0, -1, -2, -2,  0,  2,  1,  0,  0, -1, -2, -1,  1,  2,  2,
+    1,  0,  0,  0, -2, -3, -2, -3,  0,  0,  1,  0, -2, -2, -1, -1,
+    0, -1,  1,  1, -1, -1,  0,  0,  0, -1,  1,  1, -1, -1,  0,  0,
+    0,  1,  2,  1, -1, -1,  0,  1,  1,  2,  3,  2,  0,  0,  1,  2,
+   -1,  0,  2,  1,  0,  0,  2,  3, -2, -1,  0,  0, -1,  0,  1,  2,
+    1,  1,  0, -1, -2, -2, -1,  1,  1,  1,  1, -1, -2, -2,  0,  2,
+    1,  1,  1, -1, -1, -1,  0,  2,  0,  0,  0,  0,  0,  0,  1,  2,
+   -1, -1, -1,  0,  0,  0,  1,  2, -1, -2, -1,  1,  1,  1,  0,  0,
+   -1, -2, -1,  1,  2,  2,  0, -1, -1, -2, -1,  2,  2,  2,  0, -1,
+   -1, -1, -1, -2, -1, -1,  0,  1,  0,  0, -1, -1, -1,  0,  1,  2,
+    1,  0,  0,  0,  0,  1,  1,  2,  1,  1,  0,  0,  1,  1,  1,  1,
+    1,  1,  0,  1,  1,  0,  1,  1,  1,  1,  1,  1,  0, -1, -1, -1,
+    1,  2,  1,  0, -1, -2, -2, -3,  2,  2,  1,  0, -2, -3, -4, -4,
+   -4, -2,  1,  1,  1,  1,  0,  0, -2,  0,  1,  0,  0,  0,  0,  0,
+    0,  1,  1, -2, -2, -1,  0,  1,  2,  2,  1, -2, -2, -1,  1,  2,
+    1,  2,  1, -2, -2, -1,  1,  2, -1,  1,  1, -1, -1, -1,  0,  1,
+   -2,  0,  1,  1,  0, -1, -1,  0, -2,  0,  2,  2,  1, -1, -1,  0,
+    1,  1,  0,  0,  0,  1,  0,  0, -2, -3, -3, -2, -2, -1,  0,  0,
+   -3, -4, -3, -2, -1,  0,  0,  0, -1, -1,  0,  1,  2,  3,  2,  1,
+    0,  1,  2,  3,  3,  3,  2,  1,  1,  1,  1,  2,  1,  0,  0, -1,
+    0,  0,  0,  0, -1, -1, -1, -1,  0, -1, -1,  0,  0,  0,  0,  0,
+    1,  1,  0,  0, -1, -1,  0,  2,  0,  0,  1,  0, -1, -1,  1,  1,
+   -2, -1,  0,  1,  1,  1,  1,  1, -3, -3,  0,  2,  2,  1,  1,  0,
+   -2, -2,  0,  1,  1,  1,  0,  0,  1,  0,  0,  0,  0,  0, -1, -1,
+    3,  1, -1, -3, -2, -1,  0,  1,  4,  2, -1, -3, -3, -1,  1,  2,
+    0,  0,  0, -1, -1, -1, -1, -1,  1,  2,  1,  0,  0,  0, -1, -1,
+    2,  3,  3,  2,  1,  0, -1, -1,  3,  4,  4,  2,  1,  0, -1, -2,
+    3,  3,  2,  1,  0, -1, -2, -2,  1,  1,  0, -1, -1, -2, -2, -3,
+    0,  0,  0, -1, -1, -2, -2, -2, -1, -1, -1, -1, -1, -2, -2, -1,
+    1,  2,  2,  2,  2,  1,  2,  2,  0,  1,  1,  1,  1,  0,  0,  0,
+    0,  0,  0,  1,  1,  0, -1, -2,  0,  0,  0,  0,  1,  0, -1, -4,
+    1,  0,  0,  0,  0,  0, -2, -5,  1,  0,  0,  0,  0,  0, -1, -4,
+    1,  0, -1,  0,  0,  0, -1, -3,  0, -1, -1,  0,  1,  1,  1, -1,
+   -2, -1,  0,  0, -1, -1, -1, -2, -1,  0,  0,  0, -1, -1, -2, -2,
+    0,  1,  1,  0, -1, -1, -1, -2,  0,  1,  1,  0,  0,  0, -1, -1,
+    0,  1,  0,  0,  1,  1,  1,  0,  1,  1,  0,  0,  1,  2,  2,  1,
+    1,  1,  0,  0,  1,  2,  2,  1,  1,  1,  0, -1,  0,  1,  1,  0,
+    4,  2,  1,  0,  0,  1,  1,  1,  4,  2,  1,  0,  0,  0,  0,  1,
+    3,  1,  0,  0, -1, -1, -1,  0,  1,  0,  0, -1, -1, -2, -1,  0,
+    0,  0,  0,  0, -1, -1, -1,  0, -1, -1,  0,  0, -1, -1,  0,  1,
+   -2, -1,  0, -1, -1,  0,  0,  1, -2, -2, -1, -2, -1,  0,  0,  1,
+    0,  1,  1,  1,  2,  1,  0, -1, -1, -1, -1,  0,  0, -1, -2, -2,
+   -1,  0, -1,  0,  0, -1, -2, -1,  0,  0,  0,  0,  0,  0,  1,  2,
+    0,  0,  0,  0,  0,  0,  2,  3, -1,  0, -1, -1, -1, -1,  0,  3,
+   -1,  0,  0, -1, -1, -2,  0,  3,  0,  0,  0,  0, -1, -1,  1,  4,
+    2,  2,  0,  0,  0,  0,  0,  1,  1,  1, -1, -2, -1, -2, -1,  1,
+   -1, -1, -2, -2, -2, -3, -2,  0, -1,  0, -1, -1, -1, -2, -1,  1,
+    1,  1,  0,  0,  1,  0,  0,  1,  2,  2,  0,  0,  1,  0,  0,  1,
+    2,  2,  0,  0,  0,  0, -1, -1,  2,  2,  0,  0,  1,  0, -1, -1,
+   -1,  0,  1,  1,  0, -1, -1, -1,  1,  2,  3,  2,  1,  0,  0,  0,
+    0,  1,  1,  1,  0, -1,  0,  0, -2, -2, -1,  0,  1,  0,  0,  0,
+   -2, -2, -1,  2,  2,  2,  1,  0, -2, -1,  0,  1,  1,  0,  0, -1,
+   -1, -1,  0,  0, -1, -2, -1, -2,  0,  1,  1,  1,  0,  0,  1,  1,
+   -3, -3, -3, -2, -1, -1, -2, -2, -1, -1,  0,  1,  2,  1,  0,  0,
+    1,  1,  1,  2,  2,  1,  0,  0,  1,  1,  1,  1,  1,  0, -1,  1,
+    1,  0, -1, -1,  0,  0, -1,  1,  0, -1, -1, -1,  0, -1, -1,  1,
+    1,  0, -1,  0,  0, -1,  0,  2,  2,  0, -1,  0,  0,  0,  0,  2,
+    1,  0, -2, -1,  0,  1,  1,  0,  2,  0, -1, -1,  0,  1,  1,  0,
+    1,  0, -2, -1,  0,  1,  0, -1,  1,  0, -1, -1,  0,  1,  0, -1,
+    0,  1,  1,  0,  1,  1,  0,  0, -2,  1,  2,  1,  0,  0,  0,  1,
+   -5,  0,  2,  1,  0, -1,  0,  1, -6, -1,  2,  1,  0, -1,  0,  0,
+    5,  3,  0, -1, -2, -1, -1, -1,  1,  1,  0, -1, -1,  0, -1, -1,
+   -1,  0,  1,  1,  2,  2,  1,  0, -2, -1,  0,  1,  2,  1,  1,  1,
+   -2, -1, -1, -1,  0, -1,  0,  1,  0,  1,  0,  0, -1, -1,  0,  0,
+    0,  1,  1,  1,  1,  0,  0,  0, -3, -2,  0,  1,  1,  0,  0, -1,
+   -1,  0,  1,  0, -1,  0,  2,  3, -1,  0,  0, -2, -4, -2, -1,  0,
+    0,  1,  1,  0, -2, -1,  0, -1,  1,  2,  3,  1,  0,  1,  1,  0,
+   -1,  0,  1,  1,  1,  1,  1,  0, -2, -3, -2,  0,  0,  0,  1,  0,
+   -1, -2, -2,  0,  1,  0,  0, -1,  3,  1,  0,  0,  1,  0, -1, -1,
+   -2, -1,  0,  0, -1, -1,  0,  0, -1,  0,  0,  0,  0,  1,  1,  1,
+   -1, -1, -1,  0,  1,  1,  1,  1,  0, -2, -3, -1,  1,  0,  0,  0,
+    1, -1, -3, -1,  1,  1,  0, -1,  3,  1, -1,  1,  2,  2,  0, -1,
+    3,  1,  0,  1,  2,  1,  1,  0,  0, -2, -2, -1, -1,  0,  0,  0,
+    1,  0, -1, -1,  1,  2,  1,  0,  0, -1, -2, -1,  1,  2,  2,  1,
+   -1, -1, -1,  0,  0,  1,  2,  0, -2,  0,  0,  0,  0,  0,  1, -1,
+   -1,  0,  1,  0, -1, -1, -1, -1,  0,  1,  1,  2,  0, -2, -1,  0,
+    1,  2,  2,  2,  1, -1, -1,  0,  0,  1,  1,  1,  0, -2, -2, -1,
+    0,  0, -1, -1, -1, -1, -2, -2,  0,  0, -1,  0,  1,  2,  2,  1,
+    0,  0, -1, -1,  0,  1,  2,  2,  1,  1, -1, -2, -1, -1, -1, -1,
+    2,  2,  1,  0,  0, -1, -2, -2,  1,  2,  2,  1,  0,  0, -2, -2,
+    0,  0,  0,  0,  1,  1,  0, -1,  0, -1, -1, -1,  2,  3,  2,  1,
+    0, -2,  1,  2, -1,  0,  0,  1, -1, -2,  2,  3, -1,  0,  0,  0,
+    0, -2,  2,  3, -1, -1,  0,  0,  0, -1,  3,  2, -2,  0,  1,  0,
+    0, -1,  3,  1, -2,  0,  1,  0,  0, -1,  2,  1, -1,  1,  0, -1,
+    0,  0,  1, -1, -2,  0,  0, -1,  1,  0,  0, -2, -2, -1, -1, -1,
+    1,  1,  1,  1,  1, -1, -1, -2,  0,  0,  0,  1,  1,  1,  1,  1,
+    0,  0,  0,  1,  1,  1,  2,  3,  1,  0,  0, -1,  0,  0,  1,  2,
+    0, -1, -1, -2, -1,  0,  1,  2, -2, -2, -2, -2, -1,  0,  1,  1,
+   -1, -1, -1, -1,  0,  0,  0, -1,  2,  2,  2,  0, -1, -1, -2, -4,
+   -1, -2, -1, -1,  0,  1,  2,  3, -1, -1, -1, -1,  0,  1,  2,  3,
+    1,  0, -1,  0, -1,  0,  1,  2,  1,  0,  0,  0, -1,  0,  2,  2,
+    1,  0, -1, -1, -2,  0,  1,  2,  0, -2, -2, -2, -3, -1,  0,  1,
+    0, -2, -2, -2, -2, -1,  1,  1,  0,  0,  0,  0,  0,  1,  2,  2
+};
+
+/* list of codebooks for intra-coded vectors */
+static const int8_t* const svq1_intra_codebooks[6] = {
+    svq1_intra_codebook_4x2, svq1_intra_codebook_4x4,
+    svq1_intra_codebook_8x4, svq1_intra_codebook_8x8,
+    NULL, NULL,
+};
+
+static const int8_t svq1_intra_codebook_sum[4][16*6] = {
+ {
+  0,  0,  0, -1, -1, -1, -1, -2,  0, -1, -1,  0, -1,  0,  1,  0,
+  1,  0, -1,  1,  0,  0, -1,  1, -1,  0,  0,  0, -1,  1,  0,  0,
+ -1,  0,  0,  1, -1,  1,  0, -1, -1,  0,  1,  1,  0,  0, -1,  1,
+  0,  1,  0,  0,  1, -1,  0,  0,  0, -1,  1,  0,  1,  0, -2,  1,
+  0, -1,  1,  0,  0,  0,  1,  0, -1,  0,  0,  0, -1,  0,  0,  0,
+  0,  1,  1,  0,  0, -1,  0,  1,  0,  0,  0,  0, -1,  1,  1, -1,
+ },{
+ -1, -2,  0, -1,  1,  0, -1,  0, -1, -4, -1, -2, -1, -2,  1, -2,
+  0,  0,  4, -2, -1,  1,  1,  0,  2,  1,  1,  0,  2,  0,  0,  0,
+  1,  1,  0, -1, -1, -1,  1,  0, -1, -3, -3,  1, -1,  1, -2, -1,
+  1, -1,  0,  1,  2,  1, -1, -1,  1,  1,  1,  2,  1,  0,  1, -2,
+ -2,  0, -1, -2, -2,  0, -1, -1, -1,  0,  1,  0, -1, -1,  0, -1,
+  0,  2,  1,  2,  2,  1, -1,  1,  0,  2,  0, -1,  1,  0,  0,  0,
+ },{
+ -2,  0, -1, -1,  1,  1, -2,  0, -2,  0,  1, -2, -2,  1, -1, -1,
+  3, -2,  0, -3, -4, -3,  2,  1,  0,  3, -2,  2,  3,  2,  2, -1,
+ -3,  1,  0,  1,  0,  0,  0,  1, -2,  1, -2, -2, -1, -2, -2,  2,
+  0, -4,  0,  2, -1,  0,  2,  2,  2,  1,  0, -1, -1,  1, -3,  2,
+  2,  1,  0,  3,  1, -1,  1,  3,  1,  0,  1,  1,  2, -1,  1, -1,
+ -2, -1,  0, -1,  1, -1,  1, -2, -2, -1, -1, -3,  1, -4, -3,  1,
+ },{
+ -2,  0, -2,  3, -1, -1,  0,  2,  2, -1, -3,  2,  1,  0, -2, -1,
+ -3, -2, -2,  1,  2, -3,  0,  1, -5, -2, -3,  0, -2, -1,  2,  0,
+ -1, -1,  0, -2,  1,  3, -7, -2, -2, -1,  2, -1,  0,  3,  1,  3,
+  1,  0,  0,  1,  2,  3,  1,  2,  0, -2, -2,  1,  1,  2,  2,  3,
+  4,  1, -1,  2, -2,  4,  0,  0,  0,  4,  2,  0, -2, -2,  2, -4,
+ -1,  5, -2, -2, -3,  2, -3, -1,  3, -3,  0,  4,  3,  0,  1, -2,
+ }
+};
diff --git a/contrib/ffmpeg/libavcodec/svq1_vlc.h b/contrib/ffmpeg/libavcodec/svq1_vlc.h
new file mode 100644
index 000000000..56463700f
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/svq1_vlc.h
@@ -0,0 +1,281 @@
+/*
+ * copyright (C) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef SVQ1_VLC_H
+#define SVQ1_VLC_H
+
+/* values in this table range from 0..3; adjust retrieved value by +0 */
+static const uint8_t svq1_block_type_vlc[4][2] = {
+ /* { code, length } */
+    { 0x1, 1 },  { 0x1, 2 },  { 0x1, 3 },  { 0x0, 3 }
+
+};
+
+/* values in this table range from -1..6; adjust retrieved value by -1 */
+static const uint8_t svq1_intra_multistage_vlc[6][8][2] = {
+ /* { code, length } */
+{
+    { 0x1, 5 },  { 0x1, 1 },  { 0x3, 3 },  { 0x2, 3 },
+    { 0x3, 4 },  { 0x2, 4 },  { 0x0, 5 },  { 0x1, 4 }
+},{
+    { 0x1, 4 },  { 0x3, 2 },  { 0x5, 3 },  { 0x4, 3 },
+    { 0x3, 3 },  { 0x2, 3 },  { 0x0, 4 },  { 0x1, 3 }
+},{
+    { 0x1, 5 },  { 0x1, 1 },  { 0x3, 3 },  { 0x0, 5 },
+    { 0x3, 4 },  { 0x2, 3 },  { 0x2, 4 },  { 0x1, 4 }
+},{
+    { 0x1, 6 },  { 0x1, 1 },  { 0x1, 2 },  { 0x0, 6 },
+    { 0x3, 4 },  { 0x2, 4 },  { 0x1, 5 },  { 0x1, 4 }
+},{
+    { 0x1, 6 },  { 0x1, 1 },  { 0x1, 2 },  { 0x3, 5 },
+    { 0x2, 5 },  { 0x0, 6 },  { 0x1, 5 },  { 0x1, 3 }
+},{
+    { 0x1, 7 },  { 0x1, 1 },  { 0x1, 2 },  { 0x1, 3 },
+    { 0x1, 4 },  { 0x1, 6 },  { 0x0, 7 },  { 0x1, 5 }
+}
+};
+
+/* values in this table range from -1..6; adjust retrieved value by -1 */
+static const uint8_t svq1_inter_multistage_vlc[6][8][2] = {
+ /* { code, length } */
+{
+    { 0x3, 2 },  { 0x5, 3 },  { 0x4, 3 },  { 0x3, 3 },
+    { 0x2, 3 },  { 0x1, 3 },  { 0x1, 4 },  { 0x0, 4 }
+},{
+    { 0x3, 2 },  { 0x5, 3 },  { 0x4, 3 },  { 0x3, 3 },
+    { 0x2, 3 },  { 0x1, 3 },  { 0x1, 4 },  { 0x0, 4 }
+},{
+    { 0x1, 1 },  { 0x3, 3 },  { 0x2, 3 },  { 0x3, 4 },
+    { 0x2, 4 },  { 0x1, 4 },  { 0x1, 5 },  { 0x0, 5 }
+},{
+    { 0x1, 1 },  { 0x3, 3 },  { 0x2, 3 },  { 0x3, 4 },
+    { 0x2, 4 },  { 0x1, 4 },  { 0x1, 5 },  { 0x0, 5 }
+},{
+    { 0x1, 1 },  { 0x3, 3 },  { 0x2, 3 },  { 0x3, 4 },
+    { 0x2, 4 },  { 0x1, 4 },  { 0x1, 5 },  { 0x0, 5 }
+},{
+    { 0x1, 1 },  { 0x1, 2 },  { 0x1, 3 },  { 0x3, 5 },
+    { 0x2, 5 },  { 0x1, 5 },  { 0x1, 6 },  { 0x0, 6 }
+}
+};
+
+/* values in this table range from 0..255; adjust retrieved value by +0 */
+static const uint16_t svq1_intra_mean_vlc[256][2] = {
+ /* { code, length } */
+    { 0x37, 6 },  { 0x56, 7 },  { 0x1, 17 },  { 0x1, 20 },
+    { 0x2, 20 },  { 0x3, 20 },  { 0x0, 20 },  { 0x4, 20 },
+    { 0x5, 20 },  { 0x3, 19 },  { 0x15, 11 },  { 0x42, 9 },
+    { 0x14, 11 },  { 0x3, 14 },  { 0x2, 14 },  { 0x1, 15 },
+    { 0x1, 16 },  { 0x1, 12 },  { 0x2B, 10 },  { 0x18, 11 },
+    { 0xC, 11 },  { 0x41, 9 },  { 0x78, 8 },  { 0x6C, 8 },
+    { 0x55, 7 },  { 0xF, 4 },  { 0xE, 4 },  { 0x34, 6 },
+    { 0x51, 7 },  { 0x72, 8 },  { 0x6E, 8 },  { 0x40, 9 },
+    { 0x3F, 9 },  { 0x3E, 9 },  { 0x3D, 9 },  { 0x3C, 9 },
+    { 0x3B, 9 },  { 0x3A, 9 },  { 0x39, 9 },  { 0x38, 9 },
+    { 0x37, 9 },  { 0x43, 9 },  { 0x46, 9 },  { 0x47, 9 },
+    { 0x45, 9 },  { 0x44, 9 },  { 0x49, 9 },  { 0x48, 9 },
+    { 0x4A, 8 },  { 0x79, 8 },  { 0x76, 8 },  { 0x77, 8 },
+    { 0x71, 8 },  { 0x75, 8 },  { 0x74, 8 },  { 0x73, 8 },
+    { 0x6A, 8 },  { 0x55, 8 },  { 0x70, 8 },  { 0x6F, 8 },
+    { 0x52, 8 },  { 0x6D, 8 },  { 0x4C, 8 },  { 0x6B, 8 },
+    { 0x40, 7 },  { 0x69, 8 },  { 0x68, 8 },  { 0x67, 8 },
+    { 0x66, 8 },  { 0x65, 8 },  { 0x64, 8 },  { 0x63, 8 },
+    { 0x62, 8 },  { 0x61, 8 },  { 0x60, 8 },  { 0x5F, 8 },
+    { 0x5E, 8 },  { 0x5D, 8 },  { 0x5C, 8 },  { 0x5B, 8 },
+    { 0x5A, 8 },  { 0x59, 8 },  { 0x58, 8 },  { 0x57, 8 },
+    { 0x56, 8 },  { 0x3D, 7 },  { 0x54, 8 },  { 0x53, 8 },
+    { 0x3F, 7 },  { 0x51, 8 },  { 0x50, 8 },  { 0x4F, 8 },
+    { 0x4E, 8 },  { 0x4D, 8 },  { 0x41, 7 },  { 0x4B, 8 },
+    { 0x53, 7 },  { 0x3E, 7 },  { 0x48, 8 },  { 0x4F, 7 },
+    { 0x52, 7 },  { 0x45, 8 },  { 0x50, 7 },  { 0x43, 8 },
+    { 0x42, 8 },  { 0x41, 8 },  { 0x42, 7 },  { 0x43, 7 },
+    { 0x3E, 8 },  { 0x44, 7 },  { 0x3C, 8 },  { 0x45, 7 },
+    { 0x46, 7 },  { 0x47, 7 },  { 0x48, 7 },  { 0x49, 7 },
+    { 0x4A, 7 },  { 0x4B, 7 },  { 0x4C, 7 },  { 0x4D, 7 },
+    { 0x4E, 7 },  { 0x58, 7 },  { 0x59, 7 },  { 0x5A, 7 },
+    { 0x5B, 7 },  { 0x5C, 7 },  { 0x5D, 7 },  { 0x44, 8 },
+    { 0x49, 8 },  { 0x29, 8 },  { 0x3F, 8 },  { 0x3D, 8 },
+    { 0x3B, 8 },  { 0x2C, 8 },  { 0x28, 8 },  { 0x25, 8 },
+    { 0x26, 8 },  { 0x5E, 7 },  { 0x57, 7 },  { 0x54, 7 },
+    { 0x5F, 7 },  { 0x62, 7 },  { 0x63, 7 },  { 0x64, 7 },
+    { 0x61, 7 },  { 0x65, 7 },  { 0x67, 7 },  { 0x66, 7 },
+    { 0x35, 6 },  { 0x36, 6 },  { 0x60, 7 },  { 0x39, 8 },
+    { 0x3A, 8 },  { 0x38, 8 },  { 0x37, 8 },  { 0x36, 8 },
+    { 0x35, 8 },  { 0x34, 8 },  { 0x33, 8 },  { 0x32, 8 },
+    { 0x31, 8 },  { 0x30, 8 },  { 0x2D, 8 },  { 0x2B, 8 },
+    { 0x2A, 8 },  { 0x27, 8 },  { 0x40, 8 },  { 0x46, 8 },
+    { 0x47, 8 },  { 0x26, 9 },  { 0x25, 9 },  { 0x24, 9 },
+    { 0x23, 9 },  { 0x22, 9 },  { 0x2E, 8 },  { 0x2F, 8 },
+    { 0x1F, 9 },  { 0x36, 9 },  { 0x1D, 9 },  { 0x21, 9 },
+    { 0x1B, 9 },  { 0x1C, 9 },  { 0x19, 9 },  { 0x1A, 9 },
+    { 0x18, 9 },  { 0x17, 9 },  { 0x16, 9 },  { 0x1E, 9 },
+    { 0x20, 9 },  { 0x27, 9 },  { 0x28, 9 },  { 0x29, 9 },
+    { 0x2A, 9 },  { 0x2B, 9 },  { 0x2C, 9 },  { 0x2D, 9 },
+    { 0x2E, 9 },  { 0x2F, 9 },  { 0x30, 9 },  { 0x35, 9 },
+    { 0x31, 9 },  { 0x32, 9 },  { 0x33, 9 },  { 0x34, 9 },
+    { 0x19, 10 },  { 0x2A, 10 },  { 0x17, 10 },  { 0x16, 10 },
+    { 0x15, 10 },  { 0x28, 10 },  { 0x26, 10 },  { 0x25, 10 },
+    { 0x22, 10 },  { 0x21, 10 },  { 0x18, 10 },  { 0x14, 10 },
+    { 0x29, 10 },  { 0x12, 10 },  { 0xD, 10 },  { 0xE, 10 },
+    { 0xF, 10 },  { 0x10, 10 },  { 0x11, 10 },  { 0x1A, 10 },
+    { 0x1B, 10 },  { 0x1C, 10 },  { 0x1D, 10 },  { 0x1E, 10 },
+    { 0x1F, 10 },  { 0x20, 10 },  { 0x13, 10 },  { 0x23, 10 },
+    { 0x24, 10 },  { 0x9, 11 },  { 0x8, 11 },  { 0x7, 11 },
+    { 0x27, 10 },  { 0x5, 11 },  { 0xB, 11 },  { 0x6, 11 },
+    { 0x4, 11 },  { 0x3, 11 },  { 0x2, 11 },  { 0x1, 11 },
+    { 0xA, 11 },  { 0x16, 11 },  { 0x19, 11 },  { 0x17, 11 },
+    { 0xD, 11 },  { 0xE, 11 },  { 0xF, 11 },  { 0x10, 11 },
+    { 0x11, 11 },  { 0x12, 11 },  { 0x13, 11 },  { 0x1, 14 }
+};
+
+/* values in this table range from -256..255; adjust retrieved value by -256 */
+static const uint16_t svq1_inter_mean_vlc[512][2] = {
+ /* { code, length } */
+    { 0x5A, 22 },  { 0xD4, 22 },  { 0xD5, 22 },  { 0xD6, 22 },
+    { 0xD7, 22 },  { 0xD8, 22 },  { 0xD9, 22 },  { 0xDA, 22 },
+    { 0xDB, 22 },  { 0xDC, 22 },  { 0xDD, 22 },  { 0xDE, 22 },
+    { 0xDF, 22 },  { 0xE0, 22 },  { 0xE1, 22 },  { 0xE2, 22 },
+    { 0xE3, 22 },  { 0xE4, 22 },  { 0xE5, 22 },  { 0xE6, 22 },
+    { 0xE8, 22 },  { 0xCB, 22 },  { 0xE9, 22 },  { 0xEA, 22 },
+    { 0xE7, 22 },  { 0xEC, 22 },  { 0xED, 22 },  { 0xEE, 22 },
+    { 0xEF, 22 },  { 0xF0, 22 },  { 0xF1, 22 },  { 0xF2, 22 },
+    { 0xF3, 22 },  { 0xF4, 22 },  { 0xF5, 22 },  { 0xF6, 22 },
+    { 0xF7, 22 },  { 0xF8, 22 },  { 0x102, 22 },  { 0xEB, 22 },
+    { 0xF9, 22 },  { 0xFC, 22 },  { 0xFD, 22 },  { 0xFE, 22 },
+    { 0x100, 22 },  { 0x5C, 22 },  { 0x60, 22 },  { 0x101, 22 },
+    { 0x71, 22 },  { 0x104, 22 },  { 0x105, 22 },  { 0xFB, 22 },
+    { 0xFF, 22 },  { 0x86, 21 },  { 0xFA, 22 },  { 0x7C, 22 },
+    { 0x75, 22 },  { 0x103, 22 },  { 0x78, 22 },  { 0xD3, 22 },
+    { 0x7B, 22 },  { 0x82, 22 },  { 0xD2, 22 },  { 0xD1, 22 },
+    { 0xD0, 22 },  { 0xCF, 22 },  { 0xCE, 22 },  { 0xCD, 22 },
+    { 0xCC, 22 },  { 0xC3, 22 },  { 0xCA, 22 },  { 0xC9, 22 },
+    { 0xC8, 22 },  { 0xC7, 22 },  { 0xC6, 22 },  { 0xC5, 22 },
+    { 0x8B, 22 },  { 0xC4, 22 },  { 0xC2, 22 },  { 0xC1, 22 },
+    { 0xC0, 22 },  { 0xBF, 22 },  { 0xBE, 22 },  { 0xBD, 22 },
+    { 0xBC, 22 },  { 0xBB, 22 },  { 0xBA, 22 },  { 0xB9, 22 },
+    { 0x61, 22 },  { 0x84, 22 },  { 0x85, 22 },  { 0x86, 22 },
+    { 0x87, 22 },  { 0x88, 22 },  { 0x89, 22 },  { 0x8A, 22 },
+    { 0x8C, 22 },  { 0x8D, 22 },  { 0x8E, 22 },  { 0x8F, 22 },
+    { 0x90, 22 },  { 0x91, 22 },  { 0x92, 22 },  { 0x93, 22 },
+    { 0x94, 22 },  { 0x95, 22 },  { 0x96, 22 },  { 0x97, 22 },
+    { 0x98, 22 },  { 0x99, 22 },  { 0x9A, 22 },  { 0x9B, 22 },
+    { 0x9C, 22 },  { 0x9D, 22 },  { 0x9E, 22 },  { 0x9F, 22 },
+    { 0xA0, 22 },  { 0xA1, 22 },  { 0xA2, 22 },  { 0xA3, 22 },
+    { 0xA4, 22 },  { 0xA5, 22 },  { 0xA6, 22 },  { 0xA7, 22 },
+    { 0xA8, 22 },  { 0xA9, 22 },  { 0xAA, 22 },  { 0xAB, 22 },
+    { 0x7F, 22 },  { 0x8F, 21 },  { 0xAC, 22 },  { 0xAD, 22 },
+    { 0xAE, 22 },  { 0xAF, 22 },  { 0xB0, 22 },  { 0xB1, 22 },
+    { 0x53, 20 },  { 0x90, 21 },  { 0xB2, 22 },  { 0x91, 21 },
+    { 0xB3, 22 },  { 0xB4, 22 },  { 0x54, 20 },  { 0xB5, 22 },
+    { 0xB6, 22 },  { 0x8C, 21 },  { 0x34, 19 },  { 0x3D, 18 },
+    { 0x55, 20 },  { 0xB7, 22 },  { 0xB8, 22 },  { 0x8B, 21 },
+    { 0x56, 20 },  { 0x3D, 19 },  { 0x57, 20 },  { 0x58, 20 },
+    { 0x40, 19 },  { 0x43, 19 },  { 0x47, 19 },  { 0x2A, 18 },
+    { 0x2E, 19 },  { 0x2C, 18 },  { 0x46, 19 },  { 0x59, 20 },
+    { 0x49, 19 },  { 0x2D, 19 },  { 0x38, 18 },  { 0x36, 18 },
+    { 0x39, 18 },  { 0x45, 19 },  { 0x28, 18 },  { 0x30, 18 },
+    { 0x35, 18 },  { 0x20, 17 },  { 0x44, 19 },  { 0x32, 18 },
+    { 0x31, 18 },  { 0x1F, 17 },  { 0x2F, 18 },  { 0x2E, 18 },
+    { 0x2D, 18 },  { 0x21, 17 },  { 0x22, 17 },  { 0x23, 17 },
+    { 0x24, 17 },  { 0x27, 16 },  { 0x23, 16 },  { 0x20, 16 },
+    { 0x1D, 16 },  { 0x25, 16 },  { 0x1E, 16 },  { 0x24, 16 },
+    { 0x2A, 16 },  { 0x26, 16 },  { 0x21, 15 },  { 0x29, 16 },
+    { 0x22, 15 },  { 0x23, 15 },  { 0x24, 15 },  { 0x1B, 15 },
+    { 0x1A, 15 },  { 0x1D, 15 },  { 0x1F, 15 },  { 0x27, 15 },
+    { 0x17, 14 },  { 0x18, 14 },  { 0x19, 14 },  { 0x1B, 14 },
+    { 0x1C, 14 },  { 0x1E, 14 },  { 0x25, 14 },  { 0x20, 14 },
+    { 0x21, 14 },  { 0x13, 13 },  { 0x14, 13 },  { 0x15, 13 },
+    { 0x16, 13 },  { 0x17, 13 },  { 0x18, 13 },  { 0x19, 13 },
+    { 0x1A, 13 },  { 0x18, 12 },  { 0x17, 12 },  { 0x15, 12 },
+    { 0x14, 12 },  { 0x13, 12 },  { 0x12, 12 },  { 0xF, 11 },
+    { 0x10, 11 },  { 0x12, 11 },  { 0x13, 11 },  { 0x1B, 11 },
+    { 0x1A, 11 },  { 0xE, 10 },  { 0x13, 10 },  { 0xF, 10 },
+    { 0x10, 10 },  { 0x11, 10 },  { 0x12, 10 },  { 0xD, 9 },
+    { 0x14, 9 },  { 0x15, 9 },  { 0xC, 9 },  { 0x13, 9 },
+    { 0xF, 8 },  { 0xE, 8 },  { 0x10, 8 },  { 0x11, 8 },
+    { 0xC, 7 },  { 0x9, 7 },  { 0xA, 7 },  { 0x8, 6 },
+    { 0x9, 6 },  { 0x9, 5 },  { 0x8, 5 },  { 0x5, 4 },
+    { 0x1, 1 },  { 0x3, 3 },  { 0x7, 5 },  { 0x6, 5 },
+    { 0xB, 6 },  { 0xA, 6 },  { 0xE, 7 },  { 0xF, 7 },
+    { 0xB, 7 },  { 0xD, 7 },  { 0xB, 8 },  { 0xD, 8 },
+    { 0xC, 8 },  { 0xF, 9 },  { 0x10, 9 },  { 0x11, 9 },
+    { 0xE, 9 },  { 0x12, 9 },  { 0x17, 10 },  { 0x14, 10 },
+    { 0x16, 10 },  { 0x15, 10 },  { 0x19, 11 },  { 0x18, 11 },
+    { 0x17, 11 },  { 0x16, 11 },  { 0x15, 11 },  { 0x14, 11 },
+    { 0x11, 11 },  { 0x19, 12 },  { 0x1A, 12 },  { 0x16, 12 },
+    { 0x1D, 12 },  { 0x1B, 12 },  { 0x1C, 12 },  { 0x20, 13 },
+    { 0x1C, 13 },  { 0x23, 13 },  { 0x22, 13 },  { 0x21, 13 },
+    { 0x1F, 13 },  { 0x1E, 13 },  { 0x1B, 13 },  { 0x1D, 13 },
+    { 0x24, 14 },  { 0x16, 14 },  { 0x1A, 14 },  { 0x22, 14 },
+    { 0x1D, 14 },  { 0x1F, 14 },  { 0x15, 14 },  { 0x23, 14 },
+    { 0x18, 15 },  { 0x20, 15 },  { 0x29, 15 },  { 0x28, 15 },
+    { 0x26, 15 },  { 0x25, 15 },  { 0x19, 15 },  { 0x1C, 15 },
+    { 0x1E, 15 },  { 0x17, 15 },  { 0x2C, 16 },  { 0x2B, 16 },
+    { 0x1C, 16 },  { 0x21, 16 },  { 0x2D, 16 },  { 0x28, 16 },
+    { 0x1F, 16 },  { 0x1B, 16 },  { 0x1A, 16 },  { 0x22, 16 },
+    { 0x2D, 17 },  { 0x32, 17 },  { 0x2C, 17 },  { 0x27, 17 },
+    { 0x31, 17 },  { 0x33, 17 },  { 0x2F, 17 },  { 0x2B, 17 },
+    { 0x37, 18 },  { 0x2A, 17 },  { 0x2E, 17 },  { 0x30, 17 },
+    { 0x29, 17 },  { 0x28, 17 },  { 0x26, 17 },  { 0x25, 17 },
+    { 0x2F, 19 },  { 0x33, 18 },  { 0x34, 18 },  { 0x30, 19 },
+    { 0x3A, 18 },  { 0x3B, 18 },  { 0x31, 19 },  { 0x3C, 18 },
+    { 0x2B, 18 },  { 0x29, 18 },  { 0x48, 19 },  { 0x27, 18 },
+    { 0x42, 19 },  { 0x41, 19 },  { 0x26, 18 },  { 0x52, 20 },
+    { 0x51, 20 },  { 0x3F, 19 },  { 0x3E, 19 },  { 0x39, 19 },
+    { 0x3C, 19 },  { 0x3B, 19 },  { 0x3A, 19 },  { 0x25, 18 },
+    { 0x38, 19 },  { 0x50, 20 },  { 0x37, 19 },  { 0x36, 19 },
+    { 0x87, 21 },  { 0x4F, 20 },  { 0x35, 19 },  { 0x4E, 20 },
+    { 0x33, 19 },  { 0x32, 19 },  { 0x4D, 20 },  { 0x4C, 20 },
+    { 0x83, 22 },  { 0x4B, 20 },  { 0x81, 22 },  { 0x80, 22 },
+    { 0x8E, 21 },  { 0x7E, 22 },  { 0x7D, 22 },  { 0x84, 21 },
+    { 0x8D, 21 },  { 0x7A, 22 },  { 0x79, 22 },  { 0x4A, 20 },
+    { 0x77, 22 },  { 0x76, 22 },  { 0x89, 21 },  { 0x74, 22 },
+    { 0x73, 22 },  { 0x72, 22 },  { 0x49, 20 },  { 0x70, 22 },
+    { 0x6F, 22 },  { 0x6E, 22 },  { 0x6D, 22 },  { 0x6C, 22 },
+    { 0x6B, 22 },  { 0x6A, 22 },  { 0x69, 22 },  { 0x68, 22 },
+    { 0x67, 22 },  { 0x66, 22 },  { 0x65, 22 },  { 0x64, 22 },
+    { 0x63, 22 },  { 0x62, 22 },  { 0x8A, 21 },  { 0x88, 21 },
+    { 0x5F, 22 },  { 0x5E, 22 },  { 0x5D, 22 },  { 0x85, 21 },
+    { 0x5B, 22 },  { 0x83, 21 },  { 0x59, 22 },  { 0x58, 22 },
+    { 0x57, 22 },  { 0x56, 22 },  { 0x55, 22 },  { 0x54, 22 },
+    { 0x53, 22 },  { 0x52, 22 },  { 0x51, 22 },  { 0x50, 22 },
+    { 0x4F, 22 },  { 0x4E, 22 },  { 0x4D, 22 },  { 0x4C, 22 },
+    { 0x4B, 22 },  { 0x4A, 22 },  { 0x49, 22 },  { 0x48, 22 },
+    { 0x47, 22 },  { 0x46, 22 },  { 0x45, 22 },  { 0x44, 22 },
+    { 0x43, 22 },  { 0x42, 22 },  { 0x41, 22 },  { 0x40, 22 },
+    { 0x3F, 22 },  { 0x3E, 22 },  { 0x3D, 22 },  { 0x3C, 22 },
+    { 0x3B, 22 },  { 0x3A, 22 },  { 0x39, 22 },  { 0x38, 22 },
+    { 0x37, 22 },  { 0x36, 22 },  { 0x35, 22 },  { 0x34, 22 },
+    { 0x33, 22 },  { 0x32, 22 },  { 0x31, 22 },  { 0x30, 22 },
+    { 0x2F, 22 },  { 0x2E, 22 },  { 0x2D, 22 },  { 0x2C, 22 },
+    { 0x2B, 22 },  { 0x2A, 22 },  { 0x29, 22 },  { 0x28, 22 },
+    { 0x27, 22 },  { 0x26, 22 },  { 0x25, 22 },  { 0x24, 22 },
+    { 0x23, 22 },  { 0x22, 22 },  { 0x21, 22 },  { 0x20, 22 },
+    { 0x1F, 22 },  { 0x1E, 22 },  { 0x1D, 22 },  { 0x1C, 22 },
+    { 0x1B, 22 },  { 0x1A, 22 },  { 0x19, 22 },  { 0x18, 22 },
+    { 0x17, 22 },  { 0x16, 22 },  { 0x15, 22 },  { 0x14, 22 },
+    { 0x13, 22 },  { 0x12, 22 },  { 0x11, 22 },  { 0x10, 22 },
+    { 0xF, 22 },  { 0xE, 22 },  { 0xD, 22 },  { 0xC, 22 },
+    { 0xB, 22 },  { 0xA, 22 },  { 0x9, 22 },  { 0x8, 22 },
+    { 0x7, 22 },  { 0x6, 22 },  { 0x5, 22 },  { 0x4, 22 },
+    { 0x3, 22 },  { 0x2, 22 },  { 0x1, 22 },  { 0x0, 22 }
+};
+
+#endif
diff --git a/contrib/ffmpeg/libavcodec/svq3.c b/contrib/ffmpeg/libavcodec/svq3.c
new file mode 100644
index 000000000..edf3b6714
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/svq3.c
@@ -0,0 +1,1014 @@
+/*
+ * Copyright (c) 2003 The FFmpeg Project.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *
+ * How to use this decoder:
+ * SVQ3 data is transported within Apple Quicktime files. Quicktime files
+ * have stsd atoms to describe media trak properties. A stsd atom for a
+ * video trak contains 1 or more ImageDescription atoms. These atoms begin
+ * with the 4-byte length of the atom followed by the codec fourcc. Some
+ * decoders need information in this atom to operate correctly. Such
+ * is the case with SVQ3. In order to get the best use out of this decoder,
+ * the calling app must make the SVQ3 ImageDescription atom available
+ * via the AVCodecContext's extradata[_size] field:
+ *
+ * AVCodecContext.extradata = pointer to ImageDescription, first characters
+ * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
+ * AVCodecContext.extradata_size = size of ImageDescription atom memory
+ * buffer (which will be the same as the ImageDescription atom size field
+ * from the QT file, minus 4 bytes since the length is missing)
+ *
+ * You will know you have these parameters passed correctly when the decoder
+ * correctly decodes this file:
+ *  ftp://ftp.mplayerhq.hu/MPlayer/samples/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
+ *
+ */
+
+/**
+ * @file svq3.c
+ * svq3 decoder.
+ */
+
+#define FULLPEL_MODE  1
+#define HALFPEL_MODE  2
+#define THIRDPEL_MODE 3
+#define PREDICT_MODE  4
+
+/* dual scan (from some older h264 draft)
+ o-->o-->o   o
+         |  /|
+ o   o   o / o
+ | / |   |/  |
+ o   o   o   o
+   /
+ o-->o-->o-->o
+*/
+static const uint8_t svq3_scan[16]={
+ 0+0*4, 1+0*4, 2+0*4, 2+1*4,
+ 2+2*4, 3+0*4, 3+1*4, 3+2*4,
+ 0+1*4, 0+2*4, 1+1*4, 1+2*4,
+ 0+3*4, 1+3*4, 2+3*4, 3+3*4,
+};
+
+static const uint8_t svq3_pred_0[25][2] = {
+  { 0, 0 },
+  { 1, 0 }, { 0, 1 },
+  { 0, 2 }, { 1, 1 }, { 2, 0 },
+  { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
+  { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
+  { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
+  { 2, 4 }, { 3, 3 }, { 4, 2 },
+  { 4, 3 }, { 3, 4 },
+  { 4, 4 }
+};
+
+static const int8_t svq3_pred_1[6][6][5] = {
+  { { 2,-1,-1,-1,-1 }, { 2, 1,-1,-1,-1 }, { 1, 2,-1,-1,-1 },
+    { 2, 1,-1,-1,-1 }, { 1, 2,-1,-1,-1 }, { 1, 2,-1,-1,-1 } },
+  { { 0, 2,-1,-1,-1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
+    { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
+  { { 2, 0,-1,-1,-1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
+    { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
+  { { 2, 0,-1,-1,-1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
+    { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
+  { { 0, 2,-1,-1,-1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
+    { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
+  { { 0, 2,-1,-1,-1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
+    { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
+};
+
+static const struct { uint8_t run; uint8_t level; } svq3_dct_tables[2][16] = {
+  { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
+    { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
+  { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
+    { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
+};
+
+static const uint32_t svq3_dequant_coeff[32] = {
+   3881,  4351,  4890,  5481,  6154,  6914,  7761,  8718,
+   9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
+  24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
+  61694, 68745, 77615, 89113,100253,109366,126635,141533
+};
+
+
+static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
+    const int qmul= svq3_dequant_coeff[qp];
+#define stride 16
+    int i;
+    int temp[16];
+    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
+    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
+
+    for(i=0; i<4; i++){
+        const int offset= y_offset[i];
+        const int z0= 13*(block[offset+stride*0] +    block[offset+stride*4]);
+        const int z1= 13*(block[offset+stride*0] -    block[offset+stride*4]);
+        const int z2=  7* block[offset+stride*1] - 17*block[offset+stride*5];
+        const int z3= 17* block[offset+stride*1] +  7*block[offset+stride*5];
+
+        temp[4*i+0]= z0+z3;
+        temp[4*i+1]= z1+z2;
+        temp[4*i+2]= z1-z2;
+        temp[4*i+3]= z0-z3;
+    }
+
+    for(i=0; i<4; i++){
+        const int offset= x_offset[i];
+        const int z0= 13*(temp[4*0+i] +    temp[4*2+i]);
+        const int z1= 13*(temp[4*0+i] -    temp[4*2+i]);
+        const int z2=  7* temp[4*1+i] - 17*temp[4*3+i];
+        const int z3= 17* temp[4*1+i] +  7*temp[4*3+i];
+
+        block[stride*0 +offset]= ((z0 + z3)*qmul + 0x80000)>>20;
+        block[stride*2 +offset]= ((z1 + z2)*qmul + 0x80000)>>20;
+        block[stride*8 +offset]= ((z1 - z2)*qmul + 0x80000)>>20;
+        block[stride*10+offset]= ((z0 - z3)*qmul + 0x80000)>>20;
+    }
+}
+#undef stride
+
+static void svq3_add_idct_c (uint8_t *dst, DCTELEM *block, int stride, int qp, int dc){
+    const int qmul= svq3_dequant_coeff[qp];
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    if (dc) {
+        dc = 13*13*((dc == 1) ? 1538*block[0] : ((qmul*(block[0] >> 3)) / 2));
+        block[0] = 0;
+    }
+
+    for (i=0; i < 4; i++) {
+        const int z0= 13*(block[0 + 4*i] +    block[2 + 4*i]);
+        const int z1= 13*(block[0 + 4*i] -    block[2 + 4*i]);
+        const int z2=  7* block[1 + 4*i] - 17*block[3 + 4*i];
+        const int z3= 17* block[1 + 4*i] +  7*block[3 + 4*i];
+
+        block[0 + 4*i]= z0 + z3;
+        block[1 + 4*i]= z1 + z2;
+        block[2 + 4*i]= z1 - z2;
+        block[3 + 4*i]= z0 - z3;
+    }
+
+    for (i=0; i < 4; i++) {
+        const int z0= 13*(block[i + 4*0] +    block[i + 4*2]);
+        const int z1= 13*(block[i + 4*0] -    block[i + 4*2]);
+        const int z2=  7* block[i + 4*1] - 17*block[i + 4*3];
+        const int z3= 17* block[i + 4*1] +  7*block[i + 4*3];
+        const int rr= (dc + 0x80000);
+
+        dst[i + stride*0]= cm[ dst[i + stride*0] + (((z0 + z3)*qmul + rr) >> 20) ];
+        dst[i + stride*1]= cm[ dst[i + stride*1] + (((z1 + z2)*qmul + rr) >> 20) ];
+        dst[i + stride*2]= cm[ dst[i + stride*2] + (((z1 - z2)*qmul + rr) >> 20) ];
+        dst[i + stride*3]= cm[ dst[i + stride*3] + (((z0 - z3)*qmul + rr) >> 20) ];
+    }
+}
+
+static void pred4x4_down_left_svq3_c(uint8_t *src, uint8_t *topright, int stride){
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+    const __attribute__((unused)) int unu0= t0;
+    const __attribute__((unused)) int unu1= l0;
+
+    src[0+0*stride]=(l1 + t1)>>1;
+    src[1+0*stride]=
+    src[0+1*stride]=(l2 + t2)>>1;
+    src[2+0*stride]=
+    src[1+1*stride]=
+    src[0+2*stride]=
+    src[3+0*stride]=
+    src[2+1*stride]=
+    src[1+2*stride]=
+    src[0+3*stride]=
+    src[3+1*stride]=
+    src[2+2*stride]=
+    src[1+3*stride]=
+    src[3+2*stride]=
+    src[2+3*stride]=
+    src[3+3*stride]=(l3 + t3)>>1;
+}
+
+static void pred16x16_plane_svq3_c(uint8_t *src, int stride){
+    pred16x16_plane_compat_c(src, stride, 1);
+}
+
+static inline int svq3_decode_block (GetBitContext *gb, DCTELEM *block,
+                                     int index, const int type) {
+
+  static const uint8_t *const scan_patterns[4] =
+  { luma_dc_zigzag_scan, zigzag_scan, svq3_scan, chroma_dc_scan };
+
+  int run, level, sign, vlc, limit;
+  const int intra = (3 * type) >> 2;
+  const uint8_t *const scan = scan_patterns[type];
+
+  for (limit=(16 >> intra); index < 16; index=limit, limit+=8) {
+    for (; (vlc = svq3_get_ue_golomb (gb)) != 0; index++) {
+
+      if (vlc == INVALID_VLC)
+        return -1;
+
+      sign = (vlc & 0x1) - 1;
+      vlc  = (vlc + 1) >> 1;
+
+      if (type == 3) {
+        if (vlc < 3) {
+          run   = 0;
+          level = vlc;
+        } else if (vlc < 4) {
+          run   = 1;
+          level = 1;
+        } else {
+          run   = (vlc & 0x3);
+          level = ((vlc + 9) >> 2) - run;
+        }
+      } else {
+        if (vlc < 16) {
+          run   = svq3_dct_tables[intra][vlc].run;
+          level = svq3_dct_tables[intra][vlc].level;
+        } else if (intra) {
+          run   = (vlc & 0x7);
+          level = (vlc >> 3) + ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
+        } else {
+          run   = (vlc & 0xF);
+          level = (vlc >> 4) + ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
+        }
+      }
+
+      if ((index += run) >= limit)
+        return -1;
+
+      block[scan[index]] = (level ^ sign) - sign;
+    }
+
+    if (type != 2) {
+      break;
+    }
+  }
+
+  return 0;
+}
+
+static inline void svq3_mc_dir_part (MpegEncContext *s,
+                                     int x, int y, int width, int height,
+                                     int mx, int my, int dxy,
+                                     int thirdpel, int dir, int avg) {
+
+  const Picture *pic = (dir == 0) ? &s->last_picture : &s->next_picture;
+  uint8_t *src, *dest;
+  int i, emu = 0;
+  int blocksize= 2 - (width>>3); //16->0, 8->1, 4->2
+
+  mx += x;
+  my += y;
+
+  if (mx < 0 || mx >= (s->h_edge_pos - width  - 1) ||
+      my < 0 || my >= (s->v_edge_pos - height - 1)) {
+
+    if ((s->flags & CODEC_FLAG_EMU_EDGE)) {
+      emu = 1;
+    }
+
+    mx = clip (mx, -16, (s->h_edge_pos - width  + 15));
+    my = clip (my, -16, (s->v_edge_pos - height + 15));
+  }
+
+  /* form component predictions */
+  dest = s->current_picture.data[0] + x + y*s->linesize;
+  src  = pic->data[0] + mx + my*s->linesize;
+
+  if (emu) {
+    ff_emulated_edge_mc (s->edge_emu_buffer, src, s->linesize, (width + 1), (height + 1),
+                         mx, my, s->h_edge_pos, s->v_edge_pos);
+    src = s->edge_emu_buffer;
+  }
+  if(thirdpel)
+    (avg ? s->dsp.avg_tpel_pixels_tab : s->dsp.put_tpel_pixels_tab)[dxy](dest, src, s->linesize, width, height);
+  else
+    (avg ? s->dsp.avg_pixels_tab : s->dsp.put_pixels_tab)[blocksize][dxy](dest, src, s->linesize, height);
+
+  if (!(s->flags & CODEC_FLAG_GRAY)) {
+    mx     = (mx + (mx < (int) x)) >> 1;
+    my     = (my + (my < (int) y)) >> 1;
+    width  = (width  >> 1);
+    height = (height >> 1);
+    blocksize++;
+
+    for (i=1; i < 3; i++) {
+      dest = s->current_picture.data[i] + (x >> 1) + (y >> 1)*s->uvlinesize;
+      src  = pic->data[i] + mx + my*s->uvlinesize;
+
+      if (emu) {
+        ff_emulated_edge_mc (s->edge_emu_buffer, src, s->uvlinesize, (width + 1), (height + 1),
+                             mx, my, (s->h_edge_pos >> 1), (s->v_edge_pos >> 1));
+        src = s->edge_emu_buffer;
+      }
+      if(thirdpel)
+        (avg ? s->dsp.avg_tpel_pixels_tab : s->dsp.put_tpel_pixels_tab)[dxy](dest, src, s->uvlinesize, width, height);
+      else
+        (avg ? s->dsp.avg_pixels_tab : s->dsp.put_pixels_tab)[blocksize][dxy](dest, src, s->uvlinesize, height);
+    }
+  }
+}
+
+static inline int svq3_mc_dir (H264Context *h, int size, int mode, int dir, int avg) {
+
+  int i, j, k, mx, my, dx, dy, x, y;
+  MpegEncContext *const s = (MpegEncContext *) h;
+  const int part_width  = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
+  const int part_height = 16 >> ((unsigned) (size + 1) / 3);
+  const int extra_width = (mode == PREDICT_MODE) ? -16*6 : 0;
+  const int h_edge_pos  = 6*(s->h_edge_pos - part_width ) - extra_width;
+  const int v_edge_pos  = 6*(s->v_edge_pos - part_height) - extra_width;
+
+  for (i=0; i < 16; i+=part_height) {
+    for (j=0; j < 16; j+=part_width) {
+      const int b_xy = (4*s->mb_x+(j>>2)) + (4*s->mb_y+(i>>2))*h->b_stride;
+      int dxy;
+      x = 16*s->mb_x + j;
+      y = 16*s->mb_y + i;
+      k = ((j>>2)&1) + ((i>>1)&2) + ((j>>1)&4) + (i&8);
+
+      if (mode != PREDICT_MODE) {
+        pred_motion (h, k, (part_width >> 2), dir, 1, &mx, &my);
+      } else {
+        mx = s->next_picture.motion_val[0][b_xy][0]<<1;
+        my = s->next_picture.motion_val[0][b_xy][1]<<1;
+
+        if (dir == 0) {
+          mx = ((mx * h->frame_num_offset) / h->prev_frame_num_offset + 1)>>1;
+          my = ((my * h->frame_num_offset) / h->prev_frame_num_offset + 1)>>1;
+        } else {
+          mx = ((mx * (h->frame_num_offset - h->prev_frame_num_offset)) / h->prev_frame_num_offset + 1)>>1;
+          my = ((my * (h->frame_num_offset - h->prev_frame_num_offset)) / h->prev_frame_num_offset + 1)>>1;
+        }
+      }
+
+      /* clip motion vector prediction to frame border */
+      mx = clip (mx, extra_width - 6*x, h_edge_pos - 6*x);
+      my = clip (my, extra_width - 6*y, v_edge_pos - 6*y);
+
+      /* get (optional) motion vector differential */
+      if (mode == PREDICT_MODE) {
+        dx = dy = 0;
+      } else {
+        dy = svq3_get_se_golomb (&s->gb);
+        dx = svq3_get_se_golomb (&s->gb);
+
+        if (dx == INVALID_VLC || dy == INVALID_VLC) {
+          av_log(h->s.avctx, AV_LOG_ERROR, "invalid MV vlc\n");
+          return -1;
+        }
+      }
+
+      /* compute motion vector */
+      if (mode == THIRDPEL_MODE) {
+        int fx, fy;
+        mx = ((mx + 1)>>1) + dx;
+        my = ((my + 1)>>1) + dy;
+        fx= ((unsigned)(mx + 0x3000))/3 - 0x1000;
+        fy= ((unsigned)(my + 0x3000))/3 - 0x1000;
+        dxy= (mx - 3*fx) + 4*(my - 3*fy);
+
+        svq3_mc_dir_part (s, x, y, part_width, part_height, fx, fy, dxy, 1, dir, avg);
+        mx += mx;
+        my += my;
+      } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
+        mx = ((unsigned)(mx + 1 + 0x3000))/3 + dx - 0x1000;
+        my = ((unsigned)(my + 1 + 0x3000))/3 + dy - 0x1000;
+        dxy= (mx&1) + 2*(my&1);
+
+        svq3_mc_dir_part (s, x, y, part_width, part_height, mx>>1, my>>1, dxy, 0, dir, avg);
+        mx *= 3;
+        my *= 3;
+      } else {
+        mx = ((unsigned)(mx + 3 + 0x6000))/6 + dx - 0x1000;
+        my = ((unsigned)(my + 3 + 0x6000))/6 + dy - 0x1000;
+
+        svq3_mc_dir_part (s, x, y, part_width, part_height, mx, my, 0, 0, dir, avg);
+        mx *= 6;
+        my *= 6;
+      }
+
+      /* update mv_cache */
+      if (mode != PREDICT_MODE) {
+        int32_t mv = pack16to32(mx,my);
+
+        if (part_height == 8 && i < 8) {
+          *(int32_t *) h->mv_cache[dir][scan8[k] + 1*8] = mv;
+
+          if (part_width == 8 && j < 8) {
+            *(int32_t *) h->mv_cache[dir][scan8[k] + 1 + 1*8] = mv;
+          }
+        }
+        if (part_width == 8 && j < 8) {
+          *(int32_t *) h->mv_cache[dir][scan8[k] + 1] = mv;
+        }
+        if (part_width == 4 || part_height == 4) {
+          *(int32_t *) h->mv_cache[dir][scan8[k]] = mv;
+        }
+      }
+
+      /* write back motion vectors */
+      fill_rectangle(s->current_picture.motion_val[dir][b_xy], part_width>>2, part_height>>2, h->b_stride, pack16to32(mx,my), 4);
+    }
+  }
+
+  return 0;
+}
+
+static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
+  int i, j, k, m, dir, mode;
+  int cbp = 0;
+  uint32_t vlc;
+  int8_t *top, *left;
+  MpegEncContext *const s = (MpegEncContext *) h;
+  const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
+  const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
+
+  h->top_samples_available        = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
+  h->left_samples_available        = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
+  h->topright_samples_available        = 0xFFFF;
+
+  if (mb_type == 0) {           /* SKIP */
+    if (s->pict_type == P_TYPE || s->next_picture.mb_type[mb_xy] == -1) {
+      svq3_mc_dir_part (s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0, 0, 0, 0);
+
+      if (s->pict_type == B_TYPE) {
+        svq3_mc_dir_part (s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0, 0, 1, 1);
+      }
+
+      mb_type = MB_TYPE_SKIP;
+    } else {
+      mb_type= FFMIN(s->next_picture.mb_type[mb_xy], 6);
+      if(svq3_mc_dir (h, mb_type, PREDICT_MODE, 0, 0) < 0)
+        return -1;
+      if(svq3_mc_dir (h, mb_type, PREDICT_MODE, 1, 1) < 0)
+        return -1;
+
+      mb_type = MB_TYPE_16x16;
+    }
+  } else if (mb_type < 8) {     /* INTER */
+    if (h->thirdpel_flag && h->halfpel_flag == !get_bits (&s->gb, 1)) {
+      mode = THIRDPEL_MODE;
+    } else if (h->halfpel_flag && h->thirdpel_flag == !get_bits (&s->gb, 1)) {
+      mode = HALFPEL_MODE;
+    } else {
+      mode = FULLPEL_MODE;
+    }
+
+    /* fill caches */
+    /* note ref_cache should contain here:
+        ????????
+        ???11111
+        N??11111
+        N??11111
+        N??11111
+        N
+    */
+
+    for (m=0; m < 2; m++) {
+      if (s->mb_x > 0 && h->intra4x4_pred_mode[mb_xy - 1][0] != -1) {
+        for (i=0; i < 4; i++) {
+          *(uint32_t *) h->mv_cache[m][scan8[0] - 1 + i*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - 1 + i*h->b_stride];
+        }
+      } else {
+        for (i=0; i < 4; i++) {
+          *(uint32_t *) h->mv_cache[m][scan8[0] - 1 + i*8] = 0;
+        }
+      }
+      if (s->mb_y > 0) {
+        memcpy (h->mv_cache[m][scan8[0] - 1*8], s->current_picture.motion_val[m][b_xy - h->b_stride], 4*2*sizeof(int16_t));
+        memset (&h->ref_cache[m][scan8[0] - 1*8], (h->intra4x4_pred_mode[mb_xy - s->mb_stride][4] == -1) ? PART_NOT_AVAILABLE : 1, 4);
+
+        if (s->mb_x < (s->mb_width - 1)) {
+          *(uint32_t *) h->mv_cache[m][scan8[0] + 4 - 1*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - h->b_stride + 4];
+          h->ref_cache[m][scan8[0] + 4 - 1*8] =
+                  (h->intra4x4_pred_mode[mb_xy - s->mb_stride + 1][0] == -1 ||
+                   h->intra4x4_pred_mode[mb_xy - s->mb_stride][4] == -1) ? PART_NOT_AVAILABLE : 1;
+        }else
+          h->ref_cache[m][scan8[0] + 4 - 1*8] = PART_NOT_AVAILABLE;
+        if (s->mb_x > 0) {
+          *(uint32_t *) h->mv_cache[m][scan8[0] - 1 - 1*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - h->b_stride - 1];
+          h->ref_cache[m][scan8[0] - 1 - 1*8] = (h->intra4x4_pred_mode[mb_xy - s->mb_stride - 1][3] == -1) ? PART_NOT_AVAILABLE : 1;
+        }else
+          h->ref_cache[m][scan8[0] - 1 - 1*8] = PART_NOT_AVAILABLE;
+      }else
+        memset (&h->ref_cache[m][scan8[0] - 1*8 - 1], PART_NOT_AVAILABLE, 8);
+
+      if (s->pict_type != B_TYPE)
+        break;
+    }
+
+    /* decode motion vector(s) and form prediction(s) */
+    if (s->pict_type == P_TYPE) {
+      if(svq3_mc_dir (h, (mb_type - 1), mode, 0, 0) < 0)
+        return -1;
+    } else {        /* B_TYPE */
+      if (mb_type != 2) {
+        if(svq3_mc_dir (h, 0, mode, 0, 0) < 0)
+          return -1;
+      } else {
+        for (i=0; i < 4; i++) {
+          memset (s->current_picture.motion_val[0][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
+        }
+      }
+      if (mb_type != 1) {
+        if(svq3_mc_dir (h, 0, mode, 1, (mb_type == 3)) < 0)
+          return -1;
+      } else {
+        for (i=0; i < 4; i++) {
+          memset (s->current_picture.motion_val[1][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
+        }
+      }
+    }
+
+    mb_type = MB_TYPE_16x16;
+  } else if (mb_type == 8 || mb_type == 33) {   /* INTRA4x4 */
+    memset (h->intra4x4_pred_mode_cache, -1, 8*5*sizeof(int8_t));
+
+    if (mb_type == 8) {
+      if (s->mb_x > 0) {
+        for (i=0; i < 4; i++) {
+          h->intra4x4_pred_mode_cache[scan8[0] - 1 + i*8] = h->intra4x4_pred_mode[mb_xy - 1][i];
+        }
+        if (h->intra4x4_pred_mode_cache[scan8[0] - 1] == -1) {
+          h->left_samples_available = 0x5F5F;
+        }
+      }
+      if (s->mb_y > 0) {
+        h->intra4x4_pred_mode_cache[4+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][4];
+        h->intra4x4_pred_mode_cache[5+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][5];
+        h->intra4x4_pred_mode_cache[6+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][6];
+        h->intra4x4_pred_mode_cache[7+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][3];
+
+        if (h->intra4x4_pred_mode_cache[4+8*0] == -1) {
+          h->top_samples_available = 0x33FF;
+        }
+      }
+
+      /* decode prediction codes for luma blocks */
+      for (i=0; i < 16; i+=2) {
+        vlc = svq3_get_ue_golomb (&s->gb);
+
+        if (vlc >= 25){
+          av_log(h->s.avctx, AV_LOG_ERROR, "luma prediction:%d\n", vlc);
+          return -1;
+        }
+
+        left    = &h->intra4x4_pred_mode_cache[scan8[i] - 1];
+        top     = &h->intra4x4_pred_mode_cache[scan8[i] - 8];
+
+        left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
+        left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
+
+        if (left[1] == -1 || left[2] == -1){
+          av_log(h->s.avctx, AV_LOG_ERROR, "weird prediction\n");
+          return -1;
+        }
+      }
+    } else {    /* mb_type == 33, DC_128_PRED block type */
+      for (i=0; i < 4; i++) {
+        memset (&h->intra4x4_pred_mode_cache[scan8[0] + 8*i], DC_PRED, 4);
+      }
+    }
+
+    write_back_intra_pred_mode (h);
+
+    if (mb_type == 8) {
+      check_intra4x4_pred_mode (h);
+
+      h->top_samples_available  = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
+      h->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
+    } else {
+      for (i=0; i < 4; i++) {
+        memset (&h->intra4x4_pred_mode_cache[scan8[0] + 8*i], DC_128_PRED, 4);
+      }
+
+      h->top_samples_available  = 0x33FF;
+      h->left_samples_available = 0x5F5F;
+    }
+
+    mb_type = MB_TYPE_INTRA4x4;
+  } else {                      /* INTRA16x16 */
+    dir = i_mb_type_info[mb_type - 8].pred_mode;
+    dir = (dir >> 1) ^ 3*(dir & 1) ^ 1;
+
+    if ((h->intra16x16_pred_mode = check_intra_pred_mode (h, dir)) == -1){
+      av_log(h->s.avctx, AV_LOG_ERROR, "check_intra_pred_mode = -1\n");
+      return -1;
+    }
+
+    cbp = i_mb_type_info[mb_type - 8].cbp;
+    mb_type = MB_TYPE_INTRA16x16;
+  }
+
+  if (!IS_INTER(mb_type) && s->pict_type != I_TYPE) {
+    for (i=0; i < 4; i++) {
+      memset (s->current_picture.motion_val[0][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
+    }
+    if (s->pict_type == B_TYPE) {
+      for (i=0; i < 4; i++) {
+        memset (s->current_picture.motion_val[1][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
+      }
+    }
+  }
+  if (!IS_INTRA4x4(mb_type)) {
+    memset (h->intra4x4_pred_mode[mb_xy], DC_PRED, 8);
+  }
+  if (!IS_SKIP(mb_type) || s->pict_type == B_TYPE) {
+    memset (h->non_zero_count_cache + 8, 0, 4*9*sizeof(uint8_t));
+    s->dsp.clear_blocks(h->mb);
+  }
+
+  if (!IS_INTRA16x16(mb_type) && (!IS_SKIP(mb_type) || s->pict_type == B_TYPE)) {
+    if ((vlc = svq3_get_ue_golomb (&s->gb)) >= 48){
+      av_log(h->s.avctx, AV_LOG_ERROR, "cbp_vlc=%d\n", vlc);
+      return -1;
+    }
+
+    cbp = IS_INTRA(mb_type) ? golomb_to_intra4x4_cbp[vlc] : golomb_to_inter_cbp[vlc];
+  }
+  if (IS_INTRA16x16(mb_type) || (s->pict_type != I_TYPE && s->adaptive_quant && cbp)) {
+    s->qscale += svq3_get_se_golomb (&s->gb);
+
+    if (s->qscale > 31){
+      av_log(h->s.avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale);
+      return -1;
+    }
+  }
+  if (IS_INTRA16x16(mb_type)) {
+    if (svq3_decode_block (&s->gb, h->mb, 0, 0)){
+      av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding intra luma dc\n");
+      return -1;
+    }
+  }
+
+  if (cbp) {
+    const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
+    const int type = ((s->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
+
+    for (i=0; i < 4; i++) {
+      if ((cbp & (1 << i))) {
+        for (j=0; j < 4; j++) {
+          k = index ? ((j&1) + 2*(i&1) + 2*(j&2) + 4*(i&2)) : (4*i + j);
+          h->non_zero_count_cache[ scan8[k] ] = 1;
+
+          if (svq3_decode_block (&s->gb, &h->mb[16*k], index, type)){
+            av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding block\n");
+            return -1;
+          }
+        }
+      }
+    }
+
+    if ((cbp & 0x30)) {
+      for (i=0; i < 2; ++i) {
+        if (svq3_decode_block (&s->gb, &h->mb[16*(16 + 4*i)], 0, 3)){
+          av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma dc block\n");
+          return -1;
+        }
+      }
+
+      if ((cbp & 0x20)) {
+        for (i=0; i < 8; i++) {
+          h->non_zero_count_cache[ scan8[16+i] ] = 1;
+
+          if (svq3_decode_block (&s->gb, &h->mb[16*(16 + i)], 1, 1)){
+            av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma ac block\n");
+            return -1;
+          }
+        }
+      }
+    }
+  }
+
+  s->current_picture.mb_type[mb_xy] = mb_type;
+
+  if (IS_INTRA(mb_type)) {
+    h->chroma_pred_mode = check_intra_pred_mode (h, DC_PRED8x8);
+  }
+
+  return 0;
+}
+
+static int svq3_decode_slice_header (H264Context *h) {
+  MpegEncContext *const s = (MpegEncContext *) h;
+  const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
+  int i, header;
+
+  header = get_bits (&s->gb, 8);
+
+  if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
+    /* TODO: what? */
+    av_log(h->s.avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
+    return -1;
+  } else {
+    int length = (header >> 5) & 3;
+
+    h->next_slice_index = get_bits_count(&s->gb) + 8*show_bits (&s->gb, 8*length) + 8*length;
+
+    if (h->next_slice_index > s->gb.size_in_bits){
+      av_log(h->s.avctx, AV_LOG_ERROR, "slice after bitstream end\n");
+      return -1;
+    }
+
+    s->gb.size_in_bits = h->next_slice_index - 8*(length - 1);
+    skip_bits(&s->gb, 8);
+
+    if (length > 0) {
+      memcpy ((uint8_t *) &s->gb.buffer[get_bits_count(&s->gb) >> 3],
+             &s->gb.buffer[s->gb.size_in_bits >> 3], (length - 1));
+    }
+  }
+
+  if ((i = svq3_get_ue_golomb (&s->gb)) == INVALID_VLC || i >= 3){
+    av_log(h->s.avctx, AV_LOG_ERROR, "illegal slice type %d \n", i);
+    return -1;
+  }
+
+  h->slice_type = golomb_to_pict_type[i];
+
+  if ((header & 0x9F) == 2) {
+    i = (s->mb_num < 64) ? 6 : (1 + av_log2 (s->mb_num - 1));
+    s->mb_skip_run = get_bits (&s->gb, i) - (s->mb_x + (s->mb_y * s->mb_width));
+  } else {
+    get_bits1 (&s->gb);
+    s->mb_skip_run = 0;
+  }
+
+  h->slice_num = get_bits (&s->gb, 8);
+  s->qscale = get_bits (&s->gb, 5);
+  s->adaptive_quant = get_bits1 (&s->gb);
+
+  /* unknown fields */
+  get_bits1 (&s->gb);
+
+  if (h->unknown_svq3_flag) {
+    get_bits1 (&s->gb);
+  }
+
+  get_bits1 (&s->gb);
+  get_bits (&s->gb, 2);
+
+  while (get_bits1 (&s->gb)) {
+    get_bits (&s->gb, 8);
+  }
+
+  /* reset intra predictors and invalidate motion vector references */
+  if (s->mb_x > 0) {
+    memset (h->intra4x4_pred_mode[mb_xy - 1], -1, 4*sizeof(int8_t));
+    memset (h->intra4x4_pred_mode[mb_xy - s->mb_x], -1, 8*sizeof(int8_t)*s->mb_x);
+  }
+  if (s->mb_y > 0) {
+    memset (h->intra4x4_pred_mode[mb_xy - s->mb_stride], -1, 8*sizeof(int8_t)*(s->mb_width - s->mb_x));
+
+    if (s->mb_x > 0) {
+      h->intra4x4_pred_mode[mb_xy - s->mb_stride - 1][3] = -1;
+    }
+  }
+
+  return 0;
+}
+
+static int svq3_decode_frame (AVCodecContext *avctx,
+                              void *data, int *data_size,
+                              uint8_t *buf, int buf_size) {
+  MpegEncContext *const s = avctx->priv_data;
+  H264Context *const h = avctx->priv_data;
+  int m, mb_type;
+  unsigned char *extradata;
+  unsigned int size;
+
+  s->flags = avctx->flags;
+  s->flags2 = avctx->flags2;
+  s->unrestricted_mv = 1;
+
+  if (!s->context_initialized) {
+    s->width = avctx->width;
+    s->height = avctx->height;
+    h->pred4x4[DIAG_DOWN_LEFT_PRED] = pred4x4_down_left_svq3_c;
+    h->pred16x16[PLANE_PRED8x8] = pred16x16_plane_svq3_c;
+    h->halfpel_flag = 1;
+    h->thirdpel_flag = 1;
+    h->unknown_svq3_flag = 0;
+    h->chroma_qp = 4;
+
+    if (MPV_common_init (s) < 0)
+      return -1;
+
+    h->b_stride = 4*s->mb_width;
+
+    alloc_tables (h);
+
+    /* prowl for the "SEQH" marker in the extradata */
+    extradata = (unsigned char *)avctx->extradata;
+    for (m = 0; m < avctx->extradata_size; m++) {
+      if (!memcmp (extradata, "SEQH", 4))
+        break;
+      extradata++;
+    }
+
+    /* if a match was found, parse the extra data */
+    if (!memcmp (extradata, "SEQH", 4)) {
+
+      GetBitContext gb;
+
+      size = BE_32(&extradata[4]);
+      init_get_bits (&gb, extradata + 8, size*8);
+
+      /* 'frame size code' and optional 'width, height' */
+      if (get_bits (&gb, 3) == 7) {
+        get_bits (&gb, 12);
+        get_bits (&gb, 12);
+      }
+
+      h->halfpel_flag = get_bits1 (&gb);
+      h->thirdpel_flag = get_bits1 (&gb);
+
+      /* unknown fields */
+      get_bits1 (&gb);
+      get_bits1 (&gb);
+      get_bits1 (&gb);
+      get_bits1 (&gb);
+
+      s->low_delay = get_bits1 (&gb);
+
+      /* unknown field */
+      get_bits1 (&gb);
+
+      while (get_bits1 (&gb)) {
+        get_bits (&gb, 8);
+      }
+
+      h->unknown_svq3_flag = get_bits1 (&gb);
+      avctx->has_b_frames = !s->low_delay;
+    }
+  }
+
+  /* special case for last picture */
+  if (buf_size == 0) {
+    if (s->next_picture_ptr && !s->low_delay) {
+      *(AVFrame *) data = *(AVFrame *) &s->next_picture;
+      *data_size = sizeof(AVFrame);
+    }
+    return 0;
+  }
+
+  init_get_bits (&s->gb, buf, 8*buf_size);
+
+  s->mb_x = s->mb_y = 0;
+
+  if (svq3_decode_slice_header (h))
+    return -1;
+
+  s->pict_type = h->slice_type;
+  s->picture_number = h->slice_num;
+
+  if(avctx->debug&FF_DEBUG_PICT_INFO){
+      av_log(h->s.avctx, AV_LOG_DEBUG, "%c hpel:%d, tpel:%d aqp:%d qp:%d\n",
+      av_get_pict_type_char(s->pict_type), h->halfpel_flag, h->thirdpel_flag,
+      s->adaptive_quant, s->qscale
+      );
+  }
+
+  /* for hurry_up==5 */
+  s->current_picture.pict_type = s->pict_type;
+  s->current_picture.key_frame = (s->pict_type == I_TYPE);
+
+  /* skip b frames if we dont have reference frames */
+  if (s->last_picture_ptr == NULL && s->pict_type == B_TYPE) return 0;
+  /* skip b frames if we are in a hurry */
+  if (avctx->hurry_up && s->pict_type == B_TYPE) return 0;
+  /* skip everything if we are in a hurry >= 5 */
+  if (avctx->hurry_up >= 5) return 0;
+  if(  (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==B_TYPE)
+     ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=I_TYPE)
+     || avctx->skip_frame >= AVDISCARD_ALL)
+      return 0;
+
+  if (s->next_p_frame_damaged) {
+    if (s->pict_type == B_TYPE)
+      return 0;
+    else
+      s->next_p_frame_damaged = 0;
+  }
+
+  frame_start (h);
+
+  if (s->pict_type == B_TYPE) {
+    h->frame_num_offset = (h->slice_num - h->prev_frame_num);
+
+    if (h->frame_num_offset < 0) {
+      h->frame_num_offset += 256;
+    }
+    if (h->frame_num_offset == 0 || h->frame_num_offset >= h->prev_frame_num_offset) {
+      av_log(h->s.avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
+      return -1;
+    }
+  } else {
+    h->prev_frame_num = h->frame_num;
+    h->frame_num = h->slice_num;
+    h->prev_frame_num_offset = (h->frame_num - h->prev_frame_num);
+
+    if (h->prev_frame_num_offset < 0) {
+      h->prev_frame_num_offset += 256;
+    }
+  }
+
+  for(m=0; m<2; m++){
+    int i;
+    for(i=0; i<4; i++){
+      int j;
+      for(j=-1; j<4; j++)
+        h->ref_cache[m][scan8[0] + 8*i + j]= 1;
+      h->ref_cache[m][scan8[0] + 8*i + j]= PART_NOT_AVAILABLE;
+    }
+  }
+
+  for (s->mb_y=0; s->mb_y < s->mb_height; s->mb_y++) {
+    for (s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
+
+      if ( (get_bits_count(&s->gb) + 7) >= s->gb.size_in_bits &&
+          ((get_bits_count(&s->gb) & 7) == 0 || show_bits (&s->gb, (-get_bits_count(&s->gb) & 7)) == 0)) {
+
+        skip_bits(&s->gb, h->next_slice_index - get_bits_count(&s->gb));
+        s->gb.size_in_bits = 8*buf_size;
+
+        if (svq3_decode_slice_header (h))
+          return -1;
+
+        /* TODO: support s->mb_skip_run */
+      }
+
+      mb_type = svq3_get_ue_golomb (&s->gb);
+
+      if (s->pict_type == I_TYPE) {
+        mb_type += 8;
+      } else if (s->pict_type == B_TYPE && mb_type >= 4) {
+        mb_type += 4;
+      }
+      if (mb_type > 33 || svq3_decode_mb (h, mb_type)) {
+        av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
+        return -1;
+      }
+
+      if (mb_type != 0) {
+        hl_decode_mb (h);
+      }
+
+      if (s->pict_type != B_TYPE && !s->low_delay) {
+        s->current_picture.mb_type[s->mb_x + s->mb_y*s->mb_stride] =
+                        (s->pict_type == P_TYPE && mb_type < 8) ? (mb_type - 1) : -1;
+      }
+    }
+
+    ff_draw_horiz_band(s, 16*s->mb_y, 16);
+  }
+
+  MPV_frame_end(s);
+
+  if (s->pict_type == B_TYPE || s->low_delay) {
+    *(AVFrame *) data = *(AVFrame *) &s->current_picture;
+  } else {
+    *(AVFrame *) data = *(AVFrame *) &s->last_picture;
+  }
+
+  avctx->frame_number = s->picture_number - 1;
+
+  /* dont output the last pic after seeking */
+  if (s->last_picture_ptr || s->low_delay) {
+    *data_size = sizeof(AVFrame);
+  }
+
+  return buf_size;
+}
+
+
+AVCodec svq3_decoder = {
+    "svq3",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_SVQ3,
+    sizeof(H264Context),
+    decode_init,
+    NULL,
+    decode_end,
+    svq3_decode_frame,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+};
diff --git a/contrib/ffmpeg/libavcodec/targa.c b/contrib/ffmpeg/libavcodec/targa.c
new file mode 100644
index 000000000..4eb18f87e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/targa.c
@@ -0,0 +1,254 @@
+/*
+ * Targa (.tga) image decoder
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+#include "avcodec.h"
+
+enum TargaCompr{
+    TGA_NODATA = 0, // no image data
+    TGA_PAL    = 1, // palettized
+    TGA_RGB    = 2, // true-color
+    TGA_BW     = 3, // black & white or grayscale
+    TGA_RLE    = 8, // flag pointing that data is RLE-coded
+};
+
+typedef struct TargaContext {
+    AVFrame picture;
+
+    int width, height;
+    int bpp;
+    int color_type;
+    int compression_type;
+} TargaContext;
+
+static void targa_decode_rle(AVCodecContext *avctx, TargaContext *s, uint8_t *src, uint8_t *dst, int w, int h, int stride, int bpp)
+{
+    int i, x, y;
+    int depth = (bpp + 1) >> 3;
+    int type, count;
+    int diff;
+
+    diff = stride - w * depth;
+    x = y = 0;
+    while(y < h){
+        type = *src++;
+        count = (type & 0x7F) + 1;
+        type &= 0x80;
+        if((x + count > w) && (x + count + 1 > (h - y) * w)){
+            av_log(avctx, AV_LOG_ERROR, "Packet went out of bounds: position (%i,%i) size %i\n", x, y, count);
+            return;
+        }
+        for(i = 0; i < count; i++){
+            switch(depth){
+            case 1:
+                *dst = *src;
+                break;
+            case 2:
+                *((uint16_t*)dst) = LE_16(src);
+                break;
+            case 3:
+                dst[0] = src[0];
+                dst[1] = src[1];
+                dst[2] = src[2];
+                break;
+            case 4:
+                *((uint32_t*)dst) = LE_32(src);
+                break;
+            }
+            dst += depth;
+            if(!type)
+                src += depth;
+
+            x++;
+            if(x == w){
+                x = 0;
+                y++;
+                dst += diff;
+            }
+        }
+        if(type)
+            src += depth;
+    }
+}
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    TargaContext * const s = avctx->priv_data;
+    AVFrame *picture = data;
+    AVFrame * const p= (AVFrame*)&s->picture;
+    uint8_t *dst;
+    int stride;
+    int idlen, pal, compr, x, y, w, h, bpp, flags;
+    int first_clr, colors, csize;
+
+    /* parse image header */
+    idlen = *buf++;
+    pal = *buf++;
+    compr = *buf++;
+    first_clr = LE_16(buf); buf += 2;
+    colors = LE_16(buf); buf += 2;
+    csize = *buf++;
+    x = LE_16(buf); buf += 2;
+    y = LE_16(buf); buf += 2;
+    w = LE_16(buf); buf += 2;
+    h = LE_16(buf); buf += 2;
+    bpp = *buf++;
+    flags = *buf++;
+    //skip identifier if any
+    buf += idlen;
+    s->bpp = bpp;
+    s->width = w;
+    s->height = h;
+    switch(s->bpp){
+    case 8:
+        avctx->pix_fmt = ((compr & (~TGA_RLE)) == TGA_BW) ? PIX_FMT_GRAY8 : PIX_FMT_PAL8;
+        break;
+    case 15:
+        avctx->pix_fmt = PIX_FMT_RGB555;
+        break;
+    case 16:
+        avctx->pix_fmt = PIX_FMT_RGB555;
+        break;
+    case 24:
+        avctx->pix_fmt = PIX_FMT_BGR24;
+        break;
+    case 32:
+        avctx->pix_fmt = PIX_FMT_RGBA32;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Bit depth %i is not supported\n", s->bpp);
+        return -1;
+    }
+
+    if(s->picture.data[0])
+        avctx->release_buffer(avctx, &s->picture);
+
+    if(avcodec_check_dimensions(avctx, w, h))
+        return -1;
+    if(w != avctx->width || h != avctx->height)
+        avcodec_set_dimensions(avctx, w, h);
+    if(avctx->get_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    if(flags & 0x20){
+        dst = p->data[0];
+        stride = p->linesize[0];
+    }else{ //image is upside-down
+        dst = p->data[0] + p->linesize[0] * (h - 1);
+        stride = -p->linesize[0];
+    }
+
+    if(avctx->pix_fmt == PIX_FMT_PAL8 && avctx->palctrl){
+        memcpy(p->data[1], avctx->palctrl->palette, AVPALETTE_SIZE);
+        if(avctx->palctrl->palette_changed){
+            p->palette_has_changed = 1;
+            avctx->palctrl->palette_changed = 0;
+        }
+    }
+    if(colors){
+        if((colors + first_clr) > 256){
+            av_log(avctx, AV_LOG_ERROR, "Incorrect palette: %i colors with offset %i\n", colors, first_clr);
+            return -1;
+        }
+        if(csize != 24){
+            av_log(avctx, AV_LOG_ERROR, "Palette entry size %i bits is not supported\n", csize);
+            return -1;
+        }
+        if(avctx->pix_fmt != PIX_FMT_PAL8)//should not occur but skip palette anyway
+            buf += colors * ((csize + 1) >> 3);
+        else{
+            int r, g, b, t;
+            int32_t *pal = ((int32_t*)p->data[1]) + first_clr;
+            for(t = 0; t < colors; t++){
+                r = *buf++;
+                g = *buf++;
+                b = *buf++;
+                *pal++ = (b << 16) | (g << 8) | r;
+            }
+            p->palette_has_changed = 1;
+            avctx->palctrl->palette_changed = 0;
+        }
+    }
+    if((compr & (~TGA_RLE)) == TGA_NODATA)
+        memset(p->data[0], 0, p->linesize[0] * s->height);
+    else{
+        if(compr & TGA_RLE)
+            targa_decode_rle(avctx, s, buf, dst, avctx->width, avctx->height, stride, bpp);
+        else{
+            for(y = 0; y < s->height; y++){
+#ifdef WORDS_BIGENDIAN
+                if((s->bpp + 1) >> 3 == 2){
+                    uint16_t *dst16 = (uint16_t*)dst;
+                    for(x = 0; x < s->width; x++)
+                        dst16[x] = LE_16(buf + x * 2);
+                }else if((s->bpp + 1) >> 3 == 4){
+                    uint32_t *dst32 = (uint32_t*)dst;
+                    for(x = 0; x < s->width; x++)
+                        dst32[x] = LE_32(buf + x * 4);
+                }else
+#endif
+                    memcpy(dst, buf, s->width * ((s->bpp + 1) >> 3));
+
+                dst += stride;
+                buf += s->width * ((s->bpp + 1) >> 3);
+            }
+        }
+    }
+
+    *picture= *(AVFrame*)&s->picture;
+    *data_size = sizeof(AVPicture);
+
+    return buf_size;
+}
+
+static int targa_init(AVCodecContext *avctx){
+    TargaContext *s = avctx->priv_data;
+
+    avcodec_get_frame_defaults((AVFrame*)&s->picture);
+    avctx->coded_frame= (AVFrame*)&s->picture;
+    s->picture.data[0] = NULL;
+
+    return 0;
+}
+
+static int targa_end(AVCodecContext *avctx){
+    TargaContext *s = avctx->priv_data;
+
+    if(s->picture.data[0])
+        avctx->release_buffer(avctx, &s->picture);
+
+    return 0;
+}
+
+AVCodec targa_decoder = {
+    "targa",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_TARGA,
+    sizeof(TargaContext),
+    targa_init,
+    NULL,
+    targa_end,
+    decode_frame,
+    0,
+    NULL
+};
diff --git a/contrib/ffmpeg/libavcodec/tiertexseqv.c b/contrib/ffmpeg/libavcodec/tiertexseqv.c
new file mode 100644
index 000000000..ca3baf09e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/tiertexseqv.c
@@ -0,0 +1,232 @@
+/*
+ * Tiertex Limited SEQ Video Decoder
+ * Copyright (c) 2006 Gregory Montoir (cyx@users.sourceforge.net)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file tiertexseqv.c
+ * Tiertex Limited SEQ video decoder
+ */
+
+#include "avcodec.h"
+#include "common.h"
+#define ALT_BITSTREAM_READER_LE
+#include "bitstream.h"
+
+
+typedef struct SeqVideoContext {
+    AVCodecContext *avctx;
+    AVFrame frame;
+    unsigned int palette[256];
+    unsigned char block[8 * 8];
+} SeqVideoContext;
+
+
+static unsigned char *seq_unpack_rle_block(unsigned char *src, unsigned char *dst, int dst_size)
+{
+    int i, len, sz;
+    GetBitContext gb;
+    int code_table[64];
+
+    /* get the rle codes (at most 64 bytes) */
+    init_get_bits(&gb, src, 64 * 8);
+    for (i = 0, sz = 0; i < 64 && sz < dst_size; i++) {
+        code_table[i] = get_sbits(&gb, 4);
+        sz += FFABS(code_table[i]);
+    }
+    src += (get_bits_count(&gb) + 7) / 8;
+
+    /* do the rle unpacking */
+    for (i = 0; i < 64 && dst_size > 0; i++) {
+        len = code_table[i];
+        if (len < 0) {
+            len = -len;
+            memset(dst, *src++, FFMIN(len, dst_size));
+        } else {
+            memcpy(dst, src, FFMIN(len, dst_size));
+            src += len;
+        }
+        dst += len;
+        dst_size -= len;
+    }
+    return src;
+}
+
+static unsigned char *seq_decode_op1(SeqVideoContext *seq, unsigned char *src, unsigned char *dst)
+{
+    unsigned char *color_table;
+    int b, i, len, bits;
+    GetBitContext gb;
+
+    len = *src++;
+    if (len & 0x80) {
+        switch (len & 3) {
+        case 1:
+            src = seq_unpack_rle_block(src, seq->block, sizeof(seq->block));
+            for (b = 0; b < 8; b++) {
+                memcpy(dst, &seq->block[b * 8], 8);
+                dst += seq->frame.linesize[0];
+            }
+            break;
+        case 2:
+            src = seq_unpack_rle_block(src, seq->block, sizeof(seq->block));
+            for (i = 0; i < 8; i++) {
+                for (b = 0; b < 8; b++)
+                    dst[b * seq->frame.linesize[0]] = seq->block[i * 8 + b];
+                ++dst;
+            }
+            break;
+        }
+    } else {
+        color_table = src;
+        src += len;
+        bits = ff_log2_tab[len - 1] + 1;
+        init_get_bits(&gb, src, bits * 8 * 8); src += bits * 8;
+        for (b = 0; b < 8; b++) {
+            for (i = 0; i < 8; i++)
+                dst[i] = color_table[get_bits(&gb, bits)];
+            dst += seq->frame.linesize[0];
+        }
+    }
+
+    return src;
+}
+
+static unsigned char *seq_decode_op2(SeqVideoContext *seq, unsigned char *src, unsigned char *dst)
+{
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        memcpy(dst, src, 8);
+        src += 8;
+        dst += seq->frame.linesize[0];
+    }
+
+    return src;
+}
+
+static unsigned char *seq_decode_op3(SeqVideoContext *seq, unsigned char *src, unsigned char *dst)
+{
+    int pos, offset;
+
+    do {
+        pos = *src++;
+        offset = ((pos >> 3) & 7) * seq->frame.linesize[0] + (pos & 7);
+        dst[offset] = *src++;
+    } while (!(pos & 0x80));
+
+    return src;
+}
+
+static void seqvideo_decode(SeqVideoContext *seq, unsigned char *data, int data_size)
+{
+    GetBitContext gb;
+    int flags, i, j, x, y, op;
+    unsigned char c[3];
+    unsigned char *dst;
+
+    flags = *data++;
+
+    if (flags & 1) {
+        for (i = 0; i < 256; i++) {
+            for (j = 0; j < 3; j++, data++)
+                c[j] = (*data << 2) | (*data >> 4);
+            seq->palette[i] = (c[0] << 16) | (c[1] << 8) | c[2];
+        }
+        memcpy(seq->frame.data[1], seq->palette, sizeof(seq->palette));
+        seq->frame.palette_has_changed = 1;
+    }
+
+    if (flags & 2) {
+        init_get_bits(&gb, data, 128 * 8); data += 128;
+        for (y = 0; y < 128; y += 8)
+            for (x = 0; x < 256; x += 8) {
+                dst = &seq->frame.data[0][y * seq->frame.linesize[0] + x];
+                op = get_bits(&gb, 2);
+                switch (op) {
+                case 1:
+                    data = seq_decode_op1(seq, data, dst);
+                    break;
+                case 2:
+                    data = seq_decode_op2(seq, data, dst);
+                    break;
+                case 3:
+                    data = seq_decode_op3(seq, data, dst);
+                    break;
+                }
+            }
+    }
+}
+
+static int seqvideo_decode_init(AVCodecContext *avctx)
+{
+    SeqVideoContext *seq = (SeqVideoContext *)avctx->priv_data;
+
+    seq->avctx = avctx;
+    avctx->pix_fmt = PIX_FMT_PAL8;
+    avctx->has_b_frames = 0;
+
+    seq->frame.data[0] = NULL;
+
+    return 0;
+}
+
+static int seqvideo_decode_frame(AVCodecContext *avctx,
+                                 void *data, int *data_size,
+                                 uint8_t *buf, int buf_size)
+{
+
+    SeqVideoContext *seq = (SeqVideoContext *)avctx->priv_data;
+
+    seq->frame.reference = 1;
+    seq->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if (avctx->reget_buffer(avctx, &seq->frame)) {
+        av_log(seq->avctx, AV_LOG_ERROR, "tiertexseqvideo: reget_buffer() failed\n");
+        return -1;
+    }
+
+    seqvideo_decode(seq, buf, buf_size);
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame *)data = seq->frame;
+
+    return buf_size;
+}
+
+static int seqvideo_decode_end(AVCodecContext *avctx)
+{
+    SeqVideoContext *seq = (SeqVideoContext *)avctx->priv_data;
+
+    if (seq->frame.data[0])
+        avctx->release_buffer(avctx, &seq->frame);
+
+    return 0;
+}
+
+AVCodec tiertexseqvideo_decoder = {
+    "tiertexseqvideo",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_TIERTEXSEQVIDEO,
+    sizeof(SeqVideoContext),
+    seqvideo_decode_init,
+    NULL,
+    seqvideo_decode_end,
+    seqvideo_decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/tiff.c b/contrib/ffmpeg/libavcodec/tiff.c
new file mode 100644
index 000000000..5925af1ae
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/tiff.c
@@ -0,0 +1,531 @@
+/*
+ * TIFF image decoder
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+#include "avcodec.h"
+#ifdef CONFIG_ZLIB
+#include <zlib.h>
+#endif
+#include "lzw.h"
+
+/* abridged list of TIFF tags */
+enum TiffTags{
+    TIFF_WIDTH = 0x100,
+    TIFF_HEIGHT,
+    TIFF_BPP,
+    TIFF_COMPR,
+    TIFF_INVERT = 0x106,
+    TIFF_STRIP_OFFS = 0x111,
+    TIFF_ROWSPERSTRIP = 0x116,
+    TIFF_STRIP_SIZE,
+    TIFF_PLANAR = 0x11C,
+    TIFF_XPOS = 0x11E,
+    TIFF_YPOS = 0x11F,
+    TIFF_PREDICTOR = 0x13D,
+    TIFF_PAL = 0x140
+};
+
+enum TiffCompr{
+    TIFF_RAW = 1,
+    TIFF_CCITT_RLE,
+    TIFF_G3,
+    TIFF_G4,
+    TIFF_LZW,
+    TIFF_JPEG,
+    TIFF_NEWJPEG,
+    TIFF_ADOBE_DEFLATE,
+    TIFF_PACKBITS = 0x8005,
+    TIFF_DEFLATE = 0x80B2
+};
+
+enum TiffTypes{
+    TIFF_BYTE = 1,
+    TIFF_STRING,
+    TIFF_SHORT,
+    TIFF_LONG,
+    TIFF_LONGLONG
+};
+
+/** sizes of various TIFF field types */
+static const int type_sizes[6] = {
+    0, 1, 100, 2, 4, 8
+};
+
+typedef struct TiffContext {
+    AVCodecContext *avctx;
+    AVFrame picture;
+
+    int width, height;
+    unsigned int bpp;
+    int le;
+    int compr;
+    int invert;
+
+    int strips, rps;
+    int sot;
+    uint8_t* stripdata;
+    uint8_t* stripsizes;
+    int stripsize, stripoff;
+    LZWState *lzw;
+} TiffContext;
+
+static int tget_short(uint8_t **p, int le){
+    int v = le ? LE_16(*p) : BE_16(*p);
+    *p += 2;
+    return v;
+}
+
+static int tget_long(uint8_t **p, int le){
+    int v = le ? LE_32(*p) : BE_32(*p);
+    *p += 4;
+    return v;
+}
+
+static int tget(uint8_t **p, int type, int le){
+    switch(type){
+    case TIFF_BYTE : return *(*p)++;
+    case TIFF_SHORT: return tget_short(p, le);
+    case TIFF_LONG : return tget_long (p, le);
+    default        : return -1;
+    }
+}
+
+static int tiff_unpack_strip(TiffContext *s, uint8_t* dst, int stride, uint8_t *src, int size, int lines){
+    int c, line, pixels, code;
+    uint8_t *ssrc = src;
+    int width = s->width * (s->bpp / 8);
+#ifdef CONFIG_ZLIB
+    uint8_t *zbuf; unsigned long outlen;
+
+    if(s->compr == TIFF_DEFLATE || s->compr == TIFF_ADOBE_DEFLATE){
+        outlen = width * lines;
+        zbuf = av_malloc(outlen);
+        if(uncompress(zbuf, &outlen, src, size) != Z_OK){
+            av_log(s->avctx, AV_LOG_ERROR, "Uncompressing failed (%lu of %lu)\n", outlen, (unsigned long)width * lines);
+            av_free(zbuf);
+            return -1;
+        }
+        src = zbuf;
+        for(line = 0; line < lines; line++){
+            memcpy(dst, src, width);
+            dst += stride;
+            src += width;
+        }
+        av_free(zbuf);
+        return 0;
+    }
+#endif
+    if(s->compr == TIFF_LZW){
+        if(ff_lzw_decode_init(s->lzw, 8, src, size, FF_LZW_TIFF) < 0){
+            av_log(s->avctx, AV_LOG_ERROR, "Error initializing LZW decoder\n");
+            return -1;
+        }
+    }
+    for(line = 0; line < lines; line++){
+        if(src - ssrc > size){
+            av_log(s->avctx, AV_LOG_ERROR, "Source data overread\n");
+            return -1;
+        }
+        switch(s->compr){
+        case TIFF_RAW:
+            memcpy(dst, src, s->width * (s->bpp / 8));
+            src += s->width * (s->bpp / 8);
+            break;
+        case TIFF_PACKBITS:
+            for(pixels = 0; pixels < width;){
+                code = (int8_t)*src++;
+                if(code >= 0){
+                    code++;
+                    if(pixels + code > width){
+                        av_log(s->avctx, AV_LOG_ERROR, "Copy went out of bounds\n");
+                        return -1;
+                    }
+                    memcpy(dst + pixels, src, code);
+                    src += code;
+                    pixels += code;
+                }else if(code != -128){ // -127..-1
+                    code = (-code) + 1;
+                    if(pixels + code > width){
+                        av_log(s->avctx, AV_LOG_ERROR, "Run went out of bounds\n");
+                        return -1;
+                    }
+                    c = *src++;
+                    memset(dst + pixels, c, code);
+                    pixels += code;
+                }
+            }
+            break;
+        case TIFF_LZW:
+            pixels = ff_lzw_decode(s->lzw, dst, width);
+            if(pixels < width){
+                av_log(s->avctx, AV_LOG_ERROR, "Decoded only %i bytes of %i\n", pixels, width);
+                return -1;
+            }
+            break;
+        }
+        dst += stride;
+    }
+    return 0;
+}
+
+
+static int tiff_decode_tag(TiffContext *s, uint8_t *start, uint8_t *buf, uint8_t *end_buf, AVFrame *pic)
+{
+    int tag, type, count, off, value = 0;
+    uint8_t *src, *dst;
+    int i, j, ssize, soff, stride;
+    int *pal, *rp, *gp, *bp;
+
+    tag = tget_short(&buf, s->le);
+    type = tget_short(&buf, s->le);
+    count = tget_long(&buf, s->le);
+    off = tget_long(&buf, s->le);
+
+    if(count == 1){
+        switch(type){
+        case TIFF_BYTE:
+        case TIFF_SHORT:
+            buf -= 4;
+            value = tget(&buf, type, s->le);
+            buf = NULL;
+            break;
+        case TIFF_LONG:
+            value = off;
+            buf = NULL;
+            break;
+        default:
+            value = -1;
+            buf = start + off;
+        }
+    }else if(type_sizes[type] * count <= 4){
+        buf -= 4;
+    }else{
+        buf = start + off;
+    }
+
+    if(buf && (buf < start || buf > end_buf)){
+        av_log(s->avctx, AV_LOG_ERROR, "Tag referencing position outside the image\n");
+        return -1;
+    }
+
+    switch(tag){
+    case TIFF_WIDTH:
+        s->width = value;
+        break;
+    case TIFF_HEIGHT:
+        s->height = value;
+        break;
+    case TIFF_BPP:
+        if(count == 1) s->bpp = value;
+        else{
+            switch(type){
+            case TIFF_BYTE:
+                s->bpp = (off & 0xFF) + ((off >> 8) & 0xFF) + ((off >> 16) & 0xFF) + ((off >> 24) & 0xFF);
+                break;
+            case TIFF_SHORT:
+            case TIFF_LONG:
+                s->bpp = 0;
+                for(i = 0; i < count; i++) s->bpp += tget(&buf, type, s->le);
+                break;
+            default:
+                s->bpp = -1;
+            }
+        }
+        switch(s->bpp){
+        case 8:
+            s->avctx->pix_fmt = PIX_FMT_PAL8;
+            break;
+        case 24:
+            s->avctx->pix_fmt = PIX_FMT_RGB24;
+            break;
+        case 16:
+            if(count == 1){
+                s->avctx->pix_fmt = PIX_FMT_GRAY16BE;
+            }else{
+                av_log(s->avctx, AV_LOG_ERROR, "This format is not supported (bpp=%i)\n", s->bpp);
+                return -1;
+            }
+            break;
+        default:
+            av_log(s->avctx, AV_LOG_ERROR, "This format is not supported (bpp=%i)\n", s->bpp);
+            return -1;
+        }
+        if(s->width != s->avctx->width || s->height != s->avctx->height){
+            if(avcodec_check_dimensions(s->avctx, s->width, s->height))
+                return -1;
+            avcodec_set_dimensions(s->avctx, s->width, s->height);
+        }
+        if(s->picture.data[0])
+            s->avctx->release_buffer(s->avctx, &s->picture);
+        if(s->avctx->get_buffer(s->avctx, &s->picture) < 0){
+            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+            return -1;
+        }
+        if(s->bpp == 8){
+            /* make default grayscale pal */
+            pal = s->picture.data[1];
+            for(i = 0; i < 256; i++)
+                pal[i] = i * 0x010101;
+        }
+        break;
+    case TIFF_COMPR:
+        s->compr = value;
+        switch(s->compr){
+        case TIFF_RAW:
+        case TIFF_PACKBITS:
+        case TIFF_LZW:
+            break;
+        case TIFF_DEFLATE:
+        case TIFF_ADOBE_DEFLATE:
+#ifdef CONFIG_ZLIB
+            break;
+#else
+            av_log(s->avctx, AV_LOG_ERROR, "Deflate: ZLib not compiled in\n");
+            return -1;
+#endif
+        case TIFF_G3:
+            av_log(s->avctx, AV_LOG_ERROR, "CCITT G3 compression is not supported\n");
+            return -1;
+        case TIFF_G4:
+            av_log(s->avctx, AV_LOG_ERROR, "CCITT G4 compression is not supported\n");
+            return -1;
+        case TIFF_CCITT_RLE:
+            av_log(s->avctx, AV_LOG_ERROR, "CCITT RLE compression is not supported\n");
+            return -1;
+        case TIFF_JPEG:
+        case TIFF_NEWJPEG:
+            av_log(s->avctx, AV_LOG_ERROR, "JPEG compression is not supported\n");
+            return -1;
+        default:
+            av_log(s->avctx, AV_LOG_ERROR, "Unknown compression method %i\n", s->compr);
+            return -1;
+        }
+        break;
+    case TIFF_ROWSPERSTRIP:
+        if(value < 1){
+            av_log(s->avctx, AV_LOG_ERROR, "Incorrect value of rows per strip\n");
+            return -1;
+        }
+        s->rps = value;
+        break;
+    case TIFF_STRIP_OFFS:
+        if(count == 1){
+            s->stripdata = NULL;
+            s->stripoff = value;
+        }else
+            s->stripdata = start + off;
+        s->strips = count;
+        s->sot = type;
+        if(s->stripdata > end_buf){
+            av_log(s->avctx, AV_LOG_ERROR, "Tag referencing position outside the image\n");
+            return -1;
+        }
+        break;
+    case TIFF_STRIP_SIZE:
+        if(count == 1){
+            s->stripsizes = NULL;
+            s->stripsize = value;
+            s->strips = 1;
+        }else{
+            s->stripsizes = start + off;
+        }
+        s->strips = count;
+        if(s->stripsizes > end_buf){
+            av_log(s->avctx, AV_LOG_ERROR, "Tag referencing position outside the image\n");
+            return -1;
+        }
+        if(!pic->data[0]){
+            av_log(s->avctx, AV_LOG_ERROR, "Picture initialization missing\n");
+            return -1;
+        }
+        /* now we have the data and may start decoding */
+        stride = pic->linesize[0];
+        dst = pic->data[0];
+        for(i = 0; i < s->height; i += s->rps){
+            if(s->stripsizes)
+                ssize = tget(&s->stripsizes, type, s->le);
+            else
+                ssize = s->stripsize;
+
+            if(s->stripdata){
+                soff = tget(&s->stripdata, s->sot, s->le);
+            }else
+                soff = s->stripoff;
+            src = start + soff;
+            if(tiff_unpack_strip(s, dst, stride, src, ssize, FFMIN(s->rps, s->height - i)) < 0)
+                break;
+            dst += s->rps * stride;
+        }
+        break;
+    case TIFF_PREDICTOR:
+        if(!pic->data[0]){
+            av_log(s->avctx, AV_LOG_ERROR, "Picture initialization missing\n");
+            return -1;
+        }
+        if(value == 2){
+            src = pic->data[0];
+            stride = pic->linesize[0];
+            soff = s->bpp >> 3;
+            ssize = s->width * soff;
+            for(i = 0; i < s->height; i++) {
+                for(j = soff; j < ssize; j++)
+                    src[j] += src[j - soff];
+                src += stride;
+            }
+        }
+        break;
+    case TIFF_INVERT:
+        switch(value){
+        case 0:
+            s->invert = 1;
+            break;
+        case 1:
+            s->invert = 0;
+            break;
+        case 2:
+        case 3:
+            break;
+        default:
+            av_log(s->avctx, AV_LOG_ERROR, "Color mode %d is not supported\n", value);
+            return -1;
+        }
+        break;
+    case TIFF_PAL:
+        if(s->avctx->pix_fmt != PIX_FMT_PAL8){
+            av_log(s->avctx, AV_LOG_ERROR, "Palette met but this is not palettized format\n");
+            return -1;
+        }
+        pal = s->picture.data[1];
+        off = type_sizes[type];
+        rp = buf;
+        gp = buf + count / 3 * off;
+        bp = buf + count / 3 * off * 2;
+        off = (type_sizes[type] - 1) << 3;
+        for(i = 0; i < count / 3; i++){
+            j = (tget(&rp, type, s->le) >> off) << 16;
+            j |= (tget(&gp, type, s->le) >> off) << 8;
+            j |= tget(&bp, type, s->le) >> off;
+            pal[i] = j;
+        }
+        break;
+    case TIFF_PLANAR:
+        if(value == 2){
+            av_log(s->avctx, AV_LOG_ERROR, "Planar format is not supported\n");
+            return -1;
+        }
+        break;
+    }
+    return 0;
+}
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    TiffContext * const s = avctx->priv_data;
+    AVFrame *picture = data;
+    AVFrame * const p= (AVFrame*)&s->picture;
+    uint8_t *orig_buf = buf, *end_buf = buf + buf_size;
+    int id, le, off;
+    int i, entries;
+
+    //parse image header
+    id = LE_16(buf); buf += 2;
+    if(id == 0x4949) le = 1;
+    else if(id == 0x4D4D) le = 0;
+    else{
+        av_log(avctx, AV_LOG_ERROR, "TIFF header not found\n");
+        return -1;
+    }
+    s->le = le;
+    s->invert = 0;
+    // As TIFF 6.0 specification puts it "An arbitrary but carefully chosen number
+    // that further identifies the file as a TIFF file"
+    if(tget_short(&buf, le) != 42){
+        av_log(avctx, AV_LOG_ERROR, "The answer to life, universe and everything is not correct!\n");
+        return -1;
+    }
+    /* parse image file directory */
+    off = tget_long(&buf, le);
+    if(orig_buf + off + 14 >= end_buf){
+        av_log(avctx, AV_LOG_ERROR, "IFD offset is greater than image size\n");
+        return -1;
+    }
+    buf = orig_buf + off;
+    entries = tget_short(&buf, le);
+    for(i = 0; i < entries; i++){
+        if(tiff_decode_tag(s, orig_buf, buf, end_buf, p) < 0)
+            return -1;
+        buf += 12;
+    }
+
+    if(s->invert){
+        uint8_t *src;
+        int j;
+
+        src = s->picture.data[0];
+        for(j = 0; j < s->height; j++){
+            for(i = 0; i < s->picture.linesize[0]; i++)
+                src[i] = 255 - src[i];
+            src += s->picture.linesize[0];
+        }
+    }
+    *picture= *(AVFrame*)&s->picture;
+    *data_size = sizeof(AVPicture);
+
+    return buf_size;
+}
+
+static int tiff_init(AVCodecContext *avctx){
+    TiffContext *s = avctx->priv_data;
+
+    s->width = 0;
+    s->height = 0;
+    s->avctx = avctx;
+    avcodec_get_frame_defaults((AVFrame*)&s->picture);
+    avctx->coded_frame= (AVFrame*)&s->picture;
+    s->picture.data[0] = NULL;
+    ff_lzw_decode_open(&s->lzw);
+
+    return 0;
+}
+
+static int tiff_end(AVCodecContext *avctx)
+{
+    TiffContext * const s = avctx->priv_data;
+
+    ff_lzw_decode_close(&s->lzw);
+    if(s->picture.data[0])
+        avctx->release_buffer(avctx, &s->picture);
+    return 0;
+}
+
+AVCodec tiff_decoder = {
+    "tiff",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_TIFF,
+    sizeof(TiffContext),
+    tiff_init,
+    NULL,
+    tiff_end,
+    decode_frame,
+    0,
+    NULL
+};
diff --git a/contrib/ffmpeg/libavcodec/truemotion1.c b/contrib/ffmpeg/libavcodec/truemotion1.c
new file mode 100644
index 000000000..11d9320c0
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/truemotion1.c
@@ -0,0 +1,923 @@
+/*
+ * Duck TrueMotion 1.0 Decoder
+ * Copyright (C) 2003 Alex Beregszaszi & Mike Melanson
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file truemotion1.c
+ * Duck TrueMotion v1 Video Decoder by
+ * Alex Beregszaszi (alex@fsn.hu) and
+ * Mike Melanson (melanson@pcisys.net)
+ *
+ * The TrueMotion v1 decoder presently only decodes 16-bit TM1 data and
+ * outputs RGB555 (or RGB565) data. 24-bit TM1 data is not supported yet.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+
+#include "truemotion1data.h"
+
+typedef struct TrueMotion1Context {
+    AVCodecContext *avctx;
+    AVFrame frame;
+    AVFrame prev_frame;
+
+    uint8_t *buf;
+    int size;
+
+    uint8_t *mb_change_bits;
+    int mb_change_bits_row_size;
+    uint8_t *index_stream;
+    int index_stream_size;
+
+    int flags;
+    int x, y, w, h;
+
+    uint32_t y_predictor_table[1024];
+    uint32_t c_predictor_table[1024];
+    uint32_t fat_y_predictor_table[1024];
+    uint32_t fat_c_predictor_table[1024];
+
+    int compression;
+    int block_type;
+    int block_width;
+    int block_height;
+
+    int16_t ydt[8];
+    int16_t cdt[8];
+    int16_t fat_ydt[8];
+    int16_t fat_cdt[8];
+
+    int last_deltaset, last_vectable;
+
+    unsigned int *vert_pred;
+
+} TrueMotion1Context;
+
+#define FLAG_SPRITE         32
+#define FLAG_KEYFRAME       16
+#define FLAG_INTERFRAME      8
+#define FLAG_INTERPOLATED    4
+
+struct frame_header {
+    uint8_t header_size;
+    uint8_t compression;
+    uint8_t deltaset;
+    uint8_t vectable;
+    uint16_t ysize;
+    uint16_t xsize;
+    uint16_t checksum;
+    uint8_t version;
+    uint8_t header_type;
+    uint8_t flags;
+    uint8_t control;
+    uint16_t xoffset;
+    uint16_t yoffset;
+    uint16_t width;
+    uint16_t height;
+};
+
+#define ALGO_NOP        0
+#define ALGO_RGB16V     1
+#define ALGO_RGB16H     2
+#define ALGO_RGB24H     3
+
+/* these are the various block sizes that can occupy a 4x4 block */
+#define BLOCK_2x2  0
+#define BLOCK_2x4  1
+#define BLOCK_4x2  2
+#define BLOCK_4x4  3
+
+typedef struct comp_types {
+    int algorithm;
+    int block_width; // vres
+    int block_height; // hres
+    int block_type;
+} comp_types;
+
+/* { valid for metatype }, algorithm, num of deltas, vert res, horiz res */
+static comp_types compression_types[17] = {
+    { ALGO_NOP,    0, 0, 0 },
+
+    { ALGO_RGB16V, 4, 4, BLOCK_4x4 },
+    { ALGO_RGB16H, 4, 4, BLOCK_4x4 },
+    { ALGO_RGB16V, 4, 2, BLOCK_4x2 },
+    { ALGO_RGB16H, 4, 2, BLOCK_4x2 },
+
+    { ALGO_RGB16V, 2, 4, BLOCK_2x4 },
+    { ALGO_RGB16H, 2, 4, BLOCK_2x4 },
+    { ALGO_RGB16V, 2, 2, BLOCK_2x2 },
+    { ALGO_RGB16H, 2, 2, BLOCK_2x2 },
+
+    { ALGO_NOP,    4, 4, BLOCK_4x4 },
+    { ALGO_RGB24H, 4, 4, BLOCK_4x4 },
+    { ALGO_NOP,    4, 2, BLOCK_4x2 },
+    { ALGO_RGB24H, 4, 2, BLOCK_4x2 },
+
+    { ALGO_NOP,    2, 4, BLOCK_2x4 },
+    { ALGO_RGB24H, 2, 4, BLOCK_2x4 },
+    { ALGO_NOP,    2, 2, BLOCK_2x2 },
+    { ALGO_RGB24H, 2, 2, BLOCK_2x2 }
+};
+
+static void select_delta_tables(TrueMotion1Context *s, int delta_table_index)
+{
+    int i;
+
+    if (delta_table_index > 3)
+        return;
+
+    memcpy(s->ydt, ydts[delta_table_index], 8 * sizeof(int16_t));
+    memcpy(s->cdt, cdts[delta_table_index], 8 * sizeof(int16_t));
+    memcpy(s->fat_ydt, fat_ydts[delta_table_index], 8 * sizeof(int16_t));
+    memcpy(s->fat_cdt, fat_cdts[delta_table_index], 8 * sizeof(int16_t));
+
+    /* Y skinny deltas need to be halved for some reason; maybe the
+     * skinny Y deltas should be modified */
+    for (i = 0; i < 8; i++)
+    {
+        /* drop the lsb before dividing by 2-- net effect: round down
+         * when dividing a negative number (e.g., -3/2 = -2, not -1) */
+        s->ydt[i] &= 0xFFFE;
+        s->ydt[i] /= 2;
+    }
+}
+
+#ifdef WORDS_BIGENDIAN
+static int make_ydt15_entry(int p2, int p1, int16_t *ydt)
+#else
+static int make_ydt15_entry(int p1, int p2, int16_t *ydt)
+#endif
+{
+    int lo, hi;
+
+    lo = ydt[p1];
+    lo += (lo << 5) + (lo << 10);
+    hi = ydt[p2];
+    hi += (hi << 5) + (hi << 10);
+    return ((lo + (hi << 16)) << 1);
+}
+
+#ifdef WORDS_BIGENDIAN
+static int make_cdt15_entry(int p2, int p1, int16_t *cdt)
+#else
+static int make_cdt15_entry(int p1, int p2, int16_t *cdt)
+#endif
+{
+    int r, b, lo;
+
+    b = cdt[p2];
+    r = cdt[p1] << 10;
+    lo = b + r;
+    return ((lo + (lo << 16)) << 1);
+}
+
+#ifdef WORDS_BIGENDIAN
+static int make_ydt16_entry(int p2, int p1, int16_t *ydt)
+#else
+static int make_ydt16_entry(int p1, int p2, int16_t *ydt)
+#endif
+{
+    int lo, hi;
+
+    lo = ydt[p1];
+    lo += (lo << 6) + (lo << 11);
+    hi = ydt[p2];
+    hi += (hi << 6) + (hi << 11);
+    return ((lo + (hi << 16)) << 1);
+}
+
+#ifdef WORDS_BIGENDIAN
+static int make_cdt16_entry(int p2, int p1, int16_t *cdt)
+#else
+static int make_cdt16_entry(int p1, int p2, int16_t *cdt)
+#endif
+{
+    int r, b, lo;
+
+    b = cdt[p2];
+    r = cdt[p1] << 11;
+    lo = b + r;
+    return ((lo + (lo << 16)) << 1);
+}
+
+#ifdef WORDS_BIGENDIAN
+static int make_ydt24_entry(int p2, int p1, int16_t *ydt)
+#else
+static int make_ydt24_entry(int p1, int p2, int16_t *ydt)
+#endif
+{
+    int lo, hi;
+
+    lo = ydt[p1];
+    hi = ydt[p2];
+    return ((lo + (hi << 8) + (hi << 16)) << 1);
+}
+
+#ifdef WORDS_BIGENDIAN
+static int make_cdt24_entry(int p2, int p1, int16_t *cdt)
+#else
+static int make_cdt24_entry(int p1, int p2, int16_t *cdt)
+#endif
+{
+    int r, b;
+
+    b = cdt[p2];
+    r = cdt[p1]<<16;
+    return ((b+r) << 1);
+}
+
+static void gen_vector_table15(TrueMotion1Context *s, const uint8_t *sel_vector_table)
+{
+    int len, i, j;
+    unsigned char delta_pair;
+
+    for (i = 0; i < 1024; i += 4)
+    {
+        len = *sel_vector_table++ / 2;
+        for (j = 0; j < len; j++)
+        {
+            delta_pair = *sel_vector_table++;
+            s->y_predictor_table[i+j] = 0xfffffffe &
+                make_ydt15_entry(delta_pair >> 4, delta_pair & 0xf, s->ydt);
+            s->c_predictor_table[i+j] = 0xfffffffe &
+                make_cdt15_entry(delta_pair >> 4, delta_pair & 0xf, s->cdt);
+        }
+        s->y_predictor_table[i+(j-1)] |= 1;
+        s->c_predictor_table[i+(j-1)] |= 1;
+    }
+}
+
+static void gen_vector_table16(TrueMotion1Context *s, const uint8_t *sel_vector_table)
+{
+    int len, i, j;
+    unsigned char delta_pair;
+
+    for (i = 0; i < 1024; i += 4)
+    {
+        len = *sel_vector_table++ / 2;
+        for (j = 0; j < len; j++)
+        {
+            delta_pair = *sel_vector_table++;
+            s->y_predictor_table[i+j] = 0xfffffffe &
+                make_ydt16_entry(delta_pair >> 4, delta_pair & 0xf, s->ydt);
+            s->c_predictor_table[i+j] = 0xfffffffe &
+                make_cdt16_entry(delta_pair >> 4, delta_pair & 0xf, s->cdt);
+        }
+        s->y_predictor_table[i+(j-1)] |= 1;
+        s->c_predictor_table[i+(j-1)] |= 1;
+    }
+}
+
+static void gen_vector_table24(TrueMotion1Context *s, const uint8_t *sel_vector_table)
+{
+    int len, i, j;
+    unsigned char delta_pair;
+
+    for (i = 0; i < 1024; i += 4)
+    {
+        len = *sel_vector_table++ / 2;
+        for (j = 0; j < len; j++)
+        {
+            delta_pair = *sel_vector_table++;
+            s->y_predictor_table[i+j] = 0xfffffffe &
+                make_ydt24_entry(delta_pair >> 4, delta_pair & 0xf, s->ydt);
+            s->c_predictor_table[i+j] = 0xfffffffe &
+                make_cdt24_entry(delta_pair >> 4, delta_pair & 0xf, s->cdt);
+            s->fat_y_predictor_table[i+j] = 0xfffffffe &
+                make_ydt24_entry(delta_pair >> 4, delta_pair & 0xf, s->fat_ydt);
+            s->fat_c_predictor_table[i+j] = 0xfffffffe &
+                make_cdt24_entry(delta_pair >> 4, delta_pair & 0xf, s->fat_cdt);
+        }
+        s->y_predictor_table[i+(j-1)] |= 1;
+        s->c_predictor_table[i+(j-1)] |= 1;
+        s->fat_y_predictor_table[i+(j-1)] |= 1;
+        s->fat_c_predictor_table[i+(j-1)] |= 1;
+    }
+}
+
+/* Returns the number of bytes consumed from the bytestream. Returns -1 if
+ * there was an error while decoding the header */
+static int truemotion1_decode_header(TrueMotion1Context *s)
+{
+    int i;
+    struct frame_header header;
+    uint8_t header_buffer[128];  /* logical maximum size of the header */
+    const uint8_t *sel_vector_table;
+
+    /* There is 1 change bit per 4 pixels, so each change byte represents
+     * 32 pixels; divide width by 4 to obtain the number of change bits and
+     * then round up to the nearest byte. */
+    s->mb_change_bits_row_size = ((s->avctx->width >> 2) + 7) >> 3;
+
+    header.header_size = ((s->buf[0] >> 5) | (s->buf[0] << 3)) & 0x7f;
+    if (s->buf[0] < 0x10)
+    {
+        av_log(s->avctx, AV_LOG_ERROR, "invalid header size (%d)\n", s->buf[0]);
+        return -1;
+    }
+
+    /* unscramble the header bytes with a XOR operation */
+    memset(header_buffer, 0, 128);
+    for (i = 1; i < header.header_size; i++)
+        header_buffer[i - 1] = s->buf[i] ^ s->buf[i + 1];
+
+    header.compression = header_buffer[0];
+    header.deltaset = header_buffer[1];
+    header.vectable = header_buffer[2];
+    header.ysize = LE_16(&header_buffer[3]);
+    header.xsize = LE_16(&header_buffer[5]);
+    header.checksum = LE_16(&header_buffer[7]);
+    header.version = header_buffer[9];
+    header.header_type = header_buffer[10];
+    header.flags = header_buffer[11];
+    header.control = header_buffer[12];
+
+    /* Version 2 */
+    if (header.version >= 2)
+    {
+        if (header.header_type > 3)
+        {
+            av_log(s->avctx, AV_LOG_ERROR, "invalid header type (%d)\n", header.header_type);
+            return -1;
+        } else if ((header.header_type == 2) || (header.header_type == 3)) {
+            s->flags = header.flags;
+            if (!(s->flags & FLAG_INTERFRAME))
+                s->flags |= FLAG_KEYFRAME;
+        } else
+            s->flags = FLAG_KEYFRAME;
+    } else /* Version 1 */
+        s->flags = FLAG_KEYFRAME;
+
+    if (s->flags & FLAG_SPRITE) {
+        av_log(s->avctx, AV_LOG_INFO, "SPRITE frame found, please report the sample to the developers\n");
+        s->w = header.width;
+        s->h = header.height;
+        s->x = header.xoffset;
+        s->y = header.yoffset;
+    } else {
+        s->w = header.xsize;
+        s->h = header.ysize;
+        if (header.header_type < 2) {
+            if ((s->w < 213) && (s->h >= 176))
+            {
+                s->flags |= FLAG_INTERPOLATED;
+                av_log(s->avctx, AV_LOG_INFO, "INTERPOLATION selected, please report the sample to the developers\n");
+            }
+        }
+    }
+
+    if (header.compression > 17) {
+        av_log(s->avctx, AV_LOG_ERROR, "invalid compression type (%d)\n", header.compression);
+        return -1;
+    }
+
+    if ((header.deltaset != s->last_deltaset) ||
+        (header.vectable != s->last_vectable))
+        select_delta_tables(s, header.deltaset);
+
+    if ((header.compression & 1) && header.header_type)
+        sel_vector_table = pc_tbl2;
+    else {
+        if (header.vectable < 4)
+            sel_vector_table = tables[header.vectable - 1];
+        else {
+            av_log(s->avctx, AV_LOG_ERROR, "invalid vector table id (%d)\n", header.vectable);
+            return -1;
+        }
+    }
+
+    // FIXME: where to place this ?!?!
+    if (compression_types[header.compression].algorithm == ALGO_RGB24H)
+        s->avctx->pix_fmt = PIX_FMT_RGBA32;
+    else
+        s->avctx->pix_fmt = PIX_FMT_RGB555; // RGB565 is supported aswell
+
+    if ((header.deltaset != s->last_deltaset) || (header.vectable != s->last_vectable))
+    {
+        if (compression_types[header.compression].algorithm == ALGO_RGB24H)
+            gen_vector_table24(s, sel_vector_table);
+        else
+        if (s->avctx->pix_fmt == PIX_FMT_RGB555)
+            gen_vector_table15(s, sel_vector_table);
+        else
+            gen_vector_table16(s, sel_vector_table);
+    }
+
+    /* set up pointers to the other key data chunks */
+    s->mb_change_bits = s->buf + header.header_size;
+    if (s->flags & FLAG_KEYFRAME) {
+        /* no change bits specified for a keyframe; only index bytes */
+        s->index_stream = s->mb_change_bits;
+    } else {
+        /* one change bit per 4x4 block */
+        s->index_stream = s->mb_change_bits +
+            (s->mb_change_bits_row_size * (s->avctx->height >> 2));
+    }
+    s->index_stream_size = s->size - (s->index_stream - s->buf);
+
+    s->last_deltaset = header.deltaset;
+    s->last_vectable = header.vectable;
+    s->compression = header.compression;
+    s->block_width = compression_types[header.compression].block_width;
+    s->block_height = compression_types[header.compression].block_height;
+    s->block_type = compression_types[header.compression].block_type;
+
+    if (s->avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(s->avctx, AV_LOG_INFO, "tables: %d / %d c:%d %dx%d t:%d %s%s%s%s\n",
+            s->last_deltaset, s->last_vectable, s->compression, s->block_width,
+            s->block_height, s->block_type,
+            s->flags & FLAG_KEYFRAME ? " KEY" : "",
+            s->flags & FLAG_INTERFRAME ? " INTER" : "",
+            s->flags & FLAG_SPRITE ? " SPRITE" : "",
+            s->flags & FLAG_INTERPOLATED ? " INTERPOL" : "");
+
+    return header.header_size;
+}
+
+static int truemotion1_decode_init(AVCodecContext *avctx)
+{
+    TrueMotion1Context *s = (TrueMotion1Context *)avctx->priv_data;
+
+    s->avctx = avctx;
+
+    // FIXME: it may change ?
+//    if (avctx->bits_per_sample == 24)
+//        avctx->pix_fmt = PIX_FMT_RGB24;
+//    else
+//        avctx->pix_fmt = PIX_FMT_RGB555;
+
+    avctx->has_b_frames = 0;
+    s->frame.data[0] = s->prev_frame.data[0] = NULL;
+
+    /* there is a vertical predictor for each pixel in a line; each vertical
+     * predictor is 0 to start with */
+    s->vert_pred =
+        (unsigned int *)av_malloc(s->avctx->width * sizeof(unsigned int));
+
+    return 0;
+}
+
+/*
+Block decoding order:
+
+dxi: Y-Y
+dxic: Y-C-Y
+dxic2: Y-C-Y-C
+
+hres,vres,i,i%vres (0 < i < 4)
+2x2 0: 0 dxic2
+2x2 1: 1 dxi
+2x2 2: 0 dxic2
+2x2 3: 1 dxi
+2x4 0: 0 dxic2
+2x4 1: 1 dxi
+2x4 2: 2 dxi
+2x4 3: 3 dxi
+4x2 0: 0 dxic
+4x2 1: 1 dxi
+4x2 2: 0 dxic
+4x2 3: 1 dxi
+4x4 0: 0 dxic
+4x4 1: 1 dxi
+4x4 2: 2 dxi
+4x4 3: 3 dxi
+*/
+
+#define GET_NEXT_INDEX() \
+{\
+    if (index_stream_index >= s->index_stream_size) { \
+        av_log(s->avctx, AV_LOG_INFO, " help! truemotion1 decoder went out of bounds\n"); \
+        return; \
+    } \
+    index = s->index_stream[index_stream_index++] * 4; \
+}
+
+#define APPLY_C_PREDICTOR() \
+    predictor_pair = s->c_predictor_table[index]; \
+    horiz_pred += (predictor_pair >> 1); \
+    if (predictor_pair & 1) { \
+        GET_NEXT_INDEX() \
+        if (!index) { \
+            GET_NEXT_INDEX() \
+            predictor_pair = s->c_predictor_table[index]; \
+            horiz_pred += ((predictor_pair >> 1) * 5); \
+            if (predictor_pair & 1) \
+                GET_NEXT_INDEX() \
+            else \
+                index++; \
+        } \
+    } else \
+        index++;
+
+#define APPLY_C_PREDICTOR_24() \
+    predictor_pair = s->c_predictor_table[index]; \
+    horiz_pred += (predictor_pair >> 1); \
+    if (predictor_pair & 1) { \
+        GET_NEXT_INDEX() \
+        if (!index) { \
+            GET_NEXT_INDEX() \
+            predictor_pair = s->fat_c_predictor_table[index]; \
+            horiz_pred += (predictor_pair >> 1); \
+            if (predictor_pair & 1) \
+                GET_NEXT_INDEX() \
+            else \
+                index++; \
+        } \
+    } else \
+        index++;
+
+
+#define APPLY_Y_PREDICTOR() \
+    predictor_pair = s->y_predictor_table[index]; \
+    horiz_pred += (predictor_pair >> 1); \
+    if (predictor_pair & 1) { \
+        GET_NEXT_INDEX() \
+        if (!index) { \
+            GET_NEXT_INDEX() \
+            predictor_pair = s->y_predictor_table[index]; \
+            horiz_pred += ((predictor_pair >> 1) * 5); \
+            if (predictor_pair & 1) \
+                GET_NEXT_INDEX() \
+            else \
+                index++; \
+        } \
+    } else \
+        index++;
+
+#define APPLY_Y_PREDICTOR_24() \
+    predictor_pair = s->y_predictor_table[index]; \
+    horiz_pred += (predictor_pair >> 1); \
+    if (predictor_pair & 1) { \
+        GET_NEXT_INDEX() \
+        if (!index) { \
+            GET_NEXT_INDEX() \
+            predictor_pair = s->fat_y_predictor_table[index]; \
+            horiz_pred += (predictor_pair >> 1); \
+            if (predictor_pair & 1) \
+                GET_NEXT_INDEX() \
+            else \
+                index++; \
+        } \
+    } else \
+        index++;
+
+#define OUTPUT_PIXEL_PAIR() \
+    *current_pixel_pair = *vert_pred + horiz_pred; \
+    *vert_pred++ = *current_pixel_pair++; \
+    prev_pixel_pair++;
+
+static void truemotion1_decode_16bit(TrueMotion1Context *s)
+{
+    int y;
+    int pixels_left;  /* remaining pixels on this line */
+    unsigned int predictor_pair;
+    unsigned int horiz_pred;
+    unsigned int *vert_pred;
+    unsigned int *current_pixel_pair;
+    unsigned int *prev_pixel_pair;
+    unsigned char *current_line = s->frame.data[0];
+    unsigned char *prev_line = s->prev_frame.data[0];
+    int keyframe = s->flags & FLAG_KEYFRAME;
+
+    /* these variables are for managing the stream of macroblock change bits */
+    unsigned char *mb_change_bits = s->mb_change_bits;
+    unsigned char mb_change_byte;
+    unsigned char mb_change_byte_mask;
+    int mb_change_index;
+
+    /* these variables are for managing the main index stream */
+    int index_stream_index = 0;  /* yes, the index into the index stream */
+    int index;
+
+    /* clean out the line buffer */
+    memset(s->vert_pred, 0, s->avctx->width * sizeof(unsigned int));
+
+    GET_NEXT_INDEX();
+
+    for (y = 0; y < s->avctx->height; y++) {
+
+        /* re-init variables for the next line iteration */
+        horiz_pred = 0;
+        current_pixel_pair = (unsigned int *)current_line;
+        prev_pixel_pair = (unsigned int *)prev_line;
+        vert_pred = s->vert_pred;
+        mb_change_index = 0;
+        mb_change_byte = mb_change_bits[mb_change_index++];
+        mb_change_byte_mask = 0x01;
+        pixels_left = s->avctx->width;
+
+        while (pixels_left > 0) {
+
+            if (keyframe || ((mb_change_byte & mb_change_byte_mask) == 0)) {
+
+                switch (y & 3) {
+                case 0:
+                    /* if macroblock width is 2, apply C-Y-C-Y; else
+                     * apply C-Y-Y */
+                    if (s->block_width == 2) {
+                        APPLY_C_PREDICTOR();
+                        APPLY_Y_PREDICTOR();
+                        OUTPUT_PIXEL_PAIR();
+                        APPLY_C_PREDICTOR();
+                        APPLY_Y_PREDICTOR();
+                        OUTPUT_PIXEL_PAIR();
+                    } else {
+                        APPLY_C_PREDICTOR();
+                        APPLY_Y_PREDICTOR();
+                        OUTPUT_PIXEL_PAIR();
+                        APPLY_Y_PREDICTOR();
+                        OUTPUT_PIXEL_PAIR();
+                    }
+                    break;
+
+                case 1:
+                case 3:
+                    /* always apply 2 Y predictors on these iterations */
+                    APPLY_Y_PREDICTOR();
+                    OUTPUT_PIXEL_PAIR();
+                    APPLY_Y_PREDICTOR();
+                    OUTPUT_PIXEL_PAIR();
+                    break;
+
+                case 2:
+                    /* this iteration might be C-Y-C-Y, Y-Y, or C-Y-Y
+                     * depending on the macroblock type */
+                    if (s->block_type == BLOCK_2x2) {
+                        APPLY_C_PREDICTOR();
+                        APPLY_Y_PREDICTOR();
+                        OUTPUT_PIXEL_PAIR();
+                        APPLY_C_PREDICTOR();
+                        APPLY_Y_PREDICTOR();
+                        OUTPUT_PIXEL_PAIR();
+                    } else if (s->block_type == BLOCK_4x2) {
+                        APPLY_C_PREDICTOR();
+                        APPLY_Y_PREDICTOR();
+                        OUTPUT_PIXEL_PAIR();
+                        APPLY_Y_PREDICTOR();
+                        OUTPUT_PIXEL_PAIR();
+                    } else {
+                        APPLY_Y_PREDICTOR();
+                        OUTPUT_PIXEL_PAIR();
+                        APPLY_Y_PREDICTOR();
+                        OUTPUT_PIXEL_PAIR();
+                    }
+                    break;
+                }
+
+            } else {
+
+                /* skip (copy) four pixels, but reassign the horizontal
+                 * predictor */
+                *current_pixel_pair = *prev_pixel_pair++;
+                *vert_pred++ = *current_pixel_pair++;
+                *current_pixel_pair = *prev_pixel_pair++;
+                horiz_pred = *current_pixel_pair - *vert_pred;
+                *vert_pred++ = *current_pixel_pair++;
+
+            }
+
+            if (!keyframe) {
+                mb_change_byte_mask <<= 1;
+
+                /* next byte */
+                if (!mb_change_byte_mask) {
+                    mb_change_byte = mb_change_bits[mb_change_index++];
+                    mb_change_byte_mask = 0x01;
+                }
+            }
+
+            pixels_left -= 4;
+        }
+
+        /* next change row */
+        if (((y + 1) & 3) == 0)
+            mb_change_bits += s->mb_change_bits_row_size;
+
+        current_line += s->frame.linesize[0];
+        prev_line += s->prev_frame.linesize[0];
+    }
+}
+
+static void truemotion1_decode_24bit(TrueMotion1Context *s)
+{
+    int y;
+    int pixels_left;  /* remaining pixels on this line */
+    unsigned int predictor_pair;
+    unsigned int horiz_pred;
+    unsigned int *vert_pred;
+    unsigned int *current_pixel_pair;
+    unsigned int *prev_pixel_pair;
+    unsigned char *current_line = s->frame.data[0];
+    unsigned char *prev_line = s->prev_frame.data[0];
+    int keyframe = s->flags & FLAG_KEYFRAME;
+
+    /* these variables are for managing the stream of macroblock change bits */
+    unsigned char *mb_change_bits = s->mb_change_bits;
+    unsigned char mb_change_byte;
+    unsigned char mb_change_byte_mask;
+    int mb_change_index;
+
+    /* these variables are for managing the main index stream */
+    int index_stream_index = 0;  /* yes, the index into the index stream */
+    int index;
+
+    /* clean out the line buffer */
+    memset(s->vert_pred, 0, s->avctx->width * sizeof(unsigned int));
+
+    GET_NEXT_INDEX();
+
+    for (y = 0; y < s->avctx->height; y++) {
+
+        /* re-init variables for the next line iteration */
+        horiz_pred = 0;
+        current_pixel_pair = (unsigned int *)current_line;
+        prev_pixel_pair = (unsigned int *)prev_line;
+        vert_pred = s->vert_pred;
+        mb_change_index = 0;
+        mb_change_byte = mb_change_bits[mb_change_index++];
+        mb_change_byte_mask = 0x01;
+        pixels_left = s->avctx->width;
+
+        while (pixels_left > 0) {
+
+            if (keyframe || ((mb_change_byte & mb_change_byte_mask) == 0)) {
+
+                switch (y & 3) {
+                case 0:
+                    /* if macroblock width is 2, apply C-Y-C-Y; else
+                     * apply C-Y-Y */
+                    if (s->block_width == 2) {
+                        APPLY_C_PREDICTOR_24();
+                        APPLY_Y_PREDICTOR_24();
+                        OUTPUT_PIXEL_PAIR();
+                        APPLY_C_PREDICTOR_24();
+                        APPLY_Y_PREDICTOR_24();
+                        OUTPUT_PIXEL_PAIR();
+                    } else {
+                        APPLY_C_PREDICTOR_24();
+                        APPLY_Y_PREDICTOR_24();
+                        OUTPUT_PIXEL_PAIR();
+                        APPLY_Y_PREDICTOR_24();
+                        OUTPUT_PIXEL_PAIR();
+                    }
+                    break;
+
+                case 1:
+                case 3:
+                    /* always apply 2 Y predictors on these iterations */
+                    APPLY_Y_PREDICTOR_24();
+                    OUTPUT_PIXEL_PAIR();
+                    APPLY_Y_PREDICTOR_24();
+                    OUTPUT_PIXEL_PAIR();
+                    break;
+
+                case 2:
+                    /* this iteration might be C-Y-C-Y, Y-Y, or C-Y-Y
+                     * depending on the macroblock type */
+                    if (s->block_type == BLOCK_2x2) {
+                        APPLY_C_PREDICTOR_24();
+                        APPLY_Y_PREDICTOR_24();
+                        OUTPUT_PIXEL_PAIR();
+                        APPLY_C_PREDICTOR_24();
+                        APPLY_Y_PREDICTOR_24();
+                        OUTPUT_PIXEL_PAIR();
+                    } else if (s->block_type == BLOCK_4x2) {
+                        APPLY_C_PREDICTOR_24();
+                        APPLY_Y_PREDICTOR_24();
+                        OUTPUT_PIXEL_PAIR();
+                        APPLY_Y_PREDICTOR_24();
+                        OUTPUT_PIXEL_PAIR();
+                    } else {
+                        APPLY_Y_PREDICTOR_24();
+                        OUTPUT_PIXEL_PAIR();
+                        APPLY_Y_PREDICTOR_24();
+                        OUTPUT_PIXEL_PAIR();
+                    }
+                    break;
+                }
+
+            } else {
+
+                /* skip (copy) four pixels, but reassign the horizontal
+                 * predictor */
+                *current_pixel_pair = *prev_pixel_pair++;
+                *vert_pred++ = *current_pixel_pair++;
+                *current_pixel_pair = *prev_pixel_pair++;
+                horiz_pred = *current_pixel_pair - *vert_pred;
+                *vert_pred++ = *current_pixel_pair++;
+
+            }
+
+            if (!keyframe) {
+                mb_change_byte_mask <<= 1;
+
+                /* next byte */
+                if (!mb_change_byte_mask) {
+                    mb_change_byte = mb_change_bits[mb_change_index++];
+                    mb_change_byte_mask = 0x01;
+                }
+            }
+
+            pixels_left -= 4;
+        }
+
+        /* next change row */
+        if (((y + 1) & 3) == 0)
+            mb_change_bits += s->mb_change_bits_row_size;
+
+        current_line += s->frame.linesize[0];
+        prev_line += s->prev_frame.linesize[0];
+    }
+}
+
+
+static int truemotion1_decode_frame(AVCodecContext *avctx,
+                                    void *data, int *data_size,
+                                    uint8_t *buf, int buf_size)
+{
+    TrueMotion1Context *s = (TrueMotion1Context *)avctx->priv_data;
+
+    s->buf = buf;
+    s->size = buf_size;
+
+    if (truemotion1_decode_header(s) == -1)
+        return -1;
+
+    s->frame.reference = 1;
+    if (avctx->get_buffer(avctx, &s->frame) < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    /* check for a do-nothing frame and copy the previous frame */
+    if (compression_types[s->compression].algorithm == ALGO_NOP)
+    {
+        memcpy(s->frame.data[0], s->prev_frame.data[0],
+            s->frame.linesize[0] * s->avctx->height);
+    } else if (compression_types[s->compression].algorithm == ALGO_RGB24H) {
+        truemotion1_decode_24bit(s);
+    } else {
+        truemotion1_decode_16bit(s);
+    }
+
+    if (s->prev_frame.data[0])
+        avctx->release_buffer(avctx, &s->prev_frame);
+
+    /* shuffle frames */
+    s->prev_frame = s->frame;
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->frame;
+
+    /* report that the buffer was completely consumed */
+    return buf_size;
+}
+
+static int truemotion1_decode_end(AVCodecContext *avctx)
+{
+    TrueMotion1Context *s = (TrueMotion1Context *)avctx->priv_data;
+
+    /* release the last frame */
+    if (s->prev_frame.data[0])
+        avctx->release_buffer(avctx, &s->prev_frame);
+
+    av_free(s->vert_pred);
+
+    return 0;
+}
+
+AVCodec truemotion1_decoder = {
+    "truemotion1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_TRUEMOTION1,
+    sizeof(TrueMotion1Context),
+    truemotion1_decode_init,
+    NULL,
+    truemotion1_decode_end,
+    truemotion1_decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/truemotion1data.h b/contrib/ffmpeg/libavcodec/truemotion1data.h
new file mode 100644
index 000000000..63d307c65
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/truemotion1data.h
@@ -0,0 +1,829 @@
+/*
+ * Duck Truemotion v1 Decoding Tables
+ *
+ * Data in this file was originally part of VpVision from On2 which is
+ * distributed under the GNU GPL. It is redistributed with ffmpeg under the
+ * GNU LGPL using the common understanding that data tables necessary for
+ * decoding algorithms are not necessarily licensable.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef TRUEMOTION1DATA_H
+#define TRUEMOTION1DATA_H
+
+/* Y delta tables, skinny and fat */
+static const int16_t ydt1[8] = { 0, -2, 2, -6, 6, -12, 12, -12 };
+static const int16_t ydt2[8] = { 0, -2, 4, -6, 8, -12, 12, -12 };
+static const int16_t ydt3[8] = { 4, -6, 20, -20, 46, -46, 94, -94 };
+static const int16_t fat_ydt3[8] = { 0, -15, 50, -50, 115, -115, 235, -235 };
+static const int16_t ydt4[8] = { 0, -4, 4, -16, 16, -36, 36, -80 };
+static const int16_t fat_ydt4[8] = { 0, 40, 80, -76, 160, -154, 236, -236 };
+
+/* C delta tables, skinny and fat */
+static const int16_t cdt1[8] = { 0, -1, 1, -2, 3, -4, 5, -4 };
+static const int16_t cdt2[8] = { 0, -4, 3, -16, 20, -32, 36, -32 };
+static const int16_t fat_cdt2[8] = { 0, -20, 15, -80, 100, -160, 180, -160 };
+static const int16_t cdt3[8] = { 0, -2, 2, -8, 8, -18, 18, -40 };
+/* NOTE: This table breaks the [+,-] pattern that the rest of the
+ * tables maintain. Is this intentional? */
+static const int16_t fat_cdt3[8] = { 0, 40, 80, -76, 160, -154, 236, -236 };
+
+/* all the delta tables to choose from, at all 4 delta levels */
+static const int16_t *ydts[] = { ydt1, ydt2, ydt3, ydt4, NULL };
+static const int16_t *fat_ydts[] = { fat_ydt3, fat_ydt3, fat_ydt3, fat_ydt4, NULL };
+static const int16_t *cdts[] = { cdt1, cdt1, cdt2, cdt3, NULL };
+static const int16_t *fat_cdts[] = { fat_cdt2, fat_cdt2, fat_cdt2, fat_cdt3, NULL };
+
+static const uint8_t pc_tbl2[] = {
+0x8,0x00,0x00,0x00,0x00,
+0x8,0x00,0x00,0x00,0x00,
+0x8,0x10,0x00,0x00,0x00,
+0x8,0x01,0x00,0x00,0x00,
+0x8,0x00,0x10,0x00,0x00,
+0x8,0x00,0x01,0x00,0x00,
+0x8,0x00,0x00,0x10,0x00,
+0x8,0x00,0x00,0x01,0x00,
+0x8,0x00,0x00,0x00,0x10,
+0x8,0x00,0x00,0x00,0x01,
+0x6,0x00,0x00,0x00,
+0x6,0x10,0x00,0x00,
+0x6,0x01,0x00,0x00,
+0x6,0x00,0x10,0x00,
+0x6,0x00,0x01,0x00,
+0x6,0x00,0x00,0x01,
+0x6,0x00,0x00,0x10,
+0x6,0x00,0x00,0x02,
+0x6,0x00,0x00,0x20,
+0x6,0x20,0x10,0x00,
+0x6,0x00,0x02,0x01,
+0x6,0x00,0x20,0x10,
+0x6,0x02,0x01,0x00,
+0x6,0x11,0x00,0x00,
+0x6,0x00,0x20,0x00,
+0x6,0x00,0x02,0x00,
+0x6,0x20,0x00,0x00,
+0x6,0x01,0x10,0x00,
+0x6,0x02,0x00,0x00,
+0x6,0x01,0x00,0x02,
+0x6,0x10,0x00,0x20,
+0x6,0x00,0x01,0x02,
+0x6,0x10,0x01,0x00,
+0x6,0x00,0x10,0x20,
+0x6,0x10,0x10,0x00,
+0x6,0x10,0x00,0x01,
+0x6,0x20,0x00,0x10,
+0x6,0x02,0x00,0x01,
+0x6,0x01,0x01,0x00,
+0x6,0x01,0x00,0x10,
+0x6,0x00,0x11,0x00,
+0x6,0x10,0x00,0x02,
+0x6,0x00,0x01,0x10,
+0x6,0x00,0x00,0x11,
+0x6,0x10,0x00,0x10,
+0x6,0x01,0x00,0x01,
+0x6,0x00,0x00,0x22,
+0x6,0x02,0x01,0x01,
+0x6,0x10,0x20,0x10,
+0x6,0x01,0x02,0x01,
+0x6,0x20,0x10,0x10,
+0x6,0x01,0x00,0x20,
+0x6,0x00,0x10,0x01,
+0x6,0x21,0x10,0x00,
+0x6,0x10,0x02,0x01,
+0x6,0x12,0x01,0x00,
+0x6,0x01,0x20,0x10,
+0x6,0x01,0x02,0x00,
+0x6,0x10,0x20,0x00,
+0x6,0x00,0x10,0x02,
+0x6,0x00,0x01,0x20,
+0x6,0x00,0x02,0x21,
+0x6,0x00,0x02,0x20,
+0x6,0x00,0x00,0x12,
+0x6,0x00,0x00,0x21,
+0x6,0x20,0x11,0x00,
+0x6,0x00,0x01,0x01,
+0x6,0x11,0x10,0x00,
+0x6,0x00,0x20,0x12,
+0x6,0x00,0x20,0x11,
+0x6,0x20,0x10,0x02,
+0x6,0x02,0x01,0x20,
+0x6,0x00,0x22,0x11,
+0x6,0x00,0x10,0x10,
+0x6,0x02,0x11,0x00,
+0x6,0x00,0x21,0x10,
+0x6,0x00,0x02,0x03,
+0x6,0x20,0x10,0x01,
+0x6,0x00,0x12,0x01,
+0x4,0x11,0x00,
+0x4,0x00,0x22,
+0x4,0x20,0x00,
+0x4,0x01,0x10,
+0x4,0x02,0x20,
+0x4,0x00,0x20,
+0x4,0x02,0x00,
+0x4,0x10,0x01,
+0x4,0x00,0x11,
+0x4,0x02,0x01,
+0x4,0x02,0x21,
+0x4,0x00,0x02,
+0x4,0x20,0x02,
+0x4,0x01,0x01,
+0x4,0x10,0x10,
+0x4,0x10,0x02,
+0x4,0x22,0x00,
+0x4,0x10,0x00,
+0x4,0x01,0x00,
+0x4,0x21,0x00,
+0x4,0x12,0x00,
+0x4,0x00,0x10,
+0x4,0x20,0x12,
+0x4,0x01,0x11,
+0x4,0x00,0x01,
+0x4,0x01,0x02,
+0x4,0x11,0x02,
+0x4,0x11,0x01,
+0x4,0x10,0x20,
+0x4,0x20,0x01,
+0x4,0x22,0x11,
+0x4,0x00,0x12,
+0x4,0x20,0x10,
+0x4,0x22,0x01,
+0x4,0x01,0x20,
+0x4,0x00,0x21,
+0x4,0x10,0x11,
+0x4,0x21,0x10,
+0x4,0x10,0x22,
+0x4,0x02,0x03,
+0x4,0x12,0x01,
+0x4,0x20,0x11,
+0x4,0x11,0x10,
+0x4,0x20,0x30,
+0x4,0x11,0x20,
+0x4,0x02,0x10,
+0x4,0x22,0x10,
+0x4,0x11,0x11,
+0x4,0x30,0x20,
+0x4,0x30,0x00,
+0x4,0x01,0x22,
+0x4,0x01,0x12,
+0x4,0x02,0x11,
+0x4,0x03,0x02,
+0x4,0x03,0x00,
+0x4,0x10,0x21,
+0x4,0x12,0x20,
+0x4,0x00,0x00,
+0x4,0x12,0x21,
+0x4,0x21,0x11,
+0x4,0x02,0x22,
+0x4,0x10,0x12,
+0x4,0x31,0x00,
+0x4,0x20,0x20,
+0x4,0x00,0x03,
+0x4,0x02,0x02,
+0x4,0x22,0x20,
+0x4,0x01,0x21,
+0x4,0x21,0x02,
+0x4,0x21,0x12,
+0x4,0x11,0x22,
+0x4,0x00,0x30,
+0x4,0x12,0x11,
+0x4,0x20,0x22,
+0x4,0x31,0x20,
+0x4,0x21,0x30,
+0x4,0x22,0x02,
+0x4,0x22,0x22,
+0x4,0x20,0x31,
+0x4,0x13,0x02,
+0x4,0x03,0x10,
+0x4,0x11,0x12,
+0x4,0x00,0x13,
+0x4,0x21,0x01,
+0x4,0x12,0x03,
+0x4,0x13,0x00,
+0x4,0x13,0x10,
+0x4,0x02,0x13,
+0x4,0x30,0x01,
+0x4,0x12,0x10,
+0x4,0x22,0x13,
+0x4,0x03,0x12,
+0x4,0x31,0x01,
+0x4,0x30,0x22,
+0x4,0x00,0x31,
+0x4,0x01,0x31,
+0x4,0x02,0x23,
+0x4,0x01,0x30,
+0x4,0x11,0x21,
+0x4,0x22,0x21,
+0x4,0x01,0x13,
+0x4,0x10,0x03,
+0x4,0x22,0x03,
+0x4,0x30,0x21,
+0x4,0x21,0x31,
+0x4,0x33,0x00,
+0x4,0x13,0x12,
+0x4,0x11,0x31,
+0x4,0x30,0x02,
+0x4,0x12,0x02,
+0x4,0x11,0x13,
+0x4,0x12,0x22,
+0x4,0x20,0x32,
+0x4,0x10,0x13,
+0x4,0x22,0x31,
+0x4,0x21,0x20,
+0x4,0x01,0x33,
+0x4,0x33,0x10,
+0x4,0x20,0x13,
+0x4,0x31,0x22,
+0x4,0x13,0x30,
+0x4,0x01,0x03,
+0x4,0x11,0x33,
+0x4,0x20,0x21,
+0x4,0x13,0x31,
+0x4,0x03,0x22,
+0x4,0x31,0x02,
+0x4,0x00,0x24,
+0x2,0x00,
+0x2,0x10,
+0x2,0x20,
+0x2,0x30,
+0x2,0x40,
+0x2,0x50,
+0x2,0x60,
+0x2,0x01,
+0x2,0x11,
+0x2,0x21,
+0x2,0x31,
+0x2,0x41,
+0x2,0x51,
+0x2,0x61,
+0x2,0x02,
+0x2,0x12,
+0x2,0x22,
+0x2,0x32,
+0x2,0x42,
+0x2,0x52,
+0x2,0x62,
+0x2,0x03,
+0x2,0x13,
+0x2,0x23,
+0x2,0x33,
+0x2,0x43,
+0x2,0x53,
+0x2,0x63,
+0x2,0x04,
+0x2,0x14,
+0x2,0x24,
+0x2,0x34,
+0x2,0x44,
+0x2,0x54,
+0x2,0x64,
+0x2,0x05,
+0x2,0x15,
+0x2,0x25,
+0x2,0x35,
+0x2,0x45,
+0x2,0x55,
+0x2,0x65,
+0x2,0x06,
+0x2,0x16,
+0x2,0x26,
+0x2,0x36,
+0x2,0x46,
+0x2,0x56,
+0x2,0x66
+};
+
+static const uint8_t pc_tbl3[] = {
+0x6,0x00,0x00,0x00,
+0x6,0x00,0x00,0x00,
+0x6,0x00,0x00,0x01,
+0x6,0x00,0x00,0x10,
+0x6,0x00,0x00,0x11,
+0x6,0x00,0x01,0x00,
+0x6,0x00,0x01,0x01,
+0x6,0x00,0x01,0x10,
+0x6,0x00,0x01,0x11,
+0x6,0x00,0x10,0x00,
+0x6,0x00,0x10,0x01,
+0x6,0x00,0x10,0x10,
+0x6,0x00,0x10,0x11,
+0x6,0x00,0x11,0x00,
+0x6,0x00,0x11,0x01,
+0x6,0x00,0x11,0x10,
+0x6,0x00,0x11,0x11,
+0x6,0x01,0x00,0x00,
+0x6,0x01,0x00,0x01,
+0x6,0x01,0x00,0x10,
+0x6,0x01,0x00,0x11,
+0x6,0x01,0x01,0x00,
+0x6,0x01,0x01,0x01,
+0x6,0x01,0x01,0x10,
+0x6,0x01,0x01,0x11,
+0x6,0x01,0x10,0x00,
+0x6,0x01,0x10,0x01,
+0x6,0x01,0x10,0x10,
+0x6,0x01,0x10,0x11,
+0x6,0x01,0x11,0x00,
+0x6,0x01,0x11,0x01,
+0x6,0x01,0x11,0x10,
+0x6,0x01,0x11,0x11,
+0x6,0x10,0x00,0x00,
+0x6,0x10,0x00,0x01,
+0x6,0x10,0x00,0x10,
+0x6,0x10,0x00,0x11,
+0x6,0x10,0x01,0x00,
+0x6,0x10,0x01,0x01,
+0x6,0x10,0x01,0x10,
+0x6,0x10,0x01,0x11,
+0x6,0x10,0x10,0x00,
+0x6,0x10,0x10,0x01,
+0x6,0x10,0x10,0x10,
+0x6,0x10,0x10,0x11,
+0x6,0x10,0x11,0x00,
+0x6,0x10,0x11,0x01,
+0x6,0x10,0x11,0x10,
+0x6,0x10,0x11,0x11,
+0x6,0x11,0x00,0x00,
+0x6,0x11,0x00,0x01,
+0x6,0x11,0x00,0x10,
+0x6,0x11,0x00,0x11,
+0x6,0x11,0x01,0x00,
+0x6,0x11,0x01,0x01,
+0x6,0x11,0x01,0x10,
+0x6,0x11,0x01,0x11,
+0x6,0x11,0x10,0x00,
+0x6,0x11,0x10,0x01,
+0x6,0x11,0x10,0x10,
+0x6,0x11,0x10,0x11,
+0x6,0x11,0x11,0x00,
+0x6,0x11,0x11,0x01,
+0x6,0x11,0x11,0x10,
+0x4,0x00,0x00,
+0x4,0x00,0x01,
+0x4,0x00,0x02,
+0x4,0x00,0x03,
+0x4,0x00,0x10,
+0x4,0x00,0x11,
+0x4,0x00,0x12,
+0x4,0x00,0x13,
+0x4,0x00,0x20,
+0x4,0x00,0x21,
+0x4,0x00,0x22,
+0x4,0x00,0x23,
+0x4,0x00,0x30,
+0x4,0x00,0x31,
+0x4,0x00,0x32,
+0x4,0x00,0x33,
+0x4,0x01,0x00,
+0x4,0x01,0x01,
+0x4,0x01,0x02,
+0x4,0x01,0x03,
+0x4,0x01,0x10,
+0x4,0x01,0x11,
+0x4,0x01,0x12,
+0x4,0x01,0x13,
+0x4,0x01,0x20,
+0x4,0x01,0x21,
+0x4,0x01,0x22,
+0x4,0x01,0x23,
+0x4,0x01,0x30,
+0x4,0x01,0x31,
+0x4,0x01,0x32,
+0x4,0x01,0x33,
+0x4,0x02,0x00,
+0x4,0x02,0x01,
+0x4,0x02,0x02,
+0x4,0x02,0x03,
+0x4,0x02,0x10,
+0x4,0x02,0x11,
+0x4,0x02,0x12,
+0x4,0x02,0x13,
+0x4,0x02,0x20,
+0x4,0x02,0x21,
+0x4,0x02,0x22,
+0x4,0x02,0x23,
+0x4,0x02,0x30,
+0x4,0x02,0x31,
+0x4,0x02,0x32,
+0x4,0x02,0x33,
+0x4,0x03,0x00,
+0x4,0x03,0x01,
+0x4,0x03,0x02,
+0x4,0x03,0x03,
+0x4,0x03,0x10,
+0x4,0x03,0x11,
+0x4,0x03,0x12,
+0x4,0x03,0x13,
+0x4,0x03,0x20,
+0x4,0x03,0x21,
+0x4,0x03,0x22,
+0x4,0x03,0x23,
+0x4,0x03,0x30,
+0x4,0x03,0x31,
+0x4,0x03,0x32,
+0x4,0x03,0x33,
+0x4,0x10,0x00,
+0x4,0x10,0x01,
+0x4,0x10,0x02,
+0x4,0x10,0x03,
+0x4,0x10,0x10,
+0x4,0x10,0x11,
+0x4,0x10,0x12,
+0x4,0x10,0x13,
+0x4,0x10,0x20,
+0x4,0x10,0x21,
+0x4,0x10,0x22,
+0x4,0x10,0x23,
+0x4,0x10,0x30,
+0x4,0x10,0x31,
+0x4,0x10,0x32,
+0x4,0x10,0x33,
+0x4,0x11,0x00,
+0x4,0x11,0x01,
+0x4,0x11,0x02,
+0x4,0x11,0x03,
+0x4,0x11,0x10,
+0x4,0x11,0x11,
+0x4,0x11,0x12,
+0x4,0x11,0x13,
+0x4,0x11,0x20,
+0x4,0x11,0x21,
+0x4,0x11,0x22,
+0x4,0x11,0x23,
+0x4,0x11,0x30,
+0x4,0x11,0x31,
+0x4,0x11,0x32,
+0x4,0x11,0x33,
+0x4,0x12,0x00,
+0x4,0x12,0x01,
+0x4,0x12,0x02,
+0x4,0x12,0x03,
+0x4,0x12,0x10,
+0x4,0x12,0x11,
+0x4,0x12,0x12,
+0x4,0x12,0x13,
+0x4,0x12,0x20,
+0x4,0x12,0x21,
+0x4,0x12,0x22,
+0x4,0x12,0x23,
+0x4,0x12,0x30,
+0x4,0x12,0x31,
+0x4,0x12,0x32,
+0x4,0x12,0x33,
+0x4,0x13,0x00,
+0x4,0x13,0x01,
+0x4,0x13,0x02,
+0x4,0x13,0x03,
+0x4,0x13,0x10,
+0x4,0x13,0x11,
+0x4,0x13,0x12,
+0x4,0x13,0x13,
+0x4,0x13,0x20,
+0x4,0x13,0x21,
+0x4,0x13,0x22,
+0x4,0x13,0x23,
+0x4,0x13,0x30,
+0x4,0x13,0x31,
+0x4,0x13,0x32,
+0x4,0x13,0x33,
+0x2,0x00,
+0x2,0x10,
+0x2,0x20,
+0x2,0x30,
+0x2,0x40,
+0x2,0x50,
+0x2,0x60,
+0x2,0x70,
+0x2,0x01,
+0x2,0x11,
+0x2,0x21,
+0x2,0x31,
+0x2,0x41,
+0x2,0x51,
+0x2,0x61,
+0x2,0x71,
+0x2,0x02,
+0x2,0x12,
+0x2,0x22,
+0x2,0x32,
+0x2,0x42,
+0x2,0x52,
+0x2,0x62,
+0x2,0x72,
+0x2,0x03,
+0x2,0x13,
+0x2,0x23,
+0x2,0x33,
+0x2,0x43,
+0x2,0x53,
+0x2,0x63,
+0x2,0x73,
+0x2,0x04,
+0x2,0x14,
+0x2,0x24,
+0x2,0x34,
+0x2,0x44,
+0x2,0x54,
+0x2,0x64,
+0x2,0x74,
+0x2,0x05,
+0x2,0x15,
+0x2,0x25,
+0x2,0x35,
+0x2,0x45,
+0x2,0x55,
+0x2,0x65,
+0x2,0x75,
+0x2,0x06,
+0x2,0x16,
+0x2,0x26,
+0x2,0x36,
+0x2,0x46,
+0x2,0x56,
+0x2,0x66,
+0x2,0x76,
+0x2,0x07,
+0x2,0x17,
+0x2,0x27,
+0x2,0x37,
+0x2,0x47,
+0x2,0x57,
+0x2,0x67,
+0x2,0x77
+};
+
+static const uint8_t pc_tbl4[] = {
+0x8,0x00,0x00,0x00,0x00,
+0x8,0x00,0x00,0x00,0x00,
+0x8,0x20,0x00,0x00,0x00,
+0x8,0x00,0x00,0x00,0x01,
+0x8,0x10,0x00,0x00,0x00,
+0x8,0x00,0x00,0x00,0x02,
+0x8,0x01,0x00,0x00,0x00,
+0x8,0x00,0x00,0x00,0x10,
+0x8,0x02,0x00,0x00,0x00,
+0x6,0x00,0x00,0x00,
+0x6,0x20,0x00,0x00,
+0x6,0x00,0x00,0x01,
+0x6,0x10,0x00,0x00,
+0x6,0x00,0x00,0x02,
+0x6,0x00,0x10,0x00,
+0x6,0x00,0x20,0x00,
+0x6,0x00,0x02,0x00,
+0x6,0x00,0x01,0x00,
+0x6,0x01,0x00,0x00,
+0x6,0x00,0x00,0x20,
+0x6,0x02,0x00,0x00,
+0x6,0x00,0x00,0x10,
+0x6,0x10,0x00,0x20,
+0x6,0x01,0x00,0x02,
+0x6,0x20,0x00,0x10,
+0x6,0x02,0x00,0x01,
+0x6,0x20,0x10,0x00,
+0x6,0x00,0x12,0x00,
+0x6,0x00,0x02,0x01,
+0x6,0x02,0x01,0x00,
+0x6,0x00,0x21,0x00,
+0x6,0x00,0x01,0x02,
+0x6,0x00,0x20,0x10,
+0x6,0x00,0x00,0x21,
+0x6,0x00,0x00,0x12,
+0x6,0x00,0x01,0x20,
+0x6,0x12,0x00,0x00,
+0x6,0x00,0x10,0x20,
+0x6,0x01,0x20,0x00,
+0x6,0x02,0x10,0x00,
+0x6,0x10,0x20,0x00,
+0x6,0x01,0x02,0x00,
+0x6,0x21,0x00,0x00,
+0x6,0x00,0x02,0x10,
+0x6,0x20,0x01,0x00,
+0x6,0x00,0x22,0x00,
+0x6,0x10,0x02,0x00,
+0x6,0x00,0x10,0x02,
+0x6,0x11,0x00,0x00,
+0x6,0x00,0x11,0x00,
+0x6,0x22,0x00,0x00,
+0x6,0x20,0x00,0x02,
+0x6,0x10,0x00,0x01,
+0x6,0x00,0x20,0x01,
+0x6,0x02,0x20,0x00,
+0x6,0x01,0x10,0x00,
+0x6,0x01,0x00,0x20,
+0x6,0x00,0x20,0x02,
+0x6,0x01,0x20,0x02,
+0x6,0x10,0x01,0x00,
+0x6,0x02,0x00,0x10,
+0x6,0x00,0x10,0x01,
+0x6,0x10,0x01,0x20,
+0x6,0x20,0x02,0x10,
+0x6,0x00,0x00,0x22,
+0x6,0x10,0x00,0x02,
+0x6,0x00,0x02,0x20,
+0x6,0x20,0x02,0x00,
+0x6,0x00,0x00,0x11,
+0x6,0x02,0x10,0x01,
+0x6,0x00,0x01,0x10,
+0x6,0x00,0x02,0x11,
+0x4,0x01,0x02,
+0x4,0x02,0x01,
+0x4,0x01,0x00,
+0x4,0x10,0x20,
+0x4,0x20,0x10,
+0x4,0x20,0x00,
+0x4,0x11,0x00,
+0x4,0x02,0x00,
+0x4,0x12,0x00,
+0x4,0x00,0x21,
+0x4,0x22,0x00,
+0x4,0x00,0x12,
+0x4,0x21,0x00,
+0x4,0x02,0x11,
+0x4,0x00,0x01,
+0x4,0x10,0x02,
+0x4,0x02,0x20,
+0x4,0x20,0x11,
+0x4,0x01,0x10,
+0x4,0x21,0x10,
+0x4,0x10,0x00,
+0x4,0x10,0x22,
+0x4,0x20,0x20,
+0x4,0x00,0x22,
+0x4,0x01,0x22,
+0x4,0x20,0x01,
+0x4,0x02,0x02,
+0x4,0x00,0x20,
+0x4,0x00,0x10,
+0x4,0x00,0x11,
+0x4,0x22,0x01,
+0x4,0x11,0x20,
+0x4,0x12,0x01,
+0x4,0x12,0x20,
+0x4,0x11,0x02,
+0x4,0x10,0x10,
+0x4,0x01,0x01,
+0x4,0x02,0x21,
+0x4,0x20,0x12,
+0x4,0x01,0x12,
+0x4,0x22,0x11,
+0x4,0x21,0x12,
+0x4,0x22,0x10,
+0x4,0x21,0x02,
+0x4,0x20,0x02,
+0x4,0x10,0x01,
+0x4,0x00,0x02,
+0x4,0x10,0x21,
+0x4,0x01,0x20,
+0x4,0x11,0x22,
+0x4,0x12,0x21,
+0x4,0x22,0x20,
+0x4,0x02,0x10,
+0x4,0x02,0x22,
+0x4,0x11,0x10,
+0x4,0x22,0x02,
+0x4,0x20,0x21,
+0x4,0x01,0x11,
+0x4,0x11,0x01,
+0x4,0x10,0x12,
+0x4,0x02,0x12,
+0x4,0x20,0x22,
+0x4,0x21,0x20,
+0x4,0x01,0x21,
+0x4,0x12,0x02,
+0x4,0x21,0x11,
+0x4,0x12,0x22,
+0x4,0x12,0x10,
+0x4,0x22,0x21,
+0x4,0x10,0x11,
+0x4,0x21,0x01,
+0x4,0x11,0x12,
+0x4,0x12,0x11,
+0x4,0x66,0x66,
+0x4,0x22,0x22,
+0x4,0x11,0x21,
+0x4,0x11,0x11,
+0x4,0x21,0x22,
+0x4,0x00,0x00,
+0x4,0x22,0x12,
+0x4,0x12,0x12,
+0x4,0x21,0x21,
+0x4,0x42,0x00,
+0x4,0x00,0x04,
+0x4,0x40,0x00,
+0x4,0x30,0x00,
+0x4,0x31,0x00,
+0x4,0x00,0x03,
+0x4,0x00,0x14,
+0x4,0x00,0x13,
+0x4,0x01,0x24,
+0x4,0x20,0x13,
+0x4,0x01,0x42,
+0x4,0x14,0x20,
+0x4,0x42,0x02,
+0x4,0x13,0x00,
+0x4,0x00,0x24,
+0x4,0x31,0x20,
+0x4,0x22,0x13,
+0x4,0x11,0x24,
+0x4,0x12,0x66,
+0x4,0x30,0x01,
+0x4,0x02,0x13,
+0x4,0x12,0x42,
+0x4,0x40,0x10,
+0x4,0x40,0x02,
+0x4,0x01,0x04,
+0x4,0x24,0x00,
+0x4,0x42,0x10,
+0x4,0x21,0x13,
+0x4,0x13,0x12,
+0x4,0x31,0x21,
+0x4,0x21,0x24,
+0x4,0x00,0x40,
+0x4,0x10,0x24,
+0x4,0x10,0x42,
+0x4,0x32,0x01,
+0x4,0x11,0x42,
+0x4,0x20,0x31,
+0x4,0x12,0x40,
+0x2,0x00,
+0x2,0x10,
+0x2,0x20,
+0x2,0x30,
+0x2,0x40,
+0x2,0x50,
+0x2,0x60,
+0x2,0x70,
+0x2,0x01,
+0x2,0x11,
+0x2,0x21,
+0x2,0x31,
+0x2,0x41,
+0x2,0x51,
+0x2,0x61,
+0x2,0x71,
+0x2,0x02,
+0x2,0x12,
+0x2,0x22,
+0x2,0x32,
+0x2,0x42,
+0x2,0x52,
+0x2,0x62,
+0x2,0x72,
+0x2,0x03,
+0x2,0x13,
+0x2,0x23,
+0x2,0x33,
+0x2,0x43,
+0x2,0x53,
+0x2,0x63,
+0x2,0x73,
+0x2,0x04,
+0x2,0x14,
+0x2,0x24,
+0x2,0x34,
+0x2,0x44,
+0x2,0x54,
+0x2,0x64,
+0x2,0x74,
+0x2,0x05,
+0x2,0x15,
+0x2,0x25,
+0x2,0x35,
+0x2,0x45,
+0x2,0x55,
+0x2,0x65,
+0x2,0x75,
+0x2,0x06,
+0x2,0x16,
+0x2,0x26,
+0x2,0x36,
+0x2,0x46,
+0x2,0x56,
+0x2,0x66,
+0x2,0x76,
+0x2,0x07,
+0x2,0x17,
+0x2,0x27,
+0x2,0x37,
+0x2,0x47,
+0x2,0x57,
+0x2,0x67,
+0x2,0x77
+};
+
+static const uint8_t *tables[] = { pc_tbl2, pc_tbl3, pc_tbl4 };
+#endif
diff --git a/contrib/ffmpeg/libavcodec/truemotion2.c b/contrib/ffmpeg/libavcodec/truemotion2.c
new file mode 100644
index 000000000..1b67bd22a
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/truemotion2.c
@@ -0,0 +1,893 @@
+/*
+ * Duck/ON2 TrueMotion 2 Decoder
+ * Copyright (c) 2005 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file truemotion2.c
+ * Duck TrueMotion2 decoder.
+ */
+
+#include "avcodec.h"
+#include "common.h"
+#include "bitstream.h"
+#include "dsputil.h"
+
+#define TM2_ESCAPE 0x80000000
+#define TM2_DELTAS 64
+/* Huffman-coded streams of different types of blocks */
+enum TM2_STREAMS{ TM2_C_HI = 0, TM2_C_LO, TM2_L_HI, TM2_L_LO,
+     TM2_UPD, TM2_MOT, TM2_TYPE, TM2_NUM_STREAMS};
+/* Block types */
+enum TM2_BLOCKS{ TM2_HI_RES = 0, TM2_MED_RES, TM2_LOW_RES, TM2_NULL_RES,
+                 TM2_UPDATE, TM2_STILL, TM2_MOTION};
+
+typedef struct TM2Context{
+    AVCodecContext *avctx;
+    AVFrame pic;
+
+    GetBitContext gb;
+    DSPContext dsp;
+
+    /* TM2 streams */
+    int *tokens[TM2_NUM_STREAMS];
+    int tok_lens[TM2_NUM_STREAMS];
+    int tok_ptrs[TM2_NUM_STREAMS];
+    int deltas[TM2_NUM_STREAMS][TM2_DELTAS];
+    /* for blocks decoding */
+    int D[4];
+    int CD[4];
+    int *last;
+    int *clast;
+
+    /* data for current and previous frame */
+    int *Y1, *U1, *V1, *Y2, *U2, *V2;
+    int cur;
+} TM2Context;
+
+/**
+* Huffman codes for each of streams
+*/
+typedef struct TM2Codes{
+    VLC vlc; ///< table for FFmpeg bitstream reader
+    int bits;
+    int *recode; ///< table for converting from code indexes to values
+    int length;
+} TM2Codes;
+
+/**
+* structure for gathering Huffman codes information
+*/
+typedef struct TM2Huff{
+    int val_bits; ///< length of literal
+    int max_bits; ///< maximum length of code
+    int min_bits; ///< minimum length of code
+    int nodes; ///< total number of nodes in tree
+    int num; ///< current number filled
+    int max_num; ///< total number of codes
+    int *nums; ///< literals
+    uint32_t *bits; ///< codes
+    int *lens; ///< codelengths
+} TM2Huff;
+
+static int tm2_read_tree(TM2Context *ctx, uint32_t prefix, int length, TM2Huff *huff)
+{
+    if(length > huff->max_bits) {
+        av_log(ctx->avctx, AV_LOG_ERROR, "Tree exceeded its given depth (%i)\n", huff->max_bits);
+        return -1;
+    }
+
+    if(!get_bits1(&ctx->gb)) { /* literal */
+        if (length == 0) {
+            length = 1;
+        }
+        if(huff->num >= huff->max_num) {
+            av_log(ctx->avctx, AV_LOG_DEBUG, "Too many literals\n");
+            return -1;
+        }
+        huff->nums[huff->num] = get_bits_long(&ctx->gb, huff->val_bits);
+        huff->bits[huff->num] = prefix;
+        huff->lens[huff->num] = length;
+        huff->num++;
+        return 0;
+    } else { /* non-terminal node */
+        if(tm2_read_tree(ctx, prefix << 1, length + 1, huff) == -1)
+            return -1;
+        if(tm2_read_tree(ctx, (prefix << 1) | 1, length + 1, huff) == -1)
+            return -1;
+    }
+    return 0;
+}
+
+static int tm2_build_huff_table(TM2Context *ctx, TM2Codes *code)
+{
+    TM2Huff huff;
+    int res = 0;
+
+    huff.val_bits = get_bits(&ctx->gb, 5);
+    huff.max_bits = get_bits(&ctx->gb, 5);
+    huff.min_bits = get_bits(&ctx->gb, 5);
+    huff.nodes = get_bits_long(&ctx->gb, 17);
+    huff.num = 0;
+
+    /* check for correct codes parameters */
+    if((huff.val_bits < 1) || (huff.val_bits > 32) ||
+       (huff.max_bits < 0) || (huff.max_bits > 32)) {
+        av_log(ctx->avctx, AV_LOG_ERROR, "Incorrect tree parameters - literal length: %i, max code length: %i\n",
+               huff.val_bits, huff.max_bits);
+        return -1;
+    }
+    if((huff.nodes < 0) || (huff.nodes > 0x10000)) {
+        av_log(ctx->avctx, AV_LOG_ERROR, "Incorrect number of Huffman tree nodes: %i\n", huff.nodes);
+        return -1;
+    }
+    /* one-node tree */
+    if(huff.max_bits == 0)
+        huff.max_bits = 1;
+
+    /* allocate space for codes - it is exactly ceil(nodes / 2) entries */
+    huff.max_num = (huff.nodes + 1) >> 1;
+    huff.nums = av_mallocz(huff.max_num * sizeof(int));
+    huff.bits = av_mallocz(huff.max_num * sizeof(uint32_t));
+    huff.lens = av_mallocz(huff.max_num * sizeof(int));
+
+    if(tm2_read_tree(ctx, 0, 0, &huff) == -1)
+        res = -1;
+
+    if(huff.num != huff.max_num) {
+        av_log(ctx->avctx, AV_LOG_ERROR, "Got less codes than expected: %i of %i\n",
+               huff.num, huff.max_num);
+        res = -1;
+    }
+
+    /* convert codes to vlc_table */
+    if(res != -1) {
+        int i;
+
+        res = init_vlc(&code->vlc, huff.max_bits, huff.max_num,
+                    huff.lens, sizeof(int), sizeof(int),
+                    huff.bits, sizeof(uint32_t), sizeof(uint32_t), 0);
+        if(res < 0) {
+            av_log(ctx->avctx, AV_LOG_ERROR, "Cannot build VLC table\n");
+            res = -1;
+        } else
+            res = 0;
+        if(res != -1) {
+            code->bits = huff.max_bits;
+            code->length = huff.max_num;
+            code->recode = av_malloc(code->length * sizeof(int));
+            for(i = 0; i < code->length; i++)
+                code->recode[i] = huff.nums[i];
+        }
+    }
+    /* free allocated memory */
+    av_free(huff.nums);
+    av_free(huff.bits);
+    av_free(huff.lens);
+
+    return res;
+}
+
+static void tm2_free_codes(TM2Codes *code)
+{
+    if(code->recode)
+        av_free(code->recode);
+    if(code->vlc.table)
+        free_vlc(&code->vlc);
+}
+
+static inline int tm2_get_token(GetBitContext *gb, TM2Codes *code)
+{
+    int val;
+    val = get_vlc2(gb, code->vlc.table, code->bits, 1);
+    return code->recode[val];
+}
+
+static inline int tm2_read_header(TM2Context *ctx, uint8_t *buf)
+{
+    uint32_t magic;
+    uint8_t *obuf;
+    int length;
+
+    obuf = buf;
+
+    magic = LE_32(buf);
+    buf += 4;
+
+    if(magic == 0x00000100) { /* old header */
+/*      av_log (ctx->avctx, AV_LOG_ERROR, "TM2 old header: not implemented (yet)\n"); */
+        return 40;
+    } else if(magic == 0x00000101) { /* new header */
+        int w, h, size, flags, xr, yr;
+
+        length = LE_32(buf);
+        buf += 4;
+
+        init_get_bits(&ctx->gb, buf, 32 * 8);
+        size = get_bits_long(&ctx->gb, 31);
+        h = get_bits(&ctx->gb, 15);
+        w = get_bits(&ctx->gb, 15);
+        flags = get_bits_long(&ctx->gb, 31);
+        yr = get_bits(&ctx->gb, 9);
+        xr = get_bits(&ctx->gb, 9);
+
+        return 40;
+    } else {
+        av_log (ctx->avctx, AV_LOG_ERROR, "Not a TM2 header: 0x%08X\n", magic);
+        return -1;
+    }
+
+    return (buf - obuf);
+}
+
+static int tm2_read_deltas(TM2Context *ctx, int stream_id) {
+    int d, mb;
+    int i, v;
+
+    d = get_bits(&ctx->gb, 9);
+    mb = get_bits(&ctx->gb, 5);
+
+    if((d < 1) || (d > TM2_DELTAS) || (mb < 1) || (mb > 32)) {
+        av_log(ctx->avctx, AV_LOG_ERROR, "Incorrect delta table: %i deltas x %i bits\n", d, mb);
+        return -1;
+    }
+
+    for(i = 0; i < d; i++) {
+        v = get_bits_long(&ctx->gb, mb);
+        if(v & (1 << (mb - 1)))
+            ctx->deltas[stream_id][i] = v - (1 << mb);
+        else
+            ctx->deltas[stream_id][i] = v;
+    }
+    for(; i < TM2_DELTAS; i++)
+        ctx->deltas[stream_id][i] = 0;
+
+    return 0;
+}
+
+static int tm2_read_stream(TM2Context *ctx, uint8_t *buf, int stream_id) {
+    int i;
+    int cur = 0;
+    int skip = 0;
+    int len, toks;
+    TM2Codes codes;
+
+    /* get stream length in dwords */
+    len = BE_32(buf); buf += 4; cur += 4;
+    skip = len * 4 + 4;
+
+    if(len == 0)
+        return 4;
+
+    toks = BE_32(buf); buf += 4; cur += 4;
+    if(toks & 1) {
+        len = BE_32(buf); buf += 4; cur += 4;
+        if(len == TM2_ESCAPE) {
+            len = BE_32(buf); buf += 4; cur += 4;
+        }
+        if(len > 0) {
+            init_get_bits(&ctx->gb, buf, (skip - cur) * 8);
+            if(tm2_read_deltas(ctx, stream_id) == -1)
+                return -1;
+            buf += ((get_bits_count(&ctx->gb) + 31) >> 5) << 2;
+            cur += ((get_bits_count(&ctx->gb) + 31) >> 5) << 2;
+        }
+    }
+    /* skip unused fields */
+    if(BE_32(buf) == TM2_ESCAPE) {
+        buf += 4; cur += 4; /* some unknown length - could be escaped too */
+    }
+    buf += 4; cur += 4;
+    buf += 4; cur += 4; /* unused by decoder */
+
+    init_get_bits(&ctx->gb, buf, (skip - cur) * 8);
+    if(tm2_build_huff_table(ctx, &codes) == -1)
+        return -1;
+    buf += ((get_bits_count(&ctx->gb) + 31) >> 5) << 2;
+    cur += ((get_bits_count(&ctx->gb) + 31) >> 5) << 2;
+
+    toks >>= 1;
+    /* check if we have sane number of tokens */
+    if((toks < 0) || (toks > 0xFFFFFF)){
+        av_log(ctx->avctx, AV_LOG_ERROR, "Incorrect number of tokens: %i\n", toks);
+        tm2_free_codes(&codes);
+        return -1;
+    }
+    ctx->tokens[stream_id] = av_realloc(ctx->tokens[stream_id], toks * sizeof(int));
+    ctx->tok_lens[stream_id] = toks;
+    len = BE_32(buf); buf += 4; cur += 4;
+    if(len > 0) {
+        init_get_bits(&ctx->gb, buf, (skip - cur) * 8);
+        for(i = 0; i < toks; i++)
+            ctx->tokens[stream_id][i] = tm2_get_token(&ctx->gb, &codes);
+    } else {
+        for(i = 0; i < toks; i++)
+            ctx->tokens[stream_id][i] = codes.recode[0];
+    }
+    tm2_free_codes(&codes);
+
+    return skip;
+}
+
+static inline int GET_TOK(TM2Context *ctx,int type) {
+    if(ctx->tok_ptrs[type] >= ctx->tok_lens[type]) {
+        av_log(ctx->avctx, AV_LOG_ERROR, "Read token from stream %i out of bounds (%i>=%i)\n", type, ctx->tok_ptrs[type], ctx->tok_lens[type]);
+        return 0;
+    }
+    if(type <= TM2_MOT)
+        return ctx->deltas[type][ctx->tokens[type][ctx->tok_ptrs[type]++]];
+    return ctx->tokens[type][ctx->tok_ptrs[type]++];
+}
+
+/* blocks decoding routines */
+
+/* common Y, U, V pointers initialisation */
+#define TM2_INIT_POINTERS() \
+    int *last, *clast; \
+    int *Y, *U, *V;\
+    int Ystride, Ustride, Vstride;\
+\
+    Ystride = ctx->avctx->width;\
+    Vstride = (ctx->avctx->width + 1) >> 1;\
+    Ustride = (ctx->avctx->width + 1) >> 1;\
+    Y = (ctx->cur?ctx->Y2:ctx->Y1) + by * 4 * Ystride + bx * 4;\
+    V = (ctx->cur?ctx->V2:ctx->V1) + by * 2 * Vstride + bx * 2;\
+    U = (ctx->cur?ctx->U2:ctx->U1) + by * 2 * Ustride + bx * 2;\
+    last = ctx->last + bx * 4;\
+    clast = ctx->clast + bx * 4;
+
+#define TM2_INIT_POINTERS_2() \
+    int *Yo, *Uo, *Vo;\
+    int oYstride, oUstride, oVstride;\
+\
+    TM2_INIT_POINTERS();\
+    oYstride = Ystride;\
+    oVstride = Vstride;\
+    oUstride = Ustride;\
+    Yo = (ctx->cur?ctx->Y1:ctx->Y2) + by * 4 * oYstride + bx * 4;\
+    Vo = (ctx->cur?ctx->V1:ctx->V2) + by * 2 * oVstride + bx * 2;\
+    Uo = (ctx->cur?ctx->U1:ctx->U2) + by * 2 * oUstride + bx * 2;
+
+/* recalculate last and delta values for next blocks */
+#define TM2_RECALC_BLOCK(CHR, stride, last, CD) {\
+    CD[0] = (CHR[1] - 128) - last[1];\
+    CD[1] = (int)CHR[stride + 1] - (int)CHR[1];\
+    last[0] = (int)CHR[stride + 0] - 128;\
+    last[1] = (int)CHR[stride + 1] - 128;}
+
+/* common operations - add deltas to 4x4 block of luma or 2x2 blocks of chroma */
+static inline void tm2_apply_deltas(TM2Context *ctx, int* Y, int stride, int *deltas, int *last)
+{
+    int ct, d;
+    int i, j;
+
+    for(j = 0; j < 4; j++){
+        ct = ctx->D[j];
+        for(i = 0; i < 4; i++){
+            d = deltas[i + j * 4];
+            ct += d;
+            last[i] += ct;
+            Y[i] = clip_uint8(last[i]);
+        }
+        Y += stride;
+        ctx->D[j] = ct;
+    }
+}
+
+static inline void tm2_high_chroma(int *data, int stride, int *last, int *CD, int *deltas)
+{
+    int i, j;
+    for(j = 0; j < 2; j++){
+        for(i = 0; i < 2; i++){
+            CD[j] += deltas[i + j * 2];
+            last[i] += CD[j];
+            data[i] = last[i] + 128;
+        }
+        data += stride;
+    }
+}
+
+static inline void tm2_low_chroma(int *data, int stride, int *clast, int *CD, int *deltas, int bx)
+{
+    int t;
+    int l;
+    int prev;
+
+    if(bx > 0)
+        prev = clast[-3];
+    else
+        prev = 0;
+    t = (CD[0] + CD[1]) >> 1;
+    l = (prev - CD[0] - CD[1] + clast[1]) >> 1;
+    CD[1] = CD[0] + CD[1] - t;
+    CD[0] = t;
+    clast[0] = l;
+
+    tm2_high_chroma(data, stride, clast, CD, deltas);
+}
+
+static inline void tm2_hi_res_block(TM2Context *ctx, AVFrame *pic, int bx, int by)
+{
+    int i;
+    int deltas[16];
+    TM2_INIT_POINTERS();
+
+    /* hi-res chroma */
+    for(i = 0; i < 4; i++) {
+        deltas[i] = GET_TOK(ctx, TM2_C_HI);
+        deltas[i + 4] = GET_TOK(ctx, TM2_C_HI);
+    }
+    tm2_high_chroma(U, Ustride, clast, ctx->CD, deltas);
+    tm2_high_chroma(V, Vstride, clast + 2, ctx->CD + 2, deltas + 4);
+
+    /* hi-res luma */
+    for(i = 0; i < 16; i++)
+        deltas[i] = GET_TOK(ctx, TM2_L_HI);
+
+    tm2_apply_deltas(ctx, Y, Ystride, deltas, last);
+}
+
+static inline void tm2_med_res_block(TM2Context *ctx, AVFrame *pic, int bx, int by)
+{
+    int i;
+    int deltas[16];
+    TM2_INIT_POINTERS();
+
+    /* low-res chroma */
+    deltas[0] = GET_TOK(ctx, TM2_C_LO);
+    deltas[1] = deltas[2] = deltas[3] = 0;
+    tm2_low_chroma(U, Ustride, clast, ctx->CD, deltas, bx);
+
+    deltas[0] = GET_TOK(ctx, TM2_C_LO);
+    deltas[1] = deltas[2] = deltas[3] = 0;
+    tm2_low_chroma(V, Vstride, clast + 2, ctx->CD + 2, deltas, bx);
+
+    /* hi-res luma */
+    for(i = 0; i < 16; i++)
+        deltas[i] = GET_TOK(ctx, TM2_L_HI);
+
+    tm2_apply_deltas(ctx, Y, Ystride, deltas, last);
+}
+
+static inline void tm2_low_res_block(TM2Context *ctx, AVFrame *pic, int bx, int by)
+{
+    int i;
+    int t1, t2;
+    int deltas[16];
+    TM2_INIT_POINTERS();
+
+    /* low-res chroma */
+    deltas[0] = GET_TOK(ctx, TM2_C_LO);
+    deltas[1] = deltas[2] = deltas[3] = 0;
+    tm2_low_chroma(U, Ustride, clast, ctx->CD, deltas, bx);
+
+    deltas[0] = GET_TOK(ctx, TM2_C_LO);
+    deltas[1] = deltas[2] = deltas[3] = 0;
+    tm2_low_chroma(V, Vstride, clast + 2, ctx->CD + 2, deltas, bx);
+
+    /* low-res luma */
+    for(i = 0; i < 16; i++)
+        deltas[i] = 0;
+
+    deltas[ 0] = GET_TOK(ctx, TM2_L_LO);
+    deltas[ 2] = GET_TOK(ctx, TM2_L_LO);
+    deltas[ 8] = GET_TOK(ctx, TM2_L_LO);
+    deltas[10] = GET_TOK(ctx, TM2_L_LO);
+
+    if(bx > 0)
+        last[0] = (last[-1] - ctx->D[0] - ctx->D[1] - ctx->D[2] - ctx->D[3] + last[1]) >> 1;
+    else
+        last[0] = (last[1]  - ctx->D[0] - ctx->D[1] - ctx->D[2] - ctx->D[3])>> 1;
+    last[2] = (last[1] + last[3]) >> 1;
+
+    t1 = ctx->D[0] + ctx->D[1];
+    ctx->D[0] = t1 >> 1;
+    ctx->D[1] = t1 - (t1 >> 1);
+    t2 = ctx->D[2] + ctx->D[3];
+    ctx->D[2] = t2 >> 1;
+    ctx->D[3] = t2 - (t2 >> 1);
+
+    tm2_apply_deltas(ctx, Y, Ystride, deltas, last);
+}
+
+static inline void tm2_null_res_block(TM2Context *ctx, AVFrame *pic, int bx, int by)
+{
+    int i;
+    int ct;
+    int left, right, diff;
+    int deltas[16];
+    TM2_INIT_POINTERS();
+
+    /* null chroma */
+    deltas[0] = deltas[1] = deltas[2] = deltas[3] = 0;
+    tm2_low_chroma(U, Ustride, clast, ctx->CD, deltas, bx);
+
+    deltas[0] = deltas[1] = deltas[2] = deltas[3] = 0;
+    tm2_low_chroma(V, Vstride, clast + 2, ctx->CD + 2, deltas, bx);
+
+    /* null luma */
+    for(i = 0; i < 16; i++)
+        deltas[i] = 0;
+
+    ct = ctx->D[0] + ctx->D[1] + ctx->D[2] + ctx->D[3];
+
+    if(bx > 0)
+        left = last[-1] - ct;
+    else
+        left = 0;
+
+    right = last[3];
+    diff = right - left;
+    last[0] = left + (diff >> 2);
+    last[1] = left + (diff >> 1);
+    last[2] = right - (diff >> 2);
+    last[3] = right;
+    {
+        int tp = left;
+
+        ctx->D[0] = (tp + (ct >> 2)) - left;
+        left += ctx->D[0];
+        ctx->D[1] = (tp + (ct >> 1)) - left;
+        left += ctx->D[1];
+        ctx->D[2] = ((tp + ct) - (ct >> 2)) - left;
+        left += ctx->D[2];
+        ctx->D[3] = (tp + ct) - left;
+    }
+    tm2_apply_deltas(ctx, Y, Ystride, deltas, last);
+}
+
+static inline void tm2_still_block(TM2Context *ctx, AVFrame *pic, int bx, int by)
+{
+    int i, j;
+    TM2_INIT_POINTERS_2();
+
+    /* update chroma */
+    for(j = 0; j < 2; j++){
+        for(i = 0; i < 2; i++){
+            U[i] = Uo[i];
+            V[i] = Vo[i];
+        }
+        U += Ustride; V += Vstride;
+        Uo += oUstride; Vo += oVstride;
+    }
+    U -= Ustride * 2;
+    V -= Vstride * 2;
+    TM2_RECALC_BLOCK(U, Ustride, clast, ctx->CD);
+    TM2_RECALC_BLOCK(V, Vstride, (clast + 2), (ctx->CD + 2));
+
+    /* update deltas */
+    ctx->D[0] = Yo[3] - last[3];
+    ctx->D[1] = Yo[3 + oYstride] - Yo[3];
+    ctx->D[2] = Yo[3 + oYstride * 2] - Yo[3 + oYstride];
+    ctx->D[3] = Yo[3 + oYstride * 3] - Yo[3 + oYstride * 2];
+
+    for(j = 0; j < 4; j++){
+        for(i = 0; i < 4; i++){
+            Y[i] = Yo[i];
+            last[i] = Yo[i];
+        }
+        Y += Ystride;
+        Yo += oYstride;
+    }
+}
+
+static inline void tm2_update_block(TM2Context *ctx, AVFrame *pic, int bx, int by)
+{
+    int i, j;
+    int d;
+    TM2_INIT_POINTERS_2();
+
+    /* update chroma */
+    for(j = 0; j < 2; j++){
+        for(i = 0; i < 2; i++){
+            U[i] = Uo[i] + GET_TOK(ctx, TM2_UPD);
+            V[i] = Vo[i] + GET_TOK(ctx, TM2_UPD);
+        }
+        U += Ustride; V += Vstride;
+        Uo += oUstride; Vo += oVstride;
+    }
+    U -= Ustride * 2;
+    V -= Vstride * 2;
+    TM2_RECALC_BLOCK(U, Ustride, clast, ctx->CD);
+    TM2_RECALC_BLOCK(V, Vstride, (clast + 2), (ctx->CD + 2));
+
+    /* update deltas */
+    ctx->D[0] = Yo[3] - last[3];
+    ctx->D[1] = Yo[3 + oYstride] - Yo[3];
+    ctx->D[2] = Yo[3 + oYstride * 2] - Yo[3 + oYstride];
+    ctx->D[3] = Yo[3 + oYstride * 3] - Yo[3 + oYstride * 2];
+
+    for(j = 0; j < 4; j++){
+        d = last[3];
+        for(i = 0; i < 4; i++){
+            Y[i] = Yo[i] + GET_TOK(ctx, TM2_UPD);
+            last[i] = Y[i];
+        }
+        ctx->D[j] = last[3] - d;
+        Y += Ystride;
+        Yo += oYstride;
+    }
+}
+
+static inline void tm2_motion_block(TM2Context *ctx, AVFrame *pic, int bx, int by)
+{
+    int i, j;
+    int mx, my;
+    TM2_INIT_POINTERS_2();
+
+    mx = GET_TOK(ctx, TM2_MOT);
+    my = GET_TOK(ctx, TM2_MOT);
+
+    Yo += my * oYstride + mx;
+    Uo += (my >> 1) * oUstride + (mx >> 1);
+    Vo += (my >> 1) * oVstride + (mx >> 1);
+
+    /* copy chroma */
+    for(j = 0; j < 2; j++){
+        for(i = 0; i < 2; i++){
+            U[i] = Uo[i];
+            V[i] = Vo[i];
+        }
+        U += Ustride; V += Vstride;
+        Uo += oUstride; Vo += oVstride;
+    }
+    U -= Ustride * 2;
+    V -= Vstride * 2;
+    TM2_RECALC_BLOCK(U, Ustride, clast, ctx->CD);
+    TM2_RECALC_BLOCK(V, Vstride, (clast + 2), (ctx->CD + 2));
+
+    /* copy luma */
+    for(j = 0; j < 4; j++){
+        for(i = 0; i < 4; i++){
+            Y[i] = Yo[i];
+        }
+        Y += Ystride;
+        Yo += oYstride;
+    }
+    /* calculate deltas */
+    Y -= Ystride * 4;
+    ctx->D[0] = Y[3] - last[3];
+    ctx->D[1] = Y[3 + Ystride] - Y[3];
+    ctx->D[2] = Y[3 + Ystride * 2] - Y[3 + Ystride];
+    ctx->D[3] = Y[3 + Ystride * 3] - Y[3 + Ystride * 2];
+    for(i = 0; i < 4; i++)
+        last[i] = Y[i + Ystride * 3];
+}
+
+static int tm2_decode_blocks(TM2Context *ctx, AVFrame *p)
+{
+    int i, j;
+    int bw, bh;
+    int type;
+    int keyframe = 1;
+    uint8_t *Y, *U, *V;
+    int *src;
+
+    bw = ctx->avctx->width >> 2;
+    bh = ctx->avctx->height >> 2;
+
+    for(i = 0; i < TM2_NUM_STREAMS; i++)
+        ctx->tok_ptrs[i] = 0;
+
+    if (ctx->tok_lens[TM2_TYPE]<bw*bh){
+        av_log(ctx->avctx,AV_LOG_ERROR,"Got %i tokens for %i blocks\n",ctx->tok_lens[TM2_TYPE],bw*bh);
+        return -1;
+    }
+
+    memset(ctx->last, 0, 4 * bw * sizeof(int));
+    memset(ctx->clast, 0, 4 * bw * sizeof(int));
+
+    for(j = 0; j < bh; j++) {
+        memset(ctx->D, 0, 4 * sizeof(int));
+        memset(ctx->CD, 0, 4 * sizeof(int));
+        for(i = 0; i < bw; i++) {
+            type = GET_TOK(ctx, TM2_TYPE);
+            switch(type) {
+            case TM2_HI_RES:
+                tm2_hi_res_block(ctx, p, i, j);
+                break;
+            case TM2_MED_RES:
+                tm2_med_res_block(ctx, p, i, j);
+                break;
+            case TM2_LOW_RES:
+                tm2_low_res_block(ctx, p, i, j);
+                break;
+            case TM2_NULL_RES:
+                tm2_null_res_block(ctx, p, i, j);
+                break;
+            case TM2_UPDATE:
+                tm2_update_block(ctx, p, i, j);
+                keyframe = 0;
+                break;
+            case TM2_STILL:
+                tm2_still_block(ctx, p, i, j);
+                keyframe = 0;
+                break;
+            case TM2_MOTION:
+                tm2_motion_block(ctx, p, i, j);
+                keyframe = 0;
+                break;
+            default:
+                av_log(ctx->avctx, AV_LOG_ERROR, "Skipping unknown block type %i\n", type);
+            }
+        }
+    }
+
+    /* copy data from our buffer to AVFrame */
+    Y = p->data[0];
+    src = (ctx->cur?ctx->Y2:ctx->Y1);
+    for(j = 0; j < ctx->avctx->height; j++){
+        for(i = 0; i < ctx->avctx->width; i++){
+            Y[i] = clip_uint8(*src++);
+        }
+        Y += p->linesize[0];
+    }
+    U = p->data[2];
+    src = (ctx->cur?ctx->U2:ctx->U1);
+    for(j = 0; j < (ctx->avctx->height + 1) >> 1; j++){
+        for(i = 0; i < (ctx->avctx->width + 1) >> 1; i++){
+            U[i] = clip_uint8(*src++);
+        }
+        U += p->linesize[2];
+    }
+    V = p->data[1];
+    src = (ctx->cur?ctx->V2:ctx->V1);
+    for(j = 0; j < (ctx->avctx->height + 1) >> 1; j++){
+        for(i = 0; i < (ctx->avctx->width + 1) >> 1; i++){
+            V[i] = clip_uint8(*src++);
+        }
+        V += p->linesize[1];
+    }
+
+    return keyframe;
+}
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    TM2Context * const l = avctx->priv_data;
+    AVFrame * const p= (AVFrame*)&l->pic;
+    int skip, t;
+
+    p->reference = 1;
+    p->buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if(avctx->reget_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    l->dsp.bswap_buf((uint32_t*)buf, (uint32_t*)buf, buf_size >> 2);
+    skip = tm2_read_header(l, buf);
+
+    if(skip == -1)
+        return -1;
+
+    t = tm2_read_stream(l, buf + skip, TM2_C_HI);
+    if(t == -1)
+        return -1;
+    skip += t;
+    t = tm2_read_stream(l, buf + skip, TM2_C_LO);
+    if(t == -1)
+        return -1;
+    skip += t;
+    t = tm2_read_stream(l, buf + skip, TM2_L_HI);
+    if(t == -1)
+        return -1;
+    skip += t;
+    t = tm2_read_stream(l, buf + skip, TM2_L_LO);
+    if(t == -1)
+        return -1;
+    skip += t;
+    t = tm2_read_stream(l, buf + skip, TM2_UPD);
+    if(t == -1)
+        return -1;
+    skip += t;
+    t = tm2_read_stream(l, buf + skip, TM2_MOT);
+    if(t == -1)
+        return -1;
+    skip += t;
+    t = tm2_read_stream(l, buf + skip, TM2_TYPE);
+    if(t == -1)
+        return -1;
+    p->key_frame = tm2_decode_blocks(l, p);
+    if(p->key_frame)
+        p->pict_type = FF_I_TYPE;
+    else
+        p->pict_type = FF_P_TYPE;
+
+    l->cur = !l->cur;
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = l->pic;
+
+    return buf_size;
+}
+
+static int decode_init(AVCodecContext *avctx){
+    TM2Context * const l = avctx->priv_data;
+    int i;
+
+    if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
+        return -1;
+    }
+    if((avctx->width & 3) || (avctx->height & 3)){
+        av_log(avctx, AV_LOG_ERROR, "Width and height must be multiple of 4\n");
+        return -1;
+    }
+
+    l->avctx = avctx;
+    l->pic.data[0]=NULL;
+    avctx->has_b_frames = 0;
+    avctx->pix_fmt = PIX_FMT_YUV420P;
+
+    dsputil_init(&l->dsp, avctx);
+
+    l->last = av_malloc(4 * sizeof(int) * (avctx->width >> 2));
+    l->clast = av_malloc(4 * sizeof(int) * (avctx->width >> 2));
+
+    for(i = 0; i < TM2_NUM_STREAMS; i++) {
+        l->tokens[i] = NULL;
+        l->tok_lens[i] = 0;
+    }
+
+    l->Y1 = av_malloc(sizeof(int) * avctx->width * avctx->height);
+    l->U1 = av_malloc(sizeof(int) * ((avctx->width + 1) >> 1) * ((avctx->height + 1) >> 1));
+    l->V1 = av_malloc(sizeof(int) * ((avctx->width + 1) >> 1) * ((avctx->height + 1) >> 1));
+    l->Y2 = av_malloc(sizeof(int) * avctx->width * avctx->height);
+    l->U2 = av_malloc(sizeof(int) * ((avctx->width + 1) >> 1) * ((avctx->height + 1) >> 1));
+    l->V2 = av_malloc(sizeof(int) * ((avctx->width + 1) >> 1) * ((avctx->height + 1) >> 1));
+    l->cur = 0;
+
+    return 0;
+}
+
+static int decode_end(AVCodecContext *avctx){
+    TM2Context * const l = avctx->priv_data;
+    int i;
+
+    if(l->last)
+        av_free(l->last);
+    if(l->clast)
+        av_free(l->clast);
+    for(i = 0; i < TM2_NUM_STREAMS; i++)
+        if(l->tokens[i])
+            av_free(l->tokens[i]);
+    if(l->Y1){
+        av_free(l->Y1);
+        av_free(l->U1);
+        av_free(l->V1);
+        av_free(l->Y2);
+        av_free(l->U2);
+        av_free(l->V2);
+    }
+    return 0;
+}
+
+AVCodec truemotion2_decoder = {
+    "truemotion2",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_TRUEMOTION2,
+    sizeof(TM2Context),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/truespeech.c b/contrib/ffmpeg/libavcodec/truespeech.c
new file mode 100644
index 000000000..077e9b037
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/truespeech.c
@@ -0,0 +1,381 @@
+/*
+ * DSP Group TrueSpeech compatible decoder
+ * Copyright (c) 2005 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+
+#include "truespeech_data.h"
+/**
+ * @file truespeech.c
+ * TrueSpeech decoder.
+ */
+
+/**
+ * TrueSpeech decoder context
+ */
+typedef struct {
+    /* input data */
+    int16_t vector[8];  //< input vector: 5/5/4/4/4/3/3/3
+    int offset1[2];     //< 8-bit value, used in one copying offset
+    int offset2[4];     //< 7-bit value, encodes offsets for copying and for two-point filter
+    int pulseoff[4];    //< 4-bit offset of pulse values block
+    int pulsepos[4];    //< 27-bit variable, encodes 7 pulse positions
+    int pulseval[4];    //< 7x2-bit pulse values
+    int flag;           //< 1-bit flag, shows how to choose filters
+    /* temporary data */
+    int filtbuf[146];   // some big vector used for storing filters
+    int prevfilt[8];    // filter from previous frame
+    int16_t tmp1[8];    // coefficients for adding to out
+    int16_t tmp2[8];    // coefficients for adding to out
+    int16_t tmp3[8];    // coefficients for adding to out
+    int16_t cvector[8]; // correlated input vector
+    int filtval;        // gain value for one function
+    int16_t newvec[60]; // tmp vector
+    int16_t filters[32]; // filters for every subframe
+} TSContext;
+
+static int truespeech_decode_init(AVCodecContext * avctx)
+{
+//    TSContext *c = avctx->priv_data;
+
+    return 0;
+}
+
+static void truespeech_read_frame(TSContext *dec, uint8_t *input)
+{
+    uint32_t t;
+
+    /* first dword */
+    t = LE_32(input);
+    input += 4;
+
+    dec->flag = t & 1;
+
+    dec->vector[0] = ts_codebook[0][(t >>  1) & 0x1F];
+    dec->vector[1] = ts_codebook[1][(t >>  6) & 0x1F];
+    dec->vector[2] = ts_codebook[2][(t >> 11) &  0xF];
+    dec->vector[3] = ts_codebook[3][(t >> 15) &  0xF];
+    dec->vector[4] = ts_codebook[4][(t >> 19) &  0xF];
+    dec->vector[5] = ts_codebook[5][(t >> 23) &  0x7];
+    dec->vector[6] = ts_codebook[6][(t >> 26) &  0x7];
+    dec->vector[7] = ts_codebook[7][(t >> 29) &  0x7];
+
+    /* second dword */
+    t = LE_32(input);
+    input += 4;
+
+    dec->offset2[0] = (t >>  0) & 0x7F;
+    dec->offset2[1] = (t >>  7) & 0x7F;
+    dec->offset2[2] = (t >> 14) & 0x7F;
+    dec->offset2[3] = (t >> 21) & 0x7F;
+
+    dec->offset1[0] = ((t >> 28) & 0xF) << 4;
+
+    /* third dword */
+    t = LE_32(input);
+    input += 4;
+
+    dec->pulseval[0] = (t >>  0) & 0x3FFF;
+    dec->pulseval[1] = (t >> 14) & 0x3FFF;
+
+    dec->offset1[1] = (t >> 28) & 0x0F;
+
+    /* fourth dword */
+    t = LE_32(input);
+    input += 4;
+
+    dec->pulseval[2] = (t >>  0) & 0x3FFF;
+    dec->pulseval[3] = (t >> 14) & 0x3FFF;
+
+    dec->offset1[1] |= ((t >> 28) & 0x0F) << 4;
+
+    /* fifth dword */
+    t = LE_32(input);
+    input += 4;
+
+    dec->pulsepos[0] = (t >> 4) & 0x7FFFFFF;
+
+    dec->pulseoff[0] = (t >> 0) & 0xF;
+
+    dec->offset1[0] |= (t >> 31) & 1;
+
+    /* sixth dword */
+    t = LE_32(input);
+    input += 4;
+
+    dec->pulsepos[1] = (t >> 4) & 0x7FFFFFF;
+
+    dec->pulseoff[1] = (t >> 0) & 0xF;
+
+    dec->offset1[0] |= ((t >> 31) & 1) << 1;
+
+    /* seventh dword */
+    t = LE_32(input);
+    input += 4;
+
+    dec->pulsepos[2] = (t >> 4) & 0x7FFFFFF;
+
+    dec->pulseoff[2] = (t >> 0) & 0xF;
+
+    dec->offset1[0] |= ((t >> 31) & 1) << 2;
+
+    /* eighth dword */
+    t = LE_32(input);
+    input += 4;
+
+    dec->pulsepos[3] = (t >> 4) & 0x7FFFFFF;
+
+    dec->pulseoff[3] = (t >> 0) & 0xF;
+
+    dec->offset1[0] |= ((t >> 31) & 1) << 3;
+
+}
+
+static void truespeech_correlate_filter(TSContext *dec)
+{
+    int16_t tmp[8];
+    int i, j;
+
+    for(i = 0; i < 8; i++){
+        if(i > 0){
+            memcpy(tmp, dec->cvector, i * 2);
+            for(j = 0; j < i; j++)
+                dec->cvector[j] = ((tmp[i - j - 1] * dec->vector[i]) +
+                                   (dec->cvector[j] << 15) + 0x4000) >> 15;
+        }
+        dec->cvector[i] = (8 - dec->vector[i]) >> 3;
+    }
+    for(i = 0; i < 8; i++)
+        dec->cvector[i] = (dec->cvector[i] * ts_230[i]) >> 15;
+
+    dec->filtval = dec->vector[0];
+}
+
+static void truespeech_filters_merge(TSContext *dec)
+{
+    int i;
+
+    if(!dec->flag){
+        for(i = 0; i < 8; i++){
+            dec->filters[i + 0] = dec->prevfilt[i];
+            dec->filters[i + 8] = dec->prevfilt[i];
+        }
+    }else{
+        for(i = 0; i < 8; i++){
+            dec->filters[i + 0]=(dec->cvector[i] * 21846 + dec->prevfilt[i] * 10923 + 16384) >> 15;
+            dec->filters[i + 8]=(dec->cvector[i] * 10923 + dec->prevfilt[i] * 21846 + 16384) >> 15;
+        }
+    }
+    for(i = 0; i < 8; i++){
+        dec->filters[i + 16] = dec->cvector[i];
+        dec->filters[i + 24] = dec->cvector[i];
+    }
+}
+
+static void truespeech_apply_twopoint_filter(TSContext *dec, int quart)
+{
+    int16_t tmp[146 + 60], *ptr0, *ptr1;
+    const int16_t *filter;
+    int i, t, off;
+
+    t = dec->offset2[quart];
+    if(t == 127){
+        memset(dec->newvec, 0, 60 * 2);
+        return;
+    }
+    for(i = 0; i < 146; i++)
+        tmp[i] = dec->filtbuf[i];
+    off = (t / 25) + dec->offset1[quart >> 1] + 18;
+    ptr0 = tmp + 145 - off;
+    ptr1 = tmp + 146;
+    filter = (const int16_t*)ts_240 + (t % 25) * 2;
+    for(i = 0; i < 60; i++){
+        t = (ptr0[0] * filter[0] + ptr0[1] * filter[1] + 0x2000) >> 14;
+        ptr0++;
+        dec->newvec[i] = t;
+        ptr1[i] = t;
+    }
+}
+
+static void truespeech_place_pulses(TSContext *dec, int16_t *out, int quart)
+{
+    int16_t tmp[7];
+    int i, j, t;
+    const int16_t *ptr1;
+    int16_t *ptr2;
+    int coef;
+
+    memset(out, 0, 60 * 2);
+    for(i = 0; i < 7; i++) {
+        t = dec->pulseval[quart] & 3;
+        dec->pulseval[quart] >>= 2;
+        tmp[6 - i] = ts_562[dec->pulseoff[quart] * 4 + t];
+    }
+
+    coef = dec->pulsepos[quart] >> 15;
+    ptr1 = (const int16_t*)ts_140 + 30;
+    ptr2 = tmp;
+    for(i = 0, j = 3; (i < 30) && (j > 0); i++){
+        t = *ptr1++;
+        if(coef >= t)
+            coef -= t;
+        else{
+            out[i] = *ptr2++;
+            ptr1 += 30;
+            j--;
+        }
+    }
+    coef = dec->pulsepos[quart] & 0x7FFF;
+    ptr1 = (const int16_t*)ts_140;
+    for(i = 30, j = 4; (i < 60) && (j > 0); i++){
+        t = *ptr1++;
+        if(coef >= t)
+            coef -= t;
+        else{
+            out[i] = *ptr2++;
+            ptr1 += 30;
+            j--;
+        }
+    }
+
+}
+
+static void truespeech_update_filters(TSContext *dec, int16_t *out, int quart)
+{
+    int i;
+
+    for(i = 0; i < 86; i++)
+        dec->filtbuf[i] = dec->filtbuf[i + 60];
+    for(i = 0; i < 60; i++){
+        dec->filtbuf[i + 86] = out[i] + dec->newvec[i] - (dec->newvec[i] >> 3);
+        out[i] += dec->newvec[i];
+    }
+}
+
+static void truespeech_synth(TSContext *dec, int16_t *out, int quart)
+{
+    int i,k;
+    int t[8];
+    int16_t *ptr0, *ptr1;
+
+    ptr0 = dec->tmp1;
+    ptr1 = dec->filters + quart * 8;
+    for(i = 0; i < 60; i++){
+        int sum = 0;
+        for(k = 0; k < 8; k++)
+            sum += ptr0[k] * ptr1[k];
+        sum = (sum + (out[i] << 12) + 0x800) >> 12;
+        out[i] = clip(sum, -0x7FFE, 0x7FFE);
+        for(k = 7; k > 0; k--)
+            ptr0[k] = ptr0[k - 1];
+        ptr0[0] = out[i];
+    }
+
+    for(i = 0; i < 8; i++)
+        t[i] = (ts_5E2[i] * ptr1[i]) >> 15;
+
+    ptr0 = dec->tmp2;
+    for(i = 0; i < 60; i++){
+        int sum = 0;
+        for(k = 0; k < 8; k++)
+            sum += ptr0[k] * t[k];
+        for(k = 7; k > 0; k--)
+            ptr0[k] = ptr0[k - 1];
+        ptr0[0] = out[i];
+        out[i] = ((out[i] << 12) - sum) >> 12;
+    }
+
+    for(i = 0; i < 8; i++)
+        t[i] = (ts_5F2[i] * ptr1[i]) >> 15;
+
+    ptr0 = dec->tmp3;
+    for(i = 0; i < 60; i++){
+        int sum = out[i] << 12;
+        for(k = 0; k < 8; k++)
+            sum += ptr0[k] * t[k];
+        for(k = 7; k > 0; k--)
+            ptr0[k] = ptr0[k - 1];
+        ptr0[0] = clip((sum + 0x800) >> 12, -0x7FFE, 0x7FFE);
+
+        sum = ((ptr0[1] * (dec->filtval - (dec->filtval >> 2))) >> 4) + sum;
+        sum = sum - (sum >> 3);
+        out[i] = clip((sum + 0x800) >> 12, -0x7FFE, 0x7FFE);
+    }
+}
+
+static void truespeech_save_prevvec(TSContext *c)
+{
+    int i;
+
+    for(i = 0; i < 8; i++)
+        c->prevfilt[i] = c->cvector[i];
+}
+
+static int truespeech_decode_frame(AVCodecContext *avctx,
+                void *data, int *data_size,
+                uint8_t *buf, int buf_size)
+{
+    TSContext *c = avctx->priv_data;
+
+    int i;
+    short *samples = data;
+    int consumed = 0;
+    int16_t out_buf[240];
+
+    if (!buf_size)
+        return 0;
+
+    while (consumed < buf_size) {
+        truespeech_read_frame(c, buf + consumed);
+        consumed += 32;
+
+        truespeech_correlate_filter(c);
+        truespeech_filters_merge(c);
+
+        memset(out_buf, 0, 240 * 2);
+        for(i = 0; i < 4; i++) {
+            truespeech_apply_twopoint_filter(c, i);
+            truespeech_place_pulses(c, out_buf + i * 60, i);
+            truespeech_update_filters(c, out_buf + i * 60, i);
+            truespeech_synth(c, out_buf + i * 60, i);
+        }
+
+        truespeech_save_prevvec(c);
+
+        /* finally output decoded frame */
+        for(i = 0; i < 240; i++)
+            *samples++ = out_buf[i];
+
+    }
+
+    *data_size = consumed * 15;
+
+    return buf_size;
+}
+
+AVCodec truespeech_decoder = {
+    "truespeech",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_TRUESPEECH,
+    sizeof(TSContext),
+    truespeech_decode_init,
+    NULL,
+    NULL,
+    truespeech_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/truespeech_data.h b/contrib/ffmpeg/libavcodec/truespeech_data.h
new file mode 100644
index 000000000..cd8822fde
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/truespeech_data.h
@@ -0,0 +1,157 @@
+/*
+ * DSP Group TrueSpeech compatible decoder
+ * copyright (c) 2005 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef __TRUESPEECH_DATA__
+#define __TRUESPEECH_DATA__
+
+/* codebooks fo expanding input filter */
+static const int16_t ts_cb_0[32] = {
+    0x8240, 0x8364, 0x84CE, 0x865D, 0x8805, 0x89DE, 0x8BD7, 0x8DF4,
+    0x9051, 0x92E2, 0x95DE, 0x990F, 0x9C81, 0xA079, 0xA54C, 0xAAD2,
+    0xB18A, 0xB90A, 0xC124, 0xC9CC, 0xD339, 0xDDD3, 0xE9D6, 0xF893,
+    0x096F, 0x1ACA, 0x29EC, 0x381F, 0x45F9, 0x546A, 0x63C3, 0x73B5,
+};
+
+static const int16_t ts_cb_1[32] = {
+    0x9F65, 0xB56B, 0xC583, 0xD371, 0xE018, 0xEBB4, 0xF61C, 0xFF59,
+    0x085B, 0x1106, 0x1952, 0x214A, 0x28C9, 0x2FF8, 0x36E6, 0x3D92,
+    0x43DF, 0x49BB, 0x4F46, 0x5467, 0x5930, 0x5DA3, 0x61EC, 0x65F9,
+    0x69D4, 0x6D5A, 0x709E, 0x73AD, 0x766B, 0x78F0, 0x7B5A, 0x7DA5,
+};
+
+static const int16_t ts_cb_2[16] = {
+    0x96F8, 0xA3B4, 0xAF45, 0xBA53, 0xC4B1, 0xCECC, 0xD86F, 0xE21E,
+    0xEBF3, 0xF640, 0x00F7, 0x0C20, 0x1881, 0x269A, 0x376B, 0x4D60,
+};
+
+static const int16_t ts_cb_3[16] = {
+    0xC654, 0xDEF2, 0xEFAA, 0xFD94, 0x096A, 0x143F, 0x1E7B, 0x282C,
+    0x3176, 0x3A89, 0x439F, 0x4CA2, 0x557F, 0x5E50, 0x6718, 0x6F8D,
+};
+
+static const int16_t ts_cb_4[16] = {
+    0xABE7, 0xBBA8, 0xC81C, 0xD326, 0xDD0E, 0xE5D4, 0xEE22, 0xF618,
+    0xFE28, 0x064F, 0x0EB7, 0x17B8, 0x21AA, 0x2D8B, 0x3BA2, 0x4DF9,
+};
+
+static const int16_t ts_cb_5[8] = {
+    0xD51B, 0xF12E, 0x042E, 0x13C7, 0x2260, 0x311B, 0x40DE, 0x5385,
+};
+
+static const int16_t ts_cb_6[8] = {
+    0xB550, 0xC825, 0xD980, 0xE997, 0xF883, 0x0752, 0x1811, 0x2E18,
+};
+
+static const int16_t ts_cb_7[8] = {
+    0xCEF0, 0xE4F9, 0xF6BB, 0x0646, 0x14F5, 0x23FF, 0x356F, 0x4A8D,
+};
+
+static const int16_t *ts_codebook[8] = {
+    ts_cb_0, ts_cb_1, ts_cb_2, ts_cb_3, ts_cb_4, ts_cb_5, ts_cb_6, ts_cb_7
+};
+
+/* table used for decoding pulse positions */
+static const int16_t ts_140[120] = {
+    0x0E46, 0x0CCC, 0x0B6D, 0x0A28, 0x08FC, 0x07E8, 0x06EB, 0x0604,
+    0x0532, 0x0474, 0x03C9, 0x0330, 0x02A8, 0x0230, 0x01C7, 0x016C,
+    0x011E, 0x00DC, 0x00A5, 0x0078, 0x0054, 0x0038, 0x0023, 0x0014,
+    0x000A, 0x0004, 0x0001, 0x0000, 0x0000, 0x0000,
+
+    0x0196, 0x017A, 0x015F, 0x0145, 0x012C, 0x0114, 0x00FD, 0x00E7,
+    0x00D2, 0x00BE, 0x00AB, 0x0099, 0x0088, 0x0078, 0x0069, 0x005B,
+    0x004E, 0x0042, 0x0037, 0x002D, 0x0024, 0x001C, 0x0015, 0x000F,
+    0x000A, 0x0006, 0x0003, 0x0001, 0x0000, 0x0000,
+
+    0x001D, 0x001C, 0x001B, 0x001A, 0x0019, 0x0018, 0x0017, 0x0016,
+    0x0015, 0x0014, 0x0013, 0x0012, 0x0011, 0x0010, 0x000F, 0x000E,
+    0x000D, 0x000C, 0x000B, 0x000A, 0x0009, 0x0008, 0x0007, 0x0006,
+    0x0005, 0x0004, 0x0003, 0x0002, 0x0001, 0x0000,
+
+    0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001,
+    0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001,
+    0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001,
+    0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001
+};
+
+/* filter for correlated input filter */
+static const int16_t ts_230[8] =
+    { 0x7F3B, 0x7E78, 0x7DB6, 0x7CF5, 0x7C35, 0x7B76, 0x7AB8, 0x79FC };
+
+/* two-point filters table */
+static const int16_t ts_240[25 * 2] = {
+    0xED2F, 0x5239,
+    0x54F1, 0xE4A9,
+    0x2620, 0xEE3E,
+    0x09D6, 0x2C40,
+    0xEFB5, 0x2BE0,
+
+    0x3FE1, 0x3339,
+    0x442F, 0xE6FE,
+    0x4458, 0xF9DF,
+    0xF231, 0x43DB,
+    0x3DB0, 0xF705,
+
+    0x4F7B, 0xFEFB,
+    0x26AD, 0x0CDC,
+    0x33C2, 0x0739,
+    0x12BE, 0x43A2,
+    0x1BDF, 0x1F3E,
+
+    0x0211, 0x0796,
+    0x2AEB, 0x163F,
+    0x050D, 0x3A38,
+    0x0D1E, 0x0D78,
+    0x150F, 0x3346,
+
+    0x38A4, 0x0B7D,
+    0x2D5D, 0x1FDF,
+    0x19B7, 0x2822,
+    0x0D99, 0x1F12,
+    0x194C, 0x0CE6
+};
+
+/* possible pulse values */
+static const int16_t ts_562[64] = {
+    0x0002, 0x0006, 0xFFFE, 0xFFFA,
+    0x0004, 0x000C, 0xFFFC, 0xFFF4,
+    0x0006, 0x0012, 0xFFFA, 0xFFEE,
+    0x000A, 0x001E, 0xFFF6, 0xFFE2,
+    0x0010, 0x0030, 0xFFF0, 0xFFD0,
+    0x0019, 0x004B, 0xFFE7, 0xFFB5,
+    0x0028, 0x0078, 0xFFD8, 0xFF88,
+    0x0040, 0x00C0, 0xFFC0, 0xFF40,
+    0x0065, 0x012F, 0xFF9B, 0xFED1,
+    0x00A1, 0x01E3, 0xFF5F, 0xFE1D,
+    0x0100, 0x0300, 0xFF00, 0xFD00,
+    0x0196, 0x04C2, 0xFE6A, 0xFB3E,
+    0x0285, 0x078F, 0xFD7B, 0xF871,
+    0x0400, 0x0C00, 0xFC00, 0xF400,
+    0x0659, 0x130B, 0xF9A7, 0xECF5,
+    0x0A14, 0x1E3C, 0xF5EC, 0xE1C4
+};
+
+/* filters used in final output calculations */
+static const int16_t ts_5E2[8] =
+    { 0x4666, 0x26B8, 0x154C, 0x0BB6, 0x0671, 0x038B, 0x01F3, 0x0112 };
+static const int16_t ts_5F2[8] =
+    { 0x6000, 0x4800, 0x3600, 0x2880, 0x1E60, 0x16C8, 0x1116, 0x0CD1 };
+
+#endif
diff --git a/contrib/ffmpeg/libavcodec/tscc.c b/contrib/ffmpeg/libavcodec/tscc.c
new file mode 100644
index 000000000..a24540f37
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/tscc.c
@@ -0,0 +1,348 @@
+/*
+ * TechSmith Camtasia decoder
+ * Copyright (c) 2004 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file tscc.c
+ * TechSmith Camtasia decoder
+ *
+ * Fourcc: TSCC
+ *
+ * Codec is very simple:
+ *  it codes picture (picture difference, really)
+ *  with algorithm almost identical to Windows RLE8,
+ *  only without padding and with greater pixel sizes,
+ *  then this coded picture is packed with ZLib
+ *
+ * Supports: BGR8,BGR555,BGR24 - only BGR8 and BGR555 tested
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "common.h"
+#include "avcodec.h"
+
+#ifdef CONFIG_ZLIB
+#include <zlib.h>
+#endif
+
+
+/*
+ * Decoder context
+ */
+typedef struct TsccContext {
+
+    AVCodecContext *avctx;
+    AVFrame pic;
+
+    // Bits per pixel
+    int bpp;
+    // Decompressed data size
+    unsigned int decomp_size;
+    // Decompression buffer
+    unsigned char* decomp_buf;
+    int height;
+#ifdef CONFIG_ZLIB
+    z_stream zstream;
+#endif
+} CamtasiaContext;
+
+/*
+ *
+ * Decode RLE - almost identical to Windows BMP RLE8
+ *              and enhanced to bigger color depths
+ *
+ */
+
+static int decode_rle(CamtasiaContext *c, unsigned int srcsize)
+{
+    unsigned char *src = c->decomp_buf;
+    unsigned char *output, *output_end;
+    int p1, p2, line=c->height, pos=0, i;
+    uint16_t pix16;
+    uint32_t pix32;
+
+    output = c->pic.data[0] + (c->height - 1) * c->pic.linesize[0];
+    output_end = c->pic.data[0] + (c->height) * c->pic.linesize[0];
+    while(src < c->decomp_buf + srcsize) {
+        p1 = *src++;
+        if(p1 == 0) { //Escape code
+            p2 = *src++;
+            if(p2 == 0) { //End-of-line
+                output = c->pic.data[0] + (--line) * c->pic.linesize[0];
+                if (line < 0)
+                    return -1;
+                pos = 0;
+                continue;
+            } else if(p2 == 1) { //End-of-picture
+                return 0;
+            } else if(p2 == 2) { //Skip
+                p1 = *src++;
+                p2 = *src++;
+                line -= p2;
+                if (line < 0)
+                    return -1;
+                pos += p1;
+                output = c->pic.data[0] + line * c->pic.linesize[0] + pos * (c->bpp / 8);
+                continue;
+            }
+            // Copy data
+            if (output + p2 * (c->bpp / 8) > output_end) {
+                src += p2 * (c->bpp / 8);
+                continue;
+            }
+            if ((c->bpp == 8) || (c->bpp == 24)) {
+                for(i = 0; i < p2 * (c->bpp / 8); i++) {
+                    *output++ = *src++;
+                }
+                // RLE8 copy is actually padded - and runs are not!
+                if(c->bpp == 8 && (p2 & 1)) {
+                    src++;
+                }
+            } else if (c->bpp == 16) {
+                for(i = 0; i < p2; i++) {
+                    pix16 = LE_16(src);
+                    src += 2;
+                    *(uint16_t*)output = pix16;
+                    output += 2;
+                }
+            } else if (c->bpp == 32) {
+                for(i = 0; i < p2; i++) {
+                    pix32 = LE_32(src);
+                    src += 4;
+                    *(uint32_t*)output = pix32;
+                    output += 4;
+                }
+            }
+            pos += p2;
+        } else { //Run of pixels
+            int pix[4]; //original pixel
+            switch(c->bpp){
+            case  8: pix[0] = *src++;
+                     break;
+            case 16: pix16 = LE_16(src);
+                     src += 2;
+                     *(uint16_t*)pix = pix16;
+                     break;
+            case 24: pix[0] = *src++;
+                     pix[1] = *src++;
+                     pix[2] = *src++;
+                     break;
+            case 32: pix32 = LE_32(src);
+                     src += 4;
+                     *(uint32_t*)pix = pix32;
+                     break;
+            }
+            if (output + p1 * (c->bpp / 8) > output_end)
+                continue;
+            for(i = 0; i < p1; i++) {
+                switch(c->bpp){
+                case  8: *output++ = pix[0];
+                         break;
+                case 16: *(uint16_t*)output = pix16;
+                         output += 2;
+                         break;
+                case 24: *output++ = pix[0];
+                         *output++ = pix[1];
+                         *output++ = pix[2];
+                         break;
+                case 32: *(uint32_t*)output = pix32;
+                         output += 4;
+                         break;
+                }
+            }
+            pos += p1;
+        }
+    }
+
+    av_log(c->avctx, AV_LOG_ERROR, "Camtasia warning: no End-of-picture code\n");
+    return 1;
+}
+
+/*
+ *
+ * Decode a frame
+ *
+ */
+static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
+{
+    CamtasiaContext * const c = (CamtasiaContext *)avctx->priv_data;
+    unsigned char *encoded = (unsigned char *)buf;
+    unsigned char *outptr;
+#ifdef CONFIG_ZLIB
+    int zret; // Zlib return code
+#endif
+    int len = buf_size;
+
+    if(c->pic.data[0])
+            avctx->release_buffer(avctx, &c->pic);
+
+    c->pic.reference = 1;
+    c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
+    if(avctx->get_buffer(avctx, &c->pic) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    outptr = c->pic.data[0]; // Output image pointer
+
+#ifdef CONFIG_ZLIB
+    zret = inflateReset(&(c->zstream));
+    if (zret != Z_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", zret);
+        return -1;
+    }
+    c->zstream.next_in = encoded;
+    c->zstream.avail_in = len;
+    c->zstream.next_out = c->decomp_buf;
+    c->zstream.avail_out = c->decomp_size;
+    zret = inflate(&(c->zstream), Z_FINISH);
+    // Z_DATA_ERROR means empty picture
+    if ((zret != Z_OK) && (zret != Z_STREAM_END) && (zret != Z_DATA_ERROR)) {
+        av_log(avctx, AV_LOG_ERROR, "Inflate error: %d\n", zret);
+        return -1;
+    }
+
+
+    if(zret != Z_DATA_ERROR)
+        decode_rle(c, c->zstream.avail_out);
+
+    /* make the palette available on the way out */
+    if (c->avctx->pix_fmt == PIX_FMT_PAL8) {
+        memcpy(c->pic.data[1], c->avctx->palctrl->palette, AVPALETTE_SIZE);
+        if (c->avctx->palctrl->palette_changed) {
+            c->pic.palette_has_changed = 1;
+            c->avctx->palctrl->palette_changed = 0;
+        }
+    }
+
+#else
+    av_log(avctx, AV_LOG_ERROR, "BUG! Zlib support not compiled in frame decoder.\n");
+    return -1;
+#endif
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = c->pic;
+
+    /* always report that the buffer was completely consumed */
+    return buf_size;
+}
+
+
+
+/*
+ *
+ * Init tscc decoder
+ *
+ */
+static int decode_init(AVCodecContext *avctx)
+{
+    CamtasiaContext * const c = (CamtasiaContext *)avctx->priv_data;
+    int zret; // Zlib return code
+
+    c->avctx = avctx;
+    avctx->has_b_frames = 0;
+
+    c->pic.data[0] = NULL;
+    c->height = avctx->height;
+
+    if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
+        return 1;
+    }
+
+#ifdef CONFIG_ZLIB
+    // Needed if zlib unused or init aborted before inflateInit
+    memset(&(c->zstream), 0, sizeof(z_stream));
+#else
+    av_log(avctx, AV_LOG_ERROR, "Zlib support not compiled.\n");
+    return 1;
+#endif
+    switch(avctx->bits_per_sample){
+    case  8: avctx->pix_fmt = PIX_FMT_PAL8; break;
+    case 16: avctx->pix_fmt = PIX_FMT_RGB555; break;
+    case 24:
+             avctx->pix_fmt = PIX_FMT_BGR24;
+             break;
+    case 32: avctx->pix_fmt = PIX_FMT_RGBA32; break;
+    default: av_log(avctx, AV_LOG_ERROR, "Camtasia error: unknown depth %i bpp\n", avctx->bits_per_sample);
+             return -1;
+    }
+    c->bpp = avctx->bits_per_sample;
+    c->decomp_size = (avctx->width * c->bpp + (avctx->width + 254) / 255 + 2) * avctx->height + 2;//RLE in the 'best' case
+
+    /* Allocate decompression buffer */
+    if (c->decomp_size) {
+        if ((c->decomp_buf = av_malloc(c->decomp_size)) == NULL) {
+            av_log(avctx, AV_LOG_ERROR, "Can't allocate decompression buffer.\n");
+            return 1;
+        }
+    }
+
+#ifdef CONFIG_ZLIB
+    c->zstream.zalloc = Z_NULL;
+    c->zstream.zfree = Z_NULL;
+    c->zstream.opaque = Z_NULL;
+    zret = inflateInit(&(c->zstream));
+    if (zret != Z_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Inflate init error: %d\n", zret);
+        return 1;
+    }
+#endif
+
+    return 0;
+}
+
+
+
+/*
+ *
+ * Uninit tscc decoder
+ *
+ */
+static int decode_end(AVCodecContext *avctx)
+{
+    CamtasiaContext * const c = (CamtasiaContext *)avctx->priv_data;
+
+    av_freep(&c->decomp_buf);
+
+    if (c->pic.data[0])
+        avctx->release_buffer(avctx, &c->pic);
+#ifdef CONFIG_ZLIB
+    inflateEnd(&(c->zstream));
+#endif
+
+    return 0;
+}
+
+AVCodec tscc_decoder = {
+        "camtasia",
+        CODEC_TYPE_VIDEO,
+        CODEC_ID_TSCC,
+        sizeof(CamtasiaContext),
+        decode_init,
+        NULL,
+        decode_end,
+        decode_frame,
+        CODEC_CAP_DR1,
+};
+
diff --git a/contrib/ffmpeg/libavcodec/tta.c b/contrib/ffmpeg/libavcodec/tta.c
new file mode 100644
index 000000000..82713fb0f
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/tta.c
@@ -0,0 +1,446 @@
+/*
+ * TTA (The Lossless True Audio) decoder
+ * Copyright (c) 2006 Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/**
+ * @file tta.c
+ * TTA (The Lossless True Audio) decoder
+ * (www.true-audio.com or tta.corecodec.org)
+ * @author Alex Beregszaszi
+ *
+ */
+
+#define ALT_BITSTREAM_READER_LE
+//#define DEBUG
+#include <limits.h>
+#include "avcodec.h"
+#include "bitstream.h"
+
+#define FORMAT_INT 1
+#define FORMAT_FLOAT 3
+
+typedef struct TTAContext {
+    AVCodecContext *avctx;
+    GetBitContext gb;
+
+    int flags, channels, bps, is_float, data_length;
+    int frame_length, last_frame_length, total_frames;
+
+    int32_t *decode_buffer;
+} TTAContext;
+
+#if 0
+static inline int shift_1(int i)
+{
+    if (i < 32)
+        return 1 << i;
+    else
+        return 0x80000000; // 16 << 31
+}
+
+static inline int shift_16(int i)
+{
+    if (i < 28)
+        return 16 << i;
+    else
+        return 0x80000000; // 16 << 27
+}
+#else
+static const uint32_t shift_1[] = {
+    0x00000001, 0x00000002, 0x00000004, 0x00000008,
+    0x00000010, 0x00000020, 0x00000040, 0x00000080,
+    0x00000100, 0x00000200, 0x00000400, 0x00000800,
+    0x00001000, 0x00002000, 0x00004000, 0x00008000,
+    0x00010000, 0x00020000, 0x00040000, 0x00080000,
+    0x00100000, 0x00200000, 0x00400000, 0x00800000,
+    0x01000000, 0x02000000, 0x04000000, 0x08000000,
+    0x10000000, 0x20000000, 0x40000000, 0x80000000,
+    0x80000000, 0x80000000, 0x80000000, 0x80000000,
+    0x80000000, 0x80000000, 0x80000000, 0x80000000
+};
+
+static const uint32_t *shift_16 = shift_1 + 4;
+#endif
+
+#define MAX_ORDER 16
+typedef struct TTAFilter {
+    int32_t shift, round, error, mode;
+    int32_t qm[MAX_ORDER];
+    int32_t dx[MAX_ORDER];
+    int32_t dl[MAX_ORDER];
+} TTAFilter;
+
+static int32_t ttafilter_configs[4][2] = {
+    {10, 1},
+    {9, 1},
+    {10, 1},
+    {12, 0}
+};
+
+static void ttafilter_init(TTAFilter *c, int32_t shift, int32_t mode) {
+    memset(c, 0, sizeof(TTAFilter));
+    c->shift = shift;
+   c->round = shift_1[shift-1];
+//    c->round = 1 << (shift - 1);
+    c->mode = mode;
+}
+
+// FIXME: copy paste from original
+static inline void memshl(register int32_t *a, register int32_t *b) {
+    *a++ = *b++;
+    *a++ = *b++;
+    *a++ = *b++;
+    *a++ = *b++;
+    *a++ = *b++;
+    *a++ = *b++;
+    *a++ = *b++;
+    *a = *b;
+}
+
+// FIXME: copy paste from original
+// mode=1 encoder, mode=0 decoder
+static inline void ttafilter_process(TTAFilter *c, int32_t *in, int32_t mode) {
+    register int32_t *dl = c->dl, *qm = c->qm, *dx = c->dx, sum = c->round;
+
+    if (!c->error) {
+        sum += *dl++ * *qm, qm++;
+        sum += *dl++ * *qm, qm++;
+        sum += *dl++ * *qm, qm++;
+        sum += *dl++ * *qm, qm++;
+        sum += *dl++ * *qm, qm++;
+        sum += *dl++ * *qm, qm++;
+        sum += *dl++ * *qm, qm++;
+        sum += *dl++ * *qm, qm++;
+        dx += 8;
+    } else if(c->error < 0) {
+        sum += *dl++ * (*qm -= *dx++), qm++;
+        sum += *dl++ * (*qm -= *dx++), qm++;
+        sum += *dl++ * (*qm -= *dx++), qm++;
+        sum += *dl++ * (*qm -= *dx++), qm++;
+        sum += *dl++ * (*qm -= *dx++), qm++;
+        sum += *dl++ * (*qm -= *dx++), qm++;
+        sum += *dl++ * (*qm -= *dx++), qm++;
+        sum += *dl++ * (*qm -= *dx++), qm++;
+    } else {
+        sum += *dl++ * (*qm += *dx++), qm++;
+        sum += *dl++ * (*qm += *dx++), qm++;
+        sum += *dl++ * (*qm += *dx++), qm++;
+        sum += *dl++ * (*qm += *dx++), qm++;
+        sum += *dl++ * (*qm += *dx++), qm++;
+        sum += *dl++ * (*qm += *dx++), qm++;
+        sum += *dl++ * (*qm += *dx++), qm++;
+        sum += *dl++ * (*qm += *dx++), qm++;
+    }
+
+    *(dx-0) = ((*(dl-1) >> 30) | 1) << 2;
+    *(dx-1) = ((*(dl-2) >> 30) | 1) << 1;
+    *(dx-2) = ((*(dl-3) >> 30) | 1) << 1;
+    *(dx-3) = ((*(dl-4) >> 30) | 1);
+
+    // compress
+    if (mode) {
+        *dl = *in;
+        *in -= (sum >> c->shift);
+        c->error = *in;
+    } else {
+        c->error = *in;
+        *in += (sum >> c->shift);
+        *dl = *in;
+    }
+
+    if (c->mode) {
+        *(dl-1) = *dl - *(dl-1);
+        *(dl-2) = *(dl-1) - *(dl-2);
+        *(dl-3) = *(dl-2) - *(dl-3);
+    }
+
+    memshl(c->dl, c->dl + 1);
+    memshl(c->dx, c->dx + 1);
+}
+
+typedef struct TTARice {
+    uint32_t k0, k1, sum0, sum1;
+} TTARice;
+
+static void rice_init(TTARice *c, uint32_t k0, uint32_t k1)
+{
+    c->k0 = k0;
+    c->k1 = k1;
+    c->sum0 = shift_16[k0];
+    c->sum1 = shift_16[k1];
+}
+
+static int tta_get_unary(GetBitContext *gb)
+{
+    int ret = 0;
+
+    // count ones
+    while(get_bits1(gb))
+        ret++;
+    return ret;
+}
+
+static int tta_decode_init(AVCodecContext * avctx)
+{
+    TTAContext *s = avctx->priv_data;
+    int i;
+
+    s->avctx = avctx;
+
+    // 30bytes includes a seektable with one frame
+    if (avctx->extradata_size < 30)
+        return -1;
+
+    init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size);
+    if (show_bits_long(&s->gb, 32) == ff_get_fourcc("TTA1"))
+    {
+        /* signature */
+        skip_bits(&s->gb, 32);
+//        if (get_bits_long(&s->gb, 32) != bswap_32(ff_get_fourcc("TTA1"))) {
+//            av_log(s->avctx, AV_LOG_ERROR, "Missing magic\n");
+//            return -1;
+//        }
+
+        s->flags = get_bits(&s->gb, 16);
+        if (s->flags != 1 && s->flags != 3)
+        {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid flags\n");
+            return -1;
+        }
+        s->is_float = (s->flags == FORMAT_FLOAT);
+        avctx->channels = s->channels = get_bits(&s->gb, 16);
+        avctx->bits_per_sample = get_bits(&s->gb, 16);
+        s->bps = (avctx->bits_per_sample + 7) / 8;
+        avctx->sample_rate = get_bits_long(&s->gb, 32);
+        if(avctx->sample_rate > 1000000){ //prevent FRAME_TIME * avctx->sample_rate from overflowing and sanity check
+            av_log(avctx, AV_LOG_ERROR, "sample_rate too large\n");
+            return -1;
+        }
+        s->data_length = get_bits_long(&s->gb, 32);
+        skip_bits(&s->gb, 32); // CRC32 of header
+
+        if (s->is_float)
+        {
+            avctx->sample_fmt = SAMPLE_FMT_FLT;
+            av_log(s->avctx, AV_LOG_ERROR, "Unsupported sample format. Please contact the developers.\n");
+            return -1;
+        }
+        else switch(s->bps) {
+//            case 1: avctx->sample_fmt = SAMPLE_FMT_U8; break;
+            case 2: avctx->sample_fmt = SAMPLE_FMT_S16; break;
+//            case 3: avctx->sample_fmt = SAMPLE_FMT_S24; break;
+            case 4: avctx->sample_fmt = SAMPLE_FMT_S32; break;
+            default:
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid/unsupported sample format. Please contact the developers.\n");
+                return -1;
+        }
+
+        // FIXME: horribly broken, but directly from reference source
+#define FRAME_TIME 1.04489795918367346939
+        s->frame_length = (int)(FRAME_TIME * avctx->sample_rate);
+
+        s->last_frame_length = s->data_length % s->frame_length;
+        s->total_frames = s->data_length / s->frame_length +
+                        (s->last_frame_length ? 1 : 0);
+
+        av_log(s->avctx, AV_LOG_DEBUG, "flags: %x chans: %d bps: %d rate: %d block: %d\n",
+            s->flags, avctx->channels, avctx->bits_per_sample, avctx->sample_rate,
+            avctx->block_align);
+        av_log(s->avctx, AV_LOG_DEBUG, "data_length: %d frame_length: %d last: %d total: %d\n",
+            s->data_length, s->frame_length, s->last_frame_length, s->total_frames);
+
+        // FIXME: seek table
+        for (i = 0; i < s->total_frames; i++)
+            skip_bits(&s->gb, 32);
+        skip_bits(&s->gb, 32); // CRC32 of seektable
+
+        if(s->frame_length >= UINT_MAX / (s->channels * sizeof(int32_t))){
+            av_log(avctx, AV_LOG_ERROR, "frame_length too large\n");
+            return -1;
+        }
+
+        s->decode_buffer = av_mallocz(sizeof(int32_t)*s->frame_length*s->channels);
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "Wrong extradata present\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int tta_decode_frame(AVCodecContext *avctx,
+        void *data, int *data_size,
+        uint8_t *buf, int buf_size)
+{
+    TTAContext *s = avctx->priv_data;
+    int i;
+
+    init_get_bits(&s->gb, buf, buf_size*8);
+    {
+        int32_t predictors[s->channels];
+        TTAFilter filters[s->channels];
+        TTARice rices[s->channels];
+        int cur_chan = 0, framelen = s->frame_length;
+        int32_t *p;
+
+        // FIXME: seeking
+        s->total_frames--;
+        if (!s->total_frames && s->last_frame_length)
+            framelen = s->last_frame_length;
+
+        // init per channel states
+        for (i = 0; i < s->channels; i++) {
+            predictors[i] = 0;
+            ttafilter_init(&(filters[i]), ttafilter_configs[s->bps-1][0], ttafilter_configs[s->bps-1][1]);
+            rice_init(&(rices[i]), 10, 10);
+        }
+
+        for (p = s->decode_buffer; p < s->decode_buffer + (framelen * s->channels); p++) {
+            int32_t *predictor = &(predictors[cur_chan]);
+            TTAFilter *filter = &(filters[cur_chan]);
+            TTARice *rice = &(rices[cur_chan]);
+            uint32_t unary, depth, k;
+            int32_t value;
+
+            unary = tta_get_unary(&s->gb);
+
+            if (unary == 0) {
+                depth = 0;
+                k = rice->k0;
+            } else {
+                depth = 1;
+                k = rice->k1;
+                unary--;
+            }
+
+            if (k)
+                value = (unary << k) + get_bits(&s->gb, k);
+            else
+                value = unary;
+
+            // FIXME: copy paste from original
+            switch (depth) {
+            case 1:
+                rice->sum1 += value - (rice->sum1 >> 4);
+                if (rice->k1 > 0 && rice->sum1 < shift_16[rice->k1])
+                    rice->k1--;
+                else if(rice->sum1 > shift_16[rice->k1 + 1])
+                    rice->k1++;
+                value += shift_1[rice->k0];
+            default:
+                rice->sum0 += value - (rice->sum0 >> 4);
+                if (rice->k0 > 0 && rice->sum0 < shift_16[rice->k0])
+                    rice->k0--;
+                else if(rice->sum0 > shift_16[rice->k0 + 1])
+                    rice->k0++;
+            }
+
+            // extract coded value
+#define UNFOLD(x) (((x)&1) ? (++(x)>>1) : (-(x)>>1))
+            *p = UNFOLD(value);
+
+            // run hybrid filter
+            ttafilter_process(filter, p, 0);
+
+            // fixed order prediction
+#define PRED(x, k) (int32_t)((((uint64_t)x << k) - x) >> k)
+            switch (s->bps) {
+                case 1: *p += PRED(*predictor, 4); break;
+                case 2:
+                case 3: *p += PRED(*predictor, 5); break;
+                case 4: *p += *predictor; break;
+            }
+            *predictor = *p;
+
+#if 0
+            // extract 32bit float from last two int samples
+            if (s->is_float && ((p - data) & 1)) {
+                uint32_t neg = *p & 0x80000000;
+                uint32_t hi = *(p - 1);
+                uint32_t lo = abs(*p) - 1;
+
+                hi += (hi || lo) ? 0x3f80 : 0;
+                // SWAP16: swap all the 16 bits
+                *(p - 1) = (hi << 16) | SWAP16(lo) | neg;
+            }
+#endif
+
+            /*if ((get_bits_count(&s->gb)+7)/8 > buf_size)
+            {
+                av_log(NULL, AV_LOG_INFO, "overread!!\n");
+                break;
+            }*/
+
+            // flip channels
+            if (cur_chan < (s->channels-1))
+                cur_chan++;
+            else {
+                // decorrelate in case of stereo integer
+                if (!s->is_float && (s->channels > 1)) {
+                    int32_t *r = p - 1;
+                    for (*p += *r / 2; r > p - s->channels; r--)
+                        *r = *(r + 1) - *r;
+                }
+                cur_chan = 0;
+            }
+        }
+
+        skip_bits(&s->gb, 32); // frame crc
+
+        // convert to output buffer
+        switch(s->bps) {
+            case 2: {
+                uint16_t *samples = data;
+                for (p = s->decode_buffer; p < s->decode_buffer + (framelen * s->channels); p++) {
+//                    *samples++ = (unsigned char)*p;
+//                    *samples++ = (unsigned char)(*p >> 8);
+                    *samples++ = *p;
+                }
+                *data_size = (uint8_t *)samples - (uint8_t *)data;
+                break;
+            }
+            default:
+                av_log(s->avctx, AV_LOG_ERROR, "Error, only 16bit samples supported!\n");
+        }
+    }
+
+//    return get_bits_count(&s->gb)+7)/8;
+    return buf_size;
+}
+
+static int tta_decode_close(AVCodecContext *avctx) {
+    TTAContext *s = avctx->priv_data;
+
+    if (s->decode_buffer)
+        av_free(s->decode_buffer);
+
+    return 0;
+}
+
+AVCodec tta_decoder = {
+    "tta",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_TTA,
+    sizeof(TTAContext),
+    tta_decode_init,
+    NULL,
+    tta_decode_close,
+    tta_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/ulti.c b/contrib/ffmpeg/libavcodec/ulti.c
new file mode 100644
index 000000000..b4028f439
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ulti.c
@@ -0,0 +1,428 @@
+/*
+ * IBM Ultimotion Video Decoder
+ * Copyright (C) 2004 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file ulti.c
+ * IBM Ultimotion Video Decoder.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+
+#include "ulti_cb.h"
+
+typedef struct UltimotionDecodeContext {
+    AVCodecContext *avctx;
+    int width, height, blocks;
+    AVFrame frame;
+    const uint8_t *ulti_codebook;
+} UltimotionDecodeContext;
+
+static int ulti_decode_init(AVCodecContext *avctx)
+{
+    UltimotionDecodeContext *s = avctx->priv_data;
+
+    s->avctx = avctx;
+    s->width = avctx->width;
+    s->height = avctx->height;
+    s->blocks = (s->width / 8) * (s->height / 8);
+    avctx->pix_fmt = PIX_FMT_YUV410P;
+    avctx->has_b_frames = 0;
+    avctx->coded_frame = (AVFrame*) &s->frame;
+    s->ulti_codebook = ulti_codebook;
+
+    return 0;
+}
+
+static int block_coords[8] = // 4x4 block coords in 8x8 superblock
+    { 0, 0, 0, 4, 4, 4, 4, 0};
+
+static int angle_by_index[4] = { 0, 2, 6, 12};
+
+/* Lookup tables for luma and chroma - used by ulti_convert_yuv() */
+static uint8_t ulti_lumas[64] =
+    { 0x10, 0x13, 0x17, 0x1A, 0x1E, 0x21, 0x25, 0x28,
+      0x2C, 0x2F, 0x33, 0x36, 0x3A, 0x3D, 0x41, 0x44,
+      0x48, 0x4B, 0x4F, 0x52, 0x56, 0x59, 0x5C, 0x60,
+      0x63, 0x67, 0x6A, 0x6E, 0x71, 0x75, 0x78, 0x7C,
+      0x7F, 0x83, 0x86, 0x8A, 0x8D, 0x91, 0x94, 0x98,
+      0x9B, 0x9F, 0xA2, 0xA5, 0xA9, 0xAC, 0xB0, 0xB3,
+      0xB7, 0xBA, 0xBE, 0xC1, 0xC5, 0xC8, 0xCC, 0xCF,
+      0xD3, 0xD6, 0xDA, 0xDD, 0xE1, 0xE4, 0xE8, 0xEB};
+
+static uint8_t ulti_chromas[16] =
+    { 0x60, 0x67, 0x6D, 0x73, 0x7A, 0x80, 0x86, 0x8D,
+      0x93, 0x99, 0xA0, 0xA6, 0xAC, 0xB3, 0xB9, 0xC0};
+
+/* convert Ultimotion YUV block (sixteen 6-bit Y samples and
+ two 4-bit chroma samples) into standard YUV and put it into frame */
+static void ulti_convert_yuv(AVFrame *frame, int x, int y,
+                             uint8_t *luma,int chroma)
+{
+    uint8_t *y_plane, *cr_plane, *cb_plane;
+    int i;
+
+    y_plane = frame->data[0] + x + y * frame->linesize[0];
+    cr_plane = frame->data[1] + (x / 4) + (y / 4) * frame->linesize[1];
+    cb_plane = frame->data[2] + (x / 4) + (y / 4) * frame->linesize[2];
+
+    cr_plane[0] = ulti_chromas[chroma >> 4];
+
+    cb_plane[0] = ulti_chromas[chroma & 0xF];
+
+
+    for(i = 0; i < 16; i++){
+        y_plane[i & 3] = ulti_lumas[luma[i]];
+        if((i & 3) == 3) { //next row
+            y_plane += frame->linesize[0];
+        }
+    }
+}
+
+/* generate block like in MS Video1 */
+static void ulti_pattern(AVFrame *frame, int x, int y,
+                         int f0, int f1, int Y0, int Y1, int chroma)
+{
+    uint8_t Luma[16];
+    int mask, i;
+    for(mask = 0x80, i = 0; mask; mask >>= 1, i++) {
+        if(f0 & mask)
+            Luma[i] = Y1;
+        else
+            Luma[i] = Y0;
+    }
+
+    for(mask = 0x80, i = 8; mask; mask >>= 1, i++) {
+        if(f1 & mask)
+            Luma[i] = Y1;
+        else
+            Luma[i] = Y0;
+    }
+
+    ulti_convert_yuv(frame, x, y, Luma, chroma);
+}
+
+/* fill block with some gradient */
+static void ulti_grad(AVFrame *frame, int x, int y, uint8_t *Y, int chroma, int angle)
+{
+    uint8_t Luma[16];
+    if(angle & 8) { //reverse order
+        int t;
+        angle &= 0x7;
+        t = Y[0];
+        Y[0] = Y[3];
+        Y[3] = t;
+        t = Y[1];
+        Y[1] = Y[2];
+        Y[2] = t;
+    }
+    switch(angle){
+    case 0:
+        Luma[0]  = Y[0]; Luma[1]  = Y[1]; Luma[2]  = Y[2]; Luma[3]  = Y[3];
+        Luma[4]  = Y[0]; Luma[5]  = Y[1]; Luma[6]  = Y[2]; Luma[7]  = Y[3];
+        Luma[8]  = Y[0]; Luma[9]  = Y[1]; Luma[10] = Y[2]; Luma[11] = Y[3];
+        Luma[12] = Y[0]; Luma[13] = Y[1]; Luma[14] = Y[2]; Luma[15] = Y[3];
+        break;
+    case 1:
+        Luma[0]  = Y[1]; Luma[1]  = Y[2]; Luma[2]  = Y[3]; Luma[3]  = Y[3];
+        Luma[4]  = Y[0]; Luma[5]  = Y[1]; Luma[6]  = Y[2]; Luma[7]  = Y[3];
+        Luma[8]  = Y[0]; Luma[9]  = Y[1]; Luma[10] = Y[2]; Luma[11] = Y[3];
+        Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[1]; Luma[15] = Y[2];
+        break;
+    case 2:
+        Luma[0]  = Y[1]; Luma[1]  = Y[2]; Luma[2]  = Y[3]; Luma[3]  = Y[3];
+        Luma[4]  = Y[1]; Luma[5]  = Y[2]; Luma[6]  = Y[2]; Luma[7]  = Y[3];
+        Luma[8]  = Y[0]; Luma[9]  = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[2];
+        Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[1]; Luma[15] = Y[2];
+        break;
+    case 3:
+        Luma[0]  = Y[2]; Luma[1]  = Y[3]; Luma[2]  = Y[3]; Luma[3]  = Y[3];
+        Luma[4]  = Y[1]; Luma[5]  = Y[2]; Luma[6]  = Y[2]; Luma[7]  = Y[3];
+        Luma[8]  = Y[0]; Luma[9]  = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[2];
+        Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[0]; Luma[15] = Y[1];
+        break;
+    case 4:
+        Luma[0]  = Y[3]; Luma[1]  = Y[3]; Luma[2]  = Y[3]; Luma[3]  = Y[3];
+        Luma[4]  = Y[2]; Luma[5]  = Y[2]; Luma[6]  = Y[2]; Luma[7]  = Y[2];
+        Luma[8]  = Y[1]; Luma[9]  = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[1];
+        Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[0]; Luma[15] = Y[0];
+        break;
+    case 5:
+        Luma[0]  = Y[3]; Luma[1]  = Y[3]; Luma[2]  = Y[3]; Luma[3]  = Y[2];
+        Luma[4]  = Y[3]; Luma[5]  = Y[2]; Luma[6]  = Y[2]; Luma[7]  = Y[1];
+        Luma[8]  = Y[2]; Luma[9]  = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[0];
+        Luma[12] = Y[1]; Luma[13] = Y[0]; Luma[14] = Y[0]; Luma[15] = Y[0];
+        break;
+    case 6:
+        Luma[0]  = Y[3]; Luma[1]  = Y[3]; Luma[2]  = Y[2]; Luma[3]  = Y[2];
+        Luma[4]  = Y[3]; Luma[5]  = Y[2]; Luma[6]  = Y[1]; Luma[7]  = Y[1];
+        Luma[8]  = Y[2]; Luma[9]  = Y[2]; Luma[10] = Y[1]; Luma[11] = Y[0];
+        Luma[12] = Y[1]; Luma[13] = Y[1]; Luma[14] = Y[0]; Luma[15] = Y[0];
+        break;
+    case 7:
+        Luma[0]  = Y[3]; Luma[1]  = Y[3]; Luma[2]  = Y[2]; Luma[3]  = Y[1];
+        Luma[4]  = Y[3]; Luma[5]  = Y[2]; Luma[6]  = Y[1]; Luma[7]  = Y[0];
+        Luma[8]  = Y[3]; Luma[9]  = Y[2]; Luma[10] = Y[1]; Luma[11] = Y[0];
+        Luma[12] = Y[2]; Luma[13] = Y[1]; Luma[14] = Y[0]; Luma[15] = Y[0];
+        break;
+    default:
+        Luma[0]  = Y[0]; Luma[1]  = Y[0]; Luma[2]  = Y[1]; Luma[3]  = Y[1];
+        Luma[4]  = Y[0]; Luma[5]  = Y[0]; Luma[6]  = Y[1]; Luma[7]  = Y[1];
+        Luma[8]  = Y[2]; Luma[9]  = Y[2]; Luma[10] = Y[3]; Luma[11] = Y[3];
+        Luma[12] = Y[2]; Luma[13] = Y[2]; Luma[14] = Y[3]; Luma[15] = Y[3];
+        break;
+    }
+
+    ulti_convert_yuv(frame, x, y, Luma, chroma);
+}
+
+static int ulti_decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             uint8_t *buf, int buf_size)
+{
+    UltimotionDecodeContext *s=avctx->priv_data;
+    int modifier = 0;
+    int uniq = 0;
+    int mode = 0;
+    int blocks = 0;
+    int done = 0;
+    int x = 0, y = 0;
+    int i;
+    int skip;
+    int tmp;
+
+    if(s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    s->frame.reference = 1;
+    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if(avctx->get_buffer(avctx, &s->frame) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    while(!done) {
+        int idx;
+        if(blocks >= s->blocks || y >= s->height)
+            break;//all blocks decoded
+
+        idx = *buf++;
+        if((idx & 0xF8) == 0x70) {
+            switch(idx) {
+            case 0x70: //change modifier
+                modifier = *buf++;
+                if(modifier>1)
+                    av_log(avctx, AV_LOG_INFO, "warning: modifier must be 0 or 1, got %i\n", modifier);
+                break;
+            case 0x71: // set uniq flag
+                uniq = 1;
+                break;
+            case 0x72: //toggle mode
+                mode = !mode;
+                break;
+            case 0x73: //end-of-frame
+                done = 1;
+                break;
+            case 0x74: //skip some blocks
+                skip = *buf++;
+                if ((blocks + skip) >= s->blocks)
+                    break;
+                blocks += skip;
+                x += skip * 8;
+                while(x >= s->width) {
+                    x -= s->width;
+                    y += 8;
+                }
+                break;
+            default:
+                av_log(avctx, AV_LOG_INFO, "warning: unknown escape 0x%02X\n", idx);
+            }
+        } else { //handle one block
+            int code;
+            int cf;
+            int angle = 0;
+            uint8_t Y[4]; // luma samples of block
+            int tx = 0, ty = 0; //coords of subblock
+            int chroma = 0;
+            if (mode || uniq) {
+                uniq = 0;
+                cf = 1;
+                chroma = 0;
+            } else {
+                cf = 0;
+                if (idx)
+                    chroma = *buf++;
+            }
+            for (i = 0; i < 4; i++) { // for every subblock
+                code = (idx >> (6 - i*2)) & 3; //extract 2 bits
+                if(!code) //skip subblock
+                    continue;
+                if(cf)
+                    chroma = *buf++;
+                tx = x + block_coords[i * 2];
+                ty = y + block_coords[(i * 2) + 1];
+                switch(code) {
+                case 1:
+                    tmp = *buf++;
+
+                    angle = angle_by_index[(tmp >> 6) & 0x3];
+
+                    Y[0] = tmp & 0x3F;
+                    Y[1] = Y[0];
+
+                    if (angle) {
+                        Y[2] = Y[0]+1;
+                        if (Y[2] > 0x3F)
+                            Y[2] = 0x3F;
+                        Y[3] = Y[2];
+                    } else {
+                        Y[2] = Y[0];
+                        Y[3] = Y[0];
+                    }
+                    break;
+
+                case 2:
+                    if (modifier) { // unpack four luma samples
+                        tmp = (*buf++) << 16;
+                        tmp += (*buf++) << 8;
+                        tmp += *buf++;
+
+                        Y[0] = (tmp >> 18) & 0x3F;
+                        Y[1] = (tmp >> 12) & 0x3F;
+                        Y[2] = (tmp >> 6) & 0x3F;
+                        Y[3] = tmp & 0x3F;
+                        angle = 16;
+                    } else { // retrieve luma samples from codebook
+                        tmp = (*buf++) << 8;
+                        tmp += (*buf++);
+
+                        angle = (tmp >> 12) & 0xF;
+                        tmp &= 0xFFF;
+                        tmp <<= 2;
+                        Y[0] = s->ulti_codebook[tmp];
+                        Y[1] = s->ulti_codebook[tmp + 1];
+                        Y[2] = s->ulti_codebook[tmp + 2];
+                        Y[3] = s->ulti_codebook[tmp + 3];
+                    }
+                    break;
+
+                case 3:
+                    if (modifier) { // all 16 luma samples
+                        uint8_t Luma[16];
+
+                        tmp = (*buf++) << 16;
+                        tmp += (*buf++) << 8;
+                        tmp += *buf++;
+                        Luma[0] = (tmp >> 18) & 0x3F;
+                        Luma[1] = (tmp >> 12) & 0x3F;
+                        Luma[2] = (tmp >> 6) & 0x3F;
+                        Luma[3] = tmp & 0x3F;
+
+                        tmp = (*buf++) << 16;
+                        tmp += (*buf++) << 8;
+                        tmp += *buf++;
+                        Luma[4] = (tmp >> 18) & 0x3F;
+                        Luma[5] = (tmp >> 12) & 0x3F;
+                        Luma[6] = (tmp >> 6) & 0x3F;
+                        Luma[7] = tmp & 0x3F;
+
+                        tmp = (*buf++) << 16;
+                        tmp += (*buf++) << 8;
+                        tmp += *buf++;
+                        Luma[8] = (tmp >> 18) & 0x3F;
+                        Luma[9] = (tmp >> 12) & 0x3F;
+                        Luma[10] = (tmp >> 6) & 0x3F;
+                        Luma[11] = tmp & 0x3F;
+
+                        tmp = (*buf++) << 16;
+                        tmp += (*buf++) << 8;
+                        tmp += *buf++;
+                        Luma[12] = (tmp >> 18) & 0x3F;
+                        Luma[13] = (tmp >> 12) & 0x3F;
+                        Luma[14] = (tmp >> 6) & 0x3F;
+                        Luma[15] = tmp & 0x3F;
+
+                        ulti_convert_yuv(&s->frame, tx, ty, Luma, chroma);
+                    } else {
+                        tmp = *buf++;
+                        if(tmp & 0x80) {
+                            angle = (tmp >> 4) & 0x7;
+                            tmp = (tmp << 8) + *buf++;
+                            Y[0] = (tmp >> 6) & 0x3F;
+                            Y[1] = tmp & 0x3F;
+                            Y[2] = (*buf++) & 0x3F;
+                            Y[3] = (*buf++) & 0x3F;
+                            ulti_grad(&s->frame, tx, ty, Y, chroma, angle); //draw block
+                        } else { // some patterns
+                            int f0, f1;
+                            f0 = *buf++;
+                            f1 = tmp;
+                            Y[0] = (*buf++) & 0x3F;
+                            Y[1] = (*buf++) & 0x3F;
+                            ulti_pattern(&s->frame, tx, ty, f1, f0, Y[0], Y[1], chroma);
+                        }
+                    }
+                    break;
+                }
+                if(code != 3)
+                    ulti_grad(&s->frame, tx, ty, Y, chroma, angle); // draw block
+            }
+            blocks++;
+                x += 8;
+            if(x >= s->width) {
+                x = 0;
+                y += 8;
+            }
+        }
+    }
+
+    *data_size=sizeof(AVFrame);
+    *(AVFrame*)data= s->frame;
+
+    return buf_size;
+}
+
+static int ulti_decode_end(AVCodecContext *avctx)
+{
+/*    UltimotionDecodeContext *s = avctx->priv_data;*/
+
+    return 0;
+}
+
+AVCodec ulti_decoder = {
+    "ultimotion",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_ULTI,
+    sizeof(UltimotionDecodeContext),
+    ulti_decode_init,
+    NULL,
+    ulti_decode_end,
+    ulti_decode_frame,
+    CODEC_CAP_DR1,
+    NULL
+};
+
diff --git a/contrib/ffmpeg/libavcodec/ulti_cb.h b/contrib/ffmpeg/libavcodec/ulti_cb.h
new file mode 100644
index 000000000..2d8c9082c
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ulti_cb.h
@@ -0,0 +1,4119 @@
+/*
+ * IBM Ultimotion Video Decoder
+ * copyright (C) 2004 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+static const unsigned char ulti_codebook[16384]={
+    0x00, 0x01, 0x01, 0x02,
+    0x00, 0x01, 0x02, 0x03,
+    0x00, 0x02, 0x03, 0x04,
+    0x00, 0x01, 0x03, 0x04,
+    0x00, 0x01, 0x02, 0x04,
+    0x00, 0x02, 0x03, 0x05,
+    0x00, 0x02, 0x04, 0x05,
+    0x00, 0x01, 0x04, 0x05,
+    0x00, 0x01, 0x03, 0x05,
+    0x00, 0x02, 0x04, 0x06,
+    0x00, 0x03, 0x05, 0x06,
+    0x00, 0x01, 0x05, 0x06,
+    0x00, 0x01, 0x03, 0x06,
+    0x00, 0x06, 0x06, 0x06,
+    0x00, 0x00, 0x06, 0x06,
+    0x00, 0x00, 0x00, 0x06,
+    0x00, 0x03, 0x04, 0x07,
+    0x00, 0x03, 0x06, 0x07,
+    0x00, 0x01, 0x06, 0x07,
+    0x00, 0x01, 0x04, 0x07,
+    0x00, 0x03, 0x05, 0x08,
+    0x00, 0x04, 0x06, 0x08,
+    0x00, 0x02, 0x06, 0x08,
+    0x00, 0x02, 0x04, 0x08,
+    0x00, 0x08, 0x08, 0x08,
+    0x00, 0x00, 0x08, 0x08,
+    0x00, 0x00, 0x00, 0x08,
+    0x00, 0x04, 0x07, 0x0B,
+    0x00, 0x05, 0x09, 0x0B,
+    0x00, 0x02, 0x09, 0x0B,
+    0x00, 0x02, 0x06, 0x0B,
+    0x00, 0x0B, 0x0B, 0x0B,
+    0x00, 0x00, 0x0B, 0x0B,
+    0x00, 0x00, 0x00, 0x0B,
+    0x00, 0x05, 0x09, 0x0E,
+    0x00, 0x07, 0x0B, 0x0E,
+    0x00, 0x03, 0x0B, 0x0E,
+    0x00, 0x03, 0x07, 0x0E,
+    0x00, 0x0E, 0x0E, 0x0E,
+    0x00, 0x00, 0x0E, 0x0E,
+    0x00, 0x00, 0x00, 0x0E,
+    0x00, 0x06, 0x0B, 0x11,
+    0x00, 0x08, 0x0D, 0x11,
+    0x00, 0x04, 0x0D, 0x11,
+    0x00, 0x04, 0x09, 0x11,
+    0x00, 0x11, 0x11, 0x11,
+    0x00, 0x00, 0x11, 0x11,
+    0x00, 0x00, 0x00, 0x11,
+    0x00, 0x07, 0x0D, 0x14,
+    0x00, 0x0A, 0x0F, 0x14,
+    0x00, 0x05, 0x0F, 0x14,
+    0x00, 0x05, 0x0A, 0x14,
+    0x00, 0x14, 0x14, 0x14,
+    0x00, 0x00, 0x14, 0x14,
+    0x00, 0x00, 0x00, 0x14,
+    0x00, 0x0B, 0x12, 0x17,
+    0x00, 0x05, 0x12, 0x17,
+    0x00, 0x05, 0x0C, 0x17,
+    0x00, 0x17, 0x17, 0x17,
+    0x00, 0x00, 0x17, 0x17,
+    0x00, 0x00, 0x00, 0x17,
+    0x00, 0x0D, 0x14, 0x1A,
+    0x00, 0x06, 0x14, 0x1A,
+    0x00, 0x06, 0x0D, 0x1A,
+    0x00, 0x1A, 0x1A, 0x1A,
+    0x00, 0x00, 0x1A, 0x1A,
+    0x00, 0x00, 0x00, 0x1A,
+    0x00, 0x0E, 0x16, 0x1D,
+    0x00, 0x07, 0x16, 0x1D,
+    0x00, 0x07, 0x0F, 0x1D,
+    0x00, 0x1D, 0x1D, 0x1D,
+    0x00, 0x00, 0x1D, 0x1D,
+    0x00, 0x00, 0x00, 0x1D,
+    0x00, 0x10, 0x18, 0x20,
+    0x00, 0x08, 0x18, 0x20,
+    0x00, 0x08, 0x10, 0x20,
+    0x00, 0x20, 0x20, 0x20,
+    0x00, 0x00, 0x20, 0x20,
+    0x00, 0x00, 0x00, 0x20,
+    0x00, 0x23, 0x23, 0x23,
+    0x00, 0x00, 0x23, 0x23,
+    0x00, 0x00, 0x00, 0x23,
+    0x00, 0x12, 0x1B, 0x24,
+    0x00, 0x09, 0x1B, 0x24,
+    0x00, 0x09, 0x12, 0x24,
+    0x00, 0x28, 0x28, 0x28,
+    0x00, 0x00, 0x28, 0x28,
+    0x00, 0x00, 0x00, 0x28,
+    0x00, 0x2E, 0x2E, 0x2E,
+    0x00, 0x00, 0x2E, 0x2E,
+    0x00, 0x00, 0x00, 0x2E,
+    0x01, 0x02, 0x02, 0x03,
+    0x01, 0x02, 0x03, 0x04,
+    0x01, 0x03, 0x04, 0x05,
+    0x01, 0x02, 0x04, 0x05,
+    0x01, 0x02, 0x03, 0x05,
+    0x01, 0x03, 0x04, 0x06,
+    0x01, 0x03, 0x05, 0x06,
+    0x01, 0x02, 0x05, 0x06,
+    0x01, 0x02, 0x04, 0x06,
+    0x01, 0x03, 0x05, 0x07,
+    0x01, 0x04, 0x06, 0x07,
+    0x01, 0x02, 0x06, 0x07,
+    0x01, 0x02, 0x04, 0x07,
+    0x01, 0x07, 0x07, 0x07,
+    0x01, 0x01, 0x07, 0x07,
+    0x01, 0x01, 0x01, 0x07,
+    0x01, 0x04, 0x05, 0x08,
+    0x01, 0x04, 0x07, 0x08,
+    0x01, 0x02, 0x07, 0x08,
+    0x01, 0x02, 0x05, 0x08,
+    0x01, 0x04, 0x06, 0x09,
+    0x01, 0x05, 0x07, 0x09,
+    0x01, 0x03, 0x07, 0x09,
+    0x01, 0x03, 0x05, 0x09,
+    0x01, 0x09, 0x09, 0x09,
+    0x01, 0x01, 0x09, 0x09,
+    0x01, 0x01, 0x01, 0x09,
+    0x01, 0x05, 0x08, 0x0C,
+    0x01, 0x06, 0x0A, 0x0C,
+    0x01, 0x03, 0x0A, 0x0C,
+    0x01, 0x03, 0x07, 0x0C,
+    0x01, 0x0C, 0x0C, 0x0C,
+    0x01, 0x01, 0x0C, 0x0C,
+    0x01, 0x01, 0x01, 0x0C,
+    0x01, 0x06, 0x0A, 0x0F,
+    0x01, 0x08, 0x0C, 0x0F,
+    0x01, 0x04, 0x0C, 0x0F,
+    0x01, 0x04, 0x08, 0x0F,
+    0x01, 0x0F, 0x0F, 0x0F,
+    0x01, 0x01, 0x0F, 0x0F,
+    0x01, 0x01, 0x01, 0x0F,
+    0x01, 0x07, 0x0C, 0x12,
+    0x01, 0x09, 0x0E, 0x12,
+    0x01, 0x05, 0x0E, 0x12,
+    0x01, 0x05, 0x0A, 0x12,
+    0x01, 0x12, 0x12, 0x12,
+    0x01, 0x01, 0x12, 0x12,
+    0x01, 0x01, 0x01, 0x12,
+    0x01, 0x08, 0x0E, 0x15,
+    0x01, 0x0B, 0x10, 0x15,
+    0x01, 0x06, 0x10, 0x15,
+    0x01, 0x06, 0x0B, 0x15,
+    0x01, 0x15, 0x15, 0x15,
+    0x01, 0x01, 0x15, 0x15,
+    0x01, 0x01, 0x01, 0x15,
+    0x01, 0x0C, 0x13, 0x18,
+    0x01, 0x06, 0x13, 0x18,
+    0x01, 0x06, 0x0D, 0x18,
+    0x01, 0x18, 0x18, 0x18,
+    0x01, 0x01, 0x18, 0x18,
+    0x01, 0x01, 0x01, 0x18,
+    0x01, 0x0E, 0x15, 0x1B,
+    0x01, 0x07, 0x15, 0x1B,
+    0x01, 0x07, 0x0E, 0x1B,
+    0x01, 0x1B, 0x1B, 0x1B,
+    0x01, 0x01, 0x1B, 0x1B,
+    0x01, 0x01, 0x01, 0x1B,
+    0x01, 0x0F, 0x17, 0x1E,
+    0x01, 0x08, 0x17, 0x1E,
+    0x01, 0x08, 0x10, 0x1E,
+    0x01, 0x1E, 0x1E, 0x1E,
+    0x01, 0x01, 0x1E, 0x1E,
+    0x01, 0x01, 0x01, 0x1E,
+    0x01, 0x11, 0x19, 0x21,
+    0x01, 0x09, 0x19, 0x21,
+    0x01, 0x09, 0x11, 0x21,
+    0x01, 0x21, 0x21, 0x21,
+    0x01, 0x01, 0x21, 0x21,
+    0x01, 0x01, 0x01, 0x21,
+    0x01, 0x24, 0x24, 0x24,
+    0x01, 0x01, 0x24, 0x24,
+    0x01, 0x01, 0x01, 0x24,
+    0x01, 0x13, 0x1C, 0x25,
+    0x01, 0x0A, 0x1C, 0x25,
+    0x01, 0x0A, 0x13, 0x25,
+    0x01, 0x29, 0x29, 0x29,
+    0x01, 0x01, 0x29, 0x29,
+    0x01, 0x01, 0x01, 0x29,
+    0x01, 0x2F, 0x2F, 0x2F,
+    0x01, 0x01, 0x2F, 0x2F,
+    0x01, 0x01, 0x01, 0x2F,
+    0x02, 0x03, 0x03, 0x04,
+    0x02, 0x03, 0x04, 0x05,
+    0x02, 0x04, 0x05, 0x06,
+    0x02, 0x03, 0x05, 0x06,
+    0x02, 0x03, 0x04, 0x06,
+    0x02, 0x04, 0x05, 0x07,
+    0x02, 0x04, 0x06, 0x07,
+    0x02, 0x03, 0x06, 0x07,
+    0x02, 0x03, 0x05, 0x07,
+    0x02, 0x04, 0x06, 0x08,
+    0x02, 0x05, 0x07, 0x08,
+    0x02, 0x03, 0x07, 0x08,
+    0x02, 0x03, 0x05, 0x08,
+    0x02, 0x08, 0x08, 0x08,
+    0x02, 0x02, 0x08, 0x08,
+    0x02, 0x02, 0x02, 0x08,
+    0x02, 0x05, 0x06, 0x09,
+    0x02, 0x05, 0x08, 0x09,
+    0x02, 0x03, 0x08, 0x09,
+    0x02, 0x03, 0x06, 0x09,
+    0x02, 0x05, 0x07, 0x0A,
+    0x02, 0x06, 0x08, 0x0A,
+    0x02, 0x04, 0x08, 0x0A,
+    0x02, 0x04, 0x06, 0x0A,
+    0x02, 0x0A, 0x0A, 0x0A,
+    0x02, 0x02, 0x0A, 0x0A,
+    0x02, 0x02, 0x02, 0x0A,
+    0x02, 0x06, 0x09, 0x0D,
+    0x02, 0x07, 0x0B, 0x0D,
+    0x02, 0x04, 0x0B, 0x0D,
+    0x02, 0x04, 0x08, 0x0D,
+    0x02, 0x0D, 0x0D, 0x0D,
+    0x02, 0x02, 0x0D, 0x0D,
+    0x02, 0x02, 0x02, 0x0D,
+    0x02, 0x07, 0x0B, 0x10,
+    0x02, 0x09, 0x0D, 0x10,
+    0x02, 0x05, 0x0D, 0x10,
+    0x02, 0x05, 0x09, 0x10,
+    0x02, 0x10, 0x10, 0x10,
+    0x02, 0x02, 0x10, 0x10,
+    0x02, 0x02, 0x02, 0x10,
+    0x02, 0x08, 0x0D, 0x13,
+    0x02, 0x0A, 0x0F, 0x13,
+    0x02, 0x06, 0x0F, 0x13,
+    0x02, 0x06, 0x0B, 0x13,
+    0x02, 0x13, 0x13, 0x13,
+    0x02, 0x02, 0x13, 0x13,
+    0x02, 0x02, 0x02, 0x13,
+    0x02, 0x09, 0x0F, 0x16,
+    0x02, 0x0C, 0x11, 0x16,
+    0x02, 0x07, 0x11, 0x16,
+    0x02, 0x07, 0x0C, 0x16,
+    0x02, 0x16, 0x16, 0x16,
+    0x02, 0x02, 0x16, 0x16,
+    0x02, 0x02, 0x02, 0x16,
+    0x02, 0x0D, 0x14, 0x19,
+    0x02, 0x07, 0x14, 0x19,
+    0x02, 0x07, 0x0E, 0x19,
+    0x02, 0x19, 0x19, 0x19,
+    0x02, 0x02, 0x19, 0x19,
+    0x02, 0x02, 0x02, 0x19,
+    0x02, 0x0F, 0x16, 0x1C,
+    0x02, 0x08, 0x16, 0x1C,
+    0x02, 0x08, 0x0F, 0x1C,
+    0x02, 0x1C, 0x1C, 0x1C,
+    0x02, 0x02, 0x1C, 0x1C,
+    0x02, 0x02, 0x02, 0x1C,
+    0x02, 0x10, 0x18, 0x1F,
+    0x02, 0x09, 0x18, 0x1F,
+    0x02, 0x09, 0x11, 0x1F,
+    0x02, 0x1F, 0x1F, 0x1F,
+    0x02, 0x02, 0x1F, 0x1F,
+    0x02, 0x02, 0x02, 0x1F,
+    0x02, 0x12, 0x1A, 0x22,
+    0x02, 0x0A, 0x1A, 0x22,
+    0x02, 0x0A, 0x12, 0x22,
+    0x02, 0x22, 0x22, 0x22,
+    0x02, 0x02, 0x22, 0x22,
+    0x02, 0x02, 0x02, 0x22,
+    0x02, 0x25, 0x25, 0x25,
+    0x02, 0x02, 0x25, 0x25,
+    0x02, 0x02, 0x02, 0x25,
+    0x02, 0x14, 0x1D, 0x26,
+    0x02, 0x0B, 0x1D, 0x26,
+    0x02, 0x0B, 0x14, 0x26,
+    0x02, 0x2A, 0x2A, 0x2A,
+    0x02, 0x02, 0x2A, 0x2A,
+    0x02, 0x02, 0x02, 0x2A,
+    0x02, 0x30, 0x30, 0x30,
+    0x02, 0x02, 0x30, 0x30,
+    0x02, 0x02, 0x02, 0x30,
+    0x03, 0x04, 0x04, 0x05,
+    0x03, 0x04, 0x05, 0x06,
+    0x03, 0x05, 0x06, 0x07,
+    0x03, 0x04, 0x06, 0x07,
+    0x03, 0x04, 0x05, 0x07,
+    0x03, 0x05, 0x06, 0x08,
+    0x03, 0x05, 0x07, 0x08,
+    0x03, 0x04, 0x07, 0x08,
+    0x03, 0x04, 0x06, 0x08,
+    0x03, 0x05, 0x07, 0x09,
+    0x03, 0x06, 0x08, 0x09,
+    0x03, 0x04, 0x08, 0x09,
+    0x03, 0x04, 0x06, 0x09,
+    0x03, 0x09, 0x09, 0x09,
+    0x03, 0x03, 0x09, 0x09,
+    0x03, 0x03, 0x03, 0x09,
+    0x03, 0x06, 0x07, 0x0A,
+    0x03, 0x06, 0x09, 0x0A,
+    0x03, 0x04, 0x09, 0x0A,
+    0x03, 0x04, 0x07, 0x0A,
+    0x03, 0x06, 0x08, 0x0B,
+    0x03, 0x07, 0x09, 0x0B,
+    0x03, 0x05, 0x09, 0x0B,
+    0x03, 0x05, 0x07, 0x0B,
+    0x03, 0x0B, 0x0B, 0x0B,
+    0x03, 0x03, 0x0B, 0x0B,
+    0x03, 0x03, 0x03, 0x0B,
+    0x03, 0x07, 0x0A, 0x0E,
+    0x03, 0x08, 0x0C, 0x0E,
+    0x03, 0x05, 0x0C, 0x0E,
+    0x03, 0x05, 0x09, 0x0E,
+    0x03, 0x0E, 0x0E, 0x0E,
+    0x03, 0x03, 0x0E, 0x0E,
+    0x03, 0x03, 0x03, 0x0E,
+    0x03, 0x08, 0x0C, 0x11,
+    0x03, 0x0A, 0x0E, 0x11,
+    0x03, 0x06, 0x0E, 0x11,
+    0x03, 0x06, 0x0A, 0x11,
+    0x03, 0x11, 0x11, 0x11,
+    0x03, 0x03, 0x11, 0x11,
+    0x03, 0x03, 0x03, 0x11,
+    0x03, 0x09, 0x0E, 0x14,
+    0x03, 0x0B, 0x10, 0x14,
+    0x03, 0x07, 0x10, 0x14,
+    0x03, 0x07, 0x0C, 0x14,
+    0x03, 0x14, 0x14, 0x14,
+    0x03, 0x03, 0x14, 0x14,
+    0x03, 0x03, 0x03, 0x14,
+    0x03, 0x0A, 0x10, 0x17,
+    0x03, 0x0D, 0x12, 0x17,
+    0x03, 0x08, 0x12, 0x17,
+    0x03, 0x08, 0x0D, 0x17,
+    0x03, 0x17, 0x17, 0x17,
+    0x03, 0x03, 0x17, 0x17,
+    0x03, 0x03, 0x03, 0x17,
+    0x03, 0x0E, 0x15, 0x1A,
+    0x03, 0x08, 0x15, 0x1A,
+    0x03, 0x08, 0x0F, 0x1A,
+    0x03, 0x1A, 0x1A, 0x1A,
+    0x03, 0x03, 0x1A, 0x1A,
+    0x03, 0x03, 0x03, 0x1A,
+    0x03, 0x10, 0x17, 0x1D,
+    0x03, 0x09, 0x17, 0x1D,
+    0x03, 0x09, 0x10, 0x1D,
+    0x03, 0x1D, 0x1D, 0x1D,
+    0x03, 0x03, 0x1D, 0x1D,
+    0x03, 0x03, 0x03, 0x1D,
+    0x03, 0x11, 0x19, 0x20,
+    0x03, 0x0A, 0x19, 0x20,
+    0x03, 0x0A, 0x12, 0x20,
+    0x03, 0x20, 0x20, 0x20,
+    0x03, 0x03, 0x20, 0x20,
+    0x03, 0x03, 0x03, 0x20,
+    0x03, 0x13, 0x1B, 0x23,
+    0x03, 0x0B, 0x1B, 0x23,
+    0x03, 0x0B, 0x13, 0x23,
+    0x03, 0x23, 0x23, 0x23,
+    0x03, 0x03, 0x23, 0x23,
+    0x03, 0x03, 0x03, 0x23,
+    0x03, 0x26, 0x26, 0x26,
+    0x03, 0x03, 0x26, 0x26,
+    0x03, 0x03, 0x03, 0x26,
+    0x03, 0x15, 0x1E, 0x27,
+    0x03, 0x0C, 0x1E, 0x27,
+    0x03, 0x0C, 0x15, 0x27,
+    0x03, 0x2B, 0x2B, 0x2B,
+    0x03, 0x03, 0x2B, 0x2B,
+    0x03, 0x03, 0x03, 0x2B,
+    0x03, 0x31, 0x31, 0x31,
+    0x03, 0x03, 0x31, 0x31,
+    0x03, 0x03, 0x03, 0x31,
+    0x04, 0x05, 0x05, 0x06,
+    0x04, 0x05, 0x06, 0x07,
+    0x04, 0x06, 0x07, 0x08,
+    0x04, 0x05, 0x07, 0x08,
+    0x04, 0x05, 0x06, 0x08,
+    0x04, 0x06, 0x07, 0x09,
+    0x04, 0x06, 0x08, 0x09,
+    0x04, 0x05, 0x08, 0x09,
+    0x04, 0x05, 0x07, 0x09,
+    0x04, 0x06, 0x08, 0x0A,
+    0x04, 0x07, 0x09, 0x0A,
+    0x04, 0x05, 0x09, 0x0A,
+    0x04, 0x05, 0x07, 0x0A,
+    0x04, 0x0A, 0x0A, 0x0A,
+    0x04, 0x04, 0x0A, 0x0A,
+    0x04, 0x04, 0x04, 0x0A,
+    0x04, 0x07, 0x08, 0x0B,
+    0x04, 0x07, 0x0A, 0x0B,
+    0x04, 0x05, 0x0A, 0x0B,
+    0x04, 0x05, 0x08, 0x0B,
+    0x04, 0x07, 0x09, 0x0C,
+    0x04, 0x08, 0x0A, 0x0C,
+    0x04, 0x06, 0x0A, 0x0C,
+    0x04, 0x06, 0x08, 0x0C,
+    0x04, 0x0C, 0x0C, 0x0C,
+    0x04, 0x04, 0x0C, 0x0C,
+    0x04, 0x04, 0x04, 0x0C,
+    0x04, 0x08, 0x0B, 0x0F,
+    0x04, 0x09, 0x0D, 0x0F,
+    0x04, 0x06, 0x0D, 0x0F,
+    0x04, 0x06, 0x0A, 0x0F,
+    0x04, 0x0F, 0x0F, 0x0F,
+    0x04, 0x04, 0x0F, 0x0F,
+    0x04, 0x04, 0x04, 0x0F,
+    0x04, 0x09, 0x0D, 0x12,
+    0x04, 0x0B, 0x0F, 0x12,
+    0x04, 0x07, 0x0F, 0x12,
+    0x04, 0x07, 0x0B, 0x12,
+    0x04, 0x12, 0x12, 0x12,
+    0x04, 0x04, 0x12, 0x12,
+    0x04, 0x04, 0x04, 0x12,
+    0x04, 0x0A, 0x0F, 0x15,
+    0x04, 0x0C, 0x11, 0x15,
+    0x04, 0x08, 0x11, 0x15,
+    0x04, 0x08, 0x0D, 0x15,
+    0x04, 0x15, 0x15, 0x15,
+    0x04, 0x04, 0x15, 0x15,
+    0x04, 0x04, 0x04, 0x15,
+    0x04, 0x0B, 0x11, 0x18,
+    0x04, 0x0E, 0x13, 0x18,
+    0x04, 0x09, 0x13, 0x18,
+    0x04, 0x09, 0x0E, 0x18,
+    0x04, 0x18, 0x18, 0x18,
+    0x04, 0x04, 0x18, 0x18,
+    0x04, 0x04, 0x04, 0x18,
+    0x04, 0x0F, 0x16, 0x1B,
+    0x04, 0x09, 0x16, 0x1B,
+    0x04, 0x09, 0x10, 0x1B,
+    0x04, 0x1B, 0x1B, 0x1B,
+    0x04, 0x04, 0x1B, 0x1B,
+    0x04, 0x04, 0x04, 0x1B,
+    0x04, 0x11, 0x18, 0x1E,
+    0x04, 0x0A, 0x18, 0x1E,
+    0x04, 0x0A, 0x11, 0x1E,
+    0x04, 0x1E, 0x1E, 0x1E,
+    0x04, 0x04, 0x1E, 0x1E,
+    0x04, 0x04, 0x04, 0x1E,
+    0x04, 0x12, 0x1A, 0x21,
+    0x04, 0x0B, 0x1A, 0x21,
+    0x04, 0x0B, 0x13, 0x21,
+    0x04, 0x21, 0x21, 0x21,
+    0x04, 0x04, 0x21, 0x21,
+    0x04, 0x04, 0x04, 0x21,
+    0x04, 0x14, 0x1C, 0x24,
+    0x04, 0x0C, 0x1C, 0x24,
+    0x04, 0x0C, 0x14, 0x24,
+    0x04, 0x24, 0x24, 0x24,
+    0x04, 0x04, 0x24, 0x24,
+    0x04, 0x04, 0x04, 0x24,
+    0x04, 0x27, 0x27, 0x27,
+    0x04, 0x04, 0x27, 0x27,
+    0x04, 0x04, 0x04, 0x27,
+    0x04, 0x16, 0x1F, 0x28,
+    0x04, 0x0D, 0x1F, 0x28,
+    0x04, 0x0D, 0x16, 0x28,
+    0x04, 0x2C, 0x2C, 0x2C,
+    0x04, 0x04, 0x2C, 0x2C,
+    0x04, 0x04, 0x04, 0x2C,
+    0x04, 0x32, 0x32, 0x32,
+    0x04, 0x04, 0x32, 0x32,
+    0x04, 0x04, 0x04, 0x32,
+    0x05, 0x06, 0x06, 0x07,
+    0x05, 0x06, 0x07, 0x08,
+    0x05, 0x07, 0x08, 0x09,
+    0x05, 0x06, 0x08, 0x09,
+    0x05, 0x06, 0x07, 0x09,
+    0x05, 0x07, 0x08, 0x0A,
+    0x05, 0x07, 0x09, 0x0A,
+    0x05, 0x06, 0x09, 0x0A,
+    0x05, 0x06, 0x08, 0x0A,
+    0x05, 0x07, 0x09, 0x0B,
+    0x05, 0x08, 0x0A, 0x0B,
+    0x05, 0x06, 0x0A, 0x0B,
+    0x05, 0x06, 0x08, 0x0B,
+    0x05, 0x0B, 0x0B, 0x0B,
+    0x05, 0x05, 0x0B, 0x0B,
+    0x05, 0x05, 0x05, 0x0B,
+    0x05, 0x08, 0x09, 0x0C,
+    0x05, 0x08, 0x0B, 0x0C,
+    0x05, 0x06, 0x0B, 0x0C,
+    0x05, 0x06, 0x09, 0x0C,
+    0x05, 0x08, 0x0A, 0x0D,
+    0x05, 0x09, 0x0B, 0x0D,
+    0x05, 0x07, 0x0B, 0x0D,
+    0x05, 0x07, 0x09, 0x0D,
+    0x05, 0x0D, 0x0D, 0x0D,
+    0x05, 0x05, 0x0D, 0x0D,
+    0x05, 0x05, 0x05, 0x0D,
+    0x05, 0x09, 0x0C, 0x10,
+    0x05, 0x0A, 0x0E, 0x10,
+    0x05, 0x07, 0x0E, 0x10,
+    0x05, 0x07, 0x0B, 0x10,
+    0x05, 0x10, 0x10, 0x10,
+    0x05, 0x05, 0x10, 0x10,
+    0x05, 0x05, 0x05, 0x10,
+    0x05, 0x0A, 0x0E, 0x13,
+    0x05, 0x0C, 0x10, 0x13,
+    0x05, 0x08, 0x10, 0x13,
+    0x05, 0x08, 0x0C, 0x13,
+    0x05, 0x13, 0x13, 0x13,
+    0x05, 0x05, 0x13, 0x13,
+    0x05, 0x05, 0x05, 0x13,
+    0x05, 0x0B, 0x10, 0x16,
+    0x05, 0x0D, 0x12, 0x16,
+    0x05, 0x09, 0x12, 0x16,
+    0x05, 0x09, 0x0E, 0x16,
+    0x05, 0x16, 0x16, 0x16,
+    0x05, 0x05, 0x16, 0x16,
+    0x05, 0x05, 0x05, 0x16,
+    0x05, 0x0C, 0x12, 0x19,
+    0x05, 0x0F, 0x14, 0x19,
+    0x05, 0x0A, 0x14, 0x19,
+    0x05, 0x0A, 0x0F, 0x19,
+    0x05, 0x19, 0x19, 0x19,
+    0x05, 0x05, 0x19, 0x19,
+    0x05, 0x05, 0x05, 0x19,
+    0x05, 0x10, 0x17, 0x1C,
+    0x05, 0x0A, 0x17, 0x1C,
+    0x05, 0x0A, 0x11, 0x1C,
+    0x05, 0x1C, 0x1C, 0x1C,
+    0x05, 0x05, 0x1C, 0x1C,
+    0x05, 0x05, 0x05, 0x1C,
+    0x05, 0x12, 0x19, 0x1F,
+    0x05, 0x0B, 0x19, 0x1F,
+    0x05, 0x0B, 0x12, 0x1F,
+    0x05, 0x1F, 0x1F, 0x1F,
+    0x05, 0x05, 0x1F, 0x1F,
+    0x05, 0x05, 0x05, 0x1F,
+    0x05, 0x13, 0x1B, 0x22,
+    0x05, 0x0C, 0x1B, 0x22,
+    0x05, 0x0C, 0x14, 0x22,
+    0x05, 0x22, 0x22, 0x22,
+    0x05, 0x05, 0x22, 0x22,
+    0x05, 0x05, 0x05, 0x22,
+    0x05, 0x15, 0x1D, 0x25,
+    0x05, 0x0D, 0x1D, 0x25,
+    0x05, 0x0D, 0x15, 0x25,
+    0x05, 0x25, 0x25, 0x25,
+    0x05, 0x05, 0x25, 0x25,
+    0x05, 0x05, 0x05, 0x25,
+    0x05, 0x28, 0x28, 0x28,
+    0x05, 0x05, 0x28, 0x28,
+    0x05, 0x05, 0x05, 0x28,
+    0x05, 0x17, 0x20, 0x29,
+    0x05, 0x0E, 0x20, 0x29,
+    0x05, 0x0E, 0x17, 0x29,
+    0x05, 0x2D, 0x2D, 0x2D,
+    0x05, 0x05, 0x2D, 0x2D,
+    0x05, 0x05, 0x05, 0x2D,
+    0x05, 0x33, 0x33, 0x33,
+    0x05, 0x05, 0x33, 0x33,
+    0x05, 0x05, 0x05, 0x33,
+    0x06, 0x07, 0x07, 0x08,
+    0x06, 0x07, 0x08, 0x09,
+    0x06, 0x08, 0x09, 0x0A,
+    0x06, 0x07, 0x09, 0x0A,
+    0x06, 0x07, 0x08, 0x0A,
+    0x06, 0x08, 0x09, 0x0B,
+    0x06, 0x08, 0x0A, 0x0B,
+    0x06, 0x07, 0x0A, 0x0B,
+    0x06, 0x07, 0x09, 0x0B,
+    0x06, 0x08, 0x0A, 0x0C,
+    0x06, 0x09, 0x0B, 0x0C,
+    0x06, 0x07, 0x0B, 0x0C,
+    0x06, 0x07, 0x09, 0x0C,
+    0x06, 0x0C, 0x0C, 0x0C,
+    0x06, 0x06, 0x0C, 0x0C,
+    0x06, 0x06, 0x06, 0x0C,
+    0x06, 0x09, 0x0A, 0x0D,
+    0x06, 0x09, 0x0C, 0x0D,
+    0x06, 0x07, 0x0C, 0x0D,
+    0x06, 0x07, 0x0A, 0x0D,
+    0x06, 0x09, 0x0B, 0x0E,
+    0x06, 0x0A, 0x0C, 0x0E,
+    0x06, 0x08, 0x0C, 0x0E,
+    0x06, 0x08, 0x0A, 0x0E,
+    0x06, 0x0E, 0x0E, 0x0E,
+    0x06, 0x06, 0x0E, 0x0E,
+    0x06, 0x06, 0x06, 0x0E,
+    0x06, 0x0A, 0x0D, 0x11,
+    0x06, 0x0B, 0x0F, 0x11,
+    0x06, 0x08, 0x0F, 0x11,
+    0x06, 0x08, 0x0C, 0x11,
+    0x06, 0x11, 0x11, 0x11,
+    0x06, 0x06, 0x11, 0x11,
+    0x06, 0x06, 0x06, 0x11,
+    0x06, 0x0B, 0x0F, 0x14,
+    0x06, 0x0D, 0x11, 0x14,
+    0x06, 0x09, 0x11, 0x14,
+    0x06, 0x09, 0x0D, 0x14,
+    0x06, 0x14, 0x14, 0x14,
+    0x06, 0x06, 0x14, 0x14,
+    0x06, 0x06, 0x06, 0x14,
+    0x06, 0x0C, 0x11, 0x17,
+    0x06, 0x0E, 0x13, 0x17,
+    0x06, 0x0A, 0x13, 0x17,
+    0x06, 0x0A, 0x0F, 0x17,
+    0x06, 0x17, 0x17, 0x17,
+    0x06, 0x06, 0x17, 0x17,
+    0x06, 0x06, 0x06, 0x17,
+    0x06, 0x0D, 0x13, 0x1A,
+    0x06, 0x10, 0x15, 0x1A,
+    0x06, 0x0B, 0x15, 0x1A,
+    0x06, 0x0B, 0x10, 0x1A,
+    0x06, 0x1A, 0x1A, 0x1A,
+    0x06, 0x06, 0x1A, 0x1A,
+    0x06, 0x06, 0x06, 0x1A,
+    0x06, 0x11, 0x18, 0x1D,
+    0x06, 0x0B, 0x18, 0x1D,
+    0x06, 0x0B, 0x12, 0x1D,
+    0x06, 0x1D, 0x1D, 0x1D,
+    0x06, 0x06, 0x1D, 0x1D,
+    0x06, 0x06, 0x06, 0x1D,
+    0x06, 0x13, 0x1A, 0x20,
+    0x06, 0x0C, 0x1A, 0x20,
+    0x06, 0x0C, 0x13, 0x20,
+    0x06, 0x20, 0x20, 0x20,
+    0x06, 0x06, 0x20, 0x20,
+    0x06, 0x06, 0x06, 0x20,
+    0x06, 0x14, 0x1C, 0x23,
+    0x06, 0x0D, 0x1C, 0x23,
+    0x06, 0x0D, 0x15, 0x23,
+    0x06, 0x23, 0x23, 0x23,
+    0x06, 0x06, 0x23, 0x23,
+    0x06, 0x06, 0x06, 0x23,
+    0x06, 0x16, 0x1E, 0x26,
+    0x06, 0x0E, 0x1E, 0x26,
+    0x06, 0x0E, 0x16, 0x26,
+    0x06, 0x26, 0x26, 0x26,
+    0x06, 0x06, 0x26, 0x26,
+    0x06, 0x06, 0x06, 0x26,
+    0x06, 0x29, 0x29, 0x29,
+    0x06, 0x06, 0x29, 0x29,
+    0x06, 0x06, 0x06, 0x29,
+    0x06, 0x18, 0x21, 0x2A,
+    0x06, 0x0F, 0x21, 0x2A,
+    0x06, 0x0F, 0x18, 0x2A,
+    0x06, 0x2E, 0x2E, 0x2E,
+    0x06, 0x06, 0x2E, 0x2E,
+    0x06, 0x06, 0x06, 0x2E,
+    0x06, 0x34, 0x34, 0x34,
+    0x06, 0x06, 0x34, 0x34,
+    0x06, 0x06, 0x06, 0x34,
+    0x07, 0x08, 0x08, 0x09,
+    0x07, 0x08, 0x09, 0x0A,
+    0x07, 0x09, 0x0A, 0x0B,
+    0x07, 0x08, 0x0A, 0x0B,
+    0x07, 0x08, 0x09, 0x0B,
+    0x07, 0x09, 0x0A, 0x0C,
+    0x07, 0x09, 0x0B, 0x0C,
+    0x07, 0x08, 0x0B, 0x0C,
+    0x07, 0x08, 0x0A, 0x0C,
+    0x07, 0x09, 0x0B, 0x0D,
+    0x07, 0x0A, 0x0C, 0x0D,
+    0x07, 0x08, 0x0C, 0x0D,
+    0x07, 0x08, 0x0A, 0x0D,
+    0x07, 0x0D, 0x0D, 0x0D,
+    0x07, 0x07, 0x0D, 0x0D,
+    0x07, 0x07, 0x07, 0x0D,
+    0x07, 0x0A, 0x0B, 0x0E,
+    0x07, 0x0A, 0x0D, 0x0E,
+    0x07, 0x08, 0x0D, 0x0E,
+    0x07, 0x08, 0x0B, 0x0E,
+    0x07, 0x0A, 0x0C, 0x0F,
+    0x07, 0x0B, 0x0D, 0x0F,
+    0x07, 0x09, 0x0D, 0x0F,
+    0x07, 0x09, 0x0B, 0x0F,
+    0x07, 0x0F, 0x0F, 0x0F,
+    0x07, 0x07, 0x0F, 0x0F,
+    0x07, 0x07, 0x07, 0x0F,
+    0x07, 0x0B, 0x0E, 0x12,
+    0x07, 0x0C, 0x10, 0x12,
+    0x07, 0x09, 0x10, 0x12,
+    0x07, 0x09, 0x0D, 0x12,
+    0x07, 0x12, 0x12, 0x12,
+    0x07, 0x07, 0x12, 0x12,
+    0x07, 0x07, 0x07, 0x12,
+    0x07, 0x0C, 0x10, 0x15,
+    0x07, 0x0E, 0x12, 0x15,
+    0x07, 0x0A, 0x12, 0x15,
+    0x07, 0x0A, 0x0E, 0x15,
+    0x07, 0x15, 0x15, 0x15,
+    0x07, 0x07, 0x15, 0x15,
+    0x07, 0x07, 0x07, 0x15,
+    0x07, 0x0D, 0x12, 0x18,
+    0x07, 0x0F, 0x14, 0x18,
+    0x07, 0x0B, 0x14, 0x18,
+    0x07, 0x0B, 0x10, 0x18,
+    0x07, 0x18, 0x18, 0x18,
+    0x07, 0x07, 0x18, 0x18,
+    0x07, 0x07, 0x07, 0x18,
+    0x07, 0x0E, 0x14, 0x1B,
+    0x07, 0x11, 0x16, 0x1B,
+    0x07, 0x0C, 0x16, 0x1B,
+    0x07, 0x0C, 0x11, 0x1B,
+    0x07, 0x1B, 0x1B, 0x1B,
+    0x07, 0x07, 0x1B, 0x1B,
+    0x07, 0x07, 0x07, 0x1B,
+    0x07, 0x12, 0x19, 0x1E,
+    0x07, 0x0C, 0x19, 0x1E,
+    0x07, 0x0C, 0x13, 0x1E,
+    0x07, 0x1E, 0x1E, 0x1E,
+    0x07, 0x07, 0x1E, 0x1E,
+    0x07, 0x07, 0x07, 0x1E,
+    0x07, 0x14, 0x1B, 0x21,
+    0x07, 0x0D, 0x1B, 0x21,
+    0x07, 0x0D, 0x14, 0x21,
+    0x07, 0x21, 0x21, 0x21,
+    0x07, 0x07, 0x21, 0x21,
+    0x07, 0x07, 0x07, 0x21,
+    0x07, 0x15, 0x1D, 0x24,
+    0x07, 0x0E, 0x1D, 0x24,
+    0x07, 0x0E, 0x16, 0x24,
+    0x07, 0x24, 0x24, 0x24,
+    0x07, 0x07, 0x24, 0x24,
+    0x07, 0x07, 0x07, 0x24,
+    0x07, 0x17, 0x1F, 0x27,
+    0x07, 0x0F, 0x1F, 0x27,
+    0x07, 0x0F, 0x17, 0x27,
+    0x07, 0x27, 0x27, 0x27,
+    0x07, 0x07, 0x27, 0x27,
+    0x07, 0x07, 0x07, 0x27,
+    0x07, 0x2A, 0x2A, 0x2A,
+    0x07, 0x07, 0x2A, 0x2A,
+    0x07, 0x07, 0x07, 0x2A,
+    0x07, 0x19, 0x22, 0x2B,
+    0x07, 0x10, 0x22, 0x2B,
+    0x07, 0x10, 0x19, 0x2B,
+    0x07, 0x2F, 0x2F, 0x2F,
+    0x07, 0x07, 0x2F, 0x2F,
+    0x07, 0x07, 0x07, 0x2F,
+    0x07, 0x35, 0x35, 0x35,
+    0x07, 0x07, 0x35, 0x35,
+    0x07, 0x07, 0x07, 0x35,
+    0x08, 0x09, 0x09, 0x0A,
+    0x08, 0x09, 0x0A, 0x0B,
+    0x08, 0x0A, 0x0B, 0x0C,
+    0x08, 0x09, 0x0B, 0x0C,
+    0x08, 0x09, 0x0A, 0x0C,
+    0x08, 0x0A, 0x0B, 0x0D,
+    0x08, 0x0A, 0x0C, 0x0D,
+    0x08, 0x09, 0x0C, 0x0D,
+    0x08, 0x09, 0x0B, 0x0D,
+    0x08, 0x0A, 0x0C, 0x0E,
+    0x08, 0x0B, 0x0D, 0x0E,
+    0x08, 0x09, 0x0D, 0x0E,
+    0x08, 0x09, 0x0B, 0x0E,
+    0x08, 0x0E, 0x0E, 0x0E,
+    0x08, 0x08, 0x0E, 0x0E,
+    0x08, 0x08, 0x08, 0x0E,
+    0x08, 0x0B, 0x0C, 0x0F,
+    0x08, 0x0B, 0x0E, 0x0F,
+    0x08, 0x09, 0x0E, 0x0F,
+    0x08, 0x09, 0x0C, 0x0F,
+    0x08, 0x0B, 0x0D, 0x10,
+    0x08, 0x0C, 0x0E, 0x10,
+    0x08, 0x0A, 0x0E, 0x10,
+    0x08, 0x0A, 0x0C, 0x10,
+    0x08, 0x10, 0x10, 0x10,
+    0x08, 0x08, 0x10, 0x10,
+    0x08, 0x08, 0x08, 0x10,
+    0x08, 0x0C, 0x0F, 0x13,
+    0x08, 0x0D, 0x11, 0x13,
+    0x08, 0x0A, 0x11, 0x13,
+    0x08, 0x0A, 0x0E, 0x13,
+    0x08, 0x13, 0x13, 0x13,
+    0x08, 0x08, 0x13, 0x13,
+    0x08, 0x08, 0x08, 0x13,
+    0x08, 0x0D, 0x11, 0x16,
+    0x08, 0x0F, 0x13, 0x16,
+    0x08, 0x0B, 0x13, 0x16,
+    0x08, 0x0B, 0x0F, 0x16,
+    0x08, 0x16, 0x16, 0x16,
+    0x08, 0x08, 0x16, 0x16,
+    0x08, 0x08, 0x08, 0x16,
+    0x08, 0x0E, 0x13, 0x19,
+    0x08, 0x10, 0x15, 0x19,
+    0x08, 0x0C, 0x15, 0x19,
+    0x08, 0x0C, 0x11, 0x19,
+    0x08, 0x19, 0x19, 0x19,
+    0x08, 0x08, 0x19, 0x19,
+    0x08, 0x08, 0x08, 0x19,
+    0x08, 0x0F, 0x15, 0x1C,
+    0x08, 0x12, 0x17, 0x1C,
+    0x08, 0x0D, 0x17, 0x1C,
+    0x08, 0x0D, 0x12, 0x1C,
+    0x08, 0x1C, 0x1C, 0x1C,
+    0x08, 0x08, 0x1C, 0x1C,
+    0x08, 0x08, 0x08, 0x1C,
+    0x08, 0x13, 0x1A, 0x1F,
+    0x08, 0x0D, 0x1A, 0x1F,
+    0x08, 0x0D, 0x14, 0x1F,
+    0x08, 0x1F, 0x1F, 0x1F,
+    0x08, 0x08, 0x1F, 0x1F,
+    0x08, 0x08, 0x08, 0x1F,
+    0x08, 0x15, 0x1C, 0x22,
+    0x08, 0x0E, 0x1C, 0x22,
+    0x08, 0x0E, 0x15, 0x22,
+    0x08, 0x22, 0x22, 0x22,
+    0x08, 0x08, 0x22, 0x22,
+    0x08, 0x08, 0x08, 0x22,
+    0x08, 0x16, 0x1E, 0x25,
+    0x08, 0x0F, 0x1E, 0x25,
+    0x08, 0x0F, 0x17, 0x25,
+    0x08, 0x25, 0x25, 0x25,
+    0x08, 0x08, 0x25, 0x25,
+    0x08, 0x08, 0x08, 0x25,
+    0x08, 0x18, 0x20, 0x28,
+    0x08, 0x10, 0x20, 0x28,
+    0x08, 0x10, 0x18, 0x28,
+    0x08, 0x28, 0x28, 0x28,
+    0x08, 0x08, 0x28, 0x28,
+    0x08, 0x08, 0x08, 0x28,
+    0x08, 0x2B, 0x2B, 0x2B,
+    0x08, 0x08, 0x2B, 0x2B,
+    0x08, 0x08, 0x08, 0x2B,
+    0x08, 0x1A, 0x23, 0x2C,
+    0x08, 0x11, 0x23, 0x2C,
+    0x08, 0x11, 0x1A, 0x2C,
+    0x08, 0x30, 0x30, 0x30,
+    0x08, 0x08, 0x30, 0x30,
+    0x08, 0x08, 0x08, 0x30,
+    0x08, 0x36, 0x36, 0x36,
+    0x08, 0x08, 0x36, 0x36,
+    0x08, 0x08, 0x08, 0x36,
+    0x09, 0x0A, 0x0A, 0x0B,
+    0x09, 0x0A, 0x0B, 0x0C,
+    0x09, 0x0B, 0x0C, 0x0D,
+    0x09, 0x0A, 0x0C, 0x0D,
+    0x09, 0x0A, 0x0B, 0x0D,
+    0x09, 0x0B, 0x0C, 0x0E,
+    0x09, 0x0B, 0x0D, 0x0E,
+    0x09, 0x0A, 0x0D, 0x0E,
+    0x09, 0x0A, 0x0C, 0x0E,
+    0x09, 0x0B, 0x0D, 0x0F,
+    0x09, 0x0C, 0x0E, 0x0F,
+    0x09, 0x0A, 0x0E, 0x0F,
+    0x09, 0x0A, 0x0C, 0x0F,
+    0x09, 0x0F, 0x0F, 0x0F,
+    0x09, 0x09, 0x0F, 0x0F,
+    0x09, 0x09, 0x09, 0x0F,
+    0x09, 0x0C, 0x0D, 0x10,
+    0x09, 0x0C, 0x0F, 0x10,
+    0x09, 0x0A, 0x0F, 0x10,
+    0x09, 0x0A, 0x0D, 0x10,
+    0x09, 0x0C, 0x0E, 0x11,
+    0x09, 0x0D, 0x0F, 0x11,
+    0x09, 0x0B, 0x0F, 0x11,
+    0x09, 0x0B, 0x0D, 0x11,
+    0x09, 0x11, 0x11, 0x11,
+    0x09, 0x09, 0x11, 0x11,
+    0x09, 0x09, 0x09, 0x11,
+    0x09, 0x0D, 0x10, 0x14,
+    0x09, 0x0E, 0x12, 0x14,
+    0x09, 0x0B, 0x12, 0x14,
+    0x09, 0x0B, 0x0F, 0x14,
+    0x09, 0x14, 0x14, 0x14,
+    0x09, 0x09, 0x14, 0x14,
+    0x09, 0x09, 0x09, 0x14,
+    0x09, 0x0E, 0x12, 0x17,
+    0x09, 0x10, 0x14, 0x17,
+    0x09, 0x0C, 0x14, 0x17,
+    0x09, 0x0C, 0x10, 0x17,
+    0x09, 0x17, 0x17, 0x17,
+    0x09, 0x09, 0x17, 0x17,
+    0x09, 0x09, 0x09, 0x17,
+    0x09, 0x0F, 0x14, 0x1A,
+    0x09, 0x11, 0x16, 0x1A,
+    0x09, 0x0D, 0x16, 0x1A,
+    0x09, 0x0D, 0x12, 0x1A,
+    0x09, 0x1A, 0x1A, 0x1A,
+    0x09, 0x09, 0x1A, 0x1A,
+    0x09, 0x09, 0x09, 0x1A,
+    0x09, 0x10, 0x16, 0x1D,
+    0x09, 0x13, 0x18, 0x1D,
+    0x09, 0x0E, 0x18, 0x1D,
+    0x09, 0x0E, 0x13, 0x1D,
+    0x09, 0x1D, 0x1D, 0x1D,
+    0x09, 0x09, 0x1D, 0x1D,
+    0x09, 0x09, 0x09, 0x1D,
+    0x09, 0x14, 0x1B, 0x20,
+    0x09, 0x0E, 0x1B, 0x20,
+    0x09, 0x0E, 0x15, 0x20,
+    0x09, 0x20, 0x20, 0x20,
+    0x09, 0x09, 0x20, 0x20,
+    0x09, 0x09, 0x09, 0x20,
+    0x09, 0x16, 0x1D, 0x23,
+    0x09, 0x0F, 0x1D, 0x23,
+    0x09, 0x0F, 0x16, 0x23,
+    0x09, 0x23, 0x23, 0x23,
+    0x09, 0x09, 0x23, 0x23,
+    0x09, 0x09, 0x09, 0x23,
+    0x09, 0x17, 0x1F, 0x26,
+    0x09, 0x10, 0x1F, 0x26,
+    0x09, 0x10, 0x18, 0x26,
+    0x09, 0x26, 0x26, 0x26,
+    0x09, 0x09, 0x26, 0x26,
+    0x09, 0x09, 0x09, 0x26,
+    0x09, 0x19, 0x21, 0x29,
+    0x09, 0x11, 0x21, 0x29,
+    0x09, 0x11, 0x19, 0x29,
+    0x09, 0x29, 0x29, 0x29,
+    0x09, 0x09, 0x29, 0x29,
+    0x09, 0x09, 0x09, 0x29,
+    0x09, 0x2C, 0x2C, 0x2C,
+    0x09, 0x09, 0x2C, 0x2C,
+    0x09, 0x09, 0x09, 0x2C,
+    0x09, 0x1B, 0x24, 0x2D,
+    0x09, 0x12, 0x24, 0x2D,
+    0x09, 0x12, 0x1B, 0x2D,
+    0x09, 0x31, 0x31, 0x31,
+    0x09, 0x09, 0x31, 0x31,
+    0x09, 0x09, 0x09, 0x31,
+    0x09, 0x37, 0x37, 0x37,
+    0x09, 0x09, 0x37, 0x37,
+    0x09, 0x09, 0x09, 0x37,
+    0x0A, 0x0B, 0x0B, 0x0C,
+    0x0A, 0x0B, 0x0C, 0x0D,
+    0x0A, 0x0C, 0x0D, 0x0E,
+    0x0A, 0x0B, 0x0D, 0x0E,
+    0x0A, 0x0B, 0x0C, 0x0E,
+    0x0A, 0x0C, 0x0D, 0x0F,
+    0x0A, 0x0C, 0x0E, 0x0F,
+    0x0A, 0x0B, 0x0E, 0x0F,
+    0x0A, 0x0B, 0x0D, 0x0F,
+    0x0A, 0x0C, 0x0E, 0x10,
+    0x0A, 0x0D, 0x0F, 0x10,
+    0x0A, 0x0B, 0x0F, 0x10,
+    0x0A, 0x0B, 0x0D, 0x10,
+    0x0A, 0x10, 0x10, 0x10,
+    0x0A, 0x0A, 0x10, 0x10,
+    0x0A, 0x0A, 0x0A, 0x10,
+    0x0A, 0x0D, 0x0E, 0x11,
+    0x0A, 0x0D, 0x10, 0x11,
+    0x0A, 0x0B, 0x10, 0x11,
+    0x0A, 0x0B, 0x0E, 0x11,
+    0x0A, 0x0D, 0x0F, 0x12,
+    0x0A, 0x0E, 0x10, 0x12,
+    0x0A, 0x0C, 0x10, 0x12,
+    0x0A, 0x0C, 0x0E, 0x12,
+    0x0A, 0x12, 0x12, 0x12,
+    0x0A, 0x0A, 0x12, 0x12,
+    0x0A, 0x0A, 0x0A, 0x12,
+    0x0A, 0x0E, 0x11, 0x15,
+    0x0A, 0x0F, 0x13, 0x15,
+    0x0A, 0x0C, 0x13, 0x15,
+    0x0A, 0x0C, 0x10, 0x15,
+    0x0A, 0x15, 0x15, 0x15,
+    0x0A, 0x0A, 0x15, 0x15,
+    0x0A, 0x0A, 0x0A, 0x15,
+    0x0A, 0x0F, 0x13, 0x18,
+    0x0A, 0x11, 0x15, 0x18,
+    0x0A, 0x0D, 0x15, 0x18,
+    0x0A, 0x0D, 0x11, 0x18,
+    0x0A, 0x18, 0x18, 0x18,
+    0x0A, 0x0A, 0x18, 0x18,
+    0x0A, 0x0A, 0x0A, 0x18,
+    0x0A, 0x10, 0x15, 0x1B,
+    0x0A, 0x12, 0x17, 0x1B,
+    0x0A, 0x0E, 0x17, 0x1B,
+    0x0A, 0x0E, 0x13, 0x1B,
+    0x0A, 0x1B, 0x1B, 0x1B,
+    0x0A, 0x0A, 0x1B, 0x1B,
+    0x0A, 0x0A, 0x0A, 0x1B,
+    0x0A, 0x11, 0x17, 0x1E,
+    0x0A, 0x14, 0x19, 0x1E,
+    0x0A, 0x0F, 0x19, 0x1E,
+    0x0A, 0x0F, 0x14, 0x1E,
+    0x0A, 0x1E, 0x1E, 0x1E,
+    0x0A, 0x0A, 0x1E, 0x1E,
+    0x0A, 0x0A, 0x0A, 0x1E,
+    0x0A, 0x15, 0x1C, 0x21,
+    0x0A, 0x0F, 0x1C, 0x21,
+    0x0A, 0x0F, 0x16, 0x21,
+    0x0A, 0x21, 0x21, 0x21,
+    0x0A, 0x0A, 0x21, 0x21,
+    0x0A, 0x0A, 0x0A, 0x21,
+    0x0A, 0x17, 0x1E, 0x24,
+    0x0A, 0x10, 0x1E, 0x24,
+    0x0A, 0x10, 0x17, 0x24,
+    0x0A, 0x24, 0x24, 0x24,
+    0x0A, 0x0A, 0x24, 0x24,
+    0x0A, 0x0A, 0x0A, 0x24,
+    0x0A, 0x18, 0x20, 0x27,
+    0x0A, 0x11, 0x20, 0x27,
+    0x0A, 0x11, 0x19, 0x27,
+    0x0A, 0x27, 0x27, 0x27,
+    0x0A, 0x0A, 0x27, 0x27,
+    0x0A, 0x0A, 0x0A, 0x27,
+    0x0A, 0x1A, 0x22, 0x2A,
+    0x0A, 0x12, 0x22, 0x2A,
+    0x0A, 0x12, 0x1A, 0x2A,
+    0x0A, 0x2A, 0x2A, 0x2A,
+    0x0A, 0x0A, 0x2A, 0x2A,
+    0x0A, 0x0A, 0x0A, 0x2A,
+    0x0A, 0x2D, 0x2D, 0x2D,
+    0x0A, 0x0A, 0x2D, 0x2D,
+    0x0A, 0x0A, 0x0A, 0x2D,
+    0x0A, 0x1C, 0x25, 0x2E,
+    0x0A, 0x13, 0x25, 0x2E,
+    0x0A, 0x13, 0x1C, 0x2E,
+    0x0A, 0x32, 0x32, 0x32,
+    0x0A, 0x0A, 0x32, 0x32,
+    0x0A, 0x0A, 0x0A, 0x32,
+    0x0A, 0x38, 0x38, 0x38,
+    0x0A, 0x0A, 0x38, 0x38,
+    0x0A, 0x0A, 0x0A, 0x38,
+    0x0B, 0x0C, 0x0C, 0x0D,
+    0x0B, 0x0C, 0x0D, 0x0E,
+    0x0B, 0x0D, 0x0E, 0x0F,
+    0x0B, 0x0C, 0x0E, 0x0F,
+    0x0B, 0x0C, 0x0D, 0x0F,
+    0x0B, 0x0D, 0x0E, 0x10,
+    0x0B, 0x0D, 0x0F, 0x10,
+    0x0B, 0x0C, 0x0F, 0x10,
+    0x0B, 0x0C, 0x0E, 0x10,
+    0x0B, 0x0D, 0x0F, 0x11,
+    0x0B, 0x0E, 0x10, 0x11,
+    0x0B, 0x0C, 0x10, 0x11,
+    0x0B, 0x0C, 0x0E, 0x11,
+    0x0B, 0x11, 0x11, 0x11,
+    0x0B, 0x0B, 0x11, 0x11,
+    0x0B, 0x0B, 0x0B, 0x11,
+    0x0B, 0x0E, 0x0F, 0x12,
+    0x0B, 0x0E, 0x11, 0x12,
+    0x0B, 0x0C, 0x11, 0x12,
+    0x0B, 0x0C, 0x0F, 0x12,
+    0x0B, 0x0E, 0x10, 0x13,
+    0x0B, 0x0F, 0x11, 0x13,
+    0x0B, 0x0D, 0x11, 0x13,
+    0x0B, 0x0D, 0x0F, 0x13,
+    0x0B, 0x13, 0x13, 0x13,
+    0x0B, 0x0B, 0x13, 0x13,
+    0x0B, 0x0B, 0x0B, 0x13,
+    0x0B, 0x0F, 0x12, 0x16,
+    0x0B, 0x10, 0x14, 0x16,
+    0x0B, 0x0D, 0x14, 0x16,
+    0x0B, 0x0D, 0x11, 0x16,
+    0x0B, 0x16, 0x16, 0x16,
+    0x0B, 0x0B, 0x16, 0x16,
+    0x0B, 0x0B, 0x0B, 0x16,
+    0x0B, 0x10, 0x14, 0x19,
+    0x0B, 0x12, 0x16, 0x19,
+    0x0B, 0x0E, 0x16, 0x19,
+    0x0B, 0x0E, 0x12, 0x19,
+    0x0B, 0x19, 0x19, 0x19,
+    0x0B, 0x0B, 0x19, 0x19,
+    0x0B, 0x0B, 0x0B, 0x19,
+    0x0B, 0x11, 0x16, 0x1C,
+    0x0B, 0x13, 0x18, 0x1C,
+    0x0B, 0x0F, 0x18, 0x1C,
+    0x0B, 0x0F, 0x14, 0x1C,
+    0x0B, 0x1C, 0x1C, 0x1C,
+    0x0B, 0x0B, 0x1C, 0x1C,
+    0x0B, 0x0B, 0x0B, 0x1C,
+    0x0B, 0x12, 0x18, 0x1F,
+    0x0B, 0x15, 0x1A, 0x1F,
+    0x0B, 0x10, 0x1A, 0x1F,
+    0x0B, 0x10, 0x15, 0x1F,
+    0x0B, 0x1F, 0x1F, 0x1F,
+    0x0B, 0x0B, 0x1F, 0x1F,
+    0x0B, 0x0B, 0x0B, 0x1F,
+    0x0B, 0x16, 0x1D, 0x22,
+    0x0B, 0x10, 0x1D, 0x22,
+    0x0B, 0x10, 0x17, 0x22,
+    0x0B, 0x22, 0x22, 0x22,
+    0x0B, 0x0B, 0x22, 0x22,
+    0x0B, 0x0B, 0x0B, 0x22,
+    0x0B, 0x18, 0x1F, 0x25,
+    0x0B, 0x11, 0x1F, 0x25,
+    0x0B, 0x11, 0x18, 0x25,
+    0x0B, 0x25, 0x25, 0x25,
+    0x0B, 0x0B, 0x25, 0x25,
+    0x0B, 0x0B, 0x0B, 0x25,
+    0x0B, 0x19, 0x21, 0x28,
+    0x0B, 0x12, 0x21, 0x28,
+    0x0B, 0x12, 0x1A, 0x28,
+    0x0B, 0x28, 0x28, 0x28,
+    0x0B, 0x0B, 0x28, 0x28,
+    0x0B, 0x0B, 0x0B, 0x28,
+    0x0B, 0x1B, 0x23, 0x2B,
+    0x0B, 0x13, 0x23, 0x2B,
+    0x0B, 0x13, 0x1B, 0x2B,
+    0x0B, 0x2B, 0x2B, 0x2B,
+    0x0B, 0x0B, 0x2B, 0x2B,
+    0x0B, 0x0B, 0x0B, 0x2B,
+    0x0B, 0x2E, 0x2E, 0x2E,
+    0x0B, 0x0B, 0x2E, 0x2E,
+    0x0B, 0x0B, 0x0B, 0x2E,
+    0x0B, 0x1D, 0x26, 0x2F,
+    0x0B, 0x14, 0x26, 0x2F,
+    0x0B, 0x14, 0x1D, 0x2F,
+    0x0B, 0x33, 0x33, 0x33,
+    0x0B, 0x0B, 0x33, 0x33,
+    0x0B, 0x0B, 0x0B, 0x33,
+    0x0B, 0x39, 0x39, 0x39,
+    0x0B, 0x0B, 0x39, 0x39,
+    0x0B, 0x0B, 0x0B, 0x39,
+    0x0C, 0x0D, 0x0D, 0x0E,
+    0x0C, 0x0D, 0x0E, 0x0F,
+    0x0C, 0x0E, 0x0F, 0x10,
+    0x0C, 0x0D, 0x0F, 0x10,
+    0x0C, 0x0D, 0x0E, 0x10,
+    0x0C, 0x0E, 0x0F, 0x11,
+    0x0C, 0x0E, 0x10, 0x11,
+    0x0C, 0x0D, 0x10, 0x11,
+    0x0C, 0x0D, 0x0F, 0x11,
+    0x0C, 0x0E, 0x10, 0x12,
+    0x0C, 0x0F, 0x11, 0x12,
+    0x0C, 0x0D, 0x11, 0x12,
+    0x0C, 0x0D, 0x0F, 0x12,
+    0x0C, 0x12, 0x12, 0x12,
+    0x0C, 0x0C, 0x12, 0x12,
+    0x0C, 0x0C, 0x0C, 0x12,
+    0x0C, 0x0F, 0x10, 0x13,
+    0x0C, 0x0F, 0x12, 0x13,
+    0x0C, 0x0D, 0x12, 0x13,
+    0x0C, 0x0D, 0x10, 0x13,
+    0x0C, 0x0F, 0x11, 0x14,
+    0x0C, 0x10, 0x12, 0x14,
+    0x0C, 0x0E, 0x12, 0x14,
+    0x0C, 0x0E, 0x10, 0x14,
+    0x0C, 0x14, 0x14, 0x14,
+    0x0C, 0x0C, 0x14, 0x14,
+    0x0C, 0x0C, 0x0C, 0x14,
+    0x0C, 0x10, 0x13, 0x17,
+    0x0C, 0x11, 0x15, 0x17,
+    0x0C, 0x0E, 0x15, 0x17,
+    0x0C, 0x0E, 0x12, 0x17,
+    0x0C, 0x17, 0x17, 0x17,
+    0x0C, 0x0C, 0x17, 0x17,
+    0x0C, 0x0C, 0x0C, 0x17,
+    0x0C, 0x11, 0x15, 0x1A,
+    0x0C, 0x13, 0x17, 0x1A,
+    0x0C, 0x0F, 0x17, 0x1A,
+    0x0C, 0x0F, 0x13, 0x1A,
+    0x0C, 0x1A, 0x1A, 0x1A,
+    0x0C, 0x0C, 0x1A, 0x1A,
+    0x0C, 0x0C, 0x0C, 0x1A,
+    0x0C, 0x12, 0x17, 0x1D,
+    0x0C, 0x14, 0x19, 0x1D,
+    0x0C, 0x10, 0x19, 0x1D,
+    0x0C, 0x10, 0x15, 0x1D,
+    0x0C, 0x1D, 0x1D, 0x1D,
+    0x0C, 0x0C, 0x1D, 0x1D,
+    0x0C, 0x0C, 0x0C, 0x1D,
+    0x0C, 0x13, 0x19, 0x20,
+    0x0C, 0x16, 0x1B, 0x20,
+    0x0C, 0x11, 0x1B, 0x20,
+    0x0C, 0x11, 0x16, 0x20,
+    0x0C, 0x20, 0x20, 0x20,
+    0x0C, 0x0C, 0x20, 0x20,
+    0x0C, 0x0C, 0x0C, 0x20,
+    0x0C, 0x17, 0x1E, 0x23,
+    0x0C, 0x11, 0x1E, 0x23,
+    0x0C, 0x11, 0x18, 0x23,
+    0x0C, 0x23, 0x23, 0x23,
+    0x0C, 0x0C, 0x23, 0x23,
+    0x0C, 0x0C, 0x0C, 0x23,
+    0x0C, 0x19, 0x20, 0x26,
+    0x0C, 0x12, 0x20, 0x26,
+    0x0C, 0x12, 0x19, 0x26,
+    0x0C, 0x26, 0x26, 0x26,
+    0x0C, 0x0C, 0x26, 0x26,
+    0x0C, 0x0C, 0x0C, 0x26,
+    0x0C, 0x1A, 0x22, 0x29,
+    0x0C, 0x13, 0x22, 0x29,
+    0x0C, 0x13, 0x1B, 0x29,
+    0x0C, 0x29, 0x29, 0x29,
+    0x0C, 0x0C, 0x29, 0x29,
+    0x0C, 0x0C, 0x0C, 0x29,
+    0x0C, 0x1C, 0x24, 0x2C,
+    0x0C, 0x14, 0x24, 0x2C,
+    0x0C, 0x14, 0x1C, 0x2C,
+    0x0C, 0x2C, 0x2C, 0x2C,
+    0x0C, 0x0C, 0x2C, 0x2C,
+    0x0C, 0x0C, 0x0C, 0x2C,
+    0x0C, 0x2F, 0x2F, 0x2F,
+    0x0C, 0x0C, 0x2F, 0x2F,
+    0x0C, 0x0C, 0x0C, 0x2F,
+    0x0C, 0x1E, 0x27, 0x30,
+    0x0C, 0x15, 0x27, 0x30,
+    0x0C, 0x15, 0x1E, 0x30,
+    0x0C, 0x34, 0x34, 0x34,
+    0x0C, 0x0C, 0x34, 0x34,
+    0x0C, 0x0C, 0x0C, 0x34,
+    0x0C, 0x3A, 0x3A, 0x3A,
+    0x0C, 0x0C, 0x3A, 0x3A,
+    0x0C, 0x0C, 0x0C, 0x3A,
+    0x0D, 0x0E, 0x0E, 0x0F,
+    0x0D, 0x0E, 0x0F, 0x10,
+    0x0D, 0x0F, 0x10, 0x11,
+    0x0D, 0x0E, 0x10, 0x11,
+    0x0D, 0x0E, 0x0F, 0x11,
+    0x0D, 0x0F, 0x10, 0x12,
+    0x0D, 0x0F, 0x11, 0x12,
+    0x0D, 0x0E, 0x11, 0x12,
+    0x0D, 0x0E, 0x10, 0x12,
+    0x0D, 0x0F, 0x11, 0x13,
+    0x0D, 0x10, 0x12, 0x13,
+    0x0D, 0x0E, 0x12, 0x13,
+    0x0D, 0x0E, 0x10, 0x13,
+    0x0D, 0x13, 0x13, 0x13,
+    0x0D, 0x0D, 0x13, 0x13,
+    0x0D, 0x0D, 0x0D, 0x13,
+    0x0D, 0x10, 0x11, 0x14,
+    0x0D, 0x10, 0x13, 0x14,
+    0x0D, 0x0E, 0x13, 0x14,
+    0x0D, 0x0E, 0x11, 0x14,
+    0x0D, 0x10, 0x12, 0x15,
+    0x0D, 0x11, 0x13, 0x15,
+    0x0D, 0x0F, 0x13, 0x15,
+    0x0D, 0x0F, 0x11, 0x15,
+    0x0D, 0x15, 0x15, 0x15,
+    0x0D, 0x0D, 0x15, 0x15,
+    0x0D, 0x0D, 0x0D, 0x15,
+    0x0D, 0x11, 0x14, 0x18,
+    0x0D, 0x12, 0x16, 0x18,
+    0x0D, 0x0F, 0x16, 0x18,
+    0x0D, 0x0F, 0x13, 0x18,
+    0x0D, 0x18, 0x18, 0x18,
+    0x0D, 0x0D, 0x18, 0x18,
+    0x0D, 0x0D, 0x0D, 0x18,
+    0x0D, 0x12, 0x16, 0x1B,
+    0x0D, 0x14, 0x18, 0x1B,
+    0x0D, 0x10, 0x18, 0x1B,
+    0x0D, 0x10, 0x14, 0x1B,
+    0x0D, 0x1B, 0x1B, 0x1B,
+    0x0D, 0x0D, 0x1B, 0x1B,
+    0x0D, 0x0D, 0x0D, 0x1B,
+    0x0D, 0x13, 0x18, 0x1E,
+    0x0D, 0x15, 0x1A, 0x1E,
+    0x0D, 0x11, 0x1A, 0x1E,
+    0x0D, 0x11, 0x16, 0x1E,
+    0x0D, 0x1E, 0x1E, 0x1E,
+    0x0D, 0x0D, 0x1E, 0x1E,
+    0x0D, 0x0D, 0x0D, 0x1E,
+    0x0D, 0x14, 0x1A, 0x21,
+    0x0D, 0x17, 0x1C, 0x21,
+    0x0D, 0x12, 0x1C, 0x21,
+    0x0D, 0x12, 0x17, 0x21,
+    0x0D, 0x21, 0x21, 0x21,
+    0x0D, 0x0D, 0x21, 0x21,
+    0x0D, 0x0D, 0x0D, 0x21,
+    0x0D, 0x18, 0x1F, 0x24,
+    0x0D, 0x12, 0x1F, 0x24,
+    0x0D, 0x12, 0x19, 0x24,
+    0x0D, 0x24, 0x24, 0x24,
+    0x0D, 0x0D, 0x24, 0x24,
+    0x0D, 0x0D, 0x0D, 0x24,
+    0x0D, 0x1A, 0x21, 0x27,
+    0x0D, 0x13, 0x21, 0x27,
+    0x0D, 0x13, 0x1A, 0x27,
+    0x0D, 0x27, 0x27, 0x27,
+    0x0D, 0x0D, 0x27, 0x27,
+    0x0D, 0x0D, 0x0D, 0x27,
+    0x0D, 0x1B, 0x23, 0x2A,
+    0x0D, 0x14, 0x23, 0x2A,
+    0x0D, 0x14, 0x1C, 0x2A,
+    0x0D, 0x2A, 0x2A, 0x2A,
+    0x0D, 0x0D, 0x2A, 0x2A,
+    0x0D, 0x0D, 0x0D, 0x2A,
+    0x0D, 0x1D, 0x25, 0x2D,
+    0x0D, 0x15, 0x25, 0x2D,
+    0x0D, 0x15, 0x1D, 0x2D,
+    0x0D, 0x2D, 0x2D, 0x2D,
+    0x0D, 0x0D, 0x2D, 0x2D,
+    0x0D, 0x0D, 0x0D, 0x2D,
+    0x0D, 0x30, 0x30, 0x30,
+    0x0D, 0x0D, 0x30, 0x30,
+    0x0D, 0x0D, 0x0D, 0x30,
+    0x0D, 0x1F, 0x28, 0x31,
+    0x0D, 0x16, 0x28, 0x31,
+    0x0D, 0x16, 0x1F, 0x31,
+    0x0D, 0x35, 0x35, 0x35,
+    0x0D, 0x0D, 0x35, 0x35,
+    0x0D, 0x0D, 0x0D, 0x35,
+    0x0D, 0x3B, 0x3B, 0x3B,
+    0x0D, 0x0D, 0x3B, 0x3B,
+    0x0D, 0x0D, 0x0D, 0x3B,
+    0x0E, 0x0F, 0x0F, 0x10,
+    0x0E, 0x0F, 0x10, 0x11,
+    0x0E, 0x10, 0x11, 0x12,
+    0x0E, 0x0F, 0x11, 0x12,
+    0x0E, 0x0F, 0x10, 0x12,
+    0x0E, 0x10, 0x11, 0x13,
+    0x0E, 0x10, 0x12, 0x13,
+    0x0E, 0x0F, 0x12, 0x13,
+    0x0E, 0x0F, 0x11, 0x13,
+    0x0E, 0x10, 0x12, 0x14,
+    0x0E, 0x11, 0x13, 0x14,
+    0x0E, 0x0F, 0x13, 0x14,
+    0x0E, 0x0F, 0x11, 0x14,
+    0x0E, 0x14, 0x14, 0x14,
+    0x0E, 0x0E, 0x14, 0x14,
+    0x0E, 0x0E, 0x0E, 0x14,
+    0x0E, 0x11, 0x12, 0x15,
+    0x0E, 0x11, 0x14, 0x15,
+    0x0E, 0x0F, 0x14, 0x15,
+    0x0E, 0x0F, 0x12, 0x15,
+    0x0E, 0x11, 0x13, 0x16,
+    0x0E, 0x12, 0x14, 0x16,
+    0x0E, 0x10, 0x14, 0x16,
+    0x0E, 0x10, 0x12, 0x16,
+    0x0E, 0x16, 0x16, 0x16,
+    0x0E, 0x0E, 0x16, 0x16,
+    0x0E, 0x0E, 0x0E, 0x16,
+    0x0E, 0x12, 0x15, 0x19,
+    0x0E, 0x13, 0x17, 0x19,
+    0x0E, 0x10, 0x17, 0x19,
+    0x0E, 0x10, 0x14, 0x19,
+    0x0E, 0x19, 0x19, 0x19,
+    0x0E, 0x0E, 0x19, 0x19,
+    0x0E, 0x0E, 0x0E, 0x19,
+    0x0E, 0x13, 0x17, 0x1C,
+    0x0E, 0x15, 0x19, 0x1C,
+    0x0E, 0x11, 0x19, 0x1C,
+    0x0E, 0x11, 0x15, 0x1C,
+    0x0E, 0x1C, 0x1C, 0x1C,
+    0x0E, 0x0E, 0x1C, 0x1C,
+    0x0E, 0x0E, 0x0E, 0x1C,
+    0x0E, 0x14, 0x19, 0x1F,
+    0x0E, 0x16, 0x1B, 0x1F,
+    0x0E, 0x12, 0x1B, 0x1F,
+    0x0E, 0x12, 0x17, 0x1F,
+    0x0E, 0x1F, 0x1F, 0x1F,
+    0x0E, 0x0E, 0x1F, 0x1F,
+    0x0E, 0x0E, 0x0E, 0x1F,
+    0x0E, 0x15, 0x1B, 0x22,
+    0x0E, 0x18, 0x1D, 0x22,
+    0x0E, 0x13, 0x1D, 0x22,
+    0x0E, 0x13, 0x18, 0x22,
+    0x0E, 0x22, 0x22, 0x22,
+    0x0E, 0x0E, 0x22, 0x22,
+    0x0E, 0x0E, 0x0E, 0x22,
+    0x0E, 0x19, 0x20, 0x25,
+    0x0E, 0x13, 0x20, 0x25,
+    0x0E, 0x13, 0x1A, 0x25,
+    0x0E, 0x25, 0x25, 0x25,
+    0x0E, 0x0E, 0x25, 0x25,
+    0x0E, 0x0E, 0x0E, 0x25,
+    0x0E, 0x1B, 0x22, 0x28,
+    0x0E, 0x14, 0x22, 0x28,
+    0x0E, 0x14, 0x1B, 0x28,
+    0x0E, 0x28, 0x28, 0x28,
+    0x0E, 0x0E, 0x28, 0x28,
+    0x0E, 0x0E, 0x0E, 0x28,
+    0x0E, 0x1C, 0x24, 0x2B,
+    0x0E, 0x15, 0x24, 0x2B,
+    0x0E, 0x15, 0x1D, 0x2B,
+    0x0E, 0x2B, 0x2B, 0x2B,
+    0x0E, 0x0E, 0x2B, 0x2B,
+    0x0E, 0x0E, 0x0E, 0x2B,
+    0x0E, 0x1E, 0x26, 0x2E,
+    0x0E, 0x16, 0x26, 0x2E,
+    0x0E, 0x16, 0x1E, 0x2E,
+    0x0E, 0x2E, 0x2E, 0x2E,
+    0x0E, 0x0E, 0x2E, 0x2E,
+    0x0E, 0x0E, 0x0E, 0x2E,
+    0x0E, 0x31, 0x31, 0x31,
+    0x0E, 0x0E, 0x31, 0x31,
+    0x0E, 0x0E, 0x0E, 0x31,
+    0x0E, 0x20, 0x29, 0x32,
+    0x0E, 0x17, 0x29, 0x32,
+    0x0E, 0x17, 0x20, 0x32,
+    0x0E, 0x36, 0x36, 0x36,
+    0x0E, 0x0E, 0x36, 0x36,
+    0x0E, 0x0E, 0x0E, 0x36,
+    0x0E, 0x3C, 0x3C, 0x3C,
+    0x0E, 0x0E, 0x3C, 0x3C,
+    0x0E, 0x0E, 0x0E, 0x3C,
+    0x0F, 0x10, 0x10, 0x11,
+    0x0F, 0x10, 0x11, 0x12,
+    0x0F, 0x11, 0x12, 0x13,
+    0x0F, 0x10, 0x12, 0x13,
+    0x0F, 0x10, 0x11, 0x13,
+    0x0F, 0x11, 0x12, 0x14,
+    0x0F, 0x11, 0x13, 0x14,
+    0x0F, 0x10, 0x13, 0x14,
+    0x0F, 0x10, 0x12, 0x14,
+    0x0F, 0x11, 0x13, 0x15,
+    0x0F, 0x12, 0x14, 0x15,
+    0x0F, 0x10, 0x14, 0x15,
+    0x0F, 0x10, 0x12, 0x15,
+    0x0F, 0x15, 0x15, 0x15,
+    0x0F, 0x0F, 0x15, 0x15,
+    0x0F, 0x0F, 0x0F, 0x15,
+    0x0F, 0x12, 0x13, 0x16,
+    0x0F, 0x12, 0x15, 0x16,
+    0x0F, 0x10, 0x15, 0x16,
+    0x0F, 0x10, 0x13, 0x16,
+    0x0F, 0x12, 0x14, 0x17,
+    0x0F, 0x13, 0x15, 0x17,
+    0x0F, 0x11, 0x15, 0x17,
+    0x0F, 0x11, 0x13, 0x17,
+    0x0F, 0x17, 0x17, 0x17,
+    0x0F, 0x0F, 0x17, 0x17,
+    0x0F, 0x0F, 0x0F, 0x17,
+    0x0F, 0x13, 0x16, 0x1A,
+    0x0F, 0x14, 0x18, 0x1A,
+    0x0F, 0x11, 0x18, 0x1A,
+    0x0F, 0x11, 0x15, 0x1A,
+    0x0F, 0x1A, 0x1A, 0x1A,
+    0x0F, 0x0F, 0x1A, 0x1A,
+    0x0F, 0x0F, 0x0F, 0x1A,
+    0x0F, 0x14, 0x18, 0x1D,
+    0x0F, 0x16, 0x1A, 0x1D,
+    0x0F, 0x12, 0x1A, 0x1D,
+    0x0F, 0x12, 0x16, 0x1D,
+    0x0F, 0x1D, 0x1D, 0x1D,
+    0x0F, 0x0F, 0x1D, 0x1D,
+    0x0F, 0x0F, 0x0F, 0x1D,
+    0x0F, 0x15, 0x1A, 0x20,
+    0x0F, 0x17, 0x1C, 0x20,
+    0x0F, 0x13, 0x1C, 0x20,
+    0x0F, 0x13, 0x18, 0x20,
+    0x0F, 0x20, 0x20, 0x20,
+    0x0F, 0x0F, 0x20, 0x20,
+    0x0F, 0x0F, 0x0F, 0x20,
+    0x0F, 0x16, 0x1C, 0x23,
+    0x0F, 0x19, 0x1E, 0x23,
+    0x0F, 0x14, 0x1E, 0x23,
+    0x0F, 0x14, 0x19, 0x23,
+    0x0F, 0x23, 0x23, 0x23,
+    0x0F, 0x0F, 0x23, 0x23,
+    0x0F, 0x0F, 0x0F, 0x23,
+    0x0F, 0x1A, 0x21, 0x26,
+    0x0F, 0x14, 0x21, 0x26,
+    0x0F, 0x14, 0x1B, 0x26,
+    0x0F, 0x26, 0x26, 0x26,
+    0x0F, 0x0F, 0x26, 0x26,
+    0x0F, 0x0F, 0x0F, 0x26,
+    0x0F, 0x1C, 0x23, 0x29,
+    0x0F, 0x15, 0x23, 0x29,
+    0x0F, 0x15, 0x1C, 0x29,
+    0x0F, 0x29, 0x29, 0x29,
+    0x0F, 0x0F, 0x29, 0x29,
+    0x0F, 0x0F, 0x0F, 0x29,
+    0x0F, 0x1D, 0x25, 0x2C,
+    0x0F, 0x16, 0x25, 0x2C,
+    0x0F, 0x16, 0x1E, 0x2C,
+    0x0F, 0x2C, 0x2C, 0x2C,
+    0x0F, 0x0F, 0x2C, 0x2C,
+    0x0F, 0x0F, 0x0F, 0x2C,
+    0x0F, 0x1F, 0x27, 0x2F,
+    0x0F, 0x17, 0x27, 0x2F,
+    0x0F, 0x17, 0x1F, 0x2F,
+    0x0F, 0x2F, 0x2F, 0x2F,
+    0x0F, 0x0F, 0x2F, 0x2F,
+    0x0F, 0x0F, 0x0F, 0x2F,
+    0x0F, 0x32, 0x32, 0x32,
+    0x0F, 0x0F, 0x32, 0x32,
+    0x0F, 0x0F, 0x0F, 0x32,
+    0x0F, 0x21, 0x2A, 0x33,
+    0x0F, 0x18, 0x2A, 0x33,
+    0x0F, 0x18, 0x21, 0x33,
+    0x0F, 0x37, 0x37, 0x37,
+    0x0F, 0x0F, 0x37, 0x37,
+    0x0F, 0x0F, 0x0F, 0x37,
+    0x0F, 0x3D, 0x3D, 0x3D,
+    0x0F, 0x0F, 0x3D, 0x3D,
+    0x0F, 0x0F, 0x0F, 0x3D,
+    0x10, 0x11, 0x11, 0x12,
+    0x10, 0x11, 0x12, 0x13,
+    0x10, 0x12, 0x13, 0x14,
+    0x10, 0x11, 0x13, 0x14,
+    0x10, 0x11, 0x12, 0x14,
+    0x10, 0x12, 0x13, 0x15,
+    0x10, 0x12, 0x14, 0x15,
+    0x10, 0x11, 0x14, 0x15,
+    0x10, 0x11, 0x13, 0x15,
+    0x10, 0x12, 0x14, 0x16,
+    0x10, 0x13, 0x15, 0x16,
+    0x10, 0x11, 0x15, 0x16,
+    0x10, 0x11, 0x13, 0x16,
+    0x10, 0x16, 0x16, 0x16,
+    0x10, 0x10, 0x16, 0x16,
+    0x10, 0x10, 0x10, 0x16,
+    0x10, 0x13, 0x14, 0x17,
+    0x10, 0x13, 0x16, 0x17,
+    0x10, 0x11, 0x16, 0x17,
+    0x10, 0x11, 0x14, 0x17,
+    0x10, 0x13, 0x15, 0x18,
+    0x10, 0x14, 0x16, 0x18,
+    0x10, 0x12, 0x16, 0x18,
+    0x10, 0x12, 0x14, 0x18,
+    0x10, 0x18, 0x18, 0x18,
+    0x10, 0x10, 0x18, 0x18,
+    0x10, 0x10, 0x10, 0x18,
+    0x10, 0x14, 0x17, 0x1B,
+    0x10, 0x15, 0x19, 0x1B,
+    0x10, 0x12, 0x19, 0x1B,
+    0x10, 0x12, 0x16, 0x1B,
+    0x10, 0x1B, 0x1B, 0x1B,
+    0x10, 0x10, 0x1B, 0x1B,
+    0x10, 0x10, 0x10, 0x1B,
+    0x10, 0x15, 0x19, 0x1E,
+    0x10, 0x17, 0x1B, 0x1E,
+    0x10, 0x13, 0x1B, 0x1E,
+    0x10, 0x13, 0x17, 0x1E,
+    0x10, 0x1E, 0x1E, 0x1E,
+    0x10, 0x10, 0x1E, 0x1E,
+    0x10, 0x10, 0x10, 0x1E,
+    0x10, 0x16, 0x1B, 0x21,
+    0x10, 0x18, 0x1D, 0x21,
+    0x10, 0x14, 0x1D, 0x21,
+    0x10, 0x14, 0x19, 0x21,
+    0x10, 0x21, 0x21, 0x21,
+    0x10, 0x10, 0x21, 0x21,
+    0x10, 0x10, 0x10, 0x21,
+    0x10, 0x17, 0x1D, 0x24,
+    0x10, 0x1A, 0x1F, 0x24,
+    0x10, 0x15, 0x1F, 0x24,
+    0x10, 0x15, 0x1A, 0x24,
+    0x10, 0x24, 0x24, 0x24,
+    0x10, 0x10, 0x24, 0x24,
+    0x10, 0x10, 0x10, 0x24,
+    0x10, 0x1B, 0x22, 0x27,
+    0x10, 0x15, 0x22, 0x27,
+    0x10, 0x15, 0x1C, 0x27,
+    0x10, 0x27, 0x27, 0x27,
+    0x10, 0x10, 0x27, 0x27,
+    0x10, 0x10, 0x10, 0x27,
+    0x10, 0x1D, 0x24, 0x2A,
+    0x10, 0x16, 0x24, 0x2A,
+    0x10, 0x16, 0x1D, 0x2A,
+    0x10, 0x2A, 0x2A, 0x2A,
+    0x10, 0x10, 0x2A, 0x2A,
+    0x10, 0x10, 0x10, 0x2A,
+    0x10, 0x1E, 0x26, 0x2D,
+    0x10, 0x17, 0x26, 0x2D,
+    0x10, 0x17, 0x1F, 0x2D,
+    0x10, 0x2D, 0x2D, 0x2D,
+    0x10, 0x10, 0x2D, 0x2D,
+    0x10, 0x10, 0x10, 0x2D,
+    0x10, 0x20, 0x28, 0x30,
+    0x10, 0x18, 0x28, 0x30,
+    0x10, 0x18, 0x20, 0x30,
+    0x10, 0x30, 0x30, 0x30,
+    0x10, 0x10, 0x30, 0x30,
+    0x10, 0x10, 0x10, 0x30,
+    0x10, 0x33, 0x33, 0x33,
+    0x10, 0x10, 0x33, 0x33,
+    0x10, 0x10, 0x10, 0x33,
+    0x10, 0x22, 0x2B, 0x34,
+    0x10, 0x19, 0x2B, 0x34,
+    0x10, 0x19, 0x22, 0x34,
+    0x10, 0x38, 0x38, 0x38,
+    0x10, 0x10, 0x38, 0x38,
+    0x10, 0x10, 0x10, 0x38,
+    0x10, 0x3E, 0x3E, 0x3E,
+    0x10, 0x10, 0x3E, 0x3E,
+    0x10, 0x10, 0x10, 0x3E,
+    0x11, 0x12, 0x12, 0x13,
+    0x11, 0x12, 0x13, 0x14,
+    0x11, 0x13, 0x14, 0x15,
+    0x11, 0x12, 0x14, 0x15,
+    0x11, 0x12, 0x13, 0x15,
+    0x11, 0x13, 0x14, 0x16,
+    0x11, 0x13, 0x15, 0x16,
+    0x11, 0x12, 0x15, 0x16,
+    0x11, 0x12, 0x14, 0x16,
+    0x11, 0x13, 0x15, 0x17,
+    0x11, 0x14, 0x16, 0x17,
+    0x11, 0x12, 0x16, 0x17,
+    0x11, 0x12, 0x14, 0x17,
+    0x11, 0x17, 0x17, 0x17,
+    0x11, 0x11, 0x17, 0x17,
+    0x11, 0x11, 0x11, 0x17,
+    0x11, 0x14, 0x15, 0x18,
+    0x11, 0x14, 0x17, 0x18,
+    0x11, 0x12, 0x17, 0x18,
+    0x11, 0x12, 0x15, 0x18,
+    0x11, 0x14, 0x16, 0x19,
+    0x11, 0x15, 0x17, 0x19,
+    0x11, 0x13, 0x17, 0x19,
+    0x11, 0x13, 0x15, 0x19,
+    0x11, 0x19, 0x19, 0x19,
+    0x11, 0x11, 0x19, 0x19,
+    0x11, 0x11, 0x11, 0x19,
+    0x11, 0x15, 0x18, 0x1C,
+    0x11, 0x16, 0x1A, 0x1C,
+    0x11, 0x13, 0x1A, 0x1C,
+    0x11, 0x13, 0x17, 0x1C,
+    0x11, 0x1C, 0x1C, 0x1C,
+    0x11, 0x11, 0x1C, 0x1C,
+    0x11, 0x11, 0x11, 0x1C,
+    0x11, 0x16, 0x1A, 0x1F,
+    0x11, 0x18, 0x1C, 0x1F,
+    0x11, 0x14, 0x1C, 0x1F,
+    0x11, 0x14, 0x18, 0x1F,
+    0x11, 0x1F, 0x1F, 0x1F,
+    0x11, 0x11, 0x1F, 0x1F,
+    0x11, 0x11, 0x11, 0x1F,
+    0x11, 0x17, 0x1C, 0x22,
+    0x11, 0x19, 0x1E, 0x22,
+    0x11, 0x15, 0x1E, 0x22,
+    0x11, 0x15, 0x1A, 0x22,
+    0x11, 0x22, 0x22, 0x22,
+    0x11, 0x11, 0x22, 0x22,
+    0x11, 0x11, 0x11, 0x22,
+    0x11, 0x18, 0x1E, 0x25,
+    0x11, 0x1B, 0x20, 0x25,
+    0x11, 0x16, 0x20, 0x25,
+    0x11, 0x16, 0x1B, 0x25,
+    0x11, 0x25, 0x25, 0x25,
+    0x11, 0x11, 0x25, 0x25,
+    0x11, 0x11, 0x11, 0x25,
+    0x11, 0x1C, 0x23, 0x28,
+    0x11, 0x16, 0x23, 0x28,
+    0x11, 0x16, 0x1D, 0x28,
+    0x11, 0x28, 0x28, 0x28,
+    0x11, 0x11, 0x28, 0x28,
+    0x11, 0x11, 0x11, 0x28,
+    0x11, 0x1E, 0x25, 0x2B,
+    0x11, 0x17, 0x25, 0x2B,
+    0x11, 0x17, 0x1E, 0x2B,
+    0x11, 0x2B, 0x2B, 0x2B,
+    0x11, 0x11, 0x2B, 0x2B,
+    0x11, 0x11, 0x11, 0x2B,
+    0x11, 0x1F, 0x27, 0x2E,
+    0x11, 0x18, 0x27, 0x2E,
+    0x11, 0x18, 0x20, 0x2E,
+    0x11, 0x2E, 0x2E, 0x2E,
+    0x11, 0x11, 0x2E, 0x2E,
+    0x11, 0x11, 0x11, 0x2E,
+    0x11, 0x21, 0x29, 0x31,
+    0x11, 0x19, 0x29, 0x31,
+    0x11, 0x19, 0x21, 0x31,
+    0x11, 0x31, 0x31, 0x31,
+    0x11, 0x11, 0x31, 0x31,
+    0x11, 0x11, 0x11, 0x31,
+    0x11, 0x34, 0x34, 0x34,
+    0x11, 0x11, 0x34, 0x34,
+    0x11, 0x11, 0x11, 0x34,
+    0x11, 0x23, 0x2C, 0x35,
+    0x11, 0x1A, 0x2C, 0x35,
+    0x11, 0x1A, 0x23, 0x35,
+    0x11, 0x39, 0x39, 0x39,
+    0x11, 0x11, 0x39, 0x39,
+    0x11, 0x11, 0x11, 0x39,
+    0x11, 0x3F, 0x3F, 0x3F,
+    0x11, 0x11, 0x3F, 0x3F,
+    0x11, 0x11, 0x11, 0x3F,
+    0x12, 0x13, 0x13, 0x14,
+    0x12, 0x13, 0x14, 0x15,
+    0x12, 0x14, 0x15, 0x16,
+    0x12, 0x13, 0x15, 0x16,
+    0x12, 0x13, 0x14, 0x16,
+    0x12, 0x14, 0x15, 0x17,
+    0x12, 0x14, 0x16, 0x17,
+    0x12, 0x13, 0x16, 0x17,
+    0x12, 0x13, 0x15, 0x17,
+    0x12, 0x14, 0x16, 0x18,
+    0x12, 0x15, 0x17, 0x18,
+    0x12, 0x13, 0x17, 0x18,
+    0x12, 0x13, 0x15, 0x18,
+    0x12, 0x18, 0x18, 0x18,
+    0x12, 0x12, 0x18, 0x18,
+    0x12, 0x12, 0x12, 0x18,
+    0x12, 0x15, 0x16, 0x19,
+    0x12, 0x15, 0x18, 0x19,
+    0x12, 0x13, 0x18, 0x19,
+    0x12, 0x13, 0x16, 0x19,
+    0x12, 0x15, 0x17, 0x1A,
+    0x12, 0x16, 0x18, 0x1A,
+    0x12, 0x14, 0x18, 0x1A,
+    0x12, 0x14, 0x16, 0x1A,
+    0x12, 0x1A, 0x1A, 0x1A,
+    0x12, 0x12, 0x1A, 0x1A,
+    0x12, 0x12, 0x12, 0x1A,
+    0x12, 0x16, 0x19, 0x1D,
+    0x12, 0x17, 0x1B, 0x1D,
+    0x12, 0x14, 0x1B, 0x1D,
+    0x12, 0x14, 0x18, 0x1D,
+    0x12, 0x1D, 0x1D, 0x1D,
+    0x12, 0x12, 0x1D, 0x1D,
+    0x12, 0x12, 0x12, 0x1D,
+    0x12, 0x17, 0x1B, 0x20,
+    0x12, 0x19, 0x1D, 0x20,
+    0x12, 0x15, 0x1D, 0x20,
+    0x12, 0x15, 0x19, 0x20,
+    0x12, 0x20, 0x20, 0x20,
+    0x12, 0x12, 0x20, 0x20,
+    0x12, 0x12, 0x12, 0x20,
+    0x12, 0x18, 0x1D, 0x23,
+    0x12, 0x1A, 0x1F, 0x23,
+    0x12, 0x16, 0x1F, 0x23,
+    0x12, 0x16, 0x1B, 0x23,
+    0x12, 0x23, 0x23, 0x23,
+    0x12, 0x12, 0x23, 0x23,
+    0x12, 0x12, 0x12, 0x23,
+    0x12, 0x19, 0x1F, 0x26,
+    0x12, 0x1C, 0x21, 0x26,
+    0x12, 0x17, 0x21, 0x26,
+    0x12, 0x17, 0x1C, 0x26,
+    0x12, 0x26, 0x26, 0x26,
+    0x12, 0x12, 0x26, 0x26,
+    0x12, 0x12, 0x12, 0x26,
+    0x12, 0x1D, 0x24, 0x29,
+    0x12, 0x17, 0x24, 0x29,
+    0x12, 0x17, 0x1E, 0x29,
+    0x12, 0x29, 0x29, 0x29,
+    0x12, 0x12, 0x29, 0x29,
+    0x12, 0x12, 0x12, 0x29,
+    0x12, 0x1F, 0x26, 0x2C,
+    0x12, 0x18, 0x26, 0x2C,
+    0x12, 0x18, 0x1F, 0x2C,
+    0x12, 0x2C, 0x2C, 0x2C,
+    0x12, 0x12, 0x2C, 0x2C,
+    0x12, 0x12, 0x12, 0x2C,
+    0x12, 0x20, 0x28, 0x2F,
+    0x12, 0x19, 0x28, 0x2F,
+    0x12, 0x19, 0x21, 0x2F,
+    0x12, 0x2F, 0x2F, 0x2F,
+    0x12, 0x12, 0x2F, 0x2F,
+    0x12, 0x12, 0x12, 0x2F,
+    0x12, 0x22, 0x2A, 0x32,
+    0x12, 0x1A, 0x2A, 0x32,
+    0x12, 0x1A, 0x22, 0x32,
+    0x12, 0x32, 0x32, 0x32,
+    0x12, 0x12, 0x32, 0x32,
+    0x12, 0x12, 0x12, 0x32,
+    0x12, 0x35, 0x35, 0x35,
+    0x12, 0x12, 0x35, 0x35,
+    0x12, 0x12, 0x12, 0x35,
+    0x12, 0x24, 0x2D, 0x36,
+    0x12, 0x1B, 0x2D, 0x36,
+    0x12, 0x1B, 0x24, 0x36,
+    0x12, 0x3A, 0x3A, 0x3A,
+    0x12, 0x12, 0x3A, 0x3A,
+    0x12, 0x12, 0x12, 0x3A,
+    0x13, 0x14, 0x14, 0x15,
+    0x13, 0x14, 0x15, 0x16,
+    0x13, 0x15, 0x16, 0x17,
+    0x13, 0x14, 0x16, 0x17,
+    0x13, 0x14, 0x15, 0x17,
+    0x13, 0x15, 0x16, 0x18,
+    0x13, 0x15, 0x17, 0x18,
+    0x13, 0x14, 0x17, 0x18,
+    0x13, 0x14, 0x16, 0x18,
+    0x13, 0x15, 0x17, 0x19,
+    0x13, 0x16, 0x18, 0x19,
+    0x13, 0x14, 0x18, 0x19,
+    0x13, 0x14, 0x16, 0x19,
+    0x13, 0x19, 0x19, 0x19,
+    0x13, 0x13, 0x19, 0x19,
+    0x13, 0x13, 0x13, 0x19,
+    0x13, 0x16, 0x17, 0x1A,
+    0x13, 0x16, 0x19, 0x1A,
+    0x13, 0x14, 0x19, 0x1A,
+    0x13, 0x14, 0x17, 0x1A,
+    0x13, 0x16, 0x18, 0x1B,
+    0x13, 0x17, 0x19, 0x1B,
+    0x13, 0x15, 0x19, 0x1B,
+    0x13, 0x15, 0x17, 0x1B,
+    0x13, 0x1B, 0x1B, 0x1B,
+    0x13, 0x13, 0x1B, 0x1B,
+    0x13, 0x13, 0x13, 0x1B,
+    0x13, 0x17, 0x1A, 0x1E,
+    0x13, 0x18, 0x1C, 0x1E,
+    0x13, 0x15, 0x1C, 0x1E,
+    0x13, 0x15, 0x19, 0x1E,
+    0x13, 0x1E, 0x1E, 0x1E,
+    0x13, 0x13, 0x1E, 0x1E,
+    0x13, 0x13, 0x13, 0x1E,
+    0x13, 0x18, 0x1C, 0x21,
+    0x13, 0x1A, 0x1E, 0x21,
+    0x13, 0x16, 0x1E, 0x21,
+    0x13, 0x16, 0x1A, 0x21,
+    0x13, 0x21, 0x21, 0x21,
+    0x13, 0x13, 0x21, 0x21,
+    0x13, 0x13, 0x13, 0x21,
+    0x13, 0x19, 0x1E, 0x24,
+    0x13, 0x1B, 0x20, 0x24,
+    0x13, 0x17, 0x20, 0x24,
+    0x13, 0x17, 0x1C, 0x24,
+    0x13, 0x24, 0x24, 0x24,
+    0x13, 0x13, 0x24, 0x24,
+    0x13, 0x13, 0x13, 0x24,
+    0x13, 0x1A, 0x20, 0x27,
+    0x13, 0x1D, 0x22, 0x27,
+    0x13, 0x18, 0x22, 0x27,
+    0x13, 0x18, 0x1D, 0x27,
+    0x13, 0x27, 0x27, 0x27,
+    0x13, 0x13, 0x27, 0x27,
+    0x13, 0x13, 0x13, 0x27,
+    0x13, 0x1E, 0x25, 0x2A,
+    0x13, 0x18, 0x25, 0x2A,
+    0x13, 0x18, 0x1F, 0x2A,
+    0x13, 0x2A, 0x2A, 0x2A,
+    0x13, 0x13, 0x2A, 0x2A,
+    0x13, 0x13, 0x13, 0x2A,
+    0x13, 0x20, 0x27, 0x2D,
+    0x13, 0x19, 0x27, 0x2D,
+    0x13, 0x19, 0x20, 0x2D,
+    0x13, 0x2D, 0x2D, 0x2D,
+    0x13, 0x13, 0x2D, 0x2D,
+    0x13, 0x13, 0x13, 0x2D,
+    0x13, 0x21, 0x29, 0x30,
+    0x13, 0x1A, 0x29, 0x30,
+    0x13, 0x1A, 0x22, 0x30,
+    0x13, 0x30, 0x30, 0x30,
+    0x13, 0x13, 0x30, 0x30,
+    0x13, 0x13, 0x13, 0x30,
+    0x13, 0x23, 0x2B, 0x33,
+    0x13, 0x1B, 0x2B, 0x33,
+    0x13, 0x1B, 0x23, 0x33,
+    0x13, 0x33, 0x33, 0x33,
+    0x13, 0x13, 0x33, 0x33,
+    0x13, 0x13, 0x13, 0x33,
+    0x13, 0x36, 0x36, 0x36,
+    0x13, 0x13, 0x36, 0x36,
+    0x13, 0x13, 0x13, 0x36,
+    0x13, 0x25, 0x2E, 0x37,
+    0x13, 0x1C, 0x2E, 0x37,
+    0x13, 0x1C, 0x25, 0x37,
+    0x13, 0x3B, 0x3B, 0x3B,
+    0x13, 0x13, 0x3B, 0x3B,
+    0x13, 0x13, 0x13, 0x3B,
+    0x14, 0x15, 0x15, 0x16,
+    0x14, 0x15, 0x16, 0x17,
+    0x14, 0x16, 0x17, 0x18,
+    0x14, 0x15, 0x17, 0x18,
+    0x14, 0x15, 0x16, 0x18,
+    0x14, 0x16, 0x17, 0x19,
+    0x14, 0x16, 0x18, 0x19,
+    0x14, 0x15, 0x18, 0x19,
+    0x14, 0x15, 0x17, 0x19,
+    0x14, 0x16, 0x18, 0x1A,
+    0x14, 0x17, 0x19, 0x1A,
+    0x14, 0x15, 0x19, 0x1A,
+    0x14, 0x15, 0x17, 0x1A,
+    0x14, 0x1A, 0x1A, 0x1A,
+    0x14, 0x14, 0x1A, 0x1A,
+    0x14, 0x14, 0x14, 0x1A,
+    0x14, 0x17, 0x18, 0x1B,
+    0x14, 0x17, 0x1A, 0x1B,
+    0x14, 0x15, 0x1A, 0x1B,
+    0x14, 0x15, 0x18, 0x1B,
+    0x14, 0x17, 0x19, 0x1C,
+    0x14, 0x18, 0x1A, 0x1C,
+    0x14, 0x16, 0x1A, 0x1C,
+    0x14, 0x16, 0x18, 0x1C,
+    0x14, 0x1C, 0x1C, 0x1C,
+    0x14, 0x14, 0x1C, 0x1C,
+    0x14, 0x14, 0x14, 0x1C,
+    0x14, 0x18, 0x1B, 0x1F,
+    0x14, 0x19, 0x1D, 0x1F,
+    0x14, 0x16, 0x1D, 0x1F,
+    0x14, 0x16, 0x1A, 0x1F,
+    0x14, 0x1F, 0x1F, 0x1F,
+    0x14, 0x14, 0x1F, 0x1F,
+    0x14, 0x14, 0x14, 0x1F,
+    0x14, 0x19, 0x1D, 0x22,
+    0x14, 0x1B, 0x1F, 0x22,
+    0x14, 0x17, 0x1F, 0x22,
+    0x14, 0x17, 0x1B, 0x22,
+    0x14, 0x22, 0x22, 0x22,
+    0x14, 0x14, 0x22, 0x22,
+    0x14, 0x14, 0x14, 0x22,
+    0x14, 0x1A, 0x1F, 0x25,
+    0x14, 0x1C, 0x21, 0x25,
+    0x14, 0x18, 0x21, 0x25,
+    0x14, 0x18, 0x1D, 0x25,
+    0x14, 0x25, 0x25, 0x25,
+    0x14, 0x14, 0x25, 0x25,
+    0x14, 0x14, 0x14, 0x25,
+    0x14, 0x1B, 0x21, 0x28,
+    0x14, 0x1E, 0x23, 0x28,
+    0x14, 0x19, 0x23, 0x28,
+    0x14, 0x19, 0x1E, 0x28,
+    0x14, 0x28, 0x28, 0x28,
+    0x14, 0x14, 0x28, 0x28,
+    0x14, 0x14, 0x14, 0x28,
+    0x14, 0x1F, 0x26, 0x2B,
+    0x14, 0x19, 0x26, 0x2B,
+    0x14, 0x19, 0x20, 0x2B,
+    0x14, 0x2B, 0x2B, 0x2B,
+    0x14, 0x14, 0x2B, 0x2B,
+    0x14, 0x14, 0x14, 0x2B,
+    0x14, 0x21, 0x28, 0x2E,
+    0x14, 0x1A, 0x28, 0x2E,
+    0x14, 0x1A, 0x21, 0x2E,
+    0x14, 0x2E, 0x2E, 0x2E,
+    0x14, 0x14, 0x2E, 0x2E,
+    0x14, 0x14, 0x14, 0x2E,
+    0x14, 0x22, 0x2A, 0x31,
+    0x14, 0x1B, 0x2A, 0x31,
+    0x14, 0x1B, 0x23, 0x31,
+    0x14, 0x31, 0x31, 0x31,
+    0x14, 0x14, 0x31, 0x31,
+    0x14, 0x14, 0x14, 0x31,
+    0x14, 0x24, 0x2C, 0x34,
+    0x14, 0x1C, 0x2C, 0x34,
+    0x14, 0x1C, 0x24, 0x34,
+    0x14, 0x34, 0x34, 0x34,
+    0x14, 0x14, 0x34, 0x34,
+    0x14, 0x14, 0x14, 0x34,
+    0x14, 0x37, 0x37, 0x37,
+    0x14, 0x14, 0x37, 0x37,
+    0x14, 0x14, 0x14, 0x37,
+    0x14, 0x26, 0x2F, 0x38,
+    0x14, 0x1D, 0x2F, 0x38,
+    0x14, 0x1D, 0x26, 0x38,
+    0x14, 0x3C, 0x3C, 0x3C,
+    0x14, 0x14, 0x3C, 0x3C,
+    0x14, 0x14, 0x14, 0x3C,
+    0x15, 0x16, 0x16, 0x17,
+    0x15, 0x16, 0x17, 0x18,
+    0x15, 0x17, 0x18, 0x19,
+    0x15, 0x16, 0x18, 0x19,
+    0x15, 0x16, 0x17, 0x19,
+    0x15, 0x17, 0x18, 0x1A,
+    0x15, 0x17, 0x19, 0x1A,
+    0x15, 0x16, 0x19, 0x1A,
+    0x15, 0x16, 0x18, 0x1A,
+    0x15, 0x17, 0x19, 0x1B,
+    0x15, 0x18, 0x1A, 0x1B,
+    0x15, 0x16, 0x1A, 0x1B,
+    0x15, 0x16, 0x18, 0x1B,
+    0x15, 0x1B, 0x1B, 0x1B,
+    0x15, 0x15, 0x1B, 0x1B,
+    0x15, 0x15, 0x15, 0x1B,
+    0x15, 0x18, 0x19, 0x1C,
+    0x15, 0x18, 0x1B, 0x1C,
+    0x15, 0x16, 0x1B, 0x1C,
+    0x15, 0x16, 0x19, 0x1C,
+    0x15, 0x18, 0x1A, 0x1D,
+    0x15, 0x19, 0x1B, 0x1D,
+    0x15, 0x17, 0x1B, 0x1D,
+    0x15, 0x17, 0x19, 0x1D,
+    0x15, 0x1D, 0x1D, 0x1D,
+    0x15, 0x15, 0x1D, 0x1D,
+    0x15, 0x15, 0x15, 0x1D,
+    0x15, 0x19, 0x1C, 0x20,
+    0x15, 0x1A, 0x1E, 0x20,
+    0x15, 0x17, 0x1E, 0x20,
+    0x15, 0x17, 0x1B, 0x20,
+    0x15, 0x20, 0x20, 0x20,
+    0x15, 0x15, 0x20, 0x20,
+    0x15, 0x15, 0x15, 0x20,
+    0x15, 0x1A, 0x1E, 0x23,
+    0x15, 0x1C, 0x20, 0x23,
+    0x15, 0x18, 0x20, 0x23,
+    0x15, 0x18, 0x1C, 0x23,
+    0x15, 0x23, 0x23, 0x23,
+    0x15, 0x15, 0x23, 0x23,
+    0x15, 0x15, 0x15, 0x23,
+    0x15, 0x1B, 0x20, 0x26,
+    0x15, 0x1D, 0x22, 0x26,
+    0x15, 0x19, 0x22, 0x26,
+    0x15, 0x19, 0x1E, 0x26,
+    0x15, 0x26, 0x26, 0x26,
+    0x15, 0x15, 0x26, 0x26,
+    0x15, 0x15, 0x15, 0x26,
+    0x15, 0x1C, 0x22, 0x29,
+    0x15, 0x1F, 0x24, 0x29,
+    0x15, 0x1A, 0x24, 0x29,
+    0x15, 0x1A, 0x1F, 0x29,
+    0x15, 0x29, 0x29, 0x29,
+    0x15, 0x15, 0x29, 0x29,
+    0x15, 0x15, 0x15, 0x29,
+    0x15, 0x20, 0x27, 0x2C,
+    0x15, 0x1A, 0x27, 0x2C,
+    0x15, 0x1A, 0x21, 0x2C,
+    0x15, 0x2C, 0x2C, 0x2C,
+    0x15, 0x15, 0x2C, 0x2C,
+    0x15, 0x15, 0x15, 0x2C,
+    0x15, 0x22, 0x29, 0x2F,
+    0x15, 0x1B, 0x29, 0x2F,
+    0x15, 0x1B, 0x22, 0x2F,
+    0x15, 0x2F, 0x2F, 0x2F,
+    0x15, 0x15, 0x2F, 0x2F,
+    0x15, 0x15, 0x15, 0x2F,
+    0x15, 0x23, 0x2B, 0x32,
+    0x15, 0x1C, 0x2B, 0x32,
+    0x15, 0x1C, 0x24, 0x32,
+    0x15, 0x32, 0x32, 0x32,
+    0x15, 0x15, 0x32, 0x32,
+    0x15, 0x15, 0x15, 0x32,
+    0x15, 0x25, 0x2D, 0x35,
+    0x15, 0x1D, 0x2D, 0x35,
+    0x15, 0x1D, 0x25, 0x35,
+    0x15, 0x35, 0x35, 0x35,
+    0x15, 0x15, 0x35, 0x35,
+    0x15, 0x15, 0x15, 0x35,
+    0x15, 0x38, 0x38, 0x38,
+    0x15, 0x15, 0x38, 0x38,
+    0x15, 0x15, 0x15, 0x38,
+    0x15, 0x27, 0x30, 0x39,
+    0x15, 0x1E, 0x30, 0x39,
+    0x15, 0x1E, 0x27, 0x39,
+    0x15, 0x3D, 0x3D, 0x3D,
+    0x15, 0x15, 0x3D, 0x3D,
+    0x15, 0x15, 0x15, 0x3D,
+    0x16, 0x17, 0x17, 0x18,
+    0x16, 0x17, 0x18, 0x19,
+    0x16, 0x18, 0x19, 0x1A,
+    0x16, 0x17, 0x19, 0x1A,
+    0x16, 0x17, 0x18, 0x1A,
+    0x16, 0x18, 0x19, 0x1B,
+    0x16, 0x18, 0x1A, 0x1B,
+    0x16, 0x17, 0x1A, 0x1B,
+    0x16, 0x17, 0x19, 0x1B,
+    0x16, 0x18, 0x1A, 0x1C,
+    0x16, 0x19, 0x1B, 0x1C,
+    0x16, 0x17, 0x1B, 0x1C,
+    0x16, 0x17, 0x19, 0x1C,
+    0x16, 0x1C, 0x1C, 0x1C,
+    0x16, 0x16, 0x1C, 0x1C,
+    0x16, 0x16, 0x16, 0x1C,
+    0x16, 0x19, 0x1A, 0x1D,
+    0x16, 0x19, 0x1C, 0x1D,
+    0x16, 0x17, 0x1C, 0x1D,
+    0x16, 0x17, 0x1A, 0x1D,
+    0x16, 0x19, 0x1B, 0x1E,
+    0x16, 0x1A, 0x1C, 0x1E,
+    0x16, 0x18, 0x1C, 0x1E,
+    0x16, 0x18, 0x1A, 0x1E,
+    0x16, 0x1E, 0x1E, 0x1E,
+    0x16, 0x16, 0x1E, 0x1E,
+    0x16, 0x16, 0x16, 0x1E,
+    0x16, 0x1A, 0x1D, 0x21,
+    0x16, 0x1B, 0x1F, 0x21,
+    0x16, 0x18, 0x1F, 0x21,
+    0x16, 0x18, 0x1C, 0x21,
+    0x16, 0x21, 0x21, 0x21,
+    0x16, 0x16, 0x21, 0x21,
+    0x16, 0x16, 0x16, 0x21,
+    0x16, 0x1B, 0x1F, 0x24,
+    0x16, 0x1D, 0x21, 0x24,
+    0x16, 0x19, 0x21, 0x24,
+    0x16, 0x19, 0x1D, 0x24,
+    0x16, 0x24, 0x24, 0x24,
+    0x16, 0x16, 0x24, 0x24,
+    0x16, 0x16, 0x16, 0x24,
+    0x16, 0x1C, 0x21, 0x27,
+    0x16, 0x1E, 0x23, 0x27,
+    0x16, 0x1A, 0x23, 0x27,
+    0x16, 0x1A, 0x1F, 0x27,
+    0x16, 0x27, 0x27, 0x27,
+    0x16, 0x16, 0x27, 0x27,
+    0x16, 0x16, 0x16, 0x27,
+    0x16, 0x1D, 0x23, 0x2A,
+    0x16, 0x20, 0x25, 0x2A,
+    0x16, 0x1B, 0x25, 0x2A,
+    0x16, 0x1B, 0x20, 0x2A,
+    0x16, 0x2A, 0x2A, 0x2A,
+    0x16, 0x16, 0x2A, 0x2A,
+    0x16, 0x16, 0x16, 0x2A,
+    0x16, 0x21, 0x28, 0x2D,
+    0x16, 0x1B, 0x28, 0x2D,
+    0x16, 0x1B, 0x22, 0x2D,
+    0x16, 0x2D, 0x2D, 0x2D,
+    0x16, 0x16, 0x2D, 0x2D,
+    0x16, 0x16, 0x16, 0x2D,
+    0x16, 0x23, 0x2A, 0x30,
+    0x16, 0x1C, 0x2A, 0x30,
+    0x16, 0x1C, 0x23, 0x30,
+    0x16, 0x30, 0x30, 0x30,
+    0x16, 0x16, 0x30, 0x30,
+    0x16, 0x16, 0x16, 0x30,
+    0x16, 0x24, 0x2C, 0x33,
+    0x16, 0x1D, 0x2C, 0x33,
+    0x16, 0x1D, 0x25, 0x33,
+    0x16, 0x33, 0x33, 0x33,
+    0x16, 0x16, 0x33, 0x33,
+    0x16, 0x16, 0x16, 0x33,
+    0x16, 0x26, 0x2E, 0x36,
+    0x16, 0x1E, 0x2E, 0x36,
+    0x16, 0x1E, 0x26, 0x36,
+    0x16, 0x36, 0x36, 0x36,
+    0x16, 0x16, 0x36, 0x36,
+    0x16, 0x16, 0x16, 0x36,
+    0x16, 0x39, 0x39, 0x39,
+    0x16, 0x16, 0x39, 0x39,
+    0x16, 0x16, 0x16, 0x39,
+    0x16, 0x28, 0x31, 0x3A,
+    0x16, 0x1F, 0x31, 0x3A,
+    0x16, 0x1F, 0x28, 0x3A,
+    0x16, 0x3E, 0x3E, 0x3E,
+    0x16, 0x16, 0x3E, 0x3E,
+    0x16, 0x16, 0x16, 0x3E,
+    0x17, 0x18, 0x18, 0x19,
+    0x17, 0x18, 0x19, 0x1A,
+    0x17, 0x19, 0x1A, 0x1B,
+    0x17, 0x18, 0x1A, 0x1B,
+    0x17, 0x18, 0x19, 0x1B,
+    0x17, 0x19, 0x1A, 0x1C,
+    0x17, 0x19, 0x1B, 0x1C,
+    0x17, 0x18, 0x1B, 0x1C,
+    0x17, 0x18, 0x1A, 0x1C,
+    0x17, 0x19, 0x1B, 0x1D,
+    0x17, 0x1A, 0x1C, 0x1D,
+    0x17, 0x18, 0x1C, 0x1D,
+    0x17, 0x18, 0x1A, 0x1D,
+    0x17, 0x1D, 0x1D, 0x1D,
+    0x17, 0x17, 0x1D, 0x1D,
+    0x17, 0x17, 0x17, 0x1D,
+    0x17, 0x1A, 0x1B, 0x1E,
+    0x17, 0x1A, 0x1D, 0x1E,
+    0x17, 0x18, 0x1D, 0x1E,
+    0x17, 0x18, 0x1B, 0x1E,
+    0x17, 0x1A, 0x1C, 0x1F,
+    0x17, 0x1B, 0x1D, 0x1F,
+    0x17, 0x19, 0x1D, 0x1F,
+    0x17, 0x19, 0x1B, 0x1F,
+    0x17, 0x1F, 0x1F, 0x1F,
+    0x17, 0x17, 0x1F, 0x1F,
+    0x17, 0x17, 0x17, 0x1F,
+    0x17, 0x1B, 0x1E, 0x22,
+    0x17, 0x1C, 0x20, 0x22,
+    0x17, 0x19, 0x20, 0x22,
+    0x17, 0x19, 0x1D, 0x22,
+    0x17, 0x22, 0x22, 0x22,
+    0x17, 0x17, 0x22, 0x22,
+    0x17, 0x17, 0x17, 0x22,
+    0x17, 0x1C, 0x20, 0x25,
+    0x17, 0x1E, 0x22, 0x25,
+    0x17, 0x1A, 0x22, 0x25,
+    0x17, 0x1A, 0x1E, 0x25,
+    0x17, 0x25, 0x25, 0x25,
+    0x17, 0x17, 0x25, 0x25,
+    0x17, 0x17, 0x17, 0x25,
+    0x17, 0x1D, 0x22, 0x28,
+    0x17, 0x1F, 0x24, 0x28,
+    0x17, 0x1B, 0x24, 0x28,
+    0x17, 0x1B, 0x20, 0x28,
+    0x17, 0x28, 0x28, 0x28,
+    0x17, 0x17, 0x28, 0x28,
+    0x17, 0x17, 0x17, 0x28,
+    0x17, 0x1E, 0x24, 0x2B,
+    0x17, 0x21, 0x26, 0x2B,
+    0x17, 0x1C, 0x26, 0x2B,
+    0x17, 0x1C, 0x21, 0x2B,
+    0x17, 0x2B, 0x2B, 0x2B,
+    0x17, 0x17, 0x2B, 0x2B,
+    0x17, 0x17, 0x17, 0x2B,
+    0x17, 0x22, 0x29, 0x2E,
+    0x17, 0x1C, 0x29, 0x2E,
+    0x17, 0x1C, 0x23, 0x2E,
+    0x17, 0x2E, 0x2E, 0x2E,
+    0x17, 0x17, 0x2E, 0x2E,
+    0x17, 0x17, 0x17, 0x2E,
+    0x17, 0x24, 0x2B, 0x31,
+    0x17, 0x1D, 0x2B, 0x31,
+    0x17, 0x1D, 0x24, 0x31,
+    0x17, 0x31, 0x31, 0x31,
+    0x17, 0x17, 0x31, 0x31,
+    0x17, 0x17, 0x17, 0x31,
+    0x17, 0x25, 0x2D, 0x34,
+    0x17, 0x1E, 0x2D, 0x34,
+    0x17, 0x1E, 0x26, 0x34,
+    0x17, 0x34, 0x34, 0x34,
+    0x17, 0x17, 0x34, 0x34,
+    0x17, 0x17, 0x17, 0x34,
+    0x17, 0x27, 0x2F, 0x37,
+    0x17, 0x1F, 0x2F, 0x37,
+    0x17, 0x1F, 0x27, 0x37,
+    0x17, 0x37, 0x37, 0x37,
+    0x17, 0x17, 0x37, 0x37,
+    0x17, 0x17, 0x17, 0x37,
+    0x17, 0x3A, 0x3A, 0x3A,
+    0x17, 0x17, 0x3A, 0x3A,
+    0x17, 0x17, 0x17, 0x3A,
+    0x17, 0x29, 0x32, 0x3B,
+    0x17, 0x20, 0x32, 0x3B,
+    0x17, 0x20, 0x29, 0x3B,
+    0x17, 0x3F, 0x3F, 0x3F,
+    0x17, 0x17, 0x3F, 0x3F,
+    0x17, 0x17, 0x17, 0x3F,
+    0x18, 0x19, 0x19, 0x1A,
+    0x18, 0x19, 0x1A, 0x1B,
+    0x18, 0x1A, 0x1B, 0x1C,
+    0x18, 0x19, 0x1B, 0x1C,
+    0x18, 0x19, 0x1A, 0x1C,
+    0x18, 0x1A, 0x1B, 0x1D,
+    0x18, 0x1A, 0x1C, 0x1D,
+    0x18, 0x19, 0x1C, 0x1D,
+    0x18, 0x19, 0x1B, 0x1D,
+    0x18, 0x1A, 0x1C, 0x1E,
+    0x18, 0x1B, 0x1D, 0x1E,
+    0x18, 0x19, 0x1D, 0x1E,
+    0x18, 0x19, 0x1B, 0x1E,
+    0x18, 0x1E, 0x1E, 0x1E,
+    0x18, 0x18, 0x1E, 0x1E,
+    0x18, 0x18, 0x18, 0x1E,
+    0x18, 0x1B, 0x1C, 0x1F,
+    0x18, 0x1B, 0x1E, 0x1F,
+    0x18, 0x19, 0x1E, 0x1F,
+    0x18, 0x19, 0x1C, 0x1F,
+    0x18, 0x1B, 0x1D, 0x20,
+    0x18, 0x1C, 0x1E, 0x20,
+    0x18, 0x1A, 0x1E, 0x20,
+    0x18, 0x1A, 0x1C, 0x20,
+    0x18, 0x20, 0x20, 0x20,
+    0x18, 0x18, 0x20, 0x20,
+    0x18, 0x18, 0x18, 0x20,
+    0x18, 0x1C, 0x1F, 0x23,
+    0x18, 0x1D, 0x21, 0x23,
+    0x18, 0x1A, 0x21, 0x23,
+    0x18, 0x1A, 0x1E, 0x23,
+    0x18, 0x23, 0x23, 0x23,
+    0x18, 0x18, 0x23, 0x23,
+    0x18, 0x18, 0x18, 0x23,
+    0x18, 0x1D, 0x21, 0x26,
+    0x18, 0x1F, 0x23, 0x26,
+    0x18, 0x1B, 0x23, 0x26,
+    0x18, 0x1B, 0x1F, 0x26,
+    0x18, 0x26, 0x26, 0x26,
+    0x18, 0x18, 0x26, 0x26,
+    0x18, 0x18, 0x18, 0x26,
+    0x18, 0x1E, 0x23, 0x29,
+    0x18, 0x20, 0x25, 0x29,
+    0x18, 0x1C, 0x25, 0x29,
+    0x18, 0x1C, 0x21, 0x29,
+    0x18, 0x29, 0x29, 0x29,
+    0x18, 0x18, 0x29, 0x29,
+    0x18, 0x18, 0x18, 0x29,
+    0x18, 0x1F, 0x25, 0x2C,
+    0x18, 0x22, 0x27, 0x2C,
+    0x18, 0x1D, 0x27, 0x2C,
+    0x18, 0x1D, 0x22, 0x2C,
+    0x18, 0x2C, 0x2C, 0x2C,
+    0x18, 0x18, 0x2C, 0x2C,
+    0x18, 0x18, 0x18, 0x2C,
+    0x18, 0x23, 0x2A, 0x2F,
+    0x18, 0x1D, 0x2A, 0x2F,
+    0x18, 0x1D, 0x24, 0x2F,
+    0x18, 0x2F, 0x2F, 0x2F,
+    0x18, 0x18, 0x2F, 0x2F,
+    0x18, 0x18, 0x18, 0x2F,
+    0x18, 0x25, 0x2C, 0x32,
+    0x18, 0x1E, 0x2C, 0x32,
+    0x18, 0x1E, 0x25, 0x32,
+    0x18, 0x32, 0x32, 0x32,
+    0x18, 0x18, 0x32, 0x32,
+    0x18, 0x18, 0x18, 0x32,
+    0x18, 0x26, 0x2E, 0x35,
+    0x18, 0x1F, 0x2E, 0x35,
+    0x18, 0x1F, 0x27, 0x35,
+    0x18, 0x35, 0x35, 0x35,
+    0x18, 0x18, 0x35, 0x35,
+    0x18, 0x18, 0x18, 0x35,
+    0x18, 0x28, 0x30, 0x38,
+    0x18, 0x20, 0x30, 0x38,
+    0x18, 0x20, 0x28, 0x38,
+    0x18, 0x38, 0x38, 0x38,
+    0x18, 0x18, 0x38, 0x38,
+    0x18, 0x18, 0x18, 0x38,
+    0x18, 0x3B, 0x3B, 0x3B,
+    0x18, 0x18, 0x3B, 0x3B,
+    0x18, 0x18, 0x18, 0x3B,
+    0x18, 0x2A, 0x33, 0x3C,
+    0x18, 0x21, 0x33, 0x3C,
+    0x18, 0x21, 0x2A, 0x3C,
+    0x19, 0x1A, 0x1A, 0x1B,
+    0x19, 0x1A, 0x1B, 0x1C,
+    0x19, 0x1B, 0x1C, 0x1D,
+    0x19, 0x1A, 0x1C, 0x1D,
+    0x19, 0x1A, 0x1B, 0x1D,
+    0x19, 0x1B, 0x1C, 0x1E,
+    0x19, 0x1B, 0x1D, 0x1E,
+    0x19, 0x1A, 0x1D, 0x1E,
+    0x19, 0x1A, 0x1C, 0x1E,
+    0x19, 0x1B, 0x1D, 0x1F,
+    0x19, 0x1C, 0x1E, 0x1F,
+    0x19, 0x1A, 0x1E, 0x1F,
+    0x19, 0x1A, 0x1C, 0x1F,
+    0x19, 0x1F, 0x1F, 0x1F,
+    0x19, 0x19, 0x1F, 0x1F,
+    0x19, 0x19, 0x19, 0x1F,
+    0x19, 0x1C, 0x1D, 0x20,
+    0x19, 0x1C, 0x1F, 0x20,
+    0x19, 0x1A, 0x1F, 0x20,
+    0x19, 0x1A, 0x1D, 0x20,
+    0x19, 0x1C, 0x1E, 0x21,
+    0x19, 0x1D, 0x1F, 0x21,
+    0x19, 0x1B, 0x1F, 0x21,
+    0x19, 0x1B, 0x1D, 0x21,
+    0x19, 0x21, 0x21, 0x21,
+    0x19, 0x19, 0x21, 0x21,
+    0x19, 0x19, 0x19, 0x21,
+    0x19, 0x1D, 0x20, 0x24,
+    0x19, 0x1E, 0x22, 0x24,
+    0x19, 0x1B, 0x22, 0x24,
+    0x19, 0x1B, 0x1F, 0x24,
+    0x19, 0x24, 0x24, 0x24,
+    0x19, 0x19, 0x24, 0x24,
+    0x19, 0x19, 0x19, 0x24,
+    0x19, 0x1E, 0x22, 0x27,
+    0x19, 0x20, 0x24, 0x27,
+    0x19, 0x1C, 0x24, 0x27,
+    0x19, 0x1C, 0x20, 0x27,
+    0x19, 0x27, 0x27, 0x27,
+    0x19, 0x19, 0x27, 0x27,
+    0x19, 0x19, 0x19, 0x27,
+    0x19, 0x1F, 0x24, 0x2A,
+    0x19, 0x21, 0x26, 0x2A,
+    0x19, 0x1D, 0x26, 0x2A,
+    0x19, 0x1D, 0x22, 0x2A,
+    0x19, 0x2A, 0x2A, 0x2A,
+    0x19, 0x19, 0x2A, 0x2A,
+    0x19, 0x19, 0x19, 0x2A,
+    0x19, 0x20, 0x26, 0x2D,
+    0x19, 0x23, 0x28, 0x2D,
+    0x19, 0x1E, 0x28, 0x2D,
+    0x19, 0x1E, 0x23, 0x2D,
+    0x19, 0x2D, 0x2D, 0x2D,
+    0x19, 0x19, 0x2D, 0x2D,
+    0x19, 0x19, 0x19, 0x2D,
+    0x19, 0x24, 0x2B, 0x30,
+    0x19, 0x1E, 0x2B, 0x30,
+    0x19, 0x1E, 0x25, 0x30,
+    0x19, 0x30, 0x30, 0x30,
+    0x19, 0x19, 0x30, 0x30,
+    0x19, 0x19, 0x19, 0x30,
+    0x19, 0x26, 0x2D, 0x33,
+    0x19, 0x1F, 0x2D, 0x33,
+    0x19, 0x1F, 0x26, 0x33,
+    0x19, 0x33, 0x33, 0x33,
+    0x19, 0x19, 0x33, 0x33,
+    0x19, 0x19, 0x19, 0x33,
+    0x19, 0x27, 0x2F, 0x36,
+    0x19, 0x20, 0x2F, 0x36,
+    0x19, 0x20, 0x28, 0x36,
+    0x19, 0x36, 0x36, 0x36,
+    0x19, 0x19, 0x36, 0x36,
+    0x19, 0x19, 0x19, 0x36,
+    0x19, 0x29, 0x31, 0x39,
+    0x19, 0x21, 0x31, 0x39,
+    0x19, 0x21, 0x29, 0x39,
+    0x19, 0x39, 0x39, 0x39,
+    0x19, 0x19, 0x39, 0x39,
+    0x19, 0x19, 0x19, 0x39,
+    0x19, 0x3C, 0x3C, 0x3C,
+    0x19, 0x19, 0x3C, 0x3C,
+    0x19, 0x19, 0x19, 0x3C,
+    0x19, 0x2B, 0x34, 0x3D,
+    0x19, 0x22, 0x34, 0x3D,
+    0x19, 0x22, 0x2B, 0x3D,
+    0x1A, 0x1B, 0x1B, 0x1C,
+    0x1A, 0x1B, 0x1C, 0x1D,
+    0x1A, 0x1C, 0x1D, 0x1E,
+    0x1A, 0x1B, 0x1D, 0x1E,
+    0x1A, 0x1B, 0x1C, 0x1E,
+    0x1A, 0x1C, 0x1D, 0x1F,
+    0x1A, 0x1C, 0x1E, 0x1F,
+    0x1A, 0x1B, 0x1E, 0x1F,
+    0x1A, 0x1B, 0x1D, 0x1F,
+    0x1A, 0x1C, 0x1E, 0x20,
+    0x1A, 0x1D, 0x1F, 0x20,
+    0x1A, 0x1B, 0x1F, 0x20,
+    0x1A, 0x1B, 0x1D, 0x20,
+    0x1A, 0x20, 0x20, 0x20,
+    0x1A, 0x1A, 0x20, 0x20,
+    0x1A, 0x1A, 0x1A, 0x20,
+    0x1A, 0x1D, 0x1E, 0x21,
+    0x1A, 0x1D, 0x20, 0x21,
+    0x1A, 0x1B, 0x20, 0x21,
+    0x1A, 0x1B, 0x1E, 0x21,
+    0x1A, 0x1D, 0x1F, 0x22,
+    0x1A, 0x1E, 0x20, 0x22,
+    0x1A, 0x1C, 0x20, 0x22,
+    0x1A, 0x1C, 0x1E, 0x22,
+    0x1A, 0x22, 0x22, 0x22,
+    0x1A, 0x1A, 0x22, 0x22,
+    0x1A, 0x1A, 0x1A, 0x22,
+    0x1A, 0x1E, 0x21, 0x25,
+    0x1A, 0x1F, 0x23, 0x25,
+    0x1A, 0x1C, 0x23, 0x25,
+    0x1A, 0x1C, 0x20, 0x25,
+    0x1A, 0x25, 0x25, 0x25,
+    0x1A, 0x1A, 0x25, 0x25,
+    0x1A, 0x1A, 0x1A, 0x25,
+    0x1A, 0x1F, 0x23, 0x28,
+    0x1A, 0x21, 0x25, 0x28,
+    0x1A, 0x1D, 0x25, 0x28,
+    0x1A, 0x1D, 0x21, 0x28,
+    0x1A, 0x28, 0x28, 0x28,
+    0x1A, 0x1A, 0x28, 0x28,
+    0x1A, 0x1A, 0x1A, 0x28,
+    0x1A, 0x20, 0x25, 0x2B,
+    0x1A, 0x22, 0x27, 0x2B,
+    0x1A, 0x1E, 0x27, 0x2B,
+    0x1A, 0x1E, 0x23, 0x2B,
+    0x1A, 0x2B, 0x2B, 0x2B,
+    0x1A, 0x1A, 0x2B, 0x2B,
+    0x1A, 0x1A, 0x1A, 0x2B,
+    0x1A, 0x21, 0x27, 0x2E,
+    0x1A, 0x24, 0x29, 0x2E,
+    0x1A, 0x1F, 0x29, 0x2E,
+    0x1A, 0x1F, 0x24, 0x2E,
+    0x1A, 0x2E, 0x2E, 0x2E,
+    0x1A, 0x1A, 0x2E, 0x2E,
+    0x1A, 0x1A, 0x1A, 0x2E,
+    0x1A, 0x25, 0x2C, 0x31,
+    0x1A, 0x1F, 0x2C, 0x31,
+    0x1A, 0x1F, 0x26, 0x31,
+    0x1A, 0x31, 0x31, 0x31,
+    0x1A, 0x1A, 0x31, 0x31,
+    0x1A, 0x1A, 0x1A, 0x31,
+    0x1A, 0x27, 0x2E, 0x34,
+    0x1A, 0x20, 0x2E, 0x34,
+    0x1A, 0x20, 0x27, 0x34,
+    0x1A, 0x34, 0x34, 0x34,
+    0x1A, 0x1A, 0x34, 0x34,
+    0x1A, 0x1A, 0x1A, 0x34,
+    0x1A, 0x28, 0x30, 0x37,
+    0x1A, 0x21, 0x30, 0x37,
+    0x1A, 0x21, 0x29, 0x37,
+    0x1A, 0x37, 0x37, 0x37,
+    0x1A, 0x1A, 0x37, 0x37,
+    0x1A, 0x1A, 0x1A, 0x37,
+    0x1A, 0x2A, 0x32, 0x3A,
+    0x1A, 0x22, 0x32, 0x3A,
+    0x1A, 0x22, 0x2A, 0x3A,
+    0x1A, 0x3A, 0x3A, 0x3A,
+    0x1A, 0x1A, 0x3A, 0x3A,
+    0x1A, 0x1A, 0x1A, 0x3A,
+    0x1A, 0x3D, 0x3D, 0x3D,
+    0x1A, 0x1A, 0x3D, 0x3D,
+    0x1A, 0x1A, 0x1A, 0x3D,
+    0x1A, 0x2C, 0x35, 0x3E,
+    0x1A, 0x23, 0x35, 0x3E,
+    0x1A, 0x23, 0x2C, 0x3E,
+    0x1B, 0x1C, 0x1C, 0x1D,
+    0x1B, 0x1C, 0x1D, 0x1E,
+    0x1B, 0x1D, 0x1E, 0x1F,
+    0x1B, 0x1C, 0x1E, 0x1F,
+    0x1B, 0x1C, 0x1D, 0x1F,
+    0x1B, 0x1D, 0x1E, 0x20,
+    0x1B, 0x1D, 0x1F, 0x20,
+    0x1B, 0x1C, 0x1F, 0x20,
+    0x1B, 0x1C, 0x1E, 0x20,
+    0x1B, 0x1D, 0x1F, 0x21,
+    0x1B, 0x1E, 0x20, 0x21,
+    0x1B, 0x1C, 0x20, 0x21,
+    0x1B, 0x1C, 0x1E, 0x21,
+    0x1B, 0x21, 0x21, 0x21,
+    0x1B, 0x1B, 0x21, 0x21,
+    0x1B, 0x1B, 0x1B, 0x21,
+    0x1B, 0x1E, 0x1F, 0x22,
+    0x1B, 0x1E, 0x21, 0x22,
+    0x1B, 0x1C, 0x21, 0x22,
+    0x1B, 0x1C, 0x1F, 0x22,
+    0x1B, 0x1E, 0x20, 0x23,
+    0x1B, 0x1F, 0x21, 0x23,
+    0x1B, 0x1D, 0x21, 0x23,
+    0x1B, 0x1D, 0x1F, 0x23,
+    0x1B, 0x23, 0x23, 0x23,
+    0x1B, 0x1B, 0x23, 0x23,
+    0x1B, 0x1B, 0x1B, 0x23,
+    0x1B, 0x1F, 0x22, 0x26,
+    0x1B, 0x20, 0x24, 0x26,
+    0x1B, 0x1D, 0x24, 0x26,
+    0x1B, 0x1D, 0x21, 0x26,
+    0x1B, 0x26, 0x26, 0x26,
+    0x1B, 0x1B, 0x26, 0x26,
+    0x1B, 0x1B, 0x1B, 0x26,
+    0x1B, 0x20, 0x24, 0x29,
+    0x1B, 0x22, 0x26, 0x29,
+    0x1B, 0x1E, 0x26, 0x29,
+    0x1B, 0x1E, 0x22, 0x29,
+    0x1B, 0x29, 0x29, 0x29,
+    0x1B, 0x1B, 0x29, 0x29,
+    0x1B, 0x1B, 0x1B, 0x29,
+    0x1B, 0x21, 0x26, 0x2C,
+    0x1B, 0x23, 0x28, 0x2C,
+    0x1B, 0x1F, 0x28, 0x2C,
+    0x1B, 0x1F, 0x24, 0x2C,
+    0x1B, 0x2C, 0x2C, 0x2C,
+    0x1B, 0x1B, 0x2C, 0x2C,
+    0x1B, 0x1B, 0x1B, 0x2C,
+    0x1B, 0x22, 0x28, 0x2F,
+    0x1B, 0x25, 0x2A, 0x2F,
+    0x1B, 0x20, 0x2A, 0x2F,
+    0x1B, 0x20, 0x25, 0x2F,
+    0x1B, 0x2F, 0x2F, 0x2F,
+    0x1B, 0x1B, 0x2F, 0x2F,
+    0x1B, 0x1B, 0x1B, 0x2F,
+    0x1B, 0x26, 0x2D, 0x32,
+    0x1B, 0x20, 0x2D, 0x32,
+    0x1B, 0x20, 0x27, 0x32,
+    0x1B, 0x32, 0x32, 0x32,
+    0x1B, 0x1B, 0x32, 0x32,
+    0x1B, 0x1B, 0x1B, 0x32,
+    0x1B, 0x28, 0x2F, 0x35,
+    0x1B, 0x21, 0x2F, 0x35,
+    0x1B, 0x21, 0x28, 0x35,
+    0x1B, 0x35, 0x35, 0x35,
+    0x1B, 0x1B, 0x35, 0x35,
+    0x1B, 0x1B, 0x1B, 0x35,
+    0x1B, 0x29, 0x31, 0x38,
+    0x1B, 0x22, 0x31, 0x38,
+    0x1B, 0x22, 0x2A, 0x38,
+    0x1B, 0x38, 0x38, 0x38,
+    0x1B, 0x1B, 0x38, 0x38,
+    0x1B, 0x1B, 0x1B, 0x38,
+    0x1B, 0x2B, 0x33, 0x3B,
+    0x1B, 0x23, 0x33, 0x3B,
+    0x1B, 0x23, 0x2B, 0x3B,
+    0x1B, 0x3B, 0x3B, 0x3B,
+    0x1B, 0x1B, 0x3B, 0x3B,
+    0x1B, 0x1B, 0x1B, 0x3B,
+    0x1B, 0x3E, 0x3E, 0x3E,
+    0x1B, 0x1B, 0x3E, 0x3E,
+    0x1B, 0x1B, 0x1B, 0x3E,
+    0x1B, 0x2D, 0x36, 0x3F,
+    0x1B, 0x24, 0x36, 0x3F,
+    0x1B, 0x24, 0x2D, 0x3F,
+    0x1C, 0x1D, 0x1D, 0x1E,
+    0x1C, 0x1D, 0x1E, 0x1F,
+    0x1C, 0x1E, 0x1F, 0x20,
+    0x1C, 0x1D, 0x1F, 0x20,
+    0x1C, 0x1D, 0x1E, 0x20,
+    0x1C, 0x1E, 0x1F, 0x21,
+    0x1C, 0x1E, 0x20, 0x21,
+    0x1C, 0x1D, 0x20, 0x21,
+    0x1C, 0x1D, 0x1F, 0x21,
+    0x1C, 0x1E, 0x20, 0x22,
+    0x1C, 0x1F, 0x21, 0x22,
+    0x1C, 0x1D, 0x21, 0x22,
+    0x1C, 0x1D, 0x1F, 0x22,
+    0x1C, 0x22, 0x22, 0x22,
+    0x1C, 0x1C, 0x22, 0x22,
+    0x1C, 0x1C, 0x1C, 0x22,
+    0x1C, 0x1F, 0x20, 0x23,
+    0x1C, 0x1F, 0x22, 0x23,
+    0x1C, 0x1D, 0x22, 0x23,
+    0x1C, 0x1D, 0x20, 0x23,
+    0x1C, 0x1F, 0x21, 0x24,
+    0x1C, 0x20, 0x22, 0x24,
+    0x1C, 0x1E, 0x22, 0x24,
+    0x1C, 0x1E, 0x20, 0x24,
+    0x1C, 0x24, 0x24, 0x24,
+    0x1C, 0x1C, 0x24, 0x24,
+    0x1C, 0x1C, 0x1C, 0x24,
+    0x1C, 0x20, 0x23, 0x27,
+    0x1C, 0x21, 0x25, 0x27,
+    0x1C, 0x1E, 0x25, 0x27,
+    0x1C, 0x1E, 0x22, 0x27,
+    0x1C, 0x27, 0x27, 0x27,
+    0x1C, 0x1C, 0x27, 0x27,
+    0x1C, 0x1C, 0x1C, 0x27,
+    0x1C, 0x21, 0x25, 0x2A,
+    0x1C, 0x23, 0x27, 0x2A,
+    0x1C, 0x1F, 0x27, 0x2A,
+    0x1C, 0x1F, 0x23, 0x2A,
+    0x1C, 0x2A, 0x2A, 0x2A,
+    0x1C, 0x1C, 0x2A, 0x2A,
+    0x1C, 0x1C, 0x1C, 0x2A,
+    0x1C, 0x22, 0x27, 0x2D,
+    0x1C, 0x24, 0x29, 0x2D,
+    0x1C, 0x20, 0x29, 0x2D,
+    0x1C, 0x20, 0x25, 0x2D,
+    0x1C, 0x2D, 0x2D, 0x2D,
+    0x1C, 0x1C, 0x2D, 0x2D,
+    0x1C, 0x1C, 0x1C, 0x2D,
+    0x1C, 0x23, 0x29, 0x30,
+    0x1C, 0x26, 0x2B, 0x30,
+    0x1C, 0x21, 0x2B, 0x30,
+    0x1C, 0x21, 0x26, 0x30,
+    0x1C, 0x30, 0x30, 0x30,
+    0x1C, 0x1C, 0x30, 0x30,
+    0x1C, 0x1C, 0x1C, 0x30,
+    0x1C, 0x27, 0x2E, 0x33,
+    0x1C, 0x21, 0x2E, 0x33,
+    0x1C, 0x21, 0x28, 0x33,
+    0x1C, 0x33, 0x33, 0x33,
+    0x1C, 0x1C, 0x33, 0x33,
+    0x1C, 0x1C, 0x1C, 0x33,
+    0x1C, 0x29, 0x30, 0x36,
+    0x1C, 0x22, 0x30, 0x36,
+    0x1C, 0x22, 0x29, 0x36,
+    0x1C, 0x36, 0x36, 0x36,
+    0x1C, 0x1C, 0x36, 0x36,
+    0x1C, 0x1C, 0x1C, 0x36,
+    0x1C, 0x2A, 0x32, 0x39,
+    0x1C, 0x23, 0x32, 0x39,
+    0x1C, 0x23, 0x2B, 0x39,
+    0x1C, 0x39, 0x39, 0x39,
+    0x1C, 0x1C, 0x39, 0x39,
+    0x1C, 0x1C, 0x1C, 0x39,
+    0x1C, 0x2C, 0x34, 0x3C,
+    0x1C, 0x24, 0x34, 0x3C,
+    0x1C, 0x24, 0x2C, 0x3C,
+    0x1C, 0x3C, 0x3C, 0x3C,
+    0x1C, 0x1C, 0x3C, 0x3C,
+    0x1C, 0x1C, 0x1C, 0x3C,
+    0x1C, 0x3F, 0x3F, 0x3F,
+    0x1C, 0x1C, 0x3F, 0x3F,
+    0x1C, 0x1C, 0x1C, 0x3F,
+    0x1D, 0x1E, 0x1E, 0x1F,
+    0x1D, 0x1E, 0x1F, 0x20,
+    0x1D, 0x1F, 0x20, 0x21,
+    0x1D, 0x1E, 0x20, 0x21,
+    0x1D, 0x1E, 0x1F, 0x21,
+    0x1D, 0x1F, 0x20, 0x22,
+    0x1D, 0x1F, 0x21, 0x22,
+    0x1D, 0x1E, 0x21, 0x22,
+    0x1D, 0x1E, 0x20, 0x22,
+    0x1D, 0x1F, 0x21, 0x23,
+    0x1D, 0x20, 0x22, 0x23,
+    0x1D, 0x1E, 0x22, 0x23,
+    0x1D, 0x1E, 0x20, 0x23,
+    0x1D, 0x23, 0x23, 0x23,
+    0x1D, 0x1D, 0x23, 0x23,
+    0x1D, 0x1D, 0x1D, 0x23,
+    0x1D, 0x20, 0x21, 0x24,
+    0x1D, 0x20, 0x23, 0x24,
+    0x1D, 0x1E, 0x23, 0x24,
+    0x1D, 0x1E, 0x21, 0x24,
+    0x1D, 0x20, 0x22, 0x25,
+    0x1D, 0x21, 0x23, 0x25,
+    0x1D, 0x1F, 0x23, 0x25,
+    0x1D, 0x1F, 0x21, 0x25,
+    0x1D, 0x25, 0x25, 0x25,
+    0x1D, 0x1D, 0x25, 0x25,
+    0x1D, 0x1D, 0x1D, 0x25,
+    0x1D, 0x21, 0x24, 0x28,
+    0x1D, 0x22, 0x26, 0x28,
+    0x1D, 0x1F, 0x26, 0x28,
+    0x1D, 0x1F, 0x23, 0x28,
+    0x1D, 0x28, 0x28, 0x28,
+    0x1D, 0x1D, 0x28, 0x28,
+    0x1D, 0x1D, 0x1D, 0x28,
+    0x1D, 0x22, 0x26, 0x2B,
+    0x1D, 0x24, 0x28, 0x2B,
+    0x1D, 0x20, 0x28, 0x2B,
+    0x1D, 0x20, 0x24, 0x2B,
+    0x1D, 0x2B, 0x2B, 0x2B,
+    0x1D, 0x1D, 0x2B, 0x2B,
+    0x1D, 0x1D, 0x1D, 0x2B,
+    0x1D, 0x23, 0x28, 0x2E,
+    0x1D, 0x25, 0x2A, 0x2E,
+    0x1D, 0x21, 0x2A, 0x2E,
+    0x1D, 0x21, 0x26, 0x2E,
+    0x1D, 0x2E, 0x2E, 0x2E,
+    0x1D, 0x1D, 0x2E, 0x2E,
+    0x1D, 0x1D, 0x1D, 0x2E,
+    0x1D, 0x24, 0x2A, 0x31,
+    0x1D, 0x27, 0x2C, 0x31,
+    0x1D, 0x22, 0x2C, 0x31,
+    0x1D, 0x22, 0x27, 0x31,
+    0x1D, 0x31, 0x31, 0x31,
+    0x1D, 0x1D, 0x31, 0x31,
+    0x1D, 0x1D, 0x1D, 0x31,
+    0x1D, 0x28, 0x2F, 0x34,
+    0x1D, 0x22, 0x2F, 0x34,
+    0x1D, 0x22, 0x29, 0x34,
+    0x1D, 0x34, 0x34, 0x34,
+    0x1D, 0x1D, 0x34, 0x34,
+    0x1D, 0x1D, 0x1D, 0x34,
+    0x1D, 0x2A, 0x31, 0x37,
+    0x1D, 0x23, 0x31, 0x37,
+    0x1D, 0x23, 0x2A, 0x37,
+    0x1D, 0x37, 0x37, 0x37,
+    0x1D, 0x1D, 0x37, 0x37,
+    0x1D, 0x1D, 0x1D, 0x37,
+    0x1D, 0x2B, 0x33, 0x3A,
+    0x1D, 0x24, 0x33, 0x3A,
+    0x1D, 0x24, 0x2C, 0x3A,
+    0x1D, 0x3A, 0x3A, 0x3A,
+    0x1D, 0x1D, 0x3A, 0x3A,
+    0x1D, 0x1D, 0x1D, 0x3A,
+    0x1D, 0x2D, 0x35, 0x3D,
+    0x1D, 0x25, 0x35, 0x3D,
+    0x1D, 0x25, 0x2D, 0x3D,
+    0x1D, 0x3D, 0x3D, 0x3D,
+    0x1D, 0x1D, 0x3D, 0x3D,
+    0x1D, 0x1D, 0x1D, 0x3D,
+    0x1E, 0x1F, 0x1F, 0x20,
+    0x1E, 0x1F, 0x20, 0x21,
+    0x1E, 0x20, 0x21, 0x22,
+    0x1E, 0x1F, 0x21, 0x22,
+    0x1E, 0x1F, 0x20, 0x22,
+    0x1E, 0x20, 0x21, 0x23,
+    0x1E, 0x20, 0x22, 0x23,
+    0x1E, 0x1F, 0x22, 0x23,
+    0x1E, 0x1F, 0x21, 0x23,
+    0x1E, 0x20, 0x22, 0x24,
+    0x1E, 0x21, 0x23, 0x24,
+    0x1E, 0x1F, 0x23, 0x24,
+    0x1E, 0x1F, 0x21, 0x24,
+    0x1E, 0x24, 0x24, 0x24,
+    0x1E, 0x1E, 0x24, 0x24,
+    0x1E, 0x1E, 0x1E, 0x24,
+    0x1E, 0x21, 0x22, 0x25,
+    0x1E, 0x21, 0x24, 0x25,
+    0x1E, 0x1F, 0x24, 0x25,
+    0x1E, 0x1F, 0x22, 0x25,
+    0x1E, 0x21, 0x23, 0x26,
+    0x1E, 0x22, 0x24, 0x26,
+    0x1E, 0x20, 0x24, 0x26,
+    0x1E, 0x20, 0x22, 0x26,
+    0x1E, 0x26, 0x26, 0x26,
+    0x1E, 0x1E, 0x26, 0x26,
+    0x1E, 0x1E, 0x1E, 0x26,
+    0x1E, 0x22, 0x25, 0x29,
+    0x1E, 0x23, 0x27, 0x29,
+    0x1E, 0x20, 0x27, 0x29,
+    0x1E, 0x20, 0x24, 0x29,
+    0x1E, 0x29, 0x29, 0x29,
+    0x1E, 0x1E, 0x29, 0x29,
+    0x1E, 0x1E, 0x1E, 0x29,
+    0x1E, 0x23, 0x27, 0x2C,
+    0x1E, 0x25, 0x29, 0x2C,
+    0x1E, 0x21, 0x29, 0x2C,
+    0x1E, 0x21, 0x25, 0x2C,
+    0x1E, 0x2C, 0x2C, 0x2C,
+    0x1E, 0x1E, 0x2C, 0x2C,
+    0x1E, 0x1E, 0x1E, 0x2C,
+    0x1E, 0x24, 0x29, 0x2F,
+    0x1E, 0x26, 0x2B, 0x2F,
+    0x1E, 0x22, 0x2B, 0x2F,
+    0x1E, 0x22, 0x27, 0x2F,
+    0x1E, 0x2F, 0x2F, 0x2F,
+    0x1E, 0x1E, 0x2F, 0x2F,
+    0x1E, 0x1E, 0x1E, 0x2F,
+    0x1E, 0x25, 0x2B, 0x32,
+    0x1E, 0x28, 0x2D, 0x32,
+    0x1E, 0x23, 0x2D, 0x32,
+    0x1E, 0x23, 0x28, 0x32,
+    0x1E, 0x32, 0x32, 0x32,
+    0x1E, 0x1E, 0x32, 0x32,
+    0x1E, 0x1E, 0x1E, 0x32,
+    0x1E, 0x29, 0x30, 0x35,
+    0x1E, 0x23, 0x30, 0x35,
+    0x1E, 0x23, 0x2A, 0x35,
+    0x1E, 0x35, 0x35, 0x35,
+    0x1E, 0x1E, 0x35, 0x35,
+    0x1E, 0x1E, 0x1E, 0x35,
+    0x1E, 0x2B, 0x32, 0x38,
+    0x1E, 0x24, 0x32, 0x38,
+    0x1E, 0x24, 0x2B, 0x38,
+    0x1E, 0x38, 0x38, 0x38,
+    0x1E, 0x1E, 0x38, 0x38,
+    0x1E, 0x1E, 0x1E, 0x38,
+    0x1E, 0x2C, 0x34, 0x3B,
+    0x1E, 0x25, 0x34, 0x3B,
+    0x1E, 0x25, 0x2D, 0x3B,
+    0x1E, 0x3B, 0x3B, 0x3B,
+    0x1E, 0x1E, 0x3B, 0x3B,
+    0x1E, 0x1E, 0x1E, 0x3B,
+    0x1E, 0x2E, 0x36, 0x3E,
+    0x1E, 0x26, 0x36, 0x3E,
+    0x1E, 0x26, 0x2E, 0x3E,
+    0x1E, 0x3E, 0x3E, 0x3E,
+    0x1E, 0x1E, 0x3E, 0x3E,
+    0x1E, 0x1E, 0x1E, 0x3E,
+    0x1F, 0x20, 0x20, 0x21,
+    0x1F, 0x20, 0x21, 0x22,
+    0x1F, 0x21, 0x22, 0x23,
+    0x1F, 0x20, 0x22, 0x23,
+    0x1F, 0x20, 0x21, 0x23,
+    0x1F, 0x21, 0x22, 0x24,
+    0x1F, 0x21, 0x23, 0x24,
+    0x1F, 0x20, 0x23, 0x24,
+    0x1F, 0x20, 0x22, 0x24,
+    0x1F, 0x21, 0x23, 0x25,
+    0x1F, 0x22, 0x24, 0x25,
+    0x1F, 0x20, 0x24, 0x25,
+    0x1F, 0x20, 0x22, 0x25,
+    0x1F, 0x25, 0x25, 0x25,
+    0x1F, 0x1F, 0x25, 0x25,
+    0x1F, 0x1F, 0x1F, 0x25,
+    0x1F, 0x22, 0x23, 0x26,
+    0x1F, 0x22, 0x25, 0x26,
+    0x1F, 0x20, 0x25, 0x26,
+    0x1F, 0x20, 0x23, 0x26,
+    0x1F, 0x22, 0x24, 0x27,
+    0x1F, 0x23, 0x25, 0x27,
+    0x1F, 0x21, 0x25, 0x27,
+    0x1F, 0x21, 0x23, 0x27,
+    0x1F, 0x27, 0x27, 0x27,
+    0x1F, 0x1F, 0x27, 0x27,
+    0x1F, 0x1F, 0x1F, 0x27,
+    0x1F, 0x23, 0x26, 0x2A,
+    0x1F, 0x24, 0x28, 0x2A,
+    0x1F, 0x21, 0x28, 0x2A,
+    0x1F, 0x21, 0x25, 0x2A,
+    0x1F, 0x2A, 0x2A, 0x2A,
+    0x1F, 0x1F, 0x2A, 0x2A,
+    0x1F, 0x1F, 0x1F, 0x2A,
+    0x1F, 0x24, 0x28, 0x2D,
+    0x1F, 0x26, 0x2A, 0x2D,
+    0x1F, 0x22, 0x2A, 0x2D,
+    0x1F, 0x22, 0x26, 0x2D,
+    0x1F, 0x2D, 0x2D, 0x2D,
+    0x1F, 0x1F, 0x2D, 0x2D,
+    0x1F, 0x1F, 0x1F, 0x2D,
+    0x1F, 0x25, 0x2A, 0x30,
+    0x1F, 0x27, 0x2C, 0x30,
+    0x1F, 0x23, 0x2C, 0x30,
+    0x1F, 0x23, 0x28, 0x30,
+    0x1F, 0x30, 0x30, 0x30,
+    0x1F, 0x1F, 0x30, 0x30,
+    0x1F, 0x1F, 0x1F, 0x30,
+    0x1F, 0x26, 0x2C, 0x33,
+    0x1F, 0x29, 0x2E, 0x33,
+    0x1F, 0x24, 0x2E, 0x33,
+    0x1F, 0x24, 0x29, 0x33,
+    0x1F, 0x33, 0x33, 0x33,
+    0x1F, 0x1F, 0x33, 0x33,
+    0x1F, 0x1F, 0x1F, 0x33,
+    0x1F, 0x2A, 0x31, 0x36,
+    0x1F, 0x24, 0x31, 0x36,
+    0x1F, 0x24, 0x2B, 0x36,
+    0x1F, 0x36, 0x36, 0x36,
+    0x1F, 0x1F, 0x36, 0x36,
+    0x1F, 0x1F, 0x1F, 0x36,
+    0x1F, 0x2C, 0x33, 0x39,
+    0x1F, 0x25, 0x33, 0x39,
+    0x1F, 0x25, 0x2C, 0x39,
+    0x1F, 0x39, 0x39, 0x39,
+    0x1F, 0x1F, 0x39, 0x39,
+    0x1F, 0x1F, 0x1F, 0x39,
+    0x1F, 0x2D, 0x35, 0x3C,
+    0x1F, 0x26, 0x35, 0x3C,
+    0x1F, 0x26, 0x2E, 0x3C,
+    0x1F, 0x3C, 0x3C, 0x3C,
+    0x1F, 0x1F, 0x3C, 0x3C,
+    0x1F, 0x1F, 0x1F, 0x3C,
+    0x1F, 0x2F, 0x37, 0x3F,
+    0x1F, 0x27, 0x37, 0x3F,
+    0x1F, 0x27, 0x2F, 0x3F,
+    0x1F, 0x3F, 0x3F, 0x3F,
+    0x1F, 0x1F, 0x3F, 0x3F,
+    0x1F, 0x1F, 0x1F, 0x3F,
+    0x20, 0x21, 0x21, 0x22,
+    0x20, 0x21, 0x22, 0x23,
+    0x20, 0x22, 0x23, 0x24,
+    0x20, 0x21, 0x23, 0x24,
+    0x20, 0x21, 0x22, 0x24,
+    0x20, 0x22, 0x23, 0x25,
+    0x20, 0x22, 0x24, 0x25,
+    0x20, 0x21, 0x24, 0x25,
+    0x20, 0x21, 0x23, 0x25,
+    0x20, 0x22, 0x24, 0x26,
+    0x20, 0x23, 0x25, 0x26,
+    0x20, 0x21, 0x25, 0x26,
+    0x20, 0x21, 0x23, 0x26,
+    0x20, 0x26, 0x26, 0x26,
+    0x20, 0x20, 0x26, 0x26,
+    0x20, 0x20, 0x20, 0x26,
+    0x20, 0x23, 0x24, 0x27,
+    0x20, 0x23, 0x26, 0x27,
+    0x20, 0x21, 0x26, 0x27,
+    0x20, 0x21, 0x24, 0x27,
+    0x20, 0x23, 0x25, 0x28,
+    0x20, 0x24, 0x26, 0x28,
+    0x20, 0x22, 0x26, 0x28,
+    0x20, 0x22, 0x24, 0x28,
+    0x20, 0x28, 0x28, 0x28,
+    0x20, 0x20, 0x28, 0x28,
+    0x20, 0x20, 0x20, 0x28,
+    0x20, 0x24, 0x27, 0x2B,
+    0x20, 0x25, 0x29, 0x2B,
+    0x20, 0x22, 0x29, 0x2B,
+    0x20, 0x22, 0x26, 0x2B,
+    0x20, 0x2B, 0x2B, 0x2B,
+    0x20, 0x20, 0x2B, 0x2B,
+    0x20, 0x20, 0x20, 0x2B,
+    0x20, 0x25, 0x29, 0x2E,
+    0x20, 0x27, 0x2B, 0x2E,
+    0x20, 0x23, 0x2B, 0x2E,
+    0x20, 0x23, 0x27, 0x2E,
+    0x20, 0x2E, 0x2E, 0x2E,
+    0x20, 0x20, 0x2E, 0x2E,
+    0x20, 0x20, 0x20, 0x2E,
+    0x20, 0x26, 0x2B, 0x31,
+    0x20, 0x28, 0x2D, 0x31,
+    0x20, 0x24, 0x2D, 0x31,
+    0x20, 0x24, 0x29, 0x31,
+    0x20, 0x31, 0x31, 0x31,
+    0x20, 0x20, 0x31, 0x31,
+    0x20, 0x20, 0x20, 0x31,
+    0x20, 0x27, 0x2D, 0x34,
+    0x20, 0x2A, 0x2F, 0x34,
+    0x20, 0x25, 0x2F, 0x34,
+    0x20, 0x25, 0x2A, 0x34,
+    0x20, 0x34, 0x34, 0x34,
+    0x20, 0x20, 0x34, 0x34,
+    0x20, 0x20, 0x20, 0x34,
+    0x20, 0x2B, 0x32, 0x37,
+    0x20, 0x25, 0x32, 0x37,
+    0x20, 0x25, 0x2C, 0x37,
+    0x20, 0x37, 0x37, 0x37,
+    0x20, 0x20, 0x37, 0x37,
+    0x20, 0x20, 0x20, 0x37,
+    0x20, 0x2D, 0x34, 0x3A,
+    0x20, 0x26, 0x34, 0x3A,
+    0x20, 0x26, 0x2D, 0x3A,
+    0x20, 0x3A, 0x3A, 0x3A,
+    0x20, 0x20, 0x3A, 0x3A,
+    0x20, 0x20, 0x20, 0x3A,
+    0x20, 0x2E, 0x36, 0x3D,
+    0x20, 0x27, 0x36, 0x3D,
+    0x20, 0x27, 0x2F, 0x3D,
+    0x20, 0x3D, 0x3D, 0x3D,
+    0x20, 0x20, 0x3D, 0x3D,
+    0x20, 0x20, 0x20, 0x3D,
+    0x21, 0x22, 0x22, 0x23,
+    0x21, 0x22, 0x23, 0x24,
+    0x21, 0x23, 0x24, 0x25,
+    0x21, 0x22, 0x24, 0x25,
+    0x21, 0x22, 0x23, 0x25,
+    0x21, 0x23, 0x24, 0x26,
+    0x21, 0x23, 0x25, 0x26,
+    0x21, 0x22, 0x25, 0x26,
+    0x21, 0x22, 0x24, 0x26,
+    0x21, 0x23, 0x25, 0x27,
+    0x21, 0x24, 0x26, 0x27,
+    0x21, 0x22, 0x26, 0x27,
+    0x21, 0x22, 0x24, 0x27,
+    0x21, 0x27, 0x27, 0x27,
+    0x21, 0x21, 0x27, 0x27,
+    0x21, 0x21, 0x21, 0x27,
+    0x21, 0x24, 0x25, 0x28,
+    0x21, 0x24, 0x27, 0x28,
+    0x21, 0x22, 0x27, 0x28,
+    0x21, 0x22, 0x25, 0x28,
+    0x21, 0x24, 0x26, 0x29,
+    0x21, 0x25, 0x27, 0x29,
+    0x21, 0x23, 0x27, 0x29,
+    0x21, 0x23, 0x25, 0x29,
+    0x21, 0x29, 0x29, 0x29,
+    0x21, 0x21, 0x29, 0x29,
+    0x21, 0x21, 0x21, 0x29,
+    0x21, 0x25, 0x28, 0x2C,
+    0x21, 0x26, 0x2A, 0x2C,
+    0x21, 0x23, 0x2A, 0x2C,
+    0x21, 0x23, 0x27, 0x2C,
+    0x21, 0x2C, 0x2C, 0x2C,
+    0x21, 0x21, 0x2C, 0x2C,
+    0x21, 0x21, 0x21, 0x2C,
+    0x21, 0x26, 0x2A, 0x2F,
+    0x21, 0x28, 0x2C, 0x2F,
+    0x21, 0x24, 0x2C, 0x2F,
+    0x21, 0x24, 0x28, 0x2F,
+    0x21, 0x2F, 0x2F, 0x2F,
+    0x21, 0x21, 0x2F, 0x2F,
+    0x21, 0x21, 0x21, 0x2F,
+    0x21, 0x27, 0x2C, 0x32,
+    0x21, 0x29, 0x2E, 0x32,
+    0x21, 0x25, 0x2E, 0x32,
+    0x21, 0x25, 0x2A, 0x32,
+    0x21, 0x32, 0x32, 0x32,
+    0x21, 0x21, 0x32, 0x32,
+    0x21, 0x21, 0x21, 0x32,
+    0x21, 0x28, 0x2E, 0x35,
+    0x21, 0x2B, 0x30, 0x35,
+    0x21, 0x26, 0x30, 0x35,
+    0x21, 0x26, 0x2B, 0x35,
+    0x21, 0x35, 0x35, 0x35,
+    0x21, 0x21, 0x35, 0x35,
+    0x21, 0x21, 0x21, 0x35,
+    0x21, 0x2C, 0x33, 0x38,
+    0x21, 0x26, 0x33, 0x38,
+    0x21, 0x26, 0x2D, 0x38,
+    0x21, 0x38, 0x38, 0x38,
+    0x21, 0x21, 0x38, 0x38,
+    0x21, 0x21, 0x21, 0x38,
+    0x21, 0x2E, 0x35, 0x3B,
+    0x21, 0x27, 0x35, 0x3B,
+    0x21, 0x27, 0x2E, 0x3B,
+    0x21, 0x3B, 0x3B, 0x3B,
+    0x21, 0x21, 0x3B, 0x3B,
+    0x21, 0x21, 0x21, 0x3B,
+    0x21, 0x2F, 0x37, 0x3E,
+    0x21, 0x28, 0x37, 0x3E,
+    0x21, 0x28, 0x30, 0x3E,
+    0x21, 0x3E, 0x3E, 0x3E,
+    0x21, 0x21, 0x3E, 0x3E,
+    0x21, 0x21, 0x21, 0x3E,
+    0x22, 0x23, 0x23, 0x24,
+    0x22, 0x23, 0x24, 0x25,
+    0x22, 0x24, 0x25, 0x26,
+    0x22, 0x23, 0x25, 0x26,
+    0x22, 0x23, 0x24, 0x26,
+    0x22, 0x24, 0x25, 0x27,
+    0x22, 0x24, 0x26, 0x27,
+    0x22, 0x23, 0x26, 0x27,
+    0x22, 0x23, 0x25, 0x27,
+    0x22, 0x24, 0x26, 0x28,
+    0x22, 0x25, 0x27, 0x28,
+    0x22, 0x23, 0x27, 0x28,
+    0x22, 0x23, 0x25, 0x28,
+    0x22, 0x28, 0x28, 0x28,
+    0x22, 0x22, 0x28, 0x28,
+    0x22, 0x22, 0x22, 0x28,
+    0x22, 0x25, 0x26, 0x29,
+    0x22, 0x25, 0x28, 0x29,
+    0x22, 0x23, 0x28, 0x29,
+    0x22, 0x23, 0x26, 0x29,
+    0x22, 0x25, 0x27, 0x2A,
+    0x22, 0x26, 0x28, 0x2A,
+    0x22, 0x24, 0x28, 0x2A,
+    0x22, 0x24, 0x26, 0x2A,
+    0x22, 0x2A, 0x2A, 0x2A,
+    0x22, 0x22, 0x2A, 0x2A,
+    0x22, 0x22, 0x22, 0x2A,
+    0x22, 0x26, 0x29, 0x2D,
+    0x22, 0x27, 0x2B, 0x2D,
+    0x22, 0x24, 0x2B, 0x2D,
+    0x22, 0x24, 0x28, 0x2D,
+    0x22, 0x2D, 0x2D, 0x2D,
+    0x22, 0x22, 0x2D, 0x2D,
+    0x22, 0x22, 0x22, 0x2D,
+    0x22, 0x27, 0x2B, 0x30,
+    0x22, 0x29, 0x2D, 0x30,
+    0x22, 0x25, 0x2D, 0x30,
+    0x22, 0x25, 0x29, 0x30,
+    0x22, 0x30, 0x30, 0x30,
+    0x22, 0x22, 0x30, 0x30,
+    0x22, 0x22, 0x22, 0x30,
+    0x22, 0x28, 0x2D, 0x33,
+    0x22, 0x2A, 0x2F, 0x33,
+    0x22, 0x26, 0x2F, 0x33,
+    0x22, 0x26, 0x2B, 0x33,
+    0x22, 0x33, 0x33, 0x33,
+    0x22, 0x22, 0x33, 0x33,
+    0x22, 0x22, 0x22, 0x33,
+    0x22, 0x29, 0x2F, 0x36,
+    0x22, 0x2C, 0x31, 0x36,
+    0x22, 0x27, 0x31, 0x36,
+    0x22, 0x27, 0x2C, 0x36,
+    0x22, 0x36, 0x36, 0x36,
+    0x22, 0x22, 0x36, 0x36,
+    0x22, 0x22, 0x22, 0x36,
+    0x22, 0x2D, 0x34, 0x39,
+    0x22, 0x27, 0x34, 0x39,
+    0x22, 0x27, 0x2E, 0x39,
+    0x22, 0x39, 0x39, 0x39,
+    0x22, 0x22, 0x39, 0x39,
+    0x22, 0x22, 0x22, 0x39,
+    0x22, 0x2F, 0x36, 0x3C,
+    0x22, 0x28, 0x36, 0x3C,
+    0x22, 0x28, 0x2F, 0x3C,
+    0x22, 0x3C, 0x3C, 0x3C,
+    0x22, 0x22, 0x3C, 0x3C,
+    0x22, 0x22, 0x22, 0x3C,
+    0x22, 0x30, 0x38, 0x3F,
+    0x22, 0x29, 0x38, 0x3F,
+    0x22, 0x29, 0x31, 0x3F,
+    0x22, 0x3F, 0x3F, 0x3F,
+    0x22, 0x22, 0x3F, 0x3F,
+    0x22, 0x22, 0x22, 0x3F,
+    0x23, 0x24, 0x24, 0x25,
+    0x23, 0x24, 0x25, 0x26,
+    0x23, 0x25, 0x26, 0x27,
+    0x23, 0x24, 0x26, 0x27,
+    0x23, 0x24, 0x25, 0x27,
+    0x23, 0x25, 0x26, 0x28,
+    0x23, 0x25, 0x27, 0x28,
+    0x23, 0x24, 0x27, 0x28,
+    0x23, 0x24, 0x26, 0x28,
+    0x23, 0x25, 0x27, 0x29,
+    0x23, 0x26, 0x28, 0x29,
+    0x23, 0x24, 0x28, 0x29,
+    0x23, 0x24, 0x26, 0x29,
+    0x23, 0x29, 0x29, 0x29,
+    0x23, 0x23, 0x29, 0x29,
+    0x23, 0x23, 0x23, 0x29,
+    0x23, 0x26, 0x27, 0x2A,
+    0x23, 0x26, 0x29, 0x2A,
+    0x23, 0x24, 0x29, 0x2A,
+    0x23, 0x24, 0x27, 0x2A,
+    0x23, 0x26, 0x28, 0x2B,
+    0x23, 0x27, 0x29, 0x2B,
+    0x23, 0x25, 0x29, 0x2B,
+    0x23, 0x25, 0x27, 0x2B,
+    0x23, 0x2B, 0x2B, 0x2B,
+    0x23, 0x23, 0x2B, 0x2B,
+    0x23, 0x23, 0x23, 0x2B,
+    0x23, 0x27, 0x2A, 0x2E,
+    0x23, 0x28, 0x2C, 0x2E,
+    0x23, 0x25, 0x2C, 0x2E,
+    0x23, 0x25, 0x29, 0x2E,
+    0x23, 0x2E, 0x2E, 0x2E,
+    0x23, 0x23, 0x2E, 0x2E,
+    0x23, 0x23, 0x23, 0x2E,
+    0x23, 0x28, 0x2C, 0x31,
+    0x23, 0x2A, 0x2E, 0x31,
+    0x23, 0x26, 0x2E, 0x31,
+    0x23, 0x26, 0x2A, 0x31,
+    0x23, 0x31, 0x31, 0x31,
+    0x23, 0x23, 0x31, 0x31,
+    0x23, 0x23, 0x23, 0x31,
+    0x23, 0x29, 0x2E, 0x34,
+    0x23, 0x2B, 0x30, 0x34,
+    0x23, 0x27, 0x30, 0x34,
+    0x23, 0x27, 0x2C, 0x34,
+    0x23, 0x34, 0x34, 0x34,
+    0x23, 0x23, 0x34, 0x34,
+    0x23, 0x23, 0x23, 0x34,
+    0x23, 0x2A, 0x30, 0x37,
+    0x23, 0x2D, 0x32, 0x37,
+    0x23, 0x28, 0x32, 0x37,
+    0x23, 0x28, 0x2D, 0x37,
+    0x23, 0x37, 0x37, 0x37,
+    0x23, 0x23, 0x37, 0x37,
+    0x23, 0x23, 0x23, 0x37,
+    0x23, 0x2E, 0x35, 0x3A,
+    0x23, 0x28, 0x35, 0x3A,
+    0x23, 0x28, 0x2F, 0x3A,
+    0x23, 0x3A, 0x3A, 0x3A,
+    0x23, 0x23, 0x3A, 0x3A,
+    0x23, 0x23, 0x23, 0x3A,
+    0x23, 0x30, 0x37, 0x3D,
+    0x23, 0x29, 0x37, 0x3D,
+    0x23, 0x29, 0x30, 0x3D,
+    0x23, 0x3D, 0x3D, 0x3D,
+    0x23, 0x23, 0x3D, 0x3D,
+    0x23, 0x23, 0x23, 0x3D,
+    0x24, 0x25, 0x25, 0x26,
+    0x24, 0x25, 0x26, 0x27,
+    0x24, 0x26, 0x27, 0x28,
+    0x24, 0x25, 0x27, 0x28,
+    0x24, 0x25, 0x26, 0x28,
+    0x24, 0x26, 0x27, 0x29,
+    0x24, 0x26, 0x28, 0x29,
+    0x24, 0x25, 0x28, 0x29,
+    0x24, 0x25, 0x27, 0x29,
+    0x24, 0x26, 0x28, 0x2A,
+    0x24, 0x27, 0x29, 0x2A,
+    0x24, 0x25, 0x29, 0x2A,
+    0x24, 0x25, 0x27, 0x2A,
+    0x24, 0x2A, 0x2A, 0x2A,
+    0x24, 0x24, 0x2A, 0x2A,
+    0x24, 0x24, 0x24, 0x2A,
+    0x24, 0x27, 0x28, 0x2B,
+    0x24, 0x27, 0x2A, 0x2B,
+    0x24, 0x25, 0x2A, 0x2B,
+    0x24, 0x25, 0x28, 0x2B,
+    0x24, 0x27, 0x29, 0x2C,
+    0x24, 0x28, 0x2A, 0x2C,
+    0x24, 0x26, 0x2A, 0x2C,
+    0x24, 0x26, 0x28, 0x2C,
+    0x24, 0x2C, 0x2C, 0x2C,
+    0x24, 0x24, 0x2C, 0x2C,
+    0x24, 0x24, 0x24, 0x2C,
+    0x24, 0x28, 0x2B, 0x2F,
+    0x24, 0x29, 0x2D, 0x2F,
+    0x24, 0x26, 0x2D, 0x2F,
+    0x24, 0x26, 0x2A, 0x2F,
+    0x24, 0x2F, 0x2F, 0x2F,
+    0x24, 0x24, 0x2F, 0x2F,
+    0x24, 0x24, 0x24, 0x2F,
+    0x24, 0x29, 0x2D, 0x32,
+    0x24, 0x2B, 0x2F, 0x32,
+    0x24, 0x27, 0x2F, 0x32,
+    0x24, 0x27, 0x2B, 0x32,
+    0x24, 0x32, 0x32, 0x32,
+    0x24, 0x24, 0x32, 0x32,
+    0x24, 0x24, 0x24, 0x32,
+    0x24, 0x2A, 0x2F, 0x35,
+    0x24, 0x2C, 0x31, 0x35,
+    0x24, 0x28, 0x31, 0x35,
+    0x24, 0x28, 0x2D, 0x35,
+    0x24, 0x35, 0x35, 0x35,
+    0x24, 0x24, 0x35, 0x35,
+    0x24, 0x24, 0x24, 0x35,
+    0x24, 0x2B, 0x31, 0x38,
+    0x24, 0x2E, 0x33, 0x38,
+    0x24, 0x29, 0x33, 0x38,
+    0x24, 0x29, 0x2E, 0x38,
+    0x24, 0x38, 0x38, 0x38,
+    0x24, 0x24, 0x38, 0x38,
+    0x24, 0x24, 0x24, 0x38,
+    0x24, 0x2F, 0x36, 0x3B,
+    0x24, 0x29, 0x36, 0x3B,
+    0x24, 0x29, 0x30, 0x3B,
+    0x24, 0x3B, 0x3B, 0x3B,
+    0x24, 0x24, 0x3B, 0x3B,
+    0x24, 0x24, 0x24, 0x3B,
+    0x24, 0x31, 0x38, 0x3E,
+    0x24, 0x2A, 0x38, 0x3E,
+    0x24, 0x2A, 0x31, 0x3E,
+    0x24, 0x3E, 0x3E, 0x3E,
+    0x24, 0x24, 0x3E, 0x3E,
+    0x24, 0x24, 0x24, 0x3E,
+    0x25, 0x26, 0x26, 0x27,
+    0x25, 0x26, 0x27, 0x28,
+    0x25, 0x27, 0x28, 0x29,
+    0x25, 0x26, 0x28, 0x29,
+    0x25, 0x26, 0x27, 0x29,
+    0x25, 0x27, 0x28, 0x2A,
+    0x25, 0x27, 0x29, 0x2A,
+    0x25, 0x26, 0x29, 0x2A,
+    0x25, 0x26, 0x28, 0x2A,
+    0x25, 0x27, 0x29, 0x2B,
+    0x25, 0x28, 0x2A, 0x2B,
+    0x25, 0x26, 0x2A, 0x2B,
+    0x25, 0x26, 0x28, 0x2B,
+    0x25, 0x2B, 0x2B, 0x2B,
+    0x25, 0x25, 0x2B, 0x2B,
+    0x25, 0x25, 0x25, 0x2B,
+    0x25, 0x28, 0x29, 0x2C,
+    0x25, 0x28, 0x2B, 0x2C,
+    0x25, 0x26, 0x2B, 0x2C,
+    0x25, 0x26, 0x29, 0x2C,
+    0x25, 0x28, 0x2A, 0x2D,
+    0x25, 0x29, 0x2B, 0x2D,
+    0x25, 0x27, 0x2B, 0x2D,
+    0x25, 0x27, 0x29, 0x2D,
+    0x25, 0x2D, 0x2D, 0x2D,
+    0x25, 0x25, 0x2D, 0x2D,
+    0x25, 0x25, 0x25, 0x2D,
+    0x25, 0x29, 0x2C, 0x30,
+    0x25, 0x2A, 0x2E, 0x30,
+    0x25, 0x27, 0x2E, 0x30,
+    0x25, 0x27, 0x2B, 0x30,
+    0x25, 0x30, 0x30, 0x30,
+    0x25, 0x25, 0x30, 0x30,
+    0x25, 0x25, 0x25, 0x30,
+    0x25, 0x2A, 0x2E, 0x33,
+    0x25, 0x2C, 0x30, 0x33,
+    0x25, 0x28, 0x30, 0x33,
+    0x25, 0x28, 0x2C, 0x33,
+    0x25, 0x33, 0x33, 0x33,
+    0x25, 0x25, 0x33, 0x33,
+    0x25, 0x25, 0x25, 0x33,
+    0x25, 0x2B, 0x30, 0x36,
+    0x25, 0x2D, 0x32, 0x36,
+    0x25, 0x29, 0x32, 0x36,
+    0x25, 0x29, 0x2E, 0x36,
+    0x25, 0x36, 0x36, 0x36,
+    0x25, 0x25, 0x36, 0x36,
+    0x25, 0x25, 0x25, 0x36,
+    0x25, 0x2C, 0x32, 0x39,
+    0x25, 0x2F, 0x34, 0x39,
+    0x25, 0x2A, 0x34, 0x39,
+    0x25, 0x2A, 0x2F, 0x39,
+    0x25, 0x39, 0x39, 0x39,
+    0x25, 0x25, 0x39, 0x39,
+    0x25, 0x25, 0x25, 0x39,
+    0x25, 0x30, 0x37, 0x3C,
+    0x25, 0x2A, 0x37, 0x3C,
+    0x25, 0x2A, 0x31, 0x3C,
+    0x25, 0x3C, 0x3C, 0x3C,
+    0x25, 0x25, 0x3C, 0x3C,
+    0x25, 0x25, 0x25, 0x3C,
+    0x25, 0x32, 0x39, 0x3F,
+    0x25, 0x2B, 0x39, 0x3F,
+    0x25, 0x2B, 0x32, 0x3F,
+    0x25, 0x3F, 0x3F, 0x3F,
+    0x25, 0x25, 0x3F, 0x3F,
+    0x25, 0x25, 0x25, 0x3F,
+    0x26, 0x27, 0x27, 0x28,
+    0x26, 0x27, 0x28, 0x29,
+    0x26, 0x28, 0x29, 0x2A,
+    0x26, 0x27, 0x29, 0x2A,
+    0x26, 0x27, 0x28, 0x2A,
+    0x26, 0x28, 0x29, 0x2B,
+    0x26, 0x28, 0x2A, 0x2B,
+    0x26, 0x27, 0x2A, 0x2B,
+    0x26, 0x27, 0x29, 0x2B,
+    0x26, 0x28, 0x2A, 0x2C,
+    0x26, 0x29, 0x2B, 0x2C,
+    0x26, 0x27, 0x2B, 0x2C,
+    0x26, 0x27, 0x29, 0x2C,
+    0x26, 0x2C, 0x2C, 0x2C,
+    0x26, 0x26, 0x2C, 0x2C,
+    0x26, 0x26, 0x26, 0x2C,
+    0x26, 0x29, 0x2A, 0x2D,
+    0x26, 0x29, 0x2C, 0x2D,
+    0x26, 0x27, 0x2C, 0x2D,
+    0x26, 0x27, 0x2A, 0x2D,
+    0x26, 0x29, 0x2B, 0x2E,
+    0x26, 0x2A, 0x2C, 0x2E,
+    0x26, 0x28, 0x2C, 0x2E,
+    0x26, 0x28, 0x2A, 0x2E,
+    0x26, 0x2E, 0x2E, 0x2E,
+    0x26, 0x26, 0x2E, 0x2E,
+    0x26, 0x26, 0x26, 0x2E,
+    0x26, 0x2A, 0x2D, 0x31,
+    0x26, 0x2B, 0x2F, 0x31,
+    0x26, 0x28, 0x2F, 0x31,
+    0x26, 0x28, 0x2C, 0x31,
+    0x26, 0x31, 0x31, 0x31,
+    0x26, 0x26, 0x31, 0x31,
+    0x26, 0x26, 0x26, 0x31,
+    0x26, 0x2B, 0x2F, 0x34,
+    0x26, 0x2D, 0x31, 0x34,
+    0x26, 0x29, 0x31, 0x34,
+    0x26, 0x29, 0x2D, 0x34,
+    0x26, 0x34, 0x34, 0x34,
+    0x26, 0x26, 0x34, 0x34,
+    0x26, 0x26, 0x26, 0x34,
+    0x26, 0x2C, 0x31, 0x37,
+    0x26, 0x2E, 0x33, 0x37,
+    0x26, 0x2A, 0x33, 0x37,
+    0x26, 0x2A, 0x2F, 0x37,
+    0x26, 0x37, 0x37, 0x37,
+    0x26, 0x26, 0x37, 0x37,
+    0x26, 0x26, 0x26, 0x37,
+    0x26, 0x2D, 0x33, 0x3A,
+    0x26, 0x30, 0x35, 0x3A,
+    0x26, 0x2B, 0x35, 0x3A,
+    0x26, 0x2B, 0x30, 0x3A,
+    0x26, 0x3A, 0x3A, 0x3A,
+    0x26, 0x26, 0x3A, 0x3A,
+    0x26, 0x26, 0x26, 0x3A,
+    0x26, 0x31, 0x38, 0x3D,
+    0x26, 0x2B, 0x38, 0x3D,
+    0x26, 0x2B, 0x32, 0x3D,
+    0x26, 0x3D, 0x3D, 0x3D,
+    0x26, 0x26, 0x3D, 0x3D,
+    0x26, 0x26, 0x26, 0x3D,
+    0x27, 0x28, 0x28, 0x29,
+    0x27, 0x28, 0x29, 0x2A,
+    0x27, 0x29, 0x2A, 0x2B,
+    0x27, 0x28, 0x2A, 0x2B,
+    0x27, 0x28, 0x29, 0x2B,
+    0x27, 0x29, 0x2A, 0x2C,
+    0x27, 0x29, 0x2B, 0x2C,
+    0x27, 0x28, 0x2B, 0x2C,
+    0x27, 0x28, 0x2A, 0x2C,
+    0x27, 0x29, 0x2B, 0x2D,
+    0x27, 0x2A, 0x2C, 0x2D,
+    0x27, 0x28, 0x2C, 0x2D,
+    0x27, 0x28, 0x2A, 0x2D,
+    0x27, 0x2D, 0x2D, 0x2D,
+    0x27, 0x27, 0x2D, 0x2D,
+    0x27, 0x27, 0x27, 0x2D,
+    0x27, 0x2A, 0x2B, 0x2E,
+    0x27, 0x2A, 0x2D, 0x2E,
+    0x27, 0x28, 0x2D, 0x2E,
+    0x27, 0x28, 0x2B, 0x2E,
+    0x27, 0x2A, 0x2C, 0x2F,
+    0x27, 0x2B, 0x2D, 0x2F,
+    0x27, 0x29, 0x2D, 0x2F,
+    0x27, 0x29, 0x2B, 0x2F,
+    0x27, 0x2F, 0x2F, 0x2F,
+    0x27, 0x27, 0x2F, 0x2F,
+    0x27, 0x27, 0x27, 0x2F,
+    0x27, 0x2B, 0x2E, 0x32,
+    0x27, 0x2C, 0x30, 0x32,
+    0x27, 0x29, 0x30, 0x32,
+    0x27, 0x29, 0x2D, 0x32,
+    0x27, 0x32, 0x32, 0x32,
+    0x27, 0x27, 0x32, 0x32,
+    0x27, 0x27, 0x27, 0x32,
+    0x27, 0x2C, 0x30, 0x35,
+    0x27, 0x2E, 0x32, 0x35,
+    0x27, 0x2A, 0x32, 0x35,
+    0x27, 0x2A, 0x2E, 0x35,
+    0x27, 0x35, 0x35, 0x35,
+    0x27, 0x27, 0x35, 0x35,
+    0x27, 0x27, 0x27, 0x35,
+    0x27, 0x2D, 0x32, 0x38,
+    0x27, 0x2F, 0x34, 0x38,
+    0x27, 0x2B, 0x34, 0x38,
+    0x27, 0x2B, 0x30, 0x38,
+    0x27, 0x38, 0x38, 0x38,
+    0x27, 0x27, 0x38, 0x38,
+    0x27, 0x27, 0x27, 0x38,
+    0x27, 0x2E, 0x34, 0x3B,
+    0x27, 0x31, 0x36, 0x3B,
+    0x27, 0x2C, 0x36, 0x3B,
+    0x27, 0x2C, 0x31, 0x3B,
+    0x27, 0x3B, 0x3B, 0x3B,
+    0x27, 0x27, 0x3B, 0x3B,
+    0x27, 0x27, 0x27, 0x3B,
+    0x27, 0x32, 0x39, 0x3E,
+    0x27, 0x2C, 0x39, 0x3E,
+    0x27, 0x2C, 0x33, 0x3E,
+    0x27, 0x3E, 0x3E, 0x3E,
+    0x27, 0x27, 0x3E, 0x3E,
+    0x27, 0x27, 0x27, 0x3E,
+    0x28, 0x29, 0x29, 0x2A,
+    0x28, 0x29, 0x2A, 0x2B,
+    0x28, 0x2A, 0x2B, 0x2C,
+    0x28, 0x29, 0x2B, 0x2C,
+    0x28, 0x29, 0x2A, 0x2C,
+    0x28, 0x2A, 0x2B, 0x2D,
+    0x28, 0x2A, 0x2C, 0x2D,
+    0x28, 0x29, 0x2C, 0x2D,
+    0x28, 0x29, 0x2B, 0x2D,
+    0x28, 0x2A, 0x2C, 0x2E,
+    0x28, 0x2B, 0x2D, 0x2E,
+    0x28, 0x29, 0x2D, 0x2E,
+    0x28, 0x29, 0x2B, 0x2E,
+    0x28, 0x2E, 0x2E, 0x2E,
+    0x28, 0x28, 0x2E, 0x2E,
+    0x28, 0x28, 0x28, 0x2E,
+    0x28, 0x2B, 0x2C, 0x2F,
+    0x28, 0x2B, 0x2E, 0x2F,
+    0x28, 0x29, 0x2E, 0x2F,
+    0x28, 0x29, 0x2C, 0x2F,
+    0x28, 0x2B, 0x2D, 0x30,
+    0x28, 0x2C, 0x2E, 0x30,
+    0x28, 0x2A, 0x2E, 0x30,
+    0x28, 0x2A, 0x2C, 0x30,
+    0x28, 0x30, 0x30, 0x30,
+    0x28, 0x28, 0x30, 0x30,
+    0x28, 0x28, 0x28, 0x30,
+    0x28, 0x2C, 0x2F, 0x33,
+    0x28, 0x2D, 0x31, 0x33,
+    0x28, 0x2A, 0x31, 0x33,
+    0x28, 0x2A, 0x2E, 0x33,
+    0x28, 0x33, 0x33, 0x33,
+    0x28, 0x28, 0x33, 0x33,
+    0x28, 0x28, 0x28, 0x33,
+    0x28, 0x2D, 0x31, 0x36,
+    0x28, 0x2F, 0x33, 0x36,
+    0x28, 0x2B, 0x33, 0x36,
+    0x28, 0x2B, 0x2F, 0x36,
+    0x28, 0x36, 0x36, 0x36,
+    0x28, 0x28, 0x36, 0x36,
+    0x28, 0x28, 0x28, 0x36,
+    0x28, 0x2E, 0x33, 0x39,
+    0x28, 0x30, 0x35, 0x39,
+    0x28, 0x2C, 0x35, 0x39,
+    0x28, 0x2C, 0x31, 0x39,
+    0x28, 0x39, 0x39, 0x39,
+    0x28, 0x28, 0x39, 0x39,
+    0x28, 0x28, 0x28, 0x39,
+    0x28, 0x2F, 0x35, 0x3C,
+    0x28, 0x32, 0x37, 0x3C,
+    0x28, 0x2D, 0x37, 0x3C,
+    0x28, 0x2D, 0x32, 0x3C,
+    0x28, 0x3C, 0x3C, 0x3C,
+    0x28, 0x28, 0x3C, 0x3C,
+    0x28, 0x28, 0x28, 0x3C,
+    0x28, 0x33, 0x3A, 0x3F,
+    0x28, 0x2D, 0x3A, 0x3F,
+    0x28, 0x2D, 0x34, 0x3F,
+    0x28, 0x3F, 0x3F, 0x3F,
+    0x28, 0x28, 0x3F, 0x3F,
+    0x28, 0x28, 0x28, 0x3F,
+    0x29, 0x2A, 0x2A, 0x2B,
+    0x29, 0x2A, 0x2B, 0x2C,
+    0x29, 0x2B, 0x2C, 0x2D,
+    0x29, 0x2A, 0x2C, 0x2D,
+    0x29, 0x2A, 0x2B, 0x2D,
+    0x29, 0x2B, 0x2C, 0x2E,
+    0x29, 0x2B, 0x2D, 0x2E,
+    0x29, 0x2A, 0x2D, 0x2E,
+    0x29, 0x2A, 0x2C, 0x2E,
+    0x29, 0x2B, 0x2D, 0x2F,
+    0x29, 0x2C, 0x2E, 0x2F,
+    0x29, 0x2A, 0x2E, 0x2F,
+    0x29, 0x2A, 0x2C, 0x2F,
+    0x29, 0x2F, 0x2F, 0x2F,
+    0x29, 0x29, 0x2F, 0x2F,
+    0x29, 0x29, 0x29, 0x2F,
+    0x29, 0x2C, 0x2D, 0x30,
+    0x29, 0x2C, 0x2F, 0x30,
+    0x29, 0x2A, 0x2F, 0x30,
+    0x29, 0x2A, 0x2D, 0x30,
+    0x29, 0x2C, 0x2E, 0x31,
+    0x29, 0x2D, 0x2F, 0x31,
+    0x29, 0x2B, 0x2F, 0x31,
+    0x29, 0x2B, 0x2D, 0x31,
+    0x29, 0x31, 0x31, 0x31,
+    0x29, 0x29, 0x31, 0x31,
+    0x29, 0x29, 0x29, 0x31,
+    0x29, 0x2D, 0x30, 0x34,
+    0x29, 0x2E, 0x32, 0x34,
+    0x29, 0x2B, 0x32, 0x34,
+    0x29, 0x2B, 0x2F, 0x34,
+    0x29, 0x34, 0x34, 0x34,
+    0x29, 0x29, 0x34, 0x34,
+    0x29, 0x29, 0x29, 0x34,
+    0x29, 0x2E, 0x32, 0x37,
+    0x29, 0x30, 0x34, 0x37,
+    0x29, 0x2C, 0x34, 0x37,
+    0x29, 0x2C, 0x30, 0x37,
+    0x29, 0x37, 0x37, 0x37,
+    0x29, 0x29, 0x37, 0x37,
+    0x29, 0x29, 0x29, 0x37,
+    0x29, 0x2F, 0x34, 0x3A,
+    0x29, 0x31, 0x36, 0x3A,
+    0x29, 0x2D, 0x36, 0x3A,
+    0x29, 0x2D, 0x32, 0x3A,
+    0x29, 0x3A, 0x3A, 0x3A,
+    0x29, 0x29, 0x3A, 0x3A,
+    0x29, 0x29, 0x29, 0x3A,
+    0x29, 0x30, 0x36, 0x3D,
+    0x29, 0x33, 0x38, 0x3D,
+    0x29, 0x2E, 0x38, 0x3D,
+    0x29, 0x2E, 0x33, 0x3D,
+    0x29, 0x3D, 0x3D, 0x3D,
+    0x29, 0x29, 0x3D, 0x3D,
+    0x29, 0x29, 0x29, 0x3D,
+    0x2A, 0x2B, 0x2B, 0x2C,
+    0x2A, 0x2B, 0x2C, 0x2D,
+    0x2A, 0x2C, 0x2D, 0x2E,
+    0x2A, 0x2B, 0x2D, 0x2E,
+    0x2A, 0x2B, 0x2C, 0x2E,
+    0x2A, 0x2C, 0x2D, 0x2F,
+    0x2A, 0x2C, 0x2E, 0x2F,
+    0x2A, 0x2B, 0x2E, 0x2F,
+    0x2A, 0x2B, 0x2D, 0x2F,
+    0x2A, 0x2C, 0x2E, 0x30,
+    0x2A, 0x2D, 0x2F, 0x30,
+    0x2A, 0x2B, 0x2F, 0x30,
+    0x2A, 0x2B, 0x2D, 0x30,
+    0x2A, 0x30, 0x30, 0x30,
+    0x2A, 0x2A, 0x30, 0x30,
+    0x2A, 0x2A, 0x2A, 0x30,
+    0x2A, 0x2D, 0x2E, 0x31,
+    0x2A, 0x2D, 0x30, 0x31,
+    0x2A, 0x2B, 0x30, 0x31,
+    0x2A, 0x2B, 0x2E, 0x31,
+    0x2A, 0x2D, 0x2F, 0x32,
+    0x2A, 0x2E, 0x30, 0x32,
+    0x2A, 0x2C, 0x30, 0x32,
+    0x2A, 0x2C, 0x2E, 0x32,
+    0x2A, 0x32, 0x32, 0x32,
+    0x2A, 0x2A, 0x32, 0x32,
+    0x2A, 0x2A, 0x2A, 0x32,
+    0x2A, 0x2E, 0x31, 0x35,
+    0x2A, 0x2F, 0x33, 0x35,
+    0x2A, 0x2C, 0x33, 0x35,
+    0x2A, 0x2C, 0x30, 0x35,
+    0x2A, 0x35, 0x35, 0x35,
+    0x2A, 0x2A, 0x35, 0x35,
+    0x2A, 0x2A, 0x2A, 0x35,
+    0x2A, 0x2F, 0x33, 0x38,
+    0x2A, 0x31, 0x35, 0x38,
+    0x2A, 0x2D, 0x35, 0x38,
+    0x2A, 0x2D, 0x31, 0x38,
+    0x2A, 0x38, 0x38, 0x38,
+    0x2A, 0x2A, 0x38, 0x38,
+    0x2A, 0x2A, 0x2A, 0x38,
+    0x2A, 0x30, 0x35, 0x3B,
+    0x2A, 0x32, 0x37, 0x3B,
+    0x2A, 0x2E, 0x37, 0x3B,
+    0x2A, 0x2E, 0x33, 0x3B,
+    0x2A, 0x3B, 0x3B, 0x3B,
+    0x2A, 0x2A, 0x3B, 0x3B,
+    0x2A, 0x2A, 0x2A, 0x3B,
+    0x2A, 0x31, 0x37, 0x3E,
+    0x2A, 0x34, 0x39, 0x3E,
+    0x2A, 0x2F, 0x39, 0x3E,
+    0x2A, 0x2F, 0x34, 0x3E,
+    0x2A, 0x3E, 0x3E, 0x3E,
+    0x2A, 0x2A, 0x3E, 0x3E,
+    0x2A, 0x2A, 0x2A, 0x3E,
+    0x2B, 0x2C, 0x2C, 0x2D,
+    0x2B, 0x2C, 0x2D, 0x2E,
+    0x2B, 0x2D, 0x2E, 0x2F,
+    0x2B, 0x2C, 0x2E, 0x2F,
+    0x2B, 0x2C, 0x2D, 0x2F,
+    0x2B, 0x2D, 0x2E, 0x30,
+    0x2B, 0x2D, 0x2F, 0x30,
+    0x2B, 0x2C, 0x2F, 0x30,
+    0x2B, 0x2C, 0x2E, 0x30,
+    0x2B, 0x2D, 0x2F, 0x31,
+    0x2B, 0x2E, 0x30, 0x31,
+    0x2B, 0x2C, 0x30, 0x31,
+    0x2B, 0x2C, 0x2E, 0x31,
+    0x2B, 0x31, 0x31, 0x31,
+    0x2B, 0x2B, 0x31, 0x31,
+    0x2B, 0x2B, 0x2B, 0x31,
+    0x2B, 0x2E, 0x2F, 0x32,
+    0x2B, 0x2E, 0x31, 0x32,
+    0x2B, 0x2C, 0x31, 0x32,
+    0x2B, 0x2C, 0x2F, 0x32,
+    0x2B, 0x2E, 0x30, 0x33,
+    0x2B, 0x2F, 0x31, 0x33,
+    0x2B, 0x2D, 0x31, 0x33,
+    0x2B, 0x2D, 0x2F, 0x33,
+    0x2B, 0x33, 0x33, 0x33,
+    0x2B, 0x2B, 0x33, 0x33,
+    0x2B, 0x2B, 0x2B, 0x33,
+    0x2B, 0x2F, 0x32, 0x36,
+    0x2B, 0x30, 0x34, 0x36,
+    0x2B, 0x2D, 0x34, 0x36,
+    0x2B, 0x2D, 0x31, 0x36,
+    0x2B, 0x36, 0x36, 0x36,
+    0x2B, 0x2B, 0x36, 0x36,
+    0x2B, 0x2B, 0x2B, 0x36,
+    0x2B, 0x30, 0x34, 0x39,
+    0x2B, 0x32, 0x36, 0x39,
+    0x2B, 0x2E, 0x36, 0x39,
+    0x2B, 0x2E, 0x32, 0x39,
+    0x2B, 0x39, 0x39, 0x39,
+    0x2B, 0x2B, 0x39, 0x39,
+    0x2B, 0x2B, 0x2B, 0x39,
+    0x2B, 0x31, 0x36, 0x3C,
+    0x2B, 0x33, 0x38, 0x3C,
+    0x2B, 0x2F, 0x38, 0x3C,
+    0x2B, 0x2F, 0x34, 0x3C,
+    0x2B, 0x3C, 0x3C, 0x3C,
+    0x2B, 0x2B, 0x3C, 0x3C,
+    0x2B, 0x2B, 0x2B, 0x3C,
+    0x2B, 0x32, 0x38, 0x3F,
+    0x2B, 0x35, 0x3A, 0x3F,
+    0x2B, 0x30, 0x3A, 0x3F,
+    0x2B, 0x30, 0x35, 0x3F,
+    0x2B, 0x3F, 0x3F, 0x3F,
+    0x2B, 0x2B, 0x3F, 0x3F,
+    0x2B, 0x2B, 0x2B, 0x3F,
+    0x2C, 0x2D, 0x2D, 0x2E,
+    0x2C, 0x2D, 0x2E, 0x2F,
+    0x2C, 0x2E, 0x2F, 0x30,
+    0x2C, 0x2D, 0x2F, 0x30,
+    0x2C, 0x2D, 0x2E, 0x30,
+    0x2C, 0x2E, 0x2F, 0x31,
+    0x2C, 0x2E, 0x30, 0x31,
+    0x2C, 0x2D, 0x30, 0x31,
+    0x2C, 0x2D, 0x2F, 0x31,
+    0x2C, 0x2E, 0x30, 0x32,
+    0x2C, 0x2F, 0x31, 0x32,
+    0x2C, 0x2D, 0x31, 0x32,
+    0x2C, 0x2D, 0x2F, 0x32,
+    0x2C, 0x32, 0x32, 0x32,
+    0x2C, 0x2C, 0x32, 0x32,
+    0x2C, 0x2C, 0x2C, 0x32,
+    0x2C, 0x2F, 0x30, 0x33,
+    0x2C, 0x2F, 0x32, 0x33,
+    0x2C, 0x2D, 0x32, 0x33,
+    0x2C, 0x2D, 0x30, 0x33,
+    0x2C, 0x2F, 0x31, 0x34,
+    0x2C, 0x30, 0x32, 0x34,
+    0x2C, 0x2E, 0x32, 0x34,
+    0x2C, 0x2E, 0x30, 0x34,
+    0x2C, 0x34, 0x34, 0x34,
+    0x2C, 0x2C, 0x34, 0x34,
+    0x2C, 0x2C, 0x2C, 0x34,
+    0x2C, 0x30, 0x33, 0x37,
+    0x2C, 0x31, 0x35, 0x37,
+    0x2C, 0x2E, 0x35, 0x37,
+    0x2C, 0x2E, 0x32, 0x37,
+    0x2C, 0x37, 0x37, 0x37,
+    0x2C, 0x2C, 0x37, 0x37,
+    0x2C, 0x2C, 0x2C, 0x37,
+    0x2C, 0x31, 0x35, 0x3A,
+    0x2C, 0x33, 0x37, 0x3A,
+    0x2C, 0x2F, 0x37, 0x3A,
+    0x2C, 0x2F, 0x33, 0x3A,
+    0x2C, 0x3A, 0x3A, 0x3A,
+    0x2C, 0x2C, 0x3A, 0x3A,
+    0x2C, 0x2C, 0x2C, 0x3A,
+    0x2C, 0x32, 0x37, 0x3D,
+    0x2C, 0x34, 0x39, 0x3D,
+    0x2C, 0x30, 0x39, 0x3D,
+    0x2C, 0x30, 0x35, 0x3D,
+    0x2C, 0x3D, 0x3D, 0x3D,
+    0x2C, 0x2C, 0x3D, 0x3D,
+    0x2C, 0x2C, 0x2C, 0x3D,
+    0x2D, 0x2E, 0x2E, 0x2F,
+    0x2D, 0x2E, 0x2F, 0x30,
+    0x2D, 0x2F, 0x30, 0x31,
+    0x2D, 0x2E, 0x30, 0x31,
+    0x2D, 0x2E, 0x2F, 0x31,
+    0x2D, 0x2F, 0x30, 0x32,
+    0x2D, 0x2F, 0x31, 0x32,
+    0x2D, 0x2E, 0x31, 0x32,
+    0x2D, 0x2E, 0x30, 0x32,
+    0x2D, 0x2F, 0x31, 0x33,
+    0x2D, 0x30, 0x32, 0x33,
+    0x2D, 0x2E, 0x32, 0x33,
+    0x2D, 0x2E, 0x30, 0x33,
+    0x2D, 0x33, 0x33, 0x33,
+    0x2D, 0x2D, 0x33, 0x33,
+    0x2D, 0x2D, 0x2D, 0x33,
+    0x2D, 0x30, 0x31, 0x34,
+    0x2D, 0x30, 0x33, 0x34,
+    0x2D, 0x2E, 0x33, 0x34,
+    0x2D, 0x2E, 0x31, 0x34,
+    0x2D, 0x30, 0x32, 0x35,
+    0x2D, 0x31, 0x33, 0x35,
+    0x2D, 0x2F, 0x33, 0x35,
+    0x2D, 0x2F, 0x31, 0x35,
+    0x2D, 0x35, 0x35, 0x35,
+    0x2D, 0x2D, 0x35, 0x35,
+    0x2D, 0x2D, 0x2D, 0x35,
+    0x2D, 0x31, 0x34, 0x38,
+    0x2D, 0x32, 0x36, 0x38,
+    0x2D, 0x2F, 0x36, 0x38,
+    0x2D, 0x2F, 0x33, 0x38,
+    0x2D, 0x38, 0x38, 0x38,
+    0x2D, 0x2D, 0x38, 0x38,
+    0x2D, 0x2D, 0x2D, 0x38,
+    0x2D, 0x32, 0x36, 0x3B,
+    0x2D, 0x34, 0x38, 0x3B,
+    0x2D, 0x30, 0x38, 0x3B,
+    0x2D, 0x30, 0x34, 0x3B,
+    0x2D, 0x3B, 0x3B, 0x3B,
+    0x2D, 0x2D, 0x3B, 0x3B,
+    0x2D, 0x2D, 0x2D, 0x3B,
+    0x2D, 0x33, 0x38, 0x3E,
+    0x2D, 0x35, 0x3A, 0x3E,
+    0x2D, 0x31, 0x3A, 0x3E,
+    0x2D, 0x31, 0x36, 0x3E,
+    0x2D, 0x3E, 0x3E, 0x3E,
+    0x2D, 0x2D, 0x3E, 0x3E,
+    0x2D, 0x2D, 0x2D, 0x3E,
+    0x2E, 0x2F, 0x2F, 0x30,
+    0x2E, 0x2F, 0x30, 0x31,
+    0x2E, 0x30, 0x31, 0x32,
+    0x2E, 0x2F, 0x31, 0x32,
+    0x2E, 0x2F, 0x30, 0x32,
+    0x2E, 0x30, 0x31, 0x33,
+    0x2E, 0x30, 0x32, 0x33,
+    0x2E, 0x2F, 0x32, 0x33,
+    0x2E, 0x2F, 0x31, 0x33,
+    0x2E, 0x30, 0x32, 0x34,
+    0x2E, 0x31, 0x33, 0x34,
+    0x2E, 0x2F, 0x33, 0x34,
+    0x2E, 0x2F, 0x31, 0x34,
+    0x2E, 0x34, 0x34, 0x34,
+    0x2E, 0x2E, 0x34, 0x34,
+    0x2E, 0x2E, 0x2E, 0x34,
+    0x2E, 0x31, 0x32, 0x35,
+    0x2E, 0x31, 0x34, 0x35,
+    0x2E, 0x2F, 0x34, 0x35,
+    0x2E, 0x2F, 0x32, 0x35,
+    0x2E, 0x31, 0x33, 0x36,
+    0x2E, 0x32, 0x34, 0x36,
+    0x2E, 0x30, 0x34, 0x36,
+    0x2E, 0x30, 0x32, 0x36,
+    0x2E, 0x36, 0x36, 0x36,
+    0x2E, 0x2E, 0x36, 0x36,
+    0x2E, 0x2E, 0x2E, 0x36,
+    0x2E, 0x32, 0x35, 0x39,
+    0x2E, 0x33, 0x37, 0x39,
+    0x2E, 0x30, 0x37, 0x39,
+    0x2E, 0x30, 0x34, 0x39,
+    0x2E, 0x39, 0x39, 0x39,
+    0x2E, 0x2E, 0x39, 0x39,
+    0x2E, 0x2E, 0x2E, 0x39,
+    0x2E, 0x33, 0x37, 0x3C,
+    0x2E, 0x35, 0x39, 0x3C,
+    0x2E, 0x31, 0x39, 0x3C,
+    0x2E, 0x31, 0x35, 0x3C,
+    0x2E, 0x3C, 0x3C, 0x3C,
+    0x2E, 0x2E, 0x3C, 0x3C,
+    0x2E, 0x2E, 0x2E, 0x3C,
+    0x2E, 0x34, 0x39, 0x3F,
+    0x2E, 0x36, 0x3B, 0x3F,
+    0x2E, 0x32, 0x3B, 0x3F,
+    0x2E, 0x32, 0x37, 0x3F,
+    0x2E, 0x3F, 0x3F, 0x3F,
+    0x2E, 0x2E, 0x3F, 0x3F,
+    0x2E, 0x2E, 0x2E, 0x3F,
+    0x2F, 0x30, 0x30, 0x31,
+    0x2F, 0x30, 0x31, 0x32,
+    0x2F, 0x31, 0x32, 0x33,
+    0x2F, 0x30, 0x32, 0x33,
+    0x2F, 0x30, 0x31, 0x33,
+    0x2F, 0x31, 0x32, 0x34,
+    0x2F, 0x31, 0x33, 0x34,
+    0x2F, 0x30, 0x33, 0x34,
+    0x2F, 0x30, 0x32, 0x34,
+    0x2F, 0x31, 0x33, 0x35,
+    0x2F, 0x32, 0x34, 0x35,
+    0x2F, 0x30, 0x34, 0x35,
+    0x2F, 0x30, 0x32, 0x35,
+    0x2F, 0x35, 0x35, 0x35,
+    0x2F, 0x2F, 0x35, 0x35,
+    0x2F, 0x2F, 0x2F, 0x35,
+    0x2F, 0x32, 0x33, 0x36,
+    0x2F, 0x32, 0x35, 0x36,
+    0x2F, 0x30, 0x35, 0x36,
+    0x2F, 0x30, 0x33, 0x36,
+    0x2F, 0x32, 0x34, 0x37,
+    0x2F, 0x33, 0x35, 0x37,
+    0x2F, 0x31, 0x35, 0x37,
+    0x2F, 0x31, 0x33, 0x37,
+    0x2F, 0x37, 0x37, 0x37,
+    0x2F, 0x2F, 0x37, 0x37,
+    0x2F, 0x2F, 0x2F, 0x37,
+    0x2F, 0x33, 0x36, 0x3A,
+    0x2F, 0x34, 0x38, 0x3A,
+    0x2F, 0x31, 0x38, 0x3A,
+    0x2F, 0x31, 0x35, 0x3A,
+    0x2F, 0x3A, 0x3A, 0x3A,
+    0x2F, 0x2F, 0x3A, 0x3A,
+    0x2F, 0x2F, 0x2F, 0x3A,
+    0x2F, 0x34, 0x38, 0x3D,
+    0x2F, 0x36, 0x3A, 0x3D,
+    0x2F, 0x32, 0x3A, 0x3D,
+    0x2F, 0x32, 0x36, 0x3D,
+    0x2F, 0x3D, 0x3D, 0x3D,
+    0x2F, 0x2F, 0x3D, 0x3D,
+    0x2F, 0x2F, 0x2F, 0x3D,
+    0x30, 0x31, 0x31, 0x32,
+    0x30, 0x31, 0x32, 0x33,
+    0x30, 0x32, 0x33, 0x34,
+    0x30, 0x31, 0x33, 0x34,
+    0x30, 0x31, 0x32, 0x34,
+    0x30, 0x32, 0x33, 0x35,
+    0x30, 0x32, 0x34, 0x35,
+    0x30, 0x31, 0x34, 0x35,
+    0x30, 0x31, 0x33, 0x35,
+    0x30, 0x32, 0x34, 0x36,
+    0x30, 0x33, 0x35, 0x36,
+    0x30, 0x31, 0x35, 0x36,
+    0x30, 0x31, 0x33, 0x36,
+    0x30, 0x36, 0x36, 0x36,
+    0x30, 0x30, 0x36, 0x36,
+    0x30, 0x30, 0x30, 0x36,
+    0x30, 0x33, 0x34, 0x37,
+    0x30, 0x33, 0x36, 0x37,
+    0x30, 0x31, 0x36, 0x37,
+    0x30, 0x31, 0x34, 0x37,
+    0x30, 0x33, 0x35, 0x38,
+    0x30, 0x34, 0x36, 0x38,
+    0x30, 0x32, 0x36, 0x38,
+    0x30, 0x32, 0x34, 0x38,
+    0x30, 0x38, 0x38, 0x38,
+    0x30, 0x30, 0x38, 0x38,
+    0x30, 0x30, 0x30, 0x38,
+    0x30, 0x34, 0x37, 0x3B,
+    0x30, 0x35, 0x39, 0x3B,
+    0x30, 0x32, 0x39, 0x3B,
+    0x30, 0x32, 0x36, 0x3B,
+    0x30, 0x3B, 0x3B, 0x3B,
+    0x30, 0x30, 0x3B, 0x3B,
+    0x30, 0x30, 0x30, 0x3B,
+    0x30, 0x35, 0x39, 0x3E,
+    0x30, 0x37, 0x3B, 0x3E,
+    0x30, 0x33, 0x3B, 0x3E,
+    0x30, 0x33, 0x37, 0x3E,
+    0x30, 0x3E, 0x3E, 0x3E,
+    0x30, 0x30, 0x3E, 0x3E,
+    0x30, 0x30, 0x30, 0x3E,
+    0x31, 0x32, 0x32, 0x33,
+    0x31, 0x32, 0x33, 0x34,
+    0x31, 0x33, 0x34, 0x35,
+    0x31, 0x32, 0x34, 0x35,
+    0x31, 0x32, 0x33, 0x35,
+    0x31, 0x33, 0x34, 0x36,
+    0x31, 0x33, 0x35, 0x36,
+    0x31, 0x32, 0x35, 0x36,
+    0x31, 0x32, 0x34, 0x36,
+    0x31, 0x33, 0x35, 0x37,
+    0x31, 0x34, 0x36, 0x37,
+    0x31, 0x32, 0x36, 0x37,
+    0x31, 0x32, 0x34, 0x37,
+    0x31, 0x37, 0x37, 0x37,
+    0x31, 0x31, 0x37, 0x37,
+    0x31, 0x31, 0x31, 0x37,
+    0x31, 0x34, 0x35, 0x38,
+    0x31, 0x34, 0x37, 0x38,
+    0x31, 0x32, 0x37, 0x38,
+    0x31, 0x32, 0x35, 0x38,
+    0x31, 0x34, 0x36, 0x39,
+    0x31, 0x35, 0x37, 0x39,
+    0x31, 0x33, 0x37, 0x39,
+    0x31, 0x33, 0x35, 0x39,
+    0x31, 0x39, 0x39, 0x39,
+    0x31, 0x31, 0x39, 0x39,
+    0x31, 0x31, 0x31, 0x39,
+    0x31, 0x35, 0x38, 0x3C,
+    0x31, 0x36, 0x3A, 0x3C,
+    0x31, 0x33, 0x3A, 0x3C,
+    0x31, 0x33, 0x37, 0x3C,
+    0x31, 0x3C, 0x3C, 0x3C,
+    0x31, 0x31, 0x3C, 0x3C,
+    0x31, 0x31, 0x31, 0x3C,
+    0x31, 0x36, 0x3A, 0x3F,
+    0x31, 0x38, 0x3C, 0x3F,
+    0x31, 0x34, 0x3C, 0x3F,
+    0x31, 0x34, 0x38, 0x3F,
+    0x31, 0x3F, 0x3F, 0x3F,
+    0x31, 0x31, 0x3F, 0x3F,
+    0x31, 0x31, 0x31, 0x3F,
+    0x32, 0x33, 0x33, 0x34,
+    0x32, 0x33, 0x34, 0x35,
+    0x32, 0x34, 0x35, 0x36,
+    0x32, 0x33, 0x35, 0x36,
+    0x32, 0x33, 0x34, 0x36,
+    0x32, 0x34, 0x35, 0x37,
+    0x32, 0x34, 0x36, 0x37,
+    0x32, 0x33, 0x36, 0x37,
+    0x32, 0x33, 0x35, 0x37,
+    0x32, 0x34, 0x36, 0x38,
+    0x32, 0x35, 0x37, 0x38,
+    0x32, 0x33, 0x37, 0x38,
+    0x32, 0x33, 0x35, 0x38,
+    0x32, 0x38, 0x38, 0x38,
+    0x32, 0x32, 0x38, 0x38,
+    0x32, 0x32, 0x32, 0x38,
+    0x32, 0x35, 0x36, 0x39,
+    0x32, 0x35, 0x38, 0x39,
+    0x32, 0x33, 0x38, 0x39,
+    0x32, 0x33, 0x36, 0x39,
+    0x32, 0x35, 0x37, 0x3A,
+    0x32, 0x36, 0x38, 0x3A,
+    0x32, 0x34, 0x38, 0x3A,
+    0x32, 0x34, 0x36, 0x3A,
+    0x32, 0x3A, 0x3A, 0x3A,
+    0x32, 0x32, 0x3A, 0x3A,
+    0x32, 0x32, 0x32, 0x3A,
+    0x32, 0x36, 0x39, 0x3D,
+    0x32, 0x37, 0x3B, 0x3D,
+    0x32, 0x34, 0x3B, 0x3D,
+    0x32, 0x34, 0x38, 0x3D,
+    0x32, 0x3D, 0x3D, 0x3D,
+    0x32, 0x32, 0x3D, 0x3D,
+    0x32, 0x32, 0x32, 0x3D,
+    0x33, 0x34, 0x34, 0x35,
+    0x33, 0x34, 0x35, 0x36,
+    0x33, 0x35, 0x36, 0x37,
+    0x33, 0x34, 0x36, 0x37,
+    0x33, 0x34, 0x35, 0x37,
+    0x33, 0x35, 0x36, 0x38,
+    0x33, 0x35, 0x37, 0x38,
+    0x33, 0x34, 0x37, 0x38,
+    0x33, 0x34, 0x36, 0x38,
+    0x33, 0x35, 0x37, 0x39,
+    0x33, 0x36, 0x38, 0x39,
+    0x33, 0x34, 0x38, 0x39,
+    0x33, 0x34, 0x36, 0x39,
+    0x33, 0x39, 0x39, 0x39,
+    0x33, 0x33, 0x39, 0x39,
+    0x33, 0x33, 0x33, 0x39,
+    0x33, 0x36, 0x37, 0x3A,
+    0x33, 0x36, 0x39, 0x3A,
+    0x33, 0x34, 0x39, 0x3A,
+    0x33, 0x34, 0x37, 0x3A,
+    0x33, 0x36, 0x38, 0x3B,
+    0x33, 0x37, 0x39, 0x3B,
+    0x33, 0x35, 0x39, 0x3B,
+    0x33, 0x35, 0x37, 0x3B,
+    0x33, 0x3B, 0x3B, 0x3B,
+    0x33, 0x33, 0x3B, 0x3B,
+    0x33, 0x33, 0x33, 0x3B,
+    0x33, 0x37, 0x3A, 0x3E,
+    0x33, 0x38, 0x3C, 0x3E,
+    0x33, 0x35, 0x3C, 0x3E,
+    0x33, 0x35, 0x39, 0x3E,
+    0x33, 0x3E, 0x3E, 0x3E,
+    0x33, 0x33, 0x3E, 0x3E,
+    0x33, 0x33, 0x33, 0x3E,
+    0x34, 0x35, 0x35, 0x36,
+    0x34, 0x35, 0x36, 0x37,
+    0x34, 0x36, 0x37, 0x38,
+    0x34, 0x35, 0x37, 0x38,
+    0x34, 0x35, 0x36, 0x38,
+    0x34, 0x36, 0x37, 0x39,
+    0x34, 0x36, 0x38, 0x39,
+    0x34, 0x35, 0x38, 0x39,
+    0x34, 0x35, 0x37, 0x39,
+    0x34, 0x36, 0x38, 0x3A,
+    0x34, 0x37, 0x39, 0x3A,
+    0x34, 0x35, 0x39, 0x3A,
+    0x34, 0x35, 0x37, 0x3A,
+    0x34, 0x3A, 0x3A, 0x3A,
+    0x34, 0x34, 0x3A, 0x3A,
+    0x34, 0x34, 0x34, 0x3A,
+    0x34, 0x37, 0x38, 0x3B,
+    0x34, 0x37, 0x3A, 0x3B,
+    0x34, 0x35, 0x3A, 0x3B,
+    0x34, 0x35, 0x38, 0x3B,
+    0x34, 0x37, 0x39, 0x3C,
+    0x34, 0x38, 0x3A, 0x3C,
+    0x34, 0x36, 0x3A, 0x3C,
+    0x34, 0x36, 0x38, 0x3C,
+    0x34, 0x3C, 0x3C, 0x3C,
+    0x34, 0x34, 0x3C, 0x3C,
+    0x34, 0x34, 0x34, 0x3C,
+    0x34, 0x38, 0x3B, 0x3F,
+    0x34, 0x39, 0x3D, 0x3F,
+    0x34, 0x36, 0x3D, 0x3F,
+    0x34, 0x36, 0x3A, 0x3F,
+    0x34, 0x3F, 0x3F, 0x3F,
+    0x34, 0x34, 0x3F, 0x3F,
+    0x34, 0x34, 0x34, 0x3F,
+    0x35, 0x36, 0x36, 0x37,
+    0x35, 0x36, 0x37, 0x38,
+    0x35, 0x37, 0x38, 0x39,
+    0x35, 0x36, 0x38, 0x39,
+    0x35, 0x36, 0x37, 0x39,
+    0x35, 0x37, 0x38, 0x3A,
+    0x35, 0x37, 0x39, 0x3A,
+    0x35, 0x36, 0x39, 0x3A,
+    0x35, 0x36, 0x38, 0x3A,
+    0x35, 0x37, 0x39, 0x3B,
+    0x35, 0x38, 0x3A, 0x3B,
+    0x35, 0x36, 0x3A, 0x3B,
+    0x35, 0x36, 0x38, 0x3B,
+    0x35, 0x3B, 0x3B, 0x3B,
+    0x35, 0x35, 0x3B, 0x3B,
+    0x35, 0x35, 0x35, 0x3B,
+    0x35, 0x38, 0x39, 0x3C,
+    0x35, 0x38, 0x3B, 0x3C,
+    0x35, 0x36, 0x3B, 0x3C,
+    0x35, 0x36, 0x39, 0x3C,
+    0x35, 0x38, 0x3A, 0x3D,
+    0x35, 0x39, 0x3B, 0x3D,
+    0x35, 0x37, 0x3B, 0x3D,
+    0x35, 0x37, 0x39, 0x3D,
+    0x35, 0x3D, 0x3D, 0x3D,
+    0x35, 0x35, 0x3D, 0x3D,
+    0x35, 0x35, 0x35, 0x3D,
+    0x36, 0x37, 0x37, 0x38,
+    0x36, 0x37, 0x38, 0x39,
+    0x36, 0x38, 0x39, 0x3A,
+    0x36, 0x37, 0x39, 0x3A,
+    0x36, 0x37, 0x38, 0x3A,
+    0x36, 0x38, 0x39, 0x3B,
+    0x36, 0x38, 0x3A, 0x3B,
+    0x36, 0x37, 0x3A, 0x3B,
+    0x36, 0x37, 0x39, 0x3B,
+    0x36, 0x38, 0x3A, 0x3C,
+    0x36, 0x39, 0x3B, 0x3C,
+    0x36, 0x37, 0x3B, 0x3C,
+    0x36, 0x37, 0x39, 0x3C,
+    0x36, 0x3C, 0x3C, 0x3C,
+    0x36, 0x36, 0x3C, 0x3C,
+    0x36, 0x36, 0x36, 0x3C,
+    0x36, 0x39, 0x3A, 0x3D,
+    0x36, 0x39, 0x3C, 0x3D,
+    0x36, 0x37, 0x3C, 0x3D,
+    0x36, 0x37, 0x3A, 0x3D,
+    0x36, 0x39, 0x3B, 0x3E,
+    0x36, 0x3A, 0x3C, 0x3E,
+    0x36, 0x38, 0x3C, 0x3E,
+    0x36, 0x38, 0x3A, 0x3E,
+    0x36, 0x3E, 0x3E, 0x3E,
+    0x36, 0x36, 0x3E, 0x3E,
+    0x36, 0x36, 0x36, 0x3E,
+    0x37, 0x38, 0x38, 0x39,
+    0x37, 0x38, 0x39, 0x3A,
+    0x37, 0x39, 0x3A, 0x3B,
+    0x37, 0x38, 0x3A, 0x3B,
+    0x37, 0x38, 0x39, 0x3B,
+    0x37, 0x39, 0x3A, 0x3C,
+    0x37, 0x39, 0x3B, 0x3C,
+    0x37, 0x38, 0x3B, 0x3C,
+    0x37, 0x38, 0x3A, 0x3C,
+    0x37, 0x39, 0x3B, 0x3D,
+    0x37, 0x3A, 0x3C, 0x3D,
+    0x37, 0x38, 0x3C, 0x3D,
+    0x37, 0x38, 0x3A, 0x3D,
+    0x37, 0x3D, 0x3D, 0x3D,
+    0x37, 0x37, 0x3D, 0x3D,
+    0x37, 0x37, 0x37, 0x3D,
+    0x37, 0x3A, 0x3B, 0x3E,
+    0x37, 0x3A, 0x3D, 0x3E,
+    0x37, 0x38, 0x3D, 0x3E,
+    0x37, 0x38, 0x3B, 0x3E,
+    0x37, 0x3A, 0x3C, 0x3F,
+    0x37, 0x3B, 0x3D, 0x3F,
+    0x37, 0x39, 0x3D, 0x3F,
+    0x37, 0x39, 0x3B, 0x3F,
+    0x37, 0x3F, 0x3F, 0x3F,
+    0x37, 0x37, 0x3F, 0x3F,
+    0x37, 0x37, 0x37, 0x3F,
+    0x38, 0x39, 0x39, 0x3A,
+    0x38, 0x39, 0x3A, 0x3B,
+    0x38, 0x3A, 0x3B, 0x3C,
+    0x38, 0x39, 0x3B, 0x3C,
+    0x38, 0x39, 0x3A, 0x3C,
+    0x38, 0x3A, 0x3B, 0x3D,
+    0x38, 0x3A, 0x3C, 0x3D,
+    0x38, 0x39, 0x3C, 0x3D,
+    0x38, 0x39, 0x3B, 0x3D,
+    0x38, 0x3A, 0x3C, 0x3E,
+    0x38, 0x3B, 0x3D, 0x3E,
+    0x38, 0x39, 0x3D, 0x3E,
+    0x38, 0x39, 0x3B, 0x3E,
+    0x38, 0x3E, 0x3E, 0x3E,
+    0x38, 0x38, 0x3E, 0x3E,
+    0x38, 0x38, 0x38, 0x3E,
+    0x38, 0x3B, 0x3C, 0x3F,
+    0x38, 0x3B, 0x3E, 0x3F,
+    0x38, 0x39, 0x3E, 0x3F,
+    0x38, 0x39, 0x3C, 0x3F,
+    0x39, 0x3A, 0x3A, 0x3B,
+    0x39, 0x3A, 0x3B, 0x3C,
+    0x39, 0x3B, 0x3C, 0x3D,
+    0x39, 0x3A, 0x3C, 0x3D,
+    0x39, 0x3A, 0x3B, 0x3D,
+    0x39, 0x3B, 0x3C, 0x3E,
+    0x39, 0x3B, 0x3D, 0x3E,
+    0x39, 0x3A, 0x3D, 0x3E,
+    0x39, 0x3A, 0x3C, 0x3E,
+    0x39, 0x3B, 0x3D, 0x3F,
+    0x39, 0x3C, 0x3E, 0x3F,
+    0x39, 0x3A, 0x3E, 0x3F,
+    0x39, 0x3A, 0x3C, 0x3F,
+    0x39, 0x3F, 0x3F, 0x3F,
+    0x39, 0x39, 0x3F, 0x3F,
+    0x39, 0x39, 0x39, 0x3F,
+    0x3A, 0x3B, 0x3B, 0x3C,
+    0x3A, 0x3B, 0x3C, 0x3D,
+    0x3A, 0x3C, 0x3D, 0x3E,
+    0x3A, 0x3B, 0x3D, 0x3E,
+    0x3A, 0x3B, 0x3C, 0x3E,
+    0x3A, 0x3C, 0x3D, 0x3F,
+    0x3A, 0x3C, 0x3E, 0x3F,
+    0x3A, 0x3B, 0x3E, 0x3F,
+    0x3A, 0x3B, 0x3D, 0x3F,
+    0x3B, 0x3C, 0x3C, 0x3D,
+    0x3B, 0x3C, 0x3D, 0x3E,
+    0x3B, 0x3D, 0x3E, 0x3F,
+    0x3B, 0x3C, 0x3E, 0x3F,
+    0x3B, 0x3C, 0x3D, 0x3F,
+    0x3C, 0x3D, 0x3D, 0x3E,
+    0x3C, 0x3D, 0x3E, 0x3F,
+    0x3D, 0x3E, 0x3E, 0x3F
+};
diff --git a/contrib/ffmpeg/libavcodec/utils.c b/contrib/ffmpeg/libavcodec/utils.c
new file mode 100644
index 000000000..2c7a76c11
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/utils.c
@@ -0,0 +1,1331 @@
+/*
+ * utils for libavcodec
+ * Copyright (c) 2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file utils.c
+ * utils.
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+#include "integer.h"
+#include "opt.h"
+#include "crc.h"
+#include <stdarg.h>
+#include <limits.h>
+#include <float.h>
+#ifdef __MINGW32__
+#include <fcntl.h>
+#endif
+
+const uint8_t ff_reverse[256]={
+0x00,0x80,0x40,0xC0,0x20,0xA0,0x60,0xE0,0x10,0x90,0x50,0xD0,0x30,0xB0,0x70,0xF0,
+0x08,0x88,0x48,0xC8,0x28,0xA8,0x68,0xE8,0x18,0x98,0x58,0xD8,0x38,0xB8,0x78,0xF8,
+0x04,0x84,0x44,0xC4,0x24,0xA4,0x64,0xE4,0x14,0x94,0x54,0xD4,0x34,0xB4,0x74,0xF4,
+0x0C,0x8C,0x4C,0xCC,0x2C,0xAC,0x6C,0xEC,0x1C,0x9C,0x5C,0xDC,0x3C,0xBC,0x7C,0xFC,
+0x02,0x82,0x42,0xC2,0x22,0xA2,0x62,0xE2,0x12,0x92,0x52,0xD2,0x32,0xB2,0x72,0xF2,
+0x0A,0x8A,0x4A,0xCA,0x2A,0xAA,0x6A,0xEA,0x1A,0x9A,0x5A,0xDA,0x3A,0xBA,0x7A,0xFA,
+0x06,0x86,0x46,0xC6,0x26,0xA6,0x66,0xE6,0x16,0x96,0x56,0xD6,0x36,0xB6,0x76,0xF6,
+0x0E,0x8E,0x4E,0xCE,0x2E,0xAE,0x6E,0xEE,0x1E,0x9E,0x5E,0xDE,0x3E,0xBE,0x7E,0xFE,
+0x01,0x81,0x41,0xC1,0x21,0xA1,0x61,0xE1,0x11,0x91,0x51,0xD1,0x31,0xB1,0x71,0xF1,
+0x09,0x89,0x49,0xC9,0x29,0xA9,0x69,0xE9,0x19,0x99,0x59,0xD9,0x39,0xB9,0x79,0xF9,
+0x05,0x85,0x45,0xC5,0x25,0xA5,0x65,0xE5,0x15,0x95,0x55,0xD5,0x35,0xB5,0x75,0xF5,
+0x0D,0x8D,0x4D,0xCD,0x2D,0xAD,0x6D,0xED,0x1D,0x9D,0x5D,0xDD,0x3D,0xBD,0x7D,0xFD,
+0x03,0x83,0x43,0xC3,0x23,0xA3,0x63,0xE3,0x13,0x93,0x53,0xD3,0x33,0xB3,0x73,0xF3,
+0x0B,0x8B,0x4B,0xCB,0x2B,0xAB,0x6B,0xEB,0x1B,0x9B,0x5B,0xDB,0x3B,0xBB,0x7B,0xFB,
+0x07,0x87,0x47,0xC7,0x27,0xA7,0x67,0xE7,0x17,0x97,0x57,0xD7,0x37,0xB7,0x77,0xF7,
+0x0F,0x8F,0x4F,0xCF,0x2F,0xAF,0x6F,0xEF,0x1F,0x9F,0x5F,0xDF,0x3F,0xBF,0x7F,0xFF,
+};
+
+static int volatile entangled_thread_counter=0;
+
+/**
+ * realloc which does nothing if the block is large enough
+ */
+void *av_fast_realloc(void *ptr, unsigned int *size, unsigned int min_size)
+{
+    if(min_size < *size)
+        return ptr;
+
+    *size= FFMAX(17*min_size/16 + 32, min_size);
+
+    return av_realloc(ptr, *size);
+}
+
+static unsigned int last_static = 0;
+static unsigned int allocated_static = 0;
+static void** array_static = NULL;
+
+/**
+ * allocation of static arrays - do not use for normal allocation.
+ */
+void *av_mallocz_static(unsigned int size)
+{
+    void *ptr = av_mallocz(size);
+
+    if(ptr){
+        array_static =av_fast_realloc(array_static, &allocated_static, sizeof(void*)*(last_static+1));
+        if(!array_static)
+            return NULL;
+        array_static[last_static++] = ptr;
+    }
+
+    return ptr;
+}
+
+/**
+ * same as above, but does realloc
+ */
+
+void *av_realloc_static(void *ptr, unsigned int size)
+{
+    int i;
+    if(!ptr)
+      return av_mallocz_static(size);
+    /* Look for the old ptr */
+    for(i = 0; i < last_static; i++) {
+        if(array_static[i] == ptr) {
+            array_static[i] = av_realloc(array_static[i], size);
+            return array_static[i];
+        }
+    }
+    return NULL;
+
+}
+
+/**
+ * free all static arrays and reset pointers to 0.
+ */
+void av_free_static(void)
+{
+    while(last_static){
+        av_freep(&array_static[--last_static]);
+    }
+    av_freep(&array_static);
+}
+
+/**
+ * Call av_free_static automatically before it's too late
+ */
+
+static void do_free(void) __attribute__ ((destructor));
+
+static void do_free(void)
+{
+    av_free_static();
+}
+
+
+/* encoder management */
+AVCodec *first_avcodec = NULL;
+
+void register_avcodec(AVCodec *format)
+{
+    AVCodec **p;
+    p = &first_avcodec;
+    while (*p != NULL) p = &(*p)->next;
+    *p = format;
+    format->next = NULL;
+}
+
+void avcodec_set_dimensions(AVCodecContext *s, int width, int height){
+    s->coded_width = width;
+    s->coded_height= height;
+    s->width = -((-width )>>s->lowres);
+    s->height= -((-height)>>s->lowres);
+}
+
+typedef struct InternalBuffer{
+    int last_pic_num;
+    uint8_t *base[4];
+    uint8_t *data[4];
+    int linesize[4];
+}InternalBuffer;
+
+#define INTERNAL_BUFFER_SIZE 32
+
+#define ALIGN(x, a) (((x)+(a)-1)&~((a)-1))
+
+void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){
+    int w_align= 1;
+    int h_align= 1;
+
+    switch(s->pix_fmt){
+    case PIX_FMT_YUV420P:
+    case PIX_FMT_YUV422:
+    case PIX_FMT_UYVY422:
+    case PIX_FMT_YUV422P:
+    case PIX_FMT_YUV444P:
+    case PIX_FMT_GRAY8:
+    case PIX_FMT_GRAY16BE:
+    case PIX_FMT_GRAY16LE:
+    case PIX_FMT_YUVJ420P:
+    case PIX_FMT_YUVJ422P:
+    case PIX_FMT_YUVJ444P:
+        w_align= 16; //FIXME check for non mpeg style codecs and use less alignment
+        h_align= 16;
+        break;
+    case PIX_FMT_YUV411P:
+    case PIX_FMT_UYVY411:
+        w_align=32;
+        h_align=8;
+        break;
+    case PIX_FMT_YUV410P:
+        if(s->codec_id == CODEC_ID_SVQ1){
+            w_align=64;
+            h_align=64;
+        }
+    case PIX_FMT_RGB555:
+        if(s->codec_id == CODEC_ID_RPZA){
+            w_align=4;
+            h_align=4;
+        }
+    case PIX_FMT_PAL8:
+        if(s->codec_id == CODEC_ID_SMC){
+            w_align=4;
+            h_align=4;
+        }
+        break;
+    case PIX_FMT_BGR24:
+        if((s->codec_id == CODEC_ID_MSZH) || (s->codec_id == CODEC_ID_ZLIB)){
+            w_align=4;
+            h_align=4;
+        }
+        break;
+    default:
+        w_align= 1;
+        h_align= 1;
+        break;
+    }
+
+    *width = ALIGN(*width , w_align);
+    *height= ALIGN(*height, h_align);
+}
+
+int avcodec_check_dimensions(void *av_log_ctx, unsigned int w, unsigned int h){
+    if((int)w>0 && (int)h>0 && (w+128)*(uint64_t)(h+128) < INT_MAX/4)
+        return 0;
+
+    av_log(av_log_ctx, AV_LOG_ERROR, "picture size invalid (%ux%u)\n", w, h);
+    return -1;
+}
+
+int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
+    int i;
+    int w= s->width;
+    int h= s->height;
+    InternalBuffer *buf;
+    int *picture_number;
+
+    assert(pic->data[0]==NULL);
+    assert(INTERNAL_BUFFER_SIZE > s->internal_buffer_count);
+
+    if(avcodec_check_dimensions(s,w,h))
+        return -1;
+
+    if(s->internal_buffer==NULL){
+        s->internal_buffer= av_mallocz(INTERNAL_BUFFER_SIZE*sizeof(InternalBuffer));
+    }
+#if 0
+    s->internal_buffer= av_fast_realloc(
+        s->internal_buffer,
+        &s->internal_buffer_size,
+        sizeof(InternalBuffer)*FFMAX(99,  s->internal_buffer_count+1)/*FIXME*/
+        );
+#endif
+
+    buf= &((InternalBuffer*)s->internal_buffer)[s->internal_buffer_count];
+    picture_number= &(((InternalBuffer*)s->internal_buffer)[INTERNAL_BUFFER_SIZE-1]).last_pic_num; //FIXME ugly hack
+    (*picture_number)++;
+
+    if(buf->base[0]){
+        pic->age= *picture_number - buf->last_pic_num;
+        buf->last_pic_num= *picture_number;
+    }else{
+        int h_chroma_shift, v_chroma_shift;
+        int pixel_size, size[3];
+        AVPicture picture;
+
+        avcodec_get_chroma_sub_sample(s->pix_fmt, &h_chroma_shift, &v_chroma_shift);
+
+        avcodec_align_dimensions(s, &w, &h);
+
+        if(!(s->flags&CODEC_FLAG_EMU_EDGE)){
+            w+= EDGE_WIDTH*2;
+            h+= EDGE_WIDTH*2;
+        }
+        avpicture_fill(&picture, NULL, s->pix_fmt, w, h);
+        pixel_size= picture.linesize[0]*8 / w;
+//av_log(NULL, AV_LOG_ERROR, "%d %d %d %d\n", (int)picture.data[1], w, h, s->pix_fmt);
+        assert(pixel_size>=1);
+            //FIXME next ensures that linesize= 2^x uvlinesize, thats needed because some MC code assumes it
+        if(pixel_size == 3*8)
+            w= ALIGN(w, STRIDE_ALIGN<<h_chroma_shift);
+        else
+            w= ALIGN(pixel_size*w, STRIDE_ALIGN<<(h_chroma_shift+3)) / pixel_size;
+        size[1] = avpicture_fill(&picture, NULL, s->pix_fmt, w, h);
+        size[0] = picture.linesize[0] * h;
+        size[1] -= size[0];
+        if(picture.data[2])
+            size[1]= size[2]= size[1]/2;
+        else
+            size[2]= 0;
+
+        buf->last_pic_num= -256*256*256*64;
+        memset(buf->base, 0, sizeof(buf->base));
+        memset(buf->data, 0, sizeof(buf->data));
+
+        for(i=0; i<3 && size[i]; i++){
+            const int h_shift= i==0 ? 0 : h_chroma_shift;
+            const int v_shift= i==0 ? 0 : v_chroma_shift;
+
+            buf->linesize[i]= picture.linesize[i];
+
+            buf->base[i]= av_malloc(size[i]+16); //FIXME 16
+            if(buf->base[i]==NULL) return -1;
+            memset(buf->base[i], 128, size[i]);
+
+            // no edge if EDEG EMU or not planar YUV, we check for PAL8 redundantly to protect against a exploitable bug regression ...
+            if((s->flags&CODEC_FLAG_EMU_EDGE) || (s->pix_fmt == PIX_FMT_PAL8) || !size[2])
+                buf->data[i] = buf->base[i];
+            else
+                buf->data[i] = buf->base[i] + ALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (EDGE_WIDTH>>h_shift), STRIDE_ALIGN);
+        }
+        pic->age= 256*256*256*64;
+    }
+    pic->type= FF_BUFFER_TYPE_INTERNAL;
+
+    for(i=0; i<4; i++){
+        pic->base[i]= buf->base[i];
+        pic->data[i]= buf->data[i];
+        pic->linesize[i]= buf->linesize[i];
+    }
+    s->internal_buffer_count++;
+
+    return 0;
+}
+
+void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic){
+    int i;
+    InternalBuffer *buf, *last, temp;
+
+    assert(pic->type==FF_BUFFER_TYPE_INTERNAL);
+    assert(s->internal_buffer_count);
+
+    buf = NULL; /* avoids warning */
+    for(i=0; i<s->internal_buffer_count; i++){ //just 3-5 checks so is not worth to optimize
+        buf= &((InternalBuffer*)s->internal_buffer)[i];
+        if(buf->data[0] == pic->data[0])
+            break;
+    }
+    assert(i < s->internal_buffer_count);
+    s->internal_buffer_count--;
+    last = &((InternalBuffer*)s->internal_buffer)[s->internal_buffer_count];
+
+    temp= *buf;
+    *buf= *last;
+    *last= temp;
+
+    for(i=0; i<3; i++){
+        pic->data[i]=NULL;
+//        pic->base[i]=NULL;
+    }
+//printf("R%X\n", pic->opaque);
+}
+
+int avcodec_default_reget_buffer(AVCodecContext *s, AVFrame *pic){
+    AVFrame temp_pic;
+    int i;
+
+    /* If no picture return a new buffer */
+    if(pic->data[0] == NULL) {
+        /* We will copy from buffer, so must be readable */
+        pic->buffer_hints |= FF_BUFFER_HINTS_READABLE;
+        return s->get_buffer(s, pic);
+    }
+
+    /* If internal buffer type return the same buffer */
+    if(pic->type == FF_BUFFER_TYPE_INTERNAL)
+        return 0;
+
+    /*
+     * Not internal type and reget_buffer not overridden, emulate cr buffer
+     */
+    temp_pic = *pic;
+    for(i = 0; i < 4; i++)
+        pic->data[i] = pic->base[i] = NULL;
+    pic->opaque = NULL;
+    /* Allocate new frame */
+    if (s->get_buffer(s, pic))
+        return -1;
+    /* Copy image data from old buffer to new buffer */
+    img_copy((AVPicture*)pic, (AVPicture*)&temp_pic, s->pix_fmt, s->width,
+             s->height);
+    s->release_buffer(s, &temp_pic); // Release old frame
+    return 0;
+}
+
+int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count){
+    int i;
+
+    for(i=0; i<count; i++){
+        int r= func(c, arg[i]);
+        if(ret) ret[i]= r;
+    }
+    return 0;
+}
+
+enum PixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum PixelFormat * fmt){
+    return fmt[0];
+}
+
+static const char* context_to_name(void* ptr) {
+    AVCodecContext *avc= ptr;
+
+    if(avc && avc->codec && avc->codec->name)
+        return avc->codec->name;
+    else
+        return "NULL";
+}
+
+#define OFFSET(x) offsetof(AVCodecContext,x)
+#define DEFAULT 0 //should be NAN but it doesnt work as its not a constant in glibc as required by ANSI/ISO C
+//these names are too long to be readable
+#define V AV_OPT_FLAG_VIDEO_PARAM
+#define A AV_OPT_FLAG_AUDIO_PARAM
+#define S AV_OPT_FLAG_SUBTITLE_PARAM
+#define E AV_OPT_FLAG_ENCODING_PARAM
+#define D AV_OPT_FLAG_DECODING_PARAM
+
+#define AV_CODEC_DEFAULT_BITRATE 200*1000
+
+static const AVOption options[]={
+{"b", "set video bitrate (in bits/s)", OFFSET(bit_rate), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE, INT_MIN, INT_MAX, V|A|E},
+{"bt", "set video bitrate tolerance (in bits/s)", OFFSET(bit_rate_tolerance), FF_OPT_TYPE_INT, AV_CODEC_DEFAULT_BITRATE*20, INT_MIN, INT_MAX, V|E},
+{"flags", NULL, OFFSET(flags), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|A|E|D, "flags"},
+{"mv4", "use four motion vector by macroblock (mpeg4)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_4MV, INT_MIN, INT_MAX, V|E, "flags"},
+{"obmc", "use overlapped block motion compensation (h263+)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_OBMC, INT_MIN, INT_MAX, V|E, "flags"},
+{"qpel", "use 1/4 pel motion compensation", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_QPEL, INT_MIN, INT_MAX, V|E, "flags"},
+{"loop", "use loop filter", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_LOOP_FILTER, INT_MIN, INT_MAX, V|E, "flags"},
+{"qscale", "use fixed qscale", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_QSCALE, INT_MIN, INT_MAX, 0, "flags"},
+{"gmc", "use gmc", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_GMC, INT_MIN, INT_MAX, V|E, "flags"},
+{"mv0", "always try a mb with mv=<0,0>", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_MV0, INT_MIN, INT_MAX, V|E, "flags"},
+{"part", "use data partitioning", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_PART, INT_MIN, INT_MAX, V|E, "flags"},
+{"input_preserved", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG_INPUT_PRESERVED, INT_MIN, INT_MAX, 0, "flags"},
+{"pass1", "use internal 2pass ratecontrol in first  pass mode", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_PASS1, INT_MIN, INT_MAX, 0, "flags"},
+{"pass2", "use internal 2pass ratecontrol in second pass mode", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_PASS2, INT_MIN, INT_MAX, 0, "flags"},
+{"extern_huff", "use external huffman table (for mjpeg)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_EXTERN_HUFF, INT_MIN, INT_MAX, 0, "flags"},
+{"gray", "only decode/encode grayscale", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_GRAY, INT_MIN, INT_MAX, V|E|D, "flags"},
+{"emu_edge", "don't draw edges", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_EMU_EDGE, INT_MIN, INT_MAX, 0, "flags"},
+{"psnr", "error[?] variables will be set during encoding", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_PSNR, INT_MIN, INT_MAX, V|E, "flags"},
+{"truncated", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG_TRUNCATED, INT_MIN, INT_MAX, 0, "flags"},
+{"naq", "normalize adaptive quantization", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_NORMALIZE_AQP, INT_MIN, INT_MAX, V|E, "flags"},
+{"ildct", "use interlaced dct", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_INTERLACED_DCT, INT_MIN, INT_MAX, V|E, "flags"},
+{"low_delay", "force low delay", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_LOW_DELAY, INT_MIN, INT_MAX, V|D|E, "flags"},
+{"alt", "enable alternate scantable (mpeg2/mpeg4)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_ALT_SCAN, INT_MIN, INT_MAX, V|E, "flags"},
+{"trell", "use trellis quantization", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_TRELLIS_QUANT, INT_MIN, INT_MAX, V|E, "flags"},
+{"global_header", "place global headers in extradata instead of every keyframe", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_GLOBAL_HEADER, INT_MIN, INT_MAX, 0, "flags"},
+{"bitexact", "use only bitexact stuff (except (i)dct)", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_BITEXACT, INT_MIN, INT_MAX, A|V|S|D|E, "flags"},
+{"aic", "h263 advanced intra coding / mpeg4 ac prediction", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_AC_PRED, INT_MIN, INT_MAX, V|E, "flags"},
+{"umv", "use unlimited motion vectors", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_H263P_UMV, INT_MIN, INT_MAX, V|E, "flags"},
+{"cbp", "use rate distortion optimization for cbp", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_CBP_RD, INT_MIN, INT_MAX, V|E, "flags"},
+{"qprd", "use rate distortion optimization for qp selection", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_QP_RD, INT_MIN, INT_MAX, V|E, "flags"},
+{"aiv", "h263 alternative inter vlc", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_H263P_AIV, INT_MIN, INT_MAX, V|E, "flags"},
+{"slice", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG_H263P_SLICE_STRUCT, INT_MIN, INT_MAX, V|E, "flags"},
+{"ilme", "interlaced motion estimation", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_INTERLACED_ME, INT_MIN, INT_MAX, V|E, "flags"},
+{"scan_offset", "will reserve space for svcd scan offset user data", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_SVCD_SCAN_OFFSET, INT_MIN, INT_MAX, V|E, "flags"},
+{"cgop", "closed gop", 0, FF_OPT_TYPE_CONST, CODEC_FLAG_CLOSED_GOP, INT_MIN, INT_MAX, V|E, "flags"},
+{"fast", "allow non spec compliant speedup tricks", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_FAST, INT_MIN, INT_MAX, V|E, "flags2"},
+{"sgop", "strictly enforce gop size", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_STRICT_GOP, INT_MIN, INT_MAX, V|E, "flags2"},
+{"noout", "skip bitstream encoding", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_NO_OUTPUT, INT_MIN, INT_MAX, V|E, "flags2"},
+{"local_header", "place global headers at every keyframe instead of in extradata", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_LOCAL_HEADER, INT_MIN, INT_MAX, V|E, "flags2"},
+{"sub_id", NULL, OFFSET(sub_id), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"me_method", "set motion estimation method", OFFSET(me_method), FF_OPT_TYPE_INT, ME_EPZS, INT_MIN, INT_MAX, V|E, "me_method"},
+{"extradata_size", NULL, OFFSET(extradata_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"time_base", NULL, OFFSET(time_base), FF_OPT_TYPE_RATIONAL, DEFAULT, INT_MIN, INT_MAX},
+{"g", "set the group of picture size", OFFSET(gop_size), FF_OPT_TYPE_INT, 12, INT_MIN, INT_MAX, V|E},
+{"rate_emu", NULL, OFFSET(rate_emu), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"ac", "set number of audio channels", OFFSET(channels), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"cutoff", "set cutoff bandwidth", OFFSET(cutoff), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|E},
+{"frame_size", NULL, OFFSET(frame_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|E},
+{"frame_number", NULL, OFFSET(frame_number), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"real_pict_num", NULL, OFFSET(real_pict_num), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"delay", NULL, OFFSET(delay), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"qcomp", "video quantizer scale compression (VBR)", OFFSET(qcompress), FF_OPT_TYPE_FLOAT, 0.5, FLT_MIN, FLT_MAX, V|E},
+{"qblur", "video quantizer scale blur (VBR)", OFFSET(qblur), FF_OPT_TYPE_FLOAT, 0.5, FLT_MIN, FLT_MAX, V|E},
+{"qmin", "min video quantizer scale (VBR)", OFFSET(qmin), FF_OPT_TYPE_INT, 2, 1, 51, V|E},
+{"qmax", "max video quantizer scale (VBR)", OFFSET(qmax), FF_OPT_TYPE_INT, 31, 1, 51, V|E},
+{"qdiff", "max difference between the quantizer scale (VBR)", OFFSET(max_qdiff), FF_OPT_TYPE_INT, 3, INT_MIN, INT_MAX, V|E},
+{"bf", "use 'frames' B frames", OFFSET(max_b_frames), FF_OPT_TYPE_INT, DEFAULT, 0, FF_MAX_B_FRAMES, V|E},
+{"b_qfactor", "qp factor between p and b frames", OFFSET(b_quant_factor), FF_OPT_TYPE_FLOAT, 1.25, FLT_MIN, FLT_MAX, V|E},
+{"rc_strategy", NULL, OFFSET(rc_strategy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"b_strategy", NULL, OFFSET(b_frame_strategy), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
+{"hurry_up", NULL, OFFSET(hurry_up), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
+{"rtp_mode", NULL, OFFSET(rtp_mode), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"rtp_payload_size", NULL, OFFSET(rtp_payload_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"mv_bits", NULL, OFFSET(mv_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"header_bits", NULL, OFFSET(header_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"i_tex_bits", NULL, OFFSET(i_tex_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"p_tex_bits", NULL, OFFSET(p_tex_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"i_count", NULL, OFFSET(i_count), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"p_count", NULL, OFFSET(p_count), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"skip_count", NULL, OFFSET(skip_count), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"misc_bits", NULL, OFFSET(misc_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"frame_bits", NULL, OFFSET(frame_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"codec_tag", NULL, OFFSET(codec_tag), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"bug", "workaround not auto detected encoder bugs", OFFSET(workaround_bugs), FF_OPT_TYPE_FLAGS, FF_BUG_AUTODETECT, INT_MIN, INT_MAX, V|D, "bug"},
+{"autodetect", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_AUTODETECT, INT_MIN, INT_MAX, V|D, "bug"},
+{"old_msmpeg4", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_OLD_MSMPEG4, INT_MIN, INT_MAX, V|D, "bug"},
+{"xvid_ilace", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_XVID_ILACE, INT_MIN, INT_MAX, V|D, "bug"},
+{"ump4", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_UMP4, INT_MIN, INT_MAX, V|D, "bug"},
+{"no_padding", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_NO_PADDING, INT_MIN, INT_MAX, V|D, "bug"},
+{"amv", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_AMV, INT_MIN, INT_MAX, V|D, "bug"},
+{"ac_vlc", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_AC_VLC, INT_MIN, INT_MAX, V|D, "bug"},
+{"qpel_chroma", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_QPEL_CHROMA, INT_MIN, INT_MAX, V|D, "bug"},
+{"std_qpel", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_STD_QPEL, INT_MIN, INT_MAX, V|D, "bug"},
+{"qpel_chroma2", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_QPEL_CHROMA2, INT_MIN, INT_MAX, V|D, "bug"},
+{"direct_blocksize", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_DIRECT_BLOCKSIZE, INT_MIN, INT_MAX, V|D, "bug"},
+{"edge", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_EDGE, INT_MIN, INT_MAX, V|D, "bug"},
+{"hpel_chroma", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_HPEL_CHROMA, INT_MIN, INT_MAX, V|D, "bug"},
+{"dc_clip", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_DC_CLIP, INT_MIN, INT_MAX, V|D, "bug"},
+{"ms", NULL, 0, FF_OPT_TYPE_CONST, FF_BUG_MS, INT_MIN, INT_MAX, V|D, "bug"},
+{"lelim", "single coefficient elimination threshold for luminance (negative values also consider dc coefficient)", OFFSET(luma_elim_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"celim", "single coefficient elimination threshold for chrominance (negative values also consider dc coefficient)", OFFSET(chroma_elim_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"strict", "how strictly to follow the standards", OFFSET(strict_std_compliance), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "strict"},
+{"very", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_VERY_STRICT, INT_MIN, INT_MAX, V|E, "strict"},
+{"strict", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_STRICT, INT_MIN, INT_MAX, V|E, "strict"},
+{"normal", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_NORMAL, INT_MIN, INT_MAX, V|E, "strict"},
+{"inofficial", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_INOFFICIAL, INT_MIN, INT_MAX, V|E, "strict"},
+{"experimental", NULL, 0, FF_OPT_TYPE_CONST, FF_COMPLIANCE_EXPERIMENTAL, INT_MIN, INT_MAX, V|E, "strict"},
+{"b_qoffset", "qp offset between p and b frames", OFFSET(b_quant_offset), FF_OPT_TYPE_FLOAT, 1.25, FLT_MIN, FLT_MAX, V|E},
+{"er", NULL, OFFSET(error_resilience), FF_OPT_TYPE_INT, FF_ER_CAREFUL, INT_MIN, INT_MAX, V|D, "er"},
+{"careful", NULL, 0, FF_OPT_TYPE_CONST, FF_ER_CAREFUL, INT_MIN, INT_MAX, V|D, "er"},
+{"compliant", NULL, 0, FF_OPT_TYPE_CONST, FF_ER_COMPLIANT, INT_MIN, INT_MAX, V|D, "er"},
+{"aggressive", NULL, 0, FF_OPT_TYPE_CONST, FF_ER_AGGRESSIVE, INT_MIN, INT_MAX, V|D, "er"},
+{"very_aggressive", NULL, 0, FF_OPT_TYPE_CONST, FF_ER_VERY_AGGRESSIVE, INT_MIN, INT_MAX, V|D, "er"},
+{"has_b_frames", NULL, OFFSET(has_b_frames), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"block_align", NULL, OFFSET(block_align), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"parse_only", NULL, OFFSET(parse_only), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"mpeg_quant", NULL, OFFSET(mpeg_quant), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"stats_out", NULL, OFFSET(stats_out), FF_OPT_TYPE_STRING, DEFAULT, CHAR_MIN, CHAR_MAX},
+{"stats_in", NULL, OFFSET(stats_in), FF_OPT_TYPE_STRING, DEFAULT, CHAR_MIN, CHAR_MAX},
+{"qsquish", "how to keep quantizer between qmin and qmax (0 = clip, 1 = use differentiable function)", OFFSET(rc_qsquish), FF_OPT_TYPE_FLOAT, DEFAULT, 0, 99, V|E},
+{"rc_qmod_amp", NULL, OFFSET(rc_qmod_amp), FF_OPT_TYPE_FLOAT, DEFAULT, -FLT_MAX, FLT_MAX, V|E},
+{"rc_qmod_freq", NULL, OFFSET(rc_qmod_freq), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"rc_override_count", NULL, OFFSET(rc_override_count), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"rc_eq", "set rate control equation", OFFSET(rc_eq), FF_OPT_TYPE_STRING, DEFAULT, CHAR_MIN, CHAR_MAX, V|E},
+{"maxrate", "set max video bitrate tolerance (in bits/s)", OFFSET(rc_max_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"minrate", "set min video bitrate tolerance (in bits/s)", OFFSET(rc_min_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"bufsize", "set ratecontrol buffer size (in bits)", OFFSET(rc_buffer_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"rc_buf_aggressivity", NULL, OFFSET(rc_buffer_aggressivity), FF_OPT_TYPE_FLOAT, 1.0, FLT_MIN, FLT_MAX, V|E},
+{"i_qfactor", "qp factor between p and i frames", OFFSET(i_quant_factor), FF_OPT_TYPE_FLOAT, -0.8, -FLT_MAX, FLT_MAX, V|E},
+{"i_qoffset", "qp offset between p and i frames", OFFSET(i_quant_offset), FF_OPT_TYPE_FLOAT, 0.0, -FLT_MAX, FLT_MAX, V|E},
+{"rc_init_cplx", "initial complexity for 1-pass encoding", OFFSET(rc_initial_cplx), FF_OPT_TYPE_FLOAT, DEFAULT, -FLT_MAX, FLT_MAX, V|E},
+{"dct", NULL, OFFSET(dct_algo), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, V|E, "dct"},
+{"auto", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_AUTO, INT_MIN, INT_MAX, V|E, "dct"},
+{"fastint", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_FASTINT, INT_MIN, INT_MAX, V|E, "dct"},
+{"int", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_INT, INT_MIN, INT_MAX, V|E, "dct"},
+{"mmx", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_MMX, INT_MIN, INT_MAX, V|E, "dct"},
+{"mlib", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_MLIB, INT_MIN, INT_MAX, V|E, "dct"},
+{"altivec", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_ALTIVEC, INT_MIN, INT_MAX, V|E, "dct"},
+{"faan", NULL, 0, FF_OPT_TYPE_CONST, FF_DCT_FAAN, INT_MIN, INT_MAX, V|E, "dct"},
+{"lumi_mask", "lumimasking", OFFSET(lumi_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
+{"tcplx_mask", "temporal complexity masking", OFFSET(temporal_cplx_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
+{"scplx_mask", "spatial complexity masking", OFFSET(spatial_cplx_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
+{"p_mask", "inter masking", OFFSET(p_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
+{"dark_mask", "darkness masking", OFFSET(dark_masking), FF_OPT_TYPE_FLOAT, 0, -FLT_MAX, FLT_MAX, V|E},
+{"unused", NULL, OFFSET(unused), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"idct", NULL, OFFSET(idct_algo), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, V|E|D, "idct"},
+{"auto", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_AUTO, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"int", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_INT, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simple", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLE, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplemmx", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEMMX, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"libmpeg2mmx", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_LIBMPEG2MMX, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"ps2", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_PS2, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"mlib", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_MLIB, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"arm", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_ARM, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"altivec", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_ALTIVEC, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"sh4", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SH4, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplearm", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEARM, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplearmv5te", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEARMV5TE, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"h264", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_H264, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"vp3", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_VP3, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"ipp", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_IPP, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"xvidmmx", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_XVIDMMX, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"slice_count", NULL, OFFSET(slice_count), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"ec", NULL, OFFSET(error_concealment), FF_OPT_TYPE_FLAGS, 3, INT_MIN, INT_MAX, V|D, "ec"},
+{"guess_mvs", NULL, 0, FF_OPT_TYPE_CONST, FF_EC_GUESS_MVS, INT_MIN, INT_MAX, V|D, "ec"},
+{"deblock", NULL, 0, FF_OPT_TYPE_CONST, FF_EC_DEBLOCK, INT_MIN, INT_MAX, V|D, "ec"},
+{"bits_per_sample", NULL, OFFSET(bits_per_sample), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"pred", "prediction method", OFFSET(prediction_method), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "pred"},
+{"left", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_LEFT, INT_MIN, INT_MAX, V|E, "pred"},
+{"plane", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_PLANE, INT_MIN, INT_MAX, V|E, "pred"},
+{"median", NULL, 0, FF_OPT_TYPE_CONST, FF_PRED_MEDIAN, INT_MIN, INT_MAX, V|E, "pred"},
+{"aspect", NULL, OFFSET(sample_aspect_ratio), FF_OPT_TYPE_RATIONAL, DEFAULT, 0, 10, V|E},
+{"debug", "print specific debug info", OFFSET(debug), FF_OPT_TYPE_FLAGS, DEFAULT, 0, INT_MAX, V|A|S|E|D, "debug"},
+{"pict", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_PICT_INFO, INT_MIN, INT_MAX, V|D, "debug"},
+{"rc", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_RC, INT_MIN, INT_MAX, V|E, "debug"},
+{"bitstream", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_BITSTREAM, INT_MIN, INT_MAX, V|D, "debug"},
+{"mb_type", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_MB_TYPE, INT_MIN, INT_MAX, V|D, "debug"},
+{"qp", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_QP, INT_MIN, INT_MAX, V|D, "debug"},
+{"mv", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_MV, INT_MIN, INT_MAX, V|D, "debug"},
+{"dct_coeff", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_DCT_COEFF, INT_MIN, INT_MAX, V|D, "debug"},
+{"skip", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_SKIP, INT_MIN, INT_MAX, V|D, "debug"},
+{"startcode", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_STARTCODE, INT_MIN, INT_MAX, V|D, "debug"},
+{"pts", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_PTS, INT_MIN, INT_MAX, V|D, "debug"},
+{"er", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_ER, INT_MIN, INT_MAX, V|D, "debug"},
+{"mmco", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_MMCO, INT_MIN, INT_MAX, V|D, "debug"},
+{"bugs", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_BUGS, INT_MIN, INT_MAX, V|D, "debug"},
+{"vis_qp", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_QP, INT_MIN, INT_MAX, V|D, "debug"},
+{"vis_mb_type", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MB_TYPE, INT_MIN, INT_MAX, V|D, "debug"},
+{"vismv", "visualize motion vectors", OFFSET(debug_mv), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, V|D, "debug_mv"},
+{"pf", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_P_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"},
+{"bf", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"},
+{"bb", NULL, 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_BACK, INT_MIN, INT_MAX, V|D, "debug_mv"},
+{"mb_qmin", NULL, OFFSET(mb_qmin), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"mb_qmax", NULL, OFFSET(mb_qmax), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"cmp", "full pel me compare function", OFFSET(me_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"subcmp", "sub pel me compare function", OFFSET(me_sub_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"mbcmp", "macroblock compare function", OFFSET(mb_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"ildctcmp", "interlaced dct compare function", OFFSET(ildct_cmp), FF_OPT_TYPE_INT, FF_CMP_VSAD, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"dia_size", NULL, OFFSET(dia_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"last_pred", NULL, OFFSET(last_predictor_count), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"preme", "pre motion estimation", OFFSET(pre_me), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"precmp", "pre motion estimation compare function", OFFSET(me_pre_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"sad", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_SAD, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"sse", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_SSE, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"satd", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_SATD, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"dct", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_DCT, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"psnr", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_PSNR, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"bit", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_BIT, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"rd", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_RD, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"zero", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_ZERO, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"vsad", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_VSAD, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"vsse", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_VSSE, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"nsse", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_NSSE, INT_MIN, INT_MAX, V|E, "cmp_func"},
+#ifdef CONFIG_SNOW_ENCODER
+{"w53", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_W53, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"w97", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_W97, INT_MIN, INT_MAX, V|E, "cmp_func"},
+#endif
+{"dctmax", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_DCTMAX, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"chroma", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_CHROMA, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"pre_dia_size", NULL, OFFSET(pre_dia_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"subq", "sub pel motion estimation quality", OFFSET(me_subpel_quality), FF_OPT_TYPE_INT, 8, INT_MIN, INT_MAX, V|E},
+{"dtg_active_format", NULL, OFFSET(dtg_active_format), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"me_range", "limit motion vectors range (1023 for DivX player)", OFFSET(me_range), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"ibias", "intra quant bias", OFFSET(intra_quant_bias), FF_OPT_TYPE_INT, FF_DEFAULT_QUANT_BIAS, INT_MIN, INT_MAX, V|E},
+{"pbias", "inter quant bias", OFFSET(inter_quant_bias), FF_OPT_TYPE_INT, FF_DEFAULT_QUANT_BIAS, INT_MIN, INT_MAX, V|E},
+{"color_table_id", NULL, OFFSET(color_table_id), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"internal_buffer_count", NULL, OFFSET(internal_buffer_count), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"global_quality", NULL, OFFSET(global_quality), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"coder", NULL, OFFSET(coder_type), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "coder"},
+{"vlc", "variable length coder / huffman coder", 0, FF_OPT_TYPE_CONST, FF_CODER_TYPE_VLC, INT_MIN, INT_MAX, V|E, "coder"},
+{"ac", "arithmetic coder", 0, FF_OPT_TYPE_CONST, FF_CODER_TYPE_AC, INT_MIN, INT_MAX, V|E, "coder"},
+{"context", "context model", OFFSET(context_model), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"slice_flags", NULL, OFFSET(slice_flags), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"xvmc_acceleration", NULL, OFFSET(xvmc_acceleration), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"mbd", NULL, OFFSET(mb_decision), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "mbd"},
+{"simple", NULL, 0, FF_OPT_TYPE_CONST, FF_MB_DECISION_SIMPLE, INT_MIN, INT_MAX, V|E, "mbd"},
+{"bits", NULL, 0, FF_OPT_TYPE_CONST, FF_MB_DECISION_BITS, INT_MIN, INT_MAX, V|E, "mbd"},
+{"rd", NULL, 0, FF_OPT_TYPE_CONST, FF_MB_DECISION_RD, INT_MIN, INT_MAX, V|E, "mbd"},
+{"stream_codec_tag", NULL, OFFSET(stream_codec_tag), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"sc_threshold", "scene change threshold", OFFSET(scenechange_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"lmin", "min lagrange factor (VBR)", OFFSET(lmin), FF_OPT_TYPE_INT,  2*FF_QP2LAMBDA, 0, INT_MAX, V|E},
+{"lmax", "max lagrange factor (VBR)", OFFSET(lmax), FF_OPT_TYPE_INT, 31*FF_QP2LAMBDA, 0, INT_MAX, V|E},
+{"nr", "noise reduction", OFFSET(noise_reduction), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"rc_init_occupancy", NULL, OFFSET(rc_initial_buffer_occupancy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"inter_threshold", NULL, OFFSET(inter_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"flags2", NULL, OFFSET(flags2), FF_OPT_TYPE_FLAGS, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|A|E|D, "flags2"},
+{"error", NULL, OFFSET(error_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"antialias", NULL, OFFSET(antialias_algo), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D, "aa"},
+{"auto", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_AUTO, INT_MIN, INT_MAX, V|D, "aa"},
+{"fastint", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_FASTINT, INT_MIN, INT_MAX, V|D, "aa"},
+{"int", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_INT, INT_MIN, INT_MAX, V|D, "aa"},
+{"float", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_FLOAT, INT_MIN, INT_MAX, V|D, "aa"},
+{"qns", "quantizer noise shaping", OFFSET(quantizer_noise_shaping), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"threads", NULL, OFFSET(thread_count), FF_OPT_TYPE_INT, 1, INT_MIN, INT_MAX, V|E|D},
+{"me_threshold", "motion estimaton threshold", OFFSET(me_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"mb_threshold", "macroblock threshold", OFFSET(mb_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"dc", "intra_dc_precision", OFFSET(intra_dc_precision), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
+{"nssew", "nsse weight", OFFSET(nsse_weight), FF_OPT_TYPE_INT, 8, INT_MIN, INT_MAX, V|E},
+{"skip_top", NULL, OFFSET(skip_top), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
+{"skip_bottom", NULL, OFFSET(skip_bottom), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
+{"profile", NULL, OFFSET(profile), FF_OPT_TYPE_INT, FF_PROFILE_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "profile"},
+{"unknown", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "profile"},
+{"level", NULL, OFFSET(level), FF_OPT_TYPE_INT, FF_LEVEL_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "level"},
+{"unknown", NULL, 0, FF_OPT_TYPE_CONST, FF_LEVEL_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "level"},
+{"lowres", NULL, OFFSET(lowres), FF_OPT_TYPE_INT, 0, 0, INT_MAX, V|D},
+{"skip_threshold", "frame skip threshold", OFFSET(frame_skip_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"skip_factor", "frame skip factor", OFFSET(frame_skip_factor), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"skip_exp", "frame skip exponent", OFFSET(frame_skip_exp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"skipcmp", "frame skip compare function", OFFSET(frame_skip_cmp), FF_OPT_TYPE_INT, FF_CMP_DCTMAX, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"border_mask", NULL, OFFSET(border_masking), FF_OPT_TYPE_FLOAT, DEFAULT, -FLT_MAX, FLT_MAX, V|E},
+{"mblmin", "min macroblock lagrange factor (VBR)", OFFSET(mb_lmin), FF_OPT_TYPE_INT, FF_QP2LAMBDA * 2, 1, FF_LAMBDA_MAX, V|E},
+{"mblmax", "max macroblock lagrange factor (VBR)", OFFSET(mb_lmax), FF_OPT_TYPE_INT, FF_QP2LAMBDA * 31, 1, FF_LAMBDA_MAX, V|E},
+{"mepc", "motion estimation bitrate penalty compensation (1.0 = 256)", OFFSET(me_penalty_compensation), FF_OPT_TYPE_INT, 256, INT_MIN, INT_MAX, V|E},
+{"bidir_refine", NULL, OFFSET(bidir_refine), FF_OPT_TYPE_INT, DEFAULT, 0, 4, V|E},
+{"brd_scale", NULL, OFFSET(brd_scale), FF_OPT_TYPE_INT, DEFAULT, 0, 10, V|E},
+{"crf", NULL, OFFSET(crf), FF_OPT_TYPE_FLOAT, DEFAULT, 0, 51, V|E},
+{"cqp", NULL, OFFSET(cqp), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, V|E},
+{"keyint_min", NULL, OFFSET(keyint_min), FF_OPT_TYPE_INT, 25, INT_MIN, INT_MAX, V|E},
+{"refs", NULL, OFFSET(refs), FF_OPT_TYPE_INT, 1, INT_MIN, INT_MAX, V|E},
+{"chromaoffset", NULL, OFFSET(chromaoffset), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"bframebias", NULL, OFFSET(bframebias), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"trellis", NULL, OFFSET(trellis), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|A|E},
+{"directpred", NULL, OFFSET(directpred), FF_OPT_TYPE_INT, 2, INT_MIN, INT_MAX, V|E},
+{"bpyramid", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BPYRAMID, INT_MIN, INT_MAX, V|E, "flags2"},
+{"wpred", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_WPRED, INT_MIN, INT_MAX, V|E, "flags2"},
+{"mixed_refs", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_MIXED_REFS, INT_MIN, INT_MAX, V|E, "flags2"},
+{"8x8dct", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_8X8DCT, INT_MIN, INT_MAX, V|E, "flags2"},
+{"fastpskip", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|E, "flags2"},
+{"aud", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_AUD, INT_MIN, INT_MAX, V|E, "flags2"},
+{"brdo", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BRDO, INT_MIN, INT_MAX, V|E, "flags2"},
+{"complexityblur", NULL, OFFSET(complexityblur), FF_OPT_TYPE_FLOAT, 20.0, FLT_MIN, FLT_MAX, V|E},
+{"deblockalpha", NULL, OFFSET(deblockalpha), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"deblockbeta", NULL, OFFSET(deblockbeta), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"partitions", NULL, OFFSET(partitions), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|E, "partitions"},
+{"parti4x4", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_I4X4, INT_MIN, INT_MAX, V|E, "partitions"},
+{"parti8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_I8X8, INT_MIN, INT_MAX, V|E, "partitions"},
+{"partp4x4", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_P4X4, INT_MIN, INT_MAX, V|E, "partitions"},
+{"partp8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_P8X8, INT_MIN, INT_MAX, V|E, "partitions"},
+{"partb8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_B8X8, INT_MIN, INT_MAX, V|E, "partitions"},
+{"sc_factor", NULL, OFFSET(scenechange_factor), FF_OPT_TYPE_INT, 6, 0, INT_MAX, V|E},
+{"mv0_threshold", NULL, OFFSET(mv0_threshold), FF_OPT_TYPE_INT, 256, 0, INT_MAX, V|E},
+{"ivlc", "intra vlc table", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_INTRA_VLC, INT_MIN, INT_MAX, V|E, "flags2"},
+{"b_sensitivity", NULL, OFFSET(b_sensitivity), FF_OPT_TYPE_INT, 40, 1, INT_MAX, V|E},
+{"compression_level", NULL, OFFSET(compression_level), FF_OPT_TYPE_INT, FF_COMPRESSION_DEFAULT, INT_MIN, INT_MAX, V|A|E},
+{"use_lpc", NULL, OFFSET(use_lpc), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"lpc_coeff_precision", NULL, OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, A|E},
+{"min_prediction_order", NULL, OFFSET(min_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"max_prediction_order", NULL, OFFSET(max_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"prediction_order_method", NULL, OFFSET(prediction_order_method), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"min_partition_order", NULL, OFFSET(min_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"max_partition_order", NULL, OFFSET(max_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E},
+{"timecode_frame_start", NULL, OFFSET(timecode_frame_start), FF_OPT_TYPE_INT, 0, 0, INT_MAX, V|E},
+{"drop_frame_timecode", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_DROP_FRAME_TIMECODE, INT_MIN, INT_MAX, V|E, "flags2"},
+{NULL},
+};
+
+#undef A
+#undef V
+#undef S
+#undef E
+#undef D
+#undef DEFAULT
+
+static AVClass av_codec_context_class = { "AVCodecContext", context_to_name, options };
+
+void avcodec_get_context_defaults(AVCodecContext *s){
+    memset(s, 0, sizeof(AVCodecContext));
+
+    s->av_class= &av_codec_context_class;
+
+    av_opt_set_defaults(s);
+
+    s->rc_eq= "tex^qComp";
+    s->time_base= (AVRational){0,1};
+    s->get_buffer= avcodec_default_get_buffer;
+    s->release_buffer= avcodec_default_release_buffer;
+    s->get_format= avcodec_default_get_format;
+    s->execute= avcodec_default_execute;
+    s->sample_aspect_ratio= (AVRational){0,1};
+    s->pix_fmt= PIX_FMT_NONE;
+    s->sample_fmt= SAMPLE_FMT_S16; // FIXME: set to NONE
+
+    s->palctrl = NULL;
+    s->reget_buffer= avcodec_default_reget_buffer;
+}
+
+/**
+ * allocates a AVCodecContext and set it to defaults.
+ * this can be deallocated by simply calling free()
+ */
+AVCodecContext *avcodec_alloc_context(void){
+    AVCodecContext *avctx= av_malloc(sizeof(AVCodecContext));
+
+    if(avctx==NULL) return NULL;
+
+    avcodec_get_context_defaults(avctx);
+
+    return avctx;
+}
+
+void avcodec_get_frame_defaults(AVFrame *pic){
+    memset(pic, 0, sizeof(AVFrame));
+
+    pic->pts= AV_NOPTS_VALUE;
+    pic->key_frame= 1;
+}
+
+/**
+ * allocates a AVPFrame and set it to defaults.
+ * this can be deallocated by simply calling free()
+ */
+AVFrame *avcodec_alloc_frame(void){
+    AVFrame *pic= av_malloc(sizeof(AVFrame));
+
+    if(pic==NULL) return NULL;
+
+    avcodec_get_frame_defaults(pic);
+
+    return pic;
+}
+
+int avcodec_open(AVCodecContext *avctx, AVCodec *codec)
+{
+    int ret= -1;
+
+    entangled_thread_counter++;
+    if(entangled_thread_counter != 1){
+        av_log(avctx, AV_LOG_ERROR, "insufficient thread locking around avcodec_open/close()\n");
+        goto end;
+    }
+
+    if(avctx->codec)
+        goto end;
+
+    if (codec->priv_data_size > 0) {
+        avctx->priv_data = av_mallocz(codec->priv_data_size);
+        if (!avctx->priv_data)
+            goto end;
+    } else {
+        avctx->priv_data = NULL;
+    }
+
+    if(avctx->coded_width && avctx->coded_height)
+        avcodec_set_dimensions(avctx, avctx->coded_width, avctx->coded_height);
+    else if(avctx->width && avctx->height)
+        avcodec_set_dimensions(avctx, avctx->width, avctx->height);
+
+    if((avctx->coded_width||avctx->coded_height) && avcodec_check_dimensions(avctx,avctx->coded_width,avctx->coded_height)){
+        av_freep(&avctx->priv_data);
+        goto end;
+    }
+
+    avctx->codec = codec;
+    avctx->codec_id = codec->id;
+    avctx->frame_number = 0;
+    ret = avctx->codec->init(avctx);
+    if (ret < 0) {
+        av_freep(&avctx->priv_data);
+        avctx->codec= NULL;
+        goto end;
+    }
+    ret=0;
+end:
+    entangled_thread_counter--;
+    return ret;
+}
+
+int avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                         const short *samples)
+{
+    if(buf_size < FF_MIN_BUFFER_SIZE && 0){
+        av_log(avctx, AV_LOG_ERROR, "buffer smaller then minimum size\n");
+        return -1;
+    }
+    if((avctx->codec->capabilities & CODEC_CAP_DELAY) || samples){
+        int ret = avctx->codec->encode(avctx, buf, buf_size, (void *)samples);
+        avctx->frame_number++;
+        return ret;
+    }else
+        return 0;
+}
+
+int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                         const AVFrame *pict)
+{
+    if(buf_size < FF_MIN_BUFFER_SIZE){
+        av_log(avctx, AV_LOG_ERROR, "buffer smaller then minimum size\n");
+        return -1;
+    }
+    if(avcodec_check_dimensions(avctx,avctx->width,avctx->height))
+        return -1;
+    if((avctx->codec->capabilities & CODEC_CAP_DELAY) || pict){
+        int ret = avctx->codec->encode(avctx, buf, buf_size, (void *)pict);
+        avctx->frame_number++;
+        emms_c(); //needed to avoid an emms_c() call before every return;
+
+        return ret;
+    }else
+        return 0;
+}
+
+int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                            const AVSubtitle *sub)
+{
+    int ret;
+    ret = avctx->codec->encode(avctx, buf, buf_size, (void *)sub);
+    avctx->frame_number++;
+    return ret;
+}
+
+/**
+ * decode a frame.
+ * @param buf bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE larger then the actual read bytes
+ * because some optimized bitstream readers read 32 or 64 bit at once and could read over the end
+ * @param buf_size the size of the buffer in bytes
+ * @param got_picture_ptr zero if no frame could be decompressed, Otherwise, it is non zero
+ * @return -1 if error, otherwise return the number of
+ * bytes used.
+ */
+int avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture,
+                         int *got_picture_ptr,
+                         uint8_t *buf, int buf_size)
+{
+    int ret;
+
+    *got_picture_ptr= 0;
+    if((avctx->coded_width||avctx->coded_height) && avcodec_check_dimensions(avctx,avctx->coded_width,avctx->coded_height))
+        return -1;
+    if((avctx->codec->capabilities & CODEC_CAP_DELAY) || buf_size){
+        ret = avctx->codec->decode(avctx, picture, got_picture_ptr,
+                                buf, buf_size);
+
+        emms_c(); //needed to avoid an emms_c() call before every return;
+
+        if (*got_picture_ptr)
+            avctx->frame_number++;
+    }else
+        ret= 0;
+
+    return ret;
+}
+
+/* decode an audio frame. return -1 if error, otherwise return the
+   *number of bytes used. If no frame could be decompressed,
+   *frame_size_ptr is zero. Otherwise, it is the decompressed frame
+   *size in BYTES. */
+int avcodec_decode_audio(AVCodecContext *avctx, int16_t *samples,
+                         int *frame_size_ptr,
+                         uint8_t *buf, int buf_size)
+{
+    int ret;
+
+    *frame_size_ptr= 0;
+    if((avctx->codec->capabilities & CODEC_CAP_DELAY) || buf_size){
+        ret = avctx->codec->decode(avctx, samples, frame_size_ptr,
+                                buf, buf_size);
+        avctx->frame_number++;
+    }else
+        ret= 0;
+    return ret;
+}
+
+/* decode a subtitle message. return -1 if error, otherwise return the
+   *number of bytes used. If no subtitle could be decompressed,
+   *got_sub_ptr is zero. Otherwise, the subtitle is stored in *sub. */
+int avcodec_decode_subtitle(AVCodecContext *avctx, AVSubtitle *sub,
+                            int *got_sub_ptr,
+                            const uint8_t *buf, int buf_size)
+{
+    int ret;
+
+    *got_sub_ptr = 0;
+    ret = avctx->codec->decode(avctx, sub, got_sub_ptr,
+                               (uint8_t *)buf, buf_size);
+    if (*got_sub_ptr)
+        avctx->frame_number++;
+    return ret;
+}
+
+int avcodec_close(AVCodecContext *avctx)
+{
+    entangled_thread_counter++;
+    if(entangled_thread_counter != 1){
+        av_log(avctx, AV_LOG_ERROR, "insufficient thread locking around avcodec_open/close()\n");
+        entangled_thread_counter--;
+        return -1;
+    }
+
+    if (avctx->codec->close)
+        avctx->codec->close(avctx);
+    avcodec_default_free_buffers(avctx);
+    av_freep(&avctx->priv_data);
+    avctx->codec = NULL;
+    entangled_thread_counter--;
+    return 0;
+}
+
+AVCodec *avcodec_find_encoder(enum CodecID id)
+{
+    AVCodec *p;
+    p = first_avcodec;
+    while (p) {
+        if (p->encode != NULL && p->id == id)
+            return p;
+        p = p->next;
+    }
+    return NULL;
+}
+
+AVCodec *avcodec_find_encoder_by_name(const char *name)
+{
+    AVCodec *p;
+    p = first_avcodec;
+    while (p) {
+        if (p->encode != NULL && strcmp(name,p->name) == 0)
+            return p;
+        p = p->next;
+    }
+    return NULL;
+}
+
+AVCodec *avcodec_find_decoder(enum CodecID id)
+{
+    AVCodec *p;
+    p = first_avcodec;
+    while (p) {
+        if (p->decode != NULL && p->id == id)
+            return p;
+        p = p->next;
+    }
+    return NULL;
+}
+
+AVCodec *avcodec_find_decoder_by_name(const char *name)
+{
+    AVCodec *p;
+    p = first_avcodec;
+    while (p) {
+        if (p->decode != NULL && strcmp(name,p->name) == 0)
+            return p;
+        p = p->next;
+    }
+    return NULL;
+}
+
+void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
+{
+    const char *codec_name;
+    AVCodec *p;
+    char buf1[32];
+    char channels_str[100];
+    int bitrate;
+
+    if (encode)
+        p = avcodec_find_encoder(enc->codec_id);
+    else
+        p = avcodec_find_decoder(enc->codec_id);
+
+    if (p) {
+        codec_name = p->name;
+        if (!encode && enc->codec_id == CODEC_ID_MP3) {
+            if (enc->sub_id == 2)
+                codec_name = "mp2";
+            else if (enc->sub_id == 1)
+                codec_name = "mp1";
+        }
+    } else if (enc->codec_id == CODEC_ID_MPEG2TS) {
+        /* fake mpeg2 transport stream codec (currently not
+           registered) */
+        codec_name = "mpeg2ts";
+    } else if (enc->codec_name[0] != '\0') {
+        codec_name = enc->codec_name;
+    } else {
+        /* output avi tags */
+        if(   isprint(enc->codec_tag&0xFF) && isprint((enc->codec_tag>>8)&0xFF)
+           && isprint((enc->codec_tag>>16)&0xFF) && isprint((enc->codec_tag>>24)&0xFF)){
+            snprintf(buf1, sizeof(buf1), "%c%c%c%c / 0x%04X",
+                     enc->codec_tag & 0xff,
+                     (enc->codec_tag >> 8) & 0xff,
+                     (enc->codec_tag >> 16) & 0xff,
+                     (enc->codec_tag >> 24) & 0xff,
+                      enc->codec_tag);
+        } else {
+            snprintf(buf1, sizeof(buf1), "0x%04x", enc->codec_tag);
+        }
+        codec_name = buf1;
+    }
+
+    switch(enc->codec_type) {
+    case CODEC_TYPE_VIDEO:
+        snprintf(buf, buf_size,
+                 "Video: %s%s",
+                 codec_name, enc->mb_decision ? " (hq)" : "");
+        if (enc->pix_fmt != PIX_FMT_NONE) {
+            snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                     ", %s",
+                     avcodec_get_pix_fmt_name(enc->pix_fmt));
+        }
+        if (enc->width) {
+            snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                     ", %dx%d",
+                     enc->width, enc->height);
+            if(av_log_get_level() >= AV_LOG_DEBUG){
+                int g= ff_gcd(enc->time_base.num, enc->time_base.den);
+                snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                     ", %d/%d",
+                     enc->time_base.num/g, enc->time_base.den/g);
+            }
+        }
+        if (encode) {
+            snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                     ", q=%d-%d", enc->qmin, enc->qmax);
+        }
+        bitrate = enc->bit_rate;
+        break;
+    case CODEC_TYPE_AUDIO:
+        snprintf(buf, buf_size,
+                 "Audio: %s",
+                 codec_name);
+        switch (enc->channels) {
+            case 1:
+                strcpy(channels_str, "mono");
+                break;
+            case 2:
+                strcpy(channels_str, "stereo");
+                break;
+            case 6:
+                strcpy(channels_str, "5:1");
+                break;
+            default:
+                snprintf(channels_str, sizeof(channels_str), "%d channels", enc->channels);
+                break;
+        }
+        if (enc->sample_rate) {
+            snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                     ", %d Hz, %s",
+                     enc->sample_rate,
+                     channels_str);
+        }
+
+        /* for PCM codecs, compute bitrate directly */
+        switch(enc->codec_id) {
+        case CODEC_ID_PCM_S32LE:
+        case CODEC_ID_PCM_S32BE:
+        case CODEC_ID_PCM_U32LE:
+        case CODEC_ID_PCM_U32BE:
+            bitrate = enc->sample_rate * enc->channels * 32;
+            break;
+        case CODEC_ID_PCM_S24LE:
+        case CODEC_ID_PCM_S24BE:
+        case CODEC_ID_PCM_U24LE:
+        case CODEC_ID_PCM_U24BE:
+        case CODEC_ID_PCM_S24DAUD:
+            bitrate = enc->sample_rate * enc->channels * 24;
+            break;
+        case CODEC_ID_PCM_S16LE:
+        case CODEC_ID_PCM_S16BE:
+        case CODEC_ID_PCM_U16LE:
+        case CODEC_ID_PCM_U16BE:
+            bitrate = enc->sample_rate * enc->channels * 16;
+            break;
+        case CODEC_ID_PCM_S8:
+        case CODEC_ID_PCM_U8:
+        case CODEC_ID_PCM_ALAW:
+        case CODEC_ID_PCM_MULAW:
+            bitrate = enc->sample_rate * enc->channels * 8;
+            break;
+        default:
+            bitrate = enc->bit_rate;
+            break;
+        }
+        break;
+    case CODEC_TYPE_DATA:
+        snprintf(buf, buf_size, "Data: %s", codec_name);
+        bitrate = enc->bit_rate;
+        break;
+    case CODEC_TYPE_SUBTITLE:
+        snprintf(buf, buf_size, "Subtitle: %s", codec_name);
+        bitrate = enc->bit_rate;
+        break;
+    default:
+        snprintf(buf, buf_size, "Invalid Codec type %d", enc->codec_type);
+        return;
+    }
+    if (encode) {
+        if (enc->flags & CODEC_FLAG_PASS1)
+            snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                     ", pass 1");
+        if (enc->flags & CODEC_FLAG_PASS2)
+            snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                     ", pass 2");
+    }
+    if (bitrate != 0) {
+        snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                 ", %d kb/s", bitrate / 1000);
+    }
+}
+
+unsigned avcodec_version( void )
+{
+  return LIBAVCODEC_VERSION_INT;
+}
+
+unsigned avcodec_build( void )
+{
+  return LIBAVCODEC_BUILD;
+}
+
+static void init_crcs(void){
+    av_crc04C11DB7= av_mallocz_static(sizeof(AVCRC) * 257);
+    av_crc8005    = av_mallocz_static(sizeof(AVCRC) * 257);
+    av_crc07      = av_mallocz_static(sizeof(AVCRC) * 257);
+    av_crc_init(av_crc04C11DB7, 0, 32, 0x04c11db7, sizeof(AVCRC)*257);
+    av_crc_init(av_crc8005    , 0, 16, 0x8005    , sizeof(AVCRC)*257);
+    av_crc_init(av_crc07      , 0,  8, 0x07      , sizeof(AVCRC)*257);
+}
+
+/* must be called before any other functions */
+void avcodec_init(void)
+{
+    static int inited = 0;
+
+    if (inited != 0)
+        return;
+    inited = 1;
+
+    dsputil_static_init();
+    init_crcs();
+}
+
+/**
+ * Flush buffers, should be called when seeking or when swicthing to a different stream.
+ */
+void avcodec_flush_buffers(AVCodecContext *avctx)
+{
+    if(avctx->codec->flush)
+        avctx->codec->flush(avctx);
+}
+
+void avcodec_default_free_buffers(AVCodecContext *s){
+    int i, j;
+
+    if(s->internal_buffer==NULL) return;
+
+    for(i=0; i<INTERNAL_BUFFER_SIZE; i++){
+        InternalBuffer *buf= &((InternalBuffer*)s->internal_buffer)[i];
+        for(j=0; j<4; j++){
+            av_freep(&buf->base[j]);
+            buf->data[j]= NULL;
+        }
+    }
+    av_freep(&s->internal_buffer);
+
+    s->internal_buffer_count=0;
+}
+
+char av_get_pict_type_char(int pict_type){
+    switch(pict_type){
+    case I_TYPE: return 'I';
+    case P_TYPE: return 'P';
+    case B_TYPE: return 'B';
+    case S_TYPE: return 'S';
+    case SI_TYPE:return 'i';
+    case SP_TYPE:return 'p';
+    default:     return '?';
+    }
+}
+
+int av_get_bits_per_sample(enum CodecID codec_id){
+    switch(codec_id){
+    case CODEC_ID_ADPCM_SBPRO_2:
+        return 2;
+    case CODEC_ID_ADPCM_SBPRO_3:
+        return 3;
+    case CODEC_ID_ADPCM_SBPRO_4:
+    case CODEC_ID_ADPCM_CT:
+        return 4;
+    case CODEC_ID_PCM_ALAW:
+    case CODEC_ID_PCM_MULAW:
+    case CODEC_ID_PCM_S8:
+    case CODEC_ID_PCM_U8:
+        return 8;
+    case CODEC_ID_PCM_S16BE:
+    case CODEC_ID_PCM_S16LE:
+    case CODEC_ID_PCM_U16BE:
+    case CODEC_ID_PCM_U16LE:
+        return 16;
+    case CODEC_ID_PCM_S24DAUD:
+    case CODEC_ID_PCM_S24BE:
+    case CODEC_ID_PCM_S24LE:
+    case CODEC_ID_PCM_U24BE:
+    case CODEC_ID_PCM_U24LE:
+        return 24;
+    case CODEC_ID_PCM_S32BE:
+    case CODEC_ID_PCM_S32LE:
+    case CODEC_ID_PCM_U32BE:
+    case CODEC_ID_PCM_U32LE:
+        return 32;
+    default:
+        return 0;
+    }
+}
+
+#if !defined(HAVE_THREADS)
+int avcodec_thread_init(AVCodecContext *s, int thread_count){
+    return -1;
+}
+#endif
+
+unsigned int av_xiphlacing(unsigned char *s, unsigned int v)
+{
+    unsigned int n = 0;
+
+    while(v >= 0xff) {
+        *s++ = 0xff;
+        v -= 0xff;
+        n++;
+    }
+    *s = v;
+    n++;
+    return n;
+}
+
+/* Wrapper to work around the lack of mkstemp() on mingw/cygin.
+ * Also, tries to create file in /tmp first, if possible.
+ * *prefix can be a character constant; *filename will be allocated internally.
+ * Returns file descriptor of opened file (or -1 on error)
+ * and opened file name in **filename. */
+int av_tempfile(char *prefix, char **filename) {
+    int fd=-1;
+#ifdef __MINGW32__
+    *filename = tempnam(".", prefix);
+#else
+    size_t len = strlen(prefix) + 12; /* room for "/tmp/" and "XXXXXX\0" */
+    *filename = av_malloc(len);
+#endif
+    /* -----common section-----*/
+    if (*filename == NULL) {
+        av_log(NULL, AV_LOG_ERROR, "ff_tempfile: Cannot allocate file name\n");
+        return -1;
+    }
+#ifdef __MINGW32__
+    fd = open(*filename, _O_RDWR | _O_BINARY | _O_CREAT, 0444);
+#else
+    snprintf(*filename, len, "/tmp/%sXXXXXX", prefix);
+    fd = mkstemp(*filename);
+    if (fd < 0) {
+        snprintf(*filename, len, "./%sXXXXXX", prefix);
+        fd = mkstemp(*filename);
+    }
+#endif
+    /* -----common section-----*/
+    if (fd < 0) {
+        av_log(NULL, AV_LOG_ERROR, "ff_tempfile: Cannot open temporary file %s\n", *filename);
+        return -1;
+    }
+    return fd; /* success */
+}
diff --git a/contrib/ffmpeg/libavcodec/vc1.c b/contrib/ffmpeg/libavcodec/vc1.c
new file mode 100644
index 000000000..cd3c0c2d6
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vc1.c
@@ -0,0 +1,4370 @@
+/*
+ * VC-1 and WMV3 decoder
+ * Copyright (c) 2006 Konstantin Shishkov
+ * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file vc1.c
+ * VC-1 and WMV3 decoder
+ *
+ */
+#include "common.h"
+#include "dsputil.h"
+#include "avcodec.h"
+#include "mpegvideo.h"
+#include "vc1data.h"
+#include "vc1acdata.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+extern const uint32_t ff_table0_dc_lum[120][2], ff_table1_dc_lum[120][2];
+extern const uint32_t ff_table0_dc_chroma[120][2], ff_table1_dc_chroma[120][2];
+extern VLC ff_msmp4_dc_luma_vlc[2], ff_msmp4_dc_chroma_vlc[2];
+#define MB_INTRA_VLC_BITS 9
+extern VLC ff_msmp4_mb_i_vlc;
+extern const uint16_t ff_msmp4_mb_i_table[64][2];
+#define DC_VLC_BITS 9
+#define AC_VLC_BITS 9
+static const uint16_t table_mb_intra[64][2];
+
+
+/** Available Profiles */
+//@{
+enum Profile {
+    PROFILE_SIMPLE,
+    PROFILE_MAIN,
+    PROFILE_COMPLEX, ///< TODO: WMV9 specific
+    PROFILE_ADVANCED
+};
+//@}
+
+/** Sequence quantizer mode */
+//@{
+enum QuantMode {
+    QUANT_FRAME_IMPLICIT,    ///< Implicitly specified at frame level
+    QUANT_FRAME_EXPLICIT,    ///< Explicitly specified at frame level
+    QUANT_NON_UNIFORM,       ///< Non-uniform quant used for all frames
+    QUANT_UNIFORM            ///< Uniform quant used for all frames
+};
+//@}
+
+/** Where quant can be changed */
+//@{
+enum DQProfile {
+    DQPROFILE_FOUR_EDGES,
+    DQPROFILE_DOUBLE_EDGES,
+    DQPROFILE_SINGLE_EDGE,
+    DQPROFILE_ALL_MBS
+};
+//@}
+
+/** @name Where quant can be changed
+ */
+//@{
+enum DQSingleEdge {
+    DQSINGLE_BEDGE_LEFT,
+    DQSINGLE_BEDGE_TOP,
+    DQSINGLE_BEDGE_RIGHT,
+    DQSINGLE_BEDGE_BOTTOM
+};
+//@}
+
+/** Which pair of edges is quantized with ALTPQUANT */
+//@{
+enum DQDoubleEdge {
+    DQDOUBLE_BEDGE_TOPLEFT,
+    DQDOUBLE_BEDGE_TOPRIGHT,
+    DQDOUBLE_BEDGE_BOTTOMRIGHT,
+    DQDOUBLE_BEDGE_BOTTOMLEFT
+};
+//@}
+
+/** MV modes for P frames */
+//@{
+enum MVModes {
+    MV_PMODE_1MV_HPEL_BILIN,
+    MV_PMODE_1MV,
+    MV_PMODE_1MV_HPEL,
+    MV_PMODE_MIXED_MV,
+    MV_PMODE_INTENSITY_COMP
+};
+//@}
+
+/** @name MV types for B frames */
+//@{
+enum BMVTypes {
+    BMV_TYPE_BACKWARD,
+    BMV_TYPE_FORWARD,
+    BMV_TYPE_INTERPOLATED
+};
+//@}
+
+/** @name Block types for P/B frames */
+//@{
+enum TransformTypes {
+    TT_8X8,
+    TT_8X4_BOTTOM,
+    TT_8X4_TOP,
+    TT_8X4, //Both halves
+    TT_4X8_RIGHT,
+    TT_4X8_LEFT,
+    TT_4X8, //Both halves
+    TT_4X4
+};
+//@}
+
+/** Table for conversion between TTBLK and TTMB */
+static const int ttblk_to_tt[3][8] = {
+  { TT_8X4, TT_4X8, TT_8X8, TT_4X4, TT_8X4_TOP, TT_8X4_BOTTOM, TT_4X8_RIGHT, TT_4X8_LEFT },
+  { TT_8X8, TT_4X8_RIGHT, TT_4X8_LEFT, TT_4X4, TT_8X4, TT_4X8, TT_8X4_BOTTOM, TT_8X4_TOP },
+  { TT_8X8, TT_4X8, TT_4X4, TT_8X4_BOTTOM, TT_4X8_RIGHT, TT_4X8_LEFT, TT_8X4, TT_8X4_TOP }
+};
+
+static const int ttfrm_to_tt[4] = { TT_8X8, TT_8X4, TT_4X8, TT_4X4 };
+
+/** MV P mode - the 5th element is only used for mode 1 */
+static const uint8_t mv_pmode_table[2][5] = {
+  { MV_PMODE_1MV_HPEL_BILIN, MV_PMODE_1MV, MV_PMODE_1MV_HPEL, MV_PMODE_INTENSITY_COMP, MV_PMODE_MIXED_MV },
+  { MV_PMODE_1MV, MV_PMODE_MIXED_MV, MV_PMODE_1MV_HPEL, MV_PMODE_INTENSITY_COMP, MV_PMODE_1MV_HPEL_BILIN }
+};
+static const uint8_t mv_pmode_table2[2][4] = {
+  { MV_PMODE_1MV_HPEL_BILIN, MV_PMODE_1MV, MV_PMODE_1MV_HPEL, MV_PMODE_MIXED_MV },
+  { MV_PMODE_1MV, MV_PMODE_MIXED_MV, MV_PMODE_1MV_HPEL, MV_PMODE_1MV_HPEL_BILIN }
+};
+
+/** One more frame type */
+#define BI_TYPE 7
+
+static const int fps_nr[5] = { 24, 25, 30, 50, 60 },
+  fps_dr[2] = { 1000, 1001 };
+static const uint8_t pquant_table[3][32] = {
+  {  /* Implicit quantizer */
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  6,  7,  8,  9, 10, 11, 12,
+    13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 29, 31
+  },
+  {  /* Explicit quantizer, pquantizer uniform */
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+  },
+  {  /* Explicit quantizer, pquantizer non-uniform */
+     0,  1,  1,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,
+    14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 31
+  }
+};
+
+/** @name VC-1 VLC tables and defines
+ *  @todo TODO move this into the context
+ */
+//@{
+#define VC1_BFRACTION_VLC_BITS 7
+static VLC vc1_bfraction_vlc;
+#define VC1_IMODE_VLC_BITS 4
+static VLC vc1_imode_vlc;
+#define VC1_NORM2_VLC_BITS 3
+static VLC vc1_norm2_vlc;
+#define VC1_NORM6_VLC_BITS 9
+static VLC vc1_norm6_vlc;
+/* Could be optimized, one table only needs 8 bits */
+#define VC1_TTMB_VLC_BITS 9 //12
+static VLC vc1_ttmb_vlc[3];
+#define VC1_MV_DIFF_VLC_BITS 9 //15
+static VLC vc1_mv_diff_vlc[4];
+#define VC1_CBPCY_P_VLC_BITS 9 //14
+static VLC vc1_cbpcy_p_vlc[4];
+#define VC1_4MV_BLOCK_PATTERN_VLC_BITS 6
+static VLC vc1_4mv_block_pattern_vlc[4];
+#define VC1_TTBLK_VLC_BITS 5
+static VLC vc1_ttblk_vlc[3];
+#define VC1_SUBBLKPAT_VLC_BITS 6
+static VLC vc1_subblkpat_vlc[3];
+
+static VLC vc1_ac_coeff_table[8];
+//@}
+
+enum CodingSet {
+    CS_HIGH_MOT_INTRA = 0,
+    CS_HIGH_MOT_INTER,
+    CS_LOW_MOT_INTRA,
+    CS_LOW_MOT_INTER,
+    CS_MID_RATE_INTRA,
+    CS_MID_RATE_INTER,
+    CS_HIGH_RATE_INTRA,
+    CS_HIGH_RATE_INTER
+};
+
+/** @name Overlap conditions for Advanced Profile */
+//@{
+enum COTypes {
+    CONDOVER_NONE = 0,
+    CONDOVER_ALL,
+    CONDOVER_SELECT
+};
+//@}
+
+
+/** The VC1 Context
+ * @fixme Change size wherever another size is more efficient
+ * Many members are only used for Advanced Profile
+ */
+typedef struct VC1Context{
+    MpegEncContext s;
+
+    int bits;
+
+    /** Simple/Main Profile sequence header */
+    //@{
+    int res_sm;           ///< reserved, 2b
+    int res_x8;           ///< reserved
+    int multires;         ///< frame-level RESPIC syntax element present
+    int res_fasttx;       ///< reserved, always 1
+    int res_transtab;     ///< reserved, always 0
+    int rangered;         ///< RANGEREDFRM (range reduction) syntax element present
+                          ///< at frame level
+    int res_rtm_flag;     ///< reserved, set to 1
+    int reserved;         ///< reserved
+    //@}
+
+    /** Advanced Profile */
+    //@{
+    int level;            ///< 3bits, for Advanced/Simple Profile, provided by TS layer
+    int chromaformat;     ///< 2bits, 2=4:2:0, only defined
+    int postprocflag;     ///< Per-frame processing suggestion flag present
+    int broadcast;        ///< TFF/RFF present
+    int interlace;        ///< Progressive/interlaced (RPTFTM syntax element)
+    int tfcntrflag;       ///< TFCNTR present
+    int panscanflag;      ///< NUMPANSCANWIN, TOPLEFT{X,Y}, BOTRIGHT{X,Y} present
+    int extended_dmv;     ///< Additional extended dmv range at P/B frame-level
+    int color_prim;       ///< 8bits, chroma coordinates of the color primaries
+    int transfer_char;    ///< 8bits, Opto-electronic transfer characteristics
+    int matrix_coef;      ///< 8bits, Color primaries->YCbCr transform matrix
+    int hrd_param_flag;   ///< Presence of Hypothetical Reference
+                          ///< Decoder parameters
+    int psf;              ///< Progressive Segmented Frame
+    //@}
+
+    /** Sequence header data for all Profiles
+     * TODO: choose between ints, uint8_ts and monobit flags
+     */
+    //@{
+    int profile;          ///< 2bits, Profile
+    int frmrtq_postproc;  ///< 3bits,
+    int bitrtq_postproc;  ///< 5bits, quantized framerate-based postprocessing strength
+    int fastuvmc;         ///< Rounding of qpel vector to hpel ? (not in Simple)
+    int extended_mv;      ///< Ext MV in P/B (not in Simple)
+    int dquant;           ///< How qscale varies with MBs, 2bits (not in Simple)
+    int vstransform;      ///< variable-size [48]x[48] transform type + info
+    int overlap;          ///< overlapped transforms in use
+    int quantizer_mode;   ///< 2bits, quantizer mode used for sequence, see QUANT_*
+    int finterpflag;      ///< INTERPFRM present
+    //@}
+
+    /** Frame decoding info for all profiles */
+    //@{
+    uint8_t mv_mode;      ///< MV coding monde
+    uint8_t mv_mode2;     ///< Secondary MV coding mode (B frames)
+    int k_x;              ///< Number of bits for MVs (depends on MV range)
+    int k_y;              ///< Number of bits for MVs (depends on MV range)
+    int range_x, range_y; ///< MV range
+    uint8_t pq, altpq;    ///< Current/alternate frame quantizer scale
+    /** pquant parameters */
+    //@{
+    uint8_t dquantfrm;
+    uint8_t dqprofile;
+    uint8_t dqsbedge;
+    uint8_t dqbilevel;
+    //@}
+    /** AC coding set indexes
+     * @see 8.1.1.10, p(1)10
+     */
+    //@{
+    int c_ac_table_index; ///< Chroma index from ACFRM element
+    int y_ac_table_index; ///< Luma index from AC2FRM element
+    //@}
+    int ttfrm;            ///< Transform type info present at frame level
+    uint8_t ttmbf;        ///< Transform type flag
+    uint8_t ttblk4x4;     ///< Value of ttblk which indicates a 4x4 transform
+    int codingset;        ///< index of current table set from 11.8 to use for luma block decoding
+    int codingset2;       ///< index of current table set from 11.8 to use for chroma block decoding
+    int pqindex;          ///< raw pqindex used in coding set selection
+    int a_avail, c_avail;
+    uint8_t *mb_type_base, *mb_type[3];
+
+
+    /** Luma compensation parameters */
+    //@{
+    uint8_t lumscale;
+    uint8_t lumshift;
+    //@}
+    int16_t bfraction;    ///< Relative position % anchors=> how to scale MVs
+    uint8_t halfpq;       ///< Uniform quant over image and qp+.5
+    uint8_t respic;       ///< Frame-level flag for resized images
+    int buffer_fullness;  ///< HRD info
+    /** Ranges:
+     * -# 0 -> [-64n 63.f] x [-32, 31.f]
+     * -# 1 -> [-128, 127.f] x [-64, 63.f]
+     * -# 2 -> [-512, 511.f] x [-128, 127.f]
+     * -# 3 -> [-1024, 1023.f] x [-256, 255.f]
+     */
+    uint8_t mvrange;
+    uint8_t pquantizer;           ///< Uniform (over sequence) quantizer in use
+    VLC *cbpcy_vlc;               ///< CBPCY VLC table
+    int tt_index;                 ///< Index for Transform Type tables
+    uint8_t* mv_type_mb_plane;    ///< bitplane for mv_type == (4MV)
+    uint8_t* direct_mb_plane;     ///< bitplane for "direct" MBs
+    int mv_type_is_raw;           ///< mv type mb plane is not coded
+    int dmb_is_raw;               ///< direct mb plane is raw
+    int skip_is_raw;              ///< skip mb plane is not coded
+    uint8_t luty[256], lutuv[256]; // lookup tables used for intensity compensation
+    int use_ic;                   ///< use intensity compensation in B-frames
+    int rnd;                      ///< rounding control
+
+    /** Frame decoding info for S/M profiles only */
+    //@{
+    uint8_t rangeredfrm; ///< out_sample = CLIP((in_sample-128)*2+128)
+    uint8_t interpfrm;
+    //@}
+
+    /** Frame decoding info for Advanced profile */
+    //@{
+    uint8_t fcm; ///< 0->Progressive, 2->Frame-Interlace, 3->Field-Interlace
+    uint8_t numpanscanwin;
+    uint8_t tfcntr;
+    uint8_t rptfrm, tff, rff;
+    uint16_t topleftx;
+    uint16_t toplefty;
+    uint16_t bottomrightx;
+    uint16_t bottomrighty;
+    uint8_t uvsamp;
+    uint8_t postproc;
+    int hrd_num_leaky_buckets;
+    uint8_t bit_rate_exponent;
+    uint8_t buffer_size_exponent;
+    uint8_t* acpred_plane;       ///< AC prediction flags bitplane
+    int acpred_is_raw;
+    uint8_t* over_flags_plane;   ///< Overflags bitplane
+    int overflg_is_raw;
+    uint8_t condover;
+    uint16_t *hrd_rate, *hrd_buffer;
+    uint8_t *hrd_fullness;
+    uint8_t range_mapy_flag;
+    uint8_t range_mapuv_flag;
+    uint8_t range_mapy;
+    uint8_t range_mapuv;
+    //@}
+
+    int p_frame_skipped;
+    int bi_type;
+} VC1Context;
+
+/**
+ * Get unary code of limited length
+ * @fixme FIXME Slow and ugly
+ * @param gb GetBitContext
+ * @param[in] stop The bitstop value (unary code of 1's or 0's)
+ * @param[in] len Maximum length
+ * @return Unary length/index
+ */
+static int get_prefix(GetBitContext *gb, int stop, int len)
+{
+#if 1
+    int i;
+
+    for(i = 0; i < len && get_bits1(gb) != stop; i++);
+    return i;
+/*  int i = 0, tmp = !stop;
+
+  while (i != len && tmp != stop)
+  {
+    tmp = get_bits(gb, 1);
+    i++;
+  }
+  if (i == len && tmp != stop) return len+1;
+  return i;*/
+#else
+  unsigned int buf;
+  int log;
+
+  OPEN_READER(re, gb);
+  UPDATE_CACHE(re, gb);
+  buf=GET_CACHE(re, gb); //Still not sure
+  if (stop) buf = ~buf;
+
+  log= av_log2(-buf); //FIXME: -?
+  if (log < limit){
+    LAST_SKIP_BITS(re, gb, log+1);
+    CLOSE_READER(re, gb);
+    return log;
+  }
+
+  LAST_SKIP_BITS(re, gb, limit);
+  CLOSE_READER(re, gb);
+  return limit;
+#endif
+}
+
+static inline int decode210(GetBitContext *gb){
+    int n;
+    n = get_bits1(gb);
+    if (n == 1)
+        return 0;
+    else
+        return 2 - get_bits1(gb);
+}
+
+/**
+ * Init VC-1 specific tables and VC1Context members
+ * @param v The VC1Context to initialize
+ * @return Status
+ */
+static int vc1_init_common(VC1Context *v)
+{
+    static int done = 0;
+    int i = 0;
+
+    v->hrd_rate = v->hrd_buffer = NULL;
+
+    /* VLC tables */
+    if(!done)
+    {
+        done = 1;
+        init_vlc(&vc1_bfraction_vlc, VC1_BFRACTION_VLC_BITS, 23,
+                 vc1_bfraction_bits, 1, 1,
+                 vc1_bfraction_codes, 1, 1, 1);
+        init_vlc(&vc1_norm2_vlc, VC1_NORM2_VLC_BITS, 4,
+                 vc1_norm2_bits, 1, 1,
+                 vc1_norm2_codes, 1, 1, 1);
+        init_vlc(&vc1_norm6_vlc, VC1_NORM6_VLC_BITS, 64,
+                 vc1_norm6_bits, 1, 1,
+                 vc1_norm6_codes, 2, 2, 1);
+        init_vlc(&vc1_imode_vlc, VC1_IMODE_VLC_BITS, 7,
+                 vc1_imode_bits, 1, 1,
+                 vc1_imode_codes, 1, 1, 1);
+        for (i=0; i<3; i++)
+        {
+            init_vlc(&vc1_ttmb_vlc[i], VC1_TTMB_VLC_BITS, 16,
+                     vc1_ttmb_bits[i], 1, 1,
+                     vc1_ttmb_codes[i], 2, 2, 1);
+            init_vlc(&vc1_ttblk_vlc[i], VC1_TTBLK_VLC_BITS, 8,
+                     vc1_ttblk_bits[i], 1, 1,
+                     vc1_ttblk_codes[i], 1, 1, 1);
+            init_vlc(&vc1_subblkpat_vlc[i], VC1_SUBBLKPAT_VLC_BITS, 15,
+                     vc1_subblkpat_bits[i], 1, 1,
+                     vc1_subblkpat_codes[i], 1, 1, 1);
+        }
+        for(i=0; i<4; i++)
+        {
+            init_vlc(&vc1_4mv_block_pattern_vlc[i], VC1_4MV_BLOCK_PATTERN_VLC_BITS, 16,
+                     vc1_4mv_block_pattern_bits[i], 1, 1,
+                     vc1_4mv_block_pattern_codes[i], 1, 1, 1);
+            init_vlc(&vc1_cbpcy_p_vlc[i], VC1_CBPCY_P_VLC_BITS, 64,
+                     vc1_cbpcy_p_bits[i], 1, 1,
+                     vc1_cbpcy_p_codes[i], 2, 2, 1);
+            init_vlc(&vc1_mv_diff_vlc[i], VC1_MV_DIFF_VLC_BITS, 73,
+                     vc1_mv_diff_bits[i], 1, 1,
+                     vc1_mv_diff_codes[i], 2, 2, 1);
+        }
+        for(i=0; i<8; i++)
+            init_vlc(&vc1_ac_coeff_table[i], AC_VLC_BITS, vc1_ac_sizes[i],
+                     &vc1_ac_tables[i][0][1], 8, 4,
+                     &vc1_ac_tables[i][0][0], 8, 4, 1);
+        init_vlc(&ff_msmp4_mb_i_vlc, MB_INTRA_VLC_BITS, 64,
+                 &ff_msmp4_mb_i_table[0][1], 4, 2,
+                 &ff_msmp4_mb_i_table[0][0], 4, 2, 1);
+    }
+
+    /* Other defaults */
+    v->pq = -1;
+    v->mvrange = 0; /* 7.1.1.18, p80 */
+
+    return 0;
+}
+
+/***********************************************************************/
+/**
+ * @defgroup bitplane VC9 Bitplane decoding
+ * @see 8.7, p56
+ * @{
+ */
+
+/** @addtogroup bitplane
+ * Imode types
+ * @{
+ */
+enum Imode {
+    IMODE_RAW,
+    IMODE_NORM2,
+    IMODE_DIFF2,
+    IMODE_NORM6,
+    IMODE_DIFF6,
+    IMODE_ROWSKIP,
+    IMODE_COLSKIP
+};
+/** @} */ //imode defines
+
+/** Decode rows by checking if they are skipped
+ * @param plane Buffer to store decoded bits
+ * @param[in] width Width of this buffer
+ * @param[in] height Height of this buffer
+ * @param[in] stride of this buffer
+ */
+static void decode_rowskip(uint8_t* plane, int width, int height, int stride, GetBitContext *gb){
+    int x, y;
+
+    for (y=0; y<height; y++){
+        if (!get_bits(gb, 1)) //rowskip
+            memset(plane, 0, width);
+        else
+            for (x=0; x<width; x++)
+                plane[x] = get_bits(gb, 1);
+        plane += stride;
+    }
+}
+
+/** Decode columns by checking if they are skipped
+ * @param plane Buffer to store decoded bits
+ * @param[in] width Width of this buffer
+ * @param[in] height Height of this buffer
+ * @param[in] stride of this buffer
+ * @fixme FIXME: Optimize
+ */
+static void decode_colskip(uint8_t* plane, int width, int height, int stride, GetBitContext *gb){
+    int x, y;
+
+    for (x=0; x<width; x++){
+        if (!get_bits(gb, 1)) //colskip
+            for (y=0; y<height; y++)
+                plane[y*stride] = 0;
+        else
+            for (y=0; y<height; y++)
+                plane[y*stride] = get_bits(gb, 1);
+        plane ++;
+    }
+}
+
+/** Decode a bitplane's bits
+ * @param bp Bitplane where to store the decode bits
+ * @param v VC-1 context for bit reading and logging
+ * @return Status
+ * @fixme FIXME: Optimize
+ */
+static int bitplane_decoding(uint8_t* data, int *raw_flag, VC1Context *v)
+{
+    GetBitContext *gb = &v->s.gb;
+
+    int imode, x, y, code, offset;
+    uint8_t invert, *planep = data;
+    int width, height, stride;
+
+    width = v->s.mb_width;
+    height = v->s.mb_height;
+    stride = v->s.mb_stride;
+    invert = get_bits(gb, 1);
+    imode = get_vlc2(gb, vc1_imode_vlc.table, VC1_IMODE_VLC_BITS, 1);
+
+    *raw_flag = 0;
+    switch (imode)
+    {
+    case IMODE_RAW:
+        //Data is actually read in the MB layer (same for all tests == "raw")
+        *raw_flag = 1; //invert ignored
+        return invert;
+    case IMODE_DIFF2:
+    case IMODE_NORM2:
+        if ((height * width) & 1)
+        {
+            *planep++ = get_bits(gb, 1);
+            offset = 1;
+        }
+        else offset = 0;
+        // decode bitplane as one long line
+        for (y = offset; y < height * width; y += 2) {
+            code = get_vlc2(gb, vc1_norm2_vlc.table, VC1_NORM2_VLC_BITS, 1);
+            *planep++ = code & 1;
+            offset++;
+            if(offset == width) {
+                offset = 0;
+                planep += stride - width;
+            }
+            *planep++ = code >> 1;
+            offset++;
+            if(offset == width) {
+                offset = 0;
+                planep += stride - width;
+            }
+        }
+        break;
+    case IMODE_DIFF6:
+    case IMODE_NORM6:
+        if(!(height % 3) && (width % 3)) { // use 2x3 decoding
+            for(y = 0; y < height; y+= 3) {
+                for(x = width & 1; x < width; x += 2) {
+                    code = get_vlc2(gb, vc1_norm6_vlc.table, VC1_NORM6_VLC_BITS, 2);
+                    if(code < 0){
+                        av_log(v->s.avctx, AV_LOG_DEBUG, "invalid NORM-6 VLC\n");
+                        return -1;
+                    }
+                    planep[x + 0] = (code >> 0) & 1;
+                    planep[x + 1] = (code >> 1) & 1;
+                    planep[x + 0 + stride] = (code >> 2) & 1;
+                    planep[x + 1 + stride] = (code >> 3) & 1;
+                    planep[x + 0 + stride * 2] = (code >> 4) & 1;
+                    planep[x + 1 + stride * 2] = (code >> 5) & 1;
+                }
+                planep += stride * 3;
+            }
+            if(width & 1) decode_colskip(data, 1, height, stride, &v->s.gb);
+        } else { // 3x2
+            planep += (height & 1) * stride;
+            for(y = height & 1; y < height; y += 2) {
+                for(x = width % 3; x < width; x += 3) {
+                    code = get_vlc2(gb, vc1_norm6_vlc.table, VC1_NORM6_VLC_BITS, 2);
+                    if(code < 0){
+                        av_log(v->s.avctx, AV_LOG_DEBUG, "invalid NORM-6 VLC\n");
+                        return -1;
+                    }
+                    planep[x + 0] = (code >> 0) & 1;
+                    planep[x + 1] = (code >> 1) & 1;
+                    planep[x + 2] = (code >> 2) & 1;
+                    planep[x + 0 + stride] = (code >> 3) & 1;
+                    planep[x + 1 + stride] = (code >> 4) & 1;
+                    planep[x + 2 + stride] = (code >> 5) & 1;
+                }
+                planep += stride * 2;
+            }
+            x = width % 3;
+            if(x) decode_colskip(data  ,             x, height    , stride, &v->s.gb);
+            if(height & 1) decode_rowskip(data+x, width - x, 1, stride, &v->s.gb);
+        }
+        break;
+    case IMODE_ROWSKIP:
+        decode_rowskip(data, width, height, stride, &v->s.gb);
+        break;
+    case IMODE_COLSKIP:
+        decode_colskip(data, width, height, stride, &v->s.gb);
+        break;
+    default: break;
+    }
+
+    /* Applying diff operator */
+    if (imode == IMODE_DIFF2 || imode == IMODE_DIFF6)
+    {
+        planep = data;
+        planep[0] ^= invert;
+        for (x=1; x<width; x++)
+            planep[x] ^= planep[x-1];
+        for (y=1; y<height; y++)
+        {
+            planep += stride;
+            planep[0] ^= planep[-stride];
+            for (x=1; x<width; x++)
+            {
+                if (planep[x-1] != planep[x-stride]) planep[x] ^= invert;
+                else                                 planep[x] ^= planep[x-1];
+            }
+        }
+    }
+    else if (invert)
+    {
+        planep = data;
+        for (x=0; x<stride*height; x++) planep[x] = !planep[x]; //FIXME stride
+    }
+    return (imode<<1) + invert;
+}
+
+/** @} */ //Bitplane group
+
+/***********************************************************************/
+/** VOP Dquant decoding
+ * @param v VC-1 Context
+ */
+static int vop_dquant_decoding(VC1Context *v)
+{
+    GetBitContext *gb = &v->s.gb;
+    int pqdiff;
+
+    //variable size
+    if (v->dquant == 2)
+    {
+        pqdiff = get_bits(gb, 3);
+        if (pqdiff == 7) v->altpq = get_bits(gb, 5);
+        else v->altpq = v->pq + pqdiff + 1;
+    }
+    else
+    {
+        v->dquantfrm = get_bits(gb, 1);
+        if ( v->dquantfrm )
+        {
+            v->dqprofile = get_bits(gb, 2);
+            switch (v->dqprofile)
+            {
+            case DQPROFILE_SINGLE_EDGE:
+            case DQPROFILE_DOUBLE_EDGES:
+                v->dqsbedge = get_bits(gb, 2);
+                break;
+            case DQPROFILE_ALL_MBS:
+                v->dqbilevel = get_bits(gb, 1);
+            default: break; //Forbidden ?
+            }
+            if (v->dqbilevel || v->dqprofile != DQPROFILE_ALL_MBS)
+            {
+                pqdiff = get_bits(gb, 3);
+                if (pqdiff == 7) v->altpq = get_bits(gb, 5);
+                else v->altpq = v->pq + pqdiff + 1;
+            }
+        }
+    }
+    return 0;
+}
+
+/** Put block onto picture
+ */
+static void vc1_put_block(VC1Context *v, DCTELEM block[6][64])
+{
+    uint8_t *Y;
+    int ys, us, vs;
+    DSPContext *dsp = &v->s.dsp;
+
+    if(v->rangeredfrm) {
+        int i, j, k;
+        for(k = 0; k < 6; k++)
+            for(j = 0; j < 8; j++)
+                for(i = 0; i < 8; i++)
+                    block[k][i + j*8] = ((block[k][i + j*8] - 128) << 1) + 128;
+
+    }
+    ys = v->s.current_picture.linesize[0];
+    us = v->s.current_picture.linesize[1];
+    vs = v->s.current_picture.linesize[2];
+    Y = v->s.dest[0];
+
+    dsp->put_pixels_clamped(block[0], Y, ys);
+    dsp->put_pixels_clamped(block[1], Y + 8, ys);
+    Y += ys * 8;
+    dsp->put_pixels_clamped(block[2], Y, ys);
+    dsp->put_pixels_clamped(block[3], Y + 8, ys);
+
+    if(!(v->s.flags & CODEC_FLAG_GRAY)) {
+        dsp->put_pixels_clamped(block[4], v->s.dest[1], us);
+        dsp->put_pixels_clamped(block[5], v->s.dest[2], vs);
+    }
+}
+
+/** Do motion compensation over 1 macroblock
+ * Mostly adapted hpel_motion and qpel_motion from mpegvideo.c
+ */
+static void vc1_mc_1mv(VC1Context *v, int dir)
+{
+    MpegEncContext *s = &v->s;
+    DSPContext *dsp = &v->s.dsp;
+    uint8_t *srcY, *srcU, *srcV;
+    int dxy, uvdxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
+
+    if(!v->s.last_picture.data[0])return;
+
+    mx = s->mv[dir][0][0];
+    my = s->mv[dir][0][1];
+
+    // store motion vectors for further use in B frames
+    if(s->pict_type == P_TYPE) {
+        s->current_picture.motion_val[1][s->block_index[0]][0] = mx;
+        s->current_picture.motion_val[1][s->block_index[0]][1] = my;
+    }
+    uvmx = (mx + ((mx & 3) == 3)) >> 1;
+    uvmy = (my + ((my & 3) == 3)) >> 1;
+    if(!dir) {
+        srcY = s->last_picture.data[0];
+        srcU = s->last_picture.data[1];
+        srcV = s->last_picture.data[2];
+    } else {
+        srcY = s->next_picture.data[0];
+        srcU = s->next_picture.data[1];
+        srcV = s->next_picture.data[2];
+    }
+
+    src_x = s->mb_x * 16 + (mx >> 2);
+    src_y = s->mb_y * 16 + (my >> 2);
+    uvsrc_x = s->mb_x * 8 + (uvmx >> 2);
+    uvsrc_y = s->mb_y * 8 + (uvmy >> 2);
+
+    src_x   = clip(  src_x, -16, s->mb_width  * 16);
+    src_y   = clip(  src_y, -16, s->mb_height * 16);
+    uvsrc_x = clip(uvsrc_x,  -8, s->mb_width  *  8);
+    uvsrc_y = clip(uvsrc_y,  -8, s->mb_height *  8);
+
+    srcY += src_y * s->linesize + src_x;
+    srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
+    srcV += uvsrc_y * s->uvlinesize + uvsrc_x;
+
+    /* for grayscale we should not try to read from unknown area */
+    if(s->flags & CODEC_FLAG_GRAY) {
+        srcU = s->edge_emu_buffer + 18 * s->linesize;
+        srcV = s->edge_emu_buffer + 18 * s->linesize;
+    }
+
+    if(v->rangeredfrm || (v->mv_mode == MV_PMODE_INTENSITY_COMP)
+       || (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx&3) - 16 - s->mspel*3
+       || (unsigned)(src_y - s->mspel) > s->v_edge_pos - (my&3) - 16 - s->mspel*3){
+        uint8_t *uvbuf= s->edge_emu_buffer + 19 * s->linesize;
+
+        srcY -= s->mspel * (1 + s->linesize);
+        ff_emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize, 17+s->mspel*2, 17+s->mspel*2,
+                            src_x - s->mspel, src_y - s->mspel, s->h_edge_pos, s->v_edge_pos);
+        srcY = s->edge_emu_buffer;
+        ff_emulated_edge_mc(uvbuf     , srcU, s->uvlinesize, 8+1, 8+1,
+                            uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
+        ff_emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, 8+1, 8+1,
+                            uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
+        srcU = uvbuf;
+        srcV = uvbuf + 16;
+        /* if we deal with range reduction we need to scale source blocks */
+        if(v->rangeredfrm) {
+            int i, j;
+            uint8_t *src, *src2;
+
+            src = srcY;
+            for(j = 0; j < 17 + s->mspel*2; j++) {
+                for(i = 0; i < 17 + s->mspel*2; i++) src[i] = ((src[i] - 128) >> 1) + 128;
+                src += s->linesize;
+            }
+            src = srcU; src2 = srcV;
+            for(j = 0; j < 9; j++) {
+                for(i = 0; i < 9; i++) {
+                    src[i] = ((src[i] - 128) >> 1) + 128;
+                    src2[i] = ((src2[i] - 128) >> 1) + 128;
+                }
+                src += s->uvlinesize;
+                src2 += s->uvlinesize;
+            }
+        }
+        /* if we deal with intensity compensation we need to scale source blocks */
+        if(v->mv_mode == MV_PMODE_INTENSITY_COMP) {
+            int i, j;
+            uint8_t *src, *src2;
+
+            src = srcY;
+            for(j = 0; j < 17 + s->mspel*2; j++) {
+                for(i = 0; i < 17 + s->mspel*2; i++) src[i] = v->luty[src[i]];
+                src += s->linesize;
+            }
+            src = srcU; src2 = srcV;
+            for(j = 0; j < 9; j++) {
+                for(i = 0; i < 9; i++) {
+                    src[i] = v->lutuv[src[i]];
+                    src2[i] = v->lutuv[src2[i]];
+                }
+                src += s->uvlinesize;
+                src2 += s->uvlinesize;
+            }
+        }
+        srcY += s->mspel * (1 + s->linesize);
+    }
+
+    if(v->fastuvmc) {
+        uvmx = uvmx + ((uvmx<0)?-(uvmx&1):(uvmx&1));
+        uvmy = uvmy + ((uvmy<0)?-(uvmy&1):(uvmy&1));
+    }
+
+    if(s->mspel) {
+        dxy = ((my & 3) << 2) | (mx & 3);
+        dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0]    , srcY    , s->linesize, v->rnd);
+        dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd);
+        srcY += s->linesize * 8;
+        dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize    , srcY    , s->linesize, v->rnd);
+        dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
+    } else { // hpel mc - always used for luma
+        dxy = (my & 2) | ((mx & 2) >> 1);
+
+        if(!v->rnd)
+            dsp->put_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
+        else
+            dsp->put_no_rnd_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
+    }
+
+    if(s->flags & CODEC_FLAG_GRAY) return;
+    /* Chroma MC always uses qpel bilinear */
+    uvdxy = ((uvmy & 3) << 2) | (uvmx & 3);
+    uvmx = (uvmx&3)<<1;
+    uvmy = (uvmy&3)<<1;
+    if(!v->rnd){
+        dsp->put_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
+        dsp->put_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
+    }else{
+        dsp->put_no_rnd_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
+        dsp->put_no_rnd_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
+    }
+}
+
+/** Do motion compensation for 4-MV macroblock - luminance block
+ */
+static void vc1_mc_4mv_luma(VC1Context *v, int n)
+{
+    MpegEncContext *s = &v->s;
+    DSPContext *dsp = &v->s.dsp;
+    uint8_t *srcY;
+    int dxy, mx, my, src_x, src_y;
+    int off;
+
+    if(!v->s.last_picture.data[0])return;
+    mx = s->mv[0][n][0];
+    my = s->mv[0][n][1];
+    srcY = s->last_picture.data[0];
+
+    off = s->linesize * 4 * (n&2) + (n&1) * 8;
+
+    src_x = s->mb_x * 16 + (n&1) * 8 + (mx >> 2);
+    src_y = s->mb_y * 16 + (n&2) * 4 + (my >> 2);
+
+    src_x   = clip(  src_x, -16, s->mb_width  * 16);
+    src_y   = clip(  src_y, -16, s->mb_height * 16);
+
+    srcY += src_y * s->linesize + src_x;
+
+    if(v->rangeredfrm || (v->mv_mode == MV_PMODE_INTENSITY_COMP)
+       || (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx&3) - 8 - s->mspel*2
+       || (unsigned)(src_y - s->mspel) > s->v_edge_pos - (my&3) - 8 - s->mspel*2){
+        srcY -= s->mspel * (1 + s->linesize);
+        ff_emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize, 9+s->mspel*2, 9+s->mspel*2,
+                            src_x - s->mspel, src_y - s->mspel, s->h_edge_pos, s->v_edge_pos);
+        srcY = s->edge_emu_buffer;
+        /* if we deal with range reduction we need to scale source blocks */
+        if(v->rangeredfrm) {
+            int i, j;
+            uint8_t *src;
+
+            src = srcY;
+            for(j = 0; j < 9 + s->mspel*2; j++) {
+                for(i = 0; i < 9 + s->mspel*2; i++) src[i] = ((src[i] - 128) >> 1) + 128;
+                src += s->linesize;
+            }
+        }
+        /* if we deal with intensity compensation we need to scale source blocks */
+        if(v->mv_mode == MV_PMODE_INTENSITY_COMP) {
+            int i, j;
+            uint8_t *src;
+
+            src = srcY;
+            for(j = 0; j < 9 + s->mspel*2; j++) {
+                for(i = 0; i < 9 + s->mspel*2; i++) src[i] = v->luty[src[i]];
+                src += s->linesize;
+            }
+        }
+        srcY += s->mspel * (1 + s->linesize);
+    }
+
+    if(s->mspel) {
+        dxy = ((my & 3) << 2) | (mx & 3);
+        dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize, v->rnd);
+    } else { // hpel mc - always used for luma
+        dxy = (my & 2) | ((mx & 2) >> 1);
+        if(!v->rnd)
+            dsp->put_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize, 8);
+        else
+            dsp->put_no_rnd_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize, 8);
+    }
+}
+
+static inline int median4(int a, int b, int c, int d)
+{
+    if(a < b) {
+        if(c < d) return (FFMIN(b, d) + FFMAX(a, c)) / 2;
+        else      return (FFMIN(b, c) + FFMAX(a, d)) / 2;
+    } else {
+        if(c < d) return (FFMIN(a, d) + FFMAX(b, c)) / 2;
+        else      return (FFMIN(a, c) + FFMAX(b, d)) / 2;
+    }
+}
+
+
+/** Do motion compensation for 4-MV macroblock - both chroma blocks
+ */
+static void vc1_mc_4mv_chroma(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+    DSPContext *dsp = &v->s.dsp;
+    uint8_t *srcU, *srcV;
+    int uvdxy, uvmx, uvmy, uvsrc_x, uvsrc_y;
+    int i, idx, tx = 0, ty = 0;
+    int mvx[4], mvy[4], intra[4];
+    static const int count[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
+
+    if(!v->s.last_picture.data[0])return;
+    if(s->flags & CODEC_FLAG_GRAY) return;
+
+    for(i = 0; i < 4; i++) {
+        mvx[i] = s->mv[0][i][0];
+        mvy[i] = s->mv[0][i][1];
+        intra[i] = v->mb_type[0][s->block_index[i]];
+    }
+
+    /* calculate chroma MV vector from four luma MVs */
+    idx = (intra[3] << 3) | (intra[2] << 2) | (intra[1] << 1) | intra[0];
+    if(!idx) { // all blocks are inter
+        tx = median4(mvx[0], mvx[1], mvx[2], mvx[3]);
+        ty = median4(mvy[0], mvy[1], mvy[2], mvy[3]);
+    } else if(count[idx] == 1) { // 3 inter blocks
+        switch(idx) {
+        case 0x1:
+            tx = mid_pred(mvx[1], mvx[2], mvx[3]);
+            ty = mid_pred(mvy[1], mvy[2], mvy[3]);
+            break;
+        case 0x2:
+            tx = mid_pred(mvx[0], mvx[2], mvx[3]);
+            ty = mid_pred(mvy[0], mvy[2], mvy[3]);
+            break;
+        case 0x4:
+            tx = mid_pred(mvx[0], mvx[1], mvx[3]);
+            ty = mid_pred(mvy[0], mvy[1], mvy[3]);
+            break;
+        case 0x8:
+            tx = mid_pred(mvx[0], mvx[1], mvx[2]);
+            ty = mid_pred(mvy[0], mvy[1], mvy[2]);
+            break;
+        }
+    } else if(count[idx] == 2) {
+        int t1 = 0, t2 = 0;
+        for(i=0; i<3;i++) if(!intra[i]) {t1 = i; break;}
+        for(i= t1+1; i<4; i++)if(!intra[i]) {t2 = i; break;}
+        tx = (mvx[t1] + mvx[t2]) / 2;
+        ty = (mvy[t1] + mvy[t2]) / 2;
+    } else
+        return; //no need to do MC for inter blocks
+
+    s->current_picture.motion_val[1][s->block_index[0]][0] = tx;
+    s->current_picture.motion_val[1][s->block_index[0]][1] = ty;
+    uvmx = (tx + ((tx&3) == 3)) >> 1;
+    uvmy = (ty + ((ty&3) == 3)) >> 1;
+
+    uvsrc_x = s->mb_x * 8 + (uvmx >> 2);
+    uvsrc_y = s->mb_y * 8 + (uvmy >> 2);
+
+    uvsrc_x = clip(uvsrc_x,  -8, s->mb_width  *  8);
+    uvsrc_y = clip(uvsrc_y,  -8, s->mb_height *  8);
+    srcU = s->last_picture.data[1] + uvsrc_y * s->uvlinesize + uvsrc_x;
+    srcV = s->last_picture.data[2] + uvsrc_y * s->uvlinesize + uvsrc_x;
+    if(v->rangeredfrm || (v->mv_mode == MV_PMODE_INTENSITY_COMP)
+       || (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 9
+       || (unsigned)uvsrc_y > (s->v_edge_pos >> 1) - 9){
+        ff_emulated_edge_mc(s->edge_emu_buffer     , srcU, s->uvlinesize, 8+1, 8+1,
+                            uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
+        ff_emulated_edge_mc(s->edge_emu_buffer + 16, srcV, s->uvlinesize, 8+1, 8+1,
+                            uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
+        srcU = s->edge_emu_buffer;
+        srcV = s->edge_emu_buffer + 16;
+
+        /* if we deal with range reduction we need to scale source blocks */
+        if(v->rangeredfrm) {
+            int i, j;
+            uint8_t *src, *src2;
+
+            src = srcU; src2 = srcV;
+            for(j = 0; j < 9; j++) {
+                for(i = 0; i < 9; i++) {
+                    src[i] = ((src[i] - 128) >> 1) + 128;
+                    src2[i] = ((src2[i] - 128) >> 1) + 128;
+                }
+                src += s->uvlinesize;
+                src2 += s->uvlinesize;
+            }
+        }
+        /* if we deal with intensity compensation we need to scale source blocks */
+        if(v->mv_mode == MV_PMODE_INTENSITY_COMP) {
+            int i, j;
+            uint8_t *src, *src2;
+
+            src = srcU; src2 = srcV;
+            for(j = 0; j < 9; j++) {
+                for(i = 0; i < 9; i++) {
+                    src[i] = v->lutuv[src[i]];
+                    src2[i] = v->lutuv[src2[i]];
+                }
+                src += s->uvlinesize;
+                src2 += s->uvlinesize;
+            }
+        }
+    }
+
+    if(v->fastuvmc) {
+        uvmx = uvmx + ((uvmx<0)?-(uvmx&1):(uvmx&1));
+        uvmy = uvmy + ((uvmy<0)?-(uvmy&1):(uvmy&1));
+    }
+
+    /* Chroma MC always uses qpel bilinear */
+    uvdxy = ((uvmy & 3) << 2) | (uvmx & 3);
+    uvmx = (uvmx&3)<<1;
+    uvmy = (uvmy&3)<<1;
+    if(!v->rnd){
+        dsp->put_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
+        dsp->put_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
+    }else{
+        dsp->put_no_rnd_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
+        dsp->put_no_rnd_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
+    }
+}
+
+static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb);
+
+/**
+ * Decode Simple/Main Profiles sequence header
+ * @see Figure 7-8, p16-17
+ * @param avctx Codec context
+ * @param gb GetBit context initialized from Codec context extra_data
+ * @return Status
+ */
+static int decode_sequence_header(AVCodecContext *avctx, GetBitContext *gb)
+{
+    VC1Context *v = avctx->priv_data;
+
+    av_log(avctx, AV_LOG_DEBUG, "Header: %0X\n", show_bits(gb, 32));
+    v->profile = get_bits(gb, 2);
+    if (v->profile == 2)
+    {
+        av_log(avctx, AV_LOG_ERROR, "Profile value 2 is forbidden (and WMV3 Complex Profile is unsupported)\n");
+        return -1;
+    }
+
+    if (v->profile == PROFILE_ADVANCED)
+    {
+        return decode_sequence_header_adv(v, gb);
+    }
+    else
+    {
+        v->res_sm = get_bits(gb, 2); //reserved
+        if (v->res_sm)
+        {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Reserved RES_SM=%i is forbidden\n", v->res_sm);
+            return -1;
+        }
+    }
+
+    // (fps-2)/4 (->30)
+    v->frmrtq_postproc = get_bits(gb, 3); //common
+    // (bitrate-32kbps)/64kbps
+    v->bitrtq_postproc = get_bits(gb, 5); //common
+    v->s.loop_filter = get_bits(gb, 1); //common
+    if(v->s.loop_filter == 1 && v->profile == PROFILE_SIMPLE)
+    {
+        av_log(avctx, AV_LOG_ERROR,
+               "LOOPFILTER shell not be enabled in simple profile\n");
+    }
+
+    v->res_x8 = get_bits(gb, 1); //reserved
+    if (v->res_x8)
+    {
+        av_log(avctx, AV_LOG_ERROR,
+               "1 for reserved RES_X8 is forbidden\n");
+        //return -1;
+    }
+    v->multires = get_bits(gb, 1);
+    v->res_fasttx = get_bits(gb, 1);
+    if (!v->res_fasttx)
+    {
+        av_log(avctx, AV_LOG_ERROR,
+               "0 for reserved RES_FASTTX is forbidden\n");
+        //return -1;
+    }
+
+    v->fastuvmc =  get_bits(gb, 1); //common
+    if (!v->profile && !v->fastuvmc)
+    {
+        av_log(avctx, AV_LOG_ERROR,
+               "FASTUVMC unavailable in Simple Profile\n");
+        return -1;
+    }
+    v->extended_mv =  get_bits(gb, 1); //common
+    if (!v->profile && v->extended_mv)
+    {
+        av_log(avctx, AV_LOG_ERROR,
+               "Extended MVs unavailable in Simple Profile\n");
+        return -1;
+    }
+    v->dquant =  get_bits(gb, 2); //common
+    v->vstransform =  get_bits(gb, 1); //common
+
+    v->res_transtab = get_bits(gb, 1);
+    if (v->res_transtab)
+    {
+        av_log(avctx, AV_LOG_ERROR,
+               "1 for reserved RES_TRANSTAB is forbidden\n");
+        return -1;
+    }
+
+    v->overlap = get_bits(gb, 1); //common
+
+    v->s.resync_marker = get_bits(gb, 1);
+    v->rangered = get_bits(gb, 1);
+    if (v->rangered && v->profile == PROFILE_SIMPLE)
+    {
+        av_log(avctx, AV_LOG_INFO,
+               "RANGERED should be set to 0 in simple profile\n");
+    }
+
+    v->s.max_b_frames = avctx->max_b_frames = get_bits(gb, 3); //common
+    v->quantizer_mode = get_bits(gb, 2); //common
+
+    v->finterpflag = get_bits(gb, 1); //common
+    v->res_rtm_flag = get_bits(gb, 1); //reserved
+    if (!v->res_rtm_flag)
+    {
+//            av_log(avctx, AV_LOG_ERROR,
+//                   "0 for reserved RES_RTM_FLAG is forbidden\n");
+        av_log(avctx, AV_LOG_ERROR,
+               "Old WMV3 version detected, only I-frames will be decoded\n");
+        //return -1;
+    }
+    av_log(avctx, AV_LOG_DEBUG,
+               "Profile %i:\nfrmrtq_postproc=%i, bitrtq_postproc=%i\n"
+               "LoopFilter=%i, MultiRes=%i, FastUVMC=%i, Extended MV=%i\n"
+               "Rangered=%i, VSTransform=%i, Overlap=%i, SyncMarker=%i\n"
+               "DQuant=%i, Quantizer mode=%i, Max B frames=%i\n",
+               v->profile, v->frmrtq_postproc, v->bitrtq_postproc,
+               v->s.loop_filter, v->multires, v->fastuvmc, v->extended_mv,
+               v->rangered, v->vstransform, v->overlap, v->s.resync_marker,
+               v->dquant, v->quantizer_mode, avctx->max_b_frames
+               );
+    return 0;
+}
+
+static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb)
+{
+    v->res_rtm_flag = 1;
+    v->level = get_bits(gb, 3);
+    if(v->level >= 5)
+    {
+        av_log(v->s.avctx, AV_LOG_ERROR, "Reserved LEVEL %i\n",v->level);
+    }
+    v->chromaformat = get_bits(gb, 2);
+    if (v->chromaformat != 1)
+    {
+        av_log(v->s.avctx, AV_LOG_ERROR,
+               "Only 4:2:0 chroma format supported\n");
+        return -1;
+    }
+
+    // (fps-2)/4 (->30)
+    v->frmrtq_postproc = get_bits(gb, 3); //common
+    // (bitrate-32kbps)/64kbps
+    v->bitrtq_postproc = get_bits(gb, 5); //common
+    v->postprocflag = get_bits(gb, 1); //common
+
+    v->s.avctx->coded_width = (get_bits(gb, 12) + 1) << 1;
+    v->s.avctx->coded_height = (get_bits(gb, 12) + 1) << 1;
+    v->broadcast = get_bits1(gb);
+    v->interlace = get_bits1(gb);
+    v->tfcntrflag = get_bits1(gb);
+    v->finterpflag = get_bits1(gb);
+    get_bits1(gb); // reserved
+    v->psf = get_bits1(gb);
+    if(v->psf) { //PsF, 6.1.13
+        av_log(v->s.avctx, AV_LOG_ERROR, "Progressive Segmented Frame mode: not supported (yet)\n");
+        return -1;
+    }
+    if(get_bits1(gb)) { //Display Info - decoding is not affected by it
+        int w, h, ar = 0;
+        av_log(v->s.avctx, AV_LOG_INFO, "Display extended info:\n");
+        w = get_bits(gb, 14);
+        h = get_bits(gb, 14);
+        av_log(v->s.avctx, AV_LOG_INFO, "Display dimensions: %ix%i\n", w, h);
+        //TODO: store aspect ratio in AVCodecContext
+        if(get_bits1(gb))
+            ar = get_bits(gb, 4);
+        if(ar == 15) {
+            w = get_bits(gb, 8);
+            h = get_bits(gb, 8);
+        }
+
+        if(get_bits1(gb)){ //framerate stuff
+            if(get_bits1(gb)) {
+                get_bits(gb, 16);
+            } else {
+                get_bits(gb, 8);
+                get_bits(gb, 4);
+            }
+        }
+
+        if(get_bits1(gb)){
+            v->color_prim = get_bits(gb, 8);
+            v->transfer_char = get_bits(gb, 8);
+            v->matrix_coef = get_bits(gb, 8);
+        }
+    }
+
+    v->hrd_param_flag = get_bits1(gb);
+    if(v->hrd_param_flag) {
+        int i;
+        v->hrd_num_leaky_buckets = get_bits(gb, 5);
+        get_bits(gb, 4); //bitrate exponent
+        get_bits(gb, 4); //buffer size exponent
+        for(i = 0; i < v->hrd_num_leaky_buckets; i++) {
+            get_bits(gb, 16); //hrd_rate[n]
+            get_bits(gb, 16); //hrd_buffer[n]
+        }
+    }
+    return 0;
+}
+
+static int decode_entry_point(AVCodecContext *avctx, GetBitContext *gb)
+{
+    VC1Context *v = avctx->priv_data;
+    int i;
+
+    av_log(avctx, AV_LOG_DEBUG, "Entry point: %08X\n", show_bits_long(gb, 32));
+    get_bits1(gb); // broken link
+    avctx->max_b_frames = 1 - get_bits1(gb); // 'closed entry' also signalize possible B-frames
+    v->panscanflag = get_bits1(gb);
+    get_bits1(gb); // refdist flag
+    v->s.loop_filter = get_bits1(gb);
+    v->fastuvmc = get_bits1(gb);
+    v->extended_mv = get_bits1(gb);
+    v->dquant = get_bits(gb, 2);
+    v->vstransform = get_bits1(gb);
+    v->overlap = get_bits1(gb);
+    v->quantizer_mode = get_bits(gb, 2);
+
+    if(v->hrd_param_flag){
+        for(i = 0; i < v->hrd_num_leaky_buckets; i++) {
+            get_bits(gb, 8); //hrd_full[n]
+        }
+    }
+
+    if(get_bits1(gb)){
+        avctx->coded_width = (get_bits(gb, 12)+1)<<1;
+        avctx->coded_height = (get_bits(gb, 12)+1)<<1;
+    }
+    if(v->extended_mv)
+        v->extended_dmv = get_bits1(gb);
+    if(get_bits1(gb)) {
+        av_log(avctx, AV_LOG_ERROR, "Luma scaling is not supported, expect wrong picture\n");
+        skip_bits(gb, 3); // Y range, ignored for now
+    }
+    if(get_bits1(gb)) {
+        av_log(avctx, AV_LOG_ERROR, "Chroma scaling is not supported, expect wrong picture\n");
+        skip_bits(gb, 3); // UV range, ignored for now
+    }
+
+    return 0;
+}
+
+static int vc1_parse_frame_header(VC1Context *v, GetBitContext* gb)
+{
+    int pqindex, lowquant, status;
+
+    if(v->finterpflag) v->interpfrm = get_bits(gb, 1);
+    skip_bits(gb, 2); //framecnt unused
+    v->rangeredfrm = 0;
+    if (v->rangered) v->rangeredfrm = get_bits(gb, 1);
+    v->s.pict_type = get_bits(gb, 1);
+    if (v->s.avctx->max_b_frames) {
+        if (!v->s.pict_type) {
+            if (get_bits(gb, 1)) v->s.pict_type = I_TYPE;
+            else v->s.pict_type = B_TYPE;
+        } else v->s.pict_type = P_TYPE;
+    } else v->s.pict_type = v->s.pict_type ? P_TYPE : I_TYPE;
+
+    v->bi_type = 0;
+    if(v->s.pict_type == B_TYPE) {
+        v->bfraction = get_vlc2(gb, vc1_bfraction_vlc.table, VC1_BFRACTION_VLC_BITS, 1);
+        v->bfraction = vc1_bfraction_lut[v->bfraction];
+        if(v->bfraction == 0) {
+            v->s.pict_type = BI_TYPE;
+        }
+    }
+    if(v->s.pict_type == I_TYPE || v->s.pict_type == BI_TYPE)
+        get_bits(gb, 7); // skip buffer fullness
+
+    /* calculate RND */
+    if(v->s.pict_type == I_TYPE || v->s.pict_type == BI_TYPE)
+        v->rnd = 1;
+    if(v->s.pict_type == P_TYPE)
+        v->rnd ^= 1;
+
+    /* Quantizer stuff */
+    pqindex = get_bits(gb, 5);
+    if (v->quantizer_mode == QUANT_FRAME_IMPLICIT)
+        v->pq = pquant_table[0][pqindex];
+    else
+        v->pq = pquant_table[1][pqindex];
+
+    v->pquantizer = 1;
+    if (v->quantizer_mode == QUANT_FRAME_IMPLICIT)
+        v->pquantizer = pqindex < 9;
+    if (v->quantizer_mode == QUANT_NON_UNIFORM)
+        v->pquantizer = 0;
+    v->pqindex = pqindex;
+    if (pqindex < 9) v->halfpq = get_bits(gb, 1);
+    else v->halfpq = 0;
+    if (v->quantizer_mode == QUANT_FRAME_EXPLICIT)
+        v->pquantizer = get_bits(gb, 1);
+    v->dquantfrm = 0;
+    if (v->extended_mv == 1) v->mvrange = get_prefix(gb, 0, 3);
+    v->k_x = v->mvrange + 9 + (v->mvrange >> 1); //k_x can be 9 10 12 13
+    v->k_y = v->mvrange + 8; //k_y can be 8 9 10 11
+    v->range_x = 1 << (v->k_x - 1);
+    v->range_y = 1 << (v->k_y - 1);
+    if (v->profile == PROFILE_ADVANCED)
+    {
+        if (v->postprocflag) v->postproc = get_bits(gb, 1);
+    }
+    else
+        if (v->multires && v->s.pict_type != B_TYPE) v->respic = get_bits(gb, 2);
+
+//av_log(v->s.avctx, AV_LOG_INFO, "%c Frame: QP=[%i]%i (+%i/2) %i\n",
+//        (v->s.pict_type == P_TYPE) ? 'P' : ((v->s.pict_type == I_TYPE) ? 'I' : 'B'), pqindex, v->pq, v->halfpq, v->rangeredfrm);
+
+    if(v->s.pict_type == I_TYPE || v->s.pict_type == P_TYPE) v->use_ic = 0;
+
+    switch(v->s.pict_type) {
+    case P_TYPE:
+        if (v->pq < 5) v->tt_index = 0;
+        else if(v->pq < 13) v->tt_index = 1;
+        else v->tt_index = 2;
+
+        lowquant = (v->pq > 12) ? 0 : 1;
+        v->mv_mode = mv_pmode_table[lowquant][get_prefix(gb, 1, 4)];
+        if (v->mv_mode == MV_PMODE_INTENSITY_COMP)
+        {
+            int scale, shift, i;
+            v->mv_mode2 = mv_pmode_table2[lowquant][get_prefix(gb, 1, 3)];
+            v->lumscale = get_bits(gb, 6);
+            v->lumshift = get_bits(gb, 6);
+            v->use_ic = 1;
+            /* fill lookup tables for intensity compensation */
+            if(!v->lumscale) {
+                scale = -64;
+                shift = (255 - v->lumshift * 2) << 6;
+                if(v->lumshift > 31)
+                    shift += 128 << 6;
+            } else {
+                scale = v->lumscale + 32;
+                if(v->lumshift > 31)
+                    shift = (v->lumshift - 64) << 6;
+                else
+                    shift = v->lumshift << 6;
+            }
+            for(i = 0; i < 256; i++) {
+                v->luty[i] = clip_uint8((scale * i + shift + 32) >> 6);
+                v->lutuv[i] = clip_uint8((scale * (i - 128) + 128*64 + 32) >> 6);
+            }
+        }
+        if(v->mv_mode == MV_PMODE_1MV_HPEL || v->mv_mode == MV_PMODE_1MV_HPEL_BILIN)
+            v->s.quarter_sample = 0;
+        else if(v->mv_mode == MV_PMODE_INTENSITY_COMP) {
+            if(v->mv_mode2 == MV_PMODE_1MV_HPEL || v->mv_mode2 == MV_PMODE_1MV_HPEL_BILIN)
+                v->s.quarter_sample = 0;
+            else
+                v->s.quarter_sample = 1;
+        } else
+            v->s.quarter_sample = 1;
+        v->s.mspel = !(v->mv_mode == MV_PMODE_1MV_HPEL_BILIN || (v->mv_mode == MV_PMODE_INTENSITY_COMP && v->mv_mode2 == MV_PMODE_1MV_HPEL_BILIN));
+
+        if ((v->mv_mode == MV_PMODE_INTENSITY_COMP &&
+                 v->mv_mode2 == MV_PMODE_MIXED_MV)
+                || v->mv_mode == MV_PMODE_MIXED_MV)
+        {
+            status = bitplane_decoding(v->mv_type_mb_plane, &v->mv_type_is_raw, v);
+            if (status < 0) return -1;
+            av_log(v->s.avctx, AV_LOG_DEBUG, "MB MV Type plane encoding: "
+                   "Imode: %i, Invert: %i\n", status>>1, status&1);
+        } else {
+            v->mv_type_is_raw = 0;
+            memset(v->mv_type_mb_plane, 0, v->s.mb_stride * v->s.mb_height);
+        }
+        status = bitplane_decoding(v->s.mbskip_table, &v->skip_is_raw, v);
+        if (status < 0) return -1;
+        av_log(v->s.avctx, AV_LOG_DEBUG, "MB Skip plane encoding: "
+               "Imode: %i, Invert: %i\n", status>>1, status&1);
+
+        /* Hopefully this is correct for P frames */
+        v->s.mv_table_index = get_bits(gb, 2); //but using vc1_ tables
+        v->cbpcy_vlc = &vc1_cbpcy_p_vlc[get_bits(gb, 2)];
+
+        if (v->dquant)
+        {
+            av_log(v->s.avctx, AV_LOG_DEBUG, "VOP DQuant info\n");
+            vop_dquant_decoding(v);
+        }
+
+        v->ttfrm = 0; //FIXME Is that so ?
+        if (v->vstransform)
+        {
+            v->ttmbf = get_bits(gb, 1);
+            if (v->ttmbf)
+            {
+                v->ttfrm = ttfrm_to_tt[get_bits(gb, 2)];
+            }
+        } else {
+            v->ttmbf = 1;
+            v->ttfrm = TT_8X8;
+        }
+        break;
+    case B_TYPE:
+        if (v->pq < 5) v->tt_index = 0;
+        else if(v->pq < 13) v->tt_index = 1;
+        else v->tt_index = 2;
+
+        lowquant = (v->pq > 12) ? 0 : 1;
+        v->mv_mode = get_bits1(gb) ? MV_PMODE_1MV : MV_PMODE_1MV_HPEL_BILIN;
+        v->s.quarter_sample = (v->mv_mode == MV_PMODE_1MV);
+        v->s.mspel = v->s.quarter_sample;
+
+        status = bitplane_decoding(v->direct_mb_plane, &v->dmb_is_raw, v);
+        if (status < 0) return -1;
+        av_log(v->s.avctx, AV_LOG_DEBUG, "MB Direct Type plane encoding: "
+               "Imode: %i, Invert: %i\n", status>>1, status&1);
+        status = bitplane_decoding(v->s.mbskip_table, &v->skip_is_raw, v);
+        if (status < 0) return -1;
+        av_log(v->s.avctx, AV_LOG_DEBUG, "MB Skip plane encoding: "
+               "Imode: %i, Invert: %i\n", status>>1, status&1);
+
+        v->s.mv_table_index = get_bits(gb, 2);
+        v->cbpcy_vlc = &vc1_cbpcy_p_vlc[get_bits(gb, 2)];
+
+        if (v->dquant)
+        {
+            av_log(v->s.avctx, AV_LOG_DEBUG, "VOP DQuant info\n");
+            vop_dquant_decoding(v);
+        }
+
+        v->ttfrm = 0;
+        if (v->vstransform)
+        {
+            v->ttmbf = get_bits(gb, 1);
+            if (v->ttmbf)
+            {
+                v->ttfrm = ttfrm_to_tt[get_bits(gb, 2)];
+            }
+        } else {
+            v->ttmbf = 1;
+            v->ttfrm = TT_8X8;
+        }
+        break;
+    }
+
+    /* AC Syntax */
+    v->c_ac_table_index = decode012(gb);
+    if (v->s.pict_type == I_TYPE || v->s.pict_type == BI_TYPE)
+    {
+        v->y_ac_table_index = decode012(gb);
+    }
+    /* DC Syntax */
+    v->s.dc_table_index = get_bits(gb, 1);
+
+    if(v->s.pict_type == BI_TYPE) {
+        v->s.pict_type = B_TYPE;
+        v->bi_type = 1;
+    }
+    return 0;
+}
+
+static int vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
+{
+    int fcm;
+    int pqindex, lowquant;
+    int status;
+
+    v->p_frame_skipped = 0;
+
+    if(v->interlace)
+        fcm = decode012(gb);
+    switch(get_prefix(gb, 0, 4)) {
+    case 0:
+        v->s.pict_type = P_TYPE;
+        break;
+    case 1:
+        v->s.pict_type = B_TYPE;
+        break;
+    case 2:
+        v->s.pict_type = I_TYPE;
+        break;
+    case 3:
+        v->s.pict_type = BI_TYPE;
+        break;
+    case 4:
+        v->s.pict_type = P_TYPE; // skipped pic
+        v->p_frame_skipped = 1;
+        return 0;
+    }
+    if(v->tfcntrflag)
+        get_bits(gb, 8);
+    if(v->broadcast) {
+        if(!v->interlace || v->panscanflag) {
+            get_bits(gb, 2);
+        } else {
+            get_bits1(gb);
+            get_bits1(gb);
+        }
+    }
+    if(v->panscanflag) {
+        //...
+    }
+    v->rnd = get_bits1(gb);
+    if(v->interlace)
+        v->uvsamp = get_bits1(gb);
+    if(v->finterpflag) v->interpfrm = get_bits(gb, 1);
+    if(v->s.pict_type == B_TYPE) {
+        v->bfraction = get_vlc2(gb, vc1_bfraction_vlc.table, VC1_BFRACTION_VLC_BITS, 1);
+        v->bfraction = vc1_bfraction_lut[v->bfraction];
+        if(v->bfraction == 0) {
+            v->s.pict_type = BI_TYPE; /* XXX: should not happen here */
+        }
+    }
+    pqindex = get_bits(gb, 5);
+    v->pqindex = pqindex;
+    if (v->quantizer_mode == QUANT_FRAME_IMPLICIT)
+        v->pq = pquant_table[0][pqindex];
+    else
+        v->pq = pquant_table[1][pqindex];
+
+    v->pquantizer = 1;
+    if (v->quantizer_mode == QUANT_FRAME_IMPLICIT)
+        v->pquantizer = pqindex < 9;
+    if (v->quantizer_mode == QUANT_NON_UNIFORM)
+        v->pquantizer = 0;
+    v->pqindex = pqindex;
+    if (pqindex < 9) v->halfpq = get_bits(gb, 1);
+    else v->halfpq = 0;
+    if (v->quantizer_mode == QUANT_FRAME_EXPLICIT)
+        v->pquantizer = get_bits(gb, 1);
+
+    switch(v->s.pict_type) {
+    case I_TYPE:
+    case BI_TYPE:
+        status = bitplane_decoding(v->acpred_plane, &v->acpred_is_raw, v);
+        if (status < 0) return -1;
+        av_log(v->s.avctx, AV_LOG_DEBUG, "ACPRED plane encoding: "
+                "Imode: %i, Invert: %i\n", status>>1, status&1);
+        v->condover = CONDOVER_NONE;
+        if(v->overlap && v->pq <= 8) {
+            v->condover = decode012(gb);
+            if(v->condover == CONDOVER_SELECT) {
+                status = bitplane_decoding(v->over_flags_plane, &v->overflg_is_raw, v);
+                if (status < 0) return -1;
+                av_log(v->s.avctx, AV_LOG_DEBUG, "CONDOVER plane encoding: "
+                        "Imode: %i, Invert: %i\n", status>>1, status&1);
+            }
+        }
+        break;
+    case P_TYPE:
+        if(v->postprocflag)
+            v->postproc = get_bits1(gb);
+        if (v->extended_mv) v->mvrange = get_prefix(gb, 0, 3);
+        else v->mvrange = 0;
+        v->k_x = v->mvrange + 9 + (v->mvrange >> 1); //k_x can be 9 10 12 13
+        v->k_y = v->mvrange + 8; //k_y can be 8 9 10 11
+        v->range_x = 1 << (v->k_x - 1);
+        v->range_y = 1 << (v->k_y - 1);
+
+        if (v->pq < 5) v->tt_index = 0;
+        else if(v->pq < 13) v->tt_index = 1;
+        else v->tt_index = 2;
+
+        lowquant = (v->pq > 12) ? 0 : 1;
+        v->mv_mode = mv_pmode_table[lowquant][get_prefix(gb, 1, 4)];
+        if (v->mv_mode == MV_PMODE_INTENSITY_COMP)
+        {
+            int scale, shift, i;
+            v->mv_mode2 = mv_pmode_table2[lowquant][get_prefix(gb, 1, 3)];
+            v->lumscale = get_bits(gb, 6);
+            v->lumshift = get_bits(gb, 6);
+            /* fill lookup tables for intensity compensation */
+            if(!v->lumscale) {
+                scale = -64;
+                shift = (255 - v->lumshift * 2) << 6;
+                if(v->lumshift > 31)
+                    shift += 128 << 6;
+            } else {
+                scale = v->lumscale + 32;
+                if(v->lumshift > 31)
+                    shift = (v->lumshift - 64) << 6;
+                else
+                    shift = v->lumshift << 6;
+            }
+            for(i = 0; i < 256; i++) {
+                v->luty[i] = clip_uint8((scale * i + shift + 32) >> 6);
+                v->lutuv[i] = clip_uint8((scale * (i - 128) + 128*64 + 32) >> 6);
+            }
+        }
+        if(v->mv_mode == MV_PMODE_1MV_HPEL || v->mv_mode == MV_PMODE_1MV_HPEL_BILIN)
+            v->s.quarter_sample = 0;
+        else if(v->mv_mode == MV_PMODE_INTENSITY_COMP) {
+            if(v->mv_mode2 == MV_PMODE_1MV_HPEL || v->mv_mode2 == MV_PMODE_1MV_HPEL_BILIN)
+                v->s.quarter_sample = 0;
+            else
+                v->s.quarter_sample = 1;
+        } else
+            v->s.quarter_sample = 1;
+        v->s.mspel = !(v->mv_mode == MV_PMODE_1MV_HPEL_BILIN || (v->mv_mode == MV_PMODE_INTENSITY_COMP && v->mv_mode2 == MV_PMODE_1MV_HPEL_BILIN));
+
+        if ((v->mv_mode == MV_PMODE_INTENSITY_COMP &&
+                 v->mv_mode2 == MV_PMODE_MIXED_MV)
+                || v->mv_mode == MV_PMODE_MIXED_MV)
+        {
+            status = bitplane_decoding(v->mv_type_mb_plane, &v->mv_type_is_raw, v);
+            if (status < 0) return -1;
+            av_log(v->s.avctx, AV_LOG_DEBUG, "MB MV Type plane encoding: "
+                   "Imode: %i, Invert: %i\n", status>>1, status&1);
+        } else {
+            v->mv_type_is_raw = 0;
+            memset(v->mv_type_mb_plane, 0, v->s.mb_stride * v->s.mb_height);
+        }
+        status = bitplane_decoding(v->s.mbskip_table, &v->skip_is_raw, v);
+        if (status < 0) return -1;
+        av_log(v->s.avctx, AV_LOG_DEBUG, "MB Skip plane encoding: "
+               "Imode: %i, Invert: %i\n", status>>1, status&1);
+
+        /* Hopefully this is correct for P frames */
+        v->s.mv_table_index = get_bits(gb, 2); //but using vc1_ tables
+        v->cbpcy_vlc = &vc1_cbpcy_p_vlc[get_bits(gb, 2)];
+        if (v->dquant)
+        {
+            av_log(v->s.avctx, AV_LOG_DEBUG, "VOP DQuant info\n");
+            vop_dquant_decoding(v);
+        }
+
+        v->ttfrm = 0; //FIXME Is that so ?
+        if (v->vstransform)
+        {
+            v->ttmbf = get_bits(gb, 1);
+            if (v->ttmbf)
+            {
+                v->ttfrm = ttfrm_to_tt[get_bits(gb, 2)];
+            }
+        } else {
+            v->ttmbf = 1;
+            v->ttfrm = TT_8X8;
+        }
+        break;
+    case B_TYPE:
+        if(v->postprocflag)
+            v->postproc = get_bits1(gb);
+        if (v->extended_mv) v->mvrange = get_prefix(gb, 0, 3);
+        else v->mvrange = 0;
+        v->k_x = v->mvrange + 9 + (v->mvrange >> 1); //k_x can be 9 10 12 13
+        v->k_y = v->mvrange + 8; //k_y can be 8 9 10 11
+        v->range_x = 1 << (v->k_x - 1);
+        v->range_y = 1 << (v->k_y - 1);
+
+        if (v->pq < 5) v->tt_index = 0;
+        else if(v->pq < 13) v->tt_index = 1;
+        else v->tt_index = 2;
+
+        lowquant = (v->pq > 12) ? 0 : 1;
+        v->mv_mode = get_bits1(gb) ? MV_PMODE_1MV : MV_PMODE_1MV_HPEL_BILIN;
+        v->s.quarter_sample = (v->mv_mode == MV_PMODE_1MV);
+        v->s.mspel = v->s.quarter_sample;
+
+        status = bitplane_decoding(v->direct_mb_plane, &v->dmb_is_raw, v);
+        if (status < 0) return -1;
+        av_log(v->s.avctx, AV_LOG_DEBUG, "MB Direct Type plane encoding: "
+               "Imode: %i, Invert: %i\n", status>>1, status&1);
+        status = bitplane_decoding(v->s.mbskip_table, &v->skip_is_raw, v);
+        if (status < 0) return -1;
+        av_log(v->s.avctx, AV_LOG_DEBUG, "MB Skip plane encoding: "
+               "Imode: %i, Invert: %i\n", status>>1, status&1);
+
+        v->s.mv_table_index = get_bits(gb, 2);
+        v->cbpcy_vlc = &vc1_cbpcy_p_vlc[get_bits(gb, 2)];
+
+        if (v->dquant)
+        {
+            av_log(v->s.avctx, AV_LOG_DEBUG, "VOP DQuant info\n");
+            vop_dquant_decoding(v);
+        }
+
+        v->ttfrm = 0;
+        if (v->vstransform)
+        {
+            v->ttmbf = get_bits(gb, 1);
+            if (v->ttmbf)
+            {
+                v->ttfrm = ttfrm_to_tt[get_bits(gb, 2)];
+            }
+        } else {
+            v->ttmbf = 1;
+            v->ttfrm = TT_8X8;
+        }
+        break;
+    }
+
+    /* AC Syntax */
+    v->c_ac_table_index = decode012(gb);
+    if (v->s.pict_type == I_TYPE || v->s.pict_type == BI_TYPE)
+    {
+        v->y_ac_table_index = decode012(gb);
+    }
+    /* DC Syntax */
+    v->s.dc_table_index = get_bits(gb, 1);
+    if (v->s.pict_type == I_TYPE && v->dquant) {
+        av_log(v->s.avctx, AV_LOG_DEBUG, "VOP DQuant info\n");
+        vop_dquant_decoding(v);
+    }
+
+    v->bi_type = 0;
+    if(v->s.pict_type == BI_TYPE) {
+        v->s.pict_type = B_TYPE;
+        v->bi_type = 1;
+    }
+    return 0;
+}
+
+/***********************************************************************/
+/**
+ * @defgroup block VC-1 Block-level functions
+ * @see 7.1.4, p91 and 8.1.1.7, p(1)04
+ * @{
+ */
+
+/**
+ * @def GET_MQUANT
+ * @brief Get macroblock-level quantizer scale
+ */
+#define GET_MQUANT()                                           \
+  if (v->dquantfrm)                                            \
+  {                                                            \
+    int edges = 0;                                             \
+    if (v->dqprofile == DQPROFILE_ALL_MBS)                     \
+    {                                                          \
+      if (v->dqbilevel)                                        \
+      {                                                        \
+        mquant = (get_bits(gb, 1)) ? v->altpq : v->pq;         \
+      }                                                        \
+      else                                                     \
+      {                                                        \
+        mqdiff = get_bits(gb, 3);                              \
+        if (mqdiff != 7) mquant = v->pq + mqdiff;              \
+        else mquant = get_bits(gb, 5);                         \
+      }                                                        \
+    }                                                          \
+    if(v->dqprofile == DQPROFILE_SINGLE_EDGE)                  \
+        edges = 1 << v->dqsbedge;                              \
+    else if(v->dqprofile == DQPROFILE_DOUBLE_EDGES)            \
+        edges = (3 << v->dqsbedge) % 15;                       \
+    else if(v->dqprofile == DQPROFILE_FOUR_EDGES)              \
+        edges = 15;                                            \
+    if((edges&1) && !s->mb_x)                                  \
+        mquant = v->altpq;                                     \
+    if((edges&2) && s->first_slice_line)                       \
+        mquant = v->altpq;                                     \
+    if((edges&4) && s->mb_x == (s->mb_width - 1))              \
+        mquant = v->altpq;                                     \
+    if((edges&8) && s->mb_y == (s->mb_height - 1))             \
+        mquant = v->altpq;                                     \
+  }
+
+/**
+ * @def GET_MVDATA(_dmv_x, _dmv_y)
+ * @brief Get MV differentials
+ * @see MVDATA decoding from 8.3.5.2, p(1)20
+ * @param _dmv_x Horizontal differential for decoded MV
+ * @param _dmv_y Vertical differential for decoded MV
+ */
+#define GET_MVDATA(_dmv_x, _dmv_y)                                  \
+  index = 1 + get_vlc2(gb, vc1_mv_diff_vlc[s->mv_table_index].table,\
+                       VC1_MV_DIFF_VLC_BITS, 2);                    \
+  if (index > 36)                                                   \
+  {                                                                 \
+    mb_has_coeffs = 1;                                              \
+    index -= 37;                                                    \
+  }                                                                 \
+  else mb_has_coeffs = 0;                                           \
+  s->mb_intra = 0;                                                  \
+  if (!index) { _dmv_x = _dmv_y = 0; }                              \
+  else if (index == 35)                                             \
+  {                                                                 \
+    _dmv_x = get_bits(gb, v->k_x - 1 + s->quarter_sample);          \
+    _dmv_y = get_bits(gb, v->k_y - 1 + s->quarter_sample);          \
+  }                                                                 \
+  else if (index == 36)                                             \
+  {                                                                 \
+    _dmv_x = 0;                                                     \
+    _dmv_y = 0;                                                     \
+    s->mb_intra = 1;                                                \
+  }                                                                 \
+  else                                                              \
+  {                                                                 \
+    index1 = index%6;                                               \
+    if (!s->quarter_sample && index1 == 5) val = 1;                 \
+    else                                   val = 0;                 \
+    if(size_table[index1] - val > 0)                                \
+        val = get_bits(gb, size_table[index1] - val);               \
+    else                                   val = 0;                 \
+    sign = 0 - (val&1);                                             \
+    _dmv_x = (sign ^ ((val>>1) + offset_table[index1])) - sign;     \
+                                                                    \
+    index1 = index/6;                                               \
+    if (!s->quarter_sample && index1 == 5) val = 1;                 \
+    else                                   val = 0;                 \
+    if(size_table[index1] - val > 0)                                \
+        val = get_bits(gb, size_table[index1] - val);               \
+    else                                   val = 0;                 \
+    sign = 0 - (val&1);                                             \
+    _dmv_y = (sign ^ ((val>>1) + offset_table[index1])) - sign;     \
+  }
+
+/** Predict and set motion vector
+ */
+static inline void vc1_pred_mv(MpegEncContext *s, int n, int dmv_x, int dmv_y, int mv1, int r_x, int r_y, uint8_t* is_intra)
+{
+    int xy, wrap, off = 0;
+    int16_t *A, *B, *C;
+    int px, py;
+    int sum;
+
+    /* scale MV difference to be quad-pel */
+    dmv_x <<= 1 - s->quarter_sample;
+    dmv_y <<= 1 - s->quarter_sample;
+
+    wrap = s->b8_stride;
+    xy = s->block_index[n];
+
+    if(s->mb_intra){
+        s->mv[0][n][0] = s->current_picture.motion_val[0][xy][0] = 0;
+        s->mv[0][n][1] = s->current_picture.motion_val[0][xy][1] = 0;
+        if(mv1) { /* duplicate motion data for 1-MV block */
+            s->current_picture.motion_val[0][xy + 1][0] = 0;
+            s->current_picture.motion_val[0][xy + 1][1] = 0;
+            s->current_picture.motion_val[0][xy + wrap][0] = 0;
+            s->current_picture.motion_val[0][xy + wrap][1] = 0;
+            s->current_picture.motion_val[0][xy + wrap + 1][0] = 0;
+            s->current_picture.motion_val[0][xy + wrap + 1][1] = 0;
+        }
+        return;
+    }
+
+    C = s->current_picture.motion_val[0][xy - 1];
+    A = s->current_picture.motion_val[0][xy - wrap];
+    if(mv1)
+        off = (s->mb_x == (s->mb_width - 1)) ? -1 : 2;
+    else {
+        //in 4-MV mode different blocks have different B predictor position
+        switch(n){
+        case 0:
+            off = (s->mb_x > 0) ? -1 : 1;
+            break;
+        case 1:
+            off = (s->mb_x == (s->mb_width - 1)) ? -1 : 1;
+            break;
+        case 2:
+            off = 1;
+            break;
+        case 3:
+            off = -1;
+        }
+    }
+    B = s->current_picture.motion_val[0][xy - wrap + off];
+
+    if(!s->first_slice_line || (n==2 || n==3)) { // predictor A is not out of bounds
+        if(s->mb_width == 1) {
+            px = A[0];
+            py = A[1];
+        } else {
+            px = mid_pred(A[0], B[0], C[0]);
+            py = mid_pred(A[1], B[1], C[1]);
+        }
+    } else if(s->mb_x || (n==1 || n==3)) { // predictor C is not out of bounds
+        px = C[0];
+        py = C[1];
+    } else {
+        px = py = 0;
+    }
+    /* Pullback MV as specified in 8.3.5.3.4 */
+    {
+        int qx, qy, X, Y;
+        qx = (s->mb_x << 6) + ((n==1 || n==3) ? 32 : 0);
+        qy = (s->mb_y << 6) + ((n==2 || n==3) ? 32 : 0);
+        X = (s->mb_width << 6) - 4;
+        Y = (s->mb_height << 6) - 4;
+        if(mv1) {
+            if(qx + px < -60) px = -60 - qx;
+            if(qy + py < -60) py = -60 - qy;
+        } else {
+            if(qx + px < -28) px = -28 - qx;
+            if(qy + py < -28) py = -28 - qy;
+        }
+        if(qx + px > X) px = X - qx;
+        if(qy + py > Y) py = Y - qy;
+    }
+    /* Calculate hybrid prediction as specified in 8.3.5.3.5 */
+    if((!s->first_slice_line || (n==2 || n==3)) && (s->mb_x || (n==1 || n==3))) {
+        if(is_intra[xy - wrap])
+            sum = FFABS(px) + FFABS(py);
+        else
+            sum = FFABS(px - A[0]) + FFABS(py - A[1]);
+        if(sum > 32) {
+            if(get_bits1(&s->gb)) {
+                px = A[0];
+                py = A[1];
+            } else {
+                px = C[0];
+                py = C[1];
+            }
+        } else {
+            if(is_intra[xy - 1])
+                sum = FFABS(px) + FFABS(py);
+            else
+                sum = FFABS(px - C[0]) + FFABS(py - C[1]);
+            if(sum > 32) {
+                if(get_bits1(&s->gb)) {
+                    px = A[0];
+                    py = A[1];
+                } else {
+                    px = C[0];
+                    py = C[1];
+                }
+            }
+        }
+    }
+    /* store MV using signed modulus of MV range defined in 4.11 */
+    s->mv[0][n][0] = s->current_picture.motion_val[0][xy][0] = ((px + dmv_x + r_x) & ((r_x << 1) - 1)) - r_x;
+    s->mv[0][n][1] = s->current_picture.motion_val[0][xy][1] = ((py + dmv_y + r_y) & ((r_y << 1) - 1)) - r_y;
+    if(mv1) { /* duplicate motion data for 1-MV block */
+        s->current_picture.motion_val[0][xy + 1][0] = s->current_picture.motion_val[0][xy][0];
+        s->current_picture.motion_val[0][xy + 1][1] = s->current_picture.motion_val[0][xy][1];
+        s->current_picture.motion_val[0][xy + wrap][0] = s->current_picture.motion_val[0][xy][0];
+        s->current_picture.motion_val[0][xy + wrap][1] = s->current_picture.motion_val[0][xy][1];
+        s->current_picture.motion_val[0][xy + wrap + 1][0] = s->current_picture.motion_val[0][xy][0];
+        s->current_picture.motion_val[0][xy + wrap + 1][1] = s->current_picture.motion_val[0][xy][1];
+    }
+}
+
+/** Motion compensation for direct or interpolated blocks in B-frames
+ */
+static void vc1_interp_mc(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+    DSPContext *dsp = &v->s.dsp;
+    uint8_t *srcY, *srcU, *srcV;
+    int dxy, uvdxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
+
+    if(!v->s.next_picture.data[0])return;
+
+    mx = s->mv[1][0][0];
+    my = s->mv[1][0][1];
+    uvmx = (mx + ((mx & 3) == 3)) >> 1;
+    uvmy = (my + ((my & 3) == 3)) >> 1;
+    srcY = s->next_picture.data[0];
+    srcU = s->next_picture.data[1];
+    srcV = s->next_picture.data[2];
+
+    src_x = s->mb_x * 16 + (mx >> 2);
+    src_y = s->mb_y * 16 + (my >> 2);
+    uvsrc_x = s->mb_x * 8 + (uvmx >> 2);
+    uvsrc_y = s->mb_y * 8 + (uvmy >> 2);
+
+    src_x   = clip(  src_x, -16, s->mb_width  * 16);
+    src_y   = clip(  src_y, -16, s->mb_height * 16);
+    uvsrc_x = clip(uvsrc_x,  -8, s->mb_width  *  8);
+    uvsrc_y = clip(uvsrc_y,  -8, s->mb_height *  8);
+
+    srcY += src_y * s->linesize + src_x;
+    srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
+    srcV += uvsrc_y * s->uvlinesize + uvsrc_x;
+
+    /* for grayscale we should not try to read from unknown area */
+    if(s->flags & CODEC_FLAG_GRAY) {
+        srcU = s->edge_emu_buffer + 18 * s->linesize;
+        srcV = s->edge_emu_buffer + 18 * s->linesize;
+    }
+
+    if(v->rangeredfrm
+       || (unsigned)src_x > s->h_edge_pos - (mx&3) - 16
+       || (unsigned)src_y > s->v_edge_pos - (my&3) - 16){
+        uint8_t *uvbuf= s->edge_emu_buffer + 19 * s->linesize;
+
+        srcY -= s->mspel * (1 + s->linesize);
+        ff_emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize, 17+s->mspel*2, 17+s->mspel*2,
+                            src_x - s->mspel, src_y - s->mspel, s->h_edge_pos, s->v_edge_pos);
+        srcY = s->edge_emu_buffer;
+        ff_emulated_edge_mc(uvbuf     , srcU, s->uvlinesize, 8+1, 8+1,
+                            uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
+        ff_emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, 8+1, 8+1,
+                            uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
+        srcU = uvbuf;
+        srcV = uvbuf + 16;
+        /* if we deal with range reduction we need to scale source blocks */
+        if(v->rangeredfrm) {
+            int i, j;
+            uint8_t *src, *src2;
+
+            src = srcY;
+            for(j = 0; j < 17 + s->mspel*2; j++) {
+                for(i = 0; i < 17 + s->mspel*2; i++) src[i] = ((src[i] - 128) >> 1) + 128;
+                src += s->linesize;
+            }
+            src = srcU; src2 = srcV;
+            for(j = 0; j < 9; j++) {
+                for(i = 0; i < 9; i++) {
+                    src[i] = ((src[i] - 128) >> 1) + 128;
+                    src2[i] = ((src2[i] - 128) >> 1) + 128;
+                }
+                src += s->uvlinesize;
+                src2 += s->uvlinesize;
+            }
+        }
+        srcY += s->mspel * (1 + s->linesize);
+    }
+
+    if(v->fastuvmc) {
+        uvmx = uvmx + ((uvmx<0)?-(uvmx&1):(uvmx&1));
+        uvmy = uvmy + ((uvmy<0)?-(uvmy&1):(uvmy&1));
+    }
+
+    mx >>= 1;
+    my >>= 1;
+    dxy = ((my & 1) << 1) | (mx & 1);
+
+    dsp->avg_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
+
+    if(s->flags & CODEC_FLAG_GRAY) return;
+    /* Chroma MC always uses qpel blilinear */
+    uvdxy = ((uvmy & 3) << 2) | (uvmx & 3);
+    uvmx = (uvmx&3)<<1;
+    uvmy = (uvmy&3)<<1;
+    dsp->avg_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
+    dsp->avg_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
+}
+
+static always_inline int scale_mv(int value, int bfrac, int inv, int qs)
+{
+    int n = bfrac;
+
+#if B_FRACTION_DEN==256
+    if(inv)
+        n -= 256;
+    if(!qs)
+        return 2 * ((value * n + 255) >> 9);
+    return (value * n + 128) >> 8;
+#else
+    if(inv)
+        n -= B_FRACTION_DEN;
+    if(!qs)
+        return 2 * ((value * n + B_FRACTION_DEN - 1) / (2 * B_FRACTION_DEN));
+    return (value * n + B_FRACTION_DEN/2) / B_FRACTION_DEN;
+#endif
+}
+
+/** Reconstruct motion vector for B-frame and do motion compensation
+ */
+static inline void vc1_b_mc(VC1Context *v, int dmv_x[2], int dmv_y[2], int direct, int mode)
+{
+    if(v->use_ic) {
+        v->mv_mode2 = v->mv_mode;
+        v->mv_mode = MV_PMODE_INTENSITY_COMP;
+    }
+    if(direct) {
+        vc1_mc_1mv(v, 0);
+        vc1_interp_mc(v);
+        if(v->use_ic) v->mv_mode = v->mv_mode2;
+        return;
+    }
+    if(mode == BMV_TYPE_INTERPOLATED) {
+        vc1_mc_1mv(v, 0);
+        vc1_interp_mc(v);
+        if(v->use_ic) v->mv_mode = v->mv_mode2;
+        return;
+    }
+
+    if(v->use_ic && (mode == BMV_TYPE_BACKWARD)) v->mv_mode = v->mv_mode2;
+    vc1_mc_1mv(v, (mode == BMV_TYPE_BACKWARD));
+    if(v->use_ic) v->mv_mode = v->mv_mode2;
+}
+
+static inline void vc1_pred_b_mv(VC1Context *v, int dmv_x[2], int dmv_y[2], int direct, int mvtype)
+{
+    MpegEncContext *s = &v->s;
+    int xy, wrap, off = 0;
+    int16_t *A, *B, *C;
+    int px, py;
+    int sum;
+    int r_x, r_y;
+    const uint8_t *is_intra = v->mb_type[0];
+
+    r_x = v->range_x;
+    r_y = v->range_y;
+    /* scale MV difference to be quad-pel */
+    dmv_x[0] <<= 1 - s->quarter_sample;
+    dmv_y[0] <<= 1 - s->quarter_sample;
+    dmv_x[1] <<= 1 - s->quarter_sample;
+    dmv_y[1] <<= 1 - s->quarter_sample;
+
+    wrap = s->b8_stride;
+    xy = s->block_index[0];
+
+    if(s->mb_intra) {
+        s->current_picture.motion_val[0][xy][0] =
+        s->current_picture.motion_val[0][xy][1] =
+        s->current_picture.motion_val[1][xy][0] =
+        s->current_picture.motion_val[1][xy][1] = 0;
+        return;
+    }
+    s->mv[0][0][0] = scale_mv(s->next_picture.motion_val[1][xy][0], v->bfraction, 0, s->quarter_sample);
+    s->mv[0][0][1] = scale_mv(s->next_picture.motion_val[1][xy][1], v->bfraction, 0, s->quarter_sample);
+    s->mv[1][0][0] = scale_mv(s->next_picture.motion_val[1][xy][0], v->bfraction, 1, s->quarter_sample);
+    s->mv[1][0][1] = scale_mv(s->next_picture.motion_val[1][xy][1], v->bfraction, 1, s->quarter_sample);
+    if(direct) {
+        s->current_picture.motion_val[0][xy][0] = s->mv[0][0][0];
+        s->current_picture.motion_val[0][xy][1] = s->mv[0][0][1];
+        s->current_picture.motion_val[1][xy][0] = s->mv[1][0][0];
+        s->current_picture.motion_val[1][xy][1] = s->mv[1][0][1];
+        return;
+    }
+
+    if((mvtype == BMV_TYPE_FORWARD) || (mvtype == BMV_TYPE_INTERPOLATED)) {
+        C = s->current_picture.motion_val[0][xy - 2];
+        A = s->current_picture.motion_val[0][xy - wrap*2];
+        off = (s->mb_x == (s->mb_width - 1)) ? -2 : 2;
+        B = s->current_picture.motion_val[0][xy - wrap*2 + off];
+
+        if(!s->first_slice_line) { // predictor A is not out of bounds
+            if(s->mb_width == 1) {
+                px = A[0];
+                py = A[1];
+            } else {
+                px = mid_pred(A[0], B[0], C[0]);
+                py = mid_pred(A[1], B[1], C[1]);
+            }
+        } else if(s->mb_x) { // predictor C is not out of bounds
+            px = C[0];
+            py = C[1];
+        } else {
+            px = py = 0;
+        }
+        /* Pullback MV as specified in 8.3.5.3.4 */
+        {
+            int qx, qy, X, Y;
+            if(v->profile < PROFILE_ADVANCED) {
+                qx = (s->mb_x << 5);
+                qy = (s->mb_y << 5);
+                X = (s->mb_width << 5) - 4;
+                Y = (s->mb_height << 5) - 4;
+                if(qx + px < -28) px = -28 - qx;
+                if(qy + py < -28) py = -28 - qy;
+                if(qx + px > X) px = X - qx;
+                if(qy + py > Y) py = Y - qy;
+            } else {
+                qx = (s->mb_x << 6);
+                qy = (s->mb_y << 6);
+                X = (s->mb_width << 6) - 4;
+                Y = (s->mb_height << 6) - 4;
+                if(qx + px < -60) px = -60 - qx;
+                if(qy + py < -60) py = -60 - qy;
+                if(qx + px > X) px = X - qx;
+                if(qy + py > Y) py = Y - qy;
+            }
+        }
+        /* Calculate hybrid prediction as specified in 8.3.5.3.5 */
+        if(0 && !s->first_slice_line && s->mb_x) {
+            if(is_intra[xy - wrap])
+                sum = FFABS(px) + FFABS(py);
+            else
+                sum = FFABS(px - A[0]) + FFABS(py - A[1]);
+            if(sum > 32) {
+                if(get_bits1(&s->gb)) {
+                    px = A[0];
+                    py = A[1];
+                } else {
+                    px = C[0];
+                    py = C[1];
+                }
+            } else {
+                if(is_intra[xy - 2])
+                    sum = FFABS(px) + FFABS(py);
+                else
+                    sum = FFABS(px - C[0]) + FFABS(py - C[1]);
+                if(sum > 32) {
+                    if(get_bits1(&s->gb)) {
+                        px = A[0];
+                        py = A[1];
+                    } else {
+                        px = C[0];
+                        py = C[1];
+                    }
+                }
+            }
+        }
+        /* store MV using signed modulus of MV range defined in 4.11 */
+        s->mv[0][0][0] = ((px + dmv_x[0] + r_x) & ((r_x << 1) - 1)) - r_x;
+        s->mv[0][0][1] = ((py + dmv_y[0] + r_y) & ((r_y << 1) - 1)) - r_y;
+    }
+    if((mvtype == BMV_TYPE_BACKWARD) || (mvtype == BMV_TYPE_INTERPOLATED)) {
+        C = s->current_picture.motion_val[1][xy - 2];
+        A = s->current_picture.motion_val[1][xy - wrap*2];
+        off = (s->mb_x == (s->mb_width - 1)) ? -2 : 2;
+        B = s->current_picture.motion_val[1][xy - wrap*2 + off];
+
+        if(!s->first_slice_line) { // predictor A is not out of bounds
+            if(s->mb_width == 1) {
+                px = A[0];
+                py = A[1];
+            } else {
+                px = mid_pred(A[0], B[0], C[0]);
+                py = mid_pred(A[1], B[1], C[1]);
+            }
+        } else if(s->mb_x) { // predictor C is not out of bounds
+            px = C[0];
+            py = C[1];
+        } else {
+            px = py = 0;
+        }
+        /* Pullback MV as specified in 8.3.5.3.4 */
+        {
+            int qx, qy, X, Y;
+            if(v->profile < PROFILE_ADVANCED) {
+                qx = (s->mb_x << 5);
+                qy = (s->mb_y << 5);
+                X = (s->mb_width << 5) - 4;
+                Y = (s->mb_height << 5) - 4;
+                if(qx + px < -28) px = -28 - qx;
+                if(qy + py < -28) py = -28 - qy;
+                if(qx + px > X) px = X - qx;
+                if(qy + py > Y) py = Y - qy;
+            } else {
+                qx = (s->mb_x << 6);
+                qy = (s->mb_y << 6);
+                X = (s->mb_width << 6) - 4;
+                Y = (s->mb_height << 6) - 4;
+                if(qx + px < -60) px = -60 - qx;
+                if(qy + py < -60) py = -60 - qy;
+                if(qx + px > X) px = X - qx;
+                if(qy + py > Y) py = Y - qy;
+            }
+        }
+        /* Calculate hybrid prediction as specified in 8.3.5.3.5 */
+        if(0 && !s->first_slice_line && s->mb_x) {
+            if(is_intra[xy - wrap])
+                sum = FFABS(px) + FFABS(py);
+            else
+                sum = FFABS(px - A[0]) + FFABS(py - A[1]);
+            if(sum > 32) {
+                if(get_bits1(&s->gb)) {
+                    px = A[0];
+                    py = A[1];
+                } else {
+                    px = C[0];
+                    py = C[1];
+                }
+            } else {
+                if(is_intra[xy - 2])
+                    sum = FFABS(px) + FFABS(py);
+                else
+                    sum = FFABS(px - C[0]) + FFABS(py - C[1]);
+                if(sum > 32) {
+                    if(get_bits1(&s->gb)) {
+                        px = A[0];
+                        py = A[1];
+                    } else {
+                        px = C[0];
+                        py = C[1];
+                    }
+                }
+            }
+        }
+        /* store MV using signed modulus of MV range defined in 4.11 */
+
+        s->mv[1][0][0] = ((px + dmv_x[1] + r_x) & ((r_x << 1) - 1)) - r_x;
+        s->mv[1][0][1] = ((py + dmv_y[1] + r_y) & ((r_y << 1) - 1)) - r_y;
+    }
+    s->current_picture.motion_val[0][xy][0] = s->mv[0][0][0];
+    s->current_picture.motion_val[0][xy][1] = s->mv[0][0][1];
+    s->current_picture.motion_val[1][xy][0] = s->mv[1][0][0];
+    s->current_picture.motion_val[1][xy][1] = s->mv[1][0][1];
+}
+
+/** Get predicted DC value for I-frames only
+ * prediction dir: left=0, top=1
+ * @param s MpegEncContext
+ * @param[in] n block index in the current MB
+ * @param dc_val_ptr Pointer to DC predictor
+ * @param dir_ptr Prediction direction for use in AC prediction
+ */
+static inline int vc1_i_pred_dc(MpegEncContext *s, int overlap, int pq, int n,
+                              int16_t **dc_val_ptr, int *dir_ptr)
+{
+    int a, b, c, wrap, pred, scale;
+    int16_t *dc_val;
+    static const uint16_t dcpred[32] = {
+    -1, 1024,  512,  341,  256,  205,  171,  146,  128,
+         114,  102,   93,   85,   79,   73,   68,   64,
+          60,   57,   54,   51,   49,   47,   45,   43,
+          41,   39,   38,   37,   35,   34,   33
+    };
+
+    /* find prediction - wmv3_dc_scale always used here in fact */
+    if (n < 4)     scale = s->y_dc_scale;
+    else           scale = s->c_dc_scale;
+
+    wrap = s->block_wrap[n];
+    dc_val= s->dc_val[0] + s->block_index[n];
+
+    /* B A
+     * C X
+     */
+    c = dc_val[ - 1];
+    b = dc_val[ - 1 - wrap];
+    a = dc_val[ - wrap];
+
+    if (pq < 9 || !overlap)
+    {
+        /* Set outer values */
+        if (s->first_slice_line && (n!=2 && n!=3)) b=a=dcpred[scale];
+        if (s->mb_x == 0 && (n!=1 && n!=3)) b=c=dcpred[scale];
+    }
+    else
+    {
+        /* Set outer values */
+        if (s->first_slice_line && (n!=2 && n!=3)) b=a=0;
+        if (s->mb_x == 0 && (n!=1 && n!=3)) b=c=0;
+    }
+
+    if (abs(a - b) <= abs(b - c)) {
+        pred = c;
+        *dir_ptr = 1;//left
+    } else {
+        pred = a;
+        *dir_ptr = 0;//top
+    }
+
+    /* update predictor */
+    *dc_val_ptr = &dc_val[0];
+    return pred;
+}
+
+
+/** Get predicted DC value
+ * prediction dir: left=0, top=1
+ * @param s MpegEncContext
+ * @param[in] n block index in the current MB
+ * @param dc_val_ptr Pointer to DC predictor
+ * @param dir_ptr Prediction direction for use in AC prediction
+ */
+static inline int vc1_pred_dc(MpegEncContext *s, int overlap, int pq, int n,
+                              int a_avail, int c_avail,
+                              int16_t **dc_val_ptr, int *dir_ptr)
+{
+    int a, b, c, wrap, pred, scale;
+    int16_t *dc_val;
+    int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
+    int q1, q2 = 0;
+
+    /* find prediction - wmv3_dc_scale always used here in fact */
+    if (n < 4)     scale = s->y_dc_scale;
+    else           scale = s->c_dc_scale;
+
+    wrap = s->block_wrap[n];
+    dc_val= s->dc_val[0] + s->block_index[n];
+
+    /* B A
+     * C X
+     */
+    c = dc_val[ - 1];
+    b = dc_val[ - 1 - wrap];
+    a = dc_val[ - wrap];
+    /* scale predictors if needed */
+    q1 = s->current_picture.qscale_table[mb_pos];
+    if(c_avail && (n!= 1 && n!=3)) {
+        q2 = s->current_picture.qscale_table[mb_pos - 1];
+        if(q2 && q2 != q1)
+            c = (c * s->y_dc_scale_table[q2] * vc1_dqscale[s->y_dc_scale_table[q1] - 1] + 0x20000) >> 18;
+    }
+    if(a_avail && (n!= 2 && n!=3)) {
+        q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride];
+        if(q2 && q2 != q1)
+            a = (a * s->y_dc_scale_table[q2] * vc1_dqscale[s->y_dc_scale_table[q1] - 1] + 0x20000) >> 18;
+    }
+    if(a_avail && c_avail && (n!=3)) {
+        int off = mb_pos;
+        if(n != 1) off--;
+        if(n != 2) off -= s->mb_stride;
+        q2 = s->current_picture.qscale_table[off];
+        if(q2 && q2 != q1)
+            b = (b * s->y_dc_scale_table[q2] * vc1_dqscale[s->y_dc_scale_table[q1] - 1] + 0x20000) >> 18;
+    }
+
+    if(a_avail && c_avail) {
+        if(abs(a - b) <= abs(b - c)) {
+            pred = c;
+            *dir_ptr = 1;//left
+        } else {
+            pred = a;
+            *dir_ptr = 0;//top
+        }
+    } else if(a_avail) {
+        pred = a;
+        *dir_ptr = 0;//top
+    } else if(c_avail) {
+        pred = c;
+        *dir_ptr = 1;//left
+    } else {
+        pred = 0;
+        *dir_ptr = 1;//left
+    }
+
+    /* update predictor */
+    *dc_val_ptr = &dc_val[0];
+    return pred;
+}
+
+
+/**
+ * @defgroup std_mb VC1 Macroblock-level functions in Simple/Main Profiles
+ * @see 7.1.4, p91 and 8.1.1.7, p(1)04
+ * @{
+ */
+
+static inline int vc1_coded_block_pred(MpegEncContext * s, int n, uint8_t **coded_block_ptr)
+{
+    int xy, wrap, pred, a, b, c;
+
+    xy = s->block_index[n];
+    wrap = s->b8_stride;
+
+    /* B C
+     * A X
+     */
+    a = s->coded_block[xy - 1       ];
+    b = s->coded_block[xy - 1 - wrap];
+    c = s->coded_block[xy     - wrap];
+
+    if (b == c) {
+        pred = a;
+    } else {
+        pred = c;
+    }
+
+    /* store value */
+    *coded_block_ptr = &s->coded_block[xy];
+
+    return pred;
+}
+
+/**
+ * Decode one AC coefficient
+ * @param v The VC1 context
+ * @param last Last coefficient
+ * @param skip How much zero coefficients to skip
+ * @param value Decoded AC coefficient value
+ * @see 8.1.3.4
+ */
+static void vc1_decode_ac_coeff(VC1Context *v, int *last, int *skip, int *value, int codingset)
+{
+    GetBitContext *gb = &v->s.gb;
+    int index, escape, run = 0, level = 0, lst = 0;
+
+    index = get_vlc2(gb, vc1_ac_coeff_table[codingset].table, AC_VLC_BITS, 3);
+    if (index != vc1_ac_sizes[codingset] - 1) {
+        run = vc1_index_decode_table[codingset][index][0];
+        level = vc1_index_decode_table[codingset][index][1];
+        lst = index >= vc1_last_decode_table[codingset];
+        if(get_bits(gb, 1))
+            level = -level;
+    } else {
+        escape = decode210(gb);
+        if (escape != 2) {
+            index = get_vlc2(gb, vc1_ac_coeff_table[codingset].table, AC_VLC_BITS, 3);
+            run = vc1_index_decode_table[codingset][index][0];
+            level = vc1_index_decode_table[codingset][index][1];
+            lst = index >= vc1_last_decode_table[codingset];
+            if(escape == 0) {
+                if(lst)
+                    level += vc1_last_delta_level_table[codingset][run];
+                else
+                    level += vc1_delta_level_table[codingset][run];
+            } else {
+                if(lst)
+                    run += vc1_last_delta_run_table[codingset][level] + 1;
+                else
+                    run += vc1_delta_run_table[codingset][level] + 1;
+            }
+            if(get_bits(gb, 1))
+                level = -level;
+        } else {
+            int sign;
+            lst = get_bits(gb, 1);
+            if(v->s.esc3_level_length == 0) {
+                if(v->pq < 8 || v->dquantfrm) { // table 59
+                    v->s.esc3_level_length = get_bits(gb, 3);
+                    if(!v->s.esc3_level_length)
+                        v->s.esc3_level_length = get_bits(gb, 2) + 8;
+                } else { //table 60
+                    v->s.esc3_level_length = get_prefix(gb, 1, 6) + 2;
+                }
+                v->s.esc3_run_length = 3 + get_bits(gb, 2);
+            }
+            run = get_bits(gb, v->s.esc3_run_length);
+            sign = get_bits(gb, 1);
+            level = get_bits(gb, v->s.esc3_level_length);
+            if(sign)
+                level = -level;
+        }
+    }
+
+    *last = lst;
+    *skip = run;
+    *value = level;
+}
+
+/** Decode intra block in intra frames - should be faster than decode_intra_block
+ * @param v VC1Context
+ * @param block block to decode
+ * @param coded are AC coeffs present or not
+ * @param codingset set of VLC to decode data
+ */
+static int vc1_decode_i_block(VC1Context *v, DCTELEM block[64], int n, int coded, int codingset)
+{
+    GetBitContext *gb = &v->s.gb;
+    MpegEncContext *s = &v->s;
+    int dc_pred_dir = 0; /* Direction of the DC prediction used */
+    int run_diff, i;
+    int16_t *dc_val;
+    int16_t *ac_val, *ac_val2;
+    int dcdiff;
+
+    /* Get DC differential */
+    if (n < 4) {
+        dcdiff = get_vlc2(&s->gb, ff_msmp4_dc_luma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3);
+    } else {
+        dcdiff = get_vlc2(&s->gb, ff_msmp4_dc_chroma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3);
+    }
+    if (dcdiff < 0){
+        av_log(s->avctx, AV_LOG_ERROR, "Illegal DC VLC\n");
+        return -1;
+    }
+    if (dcdiff)
+    {
+        if (dcdiff == 119 /* ESC index value */)
+        {
+            /* TODO: Optimize */
+            if (v->pq == 1) dcdiff = get_bits(gb, 10);
+            else if (v->pq == 2) dcdiff = get_bits(gb, 9);
+            else dcdiff = get_bits(gb, 8);
+        }
+        else
+        {
+            if (v->pq == 1)
+                dcdiff = (dcdiff<<2) + get_bits(gb, 2) - 3;
+            else if (v->pq == 2)
+                dcdiff = (dcdiff<<1) + get_bits(gb, 1) - 1;
+        }
+        if (get_bits(gb, 1))
+            dcdiff = -dcdiff;
+    }
+
+    /* Prediction */
+    dcdiff += vc1_i_pred_dc(&v->s, v->overlap, v->pq, n, &dc_val, &dc_pred_dir);
+    *dc_val = dcdiff;
+
+    /* Store the quantized DC coeff, used for prediction */
+    if (n < 4) {
+        block[0] = dcdiff * s->y_dc_scale;
+    } else {
+        block[0] = dcdiff * s->c_dc_scale;
+    }
+    /* Skip ? */
+    run_diff = 0;
+    i = 0;
+    if (!coded) {
+        goto not_coded;
+    }
+
+    //AC Decoding
+    i = 1;
+
+    {
+        int last = 0, skip, value;
+        const int8_t *zz_table;
+        int scale;
+        int k;
+
+        scale = v->pq * 2 + v->halfpq;
+
+        if(v->s.ac_pred) {
+            if(!dc_pred_dir)
+                zz_table = vc1_horizontal_zz;
+            else
+                zz_table = vc1_vertical_zz;
+        } else
+            zz_table = vc1_normal_zz;
+
+        ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
+        ac_val2 = ac_val;
+        if(dc_pred_dir) //left
+            ac_val -= 16;
+        else //top
+            ac_val -= 16 * s->block_wrap[n];
+
+        while (!last) {
+            vc1_decode_ac_coeff(v, &last, &skip, &value, codingset);
+            i += skip;
+            if(i > 63)
+                break;
+            block[zz_table[i++]] = value;
+        }
+
+        /* apply AC prediction if needed */
+        if(s->ac_pred) {
+            if(dc_pred_dir) { //left
+                for(k = 1; k < 8; k++)
+                    block[k << 3] += ac_val[k];
+            } else { //top
+                for(k = 1; k < 8; k++)
+                    block[k] += ac_val[k + 8];
+            }
+        }
+        /* save AC coeffs for further prediction */
+        for(k = 1; k < 8; k++) {
+            ac_val2[k] = block[k << 3];
+            ac_val2[k + 8] = block[k];
+        }
+
+        /* scale AC coeffs */
+        for(k = 1; k < 64; k++)
+            if(block[k]) {
+                block[k] *= scale;
+                if(!v->pquantizer)
+                    block[k] += (block[k] < 0) ? -v->pq : v->pq;
+            }
+
+        if(s->ac_pred) i = 63;
+    }
+
+not_coded:
+    if(!coded) {
+        int k, scale;
+        ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
+        ac_val2 = ac_val;
+
+        scale = v->pq * 2 + v->halfpq;
+        memset(ac_val2, 0, 16 * 2);
+        if(dc_pred_dir) {//left
+            ac_val -= 16;
+            if(s->ac_pred)
+                memcpy(ac_val2, ac_val, 8 * 2);
+        } else {//top
+            ac_val -= 16 * s->block_wrap[n];
+            if(s->ac_pred)
+                memcpy(ac_val2 + 8, ac_val + 8, 8 * 2);
+        }
+
+        /* apply AC prediction if needed */
+        if(s->ac_pred) {
+            if(dc_pred_dir) { //left
+                for(k = 1; k < 8; k++) {
+                    block[k << 3] = ac_val[k] * scale;
+                    if(!v->pquantizer && block[k << 3])
+                        block[k << 3] += (block[k << 3] < 0) ? -v->pq : v->pq;
+                }
+            } else { //top
+                for(k = 1; k < 8; k++) {
+                    block[k] = ac_val[k + 8] * scale;
+                    if(!v->pquantizer && block[k])
+                        block[k] += (block[k] < 0) ? -v->pq : v->pq;
+                }
+            }
+            i = 63;
+        }
+    }
+    s->block_last_index[n] = i;
+
+    return 0;
+}
+
+/** Decode intra block in intra frames - should be faster than decode_intra_block
+ * @param v VC1Context
+ * @param block block to decode
+ * @param coded are AC coeffs present or not
+ * @param codingset set of VLC to decode data
+ */
+static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n, int coded, int codingset, int mquant)
+{
+    GetBitContext *gb = &v->s.gb;
+    MpegEncContext *s = &v->s;
+    int dc_pred_dir = 0; /* Direction of the DC prediction used */
+    int run_diff, i;
+    int16_t *dc_val;
+    int16_t *ac_val, *ac_val2;
+    int dcdiff;
+    int a_avail = v->a_avail, c_avail = v->c_avail;
+    int use_pred = s->ac_pred;
+    int scale;
+    int q1, q2 = 0;
+    int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
+
+    /* Get DC differential */
+    if (n < 4) {
+        dcdiff = get_vlc2(&s->gb, ff_msmp4_dc_luma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3);
+    } else {
+        dcdiff = get_vlc2(&s->gb, ff_msmp4_dc_chroma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3);
+    }
+    if (dcdiff < 0){
+        av_log(s->avctx, AV_LOG_ERROR, "Illegal DC VLC\n");
+        return -1;
+    }
+    if (dcdiff)
+    {
+        if (dcdiff == 119 /* ESC index value */)
+        {
+            /* TODO: Optimize */
+            if (mquant == 1) dcdiff = get_bits(gb, 10);
+            else if (mquant == 2) dcdiff = get_bits(gb, 9);
+            else dcdiff = get_bits(gb, 8);
+        }
+        else
+        {
+            if (mquant == 1)
+                dcdiff = (dcdiff<<2) + get_bits(gb, 2) - 3;
+            else if (mquant == 2)
+                dcdiff = (dcdiff<<1) + get_bits(gb, 1) - 1;
+        }
+        if (get_bits(gb, 1))
+            dcdiff = -dcdiff;
+    }
+
+    /* Prediction */
+    dcdiff += vc1_pred_dc(&v->s, v->overlap, mquant, n, v->a_avail, v->c_avail, &dc_val, &dc_pred_dir);
+    *dc_val = dcdiff;
+
+    /* Store the quantized DC coeff, used for prediction */
+    if (n < 4) {
+        block[0] = dcdiff * s->y_dc_scale;
+    } else {
+        block[0] = dcdiff * s->c_dc_scale;
+    }
+    /* Skip ? */
+    run_diff = 0;
+    i = 0;
+
+    //AC Decoding
+    i = 1;
+
+    /* check if AC is needed at all and adjust direction if needed */
+    if(!a_avail) dc_pred_dir = 1;
+    if(!c_avail) dc_pred_dir = 0;
+    if(!a_avail && !c_avail) use_pred = 0;
+    ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
+    ac_val2 = ac_val;
+
+    scale = mquant * 2 + v->halfpq;
+
+    if(dc_pred_dir) //left
+        ac_val -= 16;
+    else //top
+        ac_val -= 16 * s->block_wrap[n];
+
+    q1 = s->current_picture.qscale_table[mb_pos];
+    if(dc_pred_dir && c_avail) q2 = s->current_picture.qscale_table[mb_pos - 1];
+    if(!dc_pred_dir && a_avail) q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride];
+    if(n && n<4) q2 = q1;
+
+    if(coded) {
+        int last = 0, skip, value;
+        const int8_t *zz_table;
+        int k;
+
+        if(v->s.ac_pred) {
+            if(!dc_pred_dir)
+                zz_table = vc1_horizontal_zz;
+            else
+                zz_table = vc1_vertical_zz;
+        } else
+            zz_table = vc1_normal_zz;
+
+        while (!last) {
+            vc1_decode_ac_coeff(v, &last, &skip, &value, codingset);
+            i += skip;
+            if(i > 63)
+                break;
+            block[zz_table[i++]] = value;
+        }
+
+        /* apply AC prediction if needed */
+        if(use_pred) {
+            /* scale predictors if needed*/
+            if(q2 && q1!=q2) {
+                q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
+                q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
+
+                if(dc_pred_dir) { //left
+                    for(k = 1; k < 8; k++)
+                        block[k << 3] += (ac_val[k] * q2 * vc1_dqscale[q1 - 1] + 0x20000) >> 18;
+                } else { //top
+                    for(k = 1; k < 8; k++)
+                        block[k] += (ac_val[k + 8] * q2 * vc1_dqscale[q1 - 1] + 0x20000) >> 18;
+                }
+            } else {
+                if(dc_pred_dir) { //left
+                    for(k = 1; k < 8; k++)
+                        block[k << 3] += ac_val[k];
+                } else { //top
+                    for(k = 1; k < 8; k++)
+                        block[k] += ac_val[k + 8];
+                }
+            }
+        }
+        /* save AC coeffs for further prediction */
+        for(k = 1; k < 8; k++) {
+            ac_val2[k] = block[k << 3];
+            ac_val2[k + 8] = block[k];
+        }
+
+        /* scale AC coeffs */
+        for(k = 1; k < 64; k++)
+            if(block[k]) {
+                block[k] *= scale;
+                if(!v->pquantizer)
+                    block[k] += (block[k] < 0) ? -mquant : mquant;
+            }
+
+        if(use_pred) i = 63;
+    } else { // no AC coeffs
+        int k;
+
+        memset(ac_val2, 0, 16 * 2);
+        if(dc_pred_dir) {//left
+            if(use_pred) {
+                memcpy(ac_val2, ac_val, 8 * 2);
+                if(q2 && q1!=q2) {
+                    q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
+                    q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
+                    for(k = 1; k < 8; k++)
+                        ac_val2[k] = (ac_val2[k] * q2 * vc1_dqscale[q1 - 1] + 0x20000) >> 18;
+                }
+            }
+        } else {//top
+            if(use_pred) {
+                memcpy(ac_val2 + 8, ac_val + 8, 8 * 2);
+                if(q2 && q1!=q2) {
+                    q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
+                    q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
+                    for(k = 1; k < 8; k++)
+                        ac_val2[k + 8] = (ac_val2[k + 8] * q2 * vc1_dqscale[q1 - 1] + 0x20000) >> 18;
+                }
+            }
+        }
+
+        /* apply AC prediction if needed */
+        if(use_pred) {
+            if(dc_pred_dir) { //left
+                for(k = 1; k < 8; k++) {
+                    block[k << 3] = ac_val2[k] * scale;
+                    if(!v->pquantizer && block[k << 3])
+                        block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant;
+                }
+            } else { //top
+                for(k = 1; k < 8; k++) {
+                    block[k] = ac_val2[k + 8] * scale;
+                    if(!v->pquantizer && block[k])
+                        block[k] += (block[k] < 0) ? -mquant : mquant;
+                }
+            }
+            i = 63;
+        }
+    }
+    s->block_last_index[n] = i;
+
+    return 0;
+}
+
+/** Decode intra block in inter frames - more generic version than vc1_decode_i_block
+ * @param v VC1Context
+ * @param block block to decode
+ * @param coded are AC coeffs present or not
+ * @param mquant block quantizer
+ * @param codingset set of VLC to decode data
+ */
+static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int coded, int mquant, int codingset)
+{
+    GetBitContext *gb = &v->s.gb;
+    MpegEncContext *s = &v->s;
+    int dc_pred_dir = 0; /* Direction of the DC prediction used */
+    int run_diff, i;
+    int16_t *dc_val;
+    int16_t *ac_val, *ac_val2;
+    int dcdiff;
+    int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
+    int a_avail = v->a_avail, c_avail = v->c_avail;
+    int use_pred = s->ac_pred;
+    int scale;
+    int q1, q2 = 0;
+
+    /* XXX: Guard against dumb values of mquant */
+    mquant = (mquant < 1) ? 0 : ( (mquant>31) ? 31 : mquant );
+
+    /* Set DC scale - y and c use the same */
+    s->y_dc_scale = s->y_dc_scale_table[mquant];
+    s->c_dc_scale = s->c_dc_scale_table[mquant];
+
+    /* Get DC differential */
+    if (n < 4) {
+        dcdiff = get_vlc2(&s->gb, ff_msmp4_dc_luma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3);
+    } else {
+        dcdiff = get_vlc2(&s->gb, ff_msmp4_dc_chroma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3);
+    }
+    if (dcdiff < 0){
+        av_log(s->avctx, AV_LOG_ERROR, "Illegal DC VLC\n");
+        return -1;
+    }
+    if (dcdiff)
+    {
+        if (dcdiff == 119 /* ESC index value */)
+        {
+            /* TODO: Optimize */
+            if (mquant == 1) dcdiff = get_bits(gb, 10);
+            else if (mquant == 2) dcdiff = get_bits(gb, 9);
+            else dcdiff = get_bits(gb, 8);
+        }
+        else
+        {
+            if (mquant == 1)
+                dcdiff = (dcdiff<<2) + get_bits(gb, 2) - 3;
+            else if (mquant == 2)
+                dcdiff = (dcdiff<<1) + get_bits(gb, 1) - 1;
+        }
+        if (get_bits(gb, 1))
+            dcdiff = -dcdiff;
+    }
+
+    /* Prediction */
+    dcdiff += vc1_pred_dc(&v->s, v->overlap, mquant, n, a_avail, c_avail, &dc_val, &dc_pred_dir);
+    *dc_val = dcdiff;
+
+    /* Store the quantized DC coeff, used for prediction */
+
+    if (n < 4) {
+        block[0] = dcdiff * s->y_dc_scale;
+    } else {
+        block[0] = dcdiff * s->c_dc_scale;
+    }
+    /* Skip ? */
+    run_diff = 0;
+    i = 0;
+
+    //AC Decoding
+    i = 1;
+
+    /* check if AC is needed at all and adjust direction if needed */
+    if(!a_avail) dc_pred_dir = 1;
+    if(!c_avail) dc_pred_dir = 0;
+    if(!a_avail && !c_avail) use_pred = 0;
+    ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
+    ac_val2 = ac_val;
+
+    scale = mquant * 2 + v->halfpq;
+
+    if(dc_pred_dir) //left
+        ac_val -= 16;
+    else //top
+        ac_val -= 16 * s->block_wrap[n];
+
+    q1 = s->current_picture.qscale_table[mb_pos];
+    if(dc_pred_dir && c_avail) q2 = s->current_picture.qscale_table[mb_pos - 1];
+    if(!dc_pred_dir && a_avail) q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride];
+    if(n && n<4) q2 = q1;
+
+    if(coded) {
+        int last = 0, skip, value;
+        const int8_t *zz_table;
+        int k;
+
+        zz_table = vc1_simple_progressive_8x8_zz;
+
+        while (!last) {
+            vc1_decode_ac_coeff(v, &last, &skip, &value, codingset);
+            i += skip;
+            if(i > 63)
+                break;
+            block[zz_table[i++]] = value;
+        }
+
+        /* apply AC prediction if needed */
+        if(use_pred) {
+            /* scale predictors if needed*/
+            if(q2 && q1!=q2) {
+                q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
+                q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
+
+                if(dc_pred_dir) { //left
+                    for(k = 1; k < 8; k++)
+                        block[k << 3] += (ac_val[k] * q2 * vc1_dqscale[q1 - 1] + 0x20000) >> 18;
+                } else { //top
+                    for(k = 1; k < 8; k++)
+                        block[k] += (ac_val[k + 8] * q2 * vc1_dqscale[q1 - 1] + 0x20000) >> 18;
+                }
+            } else {
+                if(dc_pred_dir) { //left
+                    for(k = 1; k < 8; k++)
+                        block[k << 3] += ac_val[k];
+                } else { //top
+                    for(k = 1; k < 8; k++)
+                        block[k] += ac_val[k + 8];
+                }
+            }
+        }
+        /* save AC coeffs for further prediction */
+        for(k = 1; k < 8; k++) {
+            ac_val2[k] = block[k << 3];
+            ac_val2[k + 8] = block[k];
+        }
+
+        /* scale AC coeffs */
+        for(k = 1; k < 64; k++)
+            if(block[k]) {
+                block[k] *= scale;
+                if(!v->pquantizer)
+                    block[k] += (block[k] < 0) ? -mquant : mquant;
+            }
+
+        if(use_pred) i = 63;
+    } else { // no AC coeffs
+        int k;
+
+        memset(ac_val2, 0, 16 * 2);
+        if(dc_pred_dir) {//left
+            if(use_pred) {
+                memcpy(ac_val2, ac_val, 8 * 2);
+                if(q2 && q1!=q2) {
+                    q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
+                    q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
+                    for(k = 1; k < 8; k++)
+                        ac_val2[k] = (ac_val2[k] * q2 * vc1_dqscale[q1 - 1] + 0x20000) >> 18;
+                }
+            }
+        } else {//top
+            if(use_pred) {
+                memcpy(ac_val2 + 8, ac_val + 8, 8 * 2);
+                if(q2 && q1!=q2) {
+                    q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
+                    q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
+                    for(k = 1; k < 8; k++)
+                        ac_val2[k + 8] = (ac_val2[k + 8] * q2 * vc1_dqscale[q1 - 1] + 0x20000) >> 18;
+                }
+            }
+        }
+
+        /* apply AC prediction if needed */
+        if(use_pred) {
+            if(dc_pred_dir) { //left
+                for(k = 1; k < 8; k++) {
+                    block[k << 3] = ac_val2[k] * scale;
+                    if(!v->pquantizer && block[k << 3])
+                        block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant;
+                }
+            } else { //top
+                for(k = 1; k < 8; k++) {
+                    block[k] = ac_val2[k + 8] * scale;
+                    if(!v->pquantizer && block[k])
+                        block[k] += (block[k] < 0) ? -mquant : mquant;
+                }
+            }
+            i = 63;
+        }
+    }
+    s->block_last_index[n] = i;
+
+    return 0;
+}
+
+/** Decode P block
+ */
+static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquant, int ttmb, int first_block)
+{
+    MpegEncContext *s = &v->s;
+    GetBitContext *gb = &s->gb;
+    int i, j;
+    int subblkpat = 0;
+    int scale, off, idx, last, skip, value;
+    int ttblk = ttmb & 7;
+
+    if(ttmb == -1) {
+        ttblk = ttblk_to_tt[v->tt_index][get_vlc2(gb, vc1_ttblk_vlc[v->tt_index].table, VC1_TTBLK_VLC_BITS, 1)];
+    }
+    if(ttblk == TT_4X4) {
+        subblkpat = ~(get_vlc2(gb, vc1_subblkpat_vlc[v->tt_index].table, VC1_SUBBLKPAT_VLC_BITS, 1) + 1);
+    }
+    if((ttblk != TT_8X8 && ttblk != TT_4X4) && (v->ttmbf || (ttmb != -1 && (ttmb & 8) && !first_block))) {
+        subblkpat = decode012(gb);
+        if(subblkpat) subblkpat ^= 3; //swap decoded pattern bits
+        if(ttblk == TT_8X4_TOP || ttblk == TT_8X4_BOTTOM) ttblk = TT_8X4;
+        if(ttblk == TT_4X8_RIGHT || ttblk == TT_4X8_LEFT) ttblk = TT_4X8;
+    }
+    scale = 2 * mquant + v->halfpq;
+
+    // convert transforms like 8X4_TOP to generic TT and SUBBLKPAT
+    if(ttblk == TT_8X4_TOP || ttblk == TT_8X4_BOTTOM) {
+        subblkpat = 2 - (ttblk == TT_8X4_TOP);
+        ttblk = TT_8X4;
+    }
+    if(ttblk == TT_4X8_RIGHT || ttblk == TT_4X8_LEFT) {
+        subblkpat = 2 - (ttblk == TT_4X8_LEFT);
+        ttblk = TT_4X8;
+    }
+    switch(ttblk) {
+    case TT_8X8:
+        i = 0;
+        last = 0;
+        while (!last) {
+            vc1_decode_ac_coeff(v, &last, &skip, &value, v->codingset2);
+            i += skip;
+            if(i > 63)
+                break;
+            idx = vc1_simple_progressive_8x8_zz[i++];
+            block[idx] = value * scale;
+            if(!v->pquantizer)
+                block[idx] += (block[idx] < 0) ? -mquant : mquant;
+        }
+        s->dsp.vc1_inv_trans_8x8(block);
+        break;
+    case TT_4X4:
+        for(j = 0; j < 4; j++) {
+            last = subblkpat & (1 << (3 - j));
+            i = 0;
+            off = (j & 1) * 4 + (j & 2) * 16;
+            while (!last) {
+                vc1_decode_ac_coeff(v, &last, &skip, &value, v->codingset2);
+                i += skip;
+                if(i > 15)
+                    break;
+                idx = vc1_simple_progressive_4x4_zz[i++];
+                block[idx + off] = value * scale;
+                if(!v->pquantizer)
+                    block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant;
+            }
+            if(!(subblkpat & (1 << (3 - j))))
+                s->dsp.vc1_inv_trans_4x4(block, j);
+        }
+        break;
+    case TT_8X4:
+        for(j = 0; j < 2; j++) {
+            last = subblkpat & (1 << (1 - j));
+            i = 0;
+            off = j * 32;
+            while (!last) {
+                vc1_decode_ac_coeff(v, &last, &skip, &value, v->codingset2);
+                i += skip;
+                if(i > 31)
+                    break;
+                if(v->profile < PROFILE_ADVANCED)
+                    idx = vc1_simple_progressive_8x4_zz[i++];
+                else
+                    idx = vc1_adv_progressive_8x4_zz[i++];
+                block[idx + off] = value * scale;
+                if(!v->pquantizer)
+                    block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant;
+            }
+            if(!(subblkpat & (1 << (1 - j))))
+                s->dsp.vc1_inv_trans_8x4(block, j);
+        }
+        break;
+    case TT_4X8:
+        for(j = 0; j < 2; j++) {
+            last = subblkpat & (1 << (1 - j));
+            i = 0;
+            off = j * 4;
+            while (!last) {
+                vc1_decode_ac_coeff(v, &last, &skip, &value, v->codingset2);
+                i += skip;
+                if(i > 31)
+                    break;
+                if(v->profile < PROFILE_ADVANCED)
+                    idx = vc1_simple_progressive_4x8_zz[i++];
+                else
+                    idx = vc1_adv_progressive_4x8_zz[i++];
+                block[idx + off] = value * scale;
+                if(!v->pquantizer)
+                    block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant;
+            }
+            if(!(subblkpat & (1 << (1 - j))))
+                s->dsp.vc1_inv_trans_4x8(block, j);
+        }
+        break;
+    }
+    return 0;
+}
+
+
+/** Decode one P-frame MB (in Simple/Main profile)
+ */
+static int vc1_decode_p_mb(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+    GetBitContext *gb = &s->gb;
+    int i, j;
+    int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
+    int cbp; /* cbp decoding stuff */
+    int mqdiff, mquant; /* MB quantization */
+    int ttmb = v->ttfrm; /* MB Transform type */
+    int status;
+
+    static const int size_table[6] = { 0, 2, 3, 4, 5, 8 },
+      offset_table[6] = { 0, 1, 3, 7, 15, 31 };
+    int mb_has_coeffs = 1; /* last_flag */
+    int dmv_x, dmv_y; /* Differential MV components */
+    int index, index1; /* LUT indices */
+    int val, sign; /* temp values */
+    int first_block = 1;
+    int dst_idx, off;
+    int skipped, fourmv;
+
+    mquant = v->pq; /* Loosy initialization */
+
+    if (v->mv_type_is_raw)
+        fourmv = get_bits1(gb);
+    else
+        fourmv = v->mv_type_mb_plane[mb_pos];
+    if (v->skip_is_raw)
+        skipped = get_bits1(gb);
+    else
+        skipped = v->s.mbskip_table[mb_pos];
+
+    s->dsp.clear_blocks(s->block[0]);
+
+    if (!fourmv) /* 1MV mode */
+    {
+        if (!skipped)
+        {
+            GET_MVDATA(dmv_x, dmv_y);
+
+            if (s->mb_intra) {
+                s->current_picture.motion_val[1][s->block_index[0]][0] = 0;
+                s->current_picture.motion_val[1][s->block_index[0]][1] = 0;
+            }
+            s->current_picture.mb_type[mb_pos] = s->mb_intra ? MB_TYPE_INTRA : MB_TYPE_16x16;
+            vc1_pred_mv(s, 0, dmv_x, dmv_y, 1, v->range_x, v->range_y, v->mb_type[0]);
+
+            /* FIXME Set DC val for inter block ? */
+            if (s->mb_intra && !mb_has_coeffs)
+            {
+                GET_MQUANT();
+                s->ac_pred = get_bits(gb, 1);
+                cbp = 0;
+            }
+            else if (mb_has_coeffs)
+            {
+                if (s->mb_intra) s->ac_pred = get_bits(gb, 1);
+                cbp = get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_CBPCY_P_VLC_BITS, 2);
+                GET_MQUANT();
+            }
+            else
+            {
+                mquant = v->pq;
+                cbp = 0;
+            }
+            s->current_picture.qscale_table[mb_pos] = mquant;
+
+            if (!v->ttmbf && !s->mb_intra && mb_has_coeffs)
+                ttmb = get_vlc2(gb, vc1_ttmb_vlc[v->tt_index].table,
+                                VC1_TTMB_VLC_BITS, 2);
+            if(!s->mb_intra) vc1_mc_1mv(v, 0);
+            dst_idx = 0;
+            for (i=0; i<6; i++)
+            {
+                s->dc_val[0][s->block_index[i]] = 0;
+                dst_idx += i >> 2;
+                val = ((cbp >> (5 - i)) & 1);
+                off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize);
+                v->mb_type[0][s->block_index[i]] = s->mb_intra;
+                if(s->mb_intra) {
+                    /* check if prediction blocks A and C are available */
+                    v->a_avail = v->c_avail = 0;
+                    if(i == 2 || i == 3 || !s->first_slice_line)
+                        v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+                    if(i == 1 || i == 3 || s->mb_x)
+                        v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+
+                    vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset);
+                    if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
+                    s->dsp.vc1_inv_trans_8x8(s->block[i]);
+                    if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
+                    for(j = 0; j < 64; j++) s->block[i][j] += 128;
+                    s->dsp.put_pixels_clamped(s->block[i], s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2));
+                    if(v->pq >= 9 && v->overlap) {
+                        if(v->c_avail)
+                            s->dsp.vc1_h_overlap(s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2));
+                        if(v->a_avail)
+                            s->dsp.vc1_v_overlap(s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2));
+                    }
+                } else if(val) {
+                    vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block);
+                    if(!v->ttmbf && ttmb < 8) ttmb = -1;
+                    first_block = 0;
+                    if((i<4) || !(s->flags & CODEC_FLAG_GRAY))
+                        s->dsp.add_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize);
+                }
+            }
+        }
+        else //Skipped
+        {
+            s->mb_intra = 0;
+            for(i = 0; i < 6; i++) {
+                v->mb_type[0][s->block_index[i]] = 0;
+                s->dc_val[0][s->block_index[i]] = 0;
+            }
+            s->current_picture.mb_type[mb_pos] = MB_TYPE_SKIP;
+            s->current_picture.qscale_table[mb_pos] = 0;
+            vc1_pred_mv(s, 0, 0, 0, 1, v->range_x, v->range_y, v->mb_type[0]);
+            vc1_mc_1mv(v, 0);
+            return 0;
+        }
+    } //1MV mode
+    else //4MV mode
+    {
+        if (!skipped /* unskipped MB */)
+        {
+            int intra_count = 0, coded_inter = 0;
+            int is_intra[6], is_coded[6];
+            /* Get CBPCY */
+            cbp = get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_CBPCY_P_VLC_BITS, 2);
+            for (i=0; i<6; i++)
+            {
+                val = ((cbp >> (5 - i)) & 1);
+                s->dc_val[0][s->block_index[i]] = 0;
+                s->mb_intra = 0;
+                if(i < 4) {
+                    dmv_x = dmv_y = 0;
+                    s->mb_intra = 0;
+                    mb_has_coeffs = 0;
+                    if(val) {
+                        GET_MVDATA(dmv_x, dmv_y);
+                    }
+                    vc1_pred_mv(s, i, dmv_x, dmv_y, 0, v->range_x, v->range_y, v->mb_type[0]);
+                    if(!s->mb_intra) vc1_mc_4mv_luma(v, i);
+                    intra_count += s->mb_intra;
+                    is_intra[i] = s->mb_intra;
+                    is_coded[i] = mb_has_coeffs;
+                }
+                if(i&4){
+                    is_intra[i] = (intra_count >= 3);
+                    is_coded[i] = val;
+                }
+                if(i == 4) vc1_mc_4mv_chroma(v);
+                v->mb_type[0][s->block_index[i]] = is_intra[i];
+                if(!coded_inter) coded_inter = !is_intra[i] & is_coded[i];
+            }
+            // if there are no coded blocks then don't do anything more
+            if(!intra_count && !coded_inter) return 0;
+            dst_idx = 0;
+            GET_MQUANT();
+            s->current_picture.qscale_table[mb_pos] = mquant;
+            /* test if block is intra and has pred */
+            {
+                int intrapred = 0;
+                for(i=0; i<6; i++)
+                    if(is_intra[i]) {
+                        if(((!s->first_slice_line || (i==2 || i==3)) && v->mb_type[0][s->block_index[i] - s->block_wrap[i]])
+                            || ((s->mb_x || (i==1 || i==3)) && v->mb_type[0][s->block_index[i] - 1])) {
+                            intrapred = 1;
+                            break;
+                        }
+                    }
+                if(intrapred)s->ac_pred = get_bits(gb, 1);
+                else s->ac_pred = 0;
+            }
+            if (!v->ttmbf && coded_inter)
+                ttmb = get_vlc2(gb, vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2);
+            for (i=0; i<6; i++)
+            {
+                dst_idx += i >> 2;
+                off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize);
+                s->mb_intra = is_intra[i];
+                if (is_intra[i]) {
+                    /* check if prediction blocks A and C are available */
+                    v->a_avail = v->c_avail = 0;
+                    if(i == 2 || i == 3 || !s->first_slice_line)
+                        v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+                    if(i == 1 || i == 3 || s->mb_x)
+                        v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+
+                    vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant, (i&4)?v->codingset2:v->codingset);
+                    if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
+                    s->dsp.vc1_inv_trans_8x8(s->block[i]);
+                    if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
+                    for(j = 0; j < 64; j++) s->block[i][j] += 128;
+                    s->dsp.put_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize);
+                    if(v->pq >= 9 && v->overlap) {
+                        if(v->c_avail)
+                            s->dsp.vc1_h_overlap(s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2));
+                        if(v->a_avail)
+                            s->dsp.vc1_v_overlap(s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2));
+                    }
+                } else if(is_coded[i]) {
+                    status = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block);
+                    if(!v->ttmbf && ttmb < 8) ttmb = -1;
+                    first_block = 0;
+                    if((i<4) || !(s->flags & CODEC_FLAG_GRAY))
+                        s->dsp.add_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize);
+                }
+            }
+            return status;
+        }
+        else //Skipped MB
+        {
+            s->mb_intra = 0;
+            s->current_picture.qscale_table[mb_pos] = 0;
+            for (i=0; i<6; i++) {
+                v->mb_type[0][s->block_index[i]] = 0;
+                s->dc_val[0][s->block_index[i]] = 0;
+            }
+            for (i=0; i<4; i++)
+            {
+                vc1_pred_mv(s, i, 0, 0, 0, v->range_x, v->range_y, v->mb_type[0]);
+                vc1_mc_4mv_luma(v, i);
+            }
+            vc1_mc_4mv_chroma(v);
+            s->current_picture.qscale_table[mb_pos] = 0;
+            return 0;
+        }
+    }
+
+    /* Should never happen */
+    return -1;
+}
+
+/** Decode one B-frame MB (in Main profile)
+ */
+static void vc1_decode_b_mb(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+    GetBitContext *gb = &s->gb;
+    int i, j;
+    int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
+    int cbp = 0; /* cbp decoding stuff */
+    int mqdiff, mquant; /* MB quantization */
+    int ttmb = v->ttfrm; /* MB Transform type */
+
+    static const int size_table[6] = { 0, 2, 3, 4, 5, 8 },
+      offset_table[6] = { 0, 1, 3, 7, 15, 31 };
+    int mb_has_coeffs = 0; /* last_flag */
+    int index, index1; /* LUT indices */
+    int val, sign; /* temp values */
+    int first_block = 1;
+    int dst_idx, off;
+    int skipped, direct;
+    int dmv_x[2], dmv_y[2];
+    int bmvtype = BMV_TYPE_BACKWARD;
+
+    mquant = v->pq; /* Loosy initialization */
+    s->mb_intra = 0;
+
+    if (v->dmb_is_raw)
+        direct = get_bits1(gb);
+    else
+        direct = v->direct_mb_plane[mb_pos];
+    if (v->skip_is_raw)
+        skipped = get_bits1(gb);
+    else
+        skipped = v->s.mbskip_table[mb_pos];
+
+    s->dsp.clear_blocks(s->block[0]);
+    dmv_x[0] = dmv_x[1] = dmv_y[0] = dmv_y[1] = 0;
+    for(i = 0; i < 6; i++) {
+        v->mb_type[0][s->block_index[i]] = 0;
+        s->dc_val[0][s->block_index[i]] = 0;
+    }
+    s->current_picture.qscale_table[mb_pos] = 0;
+
+    if (!direct) {
+        if (!skipped) {
+            GET_MVDATA(dmv_x[0], dmv_y[0]);
+            dmv_x[1] = dmv_x[0];
+            dmv_y[1] = dmv_y[0];
+        }
+        if(skipped || !s->mb_intra) {
+            bmvtype = decode012(gb);
+            switch(bmvtype) {
+            case 0:
+                bmvtype = (v->bfraction >= (B_FRACTION_DEN/2)) ? BMV_TYPE_BACKWARD : BMV_TYPE_FORWARD;
+                break;
+            case 1:
+                bmvtype = (v->bfraction >= (B_FRACTION_DEN/2)) ? BMV_TYPE_FORWARD : BMV_TYPE_BACKWARD;
+                break;
+            case 2:
+                bmvtype = BMV_TYPE_INTERPOLATED;
+                dmv_x[0] = dmv_y[0] = 0;
+            }
+        }
+    }
+    for(i = 0; i < 6; i++)
+        v->mb_type[0][s->block_index[i]] = s->mb_intra;
+
+    if (skipped) {
+        if(direct) bmvtype = BMV_TYPE_INTERPOLATED;
+        vc1_pred_b_mv(v, dmv_x, dmv_y, direct, bmvtype);
+        vc1_b_mc(v, dmv_x, dmv_y, direct, bmvtype);
+        return;
+    }
+    if (direct) {
+        cbp = get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_CBPCY_P_VLC_BITS, 2);
+        GET_MQUANT();
+        s->mb_intra = 0;
+        mb_has_coeffs = 0;
+        s->current_picture.qscale_table[mb_pos] = mquant;
+        if(!v->ttmbf)
+            ttmb = get_vlc2(gb, vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2);
+        dmv_x[0] = dmv_y[0] = dmv_x[1] = dmv_y[1] = 0;
+        vc1_pred_b_mv(v, dmv_x, dmv_y, direct, bmvtype);
+        vc1_b_mc(v, dmv_x, dmv_y, direct, bmvtype);
+    } else {
+        if(!mb_has_coeffs && !s->mb_intra) {
+            /* no coded blocks - effectively skipped */
+            vc1_pred_b_mv(v, dmv_x, dmv_y, direct, bmvtype);
+            vc1_b_mc(v, dmv_x, dmv_y, direct, bmvtype);
+            return;
+        }
+        if(s->mb_intra && !mb_has_coeffs) {
+            GET_MQUANT();
+            s->current_picture.qscale_table[mb_pos] = mquant;
+            s->ac_pred = get_bits1(gb);
+            cbp = 0;
+            vc1_pred_b_mv(v, dmv_x, dmv_y, direct, bmvtype);
+        } else {
+            if(bmvtype == BMV_TYPE_INTERPOLATED) {
+                GET_MVDATA(dmv_x[0], dmv_y[0]);
+                if(!mb_has_coeffs) {
+                    /* interpolated skipped block */
+                    vc1_pred_b_mv(v, dmv_x, dmv_y, direct, bmvtype);
+                    vc1_b_mc(v, dmv_x, dmv_y, direct, bmvtype);
+                    return;
+                }
+            }
+            vc1_pred_b_mv(v, dmv_x, dmv_y, direct, bmvtype);
+            if(!s->mb_intra) {
+                vc1_b_mc(v, dmv_x, dmv_y, direct, bmvtype);
+            }
+            if(s->mb_intra)
+                s->ac_pred = get_bits1(gb);
+            cbp = get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_CBPCY_P_VLC_BITS, 2);
+            GET_MQUANT();
+            s->current_picture.qscale_table[mb_pos] = mquant;
+            if(!v->ttmbf && !s->mb_intra && mb_has_coeffs)
+                ttmb = get_vlc2(gb, vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2);
+        }
+    }
+    dst_idx = 0;
+    for (i=0; i<6; i++)
+    {
+        s->dc_val[0][s->block_index[i]] = 0;
+        dst_idx += i >> 2;
+        val = ((cbp >> (5 - i)) & 1);
+        off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize);
+        v->mb_type[0][s->block_index[i]] = s->mb_intra;
+        if(s->mb_intra) {
+            /* check if prediction blocks A and C are available */
+            v->a_avail = v->c_avail = 0;
+            if(i == 2 || i == 3 || !s->first_slice_line)
+                v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+            if(i == 1 || i == 3 || s->mb_x)
+                v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+
+            vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset);
+            if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
+            s->dsp.vc1_inv_trans_8x8(s->block[i]);
+            if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
+            for(j = 0; j < 64; j++) s->block[i][j] += 128;
+            s->dsp.put_pixels_clamped(s->block[i], s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2));
+        } else if(val) {
+            vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block);
+            if(!v->ttmbf && ttmb < 8) ttmb = -1;
+            first_block = 0;
+            if((i<4) || !(s->flags & CODEC_FLAG_GRAY))
+                s->dsp.add_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize);
+        }
+    }
+}
+
+/** Decode blocks of I-frame
+ */
+static void vc1_decode_i_blocks(VC1Context *v)
+{
+    int k, j;
+    MpegEncContext *s = &v->s;
+    int cbp, val;
+    uint8_t *coded_val;
+    int mb_pos;
+
+    /* select codingmode used for VLC tables selection */
+    switch(v->y_ac_table_index){
+    case 0:
+        v->codingset = (v->pqindex <= 8) ? CS_HIGH_RATE_INTRA : CS_LOW_MOT_INTRA;
+        break;
+    case 1:
+        v->codingset = CS_HIGH_MOT_INTRA;
+        break;
+    case 2:
+        v->codingset = CS_MID_RATE_INTRA;
+        break;
+    }
+
+    switch(v->c_ac_table_index){
+    case 0:
+        v->codingset2 = (v->pqindex <= 8) ? CS_HIGH_RATE_INTER : CS_LOW_MOT_INTER;
+        break;
+    case 1:
+        v->codingset2 = CS_HIGH_MOT_INTER;
+        break;
+    case 2:
+        v->codingset2 = CS_MID_RATE_INTER;
+        break;
+    }
+
+    /* Set DC scale - y and c use the same */
+    s->y_dc_scale = s->y_dc_scale_table[v->pq];
+    s->c_dc_scale = s->c_dc_scale_table[v->pq];
+
+    //do frame decode
+    s->mb_x = s->mb_y = 0;
+    s->mb_intra = 1;
+    s->first_slice_line = 1;
+    ff_er_add_slice(s, 0, 0, s->mb_width - 1, s->mb_height - 1, (AC_END|DC_END|MV_END));
+    for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
+        for(s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
+            ff_init_block_index(s);
+            ff_update_block_index(s);
+            s->dsp.clear_blocks(s->block[0]);
+            mb_pos = s->mb_x + s->mb_y * s->mb_width;
+            s->current_picture.mb_type[mb_pos] = MB_TYPE_INTRA;
+            s->current_picture.qscale_table[mb_pos] = v->pq;
+            s->current_picture.motion_val[1][s->block_index[0]][0] = 0;
+            s->current_picture.motion_val[1][s->block_index[0]][1] = 0;
+
+            // do actual MB decoding and displaying
+            cbp = get_vlc2(&v->s.gb, ff_msmp4_mb_i_vlc.table, MB_INTRA_VLC_BITS, 2);
+            v->s.ac_pred = get_bits(&v->s.gb, 1);
+
+            for(k = 0; k < 6; k++) {
+                val = ((cbp >> (5 - k)) & 1);
+
+                if (k < 4) {
+                    int pred = vc1_coded_block_pred(&v->s, k, &coded_val);
+                    val = val ^ pred;
+                    *coded_val = val;
+                }
+                cbp |= val << (5 - k);
+
+                vc1_decode_i_block(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2);
+
+                s->dsp.vc1_inv_trans_8x8(s->block[k]);
+                if(v->pq >= 9 && v->overlap) {
+                    for(j = 0; j < 64; j++) s->block[k][j] += 128;
+                }
+            }
+
+            vc1_put_block(v, s->block);
+            if(v->pq >= 9 && v->overlap) {
+                if(s->mb_x) {
+                    s->dsp.vc1_h_overlap(s->dest[0], s->linesize);
+                    s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
+                    if(!(s->flags & CODEC_FLAG_GRAY)) {
+                        s->dsp.vc1_h_overlap(s->dest[1], s->uvlinesize);
+                        s->dsp.vc1_h_overlap(s->dest[2], s->uvlinesize);
+                    }
+                }
+                s->dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize);
+                s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
+                if(!s->first_slice_line) {
+                    s->dsp.vc1_v_overlap(s->dest[0], s->linesize);
+                    s->dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize);
+                    if(!(s->flags & CODEC_FLAG_GRAY)) {
+                        s->dsp.vc1_v_overlap(s->dest[1], s->uvlinesize);
+                        s->dsp.vc1_v_overlap(s->dest[2], s->uvlinesize);
+                    }
+                }
+                s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
+                s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
+            }
+
+            if(get_bits_count(&s->gb) > v->bits) {
+                av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i\n", get_bits_count(&s->gb), v->bits);
+                return;
+            }
+        }
+        ff_draw_horiz_band(s, s->mb_y * 16, 16);
+        s->first_slice_line = 0;
+    }
+}
+
+/** Decode blocks of I-frame for advanced profile
+ */
+static void vc1_decode_i_blocks_adv(VC1Context *v)
+{
+    int k, j;
+    MpegEncContext *s = &v->s;
+    int cbp, val;
+    uint8_t *coded_val;
+    int mb_pos;
+    int mquant = v->pq;
+    int mqdiff;
+    int overlap;
+    GetBitContext *gb = &s->gb;
+
+    /* select codingmode used for VLC tables selection */
+    switch(v->y_ac_table_index){
+    case 0:
+        v->codingset = (v->pqindex <= 8) ? CS_HIGH_RATE_INTRA : CS_LOW_MOT_INTRA;
+        break;
+    case 1:
+        v->codingset = CS_HIGH_MOT_INTRA;
+        break;
+    case 2:
+        v->codingset = CS_MID_RATE_INTRA;
+        break;
+    }
+
+    switch(v->c_ac_table_index){
+    case 0:
+        v->codingset2 = (v->pqindex <= 8) ? CS_HIGH_RATE_INTER : CS_LOW_MOT_INTER;
+        break;
+    case 1:
+        v->codingset2 = CS_HIGH_MOT_INTER;
+        break;
+    case 2:
+        v->codingset2 = CS_MID_RATE_INTER;
+        break;
+    }
+
+    //do frame decode
+    s->mb_x = s->mb_y = 0;
+    s->mb_intra = 1;
+    s->first_slice_line = 1;
+    ff_er_add_slice(s, 0, 0, s->mb_width - 1, s->mb_height - 1, (AC_END|DC_END|MV_END));
+    for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
+        for(s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
+            ff_init_block_index(s);
+            ff_update_block_index(s);
+            s->dsp.clear_blocks(s->block[0]);
+            mb_pos = s->mb_x + s->mb_y * s->mb_stride;
+            s->current_picture.mb_type[mb_pos] = MB_TYPE_INTRA;
+            s->current_picture.motion_val[1][s->block_index[0]][0] = 0;
+            s->current_picture.motion_val[1][s->block_index[0]][1] = 0;
+
+            // do actual MB decoding and displaying
+            cbp = get_vlc2(&v->s.gb, ff_msmp4_mb_i_vlc.table, MB_INTRA_VLC_BITS, 2);
+            if(v->acpred_is_raw)
+                v->s.ac_pred = get_bits(&v->s.gb, 1);
+            else
+                v->s.ac_pred = v->acpred_plane[mb_pos];
+
+            if(v->condover == CONDOVER_SELECT) {
+                if(v->overflg_is_raw)
+                    overlap = get_bits(&v->s.gb, 1);
+                else
+                    overlap = v->over_flags_plane[mb_pos];
+            } else
+                overlap = (v->condover == CONDOVER_ALL);
+
+            GET_MQUANT();
+
+            s->current_picture.qscale_table[mb_pos] = mquant;
+            /* Set DC scale - y and c use the same */
+            s->y_dc_scale = s->y_dc_scale_table[mquant];
+            s->c_dc_scale = s->c_dc_scale_table[mquant];
+
+            for(k = 0; k < 6; k++) {
+                val = ((cbp >> (5 - k)) & 1);
+
+                if (k < 4) {
+                    int pred = vc1_coded_block_pred(&v->s, k, &coded_val);
+                    val = val ^ pred;
+                    *coded_val = val;
+                }
+                cbp |= val << (5 - k);
+
+                v->a_avail = !s->first_slice_line || (k==2 || k==3);
+                v->c_avail = !!s->mb_x || (k==1 || k==3);
+
+                vc1_decode_i_block_adv(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2, mquant);
+
+                s->dsp.vc1_inv_trans_8x8(s->block[k]);
+                for(j = 0; j < 64; j++) s->block[k][j] += 128;
+            }
+
+            vc1_put_block(v, s->block);
+            if(overlap) {
+                if(s->mb_x) {
+                    s->dsp.vc1_h_overlap(s->dest[0], s->linesize);
+                    s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
+                    if(!(s->flags & CODEC_FLAG_GRAY)) {
+                        s->dsp.vc1_h_overlap(s->dest[1], s->uvlinesize);
+                        s->dsp.vc1_h_overlap(s->dest[2], s->uvlinesize);
+                    }
+                }
+                s->dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize);
+                s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
+                if(!s->first_slice_line) {
+                    s->dsp.vc1_v_overlap(s->dest[0], s->linesize);
+                    s->dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize);
+                    if(!(s->flags & CODEC_FLAG_GRAY)) {
+                        s->dsp.vc1_v_overlap(s->dest[1], s->uvlinesize);
+                        s->dsp.vc1_v_overlap(s->dest[2], s->uvlinesize);
+                    }
+                }
+                s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
+                s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
+            }
+
+            if(get_bits_count(&s->gb) > v->bits) {
+                av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i\n", get_bits_count(&s->gb), v->bits);
+                return;
+            }
+        }
+        ff_draw_horiz_band(s, s->mb_y * 16, 16);
+        s->first_slice_line = 0;
+    }
+}
+
+static void vc1_decode_p_blocks(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+
+    /* select codingmode used for VLC tables selection */
+    switch(v->c_ac_table_index){
+    case 0:
+        v->codingset = (v->pqindex <= 8) ? CS_HIGH_RATE_INTRA : CS_LOW_MOT_INTRA;
+        break;
+    case 1:
+        v->codingset = CS_HIGH_MOT_INTRA;
+        break;
+    case 2:
+        v->codingset = CS_MID_RATE_INTRA;
+        break;
+    }
+
+    switch(v->c_ac_table_index){
+    case 0:
+        v->codingset2 = (v->pqindex <= 8) ? CS_HIGH_RATE_INTER : CS_LOW_MOT_INTER;
+        break;
+    case 1:
+        v->codingset2 = CS_HIGH_MOT_INTER;
+        break;
+    case 2:
+        v->codingset2 = CS_MID_RATE_INTER;
+        break;
+    }
+
+    ff_er_add_slice(s, 0, 0, s->mb_width - 1, s->mb_height - 1, (AC_END|DC_END|MV_END));
+    s->first_slice_line = 1;
+    for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
+        for(s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
+            ff_init_block_index(s);
+            ff_update_block_index(s);
+            s->dsp.clear_blocks(s->block[0]);
+
+            vc1_decode_p_mb(v);
+            if(get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) {
+                av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n", get_bits_count(&s->gb), v->bits,s->mb_x,s->mb_y);
+                return;
+            }
+        }
+        ff_draw_horiz_band(s, s->mb_y * 16, 16);
+        s->first_slice_line = 0;
+    }
+}
+
+static void vc1_decode_b_blocks(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+
+    /* select codingmode used for VLC tables selection */
+    switch(v->c_ac_table_index){
+    case 0:
+        v->codingset = (v->pqindex <= 8) ? CS_HIGH_RATE_INTRA : CS_LOW_MOT_INTRA;
+        break;
+    case 1:
+        v->codingset = CS_HIGH_MOT_INTRA;
+        break;
+    case 2:
+        v->codingset = CS_MID_RATE_INTRA;
+        break;
+    }
+
+    switch(v->c_ac_table_index){
+    case 0:
+        v->codingset2 = (v->pqindex <= 8) ? CS_HIGH_RATE_INTER : CS_LOW_MOT_INTER;
+        break;
+    case 1:
+        v->codingset2 = CS_HIGH_MOT_INTER;
+        break;
+    case 2:
+        v->codingset2 = CS_MID_RATE_INTER;
+        break;
+    }
+
+    ff_er_add_slice(s, 0, 0, s->mb_width - 1, s->mb_height - 1, (AC_END|DC_END|MV_END));
+    s->first_slice_line = 1;
+    for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
+        for(s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
+            ff_init_block_index(s);
+            ff_update_block_index(s);
+            s->dsp.clear_blocks(s->block[0]);
+
+            vc1_decode_b_mb(v);
+            if(get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) {
+                av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n", get_bits_count(&s->gb), v->bits,s->mb_x,s->mb_y);
+                return;
+            }
+        }
+        ff_draw_horiz_band(s, s->mb_y * 16, 16);
+        s->first_slice_line = 0;
+    }
+}
+
+static void vc1_decode_skip_blocks(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+
+    ff_er_add_slice(s, 0, 0, s->mb_width - 1, s->mb_height - 1, (AC_END|DC_END|MV_END));
+    s->first_slice_line = 1;
+    for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
+        s->mb_x = 0;
+        ff_init_block_index(s);
+        ff_update_block_index(s);
+        memcpy(s->dest[0], s->last_picture.data[0] + s->mb_y * 16 * s->linesize, s->linesize * 16);
+        memcpy(s->dest[1], s->last_picture.data[1] + s->mb_y * 8 * s->uvlinesize, s->uvlinesize * 8);
+        memcpy(s->dest[2], s->last_picture.data[2] + s->mb_y * 8 * s->uvlinesize, s->uvlinesize * 8);
+        ff_draw_horiz_band(s, s->mb_y * 16, 16);
+        s->first_slice_line = 0;
+    }
+    s->pict_type = P_TYPE;
+}
+
+static void vc1_decode_blocks(VC1Context *v)
+{
+
+    v->s.esc3_level_length = 0;
+
+    switch(v->s.pict_type) {
+    case I_TYPE:
+        if(v->profile == PROFILE_ADVANCED)
+            vc1_decode_i_blocks_adv(v);
+        else
+            vc1_decode_i_blocks(v);
+        break;
+    case P_TYPE:
+        if(v->p_frame_skipped)
+            vc1_decode_skip_blocks(v);
+        else
+            vc1_decode_p_blocks(v);
+        break;
+    case B_TYPE:
+        if(v->bi_type)
+            vc1_decode_i_blocks(v);
+        else
+            vc1_decode_b_blocks(v);
+        break;
+    }
+}
+
+
+/** Initialize a VC1/WMV3 decoder
+ * @todo TODO: Handle VC-1 IDUs (Transport level?)
+ * @todo TODO: Decypher remaining bits in extra_data
+ */
+static int vc1_decode_init(AVCodecContext *avctx)
+{
+    VC1Context *v = avctx->priv_data;
+    MpegEncContext *s = &v->s;
+    GetBitContext gb;
+
+    if (!avctx->extradata_size || !avctx->extradata) return -1;
+    if (!(avctx->flags & CODEC_FLAG_GRAY))
+        avctx->pix_fmt = PIX_FMT_YUV420P;
+    else
+        avctx->pix_fmt = PIX_FMT_GRAY8;
+    v->s.avctx = avctx;
+    avctx->flags |= CODEC_FLAG_EMU_EDGE;
+    v->s.flags |= CODEC_FLAG_EMU_EDGE;
+
+    if(ff_h263_decode_init(avctx) < 0)
+        return -1;
+    if (vc1_init_common(v) < 0) return -1;
+
+    avctx->coded_width = avctx->width;
+    avctx->coded_height = avctx->height;
+    if (avctx->codec_id == CODEC_ID_WMV3)
+    {
+        int count = 0;
+
+        // looks like WMV3 has a sequence header stored in the extradata
+        // advanced sequence header may be before the first frame
+        // the last byte of the extradata is a version number, 1 for the
+        // samples we can decode
+
+        init_get_bits(&gb, avctx->extradata, avctx->extradata_size*8);
+
+        if (decode_sequence_header(avctx, &gb) < 0)
+          return -1;
+
+        count = avctx->extradata_size*8 - get_bits_count(&gb);
+        if (count>0)
+        {
+            av_log(avctx, AV_LOG_INFO, "Extra data: %i bits left, value: %X\n",
+                   count, get_bits(&gb, count));
+        }
+        else if (count < 0)
+        {
+            av_log(avctx, AV_LOG_INFO, "Read %i bits in overflow\n", -count);
+        }
+    } else { // VC1/WVC1
+        int edata_size = avctx->extradata_size;
+        uint8_t *edata = avctx->extradata;
+
+        if(avctx->extradata_size < 16) {
+            av_log(avctx, AV_LOG_ERROR, "Extradata size too small: %i\n", edata_size);
+            return -1;
+        }
+        while(edata_size > 8) {
+            // test if we've found header
+            if(BE_32(edata) == 0x0000010F) {
+                edata += 4;
+                edata_size -= 4;
+                break;
+            }
+            edata_size--;
+            edata++;
+        }
+
+        init_get_bits(&gb, edata, edata_size*8);
+
+        if (decode_sequence_header(avctx, &gb) < 0)
+          return -1;
+
+        while(edata_size > 8) {
+            // test if we've found entry point
+            if(BE_32(edata) == 0x0000010E) {
+                edata += 4;
+                edata_size -= 4;
+                break;
+            }
+            edata_size--;
+            edata++;
+        }
+
+        init_get_bits(&gb, edata, edata_size*8);
+
+        if (decode_entry_point(avctx, &gb) < 0)
+          return -1;
+    }
+    avctx->has_b_frames= !!(avctx->max_b_frames);
+    s->low_delay = !avctx->has_b_frames;
+
+    s->mb_width = (avctx->coded_width+15)>>4;
+    s->mb_height = (avctx->coded_height+15)>>4;
+
+    /* Allocate mb bitplanes */
+    v->mv_type_mb_plane = av_malloc(s->mb_stride * s->mb_height);
+    v->direct_mb_plane = av_malloc(s->mb_stride * s->mb_height);
+    v->acpred_plane = av_malloc(s->mb_stride * s->mb_height);
+    v->over_flags_plane = av_malloc(s->mb_stride * s->mb_height);
+
+    /* allocate block type info in that way so it could be used with s->block_index[] */
+    v->mb_type_base = av_malloc(s->b8_stride * (s->mb_height * 2 + 1) + s->mb_stride * (s->mb_height + 1) * 2);
+    v->mb_type[0] = v->mb_type_base + s->b8_stride + 1;
+    v->mb_type[1] = v->mb_type_base + s->b8_stride * (s->mb_height * 2 + 1) + s->mb_stride + 1;
+    v->mb_type[2] = v->mb_type[1] + s->mb_stride * (s->mb_height + 1);
+
+    /* Init coded blocks info */
+    if (v->profile == PROFILE_ADVANCED)
+    {
+//        if (alloc_bitplane(&v->over_flags_plane, s->mb_width, s->mb_height) < 0)
+//            return -1;
+//        if (alloc_bitplane(&v->ac_pred_plane, s->mb_width, s->mb_height) < 0)
+//            return -1;
+    }
+
+    return 0;
+}
+
+
+/** Decode a VC1/WMV3 frame
+ * @todo TODO: Handle VC-1 IDUs (Transport level?)
+ */
+static int vc1_decode_frame(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    VC1Context *v = avctx->priv_data;
+    MpegEncContext *s = &v->s;
+    AVFrame *pict = data;
+    uint8_t *buf2 = NULL;
+
+    /* no supplementary picture */
+    if (buf_size == 0) {
+        /* special case for last picture */
+        if (s->low_delay==0 && s->next_picture_ptr) {
+            *pict= *(AVFrame*)s->next_picture_ptr;
+            s->next_picture_ptr= NULL;
+
+            *data_size = sizeof(AVFrame);
+        }
+
+        return 0;
+    }
+
+    //we need to set current_picture_ptr before reading the header, otherwise we cant store anyting im there
+    if(s->current_picture_ptr==NULL || s->current_picture_ptr->data[0]){
+        int i= ff_find_unused_picture(s, 0);
+        s->current_picture_ptr= &s->picture[i];
+    }
+
+    //for advanced profile we need to unescape buffer
+    if (avctx->codec_id == CODEC_ID_VC1) {
+        int i, buf_size2;
+        buf2 = av_malloc(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+        buf_size2 = 0;
+        for(i = 0; i < buf_size; i++) {
+            if(buf[i] == 3 && i >= 2 && !buf[i-1] && !buf[i-2] && i < buf_size-1 && buf[i+1] < 4) {
+                buf2[buf_size2++] = buf[i+1];
+                i++;
+            } else
+                buf2[buf_size2++] = buf[i];
+        }
+        init_get_bits(&s->gb, buf2, buf_size2*8);
+    } else
+        init_get_bits(&s->gb, buf, buf_size*8);
+    // do parse frame header
+    if(v->profile < PROFILE_ADVANCED) {
+        if(vc1_parse_frame_header(v, &s->gb) == -1) {
+            av_free(buf2);
+            return -1;
+        }
+    } else {
+        if(vc1_parse_frame_header_adv(v, &s->gb) == -1) {
+            av_free(buf2);
+            return -1;
+        }
+    }
+
+    if(s->pict_type != I_TYPE && !v->res_rtm_flag){
+        av_free(buf2);
+        return -1;
+    }
+
+    // for hurry_up==5
+    s->current_picture.pict_type= s->pict_type;
+    s->current_picture.key_frame= s->pict_type == I_TYPE;
+
+    /* skip B-frames if we don't have reference frames */
+    if(s->last_picture_ptr==NULL && (s->pict_type==B_TYPE || s->dropable)){
+        av_free(buf2);
+        return -1;//buf_size;
+    }
+    /* skip b frames if we are in a hurry */
+    if(avctx->hurry_up && s->pict_type==B_TYPE) return -1;//buf_size;
+    if(   (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==B_TYPE)
+       || (avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=I_TYPE)
+       ||  avctx->skip_frame >= AVDISCARD_ALL) {
+        av_free(buf2);
+        return buf_size;
+    }
+    /* skip everything if we are in a hurry>=5 */
+    if(avctx->hurry_up>=5) {
+        av_free(buf2);
+        return -1;//buf_size;
+    }
+
+    if(s->next_p_frame_damaged){
+        if(s->pict_type==B_TYPE)
+            return buf_size;
+        else
+            s->next_p_frame_damaged=0;
+    }
+
+    if(MPV_frame_start(s, avctx) < 0) {
+        av_free(buf2);
+        return -1;
+    }
+
+    ff_er_frame_start(s);
+
+    v->bits = buf_size * 8;
+    vc1_decode_blocks(v);
+//av_log(s->avctx, AV_LOG_INFO, "Consumed %i/%i bits\n", get_bits_count(&s->gb), buf_size*8);
+//  if(get_bits_count(&s->gb) > buf_size * 8)
+//      return -1;
+    ff_er_frame_end(s);
+
+    MPV_frame_end(s);
+
+assert(s->current_picture.pict_type == s->current_picture_ptr->pict_type);
+assert(s->current_picture.pict_type == s->pict_type);
+    if (s->pict_type == B_TYPE || s->low_delay) {
+        *pict= *(AVFrame*)s->current_picture_ptr;
+    } else if (s->last_picture_ptr != NULL) {
+        *pict= *(AVFrame*)s->last_picture_ptr;
+    }
+
+    if(s->last_picture_ptr || s->low_delay){
+        *data_size = sizeof(AVFrame);
+        ff_print_debug_info(s, pict);
+    }
+
+    /* Return the Picture timestamp as the frame number */
+    /* we substract 1 because it is added on utils.c    */
+    avctx->frame_number = s->picture_number - 1;
+
+    av_free(buf2);
+    return buf_size;
+}
+
+
+/** Close a VC1/WMV3 decoder
+ * @warning Initial try at using MpegEncContext stuff
+ */
+static int vc1_decode_end(AVCodecContext *avctx)
+{
+    VC1Context *v = avctx->priv_data;
+
+    av_freep(&v->hrd_rate);
+    av_freep(&v->hrd_buffer);
+    MPV_common_end(&v->s);
+    av_freep(&v->mv_type_mb_plane);
+    av_freep(&v->direct_mb_plane);
+    av_freep(&v->acpred_plane);
+    av_freep(&v->over_flags_plane);
+    av_freep(&v->mb_type_base);
+    return 0;
+}
+
+
+AVCodec vc1_decoder = {
+    "vc1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_VC1,
+    sizeof(VC1Context),
+    vc1_decode_init,
+    NULL,
+    vc1_decode_end,
+    vc1_decode_frame,
+    CODEC_CAP_DELAY,
+    NULL
+};
+
+AVCodec wmv3_decoder = {
+    "wmv3",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_WMV3,
+    sizeof(VC1Context),
+    vc1_decode_init,
+    NULL,
+    vc1_decode_end,
+    vc1_decode_frame,
+    CODEC_CAP_DELAY,
+    NULL
+};
diff --git a/contrib/ffmpeg/libavcodec/vc1acdata.h b/contrib/ffmpeg/libavcodec/vc1acdata.h
new file mode 100644
index 000000000..a6acecd78
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vc1acdata.h
@@ -0,0 +1,585 @@
+/*
+ * VC-1 and WMV3 decoder
+ * copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define AC_MODES 8
+
+static const int vc1_ac_sizes[AC_MODES] = {
+    186, 169, 133, 149, 103, 103, 163, 175
+};
+
+static const uint32_t vc1_ac_tables[AC_MODES][186][2] = {
+{
+{ 0x0001,  2}, { 0x0005,  3}, { 0x000D,  4}, { 0x0012,  5}, { 0x000E,  6}, { 0x0015,  7},
+{ 0x0013,  8}, { 0x003F,  8}, { 0x004B,  9}, { 0x011F,  9}, { 0x00B8, 10}, { 0x03E3, 10},
+{ 0x0172, 11}, { 0x024D, 12}, { 0x03DA, 12}, { 0x02DD, 13}, { 0x1F55, 13}, { 0x05B9, 14},
+{ 0x3EAE, 14}, { 0x0000,  4}, { 0x0010,  5}, { 0x0008,  7}, { 0x0020,  8}, { 0x0029,  9},
+{ 0x01F4,  9}, { 0x0233, 10}, { 0x01E0, 11}, { 0x012A, 12}, { 0x03DD, 12}, { 0x050A, 13},
+{ 0x1F29, 13}, { 0x0A42, 14}, { 0x1272, 15}, { 0x1737, 15}, { 0x0003,  5}, { 0x0011,  7},
+{ 0x00C4,  8}, { 0x004B, 10}, { 0x00B4, 11}, { 0x07D4, 11}, { 0x0345, 12}, { 0x02D7, 13},
+{ 0x07BF, 13}, { 0x0938, 14}, { 0x0BBB, 14}, { 0x095E, 15}, { 0x0013,  5}, { 0x0078,  7},
+{ 0x0069,  9}, { 0x0232, 10}, { 0x0461, 11}, { 0x03EC, 12}, { 0x0520, 13}, { 0x1F2A, 13},
+{ 0x3E50, 14}, { 0x3E51, 14}, { 0x1486, 15}, { 0x000C,  6}, { 0x0024,  9}, { 0x0094, 11},
+{ 0x08C0, 12}, { 0x0F09, 14}, { 0x1EF0, 15}, { 0x003D,  6}, { 0x0053,  9}, { 0x01A0, 11},
+{ 0x02D6, 13}, { 0x0F08, 14}, { 0x0013,  7}, { 0x007C,  9}, { 0x07C1, 11}, { 0x04AC, 14},
+{ 0x001B,  7}, { 0x00A0, 10}, { 0x0344, 12}, { 0x0F79, 14}, { 0x0079,  7}, { 0x03E1, 10},
+{ 0x02D4, 13}, { 0x2306, 14}, { 0x0021,  8}, { 0x023C, 10}, { 0x0FAE, 12}, { 0x23DE, 14},
+{ 0x0035,  8}, { 0x0175, 11}, { 0x07B3, 13}, { 0x00C5,  8}, { 0x0174, 11}, { 0x0785, 13},
+{ 0x0048,  9}, { 0x01A3, 11}, { 0x049E, 13}, { 0x002C,  9}, { 0x00FA, 10}, { 0x07D6, 11},
+{ 0x0092, 10}, { 0x05CC, 13}, { 0x1EF1, 15}, { 0x00A3, 10}, { 0x03ED, 12}, { 0x093E, 14},
+{ 0x01E2, 11}, { 0x1273, 15}, { 0x07C4, 11}, { 0x1487, 15}, { 0x0291, 12}, { 0x0293, 12},
+{ 0x0F8A, 12}, { 0x0509, 13}, { 0x0508, 13}, { 0x078D, 13}, { 0x07BE, 13}, { 0x078C, 13},
+{ 0x04AE, 14}, { 0x0BBA, 14}, { 0x2307, 14}, { 0x0B9A, 14}, { 0x1736, 15}, { 0x000E,  4},
+{ 0x0045,  7}, { 0x01F3,  9}, { 0x047A, 11}, { 0x05DC, 13}, { 0x23DF, 14}, { 0x0019,  5},
+{ 0x0028,  9}, { 0x0176, 11}, { 0x049D, 13}, { 0x23DD, 14}, { 0x0030,  6}, { 0x00A2, 10},
+{ 0x02EF, 12}, { 0x05B8, 14}, { 0x003F,  6}, { 0x00A5, 10}, { 0x03DB, 12}, { 0x093F, 14},
+{ 0x0044,  7}, { 0x07CB, 11}, { 0x095F, 15}, { 0x0063,  7}, { 0x03C3, 12}, { 0x0015,  8},
+{ 0x08F6, 12}, { 0x0017,  8}, { 0x0498, 13}, { 0x002C,  8}, { 0x07B2, 13}, { 0x002F,  8},
+{ 0x1F54, 13}, { 0x008D,  8}, { 0x07BD, 13}, { 0x008E,  8}, { 0x1182, 13}, { 0x00FB,  8},
+{ 0x050B, 13}, { 0x002D,  8}, { 0x07C0, 11}, { 0x0079,  9}, { 0x1F5F, 13}, { 0x007A,  9},
+{ 0x1F56, 13}, { 0x0231, 10}, { 0x03E4, 10}, { 0x01A1, 11}, { 0x0143, 11}, { 0x01F7, 11},
+{ 0x016F, 12}, { 0x0292, 12}, { 0x02E7, 12}, { 0x016C, 12}, { 0x016D, 12}, { 0x03DC, 12},
+{ 0x0F8B, 12}, { 0x0499, 13}, { 0x03D8, 12}, { 0x078E, 13}, { 0x02D5, 13}, { 0x1F5E, 13},
+{ 0x1F2B, 13}, { 0x078F, 13}, { 0x04AD, 14}, { 0x3EAF, 14}, { 0x23DC, 14}, { 0x004A,  9}
+},
+{
+{ 0x0000,  3}, { 0x0003,  4}, { 0x000B,  5}, { 0x0014,  6}, { 0x003F,  6}, { 0x005D,  7},
+{ 0x00A2,  8}, { 0x00AC,  9}, { 0x016E,  9}, { 0x020A, 10}, { 0x02E2, 10}, { 0x0432, 11},
+{ 0x05C9, 11}, { 0x0827, 12}, { 0x0B54, 12}, { 0x04E6, 13}, { 0x105F, 13}, { 0x172A, 13},
+{ 0x20B2, 14}, { 0x2D4E, 14}, { 0x39F0, 14}, { 0x4175, 15}, { 0x5A9E, 15}, { 0x0004,  4},
+{ 0x001E,  5}, { 0x0042,  7}, { 0x00B6,  8}, { 0x0173,  9}, { 0x0395, 10}, { 0x072E, 11},
+{ 0x0B94, 12}, { 0x16A4, 13}, { 0x20B3, 14}, { 0x2E45, 14}, { 0x0005,  5}, { 0x0040,  7},
+{ 0x0049,  9}, { 0x028F, 10}, { 0x05CB, 11}, { 0x048A, 13}, { 0x09DD, 14}, { 0x73E2, 15},
+{ 0x0018,  5}, { 0x0025,  8}, { 0x008A, 10}, { 0x051B, 11}, { 0x0E5F, 12}, { 0x09C9, 14},
+{ 0x139C, 15}, { 0x0029,  6}, { 0x004F,  9}, { 0x0412, 11}, { 0x048D, 13}, { 0x2E41, 14},
+{ 0x0038,  6}, { 0x010E,  9}, { 0x05A8, 11}, { 0x105C, 13}, { 0x39F2, 14}, { 0x0058,  7},
+{ 0x021F, 10}, { 0x0E7E, 12}, { 0x39FF, 14}, { 0x0023,  8}, { 0x02E3, 10}, { 0x04E5, 13},
+{ 0x2E40, 14}, { 0x00A1,  8}, { 0x05BE, 11}, { 0x09C8, 14}, { 0x0083,  8}, { 0x013A, 11},
+{ 0x1721, 13}, { 0x0044,  9}, { 0x0276, 12}, { 0x39F6, 14}, { 0x008B, 10}, { 0x04EF, 13},
+{ 0x5A9B, 15}, { 0x0208, 10}, { 0x1CFE, 13}, { 0x0399, 10}, { 0x1CB4, 13}, { 0x039E, 10},
+{ 0x39F3, 14}, { 0x05AB, 11}, { 0x73E3, 15}, { 0x0737, 11}, { 0x5A9F, 15}, { 0x082D, 12},
+{ 0x0E69, 12}, { 0x0E68, 12}, { 0x0433, 11}, { 0x0B7B, 12}, { 0x2DF8, 14}, { 0x2E56, 14},
+{ 0x2E57, 14}, { 0x39F7, 14}, { 0x51A5, 15}, { 0x0003,  3}, { 0x002A,  6}, { 0x00E4,  8},
+{ 0x028E, 10}, { 0x0735, 11}, { 0x1058, 13}, { 0x1CFA, 13}, { 0x2DF9, 14}, { 0x4174, 15},
+{ 0x0009,  4}, { 0x0054,  8}, { 0x0398, 10}, { 0x048B, 13}, { 0x139D, 15}, { 0x000D,  4},
+{ 0x00AD,  9}, { 0x0826, 12}, { 0x2D4C, 14}, { 0x0011,  5}, { 0x016B,  9}, { 0x0B7F, 12},
+{ 0x51A4, 15}, { 0x0019,  5}, { 0x021B, 10}, { 0x16FD, 13}, { 0x001D,  5}, { 0x0394, 10},
+{ 0x28D3, 14}, { 0x002B,  6}, { 0x05BC, 11}, { 0x5A9A, 15}, { 0x002F,  6}, { 0x0247, 12},
+{ 0x0010,  7}, { 0x0A35, 12}, { 0x003E,  6}, { 0x0B7A, 12}, { 0x0059,  7}, { 0x105E, 13},
+{ 0x0026,  8}, { 0x09CF, 14}, { 0x0055,  8}, { 0x1CB5, 13}, { 0x0057,  8}, { 0x0E5B, 12},
+{ 0x00A0,  8}, { 0x1468, 13}, { 0x0170,  9}, { 0x0090, 10}, { 0x01CE,  9}, { 0x021A, 10},
+{ 0x0218, 10}, { 0x0168,  9}, { 0x021E, 10}, { 0x0244, 12}, { 0x0736, 11}, { 0x0138, 11},
+{ 0x0519, 11}, { 0x0E5E, 12}, { 0x072C, 11}, { 0x0B55, 12}, { 0x09DC, 14}, { 0x20BB, 14},
+{ 0x048C, 13}, { 0x1723, 13}, { 0x2E44, 14}, { 0x16A5, 13}, { 0x0518, 11}, { 0x39FE, 14},
+{ 0x0169,  9}
+},
+{
+{ 0x0001,  2}, { 0x0006,  3}, { 0x000F,  4}, { 0x0016,  5}, { 0x0020,  6}, { 0x0018,  7},
+{ 0x0008,  8}, { 0x009A,  8}, { 0x0056,  9}, { 0x013E,  9}, { 0x00F0, 10}, { 0x03A5, 10},
+{ 0x0077, 11}, { 0x01EF, 11}, { 0x009A, 12}, { 0x005D, 13}, { 0x0001,  4}, { 0x0011,  5},
+{ 0x0002,  7}, { 0x000B,  8}, { 0x0012,  9}, { 0x01D6,  9}, { 0x027E, 10}, { 0x0191, 11},
+{ 0x00EA, 12}, { 0x03DC, 12}, { 0x013B, 13}, { 0x0004,  5}, { 0x0014,  7}, { 0x009E,  8},
+{ 0x0009, 10}, { 0x01AC, 11}, { 0x01E2, 11}, { 0x03CA, 12}, { 0x005F, 13}, { 0x0017,  5},
+{ 0x004E,  7}, { 0x005E,  9}, { 0x00F3, 10}, { 0x01AD, 11}, { 0x00EC, 12}, { 0x05F0, 13},
+{ 0x000E,  6}, { 0x00E1,  8}, { 0x03A4, 10}, { 0x009C, 12}, { 0x013D, 13}, { 0x003B,  6},
+{ 0x001C,  9}, { 0x0014, 11}, { 0x09BE, 12}, { 0x0006,  7}, { 0x007A,  9}, { 0x0190, 11},
+{ 0x0137, 13}, { 0x001B,  7}, { 0x0008, 10}, { 0x075C, 11}, { 0x0071,  7}, { 0x00D7, 10},
+{ 0x09BF, 12}, { 0x0007,  8}, { 0x00AF, 10}, { 0x04CC, 11}, { 0x0034,  8}, { 0x0265, 10},
+{ 0x009F, 12}, { 0x00E0,  8}, { 0x0016, 11}, { 0x0327, 12}, { 0x0015,  9}, { 0x017D, 11},
+{ 0x0EBB, 12}, { 0x0014,  9}, { 0x00F6, 10}, { 0x01E4, 11}, { 0x00CB, 10}, { 0x099D, 12},
+{ 0x00CA, 10}, { 0x02FC, 12}, { 0x017F, 11}, { 0x04CD, 11}, { 0x02FD, 12}, { 0x04FE, 11},
+{ 0x013A, 13}, { 0x000A,  4}, { 0x0042,  7}, { 0x01D3,  9}, { 0x04DD, 11}, { 0x0012,  5},
+{ 0x00E8,  8}, { 0x004C, 11}, { 0x0136, 13}, { 0x0039,  6}, { 0x0264, 10}, { 0x0EBA, 12},
+{ 0x0000,  7}, { 0x00AE, 10}, { 0x099C, 12}, { 0x001F,  7}, { 0x04DE, 11}, { 0x0043,  7},
+{ 0x04DC, 11}, { 0x0003,  8}, { 0x03CB, 12}, { 0x0006,  8}, { 0x099E, 12}, { 0x002A,  8},
+{ 0x05F1, 13}, { 0x000F,  8}, { 0x09FE, 12}, { 0x0033,  8}, { 0x09FF, 12}, { 0x0098,  8},
+{ 0x099F, 12}, { 0x00EA,  8}, { 0x013C, 13}, { 0x002E,  8}, { 0x0192, 11}, { 0x0136,  9},
+{ 0x006A,  9}, { 0x0015, 11}, { 0x03AF, 10}, { 0x01E3, 11}, { 0x0074, 11}, { 0x00EB, 12},
+{ 0x02F9, 12}, { 0x005C, 13}, { 0x00ED, 12}, { 0x03DD, 12}, { 0x0326, 12}, { 0x005E, 13},
+{ 0x0016,  7}
+},
+{
+{ 0x0004,  3}, { 0x0014,  5}, { 0x0017,  7}, { 0x007F,  8}, { 0x0154,  9}, { 0x01F2, 10},
+{ 0x00BF, 11}, { 0x0065, 12}, { 0x0AAA, 12}, { 0x0630, 13}, { 0x1597, 13}, { 0x03B7, 14},
+{ 0x2B22, 14}, { 0x0BE6, 15}, { 0x000B,  4}, { 0x0037,  7}, { 0x0062,  9}, { 0x0007, 11},
+{ 0x0166, 12}, { 0x00CE, 13}, { 0x1590, 13}, { 0x05F6, 14}, { 0x0BE7, 15}, { 0x0007,  5},
+{ 0x006D,  8}, { 0x0003, 11}, { 0x031F, 12}, { 0x05F2, 14}, { 0x0002,  6}, { 0x0061,  9},
+{ 0x0055, 12}, { 0x01DF, 14}, { 0x001A,  6}, { 0x001E, 10}, { 0x0AC9, 12}, { 0x2B23, 14},
+{ 0x001E,  6}, { 0x001F, 10}, { 0x0AC3, 12}, { 0x2B2B, 14}, { 0x0006,  7}, { 0x0004, 11},
+{ 0x02F8, 13}, { 0x0019,  7}, { 0x0006, 11}, { 0x063D, 13}, { 0x0057,  7}, { 0x0182, 11},
+{ 0x2AA2, 14}, { 0x0004,  8}, { 0x0180, 11}, { 0x059C, 14}, { 0x007D,  8}, { 0x0164, 12},
+{ 0x076D, 15}, { 0x0002,  9}, { 0x018D, 11}, { 0x1581, 13}, { 0x00AD,  8}, { 0x0060, 12},
+{ 0x0C67, 14}, { 0x001C,  9}, { 0x00EE, 13}, { 0x0003,  9}, { 0x02CF, 13}, { 0x00D9,  9},
+{ 0x1580, 13}, { 0x0002, 11}, { 0x0183, 11}, { 0x0057, 12}, { 0x0061, 12}, { 0x0031, 11},
+{ 0x0066, 12}, { 0x0631, 13}, { 0x0632, 13}, { 0x00AC, 13}, { 0x031D, 12}, { 0x0076, 12},
+{ 0x003A, 11}, { 0x0165, 12}, { 0x0C66, 14}, { 0x0003,  2}, { 0x0054,  7}, { 0x02AB, 10},
+{ 0x0016, 13}, { 0x05F7, 14}, { 0x0005,  4}, { 0x00F8,  9}, { 0x0AA9, 12}, { 0x005F, 15},
+{ 0x0004,  4}, { 0x001C, 10}, { 0x1550, 13}, { 0x0004,  5}, { 0x0077, 11}, { 0x076C, 15},
+{ 0x000E,  5}, { 0x000A, 12}, { 0x000C,  5}, { 0x0562, 11}, { 0x0004,  6}, { 0x031C, 12},
+{ 0x0006,  6}, { 0x00C8, 13}, { 0x000D,  6}, { 0x01DA, 13}, { 0x0007,  6}, { 0x00C9, 13},
+{ 0x0001,  7}, { 0x002E, 14}, { 0x0014,  7}, { 0x1596, 13}, { 0x000A,  7}, { 0x0AC2, 12},
+{ 0x0016,  7}, { 0x015B, 14}, { 0x0015,  7}, { 0x015A, 14}, { 0x000F,  8}, { 0x005E, 15},
+{ 0x007E,  8}, { 0x00AB,  8}, { 0x002D,  9}, { 0x00D8,  9}, { 0x000B,  9}, { 0x0014, 10},
+{ 0x02B3, 10}, { 0x01F3, 10}, { 0x003A, 10}, { 0x0000, 10}, { 0x0058, 10}, { 0x002E,  9},
+{ 0x005E, 10}, { 0x0563, 11}, { 0x00EC, 12}, { 0x0054, 12}, { 0x0AC1, 12}, { 0x1556, 13},
+{ 0x02FA, 13}, { 0x0181, 11}, { 0x1557, 13}, { 0x059D, 14}, { 0x2AA3, 14}, { 0x2B2A, 14},
+{ 0x01DE, 14}, { 0x063C, 13}, { 0x00CF, 13}, { 0x1594, 13}, { 0x000D,  9}
+},
+{
+{ 0x0002,  2}, { 0x0006,  3}, { 0x000F,  4}, { 0x000D,  5}, { 0x000C,  5}, { 0x0015,  6},
+{ 0x0013,  6}, { 0x0012,  6}, { 0x0017,  7}, { 0x001F,  8}, { 0x001E,  8}, { 0x001D,  8},
+{ 0x0025,  9}, { 0x0024,  9}, { 0x0023,  9}, { 0x0021,  9}, { 0x0021, 10}, { 0x0020, 10},
+{ 0x000F, 10}, { 0x000E, 10}, { 0x0007, 11}, { 0x0006, 11}, { 0x0020, 11}, { 0x0021, 11},
+{ 0x0050, 12}, { 0x0051, 12}, { 0x0052, 12}, { 0x000E,  4}, { 0x0014,  6}, { 0x0016,  7},
+{ 0x001C,  8}, { 0x0020,  9}, { 0x001F,  9}, { 0x000D, 10}, { 0x0022, 11}, { 0x0053, 12},
+{ 0x0055, 12}, { 0x000B,  5}, { 0x0015,  7}, { 0x001E,  9}, { 0x000C, 10}, { 0x0056, 12},
+{ 0x0011,  6}, { 0x001B,  8}, { 0x001D,  9}, { 0x000B, 10}, { 0x0010,  6}, { 0x0022,  9},
+{ 0x000A, 10}, { 0x000D,  6}, { 0x001C,  9}, { 0x0008, 10}, { 0x0012,  7}, { 0x001B,  9},
+{ 0x0054, 12}, { 0x0014,  7}, { 0x001A,  9}, { 0x0057, 12}, { 0x0019,  8}, { 0x0009, 10},
+{ 0x0018,  8}, { 0x0023, 11}, { 0x0017,  8}, { 0x0019,  9}, { 0x0018,  9}, { 0x0007, 10},
+{ 0x0058, 12}, { 0x0007,  4}, { 0x000C,  6}, { 0x0016,  8}, { 0x0017,  9}, { 0x0006, 10},
+{ 0x0005, 11}, { 0x0004, 11}, { 0x0059, 12}, { 0x000F,  6}, { 0x0016,  9}, { 0x0005, 10},
+{ 0x000E,  6}, { 0x0004, 10}, { 0x0011,  7}, { 0x0024, 11}, { 0x0010,  7}, { 0x0025, 11},
+{ 0x0013,  7}, { 0x005A, 12}, { 0x0015,  8}, { 0x005B, 12}, { 0x0014,  8}, { 0x0013,  8},
+{ 0x001A,  8}, { 0x0015,  9}, { 0x0014,  9}, { 0x0013,  9}, { 0x0012,  9}, { 0x0011,  9},
+{ 0x0026, 11}, { 0x0027, 11}, { 0x005C, 12}, { 0x005D, 12}, { 0x005E, 12}, { 0x005F, 12},
+{ 0x0003,  7}
+},
+{
+{ 0x0002,  2}, { 0x000F,  4}, { 0x0015,  6}, { 0x0017,  7}, { 0x001F,  8}, { 0x0025,  9},
+{ 0x0024,  9}, { 0x0021, 10}, { 0x0020, 10}, { 0x0007, 11}, { 0x0006, 11}, { 0x0020, 11},
+{ 0x0006,  3}, { 0x0014,  6}, { 0x001E,  8}, { 0x000F, 10}, { 0x0021, 11}, { 0x0050, 12},
+{ 0x000E,  4}, { 0x001D,  8}, { 0x000E, 10}, { 0x0051, 12}, { 0x000D,  5}, { 0x0023,  9},
+{ 0x000D, 10}, { 0x000C,  5}, { 0x0022,  9}, { 0x0052, 12}, { 0x000B,  5}, { 0x000C, 10},
+{ 0x0053, 12}, { 0x0013,  6}, { 0x000B, 10}, { 0x0054, 12}, { 0x0012,  6}, { 0x000A, 10},
+{ 0x0011,  6}, { 0x0009, 10}, { 0x0010,  6}, { 0x0008, 10}, { 0x0016,  7}, { 0x0055, 12},
+{ 0x0015,  7}, { 0x0014,  7}, { 0x001C,  8}, { 0x001B,  8}, { 0x0021,  9}, { 0x0020,  9},
+{ 0x001F,  9}, { 0x001E,  9}, { 0x001D,  9}, { 0x001C,  9}, { 0x001B,  9}, { 0x001A,  9},
+{ 0x0022, 11}, { 0x0023, 11}, { 0x0056, 12}, { 0x0057, 12}, { 0x0007,  4}, { 0x0019,  9},
+{ 0x0005, 11}, { 0x000F,  6}, { 0x0004, 11}, { 0x000E,  6}, { 0x000D,  6}, { 0x000C,  6},
+{ 0x0013,  7}, { 0x0012,  7}, { 0x0011,  7}, { 0x0010,  7}, { 0x001A,  8}, { 0x0019,  8},
+{ 0x0018,  8}, { 0x0017,  8}, { 0x0016,  8}, { 0x0015,  8}, { 0x0014,  8}, { 0x0013,  8},
+{ 0x0018,  9}, { 0x0017,  9}, { 0x0016,  9}, { 0x0015,  9}, { 0x0014,  9}, { 0x0013,  9},
+{ 0x0012,  9}, { 0x0011,  9}, { 0x0007, 10}, { 0x0006, 10}, { 0x0005, 10}, { 0x0004, 10},
+{ 0x0024, 11}, { 0x0025, 11}, { 0x0026, 11}, { 0x0027, 11}, { 0x0058, 12}, { 0x0059, 12},
+{ 0x005A, 12}, { 0x005B, 12}, { 0x005C, 12}, { 0x005D, 12}, { 0x005E, 12}, { 0x005F, 12},
+{ 0x0003,  7}
+},
+{
+{ 0x0000,  2}, { 0x0003,  3}, { 0x000D,  4}, { 0x0005,  4}, { 0x001C,  5}, { 0x0016,  5},
+{ 0x003F,  6}, { 0x003A,  6}, { 0x002E,  6}, { 0x0022,  6}, { 0x007B,  7}, { 0x0067,  7},
+{ 0x005F,  7}, { 0x0047,  7}, { 0x0026,  7}, { 0x00EF,  8}, { 0x00CD,  8}, { 0x00C1,  8},
+{ 0x00A9,  8}, { 0x004F,  8}, { 0x01F2,  9}, { 0x01DD,  9}, { 0x0199,  9}, { 0x0185,  9},
+{ 0x015D,  9}, { 0x011B,  9}, { 0x03EF, 10}, { 0x03E1, 10}, { 0x03C8, 10}, { 0x0331, 10},
+{ 0x0303, 10}, { 0x02F1, 10}, { 0x02A0, 10}, { 0x0233, 10}, { 0x0126, 10}, { 0x07C0, 11},
+{ 0x076F, 11}, { 0x076C, 11}, { 0x0661, 11}, { 0x0604, 11}, { 0x0572, 11}, { 0x0551, 11},
+{ 0x046A, 11}, { 0x0274, 11}, { 0x0F27, 12}, { 0x0F24, 12}, { 0x0EDB, 12}, { 0x0C8E, 12},
+{ 0x0C0B, 12}, { 0x0C0A, 12}, { 0x0AE3, 12}, { 0x08D6, 12}, { 0x0490, 12}, { 0x0495, 12},
+{ 0x1F19, 13}, { 0x1DB5, 13}, { 0x0009,  4}, { 0x0010,  5}, { 0x0029,  6}, { 0x0062,  7},
+{ 0x00F3,  8}, { 0x00AD,  8}, { 0x01E5,  9}, { 0x0179,  9}, { 0x009C,  9}, { 0x03B1, 10},
+{ 0x02AE, 10}, { 0x0127, 10}, { 0x076E, 11}, { 0x0570, 11}, { 0x0275, 11}, { 0x0F25, 12},
+{ 0x0EC0, 12}, { 0x0AA0, 12}, { 0x08D7, 12}, { 0x1E4C, 13}, { 0x0008,  5}, { 0x0063,  7},
+{ 0x00AF,  8}, { 0x017B,  9}, { 0x03B3, 10}, { 0x07DD, 11}, { 0x0640, 11}, { 0x0F8D, 12},
+{ 0x0BC1, 12}, { 0x0491, 12}, { 0x0028,  6}, { 0x00C3,  8}, { 0x0151,  9}, { 0x02A1, 10},
+{ 0x0573, 11}, { 0x0EC3, 12}, { 0x1F35, 13}, { 0x0065,  7}, { 0x01DA,  9}, { 0x02AF, 10},
+{ 0x0277, 11}, { 0x08C9, 12}, { 0x1781, 13}, { 0x0025,  7}, { 0x0118,  9}, { 0x0646, 11},
+{ 0x0AA6, 12}, { 0x1780, 13}, { 0x00C9,  8}, { 0x0321, 10}, { 0x0F9B, 12}, { 0x191E, 13},
+{ 0x0048,  8}, { 0x07CC, 11}, { 0x0AA1, 12}, { 0x0180,  9}, { 0x0465, 11}, { 0x1905, 13},
+{ 0x03E2, 10}, { 0x0EC1, 12}, { 0x3C9B, 14}, { 0x02F4, 10}, { 0x08C8, 12}, { 0x07C1, 11},
+{ 0x0928, 13}, { 0x05E1, 11}, { 0x320D, 14}, { 0x0EC2, 12}, { 0x6418, 15}, { 0x1F34, 13},
+{ 0x0078,  7}, { 0x0155,  9}, { 0x0552, 11}, { 0x191F, 13}, { 0x00FA,  8}, { 0x07DC, 11},
+{ 0x1907, 13}, { 0x00AC,  8}, { 0x0249, 11}, { 0x13B1, 14}, { 0x01F6,  9}, { 0x0AE2, 12},
+{ 0x01DC,  9}, { 0x04ED, 12}, { 0x0184,  9}, { 0x1904, 13}, { 0x0156,  9}, { 0x09D9, 13},
+{ 0x03E7, 10}, { 0x0929, 13}, { 0x03B2, 10}, { 0x3B68, 14}, { 0x02F5, 10}, { 0x13B0, 14},
+{ 0x0322, 10}, { 0x3B69, 14}, { 0x0234, 10}, { 0x7935, 15}, { 0x07C7, 11}, { 0xC833, 16},
+{ 0x0660, 11}, { 0x7934, 15}, { 0x024B, 11}, { 0xC832, 16}, { 0x0AA7, 12}, { 0x1F18, 13},
+{ 0x007A,  7}
+},
+{
+{ 0x0002,  2}, { 0x0000,  3}, { 0x001E,  5}, { 0x0004,  5}, { 0x0012,  6}, { 0x0070,  7},
+{ 0x001A,  7}, { 0x005F,  8}, { 0x0047,  8}, { 0x01D3,  9}, { 0x00B5,  9}, { 0x0057,  9},
+{ 0x03B5, 10}, { 0x016D, 10}, { 0x0162, 10}, { 0x07CE, 11}, { 0x0719, 11}, { 0x0691, 11},
+{ 0x02C6, 11}, { 0x0156, 11}, { 0x0F92, 12}, { 0x0D2E, 12}, { 0x0D20, 12}, { 0x059E, 12},
+{ 0x0468, 12}, { 0x02A6, 12}, { 0x1DA2, 13}, { 0x1C60, 13}, { 0x1A43, 13}, { 0x0B1D, 13},
+{ 0x08C0, 13}, { 0x055D, 13}, { 0x0003,  3}, { 0x000A,  5}, { 0x0077,  7}, { 0x00E5,  8},
+{ 0x01D9,  9}, { 0x03E5, 10}, { 0x0166, 10}, { 0x0694, 11}, { 0x0152, 11}, { 0x059F, 12},
+{ 0x1F3C, 13}, { 0x1A4B, 13}, { 0x055E, 13}, { 0x000C,  4}, { 0x007D,  7}, { 0x0044,  8},
+{ 0x03E0, 10}, { 0x0769, 11}, { 0x0E31, 12}, { 0x1F26, 13}, { 0x055C, 13}, { 0x001B,  5},
+{ 0x00E2,  8}, { 0x03A5, 10}, { 0x02C9, 11}, { 0x1F23, 13}, { 0x3B47, 14}, { 0x0007,  5},
+{ 0x01D8,  9}, { 0x02D8, 11}, { 0x1F27, 13}, { 0x3494, 14}, { 0x0035,  6}, { 0x03E1, 10},
+{ 0x059C, 12}, { 0x38C3, 14}, { 0x000C,  6}, { 0x0165, 10}, { 0x1D23, 13}, { 0x1638, 14},
+{ 0x0068,  7}, { 0x0693, 11}, { 0x3A45, 14}, { 0x0020,  7}, { 0x0F90, 12}, { 0x7CF6, 15},
+{ 0x00E8,  8}, { 0x058F, 12}, { 0x2CEF, 15}, { 0x0045,  8}, { 0x0B3A, 13}, { 0x01F1,  9},
+{ 0x3B46, 14}, { 0x01A7,  9}, { 0x1676, 14}, { 0x0056,  9}, { 0x692A, 15}, { 0x038D, 10},
+{ 0xE309, 16}, { 0x00AA, 10}, { 0x1C611, 17}, { 0x02DF, 11}, { 0xB3B9, 17}, { 0x02C8, 11},
+{ 0x38C20, 18}, { 0x01B0, 11}, { 0x16390, 18}, { 0x0F9F, 12}, { 0x16771, 18}, { 0x0ED0, 12},
+{ 0x71843, 19}, { 0x0D2A, 12}, { 0xF9E8C, 20}, { 0x0461, 12}, { 0xF9E8E, 20}, { 0x0B67, 13},
+{ 0x055F, 13}, { 0x003F,  6}, { 0x006D,  9}, { 0x0E90, 12}, { 0x054E, 13}, { 0x0013,  6},
+{ 0x0119, 10}, { 0x0B66, 13}, { 0x000B,  6}, { 0x0235, 11}, { 0x7CF5, 15}, { 0x0075,  7},
+{ 0x0D24, 12}, { 0xF9E9, 16}, { 0x002E,  7}, { 0x1F22, 13}, { 0x0021,  7}, { 0x054F, 13},
+{ 0x0014,  7}, { 0x3A44, 14}, { 0x00E4,  8}, { 0x7CF7, 15}, { 0x005E,  8}, { 0x7185, 15},
+{ 0x0037,  8}, { 0x2C73, 15}, { 0x01DB,  9}, { 0x59DD, 16}, { 0x01C7,  9}, { 0x692B, 15},
+{ 0x01A6,  9}, { 0x58E5, 16}, { 0x00B4,  9}, { 0x1F3D0, 17}, { 0x00B0,  9}, { 0xB1C9, 17},
+{ 0x03E6, 10}, { 0x16770, 18}, { 0x016E, 10}, { 0x3E7A2, 18}, { 0x011B, 10}, { 0xF9E8D, 20},
+{ 0x00D9, 10}, { 0xF9E8F, 20}, { 0x00A8, 10}, { 0x2C723, 19}, { 0x0749, 11}, { 0xE3084, 20},
+{ 0x0696, 11}, { 0x58E45, 20}, { 0x02DE, 11}, { 0xB1C88, 21}, { 0x0231, 11}, { 0x1C610A, 21},
+{ 0x01B1, 11}, { 0x71842D, 23}, { 0x0D2B, 12}, { 0x38C217, 22}, { 0x0D2F, 12}, { 0x163913, 22},
+{ 0x05B2, 12}, { 0x163912, 22}, { 0x0469, 12}, { 0x71842C, 23}, { 0x1A42, 13}, { 0x08C1, 13},
+{ 0x0073,  7}
+}
+};
+
+/* which indexes point to last=1 entries in tables */
+static const int vc1_last_decode_table[AC_MODES] = {
+    119, 99, 85, 81, 67, 58, 126, 109
+};
+
+static const uint8_t vc1_index_decode_table[AC_MODES][185][2] = {
+{
+{  0,  1}, {  0,  2}, {  0,  3}, {  0,  4}, {  0,  5}, {  0,  6}, {  0,  7}, {  0,  8},
+{  0,  9}, {  0, 10}, {  0, 11}, {  0, 12}, {  0, 13}, {  0, 14}, {  0, 15}, {  0, 16},
+{  0, 17}, {  0, 18}, {  0, 19}, {  1,  1}, {  1,  2}, {  1,  3}, {  1,  4}, {  1,  5},
+{  1,  6}, {  1,  7}, {  1,  8}, {  1,  9}, {  1, 10}, {  1, 11}, {  1, 12}, {  1, 13},
+{  1, 14}, {  1, 15}, {  2,  1}, {  2,  2}, {  2,  3}, {  2,  4}, {  2,  5}, {  2,  6},
+{  2,  7}, {  2,  8}, {  2,  9}, {  2, 10}, {  2, 11}, {  2, 12}, {  3,  1}, {  3,  2},
+{  3,  3}, {  3,  4}, {  3,  5}, {  3,  6}, {  3,  7}, {  3,  8}, {  3,  9}, {  3, 10},
+{  3, 11}, {  4,  1}, {  4,  2}, {  4,  3}, {  4,  4}, {  4,  5}, {  4,  6}, {  5,  1},
+{  5,  2}, {  5,  3}, {  5,  4}, {  5,  5}, {  6,  1}, {  6,  2}, {  6,  3}, {  6,  4},
+{  7,  1}, {  7,  2}, {  7,  3}, {  7,  4}, {  8,  1}, {  8,  2}, {  8,  3}, {  8,  4},
+{  9,  1}, {  9,  2}, {  9,  3}, {  9,  4}, { 10,  1}, { 10,  2}, { 10,  3}, { 11,  1},
+{ 11,  2}, { 11,  3}, { 12,  1}, { 12,  2}, { 12,  3}, { 13,  1}, { 13,  2}, { 13,  3},
+{ 14,  1}, { 14,  2}, { 14,  3}, { 15,  1}, { 15,  2}, { 15,  3}, { 16,  1}, { 16,  2},
+{ 17,  1}, { 17,  2}, { 18,  1}, { 19,  1}, { 20,  1}, { 21,  1}, { 22,  1}, { 23,  1},
+{ 24,  1}, { 25,  1}, { 26,  1}, { 27,  1}, { 28,  1}, { 29,  1}, { 30,  1}, {  0,  1},
+{  0,  2}, {  0,  3}, {  0,  4}, {  0,  5}, {  0,  6}, {  1,  1}, {  1,  2}, {  1,  3},
+{  1,  4}, {  1,  5}, {  2,  1}, {  2,  2}, {  2,  3}, {  2,  4}, {  3,  1}, {  3,  2},
+{  3,  3}, {  3,  4}, {  4,  1}, {  4,  2}, {  4,  3}, {  5,  1}, {  5,  2}, {  6,  1},
+{  6,  2}, {  7,  1}, {  7,  2}, {  8,  1}, {  8,  2}, {  9,  1}, {  9,  2}, { 10,  1},
+{ 10,  2}, { 11,  1}, { 11,  2}, { 12,  1}, { 12,  2}, { 13,  1}, { 13,  2}, { 14,  1},
+{ 14,  2}, { 15,  1}, { 15,  2}, { 16,  1}, { 17,  1}, { 18,  1}, { 19,  1}, { 20,  1},
+{ 21,  1}, { 22,  1}, { 23,  1}, { 24,  1}, { 25,  1}, { 26,  1}, { 27,  1}, { 28,  1},
+{ 29,  1}, { 30,  1}, { 31,  1}, { 32,  1}, { 33,  1}, { 34,  1}, { 35,  1}, { 36,  1},
+{ 37,  1}
+},
+{
+{  0,  1}, {  0,  2}, {  0,  3}, {  0,  4}, {  0,  5}, {  0,  6}, {  0,  7}, {  0,  8},
+{  0,  9}, {  0, 10}, {  0, 11}, {  0, 12}, {  0, 13}, {  0, 14}, {  0, 15}, {  0, 16},
+{  0, 17}, {  0, 18}, {  0, 19}, {  0, 20}, {  0, 21}, {  0, 22}, {  0, 23}, {  1,  1},
+{  1,  2}, {  1,  3}, {  1,  4}, {  1,  5}, {  1,  6}, {  1,  7}, {  1,  8}, {  1,  9},
+{  1, 10}, {  1, 11}, {  2,  1}, {  2,  2}, {  2,  3}, {  2,  4}, {  2,  5}, {  2,  6},
+{  2,  7}, {  2,  8}, {  3,  1}, {  3,  2}, {  3,  3}, {  3,  4}, {  3,  5}, {  3,  6},
+{  3,  7}, {  4,  1}, {  4,  2}, {  4,  3}, {  4,  4}, {  4,  5}, {  5,  1}, {  5,  2},
+{  5,  3}, {  5,  4}, {  5,  5}, {  6,  1}, {  6,  2}, {  6,  3}, {  6,  4}, {  7,  1},
+{  7,  2}, {  7,  3}, {  7,  4}, {  8,  1}, {  8,  2}, {  8,  3}, {  9,  1}, {  9,  2},
+{  9,  3}, { 10,  1}, { 10,  2}, { 10,  3}, { 11,  1}, { 11,  2}, { 11,  3}, { 12,  1},
+{ 12,  2}, { 13,  1}, { 13,  2}, { 14,  1}, { 14,  2}, { 15,  1}, { 15,  2}, { 16,  1},
+{ 16,  2}, { 17,  1}, { 18,  1}, { 19,  1}, { 20,  1}, { 21,  1}, { 22,  1}, { 23,  1},
+{ 24,  1}, { 25,  1}, { 26,  1}, {  0,  1}, {  0,  2}, {  0,  3}, {  0,  4}, {  0,  5},
+{  0,  6}, {  0,  7}, {  0,  8}, {  0,  9}, {  1,  1}, {  1,  2}, {  1,  3}, {  1,  4},
+{  1,  5}, {  2,  1}, {  2,  2}, {  2,  3}, {  2,  4}, {  3,  1}, {  3,  2}, {  3,  3},
+{  3,  4}, {  4,  1}, {  4,  2}, {  4,  3}, {  5,  1}, {  5,  2}, {  5,  3}, {  6,  1},
+{  6,  2}, {  6,  3}, {  7,  1}, {  7,  2}, {  8,  1}, {  8,  2}, {  9,  1}, {  9,  2},
+{ 10,  1}, { 10,  2}, { 11,  1}, { 11,  2}, { 12,  1}, { 12,  2}, { 13,  1}, { 13,  2},
+{ 14,  1}, { 14,  2}, { 15,  1}, { 16,  1}, { 17,  1}, { 18,  1}, { 19,  1}, { 20,  1},
+{ 21,  1}, { 22,  1}, { 23,  1}, { 24,  1}, { 25,  1}, { 26,  1}, { 27,  1}, { 28,  1},
+{ 29,  1}, { 30,  1}, { 31,  1}, { 32,  1}, { 33,  1}, { 34,  1}, { 35,  1}, { 36,  1}
+},
+{
+{  0,  1}, {  0,  2}, {  0,  3}, {  0,  4}, {  0,  5}, {  0,  6}, {  0,  7}, {  0,  8},
+{  0,  9}, {  0, 10}, {  0, 11}, {  0, 12}, {  0, 13}, {  0, 14}, {  0, 15}, {  0, 16},
+{  1,  1}, {  1,  2}, {  1,  3}, {  1,  4}, {  1,  5}, {  1,  6}, {  1,  7}, {  1,  8},
+{  1,  9}, {  1, 10}, {  1, 11}, {  2,  1}, {  2,  2}, {  2,  3}, {  2,  4}, {  2,  5},
+{  2,  6}, {  2,  7}, {  2,  8}, {  3,  1}, {  3,  2}, {  3,  3}, {  3,  4}, {  3,  5},
+{  3,  6}, {  3,  7}, {  4,  1}, {  4,  2}, {  4,  3}, {  4,  4}, {  4,  5}, {  5,  1},
+{  5,  2}, {  5,  3}, {  5,  4}, {  6,  1}, {  6,  2}, {  6,  3}, {  6,  4}, {  7,  1},
+{  7,  2}, {  7,  3}, {  8,  1}, {  8,  2}, {  8,  3}, {  9,  1}, {  9,  2}, {  9,  3},
+{ 10,  1}, { 10,  2}, { 10,  3}, { 11,  1}, { 11,  2}, { 11,  3}, { 12,  1}, { 12,  2},
+{ 12,  3}, { 13,  1}, { 13,  2}, { 13,  3}, { 14,  1}, { 14,  2}, { 15,  1}, { 15,  2},
+{ 16,  1}, { 17,  1}, { 18,  1}, { 19,  1}, { 20,  1}, {  0,  1}, {  0,  2}, {  0,  3},
+{  0,  4}, {  1,  1}, {  1,  2}, {  1,  3}, {  1,  4}, {  2,  1}, {  2,  2}, {  2,  3},
+{  3,  1}, {  3,  2}, {  3,  3}, {  4,  1}, {  4,  2}, {  5,  1}, {  5,  2}, {  6,  1},
+{  6,  2}, {  7,  1}, {  7,  2}, {  8,  1}, {  8,  2}, {  9,  1}, {  9,  2}, { 10,  1},
+{ 10,  2}, { 11,  1}, { 11,  2}, { 12,  1}, { 12,  2}, { 13,  1}, { 13,  2}, { 14,  1},
+{ 15,  1}, { 16,  1}, { 17,  1}, { 18,  1}, { 19,  1}, { 20,  1}, { 21,  1}, { 22,  1},
+{ 23,  1}, { 24,  1}, { 25,  1}, { 26,  1}
+},
+{
+{  0,  1}, {  0,  2}, {  0,  3}, {  0,  4}, {  0,  5}, {  0,  6}, {  0,  7}, {  0,  8},
+{  0,  9}, {  0, 10}, {  0, 11}, {  0, 12}, {  0, 13}, {  0, 14}, {  1,  1}, {  1,  2},
+{  1,  3}, {  1,  4}, {  1,  5}, {  1,  6}, {  1,  7}, {  1,  8}, {  1,  9}, {  2,  1},
+{  2,  2}, {  2,  3}, {  2,  4}, {  2,  5}, {  3,  1}, {  3,  2}, {  3,  3}, {  3,  4},
+{  4,  1}, {  4,  2}, {  4,  3}, {  4,  4}, {  5,  1}, {  5,  2}, {  5,  3}, {  5,  4},
+{  6,  1}, {  6,  2}, {  6,  3}, {  7,  1}, {  7,  2}, {  7,  3}, {  8,  1}, {  8,  2},
+{  8,  3}, {  9,  1}, {  9,  2}, {  9,  3}, { 10,  1}, { 10,  2}, { 10,  3}, { 11,  1},
+{ 11,  2}, { 11,  3}, { 12,  1}, { 12,  2}, { 12,  3}, { 13,  1}, { 13,  2}, { 14,  1},
+{ 14,  2}, { 15,  1}, { 15,  2}, { 16,  1}, { 17,  1}, { 18,  1}, { 19,  1}, { 20,  1},
+{ 21,  1}, { 22,  1}, { 23,  1}, { 24,  1}, { 25,  1}, { 26,  1}, { 27,  1}, { 28,  1},
+{ 29,  1}, {  0,  1}, {  0,  2}, {  0,  3}, {  0,  4}, {  0,  5}, {  1,  1}, {  1,  2},
+{  1,  3}, {  1,  4}, {  2,  1}, {  2,  2}, {  2,  3}, {  3,  1}, {  3,  2}, {  3,  3},
+{  4,  1}, {  4,  2}, {  5,  1}, {  5,  2}, {  6,  1}, {  6,  2}, {  7,  1}, {  7,  2},
+{  8,  1}, {  8,  2}, {  9,  1}, {  9,  2}, { 10,  1}, { 10,  2}, { 11,  1}, { 11,  2},
+{ 12,  1}, { 12,  2}, { 13,  1}, { 13,  2}, { 14,  1}, { 14,  2}, { 15,  1}, { 15,  2},
+{ 16,  1}, { 17,  1}, { 18,  1}, { 19,  1}, { 20,  1}, { 21,  1}, { 22,  1}, { 23,  1},
+{ 24,  1}, { 25,  1}, { 26,  1}, { 27,  1}, { 28,  1}, { 29,  1}, { 30,  1}, { 31,  1},
+{ 32,  1}, { 33,  1}, { 34,  1}, { 35,  1}, { 36,  1}, { 37,  1}, { 38,  1}, { 39,  1},
+{ 40,  1}, { 41,  1}, { 42,  1}, { 43,  1}
+},
+{
+{  0,  1}, {  0,  2}, {  0,  3}, {  0,  4}, {  0,  5}, {  0,  6}, {  0,  7}, {  0,  8},
+{  0,  9}, {  0, 10}, {  0, 11}, {  0, 12}, {  0, 13}, {  0, 14}, {  0, 15}, {  0, 16},
+{  0, 17}, {  0, 18}, {  0, 19}, {  0, 20}, {  0, 21}, {  0, 22}, {  0, 23}, {  0, 24},
+{  0, 25}, {  0, 26}, {  0, 27}, {  1,  1}, {  1,  2}, {  1,  3}, {  1,  4}, {  1,  5},
+{  1,  6}, {  1,  7}, {  1,  8}, {  1,  9}, {  1, 10}, {  2,  1}, {  2,  2}, {  2,  3},
+{  2,  4}, {  2,  5}, {  3,  1}, {  3,  2}, {  3,  3}, {  3,  4}, {  4,  1}, {  4,  2},
+{  4,  3}, {  5,  1}, {  5,  2}, {  5,  3}, {  6,  1}, {  6,  2}, {  6,  3}, {  7,  1},
+{  7,  2}, {  7,  3}, {  8,  1}, {  8,  2}, {  9,  1}, {  9,  2}, { 10,  1}, { 11,  1},
+{ 12,  1}, { 13,  1}, { 14,  1}, {  0,  1}, {  0,  2}, {  0,  3}, {  0,  4}, {  0,  5},
+{  0,  6}, {  0,  7}, {  0,  8}, {  1,  1}, {  1,  2}, {  1,  3}, {  2,  1}, {  2,  2},
+{  3,  1}, {  3,  2}, {  4,  1}, {  4,  2}, {  5,  1}, {  5,  2}, {  6,  1}, {  6,  2},
+{  7,  1}, {  8,  1}, {  9,  1}, { 10,  1}, { 11,  1}, { 12,  1}, { 13,  1}, { 14,  1},
+{ 15,  1}, { 16,  1}, { 17,  1}, { 18,  1}, { 19,  1}, { 20,  1}
+},
+{
+{  0,  1}, {  0,  2}, {  0,  3}, {  0,  4}, {  0,  5}, {  0,  6}, {  0,  7}, {  0,  8},
+{  0,  9}, {  0, 10}, {  0, 11}, {  0, 12}, {  1,  1}, {  1,  2}, {  1,  3}, {  1,  4},
+{  1,  5}, {  1,  6}, {  2,  1}, {  2,  2}, {  2,  3}, {  2,  4}, {  3,  1}, {  3,  2},
+{  3,  3}, {  4,  1}, {  4,  2}, {  4,  3}, {  5,  1}, {  5,  2}, {  5,  3}, {  6,  1},
+{  6,  2}, {  6,  3}, {  7,  1}, {  7,  2}, {  8,  1}, {  8,  2}, {  9,  1}, {  9,  2},
+{ 10,  1}, { 10,  2}, { 11,  1}, { 12,  1}, { 13,  1}, { 14,  1}, { 15,  1}, { 16,  1},
+{ 17,  1}, { 18,  1}, { 19,  1}, { 20,  1}, { 21,  1}, { 22,  1}, { 23,  1}, { 24,  1},
+{ 25,  1}, { 26,  1}, {  0,  1}, {  0,  2}, {  0,  3}, {  1,  1}, {  1,  2}, {  2,  1},
+{  3,  1}, {  4,  1}, {  5,  1}, {  6,  1}, {  7,  1}, {  8,  1}, {  9,  1}, { 10,  1},
+{ 11,  1}, { 12,  1}, { 13,  1}, { 14,  1}, { 15,  1}, { 16,  1}, { 17,  1}, { 18,  1},
+{ 19,  1}, { 20,  1}, { 21,  1}, { 22,  1}, { 23,  1}, { 24,  1}, { 25,  1}, { 26,  1},
+{ 27,  1}, { 28,  1}, { 29,  1}, { 30,  1}, { 31,  1}, { 32,  1}, { 33,  1}, { 34,  1},
+{ 35,  1}, { 36,  1}, { 37,  1}, { 38,  1}, { 39,  1}, { 40,  1}
+},
+{
+{  0,  1}, {  0,  2}, {  0,  3}, {  0,  4}, {  0,  5}, {  0,  6}, {  0,  7}, {  0,  8},
+{  0,  9}, {  0, 10}, {  0, 11}, {  0, 12}, {  0, 13}, {  0, 14}, {  0, 15}, {  0, 16},
+{  0, 17}, {  0, 18}, {  0, 19}, {  0, 20}, {  0, 21}, {  0, 22}, {  0, 23}, {  0, 24},
+{  0, 25}, {  0, 26}, {  0, 27}, {  0, 28}, {  0, 29}, {  0, 30}, {  0, 31}, {  0, 32},
+{  0, 33}, {  0, 34}, {  0, 35}, {  0, 36}, {  0, 37}, {  0, 38}, {  0, 39}, {  0, 40},
+{  0, 41}, {  0, 42}, {  0, 43}, {  0, 44}, {  0, 45}, {  0, 46}, {  0, 47}, {  0, 48},
+{  0, 49}, {  0, 50}, {  0, 51}, {  0, 52}, {  0, 53}, {  0, 54}, {  0, 55}, {  0, 56},
+{  1,  1}, {  1,  2}, {  1,  3}, {  1,  4}, {  1,  5}, {  1,  6}, {  1,  7}, {  1,  8},
+{  1,  9}, {  1, 10}, {  1, 11}, {  1, 12}, {  1, 13}, {  1, 14}, {  1, 15}, {  1, 16},
+{  1, 17}, {  1, 18}, {  1, 19}, {  1, 20}, {  2,  1}, {  2,  2}, {  2,  3}, {  2,  4},
+{  2,  5}, {  2,  6}, {  2,  7}, {  2,  8}, {  2,  9}, {  2, 10}, {  3,  1}, {  3,  2},
+{  3,  3}, {  3,  4}, {  3,  5}, {  3,  6}, {  3,  7}, {  4,  1}, {  4,  2}, {  4,  3},
+{  4,  4}, {  4,  5}, {  4,  6}, {  5,  1}, {  5,  2}, {  5,  3}, {  5,  4}, {  5,  5},
+{  6,  1}, {  6,  2}, {  6,  3}, {  6,  4}, {  7,  1}, {  7,  2}, {  7,  3}, {  8,  1},
+{  8,  2}, {  8,  3}, {  9,  1}, {  9,  2}, {  9,  3}, { 10,  1}, { 10,  2}, { 11,  1},
+{ 11,  2}, { 12,  1}, { 12,  2}, { 13,  1}, { 13,  2}, { 14,  1}, {  0,  1}, {  0,  2},
+{  0,  3}, {  0,  4}, {  1,  1}, {  1,  2}, {  1,  3}, {  2,  1}, {  2,  2}, {  2,  3},
+{  3,  1}, {  3,  2}, {  4,  1}, {  4,  2}, {  5,  1}, {  5,  2}, {  6,  1}, {  6,  2},
+{  7,  1}, {  7,  2}, {  8,  1}, {  8,  2}, {  9,  1}, {  9,  2}, { 10,  1}, { 10,  2},
+{ 11,  1}, { 11,  2}, { 12,  1}, { 12,  2}, { 13,  1}, { 13,  2}, { 14,  1}, { 14,  2},
+{ 15,  1}, { 16,  1}
+},
+{
+{  0,  1}, {  0,  2}, {  0,  3}, {  0,  4}, {  0,  5}, {  0,  6}, {  0,  7}, {  0,  8},
+{  0,  9}, {  0, 10}, {  0, 11}, {  0, 12}, {  0, 13}, {  0, 14}, {  0, 15}, {  0, 16},
+{  0, 17}, {  0, 18}, {  0, 19}, {  0, 20}, {  0, 21}, {  0, 22}, {  0, 23}, {  0, 24},
+{  0, 25}, {  0, 26}, {  0, 27}, {  0, 28}, {  0, 29}, {  0, 30}, {  0, 31}, {  0, 32},
+{  1,  1}, {  1,  2}, {  1,  3}, {  1,  4}, {  1,  5}, {  1,  6}, {  1,  7}, {  1,  8},
+{  1,  9}, {  1, 10}, {  1, 11}, {  1, 12}, {  1, 13}, {  2,  1}, {  2,  2}, {  2,  3},
+{  2,  4}, {  2,  5}, {  2,  6}, {  2,  7}, {  2,  8}, {  3,  1}, {  3,  2}, {  3,  3},
+{  3,  4}, {  3,  5}, {  3,  6}, {  4,  1}, {  4,  2}, {  4,  3}, {  4,  4}, {  4,  5},
+{  5,  1}, {  5,  2}, {  5,  3}, {  5,  4}, {  6,  1}, {  6,  2}, {  6,  3}, {  6,  4},
+{  7,  1}, {  7,  2}, {  7,  3}, {  8,  1}, {  8,  2}, {  8,  3}, {  9,  1}, {  9,  2},
+{  9,  3}, { 10,  1}, { 10,  2}, { 11,  1}, { 11,  2}, { 12,  1}, { 12,  2}, { 13,  1},
+{ 13,  2}, { 14,  1}, { 14,  2}, { 15,  1}, { 15,  2}, { 16,  1}, { 16,  2}, { 17,  1},
+{ 17,  2}, { 18,  1}, { 18,  2}, { 19,  1}, { 19,  2}, { 20,  1}, { 20,  2}, { 21,  1},
+{ 21,  2}, { 22,  1}, { 22,  2}, { 23,  1}, { 24,  1}, {  0,  1}, {  0,  2}, {  0,  3},
+{  0,  4}, {  1,  1}, {  1,  2}, {  1,  3}, {  2,  1}, {  2,  2}, {  2,  3}, {  3,  1},
+{  3,  2}, {  3,  3}, {  4,  1}, {  4,  2}, {  5,  1}, {  5,  2}, {  6,  1}, {  6,  2},
+{  7,  1}, {  7,  2}, {  8,  1}, {  8,  2}, {  9,  1}, {  9,  2}, { 10,  1}, { 10,  2},
+{ 11,  1}, { 11,  2}, { 12,  1}, { 12,  2}, { 13,  1}, { 13,  2}, { 14,  1}, { 14,  2},
+{ 15,  1}, { 15,  2}, { 16,  1}, { 16,  2}, { 17,  1}, { 17,  2}, { 18,  1}, { 18,  2},
+{ 19,  1}, { 19,  2}, { 20,  1}, { 20,  2}, { 21,  1}, { 21,  2}, { 22,  1}, { 22,  2},
+{ 23,  1}, { 23,  2}, { 24,  1}, { 24,  2}, { 25,  1}, { 25,  2}, { 26,  1}, { 26,  2},
+{ 27,  1}, { 27,  2}, { 28,  1}, { 28,  2}, { 29,  1}, { 30,  1}
+}
+};
+
+static const uint8_t vc1_delta_level_table[AC_MODES][31] = {
+{
+      19,    15,    12,    11,     6,     5,     4,     4,     4,     4,
+       3,     3,     3,     3,     3,     3,     2,     2,     1,     1,
+       1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
+       1
+},
+{
+      23,    11,     8,     7,     5,     5,     4,     4,     3,     3,
+       3,     3,     2,     2,     2,     2,     2,     1,     1,     1,
+       1,     1,     1,     1,     1,     1,     1
+},
+{
+      16,    11,     8,     7,     5,     4,     4,     3,     3,     3,
+       3,     3,     3,     3,     2,     2,     1,     1,     1,     1,
+       1
+},
+{
+      14,     9,     5,     4,     4,     4,     3,     3,     3,     3,
+       3,     3,     3,     2,     2,     2,     1,     1,     1,     1,
+       1,     1,     1,     1,     1,     1,     1,     1,     1,
+       1
+},
+{
+      27,    10,     5,     4,     3,     3,     3,     3,     2,     2,
+       1,     1,     1,     1,     1
+},
+{
+      12,     6,     4,     3,     3,     3,     3,     2,     2,     2,
+       2,     1,     1,     1,     1,     1,     1,     1,     1,     1,
+       1,     1,     1,     1,     1,     1,     1
+},
+{
+      56,    20,    10,     7,     6,     5,     4,     3,     3,     3,
+       2,     2,     2,     2,     1
+},
+{
+      32,    13,     8,     6,     5,     4,     4,     3,     3,     3,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     1,     1
+}
+};
+
+static const uint8_t vc1_last_delta_level_table[AC_MODES][44] = {
+{
+       6,     5,     4,     4,     3,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     1,     1,     1,     1,
+       1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
+       1,     1,     1,     1,     1,     1,     1,     1
+},
+{
+       9,     5,     4,     4,     3,     3,     3,     2,     2,     2,
+       2,     2,     2,     2,     2,     1,     1,     1,     1,     1,
+       1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
+       1,     1,     1,     1,     1,     1,     1
+},
+{
+       4,     4,     3,     3,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     1,     1,     1,     1,     1,     1,
+       1,     1,     1,     1,     1,     1,     1
+},
+{
+       5,     4,     3,     3,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     1,     1,     1,     1,
+       1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
+       1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
+       1,     1,     1,     1
+},
+{
+       8,     3,     2,     2,     2,     2,     2,     1,     1,     1,
+       1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
+       1
+},
+{
+       3,     2,     1,     1,     1,     1,     1,     1,     1,     1,
+       1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
+       1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
+       1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
+       1
+},
+{
+       4,     3,     3,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     1,     1
+},
+{
+       4,     3,     3,     3,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     1,
+       1
+}
+};
+
+static const uint8_t vc1_delta_run_table[AC_MODES][57] = {
+{
+      -1,    30,    17,    15,     9,     5,     4,     3,     3,     3,
+       3,     3,     2,     1,     1,     1,     0,     0,     0,
+       0
+},
+{
+      -1,    26,    16,    11,     7,     5,     3,     3,     2,     1,
+       1,     1,     0,     0,     0,     0,     0,     0,     0,     0,
+       0,     0,     0,     0
+},
+{
+      -1,    20,    15,    13,     6,     4,     3,     3,     2,     1,
+       1,     1,     0,     0,     0,     0,     0
+},
+{
+      -1,    29,    15,    12,     5,     2,     1,     1,     1,     1,
+       0,     0,     0,     0,     0
+},
+{
+      -1,    14,     9,     7,     3,     2,     1,     1,     1,     1,
+       1,     0,     0,     0,     0,     0,     0,     0,     0,     0,
+       0,     0,     0,     0,     0,     0,     0,     0
+},
+{
+      -1,    26,    10,     6,     2,     1,     1,     0,     0,     0,
+       0,     0,     0
+},
+{
+      -1,    14,    13,     9,     6,     5,     4,     3,     2,     2,
+       2,     1,     1,     1,     1,     1,     1,     1,     1,     1,
+       1,     0,     0,     0,     0,     0,     0,     0,     0,     0,
+       0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
+       0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
+       0,     0,     0,     0,     0,     0,     0
+},
+{
+      -1,    24,    22,     9,     6,     4,     3,     2,     2,     1,
+       1,     1,     1,     1,     0,     0,     0,     0,     0,     0,
+       0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
+       0,     0,     0
+}
+};
+
+static const uint8_t vc1_last_delta_run_table[AC_MODES][10] = {
+{
+      -1,    37,    15,     4,     3,     1,     0
+},
+{
+      -1,    36,    14,     6,     3,     1,     0,     0,     0,
+       0
+},
+{
+      -1,    26,    13,     3,     1
+},
+{
+      -1,    43,    15,     3,     1,     0
+},
+{
+      -1,    20,     6,     1,     0,     0,     0,     0,     0
+},
+{
+      -1,    40,     1,     0
+},
+{
+      -1,    16,    14,     2,     0
+},
+{
+      -1,    30,    28,     3,     0
+}
+};
diff --git a/contrib/ffmpeg/libavcodec/vc1data.h b/contrib/ffmpeg/libavcodec/vc1data.h
new file mode 100644
index 000000000..70e88b525
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vc1data.h
@@ -0,0 +1,652 @@
+/*
+ * VC-1 and WMV3 decoder
+ * copyright (c) 2006 Konstantin Shishkov
+ * (c) 2005 anonymous, Alex Beregszaszi, Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file vc1data.h
+ * VC-1 tables.
+ */
+
+#ifndef VC1DATA_H
+#define VC1DATA_H
+
+#if 0 //original bfraction from vc9data.h, not conforming to standard
+/* Denominator used for vc1_bfraction_lut */
+#define B_FRACTION_DEN  840
+
+/* bfraction is fractional, we scale to the GCD 3*5*7*8 = 840 */
+const int16_t vc1_bfraction_lut[23] = {
+  420 /*1/2*/, 280 /*1/3*/, 560 /*2/3*/, 210 /*1/4*/,
+  630 /*3/4*/, 168 /*1/5*/, 336 /*2/5*/,
+  504 /*3/5*/, 672 /*4/5*/, 140 /*1/6*/, 700 /*5/6*/,
+  120 /*1/7*/, 240 /*2/7*/, 360 /*3/7*/, 480 /*4/7*/,
+  600 /*5/7*/, 720 /*6/7*/, 105 /*1/8*/, 315 /*3/8*/,
+  525 /*5/8*/, 735 /*7/8*/,
+  -1 /*inv.*/, 0 /*BI fm*/
+};
+#else
+/* Denominator used for vc1_bfraction_lut */
+#define B_FRACTION_DEN  256
+
+/* pre-computed scales for all bfractions and base=256 */
+static const int16_t vc1_bfraction_lut[23] = {
+  128 /*1/2*/,  85 /*1/3*/, 170 /*2/3*/,  64 /*1/4*/,
+  192 /*3/4*/,  51 /*1/5*/, 102 /*2/5*/,
+  153 /*3/5*/, 204 /*4/5*/,  43 /*1/6*/, 215 /*5/6*/,
+   37 /*1/7*/,  74 /*2/7*/, 111 /*3/7*/, 148 /*4/7*/,
+  185 /*5/7*/, 222 /*6/7*/,  32 /*1/8*/,  96 /*3/8*/,
+  160 /*5/8*/, 224 /*7/8*/,
+  -1 /*inv.*/, 0 /*BI fm*/
+};
+#endif
+
+static const uint8_t vc1_bfraction_bits[23] = {
+    3, 3, 3, 3,
+    3, 3, 3,
+    7, 7, 7, 7,
+    7, 7, 7, 7,
+    7, 7, 7, 7,
+    7, 7,
+    7, 7
+};
+static const uint8_t vc1_bfraction_codes[23] = {
+     0,   1,   2,   3,
+     4,   5,   6,
+   112, 113, 114, 115,
+   116, 117, 118, 119,
+   120, 121, 122, 123,
+   124, 125,
+   126, 127
+};
+
+//Same as H.264
+static const AVRational vc1_pixel_aspect[16]={
+ {0, 1},
+ {1, 1},
+ {12, 11},
+ {10, 11},
+ {16, 11},
+ {40, 33},
+ {24, 11},
+ {20, 11},
+ {32, 11},
+ {80, 33},
+ {18, 11},
+ {15, 11},
+ {64, 33},
+ {160, 99},
+ {0, 1},
+ {0, 1}
+};
+
+/* BitPlane IMODE - such a small table... */
+static const uint8_t vc1_imode_codes[7] = {
+  0, 2, 1, 3, 1, 2, 3
+};
+static const uint8_t vc1_imode_bits[7] = {
+  4, 2, 3, 2, 4, 3, 3
+};
+
+/* Normal-2 imode */
+static const uint8_t vc1_norm2_codes[4] = {
+  0, 4, 5, 3
+};
+static const uint8_t vc1_norm2_bits[4] = {
+  1, 3, 3, 2
+};
+
+static const uint16_t vc1_norm6_codes[64] = {
+0x001, 0x002, 0x003, 0x000, 0x004, 0x001, 0x002, 0x047, 0x005, 0x003, 0x004, 0x04B, 0x005, 0x04D, 0x04E, 0x30E,
+0x006, 0x006, 0x007, 0x053, 0x008, 0x055, 0x056, 0x30D, 0x009, 0x059, 0x05A, 0x30C, 0x05C, 0x30B, 0x30A, 0x037,
+0x007, 0x00A, 0x00B, 0x043, 0x00C, 0x045, 0x046, 0x309, 0x00D, 0x049, 0x04A, 0x308, 0x04C, 0x307, 0x306, 0x036,
+0x00E, 0x051, 0x052, 0x305, 0x054, 0x304, 0x303, 0x035, 0x058, 0x302, 0x301, 0x034, 0x300, 0x033, 0x032, 0x007,
+};
+
+static const uint8_t vc1_norm6_bits[64] = {
+ 1,  4,  4,  8,  4,  8,  8, 10,  4,  8,  8, 10,  8, 10, 10, 13,
+ 4,  8,  8, 10,  8, 10, 10, 13,  8, 10, 10, 13, 10, 13, 13,  9,
+ 4,  8,  8, 10,  8, 10, 10, 13,  8, 10, 10, 13, 10, 13, 13,  9,
+ 8, 10, 10, 13, 10, 13, 13,  9, 10, 13, 13,  9, 13,  9,  9,  6,
+};
+/* Normal-6 imode */
+static const uint8_t vc1_norm6_spec[64][5] = {
+{ 0,  1, 1        },
+{ 1,  2, 4        },
+{ 2,  3, 4        },
+{ 3,  0, 8        },
+{ 4,  4, 4        },
+{ 5,  1, 8        },
+{ 6,  2, 8        },
+{ 7,  2, 5,  7, 5 },
+{ 8,  5, 4        },
+{ 9,  3, 8        },
+{10,  4, 8        },
+{11,  2, 5, 11, 5 },
+{12,  5, 8        },
+{13,  2, 5, 13, 5 },
+{14,  2, 5, 14, 5 },
+{15,  3, 5, 14, 8 },
+{16,  6, 4        },
+{17,  6, 8        },
+{18,  7, 8        },
+{19,  2, 5, 19, 5 },
+{20,  8, 8        },
+{21,  2, 5, 21, 5 },
+{22,  2, 5, 22, 5 },
+{23,  3, 5, 13, 8 },
+{24,  9, 8        },
+{25,  2, 5, 25, 5 },
+{26,  2, 5, 26, 5 },
+{27,  3, 5, 12, 8 },
+{28,  2, 5, 28, 5 },
+{29,  3, 5, 11, 8 },
+{30,  3, 5, 10, 8 },
+{31,  3, 5,  7, 4 },
+{32,  7, 4        },
+{33, 10, 8        },
+{34, 11, 8        },
+{35,  2, 5,  3, 5 },
+{36, 12, 8        },
+{37,  2, 5,  5, 5 },
+{38,  2, 5,  6, 5 },
+{39,  3, 5,  9, 8 },
+{40, 13, 8        },
+{41,  2, 5,  9, 5 },
+{42,  2, 5, 10, 5 },
+{43,  3, 5,  8, 8 },
+{44,  2, 5, 12, 5 },
+{45,  3, 5,  7, 8 },
+{46,  3, 5,  6, 8 },
+{47,  3, 5,  6, 4 },
+{48, 14, 8        },
+{49,  2, 5, 17, 5 },
+{50,  2, 5, 18, 5 },
+{51,  3, 5,  5, 8 },
+{52,  2, 5, 20, 5 },
+{53,  3, 5,  4, 8 },
+{54,  3, 5,  3, 8 },
+{55,  3, 5,  5, 4 },
+{56,  2, 5, 24, 5 },
+{57,  3, 5,  2, 8 },
+{58,  3, 5,  1, 8 },
+{59,  3, 5,  4, 4 },
+{60,  3, 5,  0, 8 },
+{61,  3, 5,  3, 4 },
+{62,  3, 5,  2, 4 },
+{63,  3, 5,  1, 1 },
+};
+
+/* 4MV Block pattern VLC tables */
+static const uint8_t vc1_4mv_block_pattern_codes[4][16] = {
+  { 14, 58, 59, 25, 12, 26, 15, 15, 13, 24, 27,  0, 28,  1,  2,  2},
+  {  8, 18, 19,  4, 20,  5, 30, 11, 21, 31,  6, 12,  7, 13, 14,  0},
+  { 15,  6,  7,  2,  8,  3, 28,  9, 10, 29,  4, 11,  5, 12, 13,  0},
+  {  0, 11, 12,  4, 13,  5, 30, 16, 14, 31,  6, 17,  7, 18, 19, 19}
+};
+static const uint8_t vc1_4mv_block_pattern_bits[4][16] = {
+  { 5, 6, 6, 5, 5, 5, 5, 4, 5, 5, 5, 3, 5, 3, 3, 2},
+  { 4, 5, 5, 4, 5, 4, 5, 4, 5, 5, 4, 4, 4, 4, 4, 2},
+  { 4, 4, 4, 4, 4, 4, 5, 4, 4, 5, 4, 4, 4, 4, 4, 3},
+  { 2, 4, 4, 4, 4, 4, 5, 5, 4, 5, 4, 5, 4, 5, 5, 4}
+};
+
+const uint8_t wmv3_dc_scale_table[32]={
+    0, 2, 4, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21
+};
+
+/* P-Picture CBPCY VLC tables */
+#if 1 // Looks like original tables are not conforming to standard at all. Are they used for old WMV?
+static const uint16_t vc1_cbpcy_p_codes[4][64] = {
+  {
+      0,   6,  15,  13,  13,  11,   3,  13,   5,   8,  49,  10,  12, 114, 102, 119,
+      1,  54,  96,   8,  10, 111,   5,  15,  12,  10,   2,  12,  13, 115,  53,  63,
+      1,   7,   1,   7,  14,  12,   4,  14,   1,   9,  97,  11,   7,  58,  52,  62,
+      4, 103,   1,   9,  11,  56, 101, 118,   4, 110, 100,  30,   2,   5,   4,   3
+  },
+  {
+      0,   9,   1,  18,   5,  14, 237,  26,   3, 121,   3,  22,  13,  16,   6,  30,
+      2,  10,   1,  20,  12, 241,   5,  28,  16,  12,   3,  24,  28, 124, 239, 247,
+      1, 240,   1,  19,  18,  15,   4,  27,   1, 122,   2,  23,   1,  17,   7,  31,
+      1,  11,   2,  21,  19, 246, 238,  29,  17,  13, 236,  25,  58,  63,   8, 125
+  },
+  {
+      0, 201,  25, 231,   5, 221,   1,   3,   2, 414,   2, 241,  16, 225, 195, 492,
+      2, 412,   1, 240,   7, 224,  98, 245,   1, 220,  96,   5,   9, 230, 101, 247,
+      1, 102,   1, 415,  24,   3,   2, 244,   3,  54,   3, 484,  17, 114, 200, 493,
+      3, 413,   1,   4,  13, 113,  99, 485,   4, 111, 194, 243,   5,  29,  26,  31
+  },
+  {
+      0,  28,  12,  44,   3,  36,  20,  52,   2,  32,  16,  48,   8,  40,  24,  28,
+      1,  30,  14,  46,   6,  38,  22,  54,   3,  34,  18,  50,  10,  42,  26,  30,
+      1,  29,  13,  45,   5,  37,  21,  53,   2,  33,  17,  49,   9,  41,  25,  29,
+      1,  31,  15,  47,   7,  39,  23,  55,   4,  35,  19,  51,  11,  43,  27,  31
+   }
+};
+
+static const uint8_t vc1_cbpcy_p_bits[4][64] = {
+  {
+    13,  13,   7,  13,   7,  13,  13,  12,   6,  13,   7,  12,   6,   8,   8,   8,
+     5,   7,   8,  12,   6,   8,  13,  12,   7,  13,  13,  12,   6,   8,   7,   7,
+     6,  13,   8,  12,   7,  13,  13,  12,   7,  13,   8,  12,   5,   7,   7,   7,
+     6,   8,  13,  12,   6,   7,   8,   8,   5,   8,   8,   6,   3,   3,   3,   2
+  },
+  {
+    14,  13,   8,  13,   3,  13,   8,  13,   3,   7,   8,  13,   4,  13,  13,  13,
+     3,  13,  13,  13,   4,   8,  13,  13,   5,  13,  13,  13,   5,   7,   8,   8,
+     3,   8,  14,  13,   5,  13,  13,  13,   4,   7,  13,  13,   6,  13,  13,  13,
+     5,  13,   8,  13,   5,   8,   8,  13,   5,  13,   8,  13,   6,   6,  13,   7
+  },
+  {
+    13,   8,   6,   8,   4,   8,  13,  12,   4,   9,   8,   8,   5,   8,   8,   9,
+     5,   9,  10,   8,   4,   8,   7,   8,   6,   8,   7,  13,   4,   8,   7,   8,
+     5,   7,   8,   9,   6,  13,  13,   8,   4,   6,   8,   9,   5,   7,   8,   9,
+     5,   9,   9,  13,   5,   7,   7,   9,   4,   7,   8,   8,   3,   5,   5,   5
+  },
+  {
+     9,   9,   9,   9,   2,   9,   9,   9,   2,   9,   9,   9,   9,   9,   9,   8,
+     3,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   8,
+     2,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   8,
+     9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,   8
+  }
+};
+#else
+static const uint16_t vc1_cbpcy_p_codes[4][64] = {
+  {
+      0,   1,   1,   4,   5,   1,  12,   4,  13,  14,  10,  11,  12,   7,  13,   2,
+     15,   1,  96,   1,  49,  97,   2, 100,   3,   4,   5, 101, 102,  52,  53,   4,
+      6,   7,  54, 103,   8,   9,  10, 110,  11,  12, 111,  56, 114,  58, 115,   5,
+     13,   7,   8,   9,  10,  11,  12,  30,  13,  14,  15, 118, 119,  62,  63,   3
+  },
+  {
+      0,   1,   2,   1,   3,   1,  16,  17,   5,  18,  12,  19,  13,   1,  28,  58,
+      1,   1,   1,   2,   3,   2,   3, 236, 237,   4,   5, 238,   6,   7, 239,   8,
+      9, 240,  10,  11, 121, 122,  12,  13,  14,  15, 241, 246,  16,  17, 124,  63,
+     18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31, 247, 125
+  },
+  {
+      0,   1,   2,   3,   2,   3,   1,   4,   5,  24,   7,  13,  16,  17,   9,   5,
+     25,   1,   1,   1,   2,   3,  96, 194,   1,   2,  98,  99, 195, 200, 101,  26,
+    201, 102, 412, 413, 414,  54, 220, 111, 221,   3, 224, 113, 225, 114, 230,  29,
+    231, 415, 240,   4, 241, 484,   5, 243,   3, 244, 245, 485, 492, 493, 247,  31
+  },
+  {
+      0,   1,   1,   1,   2,   2,   3,   4,   3,   5,   6,   7,   8,   9,  10,  11,
+     12,  13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
+     28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,
+     44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  28,  29,  30,  31
+   }
+};
+static const uint8_t vc1_cbpcy_p_bits[4][64] = {
+  {
+    13,  6,  5,  6,  6,  7,  7,  5,  7,  7,  6,  6,  6,  5,  6,  3,
+     7,  8,  8, 13,  7,  8, 13,  8, 13, 13, 13,  8,  8,  7,  7,  3,
+    13, 13,  7,  8, 13, 13, 13,  8, 13, 13,  8,  7,  8,  7,  8,  3,
+    13, 12, 12, 12, 12, 12, 12,  6, 12, 12, 12,  8,  8,  7,  7,  2
+  },
+  {
+    14,  3,  3,  5,  3,  4,  5,  5,  3,  5,  4,  5,  4,  6,  5,  6,
+     8, 14, 13,  8,  8, 13, 13,  8,  8, 13, 13,  8, 13, 13,  8, 13,
+    13,  8, 13, 13,  7,  7, 13, 13, 13, 13,  8,  8, 13, 13,  7,  6,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,  8,  7
+  },
+  {
+    13,  5,  5,  5,  4,  4,  6,  4,  4,  6,  4,  5,  5,  5,  4,  3,
+     6,  8, 10,  9,  8,  8,  7,  8, 13, 13,  7,  7,  8,  8,  7,  5,
+     8,  7,  9,  9,  9,  6,  8,  7,  8, 13,  8,  7,  8,  7,  8,  5,
+     8,  9,  8, 13,  8,  9, 13,  8, 12,  8,  8,  9,  9,  9,  8,  5
+  },
+  {
+     9,  2,  3,  9,  2,  9,  9,  9,  2,  9,  9,  9,  9,  9,  9,  9,
+     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  8,  8,  8,  8
+  }
+};
+#endif
+
+/* MacroBlock Transform Type: 7.1.3.11, p89
+ * 8x8:B
+ * 8x4:B:btm  8x4:B:top  8x4:B:both,
+ * 4x8:B:right  4x8:B:left  4x8:B:both
+ * 4x4:B  8x8:MB
+ * 8x4:MB:btm  8x4:MB:top  8x4,MB,both
+ * 4x8,MB,right  4x8,MB,left
+ * 4x4,MB                               */
+static const uint16_t vc1_ttmb_codes[3][16] = {
+  {
+    0x0003,
+    0x002E, 0x005F, 0x0000,
+    0x0016, 0x0015, 0x0001,
+    0x0004, 0x0014,
+    0x02F1, 0x0179, 0x017B,
+    0x0BC0, 0x0BC1, 0x05E1,
+    0x017A
+  },
+  {
+    0x0006,
+    0x0006, 0x0003, 0x0007,
+    0x000F, 0x000E, 0x0000,
+    0x0002, 0x0002,
+    0x0014, 0x0011, 0x000B,
+    0x0009, 0x0021, 0x0015,
+    0x0020
+  },
+  {
+    0x0006,
+    0x0000, 0x000E, 0x0005,
+    0x0002, 0x0003, 0x0003,
+    0x000F, 0x0002,
+    0x0081, 0x0021, 0x0009,
+    0x0101, 0x0041, 0x0011,
+    0x0100
+  }
+};
+
+static const uint8_t vc1_ttmb_bits[3][16] = {
+  {
+     2,
+     6,  7,  2,
+     5,  5,  2,
+     3,  5,
+    10,  9,  9,
+    12, 12, 11,
+     9
+  },
+  {
+    3,
+    4, 4, 4,
+    4, 4, 3,
+    3, 2,
+    7, 7, 6,
+    6, 8, 7,
+    8
+  },
+  {
+     3,
+     3, 4, 5,
+     3, 3, 4,
+     4, 2,
+    10, 8, 6,
+    11, 9, 7,
+    11
+  }
+};
+
+/* TTBLK (Transform Type per Block) tables */
+static const uint8_t vc1_ttblk_codes[3][8] = {
+  {  0,  1,  3,  5, 16, 17, 18, 19},
+  {  3,  0,  1,  2,  3,  5,  8,  9},
+  {  1,  0,  1,  4,  6,  7, 10, 11}
+};
+static const uint8_t vc1_ttblk_bits[3][8] = {
+  {  2,  2,  2,  3,  5,  5,  5,  5},
+  {  2,  3,  3,  3,  3,  3,  4,  4},
+  {  2,  3,  3,  3,  3,  3,  4,  4}
+};
+
+/* SUBBLKPAT tables, p93-94, reordered */
+static const uint8_t vc1_subblkpat_codes[3][15] = {
+  { 14, 12,  7, 11,  9, 26,  2, 10, 27,  8,  0,  6,  1, 15,  1},
+  { 14,  0,  8, 15, 10,  4, 23, 13,  5,  9, 25,  3, 24, 22,  1},
+  {  5,  6,  2,  2,  8,  0, 28,  3,  1,  3, 29,  1, 19, 18, 15}
+};
+static const uint8_t vc1_subblkpat_bits[3][15] = {
+  {  5,  5,  5,  5,  5,  6,  4,  5,  6,  5,  4,  5,  4,  5,  1},
+  {  4,  3,  4,  4,  4,  5,  5,  4,  5,  4,  5,  4,  5,  5,  2},
+  {  3,  3,  4,  3,  4,  5,  5,  3,  5,  4,  5,  4,  5,  5,  4}
+};
+
+/* MV differential tables, p265 */
+static const uint16_t vc1_mv_diff_codes[4][73] = {
+  {
+       0,    2,    3,    8,  576,    3,    2,    6,
+       5,  577,  578,    7,    8,    9,   40,   19,
+      37,   82,   21,   22,   23,  579,  580,  166,
+      96,  167,   49,  194,  195,  581,  582,  583,
+     292,  293,  294,   13,    2,    7,   24,   50,
+     102,  295,   13,    7,    8,   18,   50,  103,
+      38,   20,   21,   22,   39,  204,  103,   23,
+      24,   25,  104,  410,  105,  106,  107,  108,
+     109,  220,  411,  442,  222,  443,  446,  447,
+       7 /* 73 elements */
+  },
+  {
+       0,    4,    5,    3,    4,    3,    4,    5,
+      20,    6,   21,   44,   45,   46, 3008,   95,
+     112,  113,   57, 3009, 3010,  116,  117, 3011,
+     118, 3012, 3013, 3014, 3015, 3016, 3017, 3018,
+    3019, 3020, 3021, 3022,    1,    4,   15,  160,
+     161,   41,    6,   11,   42,  162,   43,  119,
+      56,   57,   58,  163,  236,  237, 3023,  119,
+     120,  242,  122,  486, 1512,  487,  246,  494,
+    1513,  495, 1514, 1515, 1516, 1517, 1518, 1519,
+      31 /* 73 elements */
+  },
+  {
+       0,  512,  513,  514,  515,    2,    3,  258,
+     259,  260,  261,  262,  263,  264,  265,  266,
+     267,  268,  269,  270,  271,  272,  273,  274,
+     275,  276,  277,  278,  279,  280,  281,  282,
+     283,  284,  285,  286,    1,    5,  287,  288,
+     289,  290,    6,    7,  291,  292,  293,  294,
+     295,  296,  297,  298,  299,  300,  301,  302,
+     303,  304,  305,  306,  307,  308,  309,  310,
+     311,  312,  313,  314,  315,  316,  317,  318,
+     319 /* 73 elements */
+  },
+  {
+       0,    1,    1,    2,    3,    4,    1,    5,
+       4,    3,    5,    8,    6,    9,   10,   11,
+      12,    7,  104,   14,  105,    4,   10,   15,
+      11,    6,   14,    8,  106,  107,  108,   15,
+     109,    9,   55,   10,    1,    2,    1,    2,
+       3,   12,    6,    2,    6,    7,   28,    7,
+      15,    8,    5,   18,   29,  152,   77,   24,
+      25,   26,   39,  108,   13,  109,   55,   56,
+      57,  116,   11,  153,  234,  235,  118,  119,
+      15 /* 73 elements */
+  }
+};
+static const uint8_t vc1_mv_diff_bits[4][73] = {
+  {
+     6,  7,  7,  8, 14,  6,  5,  6,  7, 14, 14,  6,  6,  6,  8,  9,
+    10,  9,  7,  7,  7, 14, 14, 10,  9, 10,  8, 10, 10, 14, 14, 14,
+    13, 13, 13,  6,  3,  5,  6,  8,  9, 13,  5,  4,  4,  5,  7,  9,
+     6,  5,  5,  5,  6,  9,  8,  5,  5,  5,  7, 10,  7,  7,  7,  7,
+     7,  8, 10,  9,  8,  9,  9,  9,  3 /* 73 elements */
+  },
+  {
+     5,  7,  7,  6,  6,  5,  5,  6,  7,  5,  7,  8,  8,  8, 14,  9,
+     9,  9,  8, 14, 14,  9,  9, 14,  9, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14,  2,  3,  6,  8,  8,  6,  3,  4,  6,  8,  6,  9,
+     6,  6,  6,  8,  8,  8, 14,  7,  7,  8,  7,  9, 13,  9,  8,  9,
+    13,  9, 13, 13, 13, 13, 13, 13,  5 /* 73 elements */
+
+  },
+  {
+     3, 12, 12, 12, 12,  3,  4, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11,  1,  5, 11, 11, 11, 11,  4,  4, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11, 11 /* 73 elements */
+  },
+  {
+    15, 11, 15, 15, 15, 15, 12, 15, 12, 11, 12, 12, 15, 12, 12, 12,
+    12, 15, 15, 12, 15, 10, 11, 12, 11, 10, 11, 10, 15, 15, 15, 11,
+    15, 10, 14, 10,  4,  4,  5,  7,  8,  9,  5,  3,  4,  5,  6,  8,
+     5,  4,  3,  5,  6,  8,  7,  5,  5,  5,  6,  7,  9,  7,  6,  6,
+     6,  7, 10,  8,  8,  8,  7,  7,  4 /* 73 elements */
+  }
+};
+
+/* DC differentials low+hi-mo, p217 are the same as in msmpeg4data .h */
+
+/* Scantables/ZZ scan are at 11.9 (p262) and 8.1.1.12 (p10) */
+static const int8_t vc1_normal_zz[64] = {
+       0,     8,     1,     2,     9,    16,    24,    17,
+      10,     3,     4,    11,    18,    25,    32,    40,
+      33,    48,    26,    19,    12,     5,     6,    13,
+      20,    27,    34,    41,    56,    49,    57,    42,
+      35,    28,    21,    14,     7,    15,    22,    29,
+      36,    43,    50,    58,    51,    59,    44,    37,
+      30,    23,    31,    38,    45,    52,    60,    53,
+      61,    46,    39,    47,    54,    62,    55,    63
+};
+
+static const int8_t vc1_horizontal_zz [64] = /* Table 227 */
+{
+       0,     1,     8,     2,     3,     9,    16,    24,
+      17,    10,     4,     5,    11,    18,    25,    32,
+      40,    48,    33,    26,    19,    12,     6,     7,
+      13,    20,    27,    34,    41,    56,    49,    57,
+      42,    35,    28,    21,    14,    15,    22,    29,
+      36,    43,    50,    58,    51,    44,    37,    30,
+      23,    31,    38,    45,    52,    59,    60,    53,
+      46,    39,    47,    54,    61,    62,    55,    63
+};
+
+static const int8_t vc1_vertical_zz [64] = /* Table 228 */
+{
+       0,     8,    16,     1,    24,    32,    40,     9,
+       2,     3,    10,    17,    25,    48,    56,    41,
+      33,    26,    18,    11,     4,     5,    12,    19,
+      27,    34,    49,    57,    50,    42,    35,    28,
+      20,    13,     6,     7,    14,    21,    29,    36,
+      43,    51,    58,    59,    52,    44,    37,    30,
+      22,    15,    23,    31,    38,    45,    60,    53,
+      46,    39,    47,    54,    61,    62,    55,    63
+};
+
+static const int8_t vc1_simple_progressive_8x8_zz [64] =
+/* Table 229 */
+{
+       0,     8,     1,     2,     9,    16,    24,    17,
+      10,     3,     4,    11,    18,    25,    32,    40,
+      48,    56,    41,    33,    26,    19,    12,     5,
+       6,    13,    20,    27,    34,    49,    57,    58,
+      50,    42,    35,    28,    21,    14,     7,    15,
+      22,    29,    36,    43,    51,    59,    60,    52,
+      44,    37,    30,    23,    31,    38,    45,    53,
+      61,    62,    54,    46,    39,    47,    55,    63
+};
+
+static const int8_t vc1_simple_progressive_8x4_zz [32] = /* Table 230 */
+{
+       0,     1,     2,     8,     3,     9,    10,    16,
+       4,    11,    17,    24,    18,    12,     5,    19,
+      25,    13,    20,    26,    27,     6,    21,    28,
+      14,    22,    29,     7,    30,    15,    23,    31
+};
+
+static const int8_t vc1_simple_progressive_4x8_zz [32] = /* Table 231 */
+{
+       0,     8,     1,    16,
+       9,    24,    17,     2,
+      32,    10,    25,    40,
+      18,    48,    33,    26,
+      56,    41,    34,     3,
+      49,    57,    11,    42,
+      19,    50,    27,    58,
+      35,    43,    51,    59
+};
+
+/* Table 232 */
+static const int8_t vc1_simple_progressive_4x4_zz [16] =
+{
+       0,     8,    16,     1,
+       9,    24,    17,     2,
+      10,    18,    25,     3,
+      11,    26,    19,    27
+};
+
+static const int8_t vc1_adv_progressive_8x4_zz [32] = /* Table 233 */
+{
+       0,     8,     1,    16,     2,     9,    10,     3,
+      24,    17,     4,    11,    18,    12,     5,    19,
+      25,    13,    20,    26,    27,     6,    21,    28,
+      14,    22,    29,     7,    30,    15,    23,    31
+};
+
+static const int8_t vc1_adv_progressive_4x8_zz [32] = /* Table 234 */
+{
+       0,     1,     8,     2,
+       9,    16,    17,    24,
+      10,    32,    25,    18,
+      40,     3,    33,    26,
+      48,    11,    56,    41,
+      34,    49,    57,    42,
+      19,    50,    27,    58,
+      35,    43,    51,    59
+};
+
+static const int8_t vc1_adv_interlaced_8x8_zz [64] = /* Table 235 */
+{
+       0,     8,     1,    16,    24,     9,     2,    32,
+      40,    48,    56,    17,    10,     3,    25,    18,
+      11,     4,    33,    41,    49,    57,    26,    34,
+      42,    50,    58,    19,    12,     5,    27,    20,
+      13,     6,    35,    28,    21,    14,     7,    15,
+      22,    29,    36,    43,    51,    59,    60,    52,
+      44,    37,    30,    23,    31,    38,    45,    53,
+      61,    62,    54,    46,    39,    47,    55,    63
+};
+
+static const int8_t vc1_adv_interlaced_8x4_zz [32] = /* Table 236 */
+{
+       0,     8,    16,    24,     1,     9,     2,    17,
+      25,    10,     3,    18,    26,     4,    11,    19,
+      12,     5,    13,    20,    27,     6,    21,    28,
+      14,    22,    29,     7,    30,    15,    23,    31
+};
+
+static const int8_t vc1_adv_interlaced_4x8_zz [32] = /* Table 237 */
+{
+       0,     1,     2,     8,
+      16,     9,    24,    17,
+      10,     3,    32,    40,
+      48,    56,    25,    18,
+      33,    26,    41,    34,
+      49,    57,    11,    42,
+      19,    50,    27,    58,
+      35,    43,    51,    59
+};
+
+static const int8_t vc1_adv_interlaced_4x4_zz [16] = /* Table 238 */
+{
+       0,     8,    16,    24,
+       1,     9,    17,     2,
+      25,    10,    18,     3,
+      26,    11,    19,    27
+};
+
+
+/* DQScale as specified in 8.1.3.9 - almost identical to 0x40000/i */
+static const int32_t vc1_dqscale[63] = {
+0x40000, 0x20000, 0x15555, 0x10000, 0xCCCD, 0xAAAB, 0x9249, 0x8000,
+    0x71C7, 0x6666, 0x5D17, 0x5555, 0x4EC5, 0x4925, 0x4444, 0x4000,
+    0x3C3C, 0x38E4, 0x35E5, 0x3333, 0x30C3, 0x2E8C, 0x2C86, 0x2AAB,
+    0x28F6, 0x2762, 0x25ED, 0x2492, 0x234F, 0x2222, 0x2108, 0x2000,
+    0x1F08, 0x1E1E, 0x1D42, 0x1C72, 0x1BAD, 0x1AF3, 0x1A42, 0x199A,
+    0x18FA, 0x1862, 0x17D0, 0x1746, 0x16C1, 0x1643, 0x15CA, 0x1555,
+    0x14E6, 0x147B, 0x1414, 0x13B1, 0x1352, 0x12F7, 0x129E, 0x1249,
+    0x11F7, 0x11A8, 0x115B, 0x1111, 0x10C9, 0x1084, 0x1000
+};
+#endif /* VC1DATA_H */
diff --git a/contrib/ffmpeg/libavcodec/vc1dsp.c b/contrib/ffmpeg/libavcodec/vc1dsp.c
new file mode 100644
index 000000000..9139ffb28
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vc1dsp.c
@@ -0,0 +1,463 @@
+/*
+ * VC-1 and WMV3 decoder - DSP functions
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+* @file vc1dsp.c
+ * VC-1 and WMV3 decoder
+ *
+ */
+
+#include "dsputil.h"
+
+
+/** Apply overlap transform to horizontal edge
+*/
+static void vc1_v_overlap_c(uint8_t* src, int stride)
+{
+    int i;
+    int a, b, c, d;
+    int d1, d2;
+    int rnd = 1;
+    for(i = 0; i < 8; i++) {
+        a = src[-2*stride];
+        b = src[-stride];
+        c = src[0];
+        d = src[stride];
+        d1 = (a - d + 3 + rnd) >> 3;
+        d2 = (a - d + b - c + 4 - rnd) >> 3;
+
+        src[-2*stride] = a - d1;
+        src[-stride] = b - d2;
+        src[0] = c + d2;
+        src[stride] = d + d1;
+        src++;
+        rnd = !rnd;
+    }
+}
+
+/** Apply overlap transform to vertical edge
+*/
+static void vc1_h_overlap_c(uint8_t* src, int stride)
+{
+    int i;
+    int a, b, c, d;
+    int d1, d2;
+    int rnd = 1;
+    for(i = 0; i < 8; i++) {
+        a = src[-2];
+        b = src[-1];
+        c = src[0];
+        d = src[1];
+        d1 = (a - d + 3 + rnd) >> 3;
+        d2 = (a - d + b - c + 4 - rnd) >> 3;
+
+        src[-2] = a - d1;
+        src[-1] = b - d2;
+        src[0] = c + d2;
+        src[1] = d + d1;
+        src += stride;
+        rnd = !rnd;
+    }
+}
+
+
+/** Do inverse transform on 8x8 block
+*/
+static void vc1_inv_trans_8x8_c(DCTELEM block[64])
+{
+    int i;
+    register int t1,t2,t3,t4,t5,t6,t7,t8;
+    DCTELEM *src, *dst;
+
+    src = block;
+    dst = block;
+    for(i = 0; i < 8; i++){
+        t1 = 12 * (src[0] + src[4]);
+        t2 = 12 * (src[0] - src[4]);
+        t3 = 16 * src[2] +  6 * src[6];
+        t4 =  6 * src[2] - 16 * src[6];
+
+        t5 = t1 + t3;
+        t6 = t2 + t4;
+        t7 = t2 - t4;
+        t8 = t1 - t3;
+
+        t1 = 16 * src[1] + 15 * src[3] +  9 * src[5] +  4 * src[7];
+        t2 = 15 * src[1] -  4 * src[3] - 16 * src[5] -  9 * src[7];
+        t3 =  9 * src[1] - 16 * src[3] +  4 * src[5] + 15 * src[7];
+        t4 =  4 * src[1] -  9 * src[3] + 15 * src[5] - 16 * src[7];
+
+        dst[0] = (t5 + t1 + 4) >> 3;
+        dst[1] = (t6 + t2 + 4) >> 3;
+        dst[2] = (t7 + t3 + 4) >> 3;
+        dst[3] = (t8 + t4 + 4) >> 3;
+        dst[4] = (t8 - t4 + 4) >> 3;
+        dst[5] = (t7 - t3 + 4) >> 3;
+        dst[6] = (t6 - t2 + 4) >> 3;
+        dst[7] = (t5 - t1 + 4) >> 3;
+
+        src += 8;
+        dst += 8;
+    }
+
+    src = block;
+    dst = block;
+    for(i = 0; i < 8; i++){
+        t1 = 12 * (src[ 0] + src[32]);
+        t2 = 12 * (src[ 0] - src[32]);
+        t3 = 16 * src[16] +  6 * src[48];
+        t4 =  6 * src[16] - 16 * src[48];
+
+        t5 = t1 + t3;
+        t6 = t2 + t4;
+        t7 = t2 - t4;
+        t8 = t1 - t3;
+
+        t1 = 16 * src[ 8] + 15 * src[24] +  9 * src[40] +  4 * src[56];
+        t2 = 15 * src[ 8] -  4 * src[24] - 16 * src[40] -  9 * src[56];
+        t3 =  9 * src[ 8] - 16 * src[24] +  4 * src[40] + 15 * src[56];
+        t4 =  4 * src[ 8] -  9 * src[24] + 15 * src[40] - 16 * src[56];
+
+        dst[ 0] = (t5 + t1 + 64) >> 7;
+        dst[ 8] = (t6 + t2 + 64) >> 7;
+        dst[16] = (t7 + t3 + 64) >> 7;
+        dst[24] = (t8 + t4 + 64) >> 7;
+        dst[32] = (t8 - t4 + 64 + 1) >> 7;
+        dst[40] = (t7 - t3 + 64 + 1) >> 7;
+        dst[48] = (t6 - t2 + 64 + 1) >> 7;
+        dst[56] = (t5 - t1 + 64 + 1) >> 7;
+
+        src++;
+        dst++;
+    }
+}
+
+/** Do inverse transform on 8x4 part of block
+*/
+static void vc1_inv_trans_8x4_c(DCTELEM block[64], int n)
+{
+    int i;
+    register int t1,t2,t3,t4,t5,t6,t7,t8;
+    DCTELEM *src, *dst;
+    int off;
+
+    off = n * 32;
+    src = block + off;
+    dst = block + off;
+    for(i = 0; i < 4; i++){
+        t1 = 12 * (src[0] + src[4]);
+        t2 = 12 * (src[0] - src[4]);
+        t3 = 16 * src[2] +  6 * src[6];
+        t4 =  6 * src[2] - 16 * src[6];
+
+        t5 = t1 + t3;
+        t6 = t2 + t4;
+        t7 = t2 - t4;
+        t8 = t1 - t3;
+
+        t1 = 16 * src[1] + 15 * src[3] +  9 * src[5] +  4 * src[7];
+        t2 = 15 * src[1] -  4 * src[3] - 16 * src[5] -  9 * src[7];
+        t3 =  9 * src[1] - 16 * src[3] +  4 * src[5] + 15 * src[7];
+        t4 =  4 * src[1] -  9 * src[3] + 15 * src[5] - 16 * src[7];
+
+        dst[0] = (t5 + t1 + 4) >> 3;
+        dst[1] = (t6 + t2 + 4) >> 3;
+        dst[2] = (t7 + t3 + 4) >> 3;
+        dst[3] = (t8 + t4 + 4) >> 3;
+        dst[4] = (t8 - t4 + 4) >> 3;
+        dst[5] = (t7 - t3 + 4) >> 3;
+        dst[6] = (t6 - t2 + 4) >> 3;
+        dst[7] = (t5 - t1 + 4) >> 3;
+
+        src += 8;
+        dst += 8;
+    }
+
+    src = block + off;
+    dst = block + off;
+    for(i = 0; i < 8; i++){
+        t1 = 17 * (src[ 0] + src[16]);
+        t2 = 17 * (src[ 0] - src[16]);
+        t3 = 22 * src[ 8];
+        t4 = 22 * src[24];
+        t5 = 10 * src[ 8];
+        t6 = 10 * src[24];
+
+        dst[ 0] = (t1 + t3 + t6 + 64) >> 7;
+        dst[ 8] = (t2 - t4 + t5 + 64) >> 7;
+        dst[16] = (t2 + t4 - t5 + 64) >> 7;
+        dst[24] = (t1 - t3 - t6 + 64) >> 7;
+
+        src ++;
+        dst ++;
+    }
+}
+
+/** Do inverse transform on 4x8 parts of block
+*/
+static void vc1_inv_trans_4x8_c(DCTELEM block[64], int n)
+{
+    int i;
+    register int t1,t2,t3,t4,t5,t6,t7,t8;
+    DCTELEM *src, *dst;
+    int off;
+
+    off = n * 4;
+    src = block + off;
+    dst = block + off;
+    for(i = 0; i < 8; i++){
+        t1 = 17 * (src[0] + src[2]);
+        t2 = 17 * (src[0] - src[2]);
+        t3 = 22 * src[1];
+        t4 = 22 * src[3];
+        t5 = 10 * src[1];
+        t6 = 10 * src[3];
+
+        dst[0] = (t1 + t3 + t6 + 4) >> 3;
+        dst[1] = (t2 - t4 + t5 + 4) >> 3;
+        dst[2] = (t2 + t4 - t5 + 4) >> 3;
+        dst[3] = (t1 - t3 - t6 + 4) >> 3;
+
+        src += 8;
+        dst += 8;
+    }
+
+    src = block + off;
+    dst = block + off;
+    for(i = 0; i < 4; i++){
+        t1 = 12 * (src[ 0] + src[32]);
+        t2 = 12 * (src[ 0] - src[32]);
+        t3 = 16 * src[16] +  6 * src[48];
+        t4 =  6 * src[16] - 16 * src[48];
+
+        t5 = t1 + t3;
+        t6 = t2 + t4;
+        t7 = t2 - t4;
+        t8 = t1 - t3;
+
+        t1 = 16 * src[ 8] + 15 * src[24] +  9 * src[40] +  4 * src[56];
+        t2 = 15 * src[ 8] -  4 * src[24] - 16 * src[40] -  9 * src[56];
+        t3 =  9 * src[ 8] - 16 * src[24] +  4 * src[40] + 15 * src[56];
+        t4 =  4 * src[ 8] -  9 * src[24] + 15 * src[40] - 16 * src[56];
+
+        dst[ 0] = (t5 + t1 + 64) >> 7;
+        dst[ 8] = (t6 + t2 + 64) >> 7;
+        dst[16] = (t7 + t3 + 64) >> 7;
+        dst[24] = (t8 + t4 + 64) >> 7;
+        dst[32] = (t8 - t4 + 64 + 1) >> 7;
+        dst[40] = (t7 - t3 + 64 + 1) >> 7;
+        dst[48] = (t6 - t2 + 64 + 1) >> 7;
+        dst[56] = (t5 - t1 + 64 + 1) >> 7;
+
+        src++;
+        dst++;
+    }
+}
+
+/** Do inverse transform on 4x4 part of block
+*/
+static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n)
+{
+    int i;
+    register int t1,t2,t3,t4,t5,t6;
+    DCTELEM *src, *dst;
+    int off;
+
+    off = (n&1) * 4 + (n&2) * 16;
+    src = block + off;
+    dst = block + off;
+    for(i = 0; i < 4; i++){
+        t1 = 17 * (src[0] + src[2]);
+        t2 = 17 * (src[0] - src[2]);
+        t3 = 22 * src[1];
+        t4 = 22 * src[3];
+        t5 = 10 * src[1];
+        t6 = 10 * src[3];
+
+        dst[0] = (t1 + t3 + t6 + 4) >> 3;
+        dst[1] = (t2 - t4 + t5 + 4) >> 3;
+        dst[2] = (t2 + t4 - t5 + 4) >> 3;
+        dst[3] = (t1 - t3 - t6 + 4) >> 3;
+
+        src += 8;
+        dst += 8;
+    }
+
+    src = block + off;
+    dst = block + off;
+    for(i = 0; i < 4; i++){
+        t1 = 17 * (src[ 0] + src[16]);
+        t2 = 17 * (src[ 0] - src[16]);
+        t3 = 22 * src[ 8];
+        t4 = 22 * src[24];
+        t5 = 10 * src[ 8];
+        t6 = 10 * src[24];
+
+        dst[ 0] = (t1 + t3 + t6 + 64) >> 7;
+        dst[ 8] = (t2 - t4 + t5 + 64) >> 7;
+        dst[16] = (t2 + t4 - t5 + 64) >> 7;
+        dst[24] = (t1 - t3 - t6 + 64) >> 7;
+
+        src ++;
+        dst ++;
+    }
+}
+
+/* motion compensation functions */
+
+/** Filter used to interpolate fractional pel values
+ */
+static always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r)
+{
+    switch(mode){
+    case 0: //no shift
+        return src[0];
+    case 1: // 1/4 shift
+        return (-4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2] + 32 - r) >> 6;
+    case 2: // 1/2 shift
+        return (-src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2] + 8 - r) >> 4;
+    case 3: // 3/4 shift
+        return (-3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2] + 32 - r) >> 6;
+    }
+    return 0; //should not occur
+}
+
+/** Function used to do motion compensation with bicubic interpolation
+ */
+static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int mode, int rnd)
+{
+    int i, j;
+    uint8_t tmp[8*11], *tptr;
+    int m, r;
+
+    m = (mode & 3);
+    r = rnd;
+    src -= stride;
+    tptr = tmp;
+    for(j = 0; j < 11; j++) {
+        for(i = 0; i < 8; i++)
+            tptr[i] = clip_uint8(vc1_mspel_filter(src + i, 1, m, r));
+        src += stride;
+        tptr += 8;
+    }
+    r = 1 - rnd;
+    m = (mode >> 2) & 3;
+
+    tptr = tmp + 8;
+    for(j = 0; j < 8; j++) {
+        for(i = 0; i < 8; i++)
+            dst[i] = clip_uint8(vc1_mspel_filter(tptr + i, 8, m, r));
+        dst += stride;
+        tptr += 8;
+    }
+}
+
+/* pixel functions - really are entry points to vc1_mspel_mc */
+
+/* this one is defined in dsputil.c */
+void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
+
+static void ff_put_vc1_mspel_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    vc1_mspel_mc(dst, src, stride, 0x1, rnd);
+}
+
+static void ff_put_vc1_mspel_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    vc1_mspel_mc(dst, src, stride, 0x2, rnd);
+}
+
+static void ff_put_vc1_mspel_mc30_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    vc1_mspel_mc(dst, src, stride, 0x3, rnd);
+}
+
+static void ff_put_vc1_mspel_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    vc1_mspel_mc(dst, src, stride, 0x4, rnd);
+}
+
+static void ff_put_vc1_mspel_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    vc1_mspel_mc(dst, src, stride, 0x5, rnd);
+}
+
+static void ff_put_vc1_mspel_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    vc1_mspel_mc(dst, src, stride, 0x6, rnd);
+}
+
+static void ff_put_vc1_mspel_mc31_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    vc1_mspel_mc(dst, src, stride, 0x7, rnd);
+}
+
+static void ff_put_vc1_mspel_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    vc1_mspel_mc(dst, src, stride, 0x8, rnd);
+}
+
+static void ff_put_vc1_mspel_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    vc1_mspel_mc(dst, src, stride, 0x9, rnd);
+}
+
+static void ff_put_vc1_mspel_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    vc1_mspel_mc(dst, src, stride, 0xA, rnd);
+}
+
+static void ff_put_vc1_mspel_mc32_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    vc1_mspel_mc(dst, src, stride, 0xB, rnd);
+}
+
+static void ff_put_vc1_mspel_mc03_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    vc1_mspel_mc(dst, src, stride, 0xC, rnd);
+}
+
+static void ff_put_vc1_mspel_mc13_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    vc1_mspel_mc(dst, src, stride, 0xD, rnd);
+}
+
+static void ff_put_vc1_mspel_mc23_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    vc1_mspel_mc(dst, src, stride, 0xE, rnd);
+}
+
+static void ff_put_vc1_mspel_mc33_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    vc1_mspel_mc(dst, src, stride, 0xF, rnd);
+}
+
+void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
+    dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
+    dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
+    dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
+    dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c;
+    dsp->vc1_h_overlap = vc1_h_overlap_c;
+    dsp->vc1_v_overlap = vc1_v_overlap_c;
+
+    dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_c;
+    dsp->put_vc1_mspel_pixels_tab[ 1] = ff_put_vc1_mspel_mc10_c;
+    dsp->put_vc1_mspel_pixels_tab[ 2] = ff_put_vc1_mspel_mc20_c;
+    dsp->put_vc1_mspel_pixels_tab[ 3] = ff_put_vc1_mspel_mc30_c;
+    dsp->put_vc1_mspel_pixels_tab[ 4] = ff_put_vc1_mspel_mc01_c;
+    dsp->put_vc1_mspel_pixels_tab[ 5] = ff_put_vc1_mspel_mc11_c;
+    dsp->put_vc1_mspel_pixels_tab[ 6] = ff_put_vc1_mspel_mc21_c;
+    dsp->put_vc1_mspel_pixels_tab[ 7] = ff_put_vc1_mspel_mc31_c;
+    dsp->put_vc1_mspel_pixels_tab[ 8] = ff_put_vc1_mspel_mc02_c;
+    dsp->put_vc1_mspel_pixels_tab[ 9] = ff_put_vc1_mspel_mc12_c;
+    dsp->put_vc1_mspel_pixels_tab[10] = ff_put_vc1_mspel_mc22_c;
+    dsp->put_vc1_mspel_pixels_tab[11] = ff_put_vc1_mspel_mc32_c;
+    dsp->put_vc1_mspel_pixels_tab[12] = ff_put_vc1_mspel_mc03_c;
+    dsp->put_vc1_mspel_pixels_tab[13] = ff_put_vc1_mspel_mc13_c;
+    dsp->put_vc1_mspel_pixels_tab[14] = ff_put_vc1_mspel_mc23_c;
+    dsp->put_vc1_mspel_pixels_tab[15] = ff_put_vc1_mspel_mc33_c;
+}
diff --git a/contrib/ffmpeg/libavcodec/vcr1.c b/contrib/ffmpeg/libavcodec/vcr1.c
new file mode 100644
index 000000000..62bf12320
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vcr1.c
@@ -0,0 +1,190 @@
+/*
+ * ATI VCR1 codec
+ * Copyright (c) 2003 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file vcr1.c
+ * ati vcr1 codec.
+ */
+
+#include "avcodec.h"
+#include "mpegvideo.h"
+
+//#undef NDEBUG
+//#include <assert.h>
+
+typedef struct VCR1Context{
+    AVCodecContext *avctx;
+    AVFrame picture;
+    int delta[16];
+    int offset[4];
+} VCR1Context;
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    VCR1Context * const a = avctx->priv_data;
+    AVFrame *picture = data;
+    AVFrame * const p= (AVFrame*)&a->picture;
+    uint8_t *bytestream= buf;
+    int i, x, y;
+
+    if(p->data[0])
+        avctx->release_buffer(avctx, p);
+
+    p->reference= 0;
+    if(avctx->get_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    p->pict_type= I_TYPE;
+    p->key_frame= 1;
+
+    for(i=0; i<16; i++){
+        a->delta[i]= *(bytestream++);
+        bytestream++;
+    }
+
+    for(y=0; y<avctx->height; y++){
+        int offset;
+        uint8_t *luma= &a->picture.data[0][ y*a->picture.linesize[0] ];
+
+        if((y&3) == 0){
+            uint8_t *cb= &a->picture.data[1][ (y>>2)*a->picture.linesize[1] ];
+            uint8_t *cr= &a->picture.data[2][ (y>>2)*a->picture.linesize[2] ];
+
+            for(i=0; i<4; i++)
+                a->offset[i]= *(bytestream++);
+
+            offset= a->offset[0] - a->delta[ bytestream[2]&0xF ];
+            for(x=0; x<avctx->width; x+=4){
+                luma[0]=( offset += a->delta[ bytestream[2]&0xF ]);
+                luma[1]=( offset += a->delta[ bytestream[2]>>4  ]);
+                luma[2]=( offset += a->delta[ bytestream[0]&0xF ]);
+                luma[3]=( offset += a->delta[ bytestream[0]>>4  ]);
+                luma += 4;
+
+                *(cb++) = bytestream[3];
+                *(cr++) = bytestream[1];
+
+                bytestream+= 4;
+            }
+        }else{
+            offset= a->offset[y&3] - a->delta[ bytestream[2]&0xF ];
+
+            for(x=0; x<avctx->width; x+=8){
+                luma[0]=( offset += a->delta[ bytestream[2]&0xF ]);
+                luma[1]=( offset += a->delta[ bytestream[2]>>4  ]);
+                luma[2]=( offset += a->delta[ bytestream[3]&0xF ]);
+                luma[3]=( offset += a->delta[ bytestream[3]>>4  ]);
+                luma[4]=( offset += a->delta[ bytestream[0]&0xF ]);
+                luma[5]=( offset += a->delta[ bytestream[0]>>4  ]);
+                luma[6]=( offset += a->delta[ bytestream[1]&0xF ]);
+                luma[7]=( offset += a->delta[ bytestream[1]>>4  ]);
+                luma += 8;
+                bytestream+= 4;
+            }
+        }
+    }
+
+    *picture= *(AVFrame*)&a->picture;
+    *data_size = sizeof(AVPicture);
+
+    emms_c();
+
+    return buf_size;
+}
+
+#if 0
+static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
+    VCR1Context * const a = avctx->priv_data;
+    AVFrame *pict = data;
+    AVFrame * const p= (AVFrame*)&a->picture;
+    int size;
+    int mb_x, mb_y;
+
+    *p = *pict;
+    p->pict_type= I_TYPE;
+    p->key_frame= 1;
+
+    emms_c();
+
+    align_put_bits(&a->pb);
+    while(get_bit_count(&a->pb)&31)
+        put_bits(&a->pb, 8, 0);
+
+    size= get_bit_count(&a->pb)/32;
+
+    return size*4;
+}
+#endif
+
+static void common_init(AVCodecContext *avctx){
+    VCR1Context * const a = avctx->priv_data;
+
+    avctx->coded_frame= (AVFrame*)&a->picture;
+    a->avctx= avctx;
+}
+
+static int decode_init(AVCodecContext *avctx){
+
+    common_init(avctx);
+
+    avctx->pix_fmt= PIX_FMT_YUV410P;
+
+    return 0;
+}
+
+#if 0
+static int encode_init(AVCodecContext *avctx){
+
+    common_init(avctx);
+
+    return 0;
+}
+#endif
+
+AVCodec vcr1_decoder = {
+    "vcr1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_VCR1,
+    sizeof(VCR1Context),
+    decode_init,
+    NULL,
+    NULL,
+    decode_frame,
+    CODEC_CAP_DR1,
+};
+#if 0
+#ifdef CONFIG_ENCODERS
+
+AVCodec vcr1_encoder = {
+    "vcr1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_VCR1,
+    sizeof(VCR1Context),
+    encode_init,
+    encode_frame,
+    //encode_end,
+};
+
+#endif //CONFIG_ENCODERS
+#endif
diff --git a/contrib/ffmpeg/libavcodec/vmdav.c b/contrib/ffmpeg/libavcodec/vmdav.c
new file mode 100644
index 000000000..a9937144e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vmdav.c
@@ -0,0 +1,572 @@
+/*
+ * Sierra VMD Audio & Video Decoders
+ * Copyright (C) 2004 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file vmdvideo.c
+ * Sierra VMD audio & video decoders
+ * by Vladimir "VAG" Gneushev (vagsoft at mail.ru)
+ * for more information on the Sierra VMD format, visit:
+ *   http://www.pcisys.net/~melanson/codecs/
+ *
+ * The video decoder outputs PAL8 colorspace data. The decoder expects
+ * a 0x330-byte VMD file header to be transmitted via extradata during
+ * codec initialization. Each encoded frame that is sent to this decoder
+ * is expected to be prepended with the appropriate 16-byte frame
+ * information record from the VMD file.
+ *
+ * The audio decoder, like the video decoder, expects each encoded data
+ * chunk to be prepended with the appropriate 16-byte frame information
+ * record from the VMD file. It does not require the 0x330-byte VMD file
+ * header, but it does need the audio setup parameters passed in through
+ * normal libavcodec API means.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+
+#define VMD_HEADER_SIZE 0x330
+#define PALETTE_COUNT 256
+
+/*
+ * Video Decoder
+ */
+
+typedef struct VmdVideoContext {
+
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    AVFrame frame;
+    AVFrame prev_frame;
+
+    unsigned char *buf;
+    int size;
+
+    unsigned char palette[PALETTE_COUNT * 4];
+    unsigned char *unpack_buffer;
+    int unpack_buffer_size;
+
+} VmdVideoContext;
+
+#define QUEUE_SIZE 0x1000
+#define QUEUE_MASK 0x0FFF
+
+static void lz_unpack(unsigned char *src, unsigned char *dest, int dest_len)
+{
+    unsigned char *s;
+    unsigned char *d;
+    unsigned char *d_end;
+    unsigned char queue[QUEUE_SIZE];
+    unsigned int qpos;
+    unsigned int dataleft;
+    unsigned int chainofs;
+    unsigned int chainlen;
+    unsigned int speclen;
+    unsigned char tag;
+    unsigned int i, j;
+
+    s = src;
+    d = dest;
+    d_end = d + dest_len;
+    dataleft = LE_32(s);
+    s += 4;
+    memset(queue, 0x20, QUEUE_SIZE);
+    if (LE_32(s) == 0x56781234) {
+        s += 4;
+        qpos = 0x111;
+        speclen = 0xF + 3;
+    } else {
+        qpos = 0xFEE;
+        speclen = 100;  /* no speclen */
+    }
+
+    while (dataleft > 0) {
+        tag = *s++;
+        if ((tag == 0xFF) && (dataleft > 8)) {
+            if (d + 8 > d_end)
+                return;
+            for (i = 0; i < 8; i++) {
+                queue[qpos++] = *d++ = *s++;
+                qpos &= QUEUE_MASK;
+            }
+            dataleft -= 8;
+        } else {
+            for (i = 0; i < 8; i++) {
+                if (dataleft == 0)
+                    break;
+                if (tag & 0x01) {
+                    if (d + 1 > d_end)
+                        return;
+                    queue[qpos++] = *d++ = *s++;
+                    qpos &= QUEUE_MASK;
+                    dataleft--;
+                } else {
+                    chainofs = *s++;
+                    chainofs |= ((*s & 0xF0) << 4);
+                    chainlen = (*s++ & 0x0F) + 3;
+                    if (chainlen == speclen)
+                        chainlen = *s++ + 0xF + 3;
+                    if (d + chainlen > d_end)
+                        return;
+                    for (j = 0; j < chainlen; j++) {
+                        *d = queue[chainofs++ & QUEUE_MASK];
+                        queue[qpos++] = *d++;
+                        qpos &= QUEUE_MASK;
+                    }
+                    dataleft -= chainlen;
+                }
+                tag >>= 1;
+            }
+        }
+    }
+}
+
+static int rle_unpack(unsigned char *src, unsigned char *dest,
+    int src_len, int dest_len)
+{
+    unsigned char *ps;
+    unsigned char *pd;
+    int i, l;
+    unsigned char *dest_end = dest + dest_len;
+
+    ps = src;
+    pd = dest;
+    if (src_len & 1)
+        *pd++ = *ps++;
+
+    src_len >>= 1;
+    i = 0;
+    do {
+        l = *ps++;
+        if (l & 0x80) {
+            l = (l & 0x7F) * 2;
+            if (pd + l > dest_end)
+                return (ps - src);
+            memcpy(pd, ps, l);
+            ps += l;
+            pd += l;
+        } else {
+            if (pd + i > dest_end)
+                return (ps - src);
+            for (i = 0; i < l; i++) {
+                *pd++ = ps[0];
+                *pd++ = ps[1];
+            }
+            ps += 2;
+        }
+        i += l;
+    } while (i < src_len);
+
+    return (ps - src);
+}
+
+static void vmd_decode(VmdVideoContext *s)
+{
+    int i;
+    unsigned int *palette32;
+    unsigned char r, g, b;
+
+    /* point to the start of the encoded data */
+    unsigned char *p = s->buf + 16;
+
+    unsigned char *pb;
+    unsigned char meth;
+    unsigned char *dp;   /* pointer to current frame */
+    unsigned char *pp;   /* pointer to previous frame */
+    unsigned char len;
+    int ofs;
+
+    int frame_x, frame_y;
+    int frame_width, frame_height;
+    int dp_size;
+
+    frame_x = LE_16(&s->buf[6]);
+    frame_y = LE_16(&s->buf[8]);
+    frame_width = LE_16(&s->buf[10]) - frame_x + 1;
+    frame_height = LE_16(&s->buf[12]) - frame_y + 1;
+
+    /* if only a certain region will be updated, copy the entire previous
+     * frame before the decode */
+    if (frame_x || frame_y || (frame_width != s->avctx->width) ||
+        (frame_height != s->avctx->height)) {
+
+        memcpy(s->frame.data[0], s->prev_frame.data[0],
+            s->avctx->height * s->frame.linesize[0]);
+    }
+
+    /* check if there is a new palette */
+    if (s->buf[15] & 0x02) {
+        p += 2;
+        palette32 = (unsigned int *)s->palette;
+        for (i = 0; i < PALETTE_COUNT; i++) {
+            r = *p++ * 4;
+            g = *p++ * 4;
+            b = *p++ * 4;
+            palette32[i] = (r << 16) | (g << 8) | (b);
+        }
+        s->size -= (256 * 3 + 2);
+    }
+    if (s->size >= 0) {
+        /* originally UnpackFrame in VAG's code */
+        pb = p;
+        meth = *pb++;
+        if (meth & 0x80) {
+            lz_unpack(pb, s->unpack_buffer, s->unpack_buffer_size);
+            meth &= 0x7F;
+            pb = s->unpack_buffer;
+        }
+
+        dp = &s->frame.data[0][frame_y * s->frame.linesize[0] + frame_x];
+        dp_size = s->frame.linesize[0] * s->avctx->height;
+        pp = &s->prev_frame.data[0][frame_y * s->prev_frame.linesize[0] + frame_x];
+        switch (meth) {
+        case 1:
+            for (i = 0; i < frame_height; i++) {
+                ofs = 0;
+                do {
+                    len = *pb++;
+                    if (len & 0x80) {
+                        len = (len & 0x7F) + 1;
+                        if (ofs + len > frame_width)
+                            return;
+                        memcpy(&dp[ofs], pb, len);
+                        pb += len;
+                        ofs += len;
+                    } else {
+                        /* interframe pixel copy */
+                        if (ofs + len + 1 > frame_width)
+                            return;
+                        memcpy(&dp[ofs], &pp[ofs], len + 1);
+                        ofs += len + 1;
+                    }
+                } while (ofs < frame_width);
+                if (ofs > frame_width) {
+                    av_log(s->avctx, AV_LOG_ERROR, "VMD video: offset > width (%d > %d)\n",
+                        ofs, frame_width);
+                    break;
+                }
+                dp += s->frame.linesize[0];
+                pp += s->prev_frame.linesize[0];
+            }
+            break;
+
+        case 2:
+            for (i = 0; i < frame_height; i++) {
+                memcpy(dp, pb, frame_width);
+                pb += frame_width;
+                dp += s->frame.linesize[0];
+                pp += s->prev_frame.linesize[0];
+            }
+            break;
+
+        case 3:
+            for (i = 0; i < frame_height; i++) {
+                ofs = 0;
+                do {
+                    len = *pb++;
+                    if (len & 0x80) {
+                        len = (len & 0x7F) + 1;
+                        if (*pb++ == 0xFF)
+                            len = rle_unpack(pb, &dp[ofs], len, frame_width - ofs);
+                        else
+                            memcpy(&dp[ofs], pb, len);
+                        pb += len;
+                        ofs += len;
+                    } else {
+                        /* interframe pixel copy */
+                        if (ofs + len + 1 > frame_width)
+                            return;
+                        memcpy(&dp[ofs], &pp[ofs], len + 1);
+                        ofs += len + 1;
+                    }
+                } while (ofs < frame_width);
+                if (ofs > frame_width) {
+                    av_log(s->avctx, AV_LOG_ERROR, "VMD video: offset > width (%d > %d)\n",
+                        ofs, frame_width);
+                }
+                dp += s->frame.linesize[0];
+                pp += s->prev_frame.linesize[0];
+            }
+            break;
+        }
+    }
+}
+
+static int vmdvideo_decode_init(AVCodecContext *avctx)
+{
+    VmdVideoContext *s = (VmdVideoContext *)avctx->priv_data;
+    int i;
+    unsigned int *palette32;
+    int palette_index = 0;
+    unsigned char r, g, b;
+    unsigned char *vmd_header;
+    unsigned char *raw_palette;
+
+    s->avctx = avctx;
+    avctx->pix_fmt = PIX_FMT_PAL8;
+    avctx->has_b_frames = 0;
+    dsputil_init(&s->dsp, avctx);
+
+    /* make sure the VMD header made it */
+    if (s->avctx->extradata_size != VMD_HEADER_SIZE) {
+        av_log(s->avctx, AV_LOG_ERROR, "VMD video: expected extradata size of %d\n",
+            VMD_HEADER_SIZE);
+        return -1;
+    }
+    vmd_header = (unsigned char *)avctx->extradata;
+
+    s->unpack_buffer_size = LE_32(&vmd_header[800]);
+    s->unpack_buffer = av_malloc(s->unpack_buffer_size);
+    if (!s->unpack_buffer)
+        return -1;
+
+    /* load up the initial palette */
+    raw_palette = &vmd_header[28];
+    palette32 = (unsigned int *)s->palette;
+    for (i = 0; i < PALETTE_COUNT; i++) {
+        r = raw_palette[palette_index++] * 4;
+        g = raw_palette[palette_index++] * 4;
+        b = raw_palette[palette_index++] * 4;
+        palette32[i] = (r << 16) | (g << 8) | (b);
+    }
+
+    s->frame.data[0] = s->prev_frame.data[0] = NULL;
+
+    return 0;
+}
+
+static int vmdvideo_decode_frame(AVCodecContext *avctx,
+                                 void *data, int *data_size,
+                                 uint8_t *buf, int buf_size)
+{
+    VmdVideoContext *s = (VmdVideoContext *)avctx->priv_data;
+
+    s->buf = buf;
+    s->size = buf_size;
+
+    if (buf_size < 16)
+        return buf_size;
+
+    s->frame.reference = 1;
+    if (avctx->get_buffer(avctx, &s->frame)) {
+        av_log(s->avctx, AV_LOG_ERROR, "VMD Video: get_buffer() failed\n");
+        return -1;
+    }
+
+    vmd_decode(s);
+
+    /* make the palette available on the way out */
+    memcpy(s->frame.data[1], s->palette, PALETTE_COUNT * 4);
+
+    if (s->prev_frame.data[0])
+        avctx->release_buffer(avctx, &s->prev_frame);
+
+    /* shuffle frames */
+    s->prev_frame = s->frame;
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->frame;
+
+    /* report that the buffer was completely consumed */
+    return buf_size;
+}
+
+static int vmdvideo_decode_end(AVCodecContext *avctx)
+{
+    VmdVideoContext *s = (VmdVideoContext *)avctx->priv_data;
+
+    if (s->prev_frame.data[0])
+        avctx->release_buffer(avctx, &s->prev_frame);
+    av_free(s->unpack_buffer);
+
+    return 0;
+}
+
+
+/*
+ * Audio Decoder
+ */
+
+typedef struct VmdAudioContext {
+    AVCodecContext *avctx;
+    int channels;
+    int bits;
+    int block_align;
+    int predictors[2];
+} VmdAudioContext;
+
+static uint16_t vmdaudio_table[128] = {
+    0x000, 0x008, 0x010, 0x020, 0x030, 0x040, 0x050, 0x060, 0x070, 0x080,
+    0x090, 0x0A0, 0x0B0, 0x0C0, 0x0D0, 0x0E0, 0x0F0, 0x100, 0x110, 0x120,
+    0x130, 0x140, 0x150, 0x160, 0x170, 0x180, 0x190, 0x1A0, 0x1B0, 0x1C0,
+    0x1D0, 0x1E0, 0x1F0, 0x200, 0x208, 0x210, 0x218, 0x220, 0x228, 0x230,
+    0x238, 0x240, 0x248, 0x250, 0x258, 0x260, 0x268, 0x270, 0x278, 0x280,
+    0x288, 0x290, 0x298, 0x2A0, 0x2A8, 0x2B0, 0x2B8, 0x2C0, 0x2C8, 0x2D0,
+    0x2D8, 0x2E0, 0x2E8, 0x2F0, 0x2F8, 0x300, 0x308, 0x310, 0x318, 0x320,
+    0x328, 0x330, 0x338, 0x340, 0x348, 0x350, 0x358, 0x360, 0x368, 0x370,
+    0x378, 0x380, 0x388, 0x390, 0x398, 0x3A0, 0x3A8, 0x3B0, 0x3B8, 0x3C0,
+    0x3C8, 0x3D0, 0x3D8, 0x3E0, 0x3E8, 0x3F0, 0x3F8, 0x400, 0x440, 0x480,
+    0x4C0, 0x500, 0x540, 0x580, 0x5C0, 0x600, 0x640, 0x680, 0x6C0, 0x700,
+    0x740, 0x780, 0x7C0, 0x800, 0x900, 0xA00, 0xB00, 0xC00, 0xD00, 0xE00,
+    0xF00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x3000, 0x4000
+};
+
+static int vmdaudio_decode_init(AVCodecContext *avctx)
+{
+    VmdAudioContext *s = (VmdAudioContext *)avctx->priv_data;
+
+    s->avctx = avctx;
+    s->channels = avctx->channels;
+    s->bits = avctx->bits_per_sample;
+    s->block_align = avctx->block_align;
+
+    av_log(s->avctx, AV_LOG_DEBUG, "%d channels, %d bits/sample, block align = %d, sample rate = %d\n",
+            s->channels, s->bits, s->block_align, avctx->sample_rate);
+
+    return 0;
+}
+
+static void vmdaudio_decode_audio(VmdAudioContext *s, unsigned char *data,
+    uint8_t *buf, int stereo)
+{
+    int i;
+    int chan = 0;
+    int16_t *out = (int16_t*)data;
+
+    for(i = 0; i < s->block_align; i++) {
+        if(buf[i] & 0x80)
+            s->predictors[chan] -= vmdaudio_table[buf[i] & 0x7F];
+        else
+            s->predictors[chan] += vmdaudio_table[buf[i]];
+        s->predictors[chan] = clip(s->predictors[chan], -32768, 32767);
+        out[i] = s->predictors[chan];
+        chan ^= stereo;
+    }
+}
+
+static int vmdaudio_loadsound(VmdAudioContext *s, unsigned char *data,
+    uint8_t *buf, int silence)
+{
+    int bytes_decoded = 0;
+    int i;
+
+//    if (silence)
+//        av_log(s->avctx, AV_LOG_INFO, "silent block!\n");
+    if (s->channels == 2) {
+
+        /* stereo handling */
+        if (silence) {
+            memset(data, 0, s->block_align * 2);
+        } else {
+            if (s->bits == 16)
+                vmdaudio_decode_audio(s, data, buf, 1);
+            else {
+                /* copy the data but convert it to signed */
+                for (i = 0; i < s->block_align; i++){
+                    *data++ = buf[i] + 0x80;
+                    *data++ = buf[i] + 0x80;
+                }
+            }
+        }
+    } else {
+        bytes_decoded = s->block_align * 2;
+
+        /* mono handling */
+        if (silence) {
+            memset(data, 0, s->block_align * 2);
+        } else {
+            if (s->bits == 16) {
+                vmdaudio_decode_audio(s, data, buf, 0);
+            } else {
+                /* copy the data but convert it to signed */
+                for (i = 0; i < s->block_align; i++){
+                    *data++ = buf[i] + 0x80;
+                    *data++ = buf[i] + 0x80;
+                }
+            }
+        }
+    }
+
+    return s->block_align * 2;
+}
+
+static int vmdaudio_decode_frame(AVCodecContext *avctx,
+                                 void *data, int *data_size,
+                                 uint8_t *buf, int buf_size)
+{
+    VmdAudioContext *s = (VmdAudioContext *)avctx->priv_data;
+    unsigned char *output_samples = (unsigned char *)data;
+
+    /* point to the start of the encoded data */
+    unsigned char *p = buf + 16;
+
+    if (buf_size < 16)
+        return buf_size;
+
+    if (buf[6] == 1) {
+        /* the chunk contains audio */
+        *data_size = vmdaudio_loadsound(s, output_samples, p, 0);
+    } else if (buf[6] == 2) {
+        /* the chunk may contain audio */
+        p += 4;
+        *data_size = vmdaudio_loadsound(s, output_samples, p, (buf_size == 16));
+        output_samples += (s->block_align * s->bits / 8);
+    } else if (buf[6] == 3) {
+        /* silent chunk */
+        *data_size = vmdaudio_loadsound(s, output_samples, p, 1);
+    }
+
+    return buf_size;
+}
+
+
+/*
+ * Public Data Structures
+ */
+
+AVCodec vmdvideo_decoder = {
+    "vmdvideo",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_VMDVIDEO,
+    sizeof(VmdVideoContext),
+    vmdvideo_decode_init,
+    NULL,
+    vmdvideo_decode_end,
+    vmdvideo_decode_frame,
+    CODEC_CAP_DR1,
+};
+
+AVCodec vmdaudio_decoder = {
+    "vmdaudio",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_VMDAUDIO,
+    sizeof(VmdAudioContext),
+    vmdaudio_decode_init,
+    NULL,
+    NULL,
+    vmdaudio_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/vmnc.c b/contrib/ffmpeg/libavcodec/vmnc.c
new file mode 100644
index 000000000..7b5f567e7
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vmnc.c
@@ -0,0 +1,525 @@
+/*
+ * VMware Screen Codec (VMnc) decoder
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file vmnc.c
+ * VMware Screen Codec (VMnc) decoder
+ * As Alex Beregszaszi discovered, this is effectively RFB data dump
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "common.h"
+#include "avcodec.h"
+
+enum EncTypes {
+    MAGIC_WMVd = 0x574D5664,
+    MAGIC_WMVe,
+    MAGIC_WMVf,
+    MAGIC_WMVg,
+    MAGIC_WMVh,
+    MAGIC_WMVi,
+    MAGIC_WMVj
+};
+
+enum HexTile_Flags {
+    HT_RAW =  1, // tile is raw
+    HT_BKG =  2, // background color is present
+    HT_FG  =  4, // foreground color is present
+    HT_SUB =  8, // subrects are present
+    HT_CLR = 16  // each subrect has own color
+};
+
+/*
+ * Decoder context
+ */
+typedef struct VmncContext {
+    AVCodecContext *avctx;
+    AVFrame pic;
+
+    int bpp;
+    int bpp2;
+    int bigendian;
+    uint8_t pal[768];
+    int width, height;
+
+    /* cursor data */
+    int cur_w, cur_h;
+    int cur_x, cur_y;
+    int cur_hx, cur_hy;
+    uint8_t* curbits, *curmask;
+    uint8_t* screendta;
+} VmncContext;
+
+/* read pixel value from stream */
+static always_inline int vmnc_get_pixel(uint8_t* buf, int bpp, int be) {
+    switch(bpp * 2 + be) {
+    case 2:
+    case 3: return *buf;
+    case 4: return LE_16(buf);
+    case 5: return BE_16(buf);
+    case 8: return LE_32(buf);
+    case 9: return BE_32(buf);
+    default: return 0;
+    }
+}
+
+static void load_cursor(VmncContext *c, uint8_t *src)
+{
+    int i, j, p;
+    const int bpp = c->bpp2;
+    uint8_t  *dst8  = c->curbits;
+    uint16_t *dst16 = (uint16_t*)c->curbits;
+    uint32_t *dst32 = (uint32_t*)c->curbits;
+
+    for(j = 0; j < c->cur_h; j++) {
+        for(i = 0; i < c->cur_w; i++) {
+            p = vmnc_get_pixel(src, bpp, c->bigendian);
+            src += bpp;
+            if(bpp == 1) *dst8++ = p;
+            if(bpp == 2) *dst16++ = p;
+            if(bpp == 4) *dst32++ = p;
+        }
+    }
+    dst8 = c->curmask;
+    dst16 = (uint16_t*)c->curmask;
+    dst32 = (uint32_t*)c->curmask;
+    for(j = 0; j < c->cur_h; j++) {
+        for(i = 0; i < c->cur_w; i++) {
+            p = vmnc_get_pixel(src, bpp, c->bigendian);
+            src += bpp;
+            if(bpp == 1) *dst8++ = p;
+            if(bpp == 2) *dst16++ = p;
+            if(bpp == 4) *dst32++ = p;
+        }
+    }
+}
+
+static void put_cursor(uint8_t *dst, int stride, VmncContext *c, int dx, int dy)
+{
+    int i, j;
+    int w, h, x, y;
+    w = c->cur_w;
+    if(c->width < c->cur_x + c->cur_w) w = c->width - c->cur_x;
+    h = c->cur_h;
+    if(c->height < c->cur_y + c->cur_h) h = c->height - c->cur_y;
+    x = c->cur_x;
+    y = c->cur_y;
+    if(x < 0) {
+        w += x;
+        x = 0;
+    }
+    if(y < 0) {
+        h += y;
+        y = 0;
+    }
+
+    if((w < 1) || (h < 1)) return;
+    dst += x * c->bpp2 + y * stride;
+
+    if(c->bpp2 == 1) {
+        uint8_t* cd = c->curbits, *msk = c->curmask;
+        for(j = 0; j < h; j++) {
+            for(i = 0; i < w; i++)
+                dst[i] = (dst[i] & cd[i]) ^ msk[i];
+            msk += c->cur_w;
+            cd += c->cur_w;
+            dst += stride;
+        }
+    } else if(c->bpp2 == 2) {
+        uint16_t* cd = (uint16_t*)c->curbits, *msk = (uint16_t*)c->curmask;
+        uint16_t* dst2;
+        for(j = 0; j < h; j++) {
+            dst2 = (uint16_t*)dst;
+            for(i = 0; i < w; i++)
+                dst2[i] = (dst2[i] & cd[i]) ^ msk[i];
+            msk += c->cur_w;
+            cd += c->cur_w;
+            dst += stride;
+        }
+    } else if(c->bpp2 == 4) {
+        uint32_t* cd = (uint32_t*)c->curbits, *msk = (uint32_t*)c->curmask;
+        uint32_t* dst2;
+        for(j = 0; j < h; j++) {
+            dst2 = (uint32_t*)dst;
+            for(i = 0; i < w; i++)
+                dst2[i] = (dst2[i] & cd[i]) ^ msk[i];
+            msk += c->cur_w;
+            cd += c->cur_w;
+            dst += stride;
+        }
+    }
+}
+
+/* fill rectangle with given colour */
+static always_inline void paint_rect(uint8_t *dst, int dx, int dy, int w, int h, int color, int bpp, int stride)
+{
+    int i, j;
+    dst += dx * bpp + dy * stride;
+    if(bpp == 1){
+        for(j = 0; j < h; j++) {
+            memset(dst, color, w);
+            dst += stride;
+        }
+    }else if(bpp == 2){
+        uint16_t* dst2;
+        for(j = 0; j < h; j++) {
+            dst2 = (uint16_t*)dst;
+            for(i = 0; i < w; i++) {
+                *dst2++ = color;
+            }
+            dst += stride;
+        }
+    }else if(bpp == 4){
+        uint32_t* dst2;
+        for(j = 0; j < h; j++) {
+            dst2 = (uint32_t*)dst;
+            for(i = 0; i < w; i++) {
+                dst2[i] = color;
+            }
+            dst += stride;
+        }
+    }
+}
+
+static always_inline void paint_raw(uint8_t *dst, int w, int h, uint8_t* src, int bpp, int be, int stride)
+{
+    int i, j, p;
+    for(j = 0; j < h; j++) {
+        for(i = 0; i < w; i++) {
+            p = vmnc_get_pixel(src, bpp, be);
+            src += bpp;
+            switch(bpp){
+            case 1:
+                dst[i] = p;
+                break;
+            case 2:
+                ((uint16_t*)dst)[i] = p;
+                break;
+            case 4:
+                ((uint32_t*)dst)[i] = p;
+                break;
+            }
+        }
+        dst += stride;
+    }
+}
+
+static int decode_hextile(VmncContext *c, uint8_t* dst, uint8_t* src, int ssize, int w, int h, int stride)
+{
+    int i, j, k;
+    int bg = 0, fg = 0, rects, color, flags, xy, wh;
+    const int bpp = c->bpp2;
+    uint8_t *dst2;
+    int bw = 16, bh = 16;
+    uint8_t *ssrc=src;
+
+    for(j = 0; j < h; j += 16) {
+        dst2 = dst;
+        bw = 16;
+        if(j + 16 > h) bh = h - j;
+        for(i = 0; i < w; i += 16, dst2 += 16 * bpp) {
+            if(src - ssrc >= ssize) {
+                av_log(c->avctx, AV_LOG_ERROR, "Premature end of data!\n");
+                return -1;
+            }
+            if(i + 16 > w) bw = w - i;
+            flags = *src++;
+            if(flags & HT_RAW) {
+                if(src - ssrc > ssize - bw * bh * bpp) {
+                    av_log(c->avctx, AV_LOG_ERROR, "Premature end of data!\n");
+                    return -1;
+                }
+                paint_raw(dst2, bw, bh, src, bpp, c->bigendian, stride);
+                src += bw * bh * bpp;
+            } else {
+                if(flags & HT_BKG) {
+                    bg = vmnc_get_pixel(src, bpp, c->bigendian); src += bpp;
+                }
+                if(flags & HT_FG) {
+                    fg = vmnc_get_pixel(src, bpp, c->bigendian); src += bpp;
+                }
+                rects = 0;
+                if(flags & HT_SUB)
+                    rects = *src++;
+                color = !!(flags & HT_CLR);
+
+                paint_rect(dst2, 0, 0, bw, bh, bg, bpp, stride);
+
+                if(src - ssrc > ssize - rects * (color * bpp + 2)) {
+                    av_log(c->avctx, AV_LOG_ERROR, "Premature end of data!\n");
+                    return -1;
+                }
+                for(k = 0; k < rects; k++) {
+                    if(color) {
+                        fg = vmnc_get_pixel(src, bpp, c->bigendian); src += bpp;
+                    }
+                    xy = *src++;
+                    wh = *src++;
+                    paint_rect(dst2, xy >> 4, xy & 0xF, (wh>>4)+1, (wh & 0xF)+1, fg, bpp, stride);
+                }
+            }
+        }
+        dst += stride * 16;
+    }
+    return src - ssrc;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
+{
+    VmncContext * const c = (VmncContext *)avctx->priv_data;
+    uint8_t *outptr;
+    uint8_t *src = buf;
+    int dx, dy, w, h, depth, enc, chunks, res, size_left;
+
+    c->pic.reference = 1;
+    c->pic.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if(avctx->reget_buffer(avctx, &c->pic) < 0){
+        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+        return -1;
+    }
+
+    c->pic.key_frame = 0;
+    c->pic.pict_type = FF_P_TYPE;
+
+    //restore screen after cursor
+    if(c->screendta) {
+        int i;
+        w = c->cur_w;
+        if(c->width < c->cur_x + w) w = c->width - c->cur_x;
+        h = c->cur_h;
+        if(c->height < c->cur_y + h) h = c->height - c->cur_y;
+        dx = c->cur_x;
+        if(dx < 0) {
+            w += dx;
+            dx = 0;
+        }
+        dy = c->cur_y;
+        if(dy < 0) {
+            h += dy;
+            dy = 0;
+        }
+        if((w > 0) && (h > 0)) {
+            outptr = c->pic.data[0] + dx * c->bpp2 + dy * c->pic.linesize[0];
+            for(i = 0; i < h; i++) {
+                memcpy(outptr, c->screendta + i * c->cur_w * c->bpp2, w * c->bpp2);
+                outptr += c->pic.linesize[0];
+            }
+        }
+    }
+    src += 2;
+    chunks = BE_16(src); src += 2;
+    while(chunks--) {
+        dx = BE_16(src); src += 2;
+        dy = BE_16(src); src += 2;
+        w  = BE_16(src); src += 2;
+        h  = BE_16(src); src += 2;
+        enc = BE_32(src); src += 4;
+        outptr = c->pic.data[0] + dx * c->bpp2 + dy * c->pic.linesize[0];
+        size_left = buf_size - (src - buf);
+        switch(enc) {
+        case MAGIC_WMVd: // cursor
+            if(size_left < 2 + w * h * c->bpp2 * 2) {
+                av_log(avctx, AV_LOG_ERROR, "Premature end of data! (need %i got %i)\n", 2 + w * h * c->bpp2 * 2, size_left);
+                return -1;
+            }
+            src += 2;
+            c->cur_w = w;
+            c->cur_h = h;
+            c->cur_hx = dx;
+            c->cur_hy = dy;
+            if((c->cur_hx > c->cur_w) || (c->cur_hy > c->cur_h)) {
+                av_log(avctx, AV_LOG_ERROR, "Cursor hot spot is not in image: %ix%i of %ix%i cursor size\n", c->cur_hx, c->cur_hy, c->cur_w, c->cur_h);
+                c->cur_hx = c->cur_hy = 0;
+            }
+            c->curbits = av_realloc(c->curbits, c->cur_w * c->cur_h * c->bpp2);
+            c->curmask = av_realloc(c->curmask, c->cur_w * c->cur_h * c->bpp2);
+            c->screendta = av_realloc(c->screendta, c->cur_w * c->cur_h * c->bpp2);
+            load_cursor(c, src);
+            src += w * h * c->bpp2 * 2;
+            break;
+        case MAGIC_WMVe: // unknown
+            src += 2;
+            break;
+        case MAGIC_WMVf: // update cursor position
+            c->cur_x = dx - c->cur_hx;
+            c->cur_y = dy - c->cur_hy;
+            break;
+        case MAGIC_WMVg: // unknown
+            src += 10;
+            break;
+        case MAGIC_WMVh: // unknown
+            src += 4;
+            break;
+        case MAGIC_WMVi: // ServerInitialization struct
+            c->pic.key_frame = 1;
+            c->pic.pict_type = FF_I_TYPE;
+            depth = *src++;
+            if(depth != c->bpp) {
+                av_log(avctx, AV_LOG_INFO, "Depth mismatch. Container %i bpp, Frame data: %i bpp\n", c->bpp, depth);
+            }
+            src++;
+            c->bigendian = *src++;
+            if(c->bigendian & (~1)) {
+                av_log(avctx, AV_LOG_INFO, "Invalid header: bigendian flag = %i\n", c->bigendian);
+                return -1;
+            }
+            //skip the rest of pixel format data
+            src += 13;
+            break;
+        case MAGIC_WMVj: // unknown
+            src += 2;
+            break;
+        case 0x00000000: // raw rectangle data
+            if((dx + w > c->width) || (dy + h > c->height)) {
+                av_log(avctx, AV_LOG_ERROR, "Incorrect frame size: %ix%i+%ix%i of %ix%i\n", w, h, dx, dy, c->width, c->height);
+                return -1;
+            }
+            if(size_left < w * h * c->bpp2) {
+                av_log(avctx, AV_LOG_ERROR, "Premature end of data! (need %i got %i)\n", w * h * c->bpp2, size_left);
+                return -1;
+            }
+            paint_raw(outptr, w, h, src, c->bpp2, c->bigendian, c->pic.linesize[0]);
+            src += w * h * c->bpp2;
+            break;
+        case 0x00000005: // HexTile encoded rectangle
+            if((dx + w > c->width) || (dy + h > c->height)) {
+                av_log(avctx, AV_LOG_ERROR, "Incorrect frame size: %ix%i+%ix%i of %ix%i\n", w, h, dx, dy, c->width, c->height);
+                return -1;
+            }
+            res = decode_hextile(c, outptr, src, size_left, w, h, c->pic.linesize[0]);
+            if(res < 0)
+                return -1;
+            src += res;
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Unsupported block type 0x%08X\n", enc);
+            chunks = 0; // leave chunks decoding loop
+        }
+    }
+    if(c->screendta){
+        int i;
+        //save screen data before painting cursor
+        w = c->cur_w;
+        if(c->width < c->cur_x + w) w = c->width - c->cur_x;
+        h = c->cur_h;
+        if(c->height < c->cur_y + h) h = c->height - c->cur_y;
+        dx = c->cur_x;
+        if(dx < 0) {
+            w += dx;
+            dx = 0;
+        }
+        dy = c->cur_y;
+        if(dy < 0) {
+            h += dy;
+            dy = 0;
+        }
+        if((w > 0) && (h > 0)) {
+            outptr = c->pic.data[0] + dx * c->bpp2 + dy * c->pic.linesize[0];
+            for(i = 0; i < h; i++) {
+                memcpy(c->screendta + i * c->cur_w * c->bpp2, outptr, w * c->bpp2);
+                outptr += c->pic.linesize[0];
+            }
+            outptr = c->pic.data[0];
+            put_cursor(outptr, c->pic.linesize[0], c, c->cur_x, c->cur_y);
+        }
+    }
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = c->pic;
+
+    /* always report that the buffer was completely consumed */
+    return buf_size;
+}
+
+
+
+/*
+ *
+ * Init VMnc decoder
+ *
+ */
+static int decode_init(AVCodecContext *avctx)
+{
+    VmncContext * const c = (VmncContext *)avctx->priv_data;
+
+    c->avctx = avctx;
+    avctx->has_b_frames = 0;
+
+    c->pic.data[0] = NULL;
+    c->width = avctx->width;
+    c->height = avctx->height;
+
+    if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
+        return 1;
+    }
+    c->bpp = avctx->bits_per_sample;
+    c->bpp2 = c->bpp/8;
+
+    switch(c->bpp){
+    case 8:
+        avctx->pix_fmt = PIX_FMT_PAL8;
+        break;
+    case 16:
+        avctx->pix_fmt = PIX_FMT_RGB555;
+        break;
+    case 32:
+        avctx->pix_fmt = PIX_FMT_RGB32;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unsupported bitdepth %i\n", c->bpp);
+    }
+
+    return 0;
+}
+
+
+
+/*
+ *
+ * Uninit VMnc decoder
+ *
+ */
+static int decode_end(AVCodecContext *avctx)
+{
+    VmncContext * const c = (VmncContext *)avctx->priv_data;
+
+    if (c->pic.data[0])
+        avctx->release_buffer(avctx, &c->pic);
+
+    av_free(c->curbits);
+    av_free(c->curmask);
+    av_free(c->screendta);
+    return 0;
+}
+
+AVCodec vmnc_decoder = {
+    "VMware video",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_VMNC,
+    sizeof(VmncContext),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame
+};
+
diff --git a/contrib/ffmpeg/libavcodec/vorbis.c b/contrib/ffmpeg/libavcodec/vorbis.c
new file mode 100644
index 000000000..ca8d0a956
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vorbis.c
@@ -0,0 +1,1814 @@
+/**
+ * @file vorbis.c
+ * Vorbis I decoder
+ * @author Denes Balatoni  ( dbalatoni programozo hu )
+
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#undef V_DEBUG
+//#define V_DEBUG
+//#define AV_DEBUG(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__)
+
+#include <math.h>
+
+#define ALT_BITSTREAM_READER_LE
+#include "avcodec.h"
+#include "bitstream.h"
+#include "dsputil.h"
+
+#include "vorbis.h"
+
+#define V_NB_BITS 8
+#define V_NB_BITS2 11
+#define V_MAX_VLCS (1<<16)
+
+#ifndef V_DEBUG
+#define AV_DEBUG(...)
+#endif
+
+#undef NDEBUG
+#include <assert.h>
+
+typedef struct {
+    uint_fast8_t dimensions;
+    uint_fast8_t lookup_type;
+    uint_fast8_t maxdepth;
+    VLC vlc;
+    float *codevectors;
+    unsigned int nb_bits;
+} vorbis_codebook;
+
+typedef union vorbis_floor_u vorbis_floor_data;
+typedef struct vorbis_floor0_s vorbis_floor0;
+typedef struct vorbis_floor1_s vorbis_floor1;
+struct vorbis_context_s;
+typedef
+uint_fast8_t (* vorbis_floor_decode_func)
+             (struct vorbis_context_s *, vorbis_floor_data *, float *);
+typedef struct {
+    uint_fast8_t floor_type;
+    vorbis_floor_decode_func decode;
+    union vorbis_floor_u
+    {
+        struct vorbis_floor0_s
+        {
+            uint_fast8_t order;
+            uint_fast16_t rate;
+            uint_fast16_t bark_map_size;
+            int_fast32_t * map[2];
+            uint_fast32_t map_size[2];
+            uint_fast8_t amplitude_bits;
+            uint_fast8_t amplitude_offset;
+            uint_fast8_t num_books;
+            uint_fast8_t * book_list;
+            float * lsp;
+        } t0;
+        struct vorbis_floor1_s
+        {
+            uint_fast8_t partitions;
+            uint_fast8_t maximum_class;
+            uint_fast8_t partition_class[32];
+            uint_fast8_t class_dimensions[16];
+            uint_fast8_t class_subclasses[16];
+            uint_fast8_t class_masterbook[16];
+            int_fast16_t subclass_books[16][8];
+            uint_fast8_t multiplier;
+            uint_fast16_t x_list_dim;
+            floor1_entry_t * list;
+        } t1;
+    } data;
+} vorbis_floor;
+
+typedef struct {
+    uint_fast16_t type;
+    uint_fast32_t begin;
+    uint_fast32_t end;
+    uint_fast32_t partition_size;
+    uint_fast8_t classifications;
+    uint_fast8_t classbook;
+    int_fast16_t books[64][8];
+    uint_fast8_t maxpass;
+} vorbis_residue;
+
+typedef struct {
+    uint_fast8_t submaps;
+    uint_fast16_t coupling_steps;
+    uint_fast8_t *magnitude;
+    uint_fast8_t *angle;
+    uint_fast8_t *mux;
+    uint_fast8_t submap_floor[16];
+    uint_fast8_t submap_residue[16];
+} vorbis_mapping;
+
+typedef struct {
+    uint_fast8_t blockflag;
+    uint_fast16_t windowtype;
+    uint_fast16_t transformtype;
+    uint_fast8_t mapping;
+} vorbis_mode;
+
+typedef struct vorbis_context_s {
+    AVCodecContext *avccontext;
+    GetBitContext gb;
+    DSPContext dsp;
+
+    MDCTContext mdct[2];
+    uint_fast8_t first_frame;
+    uint_fast32_t version;
+    uint_fast8_t audio_channels;
+    uint_fast32_t audio_samplerate;
+    uint_fast32_t bitrate_maximum;
+    uint_fast32_t bitrate_nominal;
+    uint_fast32_t bitrate_minimum;
+    uint_fast32_t blocksize[2];
+    const float * win[2];
+    uint_fast16_t codebook_count;
+    vorbis_codebook *codebooks;
+    uint_fast8_t floor_count;
+    vorbis_floor *floors;
+    uint_fast8_t residue_count;
+    vorbis_residue *residues;
+    uint_fast8_t mapping_count;
+    vorbis_mapping *mappings;
+    uint_fast8_t mode_count;
+    vorbis_mode *modes;
+    uint_fast8_t mode_number; // mode number for the current packet
+    float *channel_residues;
+    float *channel_floors;
+    float *saved;
+    uint_fast16_t saved_start;
+    float *ret;
+    float *buf;
+    float *buf_tmp;
+    uint_fast32_t add_bias; // for float->int conversion
+    uint_fast32_t exp_bias;
+} vorbis_context;
+
+/* Helper functions */
+
+#define BARK(x) \
+    (13.1f*atan(0.00074f*(x))+2.24f*atan(1.85e-8f*(x)*(x))+1e-4f*(x))
+
+unsigned int ff_vorbis_nth_root(unsigned int x, unsigned int n) {   // x^(1/n)
+    unsigned int ret=0, i, j;
+
+    do {
+        ++ret;
+        for(i=0,j=ret;i<n-1;i++) j*=ret;
+    } while (j<=x);
+
+    return (ret-1);
+}
+
+static float vorbisfloat2float(uint_fast32_t val) {
+    double mant=val&0x1fffff;
+    long exp=(val&0x7fe00000L)>>21;
+    if (val&0x80000000) mant=-mant;
+    return(ldexp(mant, exp-20-768));
+}
+
+
+// Generate vlc codes from vorbis huffman code lengths
+
+int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, uint_fast32_t num) {
+    uint_fast32_t exit_at_level[33]={404,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+
+    uint_fast8_t i,j;
+    uint_fast32_t code,p;
+
+#ifdef V_DEBUG
+    GetBitContext gb;
+#endif
+
+    for(p=0;(bits[p]==0) && (p<num);++p);
+    if (p==num) {
+//        av_log(vc->avccontext, AV_LOG_INFO, "An empty codebook. Heh?! \n");
+        return 0;
+    }
+
+    codes[p]=0;
+    for(i=0;i<bits[p];++i) {
+        exit_at_level[i+1]=1<<i;
+    }
+
+#ifdef V_DEBUG
+    av_log(NULL, AV_LOG_INFO, " %d. of %d code len %d code %d - ", p, num, bits[p], codes[p]);
+    init_get_bits(&gb, (uint_fast8_t *)&codes[p], bits[p]);
+    for(i=0;i<bits[p];++i) {
+        av_log(NULL, AV_LOG_INFO, "%s", get_bits1(&gb) ? "1" : "0");
+    }
+    av_log(NULL, AV_LOG_INFO, "\n");
+#endif
+
+    ++p;
+
+    for(;p<num;++p) {
+        if (bits[p]==0) continue;
+        // find corresponding exit(node which the tree can grow further from)
+        for(i=bits[p];i>0;--i) {
+            if (exit_at_level[i]) break;
+        }
+        if (!i) return 1; // overspecified tree
+        code=exit_at_level[i];
+        exit_at_level[i]=0;
+        // construct code (append 0s to end) and introduce new exits
+        for(j=i+1;j<=bits[p];++j) {
+            exit_at_level[j]=code+(1<<(j-1));
+        }
+        codes[p]=code;
+
+#ifdef V_DEBUG
+        av_log(NULL, AV_LOG_INFO, " %d. code len %d code %d - ", p, bits[p], codes[p]);
+        init_get_bits(&gb, (uint_fast8_t *)&codes[p], bits[p]);
+        for(i=0;i<bits[p];++i) {
+            av_log(NULL, AV_LOG_INFO, "%s", get_bits1(&gb) ? "1" : "0");
+        }
+        av_log(NULL, AV_LOG_INFO, "\n");
+#endif
+
+    }
+
+    //no exits should be left (underspecified tree - ie. unused valid vlcs - not allowed by SPEC)
+    for (p=1; p<33; p++)
+        if (exit_at_level[p]) return 1;
+
+    return 0;
+}
+
+void ff_vorbis_ready_floor1_list(floor1_entry_t * list, int values) {
+    int i;
+    list[0].sort = 0;
+    list[1].sort = 1;
+    for (i = 2; i < values; i++) {
+        int j;
+        list[i].low = 0;
+        list[i].high = 1;
+        list[i].sort = i;
+        for (j = 2; j < i; j++) {
+            int tmp = list[j].x;
+            if (tmp < list[i].x) {
+                if (tmp > list[list[i].low].x) list[i].low = j;
+            } else {
+                if (tmp < list[list[i].high].x) list[i].high = j;
+            }
+        }
+    }
+    for (i = 0; i < values - 1; i++) {
+        int j;
+        for (j = i + 1; j < values; j++) {
+            if (list[list[i].sort].x > list[list[j].sort].x) {
+                int tmp = list[i].sort;
+                list[i].sort = list[j].sort;
+                list[j].sort = tmp;
+            }
+        }
+    }
+}
+
+// Free all allocated memory -----------------------------------------
+
+static void vorbis_free(vorbis_context *vc) {
+    int_fast16_t i;
+
+    av_freep(&vc->channel_residues);
+    av_freep(&vc->channel_floors);
+    av_freep(&vc->saved);
+    av_freep(&vc->ret);
+    av_freep(&vc->buf);
+    av_freep(&vc->buf_tmp);
+
+    av_freep(&vc->residues);
+    av_freep(&vc->modes);
+
+    ff_mdct_end(&vc->mdct[0]);
+    ff_mdct_end(&vc->mdct[1]);
+
+    for(i=0;i<vc->codebook_count;++i) {
+        av_free(vc->codebooks[i].codevectors);
+        free_vlc(&vc->codebooks[i].vlc);
+    }
+    av_freep(&vc->codebooks);
+
+    for(i=0;i<vc->floor_count;++i) {
+        if(vc->floors[i].floor_type==0) {
+            av_free(vc->floors[i].data.t0.map[0]);
+            av_free(vc->floors[i].data.t0.map[1]);
+            av_free(vc->floors[i].data.t0.book_list);
+            av_free(vc->floors[i].data.t0.lsp);
+        }
+        else {
+            av_free(vc->floors[i].data.t1.list);
+        }
+    }
+    av_freep(&vc->floors);
+
+    for(i=0;i<vc->mapping_count;++i) {
+        av_free(vc->mappings[i].magnitude);
+        av_free(vc->mappings[i].angle);
+        av_free(vc->mappings[i].mux);
+    }
+    av_freep(&vc->mappings);
+
+    if(vc->exp_bias){
+        av_freep(&vc->win[0]);
+        av_freep(&vc->win[1]);
+    }
+}
+
+// Parse setup header -------------------------------------------------
+
+// Process codebooks part
+
+static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc) {
+    uint_fast16_t cb;
+    uint8_t *tmp_vlc_bits;
+    uint32_t *tmp_vlc_codes;
+    GetBitContext *gb=&vc->gb;
+
+    vc->codebook_count=get_bits(gb,8)+1;
+
+    AV_DEBUG(" Codebooks: %d \n", vc->codebook_count);
+
+    vc->codebooks=(vorbis_codebook *)av_mallocz(vc->codebook_count * sizeof(vorbis_codebook));
+    tmp_vlc_bits=(uint8_t *)av_mallocz(V_MAX_VLCS * sizeof(uint8_t));
+    tmp_vlc_codes=(uint32_t *)av_mallocz(V_MAX_VLCS * sizeof(uint32_t));
+
+    for(cb=0;cb<vc->codebook_count;++cb) {
+        vorbis_codebook *codebook_setup=&vc->codebooks[cb];
+        uint_fast8_t ordered;
+        uint_fast32_t t, used_entries=0;
+        uint_fast32_t entries;
+
+        AV_DEBUG(" %d. Codebook \n", cb);
+
+        if (get_bits(gb, 24)!=0x564342) {
+            av_log(vc->avccontext, AV_LOG_ERROR, " %"PRIdFAST16". Codebook setup data corrupt. \n", cb);
+            goto error;
+        }
+
+        codebook_setup->dimensions=get_bits(gb, 16);
+        if (codebook_setup->dimensions>16) {
+            av_log(vc->avccontext, AV_LOG_ERROR, " %"PRIdFAST16". Codebook's dimension is too large (%d). \n", cb, codebook_setup->dimensions);
+            goto error;
+        }
+        entries=get_bits(gb, 24);
+        if (entries>V_MAX_VLCS) {
+            av_log(vc->avccontext, AV_LOG_ERROR, " %"PRIdFAST16". Codebook has too many entries (%"PRIdFAST32"). \n", cb, entries);
+            goto error;
+        }
+
+        ordered=get_bits1(gb);
+
+        AV_DEBUG(" codebook_dimensions %d, codebook_entries %d \n", codebook_setup->dimensions, entries);
+
+        if (!ordered) {
+            uint_fast16_t ce;
+            uint_fast8_t flag;
+            uint_fast8_t sparse=get_bits1(gb);
+
+            AV_DEBUG(" not ordered \n");
+
+            if (sparse) {
+                AV_DEBUG(" sparse \n");
+
+                used_entries=0;
+                for(ce=0;ce<entries;++ce) {
+                    flag=get_bits1(gb);
+                    if (flag) {
+                        tmp_vlc_bits[ce]=get_bits(gb, 5)+1;
+                        ++used_entries;
+                    }
+                    else tmp_vlc_bits[ce]=0;
+                }
+            } else {
+                AV_DEBUG(" not sparse \n");
+
+                used_entries=entries;
+                for(ce=0;ce<entries;++ce) {
+                    tmp_vlc_bits[ce]=get_bits(gb, 5)+1;
+                }
+            }
+        } else {
+            uint_fast16_t current_entry=0;
+            uint_fast8_t current_length=get_bits(gb, 5)+1;
+
+            AV_DEBUG(" ordered, current length: %d \n", current_length);  //FIXME
+
+            used_entries=entries;
+            for(;current_entry<used_entries;++current_length) {
+                uint_fast16_t i, number;
+
+                AV_DEBUG(" number bits: %d ", ilog(entries - current_entry));
+
+                number=get_bits(gb, ilog(entries - current_entry));
+
+                AV_DEBUG(" number: %d \n", number);
+
+                for(i=current_entry;i<number+current_entry;++i) {
+                    if (i<used_entries) tmp_vlc_bits[i]=current_length;
+                }
+
+                current_entry+=number;
+            }
+            if (current_entry>used_entries) {
+                av_log(vc->avccontext, AV_LOG_ERROR, " More codelengths than codes in codebook. \n");
+                goto error;
+            }
+        }
+
+        codebook_setup->lookup_type=get_bits(gb, 4);
+
+        AV_DEBUG(" lookup type: %d : %s \n", codebook_setup->lookup_type, codebook_setup->lookup_type ? "vq" : "no lookup" );
+
+// If the codebook is used for (inverse) VQ, calculate codevectors.
+
+        if (codebook_setup->lookup_type==1) {
+            uint_fast16_t i, j, k;
+            uint_fast16_t codebook_lookup_values=ff_vorbis_nth_root(entries, codebook_setup->dimensions);
+            uint_fast16_t codebook_multiplicands[codebook_lookup_values];
+
+            float codebook_minimum_value=vorbisfloat2float(get_bits_long(gb, 32));
+            float codebook_delta_value=vorbisfloat2float(get_bits_long(gb, 32));
+            uint_fast8_t codebook_value_bits=get_bits(gb, 4)+1;
+            uint_fast8_t codebook_sequence_p=get_bits1(gb);
+
+            AV_DEBUG(" We expect %d numbers for building the codevectors. \n", codebook_lookup_values);
+            AV_DEBUG("  delta %f minmum %f \n", codebook_delta_value, codebook_minimum_value);
+
+            for(i=0;i<codebook_lookup_values;++i) {
+                codebook_multiplicands[i]=get_bits(gb, codebook_value_bits);
+
+                AV_DEBUG(" multiplicands*delta+minmum : %e \n", (float)codebook_multiplicands[i]*codebook_delta_value+codebook_minimum_value);
+                AV_DEBUG(" multiplicand %d \n", codebook_multiplicands[i]);
+            }
+
+// Weed out unused vlcs and build codevector vector
+            codebook_setup->codevectors=(float *)av_mallocz(used_entries*codebook_setup->dimensions * sizeof(float));
+            for(j=0, i=0;i<entries;++i) {
+                uint_fast8_t dim=codebook_setup->dimensions;
+
+                if (tmp_vlc_bits[i]) {
+                    float last=0.0;
+                    uint_fast32_t lookup_offset=i;
+
+#ifdef V_DEBUG
+                    av_log(vc->avccontext, AV_LOG_INFO, "Lookup offset %d ,", i);
+#endif
+
+                    for(k=0;k<dim;++k) {
+                        uint_fast32_t multiplicand_offset = lookup_offset % codebook_lookup_values;
+                        codebook_setup->codevectors[j*dim+k]=codebook_multiplicands[multiplicand_offset]*codebook_delta_value+codebook_minimum_value+last;
+                        if (codebook_sequence_p) {
+                            last=codebook_setup->codevectors[j*dim+k];
+                        }
+                        lookup_offset/=codebook_lookup_values;
+                    }
+                    tmp_vlc_bits[j]=tmp_vlc_bits[i];
+
+#ifdef V_DEBUG
+                    av_log(vc->avccontext, AV_LOG_INFO, "real lookup offset %d, vector: ", j);
+                    for(k=0;k<dim;++k) {
+                        av_log(vc->avccontext, AV_LOG_INFO, " %f ", codebook_setup->codevectors[j*dim+k]);
+                    }
+                    av_log(vc->avccontext, AV_LOG_INFO, "\n");
+#endif
+
+                    ++j;
+                }
+            }
+            if (j!=used_entries) {
+                av_log(vc->avccontext, AV_LOG_ERROR, "Bug in codevector vector building code. \n");
+                goto error;
+            }
+            entries=used_entries;
+        }
+        else if (codebook_setup->lookup_type>=2) {
+            av_log(vc->avccontext, AV_LOG_ERROR, "Codebook lookup type not supported. \n");
+            goto error;
+        }
+
+// Initialize VLC table
+        if (ff_vorbis_len2vlc(tmp_vlc_bits, tmp_vlc_codes, entries)) {
+            av_log(vc->avccontext, AV_LOG_ERROR, " Invalid code lengths while generating vlcs. \n");
+            goto error;
+        }
+        codebook_setup->maxdepth=0;
+        for(t=0;t<entries;++t)
+            if (tmp_vlc_bits[t]>=codebook_setup->maxdepth) codebook_setup->maxdepth=tmp_vlc_bits[t];
+
+        if(codebook_setup->maxdepth > 3*V_NB_BITS) codebook_setup->nb_bits=V_NB_BITS2;
+        else                                       codebook_setup->nb_bits=V_NB_BITS;
+
+        codebook_setup->maxdepth=(codebook_setup->maxdepth+codebook_setup->nb_bits-1)/codebook_setup->nb_bits;
+
+        if (init_vlc(&codebook_setup->vlc, codebook_setup->nb_bits, entries, tmp_vlc_bits, sizeof(*tmp_vlc_bits), sizeof(*tmp_vlc_bits), tmp_vlc_codes, sizeof(*tmp_vlc_codes), sizeof(*tmp_vlc_codes), INIT_VLC_LE)) {
+            av_log(vc->avccontext, AV_LOG_ERROR, " Error generating vlc tables. \n");
+            goto error;
+        }
+    }
+
+    av_free(tmp_vlc_bits);
+    av_free(tmp_vlc_codes);
+    return 0;
+
+// Error:
+error:
+    av_free(tmp_vlc_bits);
+    av_free(tmp_vlc_codes);
+    return 1;
+}
+
+// Process time domain transforms part (unused in Vorbis I)
+
+static int vorbis_parse_setup_hdr_tdtransforms(vorbis_context *vc) {
+    GetBitContext *gb=&vc->gb;
+    uint_fast8_t i;
+    uint_fast8_t vorbis_time_count=get_bits(gb, 6)+1;
+
+    for(i=0;i<vorbis_time_count;++i) {
+        uint_fast16_t vorbis_tdtransform=get_bits(gb, 16);
+
+        AV_DEBUG(" Vorbis time domain transform %d: %d \n", vorbis_time_count, vorbis_tdtransform);
+
+        if (vorbis_tdtransform) {
+            av_log(vc->avccontext, AV_LOG_ERROR, "Vorbis time domain transform data nonzero. \n");
+            return 1;
+        }
+    }
+    return 0;
+}
+
+// Process floors part
+
+static uint_fast8_t vorbis_floor0_decode(vorbis_context *vc,
+                                         vorbis_floor_data *vfu, float *vec);
+static void create_map( vorbis_context * vc, uint_fast8_t floor_number );
+static uint_fast8_t vorbis_floor1_decode(vorbis_context *vc,
+                                         vorbis_floor_data *vfu, float *vec);
+static int vorbis_parse_setup_hdr_floors(vorbis_context *vc) {
+    GetBitContext *gb=&vc->gb;
+    uint_fast16_t i,j,k;
+
+    vc->floor_count=get_bits(gb, 6)+1;
+
+    vc->floors=(vorbis_floor *)av_mallocz(vc->floor_count * sizeof(vorbis_floor));
+
+    for (i=0;i<vc->floor_count;++i) {
+        vorbis_floor *floor_setup=&vc->floors[i];
+
+        floor_setup->floor_type=get_bits(gb, 16);
+
+        AV_DEBUG(" %d. floor type %d \n", i, floor_setup->floor_type);
+
+        if (floor_setup->floor_type==1) {
+            uint_fast8_t maximum_class=0;
+            uint_fast8_t rangebits;
+            uint_fast16_t floor1_values=2;
+
+            floor_setup->decode=vorbis_floor1_decode;
+
+            floor_setup->data.t1.partitions=get_bits(gb, 5);
+
+            AV_DEBUG(" %d.floor: %d partitions \n", i, floor_setup->data.t1.partitions);
+
+            for(j=0;j<floor_setup->data.t1.partitions;++j) {
+                floor_setup->data.t1.partition_class[j]=get_bits(gb, 4);
+                if (floor_setup->data.t1.partition_class[j]>maximum_class) maximum_class=floor_setup->data.t1.partition_class[j];
+
+                AV_DEBUG(" %d. floor %d partition class %d \n", i, j, floor_setup->data.t1.partition_class[j]);
+
+            }
+
+            AV_DEBUG(" maximum class %d \n", maximum_class);
+
+            floor_setup->data.t1.maximum_class=maximum_class;
+
+            for(j=0;j<=maximum_class;++j) {
+                floor_setup->data.t1.class_dimensions[j]=get_bits(gb, 3)+1;
+                floor_setup->data.t1.class_subclasses[j]=get_bits(gb, 2);
+
+                AV_DEBUG(" %d floor %d class dim: %d subclasses %d \n", i, j, floor_setup->data.t1.class_dimensions[j], floor_setup->data.t1.class_subclasses[j]);
+
+                if (floor_setup->data.t1.class_subclasses[j]) {
+                    floor_setup->data.t1.class_masterbook[j]=get_bits(gb, 8);
+
+                    AV_DEBUG("   masterbook: %d \n", floor_setup->data.t1.class_masterbook[j]);
+                }
+
+                for(k=0;k<(1<<floor_setup->data.t1.class_subclasses[j]);++k) {
+                    floor_setup->data.t1.subclass_books[j][k]=(int16_t)get_bits(gb, 8)-1;
+
+                    AV_DEBUG("    book %d. : %d \n", k, floor_setup->data.t1.subclass_books[j][k]);
+                }
+            }
+
+            floor_setup->data.t1.multiplier=get_bits(gb, 2)+1;
+            floor_setup->data.t1.x_list_dim=2;
+
+            for(j=0;j<floor_setup->data.t1.partitions;++j) {
+                floor_setup->data.t1.x_list_dim+=floor_setup->data.t1.class_dimensions[floor_setup->data.t1.partition_class[j]];
+            }
+
+            floor_setup->data.t1.list=(floor1_entry_t *)av_mallocz(floor_setup->data.t1.x_list_dim * sizeof(floor1_entry_t));
+
+
+            rangebits=get_bits(gb, 4);
+            floor_setup->data.t1.list[0].x = 0;
+            floor_setup->data.t1.list[1].x = (1<<rangebits);
+
+            for(j=0;j<floor_setup->data.t1.partitions;++j) {
+                for(k=0;k<floor_setup->data.t1.class_dimensions[floor_setup->data.t1.partition_class[j]];++k,++floor1_values) {
+                    floor_setup->data.t1.list[floor1_values].x=get_bits(gb, rangebits);
+
+                    AV_DEBUG(" %d. floor1 Y coord. %d \n", floor1_values, floor_setup->data.t1.list[floor1_values].x);
+                }
+            }
+
+// Precalculate order of x coordinates - needed for decode
+            ff_vorbis_ready_floor1_list(floor_setup->data.t1.list, floor_setup->data.t1.x_list_dim);
+        }
+        else if(floor_setup->floor_type==0) {
+            uint_fast8_t max_codebook_dim=0;
+
+            floor_setup->decode=vorbis_floor0_decode;
+
+            floor_setup->data.t0.order=get_bits(gb, 8);
+            floor_setup->data.t0.rate=get_bits(gb, 16);
+            floor_setup->data.t0.bark_map_size=get_bits(gb, 16);
+            floor_setup->data.t0.amplitude_bits=get_bits(gb, 6);
+            /* zero would result in a div by zero later *
+             * 2^0 - 1 == 0                             */
+            if (floor_setup->data.t0.amplitude_bits == 0) {
+              av_log(vc->avccontext, AV_LOG_ERROR,
+                     "Floor 0 amplitude bits is 0.\n");
+              return 1;
+            }
+            floor_setup->data.t0.amplitude_offset=get_bits(gb, 8);
+            floor_setup->data.t0.num_books=get_bits(gb, 4)+1;
+
+            /* allocate mem for booklist */
+            floor_setup->data.t0.book_list=
+                av_malloc(floor_setup->data.t0.num_books);
+            if(!floor_setup->data.t0.book_list) { return 1; }
+            /* read book indexes */
+            {
+                int idx;
+                uint_fast8_t book_idx;
+                for (idx=0;idx<floor_setup->data.t0.num_books;++idx) {
+                    book_idx=get_bits(gb, 8);
+                    floor_setup->data.t0.book_list[idx]=book_idx;
+                    if (vc->codebooks[book_idx].dimensions > max_codebook_dim)
+                        max_codebook_dim=vc->codebooks[book_idx].dimensions;
+
+                    if (floor_setup->data.t0.book_list[idx]>vc->codebook_count)
+                        return 1;
+                }
+            }
+
+            create_map( vc, i );
+
+            /* allocate mem for lsp coefficients */
+            {
+                /* codebook dim is for padding if codebook dim doesn't *
+                 * divide order+1 then we need to read more data       */
+                floor_setup->data.t0.lsp=
+                    av_malloc((floor_setup->data.t0.order+1 + max_codebook_dim)
+                              * sizeof(float));
+                if(!floor_setup->data.t0.lsp) { return 1; }
+            }
+
+#ifdef V_DEBUG /* debug output parsed headers */
+            AV_DEBUG("floor0 order: %u\n", floor_setup->data.t0.order);
+            AV_DEBUG("floor0 rate: %u\n", floor_setup->data.t0.rate);
+            AV_DEBUG("floor0 bark map size: %u\n",
+              floor_setup->data.t0.bark_map_size);
+            AV_DEBUG("floor0 amplitude bits: %u\n",
+              floor_setup->data.t0.amplitude_bits);
+            AV_DEBUG("floor0 amplitude offset: %u\n",
+              floor_setup->data.t0.amplitude_offset);
+            AV_DEBUG("floor0 number of books: %u\n",
+              floor_setup->data.t0.num_books);
+            AV_DEBUG("floor0 book list pointer: %p\n",
+              floor_setup->data.t0.book_list);
+            {
+              int idx;
+              for (idx=0;idx<floor_setup->data.t0.num_books;++idx) {
+                AV_DEBUG( "  Book %d: %u\n",
+                  idx+1,
+                  floor_setup->data.t0.book_list[idx] );
+              }
+            }
+#endif
+        }
+        else {
+            av_log(vc->avccontext, AV_LOG_ERROR, "Invalid floor type!\n");
+            return 1;
+        }
+    }
+    return 0;
+}
+
+// Process residues part
+
+static int vorbis_parse_setup_hdr_residues(vorbis_context *vc){
+    GetBitContext *gb=&vc->gb;
+    uint_fast8_t i, j, k;
+
+    vc->residue_count=get_bits(gb, 6)+1;
+    vc->residues=(vorbis_residue *)av_mallocz(vc->residue_count * sizeof(vorbis_residue));
+
+    AV_DEBUG(" There are %d residues. \n", vc->residue_count);
+
+    for(i=0;i<vc->residue_count;++i) {
+        vorbis_residue *res_setup=&vc->residues[i];
+        uint_fast8_t cascade[64];
+        uint_fast8_t high_bits;
+        uint_fast8_t low_bits;
+
+        res_setup->type=get_bits(gb, 16);
+
+        AV_DEBUG(" %d. residue type %d \n", i, res_setup->type);
+
+        res_setup->begin=get_bits(gb, 24);
+        res_setup->end=get_bits(gb, 24);
+        res_setup->partition_size=get_bits(gb, 24)+1;
+        res_setup->classifications=get_bits(gb, 6)+1;
+        res_setup->classbook=get_bits(gb, 8);
+
+        AV_DEBUG("    begin %d end %d part.size %d classif.s %d classbook %d \n", res_setup->begin, res_setup->end, res_setup->partition_size,
+          res_setup->classifications, res_setup->classbook);
+
+        for(j=0;j<res_setup->classifications;++j) {
+            high_bits=0;
+            low_bits=get_bits(gb, 3);
+            if (get_bits1(gb)) {
+                high_bits=get_bits(gb, 5);
+            }
+            cascade[j]=(high_bits<<3)+low_bits;
+
+            AV_DEBUG("     %d class casscade depth: %d \n", j, ilog(cascade[j]));
+        }
+
+        res_setup->maxpass=0;
+        for(j=0;j<res_setup->classifications;++j) {
+            for(k=0;k<8;++k) {
+                if (cascade[j]&(1<<k)) {
+                        res_setup->books[j][k]=get_bits(gb, 8);
+
+                    AV_DEBUG("     %d class casscade depth %d book: %d \n", j, k, res_setup->books[j][k]);
+
+                    if (k>res_setup->maxpass) {
+                        res_setup->maxpass=k;
+                    }
+                } else {
+                    res_setup->books[j][k]=-1;
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+// Process mappings part
+
+static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc) {
+    GetBitContext *gb=&vc->gb;
+    uint_fast8_t i, j;
+
+    vc->mapping_count=get_bits(gb, 6)+1;
+    vc->mappings=(vorbis_mapping *)av_mallocz(vc->mapping_count * sizeof(vorbis_mapping));
+
+    AV_DEBUG(" There are %d mappings. \n", vc->mapping_count);
+
+    for(i=0;i<vc->mapping_count;++i) {
+        vorbis_mapping *mapping_setup=&vc->mappings[i];
+
+        if (get_bits(gb, 16)) {
+            av_log(vc->avccontext, AV_LOG_ERROR, "Other mappings than type 0 are not compliant with the Vorbis I specification. \n");
+            return 1;
+        }
+        if (get_bits1(gb)) {
+            mapping_setup->submaps=get_bits(gb, 4)+1;
+        } else {
+            mapping_setup->submaps=1;
+        }
+
+        if (get_bits1(gb)) {
+            mapping_setup->coupling_steps=get_bits(gb, 8)+1;
+            mapping_setup->magnitude=(uint_fast8_t *)av_mallocz(mapping_setup->coupling_steps * sizeof(uint_fast8_t));
+            mapping_setup->angle=(uint_fast8_t *)av_mallocz(mapping_setup->coupling_steps * sizeof(uint_fast8_t));
+            for(j=0;j<mapping_setup->coupling_steps;++j) {
+                mapping_setup->magnitude[j]=get_bits(gb, ilog(vc->audio_channels-1));
+                mapping_setup->angle[j]=get_bits(gb, ilog(vc->audio_channels-1));
+                // FIXME: sanity checks
+            }
+        } else {
+            mapping_setup->coupling_steps=0;
+        }
+
+        AV_DEBUG("   %d mapping coupling steps: %d \n", i, mapping_setup->coupling_steps);
+
+        if(get_bits(gb, 2)) {
+            av_log(vc->avccontext, AV_LOG_ERROR, "%d. mapping setup data invalid. \n", i);
+            return 1; // following spec.
+        }
+
+        if (mapping_setup->submaps>1) {
+            mapping_setup->mux=(uint_fast8_t *)av_mallocz(vc->audio_channels * sizeof(uint_fast8_t));
+            for(j=0;j<vc->audio_channels;++j) {
+                mapping_setup->mux[j]=get_bits(gb, 4);
+            }
+        }
+
+        for(j=0;j<mapping_setup->submaps;++j) {
+            get_bits(gb, 8); // FIXME check?
+            mapping_setup->submap_floor[j]=get_bits(gb, 8);
+            mapping_setup->submap_residue[j]=get_bits(gb, 8);
+
+            AV_DEBUG("   %d mapping %d submap : floor %d, residue %d \n", i, j, mapping_setup->submap_floor[j], mapping_setup->submap_residue[j]);
+        }
+    }
+    return 0;
+}
+
+// Process modes part
+
+static void create_map( vorbis_context * vc, uint_fast8_t floor_number )
+{
+    vorbis_floor * floors=vc->floors;
+    vorbis_floor0 * vf;
+    int idx;
+    int_fast8_t blockflag;
+    int_fast32_t * map;
+    int_fast32_t n; //TODO: could theoretically be smaller?
+
+    for (blockflag=0;blockflag<2;++blockflag)
+    {
+    n=vc->blocksize[blockflag]/2;
+    floors[floor_number].data.t0.map[blockflag]=
+        av_malloc((n+1) * sizeof(int_fast32_t)); // n+sentinel
+
+    map=floors[floor_number].data.t0.map[blockflag];
+    vf=&floors[floor_number].data.t0;
+
+    for (idx=0; idx<n;++idx) {
+        map[idx]=floor( BARK((vf->rate*idx)/(2.0f*n)) *
+                              ((vf->bark_map_size)/
+                               BARK(vf->rate/2.0f )) );
+        if (vf->bark_map_size-1 < map[idx]) {
+            map[idx]=vf->bark_map_size-1;
+        }
+    }
+    map[n]=-1;
+    vf->map_size[blockflag]=n;
+    }
+
+#   ifdef V_DEBUG
+    for(idx=0;idx<=n;++idx) {
+        AV_DEBUG("floor0 map: map at pos %d is %d\n",
+                 idx, map[idx]);
+    }
+#   endif
+}
+
+static int vorbis_parse_setup_hdr_modes(vorbis_context *vc) {
+    GetBitContext *gb=&vc->gb;
+    uint_fast8_t i;
+
+    vc->mode_count=get_bits(gb, 6)+1;
+    vc->modes=(vorbis_mode *)av_mallocz(vc->mode_count * sizeof(vorbis_mode));
+
+    AV_DEBUG(" There are %d modes.\n", vc->mode_count);
+
+    for(i=0;i<vc->mode_count;++i) {
+        vorbis_mode *mode_setup=&vc->modes[i];
+
+        mode_setup->blockflag=get_bits(gb, 1);
+        mode_setup->windowtype=get_bits(gb, 16); //FIXME check
+        mode_setup->transformtype=get_bits(gb, 16); //FIXME check
+        mode_setup->mapping=get_bits(gb, 8); //FIXME check
+
+        AV_DEBUG(" %d mode: blockflag %d, windowtype %d, transformtype %d, mapping %d \n", i, mode_setup->blockflag, mode_setup->windowtype, mode_setup->transformtype, mode_setup->mapping);
+    }
+    return 0;
+}
+
+// Process the whole setup header using the functions above
+
+static int vorbis_parse_setup_hdr(vorbis_context *vc) {
+    GetBitContext *gb=&vc->gb;
+
+    if ((get_bits(gb, 8)!='v') || (get_bits(gb, 8)!='o') ||
+    (get_bits(gb, 8)!='r') || (get_bits(gb, 8)!='b') ||
+    (get_bits(gb, 8)!='i') || (get_bits(gb, 8)!='s')) {
+        av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis setup header packet corrupt (no vorbis signature). \n");
+        return 1;
+    }
+
+    if (vorbis_parse_setup_hdr_codebooks(vc)) {
+        av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis setup header packet corrupt (codebooks). \n");
+        return 2;
+    }
+    if (vorbis_parse_setup_hdr_tdtransforms(vc)) {
+        av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis setup header packet corrupt (time domain transforms). \n");
+        return 3;
+    }
+    if (vorbis_parse_setup_hdr_floors(vc)) {
+        av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis setup header packet corrupt (floors). \n");
+        return 4;
+    }
+    if (vorbis_parse_setup_hdr_residues(vc)) {
+        av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis setup header packet corrupt (residues). \n");
+        return 5;
+    }
+    if (vorbis_parse_setup_hdr_mappings(vc)) {
+        av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis setup header packet corrupt (mappings). \n");
+        return 6;
+    }
+    if (vorbis_parse_setup_hdr_modes(vc)) {
+        av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis setup header packet corrupt (modes). \n");
+        return 7;
+    }
+    if (!get_bits1(gb)) {
+        av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis setup header packet corrupt (framing flag). \n");
+        return 8; // framing flag bit unset error
+    }
+
+    return 0;
+}
+
+// Process the identification header
+
+static int vorbis_parse_id_hdr(vorbis_context *vc){
+    GetBitContext *gb=&vc->gb;
+    uint_fast8_t bl0, bl1;
+
+    if ((get_bits(gb, 8)!='v') || (get_bits(gb, 8)!='o') ||
+    (get_bits(gb, 8)!='r') || (get_bits(gb, 8)!='b') ||
+    (get_bits(gb, 8)!='i') || (get_bits(gb, 8)!='s')) {
+        av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis id header packet corrupt (no vorbis signature). \n");
+        return 1;
+    }
+
+    vc->version=get_bits_long(gb, 32);    //FIXME check 0
+    vc->audio_channels=get_bits(gb, 8);   //FIXME check >0
+    vc->audio_samplerate=get_bits_long(gb, 32);   //FIXME check >0
+    vc->bitrate_maximum=get_bits_long(gb, 32);
+    vc->bitrate_nominal=get_bits_long(gb, 32);
+    vc->bitrate_minimum=get_bits_long(gb, 32);
+    bl0=get_bits(gb, 4);
+    bl1=get_bits(gb, 4);
+    vc->blocksize[0]=(1<<bl0);
+    vc->blocksize[1]=(1<<bl1);
+    if (bl0>13 || bl0<6 || bl1>13 || bl1<6 || bl1<bl0) {
+        av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis id header packet corrupt (illegal blocksize). \n");
+        return 3;
+    }
+    // output format int16
+    if (vc->blocksize[1]/2 * vc->audio_channels * 2 >
+                                             AVCODEC_MAX_AUDIO_FRAME_SIZE) {
+        av_log(vc->avccontext, AV_LOG_ERROR, "Vorbis channel count makes "
+               "output packets too large.\n");
+        return 4;
+    }
+    vc->win[0]=ff_vorbis_vwin[bl0-6];
+    vc->win[1]=ff_vorbis_vwin[bl1-6];
+
+    if(vc->exp_bias){
+        int i, j;
+        for(j=0; j<2; j++){
+            float *win = av_malloc(vc->blocksize[j]/2 * sizeof(float));
+            for(i=0; i<vc->blocksize[j]/2; i++)
+                win[i] = vc->win[j][i] * (1<<15);
+            vc->win[j] = win;
+        }
+    }
+
+    if ((get_bits1(gb)) == 0) {
+        av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis id header packet corrupt (framing flag not set). \n");
+        return 2;
+    }
+
+    vc->channel_residues=(float *)av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float));
+    vc->channel_floors=(float *)av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float));
+    vc->saved=(float *)av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float));
+    vc->ret=(float *)av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float));
+    vc->buf=(float *)av_malloc(vc->blocksize[1] * sizeof(float));
+    vc->buf_tmp=(float *)av_malloc(vc->blocksize[1] * sizeof(float));
+    vc->saved_start=0;
+
+    ff_mdct_init(&vc->mdct[0], bl0, 1);
+    ff_mdct_init(&vc->mdct[1], bl1, 1);
+
+    AV_DEBUG(" vorbis version %d \n audio_channels %d \n audio_samplerate %d \n bitrate_max %d \n bitrate_nom %d \n bitrate_min %d \n blk_0 %d blk_1 %d \n ",
+            vc->version, vc->audio_channels, vc->audio_samplerate, vc->bitrate_maximum, vc->bitrate_nominal, vc->bitrate_minimum, vc->blocksize[0], vc->blocksize[1]);
+
+/*
+    BLK=vc->blocksize[0];
+    for(i=0;i<BLK/2;++i) {
+        vc->win[0][i]=sin(0.5*3.14159265358*(sin(((float)i+0.5)/(float)BLK*3.14159265358))*(sin(((float)i+0.5)/(float)BLK*3.14159265358)));
+    }
+*/
+
+    return 0;
+}
+
+// Process the extradata using the functions above (identification header, setup header)
+
+static int vorbis_decode_init(AVCodecContext *avccontext) {
+    vorbis_context *vc = avccontext->priv_data ;
+    uint8_t *headers = avccontext->extradata;
+    int headers_len=avccontext->extradata_size;
+    uint8_t *header_start[3];
+    int header_len[3];
+    GetBitContext *gb = &(vc->gb);
+    int i, j, hdr_type;
+
+    vc->avccontext = avccontext;
+    dsputil_init(&vc->dsp, avccontext);
+
+    if(vc->dsp.float_to_int16 == ff_float_to_int16_c) {
+        vc->add_bias = 385;
+        vc->exp_bias = 0;
+    } else {
+        vc->add_bias = 0;
+        vc->exp_bias = 15<<23;
+    }
+
+    if (!headers_len) {
+        av_log(avccontext, AV_LOG_ERROR, "Extradata corrupt.\n");
+        return -1;
+    }
+
+    if(headers[0] == 0 && headers[1] == 30) {
+        for(i = 0; i < 3; i++){
+            header_len[i] = *headers++ << 8;
+            header_len[i] += *headers++;
+            header_start[i] = headers;
+            headers += header_len[i];
+        }
+    } else if(headers[0] == 2) {
+        for(j=1,i=0;i<2;++i, ++j) {
+            header_len[i]=0;
+            while(j<headers_len && headers[j]==0xff) {
+                header_len[i]+=0xff;
+                ++j;
+            }
+            if (j>=headers_len) {
+                av_log(avccontext, AV_LOG_ERROR, "Extradata corrupt.\n");
+                return -1;
+            }
+            header_len[i]+=headers[j];
+        }
+        header_len[2]=headers_len-header_len[0]-header_len[1]-j;
+        headers+=j;
+        header_start[0] = headers;
+        header_start[1] = header_start[0] + header_len[0];
+        header_start[2] = header_start[1] + header_len[1];
+    } else {
+        av_log(avccontext, AV_LOG_ERROR, "Extradata corrupt.\n");
+        return -1;
+    }
+
+    init_get_bits(gb, header_start[0], header_len[0]*8);
+    hdr_type=get_bits(gb, 8);
+    if (hdr_type!=1) {
+        av_log(avccontext, AV_LOG_ERROR, "First header is not the id header.\n");
+        return -1;
+    }
+    if (vorbis_parse_id_hdr(vc)) {
+        av_log(avccontext, AV_LOG_ERROR, "Id header corrupt.\n");
+        vorbis_free(vc);
+        return -1;
+    }
+
+    init_get_bits(gb, header_start[2], header_len[2]*8);
+    hdr_type=get_bits(gb, 8);
+    if (hdr_type!=5) {
+        av_log(avccontext, AV_LOG_ERROR, "Third header is not the setup header.\n");
+        return -1;
+    }
+    if (vorbis_parse_setup_hdr(vc)) {
+        av_log(avccontext, AV_LOG_ERROR, "Setup header corrupt.\n");
+        vorbis_free(vc);
+        return -1;
+    }
+
+    avccontext->channels = vc->audio_channels;
+    avccontext->sample_rate = vc->audio_samplerate;
+
+    return 0 ;
+}
+
+// Decode audiopackets -------------------------------------------------
+
+// Read and decode floor
+
+static uint_fast8_t vorbis_floor0_decode(vorbis_context *vc,
+                                         vorbis_floor_data *vfu, float *vec) {
+    vorbis_floor0 * vf=&vfu->t0;
+    float * lsp=vf->lsp;
+    uint_fast32_t amplitude;
+    uint_fast32_t book_idx;
+    uint_fast8_t blockflag=vc->modes[vc->mode_number].blockflag;
+
+    amplitude=get_bits(&vc->gb, vf->amplitude_bits);
+    if (amplitude>0) {
+        float last = 0;
+        uint_fast16_t lsp_len = 0;
+        uint_fast16_t idx;
+        vorbis_codebook codebook;
+
+        book_idx=get_bits(&vc->gb, ilog(vf->num_books));
+        if ( book_idx >= vf->num_books ) {
+            av_log( vc->avccontext, AV_LOG_ERROR,
+                    "floor0 dec: booknumber too high!\n" );
+            //FIXME: look above
+        }
+        AV_DEBUG( "floor0 dec: booknumber: %u\n", book_idx );
+        codebook=vc->codebooks[vf->book_list[book_idx]];
+
+        while (lsp_len<vf->order) {
+            int vec_off;
+
+            AV_DEBUG( "floor0 dec: book dimension: %d\n", codebook.dimensions );
+            AV_DEBUG( "floor0 dec: maximum depth: %d\n", codebook.maxdepth );
+            /* read temp vector */
+            vec_off=get_vlc2(&vc->gb,
+                             codebook.vlc.table,
+                             codebook.nb_bits,
+                             codebook.maxdepth ) *
+                             codebook.dimensions;
+            AV_DEBUG( "floor0 dec: vector offset: %d\n", vec_off );
+            /* copy each vector component and add last to it */
+            for (idx=0; idx<codebook.dimensions; ++idx) {
+                lsp[lsp_len+idx]=codebook.codevectors[vec_off+idx]+last;
+            }
+            last=lsp[lsp_len+idx-1]; /* set last to last vector component */
+
+            lsp_len += codebook.dimensions;
+        }
+#ifdef V_DEBUG
+        /* DEBUG: output lsp coeffs */
+        {
+            int idx;
+            for ( idx = 0; idx < lsp_len; ++idx )
+                AV_DEBUG("floor0 dec: coeff at %d is %f\n", idx, lsp[idx] );
+        }
+#endif
+
+        /* synthesize floor output vector */
+        {
+            int i;
+            int order=vf->order;
+            float wstep=M_PI/vf->bark_map_size;
+
+            for(i=0;i<order;i++) { lsp[i]=2.0f*cos(lsp[i]); }
+
+            AV_DEBUG("floor0 synth: map_size=%d; m=%d; wstep=%f\n",
+                     vf->map_size, order, wstep);
+
+            i=0;
+            while(i<vf->map_size[blockflag]) {
+                int j, iter_cond=vf->map[blockflag][i];
+                float p=0.5f;
+                float q=0.5f;
+                float two_cos_w=2.0f*cos(wstep*iter_cond); // needed all times
+
+                /* similar part for the q and p products */
+                for(j=0;j<order;j+=2) {
+                    q *= lsp[j]  -two_cos_w;
+                    p *= lsp[j+1]-two_cos_w;
+                }
+                if(j==order) { // even order
+                    p *= p*(2.0f-two_cos_w);
+                    q *= q*(2.0f+two_cos_w);
+                }
+                else { // odd order
+                    q *= two_cos_w-lsp[j]; // one more time for q
+
+                    /* final step and square */
+                    p *= p*(4.f-two_cos_w*two_cos_w);
+                    q *= q;
+                }
+
+                /* calculate linear floor value */
+                {
+                    q=exp( (
+                             ( (amplitude*vf->amplitude_offset)/
+                               (((1<<vf->amplitude_bits)-1) * sqrt(p+q)) )
+                             - vf->amplitude_offset ) * .11512925f
+                         );
+                }
+
+                /* fill vector */
+                do { vec[i]=q; ++i; }while(vf->map[blockflag][i]==iter_cond);
+            }
+        }
+    }
+    else {
+        /* this channel is unused */
+        return 1;
+    }
+
+    AV_DEBUG(" Floor0 decoded\n");
+
+    return 0;
+}
+
+static void render_line(int x0, int y0, int x1, int y1, float * buf, int n) {
+    int dy = y1 - y0;
+    int adx = x1 - x0;
+    int ady = FFABS(dy);
+    int base = dy / adx;
+    int x = x0;
+    int y = y0;
+    int err = 0;
+    int sy;
+    if (dy < 0) sy = base - 1;
+    else        sy = base + 1;
+    ady = ady - FFABS(base) * adx;
+    if (x >= n) return;
+    buf[x] = ff_vorbis_floor1_inverse_db_table[y];
+    for (x = x0 + 1; x < x1; x++) {
+        if (x >= n) return;
+        err += ady;
+        if (err >= adx) {
+            err -= adx;
+            y += sy;
+        } else {
+            y += base;
+        }
+        buf[x] = ff_vorbis_floor1_inverse_db_table[y];
+    }
+}
+
+void ff_vorbis_floor1_render_list(floor1_entry_t * list, int values, uint_fast16_t * y_list, int * flag, int multiplier, float * out, int samples) {
+    int lx, ly, i;
+    lx = 0;
+    ly = y_list[0] * multiplier;
+    for (i = 1; i < values; i++) {
+        int pos = list[i].sort;
+        if (flag[pos]) {
+            render_line(lx, ly, list[pos].x, y_list[pos] * multiplier, out, samples);
+            lx = list[pos].x;
+            ly = y_list[pos] * multiplier;
+        }
+        if (lx >= samples) break;
+    }
+    if (lx < samples) render_line(lx, ly, samples, ly, out, samples);
+}
+
+static uint_fast8_t vorbis_floor1_decode(vorbis_context *vc, vorbis_floor_data *vfu, float *vec) {
+    vorbis_floor1 * vf=&vfu->t1;
+    GetBitContext *gb=&vc->gb;
+    uint_fast16_t range_v[4]={ 256, 128, 86, 64 };
+    uint_fast16_t range=range_v[vf->multiplier-1];
+    uint_fast16_t floor1_Y[vf->x_list_dim];
+    uint_fast16_t floor1_Y_final[vf->x_list_dim];
+    int floor1_flag[vf->x_list_dim];
+    uint_fast8_t class_;
+    uint_fast8_t cdim;
+    uint_fast8_t cbits;
+    uint_fast8_t csub;
+    uint_fast8_t cval;
+    int_fast16_t book;
+    uint_fast16_t offset;
+    uint_fast16_t i,j;
+    /*u*/int_fast16_t adx, ady, off, predicted; // WTF ? dy/adx= (unsigned)dy/adx ?
+    int_fast16_t dy, err;
+
+
+    if (!get_bits1(gb)) return 1; // silence
+
+// Read values (or differences) for the floor's points
+
+    floor1_Y[0]=get_bits(gb, ilog(range-1));
+    floor1_Y[1]=get_bits(gb, ilog(range-1));
+
+    AV_DEBUG("floor 0 Y %d floor 1 Y %d \n", floor1_Y[0], floor1_Y[1]);
+
+    offset=2;
+    for(i=0;i<vf->partitions;++i) {
+        class_=vf->partition_class[i];
+        cdim=vf->class_dimensions[class_];
+        cbits=vf->class_subclasses[class_];
+        csub=(1<<cbits)-1;
+        cval=0;
+
+        AV_DEBUG("Cbits %d \n", cbits);
+
+        if (cbits) { // this reads all subclasses for this partition's class
+            cval=get_vlc2(gb, vc->codebooks[vf->class_masterbook[class_]].vlc.table,
+            vc->codebooks[vf->class_masterbook[class_]].nb_bits, 3);
+        }
+
+        for(j=0;j<cdim;++j) {
+            book=vf->subclass_books[class_][cval & csub];
+
+            AV_DEBUG("book %d Cbits %d cval %d  bits:%d \n", book, cbits, cval, get_bits_count(gb));
+
+            cval=cval>>cbits;
+            if (book>-1) {
+                floor1_Y[offset+j]=get_vlc2(gb, vc->codebooks[book].vlc.table,
+                vc->codebooks[book].nb_bits, 3);
+            } else {
+                floor1_Y[offset+j]=0;
+            }
+
+            AV_DEBUG(" floor(%d) = %d \n", vf->list[offset+j].x, floor1_Y[offset+j]);
+        }
+        offset+=cdim;
+    }
+
+// Amplitude calculation from the differences
+
+    floor1_flag[0]=1;
+    floor1_flag[1]=1;
+    floor1_Y_final[0]=floor1_Y[0];
+    floor1_Y_final[1]=floor1_Y[1];
+
+    for(i=2;i<vf->x_list_dim;++i) {
+        uint_fast16_t val, highroom, lowroom, room;
+        uint_fast16_t high_neigh_offs;
+        uint_fast16_t low_neigh_offs;
+
+        low_neigh_offs=vf->list[i].low;
+        high_neigh_offs=vf->list[i].high;
+        dy=floor1_Y_final[high_neigh_offs]-floor1_Y_final[low_neigh_offs];  // render_point begin
+        adx=vf->list[high_neigh_offs].x-vf->list[low_neigh_offs].x;
+        ady= FFABS(dy);
+        err=ady*(vf->list[i].x-vf->list[low_neigh_offs].x);
+        off=(int16_t)err/(int16_t)adx;
+        if (dy<0) {
+            predicted=floor1_Y_final[low_neigh_offs]-off;
+        } else {
+            predicted=floor1_Y_final[low_neigh_offs]+off;
+        } // render_point end
+
+        val=floor1_Y[i];
+        highroom=range-predicted;
+        lowroom=predicted;
+        if (highroom < lowroom) {
+            room=highroom*2;
+        } else {
+            room=lowroom*2;   // SPEC mispelling
+        }
+        if (val) {
+            floor1_flag[low_neigh_offs]=1;
+            floor1_flag[high_neigh_offs]=1;
+            floor1_flag[i]=1;
+            if (val>=room) {
+                if (highroom > lowroom) {
+                    floor1_Y_final[i]=val-lowroom+predicted;
+                } else {
+                    floor1_Y_final[i]=predicted-val+highroom-1;
+                }
+            } else {
+                if (val & 1) {
+                    floor1_Y_final[i]=predicted-(val+1)/2;
+                } else {
+                    floor1_Y_final[i]=predicted+val/2;
+                }
+            }
+        } else {
+            floor1_flag[i]=0;
+            floor1_Y_final[i]=predicted;
+        }
+
+        AV_DEBUG(" Decoded floor(%d) = %d / val %d \n", vf->list[i].x, floor1_Y_final[i], val);
+    }
+
+// Curve synth - connect the calculated dots and convert from dB scale FIXME optimize ?
+
+    ff_vorbis_floor1_render_list(vf->list, vf->x_list_dim, floor1_Y_final, floor1_flag, vf->multiplier, vec, vf->list[1].x);
+
+    AV_DEBUG(" Floor decoded\n");
+
+    return 0;
+}
+
+// Read and decode residue
+
+static int vorbis_residue_decode(vorbis_context *vc, vorbis_residue *vr, uint_fast8_t ch, uint_fast8_t *do_not_decode, float *vec, uint_fast16_t vlen) {
+    GetBitContext *gb=&vc->gb;
+    uint_fast8_t c_p_c=vc->codebooks[vr->classbook].dimensions;
+    uint_fast16_t n_to_read=vr->end-vr->begin;
+    uint_fast16_t ptns_to_read=n_to_read/vr->partition_size;
+    uint_fast8_t classifs[ptns_to_read*vc->audio_channels];
+    uint_fast8_t pass;
+    uint_fast8_t ch_used;
+    uint_fast8_t i,j,l;
+    uint_fast16_t k;
+
+    if (vr->type==2) {
+        for(j=1;j<ch;++j) {
+                do_not_decode[0]&=do_not_decode[j];  // FIXME - clobbering input
+        }
+        if (do_not_decode[0]) return 0;
+        ch_used=1;
+    } else {
+        ch_used=ch;
+    }
+
+    AV_DEBUG(" residue type 0/1/2 decode begin, ch: %d  cpc %d  \n", ch, c_p_c);
+
+    for(pass=0;pass<=vr->maxpass;++pass) { // FIXME OPTIMIZE?
+        uint_fast16_t voffset;
+        uint_fast16_t partition_count;
+        uint_fast16_t j_times_ptns_to_read;
+
+        voffset=vr->begin;
+        for(partition_count=0;partition_count<ptns_to_read;) {  // SPEC        error
+            if (!pass) {
+                uint_fast32_t inverse_class = ff_inverse[vr->classifications];
+                for(j_times_ptns_to_read=0, j=0;j<ch_used;++j) {
+                    if (!do_not_decode[j]) {
+                        uint_fast32_t temp=get_vlc2(gb, vc->codebooks[vr->classbook].vlc.table,
+                        vc->codebooks[vr->classbook].nb_bits, 3);
+
+                        AV_DEBUG("Classword: %d \n", temp);
+
+                        assert(vr->classifications > 1 && temp<=65536); //needed for inverse[]
+                        for(i=0;i<c_p_c;++i) {
+                            uint_fast32_t temp2;
+
+                            temp2=(((uint_fast64_t)temp) * inverse_class)>>32;
+                            if (partition_count+c_p_c-1-i < ptns_to_read) {
+                                classifs[j_times_ptns_to_read+partition_count+c_p_c-1-i]=temp-temp2*vr->classifications;
+                            }
+                            temp=temp2;
+                        }
+                    }
+                    j_times_ptns_to_read+=ptns_to_read;
+                }
+            }
+            for(i=0;(i<c_p_c) && (partition_count<ptns_to_read);++i) {
+                for(j_times_ptns_to_read=0, j=0;j<ch_used;++j) {
+                    uint_fast16_t voffs;
+
+                    if (!do_not_decode[j]) {
+                        uint_fast8_t vqclass=classifs[j_times_ptns_to_read+partition_count];
+                        int_fast16_t vqbook=vr->books[vqclass][pass];
+
+                        if (vqbook>=0) {
+                            uint_fast16_t coffs;
+                            unsigned dim= vc->codebooks[vqbook].dimensions; // not uint_fast8_t: 64bit is slower here on amd64
+                            uint_fast16_t step= dim==1 ? vr->partition_size
+                                              : FASTDIV(vr->partition_size, dim);
+                            vorbis_codebook codebook= vc->codebooks[vqbook];
+
+                            if (vr->type==0) {
+
+                                voffs=voffset+j*vlen;
+                                for(k=0;k<step;++k) {
+                                    coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
+                                    for(l=0;l<dim;++l) {
+                                        vec[voffs+k+l*step]+=codebook.codevectors[coffs+l];  // FPMATH
+                                    }
+                                }
+                            }
+                            else if (vr->type==1) {
+                                voffs=voffset+j*vlen;
+                                for(k=0;k<step;++k) {
+                                    coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
+                                    for(l=0;l<dim;++l, ++voffs) {
+                                        vec[voffs]+=codebook.codevectors[coffs+l];  // FPMATH
+
+                                        AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d  \n", pass, voffs, vec[voffs], codebook.codevectors[coffs+l], coffs);
+                                    }
+                                }
+                            }
+                            else if (vr->type==2 && ch==2 && (voffset&1)==0 && (dim&1)==0) { // most frequent case optimized
+                                voffs=voffset>>1;
+
+                                if(dim==2) {
+                                    for(k=0;k<step;++k) {
+                                        coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * 2;
+                                        vec[voffs+k     ]+=codebook.codevectors[coffs  ];  // FPMATH
+                                        vec[voffs+k+vlen]+=codebook.codevectors[coffs+1];  // FPMATH
+                                    }
+                                } else
+                                for(k=0;k<step;++k) {
+                                    coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
+                                    for(l=0;l<dim;l+=2, voffs++) {
+                                        vec[voffs     ]+=codebook.codevectors[coffs+l  ];  // FPMATH
+                                        vec[voffs+vlen]+=codebook.codevectors[coffs+l+1];  // FPMATH
+
+                                        AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d  \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l);
+                                    }
+                                }
+
+                            }
+                            else if (vr->type==2) {
+                                voffs=voffset;
+
+                                for(k=0;k<step;++k) {
+                                    coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
+                                    for(l=0;l<dim;++l, ++voffs) {
+                                        vec[voffs/ch+(voffs%ch)*vlen]+=codebook.codevectors[coffs+l];  // FPMATH FIXME use if and counter instead of / and %
+
+                                        AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d  \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l);
+                                    }
+                                }
+                            } else {
+                                av_log(vc->avccontext, AV_LOG_ERROR, " Invalid residue type while residue decode?! \n");
+                                return 1;
+                            }
+                        }
+                    }
+                    j_times_ptns_to_read+=ptns_to_read;
+                }
+                ++partition_count;
+                voffset+=vr->partition_size;
+            }
+        }
+    }
+    return 0;
+}
+
+void vorbis_inverse_coupling(float *mag, float *ang, int blocksize)
+{
+    int i;
+    for(i=0; i<blocksize; i++)
+    {
+        if (mag[i]>0.0) {
+            if (ang[i]>0.0) {
+                ang[i]=mag[i]-ang[i];
+            } else {
+                float temp=ang[i];
+                ang[i]=mag[i];
+                mag[i]+=temp;
+            }
+        } else {
+            if (ang[i]>0.0) {
+                ang[i]+=mag[i];
+            } else {
+                float temp=ang[i];
+                ang[i]=mag[i];
+                mag[i]-=temp;
+            }
+        }
+    }
+}
+
+// Decode the audio packet using the functions above
+
+static int vorbis_parse_audio_packet(vorbis_context *vc) {
+    GetBitContext *gb=&vc->gb;
+
+    uint_fast8_t previous_window=0,next_window=0;
+    uint_fast8_t mode_number;
+    uint_fast16_t blocksize;
+    int_fast32_t i,j;
+    uint_fast8_t no_residue[vc->audio_channels];
+    uint_fast8_t do_not_decode[vc->audio_channels];
+    vorbis_mapping *mapping;
+    float *ch_res_ptr=vc->channel_residues;
+    float *ch_floor_ptr=vc->channel_floors;
+    uint_fast8_t res_chan[vc->audio_channels];
+    uint_fast8_t res_num=0;
+    int_fast16_t retlen=0;
+    uint_fast16_t saved_start=0;
+    float fadd_bias = vc->add_bias;
+
+    if (get_bits1(gb)) {
+        av_log(vc->avccontext, AV_LOG_ERROR, "Not a Vorbis I audio packet.\n");
+        return -1; // packet type not audio
+    }
+
+    if (vc->mode_count==1) {
+        mode_number=0;
+    } else {
+        mode_number=get_bits(gb, ilog(vc->mode_count-1));
+    }
+    vc->mode_number=mode_number;
+    mapping=&vc->mappings[vc->modes[mode_number].mapping];
+
+    AV_DEBUG(" Mode number: %d , mapping: %d , blocktype %d \n", mode_number, vc->modes[mode_number].mapping, vc->modes[mode_number].blockflag);
+
+    if (vc->modes[mode_number].blockflag) {
+        previous_window=get_bits1(gb);
+        next_window=get_bits1(gb);
+    }
+
+    blocksize=vc->blocksize[vc->modes[mode_number].blockflag];
+    memset(ch_res_ptr, 0, sizeof(float)*vc->audio_channels*blocksize/2); //FIXME can this be removed ?
+    memset(ch_floor_ptr, 0, sizeof(float)*vc->audio_channels*blocksize/2); //FIXME can this be removed ?
+
+// Decode floor
+
+    for(i=0;i<vc->audio_channels;++i) {
+        vorbis_floor *floor;
+        if (mapping->submaps>1) {
+            floor=&vc->floors[mapping->submap_floor[mapping->mux[i]]];
+        } else {
+            floor=&vc->floors[mapping->submap_floor[0]];
+        }
+
+        no_residue[i]=floor->decode(vc, &floor->data, ch_floor_ptr);
+        ch_floor_ptr+=blocksize/2;
+    }
+
+// Nonzero vector propagate
+
+    for(i=mapping->coupling_steps-1;i>=0;--i) {
+        if (!(no_residue[mapping->magnitude[i]] & no_residue[mapping->angle[i]])) {
+            no_residue[mapping->magnitude[i]]=0;
+            no_residue[mapping->angle[i]]=0;
+        }
+    }
+
+// Decode residue
+
+    for(i=0;i<mapping->submaps;++i) {
+        vorbis_residue *residue;
+        uint_fast8_t ch=0;
+
+        for(j=0;j<vc->audio_channels;++j) {
+            if ((mapping->submaps==1) || (i=mapping->mux[j])) {
+                res_chan[j]=res_num;
+                if (no_residue[j]) {
+                    do_not_decode[ch]=1;
+                } else {
+                    do_not_decode[ch]=0;
+                }
+                ++ch;
+                ++res_num;
+            }
+        }
+        residue=&vc->residues[mapping->submap_residue[i]];
+        vorbis_residue_decode(vc, residue, ch, do_not_decode, ch_res_ptr, blocksize/2);
+
+        ch_res_ptr+=ch*blocksize/2;
+    }
+
+// Inverse coupling
+
+    for(i=mapping->coupling_steps-1;i>=0;--i) { //warning: i has to be signed
+        float *mag, *ang;
+
+        mag=vc->channel_residues+res_chan[mapping->magnitude[i]]*blocksize/2;
+        ang=vc->channel_residues+res_chan[mapping->angle[i]]*blocksize/2;
+        vc->dsp.vorbis_inverse_coupling(mag, ang, blocksize/2);
+    }
+
+// Dotproduct
+
+    for(j=0, ch_floor_ptr=vc->channel_floors;j<vc->audio_channels;++j,ch_floor_ptr+=blocksize/2) {
+        ch_res_ptr=vc->channel_residues+res_chan[j]*blocksize/2;
+        vc->dsp.vector_fmul(ch_floor_ptr, ch_res_ptr, blocksize/2);
+    }
+
+// MDCT, overlap/add, save data for next overlapping  FPMATH
+
+    for(j=0;j<vc->audio_channels;++j) {
+        uint_fast8_t step=vc->audio_channels;
+        uint_fast16_t k;
+        float *saved=vc->saved+j*vc->blocksize[1]/2;
+        float *ret=vc->ret;
+        const float *lwin=vc->win[1];
+        const float *swin=vc->win[0];
+        float *buf=vc->buf;
+        float *buf_tmp=vc->buf_tmp;
+
+        ch_floor_ptr=vc->channel_floors+j*blocksize/2;
+
+        saved_start=vc->saved_start;
+
+        vc->mdct[0].fft.imdct_calc(&vc->mdct[vc->modes[mode_number].blockflag], buf, ch_floor_ptr, buf_tmp);
+
+        //FIXME process channels together, to allow faster simd vector_fmul_add_add?
+        if (vc->modes[mode_number].blockflag) {
+            // -- overlap/add
+            if (previous_window) {
+                vc->dsp.vector_fmul_add_add(ret+j, buf, lwin, saved, vc->add_bias, vc->blocksize[1]/2, step);
+                retlen=vc->blocksize[1]/2;
+            } else {
+                int len = (vc->blocksize[1]-vc->blocksize[0])/4;
+                buf += len;
+                vc->dsp.vector_fmul_add_add(ret+j, buf, swin, saved, vc->add_bias, vc->blocksize[0]/2, step);
+                k = vc->blocksize[0]/2*step + j;
+                buf += vc->blocksize[0]/2;
+                if(vc->exp_bias){
+                    for(i=0; i<len; i++, k+=step)
+                        ((uint32_t*)ret)[k] = ((uint32_t*)buf)[i] + vc->exp_bias; // ret[k]=buf[i]*(1<<bias)
+                } else {
+                    for(i=0; i<len; i++, k+=step)
+                        ret[k] = buf[i] + fadd_bias;
+                }
+                buf=vc->buf;
+                retlen=vc->blocksize[0]/2+len;
+            }
+            // -- save
+            if (next_window) {
+                buf += vc->blocksize[1]/2;
+                vc->dsp.vector_fmul_reverse(saved, buf, lwin, vc->blocksize[1]/2);
+                saved_start=0;
+            } else {
+                saved_start=(vc->blocksize[1]-vc->blocksize[0])/4;
+                buf += vc->blocksize[1]/2;
+                for(i=0; i<saved_start; i++)
+                    ((uint32_t*)saved)[i] = ((uint32_t*)buf)[i] + vc->exp_bias;
+                vc->dsp.vector_fmul_reverse(saved+saved_start, buf+saved_start, swin, vc->blocksize[0]/2);
+            }
+        } else {
+            // --overlap/add
+            if(vc->add_bias) {
+                for(k=j, i=0;i<saved_start;++i, k+=step)
+                    ret[k] = saved[i] + fadd_bias;
+            } else {
+                for(k=j, i=0;i<saved_start;++i, k+=step)
+                    ret[k] = saved[i];
+            }
+            vc->dsp.vector_fmul_add_add(ret+k, buf, swin, saved+saved_start, vc->add_bias, vc->blocksize[0]/2, step);
+            retlen=saved_start+vc->blocksize[0]/2;
+            // -- save
+            buf += vc->blocksize[0]/2;
+            vc->dsp.vector_fmul_reverse(saved, buf, swin, vc->blocksize[0]/2);
+            saved_start=0;
+        }
+    }
+    vc->saved_start=saved_start;
+
+    return retlen*vc->audio_channels;
+}
+
+// Return the decoded audio packet through the standard api
+
+static int vorbis_decode_frame(AVCodecContext *avccontext,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    vorbis_context *vc = avccontext->priv_data ;
+    GetBitContext *gb = &(vc->gb);
+
+    int_fast16_t len;
+
+    if(!buf_size){
+        return 0;
+    }
+
+    AV_DEBUG("packet length %d \n", buf_size);
+
+    init_get_bits(gb, buf, buf_size*8);
+
+    len=vorbis_parse_audio_packet(vc);
+
+    if (len<=0) {
+        *data_size=0;
+        return buf_size;
+    }
+
+    if (!vc->first_frame) {
+        vc->first_frame=1;
+        *data_size=0;
+        return buf_size ;
+    }
+
+    AV_DEBUG("parsed %d bytes %d bits, returned %d samples (*ch*bits) \n", get_bits_count(gb)/8, get_bits_count(gb)%8, len);
+
+    vc->dsp.float_to_int16(data, vc->ret, len);
+    *data_size=len*2;
+
+    return buf_size ;
+}
+
+// Close decoder
+
+static int vorbis_decode_close(AVCodecContext *avccontext) {
+    vorbis_context *vc = avccontext->priv_data;
+
+    vorbis_free(vc);
+
+    return 0 ;
+}
+
+AVCodec vorbis_decoder = {
+    "vorbis",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_VORBIS,
+    sizeof(vorbis_context),
+    vorbis_decode_init,
+    NULL,
+    vorbis_decode_close,
+    vorbis_decode_frame,
+};
+
diff --git a/contrib/ffmpeg/libavcodec/vorbis.h b/contrib/ffmpeg/libavcodec/vorbis.h
new file mode 100644
index 000000000..cda909aa9
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vorbis.h
@@ -0,0 +1,43 @@
+/*
+ * copyright (c) 2006 Oded Shimon <ods15@ods15.dyndns.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef VORBIS_H
+#define VORBIS_H
+
+#include "avcodec.h"
+
+extern const float ff_vorbis_floor1_inverse_db_table[256];
+extern const float * ff_vorbis_vwin[8];
+
+typedef struct {
+    uint_fast16_t x;
+    uint_fast16_t sort;
+    uint_fast16_t low;
+    uint_fast16_t high;
+} floor1_entry_t;
+
+void ff_vorbis_ready_floor1_list(floor1_entry_t * list, int values);
+unsigned int ff_vorbis_nth_root(unsigned int x, unsigned int n); // x^(1/n)
+int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, uint_fast32_t num);
+void ff_vorbis_floor1_render_list(floor1_entry_t * list, int values, uint_fast16_t * y_list, int * flag, int multiplier, float * out, int samples);
+
+#define ilog(i) av_log2(2*(i))
+
+#endif
diff --git a/contrib/ffmpeg/libavcodec/vorbis_data.c b/contrib/ffmpeg/libavcodec/vorbis_data.c
new file mode 100644
index 000000000..5dc9c5f00
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vorbis_data.c
@@ -0,0 +1,2155 @@
+/*
+ * copyright (c) 2005 Denes Balatoni ( dbalatoni programozo hu )
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vorbis.h"
+
+static const float vwin64[32] = {
+  0.0009460463F, 0.0085006468F, 0.0235352254F, 0.0458950567F,
+  0.0753351908F, 0.1115073077F, 0.1539457973F, 0.2020557475F,
+  0.2551056759F, 0.3122276645F, 0.3724270287F, 0.4346027792F,
+  0.4975789974F, 0.5601459521F, 0.6211085051F, 0.6793382689F,
+  0.7338252629F, 0.7837245849F, 0.8283939355F, 0.8674186656F,
+  0.9006222429F, 0.9280614787F, 0.9500073081F, 0.9669131782F,
+  0.9793740220F, 0.9880792941F, 0.9937636139F, 0.9971582668F,
+  0.9989462667F, 0.9997230082F, 0.9999638688F, 0.9999995525F,
+};
+
+static const float vwin128[64] = {
+  0.0002365472F, 0.0021280687F, 0.0059065254F, 0.0115626550F,
+  0.0190823442F, 0.0284463735F, 0.0396300935F, 0.0526030430F,
+  0.0673285281F, 0.0837631763F, 0.1018564887F, 0.1215504095F,
+  0.1427789367F, 0.1654677960F, 0.1895342001F, 0.2148867160F,
+  0.2414252576F, 0.2690412240F, 0.2976177952F, 0.3270303960F,
+  0.3571473350F, 0.3878306189F, 0.4189369387F, 0.4503188188F,
+  0.4818259135F, 0.5133064334F, 0.5446086751F, 0.5755826278F,
+  0.6060816248F, 0.6359640047F, 0.6650947483F, 0.6933470543F,
+  0.7206038179F, 0.7467589810F, 0.7717187213F, 0.7954024542F,
+  0.8177436264F, 0.8386902831F, 0.8582053981F, 0.8762669622F,
+  0.8928678298F, 0.9080153310F, 0.9217306608F, 0.9340480615F,
+  0.9450138200F, 0.9546851041F, 0.9631286621F, 0.9704194171F,
+  0.9766389810F, 0.9818741197F, 0.9862151938F, 0.9897546035F,
+  0.9925852598F, 0.9947991032F, 0.9964856900F, 0.9977308602F,
+  0.9986155015F, 0.9992144193F, 0.9995953200F, 0.9998179155F,
+  0.9999331503F, 0.9999825563F, 0.9999977357F, 0.9999999720F,
+};
+
+static const float vwin256[128] = {
+  0.0000591390F, 0.0005321979F, 0.0014780301F, 0.0028960636F,
+  0.0047854363F, 0.0071449926F, 0.0099732775F, 0.0132685298F,
+  0.0170286741F, 0.0212513119F, 0.0259337111F, 0.0310727950F,
+  0.0366651302F, 0.0427069140F, 0.0491939614F, 0.0561216907F,
+  0.0634851102F, 0.0712788035F, 0.0794969160F, 0.0881331402F,
+  0.0971807028F, 0.1066323515F, 0.1164803426F, 0.1267164297F,
+  0.1373318534F, 0.1483173323F, 0.1596630553F, 0.1713586755F,
+  0.1833933062F, 0.1957555184F, 0.2084333404F, 0.2214142599F,
+  0.2346852280F, 0.2482326664F, 0.2620424757F, 0.2761000481F,
+  0.2903902813F, 0.3048975959F, 0.3196059553F, 0.3344988887F,
+  0.3495595160F, 0.3647705766F, 0.3801144597F, 0.3955732382F,
+  0.4111287047F, 0.4267624093F, 0.4424557009F, 0.4581897696F,
+  0.4739456913F, 0.4897044744F, 0.5054471075F, 0.5211546088F,
+  0.5368080763F, 0.5523887395F, 0.5678780103F, 0.5832575361F,
+  0.5985092508F, 0.6136154277F, 0.6285587300F, 0.6433222619F,
+  0.6578896175F, 0.6722449294F, 0.6863729144F, 0.7002589187F,
+  0.7138889597F, 0.7272497662F, 0.7403288154F, 0.7531143679F,
+  0.7655954985F, 0.7777621249F, 0.7896050322F, 0.8011158947F,
+  0.8122872932F, 0.8231127294F, 0.8335866365F, 0.8437043850F,
+  0.8534622861F, 0.8628575905F, 0.8718884835F, 0.8805540765F,
+  0.8888543947F, 0.8967903616F, 0.9043637797F, 0.9115773078F,
+  0.9184344360F, 0.9249394562F, 0.9310974312F, 0.9369141608F,
+  0.9423961446F, 0.9475505439F, 0.9523851406F, 0.9569082947F,
+  0.9611289005F, 0.9650563408F, 0.9687004405F, 0.9720714191F,
+  0.9751798427F, 0.9780365753F, 0.9806527301F, 0.9830396204F,
+  0.9852087111F, 0.9871715701F, 0.9889398207F, 0.9905250941F,
+  0.9919389832F, 0.9931929973F, 0.9942985174F, 0.9952667537F,
+  0.9961087037F, 0.9968351119F, 0.9974564312F, 0.9979827858F,
+  0.9984239359F, 0.9987892441F, 0.9990876435F, 0.9993276081F,
+  0.9995171241F, 0.9996636648F, 0.9997741654F, 0.9998550016F,
+  0.9999119692F, 0.9999502656F, 0.9999744742F, 0.9999885497F,
+  0.9999958064F, 0.9999989077F, 0.9999998584F, 0.9999999983F,
+};
+
+static const float vwin512[256] = {
+  0.0000147849F, 0.0001330607F, 0.0003695946F, 0.0007243509F,
+  0.0011972759F, 0.0017882983F, 0.0024973285F, 0.0033242588F,
+  0.0042689632F, 0.0053312973F, 0.0065110982F, 0.0078081841F,
+  0.0092223540F, 0.0107533880F, 0.0124010466F, 0.0141650703F,
+  0.0160451800F, 0.0180410758F, 0.0201524373F, 0.0223789233F,
+  0.0247201710F, 0.0271757958F, 0.0297453914F, 0.0324285286F,
+  0.0352247556F, 0.0381335972F, 0.0411545545F, 0.0442871045F,
+  0.0475306997F, 0.0508847676F, 0.0543487103F, 0.0579219038F,
+  0.0616036982F, 0.0653934164F, 0.0692903546F, 0.0732937809F,
+  0.0774029356F, 0.0816170305F, 0.0859352485F, 0.0903567428F,
+  0.0948806375F, 0.0995060259F, 0.1042319712F, 0.1090575056F,
+  0.1139816300F, 0.1190033137F, 0.1241214941F, 0.1293350764F,
+  0.1346429333F, 0.1400439046F, 0.1455367974F, 0.1511203852F,
+  0.1567934083F, 0.1625545735F, 0.1684025537F, 0.1743359881F,
+  0.1803534820F, 0.1864536069F, 0.1926349000F, 0.1988958650F,
+  0.2052349715F, 0.2116506555F, 0.2181413191F, 0.2247053313F,
+  0.2313410275F, 0.2380467105F, 0.2448206500F, 0.2516610835F,
+  0.2585662164F, 0.2655342226F, 0.2725632448F, 0.2796513950F,
+  0.2867967551F, 0.2939973773F, 0.3012512852F, 0.3085564739F,
+  0.3159109111F, 0.3233125375F, 0.3307592680F, 0.3382489922F,
+  0.3457795756F, 0.3533488602F, 0.3609546657F, 0.3685947904F,
+  0.3762670121F, 0.3839690896F, 0.3916987634F, 0.3994537572F,
+  0.4072317788F, 0.4150305215F, 0.4228476653F, 0.4306808783F,
+  0.4385278181F, 0.4463861329F, 0.4542534630F, 0.4621274424F,
+  0.4700057001F, 0.4778858615F, 0.4857655502F, 0.4936423891F,
+  0.5015140023F, 0.5093780165F, 0.5172320626F, 0.5250737772F,
+  0.5329008043F, 0.5407107971F, 0.5485014192F, 0.5562703465F,
+  0.5640152688F, 0.5717338914F, 0.5794239366F, 0.5870831457F,
+  0.5947092801F, 0.6023001235F, 0.6098534829F, 0.6173671907F,
+  0.6248391059F, 0.6322671161F, 0.6396491384F, 0.6469831217F,
+  0.6542670475F, 0.6614989319F, 0.6686768267F, 0.6757988210F,
+  0.6828630426F, 0.6898676592F, 0.6968108799F, 0.7036909564F,
+  0.7105061843F, 0.7172549043F, 0.7239355032F, 0.7305464154F,
+  0.7370861235F, 0.7435531598F, 0.7499461068F, 0.7562635986F,
+  0.7625043214F, 0.7686670148F, 0.7747504721F, 0.7807535410F,
+  0.7866751247F, 0.7925141825F, 0.7982697296F, 0.8039408387F,
+  0.8095266395F, 0.8150263196F, 0.8204391248F, 0.8257643590F,
+  0.8310013848F, 0.8361496236F, 0.8412085555F, 0.8461777194F,
+  0.8510567129F, 0.8558451924F, 0.8605428730F, 0.8651495278F,
+  0.8696649882F, 0.8740891432F, 0.8784219392F, 0.8826633797F,
+  0.8868135244F, 0.8908724888F, 0.8948404441F, 0.8987176157F,
+  0.9025042831F, 0.9062007791F, 0.9098074886F, 0.9133248482F,
+  0.9167533451F, 0.9200935163F, 0.9233459472F, 0.9265112712F,
+  0.9295901680F, 0.9325833632F, 0.9354916263F, 0.9383157705F,
+  0.9410566504F, 0.9437151618F, 0.9462922398F, 0.9487888576F,
+  0.9512060252F, 0.9535447882F, 0.9558062262F, 0.9579914516F,
+  0.9601016078F, 0.9621378683F, 0.9641014348F, 0.9659935361F,
+  0.9678154261F, 0.9695683830F, 0.9712537071F, 0.9728727198F,
+  0.9744267618F, 0.9759171916F, 0.9773453842F, 0.9787127293F,
+  0.9800206298F, 0.9812705006F, 0.9824637665F, 0.9836018613F,
+  0.9846862258F, 0.9857183066F, 0.9866995544F, 0.9876314227F,
+  0.9885153662F, 0.9893528393F, 0.9901452948F, 0.9908941823F,
+  0.9916009470F, 0.9922670279F, 0.9928938570F, 0.9934828574F,
+  0.9940354423F, 0.9945530133F, 0.9950369595F, 0.9954886562F,
+  0.9959094633F, 0.9963007242F, 0.9966637649F, 0.9969998925F,
+  0.9973103939F, 0.9975965351F, 0.9978595598F, 0.9981006885F,
+  0.9983211172F, 0.9985220166F, 0.9987045311F, 0.9988697776F,
+  0.9990188449F, 0.9991527924F, 0.9992726499F, 0.9993794157F,
+  0.9994740570F, 0.9995575079F, 0.9996306699F, 0.9996944099F,
+  0.9997495605F, 0.9997969190F, 0.9998372465F, 0.9998712678F,
+  0.9998996704F, 0.9999231041F, 0.9999421807F, 0.9999574732F,
+  0.9999695157F, 0.9999788026F, 0.9999857885F, 0.9999908879F,
+  0.9999944746F, 0.9999968817F, 0.9999984010F, 0.9999992833F,
+  0.9999997377F, 0.9999999317F, 0.9999999911F, 0.9999999999F,
+};
+
+static const float vwin1024[512] = {
+  0.0000036962F, 0.0000332659F, 0.0000924041F, 0.0001811086F,
+  0.0002993761F, 0.0004472021F, 0.0006245811F, 0.0008315063F,
+  0.0010679699F, 0.0013339631F, 0.0016294757F, 0.0019544965F,
+  0.0023090133F, 0.0026930125F, 0.0031064797F, 0.0035493989F,
+  0.0040217533F, 0.0045235250F, 0.0050546946F, 0.0056152418F,
+  0.0062051451F, 0.0068243817F, 0.0074729278F, 0.0081507582F,
+  0.0088578466F, 0.0095941655F, 0.0103596863F, 0.0111543789F,
+  0.0119782122F, 0.0128311538F, 0.0137131701F, 0.0146242260F,
+  0.0155642855F, 0.0165333111F, 0.0175312640F, 0.0185581042F,
+  0.0196137903F, 0.0206982797F, 0.0218115284F, 0.0229534910F,
+  0.0241241208F, 0.0253233698F, 0.0265511886F, 0.0278075263F,
+  0.0290923308F, 0.0304055484F, 0.0317471241F, 0.0331170013F,
+  0.0345151222F, 0.0359414274F, 0.0373958560F, 0.0388783456F,
+  0.0403888325F, 0.0419272511F, 0.0434935347F, 0.0450876148F,
+  0.0467094213F, 0.0483588828F, 0.0500359261F, 0.0517404765F,
+  0.0534724575F, 0.0552317913F, 0.0570183983F, 0.0588321971F,
+  0.0606731048F, 0.0625410369F, 0.0644359070F, 0.0663576272F,
+  0.0683061077F, 0.0702812571F, 0.0722829821F, 0.0743111878F,
+  0.0763657775F, 0.0784466526F, 0.0805537129F, 0.0826868561F,
+  0.0848459782F, 0.0870309736F, 0.0892417345F, 0.0914781514F,
+  0.0937401128F, 0.0960275056F, 0.0983402145F, 0.1006781223F,
+  0.1030411101F, 0.1054290568F, 0.1078418397F, 0.1102793336F,
+  0.1127414119F, 0.1152279457F, 0.1177388042F, 0.1202738544F,
+  0.1228329618F, 0.1254159892F, 0.1280227980F, 0.1306532471F,
+  0.1333071937F, 0.1359844927F, 0.1386849970F, 0.1414085575F,
+  0.1441550230F, 0.1469242403F, 0.1497160539F, 0.1525303063F,
+  0.1553668381F, 0.1582254875F, 0.1611060909F, 0.1640084822F,
+  0.1669324936F, 0.1698779549F, 0.1728446939F, 0.1758325362F,
+  0.1788413055F, 0.1818708232F, 0.1849209084F, 0.1879913785F,
+  0.1910820485F, 0.1941927312F, 0.1973232376F, 0.2004733764F,
+  0.2036429541F, 0.2068317752F, 0.2100396421F, 0.2132663552F,
+  0.2165117125F, 0.2197755102F, 0.2230575422F, 0.2263576007F,
+  0.2296754753F, 0.2330109540F, 0.2363638225F, 0.2397338646F,
+  0.2431208619F, 0.2465245941F, 0.2499448389F, 0.2533813719F,
+  0.2568339669F, 0.2603023956F, 0.2637864277F, 0.2672858312F,
+  0.2708003718F, 0.2743298135F, 0.2778739186F, 0.2814324472F,
+  0.2850051576F, 0.2885918065F, 0.2921921485F, 0.2958059366F,
+  0.2994329219F, 0.3030728538F, 0.3067254799F, 0.3103905462F,
+  0.3140677969F, 0.3177569747F, 0.3214578205F, 0.3251700736F,
+  0.3288934718F, 0.3326277513F, 0.3363726468F, 0.3401278914F,
+  0.3438932168F, 0.3476683533F, 0.3514530297F, 0.3552469734F,
+  0.3590499106F, 0.3628615659F, 0.3666816630F, 0.3705099239F,
+  0.3743460698F, 0.3781898204F, 0.3820408945F, 0.3858990095F,
+  0.3897638820F, 0.3936352274F, 0.3975127601F, 0.4013961936F,
+  0.4052852405F, 0.4091796123F, 0.4130790198F, 0.4169831732F,
+  0.4208917815F, 0.4248045534F, 0.4287211965F, 0.4326414181F,
+  0.4365649248F, 0.4404914225F, 0.4444206167F, 0.4483522125F,
+  0.4522859146F, 0.4562214270F, 0.4601584538F, 0.4640966984F,
+  0.4680358644F, 0.4719756548F, 0.4759157726F, 0.4798559209F,
+  0.4837958024F, 0.4877351199F, 0.4916735765F, 0.4956108751F,
+  0.4995467188F, 0.5034808109F, 0.5074128550F, 0.5113425550F,
+  0.5152696149F, 0.5191937395F, 0.5231146336F, 0.5270320028F,
+  0.5309455530F, 0.5348549910F, 0.5387600239F, 0.5426603597F,
+  0.5465557070F, 0.5504457754F, 0.5543302752F, 0.5582089175F,
+  0.5620814145F, 0.5659474793F, 0.5698068262F, 0.5736591704F,
+  0.5775042283F, 0.5813417176F, 0.5851713571F, 0.5889928670F,
+  0.5928059689F, 0.5966103856F, 0.6004058415F, 0.6041920626F,
+  0.6079687761F, 0.6117357113F, 0.6154925986F, 0.6192391705F,
+  0.6229751612F, 0.6267003064F, 0.6304143441F, 0.6341170137F,
+  0.6378080569F, 0.6414872173F, 0.6451542405F, 0.6488088741F,
+  0.6524508681F, 0.6560799742F, 0.6596959469F, 0.6632985424F,
+  0.6668875197F, 0.6704626398F, 0.6740236662F, 0.6775703649F,
+  0.6811025043F, 0.6846198554F, 0.6881221916F, 0.6916092892F,
+  0.6950809269F, 0.6985368861F, 0.7019769510F, 0.7054009085F,
+  0.7088085484F, 0.7121996632F, 0.7155740484F, 0.7189315023F,
+  0.7222718263F, 0.7255948245F, 0.7289003043F, 0.7321880760F,
+  0.7354579530F, 0.7387097518F, 0.7419432921F, 0.7451583966F,
+  0.7483548915F, 0.7515326059F, 0.7546913723F, 0.7578310265F,
+  0.7609514077F, 0.7640523581F, 0.7671337237F, 0.7701953535F,
+  0.7732371001F, 0.7762588195F, 0.7792603711F, 0.7822416178F,
+  0.7852024259F, 0.7881426654F, 0.7910622097F, 0.7939609356F,
+  0.7968387237F, 0.7996954579F, 0.8025310261F, 0.8053453193F,
+  0.8081382324F, 0.8109096638F, 0.8136595156F, 0.8163876936F,
+  0.8190941071F, 0.8217786690F, 0.8244412960F, 0.8270819086F,
+  0.8297004305F, 0.8322967896F, 0.8348709171F, 0.8374227481F,
+  0.8399522213F, 0.8424592789F, 0.8449438672F, 0.8474059356F,
+  0.8498454378F, 0.8522623306F, 0.8546565748F, 0.8570281348F,
+  0.8593769787F, 0.8617030779F, 0.8640064080F, 0.8662869477F,
+  0.8685446796F, 0.8707795899F, 0.8729916682F, 0.8751809079F,
+  0.8773473059F, 0.8794908626F, 0.8816115819F, 0.8837094713F,
+  0.8857845418F, 0.8878368079F, 0.8898662874F, 0.8918730019F,
+  0.8938569760F, 0.8958182380F, 0.8977568194F, 0.8996727552F,
+  0.9015660837F, 0.9034368465F, 0.9052850885F, 0.9071108577F,
+  0.9089142057F, 0.9106951869F, 0.9124538591F, 0.9141902832F,
+  0.9159045233F, 0.9175966464F, 0.9192667228F, 0.9209148257F,
+  0.9225410313F, 0.9241454187F, 0.9257280701F, 0.9272890704F,
+  0.9288285075F, 0.9303464720F, 0.9318430576F, 0.9333183603F,
+  0.9347724792F, 0.9362055158F, 0.9376175745F, 0.9390087622F,
+  0.9403791881F, 0.9417289644F, 0.9430582055F, 0.9443670283F,
+  0.9456555521F, 0.9469238986F, 0.9481721917F, 0.9494005577F,
+  0.9506091252F, 0.9517980248F, 0.9529673894F, 0.9541173540F,
+  0.9552480557F, 0.9563596334F, 0.9574522282F, 0.9585259830F,
+  0.9595810428F, 0.9606175542F, 0.9616356656F, 0.9626355274F,
+  0.9636172915F, 0.9645811114F, 0.9655271425F, 0.9664555414F,
+  0.9673664664F, 0.9682600774F, 0.9691365355F, 0.9699960034F,
+  0.9708386448F, 0.9716646250F, 0.9724741103F, 0.9732672685F,
+  0.9740442683F, 0.9748052795F, 0.9755504729F, 0.9762800205F,
+  0.9769940950F, 0.9776928703F, 0.9783765210F, 0.9790452223F,
+  0.9796991504F, 0.9803384823F, 0.9809633954F, 0.9815740679F,
+  0.9821706784F, 0.9827534063F, 0.9833224312F, 0.9838779332F,
+  0.9844200928F, 0.9849490910F, 0.9854651087F, 0.9859683274F,
+  0.9864589286F, 0.9869370940F, 0.9874030054F, 0.9878568447F,
+  0.9882987937F, 0.9887290343F, 0.9891477481F, 0.9895551169F,
+  0.9899513220F, 0.9903365446F, 0.9907109658F, 0.9910747662F,
+  0.9914281260F, 0.9917712252F, 0.9921042433F, 0.9924273593F,
+  0.9927407516F, 0.9930445982F, 0.9933390763F, 0.9936243626F,
+  0.9939006331F, 0.9941680631F, 0.9944268269F, 0.9946770982F,
+  0.9949190498F, 0.9951528537F, 0.9953786808F, 0.9955967011F,
+  0.9958070836F, 0.9960099963F, 0.9962056061F, 0.9963940787F,
+  0.9965755786F, 0.9967502693F, 0.9969183129F, 0.9970798704F,
+  0.9972351013F, 0.9973841640F, 0.9975272151F, 0.9976644103F,
+  0.9977959036F, 0.9979218476F, 0.9980423932F, 0.9981576901F,
+  0.9982678862F, 0.9983731278F, 0.9984735596F, 0.9985693247F,
+  0.9986605645F, 0.9987474186F, 0.9988300248F, 0.9989085193F,
+  0.9989830364F, 0.9990537085F, 0.9991206662F, 0.9991840382F,
+  0.9992439513F, 0.9993005303F, 0.9993538982F, 0.9994041757F,
+  0.9994514817F, 0.9994959330F, 0.9995376444F, 0.9995767286F,
+  0.9996132960F, 0.9996474550F, 0.9996793121F, 0.9997089710F,
+  0.9997365339F, 0.9997621003F, 0.9997857677F, 0.9998076311F,
+  0.9998277836F, 0.9998463156F, 0.9998633155F, 0.9998788692F,
+  0.9998930603F, 0.9999059701F, 0.9999176774F, 0.9999282586F,
+  0.9999377880F, 0.9999463370F, 0.9999539749F, 0.9999607685F,
+  0.9999667820F, 0.9999720773F, 0.9999767136F, 0.9999807479F,
+  0.9999842344F, 0.9999872249F, 0.9999897688F, 0.9999919127F,
+  0.9999937009F, 0.9999951749F, 0.9999963738F, 0.9999973342F,
+  0.9999980900F, 0.9999986724F, 0.9999991103F, 0.9999994297F,
+  0.9999996543F, 0.9999998049F, 0.9999999000F, 0.9999999552F,
+  0.9999999836F, 0.9999999957F, 0.9999999994F, 1.0000000000F,
+};
+
+static const float vwin2048[1024] = {
+  0.0000009241F, 0.0000083165F, 0.0000231014F, 0.0000452785F,
+  0.0000748476F, 0.0001118085F, 0.0001561608F, 0.0002079041F,
+  0.0002670379F, 0.0003335617F, 0.0004074748F, 0.0004887765F,
+  0.0005774661F, 0.0006735427F, 0.0007770054F, 0.0008878533F,
+  0.0010060853F, 0.0011317002F, 0.0012646969F, 0.0014050742F,
+  0.0015528307F, 0.0017079650F, 0.0018704756F, 0.0020403610F,
+  0.0022176196F, 0.0024022497F, 0.0025942495F, 0.0027936173F,
+  0.0030003511F, 0.0032144490F, 0.0034359088F, 0.0036647286F,
+  0.0039009061F, 0.0041444391F, 0.0043953253F, 0.0046535621F,
+  0.0049191472F, 0.0051920781F, 0.0054723520F, 0.0057599664F,
+  0.0060549184F, 0.0063572052F, 0.0066668239F, 0.0069837715F,
+  0.0073080449F, 0.0076396410F, 0.0079785566F, 0.0083247884F,
+  0.0086783330F, 0.0090391871F, 0.0094073470F, 0.0097828092F,
+  0.0101655700F, 0.0105556258F, 0.0109529726F, 0.0113576065F,
+  0.0117695237F, 0.0121887200F, 0.0126151913F, 0.0130489335F,
+  0.0134899422F, 0.0139382130F, 0.0143937415F, 0.0148565233F,
+  0.0153265536F, 0.0158038279F, 0.0162883413F, 0.0167800889F,
+  0.0172790660F, 0.0177852675F, 0.0182986882F, 0.0188193231F,
+  0.0193471668F, 0.0198822141F, 0.0204244594F, 0.0209738974F,
+  0.0215305225F, 0.0220943289F, 0.0226653109F, 0.0232434627F,
+  0.0238287784F, 0.0244212519F, 0.0250208772F, 0.0256276481F,
+  0.0262415582F, 0.0268626014F, 0.0274907711F, 0.0281260608F,
+  0.0287684638F, 0.0294179736F, 0.0300745833F, 0.0307382859F,
+  0.0314090747F, 0.0320869424F, 0.0327718819F, 0.0334638860F,
+  0.0341629474F, 0.0348690586F, 0.0355822122F, 0.0363024004F,
+  0.0370296157F, 0.0377638502F, 0.0385050960F, 0.0392533451F,
+  0.0400085896F, 0.0407708211F, 0.0415400315F, 0.0423162123F,
+  0.0430993552F, 0.0438894515F, 0.0446864926F, 0.0454904698F,
+  0.0463013742F, 0.0471191969F, 0.0479439288F, 0.0487755607F,
+  0.0496140836F, 0.0504594879F, 0.0513117642F, 0.0521709031F,
+  0.0530368949F, 0.0539097297F, 0.0547893979F, 0.0556758894F,
+  0.0565691941F, 0.0574693019F, 0.0583762026F, 0.0592898858F,
+  0.0602103410F, 0.0611375576F, 0.0620715250F, 0.0630122324F,
+  0.0639596688F, 0.0649138234F, 0.0658746848F, 0.0668422421F,
+  0.0678164838F, 0.0687973985F, 0.0697849746F, 0.0707792005F,
+  0.0717800645F, 0.0727875547F, 0.0738016591F, 0.0748223656F,
+  0.0758496620F, 0.0768835359F, 0.0779239751F, 0.0789709668F,
+  0.0800244985F, 0.0810845574F, 0.0821511306F, 0.0832242052F,
+  0.0843037679F, 0.0853898056F, 0.0864823050F, 0.0875812525F,
+  0.0886866347F, 0.0897984378F, 0.0909166480F, 0.0920412513F,
+  0.0931722338F, 0.0943095813F, 0.0954532795F, 0.0966033140F,
+  0.0977596702F, 0.0989223336F, 0.1000912894F, 0.1012665227F,
+  0.1024480185F, 0.1036357616F, 0.1048297369F, 0.1060299290F,
+  0.1072363224F, 0.1084489014F, 0.1096676504F, 0.1108925534F,
+  0.1121235946F, 0.1133607577F, 0.1146040267F, 0.1158533850F,
+  0.1171088163F, 0.1183703040F, 0.1196378312F, 0.1209113812F,
+  0.1221909370F, 0.1234764815F, 0.1247679974F, 0.1260654674F,
+  0.1273688740F, 0.1286781995F, 0.1299934263F, 0.1313145365F,
+  0.1326415121F, 0.1339743349F, 0.1353129866F, 0.1366574490F,
+  0.1380077035F, 0.1393637315F, 0.1407255141F, 0.1420930325F,
+  0.1434662677F, 0.1448452004F, 0.1462298115F, 0.1476200814F,
+  0.1490159906F, 0.1504175195F, 0.1518246482F, 0.1532373569F,
+  0.1546556253F, 0.1560794333F, 0.1575087606F, 0.1589435866F,
+  0.1603838909F, 0.1618296526F, 0.1632808509F, 0.1647374648F,
+  0.1661994731F, 0.1676668546F, 0.1691395880F, 0.1706176516F,
+  0.1721010238F, 0.1735896829F, 0.1750836068F, 0.1765827736F,
+  0.1780871610F, 0.1795967468F, 0.1811115084F, 0.1826314234F,
+  0.1841564689F, 0.1856866221F, 0.1872218600F, 0.1887621595F,
+  0.1903074974F, 0.1918578503F, 0.1934131947F, 0.1949735068F,
+  0.1965387630F, 0.1981089393F, 0.1996840117F, 0.2012639560F,
+  0.2028487479F, 0.2044383630F, 0.2060327766F, 0.2076319642F,
+  0.2092359007F, 0.2108445614F, 0.2124579211F, 0.2140759545F,
+  0.2156986364F, 0.2173259411F, 0.2189578432F, 0.2205943168F,
+  0.2222353361F, 0.2238808751F, 0.2255309076F, 0.2271854073F,
+  0.2288443480F, 0.2305077030F, 0.2321754457F, 0.2338475493F,
+  0.2355239869F, 0.2372047315F, 0.2388897560F, 0.2405790329F,
+  0.2422725350F, 0.2439702347F, 0.2456721043F, 0.2473781159F,
+  0.2490882418F, 0.2508024539F, 0.2525207240F, 0.2542430237F,
+  0.2559693248F, 0.2576995986F, 0.2594338166F, 0.2611719498F,
+  0.2629139695F, 0.2646598466F, 0.2664095520F, 0.2681630564F,
+  0.2699203304F, 0.2716813445F, 0.2734460691F, 0.2752144744F,
+  0.2769865307F, 0.2787622079F, 0.2805414760F, 0.2823243047F,
+  0.2841106637F, 0.2859005227F, 0.2876938509F, 0.2894906179F,
+  0.2912907928F, 0.2930943447F, 0.2949012426F, 0.2967114554F,
+  0.2985249520F, 0.3003417009F, 0.3021616708F, 0.3039848301F,
+  0.3058111471F, 0.3076405901F, 0.3094731273F, 0.3113087266F,
+  0.3131473560F, 0.3149889833F, 0.3168335762F, 0.3186811024F,
+  0.3205315294F, 0.3223848245F, 0.3242409552F, 0.3260998886F,
+  0.3279615918F, 0.3298260319F, 0.3316931758F, 0.3335629903F,
+  0.3354354423F, 0.3373104982F, 0.3391881247F, 0.3410682882F,
+  0.3429509551F, 0.3448360917F, 0.3467236642F, 0.3486136387F,
+  0.3505059811F, 0.3524006575F, 0.3542976336F, 0.3561968753F,
+  0.3580983482F, 0.3600020179F, 0.3619078499F, 0.3638158096F,
+  0.3657258625F, 0.3676379737F, 0.3695521086F, 0.3714682321F,
+  0.3733863094F, 0.3753063055F, 0.3772281852F, 0.3791519134F,
+  0.3810774548F, 0.3830047742F, 0.3849338362F, 0.3868646053F,
+  0.3887970459F, 0.3907311227F, 0.3926667998F, 0.3946040417F,
+  0.3965428125F, 0.3984830765F, 0.4004247978F, 0.4023679403F,
+  0.4043124683F, 0.4062583455F, 0.4082055359F, 0.4101540034F,
+  0.4121037117F, 0.4140546246F, 0.4160067058F, 0.4179599190F,
+  0.4199142277F, 0.4218695956F, 0.4238259861F, 0.4257833627F,
+  0.4277416888F, 0.4297009279F, 0.4316610433F, 0.4336219983F,
+  0.4355837562F, 0.4375462803F, 0.4395095337F, 0.4414734797F,
+  0.4434380815F, 0.4454033021F, 0.4473691046F, 0.4493354521F,
+  0.4513023078F, 0.4532696345F, 0.4552373954F, 0.4572055533F,
+  0.4591740713F, 0.4611429123F, 0.4631120393F, 0.4650814151F,
+  0.4670510028F, 0.4690207650F, 0.4709906649F, 0.4729606651F,
+  0.4749307287F, 0.4769008185F, 0.4788708972F, 0.4808409279F,
+  0.4828108732F, 0.4847806962F, 0.4867503597F, 0.4887198264F,
+  0.4906890593F, 0.4926580213F, 0.4946266753F, 0.4965949840F,
+  0.4985629105F, 0.5005304176F, 0.5024974683F, 0.5044640255F,
+  0.5064300522F, 0.5083955114F, 0.5103603659F, 0.5123245790F,
+  0.5142881136F, 0.5162509328F, 0.5182129997F, 0.5201742774F,
+  0.5221347290F, 0.5240943178F, 0.5260530070F, 0.5280107598F,
+  0.5299675395F, 0.5319233095F, 0.5338780330F, 0.5358316736F,
+  0.5377841946F, 0.5397355596F, 0.5416857320F, 0.5436346755F,
+  0.5455823538F, 0.5475287304F, 0.5494737691F, 0.5514174337F,
+  0.5533596881F, 0.5553004962F, 0.5572398218F, 0.5591776291F,
+  0.5611138821F, 0.5630485449F, 0.5649815818F, 0.5669129570F,
+  0.5688426349F, 0.5707705799F, 0.5726967564F, 0.5746211290F,
+  0.5765436624F, 0.5784643212F, 0.5803830702F, 0.5822998743F,
+  0.5842146984F, 0.5861275076F, 0.5880382669F, 0.5899469416F,
+  0.5918534968F, 0.5937578981F, 0.5956601107F, 0.5975601004F,
+  0.5994578326F, 0.6013532732F, 0.6032463880F, 0.6051371429F,
+  0.6070255039F, 0.6089114372F, 0.6107949090F, 0.6126758856F,
+  0.6145543334F, 0.6164302191F, 0.6183035092F, 0.6201741706F,
+  0.6220421700F, 0.6239074745F, 0.6257700513F, 0.6276298674F,
+  0.6294868903F, 0.6313410873F, 0.6331924262F, 0.6350408745F,
+  0.6368864001F, 0.6387289710F, 0.6405685552F, 0.6424051209F,
+  0.6442386364F, 0.6460690702F, 0.6478963910F, 0.6497205673F,
+  0.6515415682F, 0.6533593625F, 0.6551739194F, 0.6569852082F,
+  0.6587931984F, 0.6605978593F, 0.6623991609F, 0.6641970728F,
+  0.6659915652F, 0.6677826081F, 0.6695701718F, 0.6713542268F,
+  0.6731347437F, 0.6749116932F, 0.6766850461F, 0.6784547736F,
+  0.6802208469F, 0.6819832374F, 0.6837419164F, 0.6854968559F,
+  0.6872480275F, 0.6889954034F, 0.6907389556F, 0.6924786566F,
+  0.6942144788F, 0.6959463950F, 0.6976743780F, 0.6993984008F,
+  0.7011184365F, 0.7028344587F, 0.7045464407F, 0.7062543564F,
+  0.7079581796F, 0.7096578844F, 0.7113534450F, 0.7130448359F,
+  0.7147320316F, 0.7164150070F, 0.7180937371F, 0.7197681970F,
+  0.7214383620F, 0.7231042077F, 0.7247657098F, 0.7264228443F,
+  0.7280755871F, 0.7297239147F, 0.7313678035F, 0.7330072301F,
+  0.7346421715F, 0.7362726046F, 0.7378985069F, 0.7395198556F,
+  0.7411366285F, 0.7427488034F, 0.7443563584F, 0.7459592717F,
+  0.7475575218F, 0.7491510873F, 0.7507399471F, 0.7523240803F,
+  0.7539034661F, 0.7554780839F, 0.7570479136F, 0.7586129349F,
+  0.7601731279F, 0.7617284730F, 0.7632789506F, 0.7648245416F,
+  0.7663652267F, 0.7679009872F, 0.7694318044F, 0.7709576599F,
+  0.7724785354F, 0.7739944130F, 0.7755052749F, 0.7770111035F,
+  0.7785118815F, 0.7800075916F, 0.7814982170F, 0.7829837410F,
+  0.7844641472F, 0.7859394191F, 0.7874095408F, 0.7888744965F,
+  0.7903342706F, 0.7917888476F, 0.7932382124F, 0.7946823501F,
+  0.7961212460F, 0.7975548855F, 0.7989832544F, 0.8004063386F,
+  0.8018241244F, 0.8032365981F, 0.8046437463F, 0.8060455560F,
+  0.8074420141F, 0.8088331080F, 0.8102188253F, 0.8115991536F,
+  0.8129740810F, 0.8143435957F, 0.8157076861F, 0.8170663409F,
+  0.8184195489F, 0.8197672994F, 0.8211095817F, 0.8224463853F,
+  0.8237777001F, 0.8251035161F, 0.8264238235F, 0.8277386129F,
+  0.8290478750F, 0.8303516008F, 0.8316497814F, 0.8329424083F,
+  0.8342294731F, 0.8355109677F, 0.8367868841F, 0.8380572148F,
+  0.8393219523F, 0.8405810893F, 0.8418346190F, 0.8430825345F,
+  0.8443248294F, 0.8455614974F, 0.8467925323F, 0.8480179285F,
+  0.8492376802F, 0.8504517822F, 0.8516602292F, 0.8528630164F,
+  0.8540601391F, 0.8552515928F, 0.8564373733F, 0.8576174766F,
+  0.8587918990F, 0.8599606368F, 0.8611236868F, 0.8622810460F,
+  0.8634327113F, 0.8645786802F, 0.8657189504F, 0.8668535195F,
+  0.8679823857F, 0.8691055472F, 0.8702230025F, 0.8713347503F,
+  0.8724407896F, 0.8735411194F, 0.8746357394F, 0.8757246489F,
+  0.8768078479F, 0.8778853364F, 0.8789571146F, 0.8800231832F,
+  0.8810835427F, 0.8821381942F, 0.8831871387F, 0.8842303777F,
+  0.8852679127F, 0.8862997456F, 0.8873258784F, 0.8883463132F,
+  0.8893610527F, 0.8903700994F, 0.8913734562F, 0.8923711263F,
+  0.8933631129F, 0.8943494196F, 0.8953300500F, 0.8963050083F,
+  0.8972742985F, 0.8982379249F, 0.8991958922F, 0.9001482052F,
+  0.9010948688F, 0.9020358883F, 0.9029712690F, 0.9039010165F,
+  0.9048251367F, 0.9057436357F, 0.9066565195F, 0.9075637946F,
+  0.9084654678F, 0.9093615456F, 0.9102520353F, 0.9111369440F,
+  0.9120162792F, 0.9128900484F, 0.9137582595F, 0.9146209204F,
+  0.9154780394F, 0.9163296248F, 0.9171756853F, 0.9180162296F,
+  0.9188512667F, 0.9196808057F, 0.9205048559F, 0.9213234270F,
+  0.9221365285F, 0.9229441704F, 0.9237463629F, 0.9245431160F,
+  0.9253344404F, 0.9261203465F, 0.9269008453F, 0.9276759477F,
+  0.9284456648F, 0.9292100080F, 0.9299689889F, 0.9307226190F,
+  0.9314709103F, 0.9322138747F, 0.9329515245F, 0.9336838721F,
+  0.9344109300F, 0.9351327108F, 0.9358492275F, 0.9365604931F,
+  0.9372665208F, 0.9379673239F, 0.9386629160F, 0.9393533107F,
+  0.9400385220F, 0.9407185637F, 0.9413934501F, 0.9420631954F,
+  0.9427278141F, 0.9433873208F, 0.9440417304F, 0.9446910576F,
+  0.9453353176F, 0.9459745255F, 0.9466086968F, 0.9472378469F,
+  0.9478619915F, 0.9484811463F, 0.9490953274F, 0.9497045506F,
+  0.9503088323F, 0.9509081888F, 0.9515026365F, 0.9520921921F,
+  0.9526768723F, 0.9532566940F, 0.9538316742F, 0.9544018300F,
+  0.9549671786F, 0.9555277375F, 0.9560835241F, 0.9566345562F,
+  0.9571808513F, 0.9577224275F, 0.9582593027F, 0.9587914949F,
+  0.9593190225F, 0.9598419038F, 0.9603601571F, 0.9608738012F,
+  0.9613828546F, 0.9618873361F, 0.9623872646F, 0.9628826591F,
+  0.9633735388F, 0.9638599227F, 0.9643418303F, 0.9648192808F,
+  0.9652922939F, 0.9657608890F, 0.9662250860F, 0.9666849046F,
+  0.9671403646F, 0.9675914861F, 0.9680382891F, 0.9684807937F,
+  0.9689190202F, 0.9693529890F, 0.9697827203F, 0.9702082347F,
+  0.9706295529F, 0.9710466953F, 0.9714596828F, 0.9718685362F,
+  0.9722732762F, 0.9726739240F, 0.9730705005F, 0.9734630267F,
+  0.9738515239F, 0.9742360134F, 0.9746165163F, 0.9749930540F,
+  0.9753656481F, 0.9757343198F, 0.9760990909F, 0.9764599829F,
+  0.9768170175F, 0.9771702164F, 0.9775196013F, 0.9778651941F,
+  0.9782070167F, 0.9785450909F, 0.9788794388F, 0.9792100824F,
+  0.9795370437F, 0.9798603449F, 0.9801800080F, 0.9804960554F,
+  0.9808085092F, 0.9811173916F, 0.9814227251F, 0.9817245318F,
+  0.9820228343F, 0.9823176549F, 0.9826090160F, 0.9828969402F,
+  0.9831814498F, 0.9834625674F, 0.9837403156F, 0.9840147169F,
+  0.9842857939F, 0.9845535692F, 0.9848180654F, 0.9850793052F,
+  0.9853373113F, 0.9855921062F, 0.9858437127F, 0.9860921535F,
+  0.9863374512F, 0.9865796287F, 0.9868187085F, 0.9870547136F,
+  0.9872876664F, 0.9875175899F, 0.9877445067F, 0.9879684396F,
+  0.9881894112F, 0.9884074444F, 0.9886225619F, 0.9888347863F,
+  0.9890441404F, 0.9892506468F, 0.9894543284F, 0.9896552077F,
+  0.9898533074F, 0.9900486502F, 0.9902412587F, 0.9904311555F,
+  0.9906183633F, 0.9908029045F, 0.9909848019F, 0.9911640779F,
+  0.9913407550F, 0.9915148557F, 0.9916864025F, 0.9918554179F,
+  0.9920219241F, 0.9921859437F, 0.9923474989F, 0.9925066120F,
+  0.9926633054F, 0.9928176012F, 0.9929695218F, 0.9931190891F,
+  0.9932663254F, 0.9934112527F, 0.9935538932F, 0.9936942686F,
+  0.9938324012F, 0.9939683126F, 0.9941020248F, 0.9942335597F,
+  0.9943629388F, 0.9944901841F, 0.9946153170F, 0.9947383593F,
+  0.9948593325F, 0.9949782579F, 0.9950951572F, 0.9952100516F,
+  0.9953229625F, 0.9954339111F, 0.9955429186F, 0.9956500062F,
+  0.9957551948F, 0.9958585056F, 0.9959599593F, 0.9960595769F,
+  0.9961573792F, 0.9962533869F, 0.9963476206F, 0.9964401009F,
+  0.9965308483F, 0.9966198833F, 0.9967072261F, 0.9967928971F,
+  0.9968769164F, 0.9969593041F, 0.9970400804F, 0.9971192651F,
+  0.9971968781F, 0.9972729391F, 0.9973474680F, 0.9974204842F,
+  0.9974920074F, 0.9975620569F, 0.9976306521F, 0.9976978122F,
+  0.9977635565F, 0.9978279039F, 0.9978908736F, 0.9979524842F,
+  0.9980127547F, 0.9980717037F, 0.9981293499F, 0.9981857116F,
+  0.9982408073F, 0.9982946554F, 0.9983472739F, 0.9983986810F,
+  0.9984488947F, 0.9984979328F, 0.9985458132F, 0.9985925534F,
+  0.9986381711F, 0.9986826838F, 0.9987261086F, 0.9987684630F,
+  0.9988097640F, 0.9988500286F, 0.9988892738F, 0.9989275163F,
+  0.9989647727F, 0.9990010597F, 0.9990363938F, 0.9990707911F,
+  0.9991042679F, 0.9991368404F, 0.9991685244F, 0.9991993358F,
+  0.9992292905F, 0.9992584038F, 0.9992866914F, 0.9993141686F,
+  0.9993408506F, 0.9993667526F, 0.9993918895F, 0.9994162761F,
+  0.9994399273F, 0.9994628576F, 0.9994850815F, 0.9995066133F,
+  0.9995274672F, 0.9995476574F, 0.9995671978F, 0.9995861021F,
+  0.9996043841F, 0.9996220573F, 0.9996391352F, 0.9996556310F,
+  0.9996715579F, 0.9996869288F, 0.9997017568F, 0.9997160543F,
+  0.9997298342F, 0.9997431088F, 0.9997558905F, 0.9997681914F,
+  0.9997800236F, 0.9997913990F, 0.9998023292F, 0.9998128261F,
+  0.9998229009F, 0.9998325650F, 0.9998418296F, 0.9998507058F,
+  0.9998592044F, 0.9998673362F, 0.9998751117F, 0.9998825415F,
+  0.9998896358F, 0.9998964047F, 0.9999028584F, 0.9999090066F,
+  0.9999148590F, 0.9999204253F, 0.9999257148F, 0.9999307368F,
+  0.9999355003F, 0.9999400144F, 0.9999442878F, 0.9999483293F,
+  0.9999521472F, 0.9999557499F, 0.9999591457F, 0.9999623426F,
+  0.9999653483F, 0.9999681708F, 0.9999708175F, 0.9999732959F,
+  0.9999756132F, 0.9999777765F, 0.9999797928F, 0.9999816688F,
+  0.9999834113F, 0.9999850266F, 0.9999865211F, 0.9999879009F,
+  0.9999891721F, 0.9999903405F, 0.9999914118F, 0.9999923914F,
+  0.9999932849F, 0.9999940972F, 0.9999948336F, 0.9999954989F,
+  0.9999960978F, 0.9999966349F, 0.9999971146F, 0.9999975411F,
+  0.9999979185F, 0.9999982507F, 0.9999985414F, 0.9999987944F,
+  0.9999990129F, 0.9999992003F, 0.9999993596F, 0.9999994939F,
+  0.9999996059F, 0.9999996981F, 0.9999997732F, 0.9999998333F,
+  0.9999998805F, 0.9999999170F, 0.9999999444F, 0.9999999643F,
+  0.9999999784F, 0.9999999878F, 0.9999999937F, 0.9999999972F,
+  0.9999999990F, 0.9999999997F, 1.0000000000F, 1.0000000000F,
+};
+
+static const float vwin4096[2048] = {
+  0.0000002310F, 0.0000020791F, 0.0000057754F, 0.0000113197F,
+  0.0000187121F, 0.0000279526F, 0.0000390412F, 0.0000519777F,
+  0.0000667623F, 0.0000833949F, 0.0001018753F, 0.0001222036F,
+  0.0001443798F, 0.0001684037F, 0.0001942754F, 0.0002219947F,
+  0.0002515616F, 0.0002829761F, 0.0003162380F, 0.0003513472F,
+  0.0003883038F, 0.0004271076F, 0.0004677584F, 0.0005102563F,
+  0.0005546011F, 0.0006007928F, 0.0006488311F, 0.0006987160F,
+  0.0007504474F, 0.0008040251F, 0.0008594490F, 0.0009167191F,
+  0.0009758351F, 0.0010367969F, 0.0010996044F, 0.0011642574F,
+  0.0012307558F, 0.0012990994F, 0.0013692880F, 0.0014413216F,
+  0.0015151998F, 0.0015909226F, 0.0016684898F, 0.0017479011F,
+  0.0018291565F, 0.0019122556F, 0.0019971983F, 0.0020839845F,
+  0.0021726138F, 0.0022630861F, 0.0023554012F, 0.0024495588F,
+  0.0025455588F, 0.0026434008F, 0.0027430847F, 0.0028446103F,
+  0.0029479772F, 0.0030531853F, 0.0031602342F, 0.0032691238F,
+  0.0033798538F, 0.0034924239F, 0.0036068338F, 0.0037230833F,
+  0.0038411721F, 0.0039610999F, 0.0040828664F, 0.0042064714F,
+  0.0043319145F, 0.0044591954F, 0.0045883139F, 0.0047192696F,
+  0.0048520622F, 0.0049866914F, 0.0051231569F, 0.0052614583F,
+  0.0054015953F, 0.0055435676F, 0.0056873748F, 0.0058330166F,
+  0.0059804926F, 0.0061298026F, 0.0062809460F, 0.0064339226F,
+  0.0065887320F, 0.0067453738F, 0.0069038476F, 0.0070641531F,
+  0.0072262899F, 0.0073902575F, 0.0075560556F, 0.0077236838F,
+  0.0078931417F, 0.0080644288F, 0.0082375447F, 0.0084124891F,
+  0.0085892615F, 0.0087678614F, 0.0089482885F, 0.0091305422F,
+  0.0093146223F, 0.0095005281F, 0.0096882592F, 0.0098778153F,
+  0.0100691958F, 0.0102624002F, 0.0104574281F, 0.0106542791F,
+  0.0108529525F, 0.0110534480F, 0.0112557651F, 0.0114599032F,
+  0.0116658618F, 0.0118736405F, 0.0120832387F, 0.0122946560F,
+  0.0125078917F, 0.0127229454F, 0.0129398166F, 0.0131585046F,
+  0.0133790090F, 0.0136013292F, 0.0138254647F, 0.0140514149F,
+  0.0142791792F, 0.0145087572F, 0.0147401481F, 0.0149733515F,
+  0.0152083667F, 0.0154451932F, 0.0156838304F, 0.0159242777F,
+  0.0161665345F, 0.0164106001F, 0.0166564741F, 0.0169041557F,
+  0.0171536443F, 0.0174049393F, 0.0176580401F, 0.0179129461F,
+  0.0181696565F, 0.0184281708F, 0.0186884883F, 0.0189506084F,
+  0.0192145303F, 0.0194802535F, 0.0197477772F, 0.0200171008F,
+  0.0202882236F, 0.0205611449F, 0.0208358639F, 0.0211123801F,
+  0.0213906927F, 0.0216708011F, 0.0219527043F, 0.0222364019F,
+  0.0225218930F, 0.0228091769F, 0.0230982529F, 0.0233891203F,
+  0.0236817782F, 0.0239762259F, 0.0242724628F, 0.0245704880F,
+  0.0248703007F, 0.0251719002F, 0.0254752858F, 0.0257804565F,
+  0.0260874117F, 0.0263961506F, 0.0267066722F, 0.0270189760F,
+  0.0273330609F, 0.0276489263F, 0.0279665712F, 0.0282859949F,
+  0.0286071966F, 0.0289301753F, 0.0292549303F, 0.0295814607F,
+  0.0299097656F, 0.0302398442F, 0.0305716957F, 0.0309053191F,
+  0.0312407135F, 0.0315778782F, 0.0319168122F, 0.0322575145F,
+  0.0325999844F, 0.0329442209F, 0.0332902231F, 0.0336379900F,
+  0.0339875208F, 0.0343388146F, 0.0346918703F, 0.0350466871F,
+  0.0354032640F, 0.0357616000F, 0.0361216943F, 0.0364835458F,
+  0.0368471535F, 0.0372125166F, 0.0375796339F, 0.0379485046F,
+  0.0383191276F, 0.0386915020F, 0.0390656267F, 0.0394415008F,
+  0.0398191231F, 0.0401984927F, 0.0405796086F, 0.0409624698F,
+  0.0413470751F, 0.0417334235F, 0.0421215141F, 0.0425113457F,
+  0.0429029172F, 0.0432962277F, 0.0436912760F, 0.0440880610F,
+  0.0444865817F, 0.0448868370F, 0.0452888257F, 0.0456925468F,
+  0.0460979992F, 0.0465051816F, 0.0469140931F, 0.0473247325F,
+  0.0477370986F, 0.0481511902F, 0.0485670064F, 0.0489845458F,
+  0.0494038074F, 0.0498247899F, 0.0502474922F, 0.0506719131F,
+  0.0510980514F, 0.0515259060F, 0.0519554756F, 0.0523867590F,
+  0.0528197550F, 0.0532544624F, 0.0536908800F, 0.0541290066F,
+  0.0545688408F, 0.0550103815F, 0.0554536274F, 0.0558985772F,
+  0.0563452297F, 0.0567935837F, 0.0572436377F, 0.0576953907F,
+  0.0581488412F, 0.0586039880F, 0.0590608297F, 0.0595193651F,
+  0.0599795929F, 0.0604415117F, 0.0609051202F, 0.0613704170F,
+  0.0618374009F, 0.0623060704F, 0.0627764243F, 0.0632484611F,
+  0.0637221795F, 0.0641975781F, 0.0646746555F, 0.0651534104F,
+  0.0656338413F, 0.0661159469F, 0.0665997257F, 0.0670851763F,
+  0.0675722973F, 0.0680610873F, 0.0685515448F, 0.0690436684F,
+  0.0695374567F, 0.0700329081F, 0.0705300213F, 0.0710287947F,
+  0.0715292269F, 0.0720313163F, 0.0725350616F, 0.0730404612F,
+  0.0735475136F, 0.0740562172F, 0.0745665707F, 0.0750785723F,
+  0.0755922207F, 0.0761075143F, 0.0766244515F, 0.0771430307F,
+  0.0776632505F, 0.0781851092F, 0.0787086052F, 0.0792337371F,
+  0.0797605032F, 0.0802889018F, 0.0808189315F, 0.0813505905F,
+  0.0818838773F, 0.0824187903F, 0.0829553277F, 0.0834934881F,
+  0.0840332697F, 0.0845746708F, 0.0851176899F, 0.0856623252F,
+  0.0862085751F, 0.0867564379F, 0.0873059119F, 0.0878569954F,
+  0.0884096867F, 0.0889639840F, 0.0895198858F, 0.0900773902F,
+  0.0906364955F, 0.0911972000F, 0.0917595019F, 0.0923233995F,
+  0.0928888909F, 0.0934559745F, 0.0940246485F, 0.0945949110F,
+  0.0951667604F, 0.0957401946F, 0.0963152121F, 0.0968918109F,
+  0.0974699893F, 0.0980497454F, 0.0986310773F, 0.0992139832F,
+  0.0997984614F, 0.1003845098F, 0.1009721267F, 0.1015613101F,
+  0.1021520582F, 0.1027443692F, 0.1033382410F, 0.1039336718F,
+  0.1045306597F, 0.1051292027F, 0.1057292990F, 0.1063309466F,
+  0.1069341435F, 0.1075388878F, 0.1081451776F, 0.1087530108F,
+  0.1093623856F, 0.1099732998F, 0.1105857516F, 0.1111997389F,
+  0.1118152597F, 0.1124323121F, 0.1130508939F, 0.1136710032F,
+  0.1142926379F, 0.1149157960F, 0.1155404755F, 0.1161666742F,
+  0.1167943901F, 0.1174236211F, 0.1180543652F, 0.1186866202F,
+  0.1193203841F, 0.1199556548F, 0.1205924300F, 0.1212307078F,
+  0.1218704860F, 0.1225117624F, 0.1231545349F, 0.1237988013F,
+  0.1244445596F, 0.1250918074F, 0.1257405427F, 0.1263907632F,
+  0.1270424667F, 0.1276956512F, 0.1283503142F, 0.1290064537F,
+  0.1296640674F, 0.1303231530F, 0.1309837084F, 0.1316457312F,
+  0.1323092193F, 0.1329741703F, 0.1336405820F, 0.1343084520F,
+  0.1349777782F, 0.1356485582F, 0.1363207897F, 0.1369944704F,
+  0.1376695979F, 0.1383461700F, 0.1390241842F, 0.1397036384F,
+  0.1403845300F, 0.1410668567F, 0.1417506162F, 0.1424358061F,
+  0.1431224240F, 0.1438104674F, 0.1444999341F, 0.1451908216F,
+  0.1458831274F, 0.1465768492F, 0.1472719844F, 0.1479685308F,
+  0.1486664857F, 0.1493658468F, 0.1500666115F, 0.1507687775F,
+  0.1514723422F, 0.1521773031F, 0.1528836577F, 0.1535914035F,
+  0.1543005380F, 0.1550110587F, 0.1557229631F, 0.1564362485F,
+  0.1571509124F, 0.1578669524F, 0.1585843657F, 0.1593031499F,
+  0.1600233024F, 0.1607448205F, 0.1614677017F, 0.1621919433F,
+  0.1629175428F, 0.1636444975F, 0.1643728047F, 0.1651024619F,
+  0.1658334665F, 0.1665658156F, 0.1672995067F, 0.1680345371F,
+  0.1687709041F, 0.1695086050F, 0.1702476372F, 0.1709879978F,
+  0.1717296843F, 0.1724726938F, 0.1732170237F, 0.1739626711F,
+  0.1747096335F, 0.1754579079F, 0.1762074916F, 0.1769583819F,
+  0.1777105760F, 0.1784640710F, 0.1792188642F, 0.1799749529F,
+  0.1807323340F, 0.1814910049F, 0.1822509628F, 0.1830122046F,
+  0.1837747277F, 0.1845385292F, 0.1853036062F, 0.1860699558F,
+  0.1868375751F, 0.1876064613F, 0.1883766114F, 0.1891480226F,
+  0.1899206919F, 0.1906946164F, 0.1914697932F, 0.1922462194F,
+  0.1930238919F, 0.1938028079F, 0.1945829643F, 0.1953643583F,
+  0.1961469868F, 0.1969308468F, 0.1977159353F, 0.1985022494F,
+  0.1992897859F, 0.2000785420F, 0.2008685145F, 0.2016597005F,
+  0.2024520968F, 0.2032457005F, 0.2040405084F, 0.2048365175F,
+  0.2056337247F, 0.2064321269F, 0.2072317211F, 0.2080325041F,
+  0.2088344727F, 0.2096376240F, 0.2104419547F, 0.2112474618F,
+  0.2120541420F, 0.2128619923F, 0.2136710094F, 0.2144811902F,
+  0.2152925315F, 0.2161050301F, 0.2169186829F, 0.2177334866F,
+  0.2185494381F, 0.2193665340F, 0.2201847712F, 0.2210041465F,
+  0.2218246565F, 0.2226462981F, 0.2234690680F, 0.2242929629F,
+  0.2251179796F, 0.2259441147F, 0.2267713650F, 0.2275997272F,
+  0.2284291979F, 0.2292597739F, 0.2300914518F, 0.2309242283F,
+  0.2317581001F, 0.2325930638F, 0.2334291160F, 0.2342662534F,
+  0.2351044727F, 0.2359437703F, 0.2367841431F, 0.2376255875F,
+  0.2384681001F, 0.2393116776F, 0.2401563165F, 0.2410020134F,
+  0.2418487649F, 0.2426965675F, 0.2435454178F, 0.2443953122F,
+  0.2452462474F, 0.2460982199F, 0.2469512262F, 0.2478052628F,
+  0.2486603262F, 0.2495164129F, 0.2503735194F, 0.2512316421F,
+  0.2520907776F, 0.2529509222F, 0.2538120726F, 0.2546742250F,
+  0.2555373760F, 0.2564015219F, 0.2572666593F, 0.2581327845F,
+  0.2589998939F, 0.2598679840F, 0.2607370510F, 0.2616070916F,
+  0.2624781019F, 0.2633500783F, 0.2642230173F, 0.2650969152F,
+  0.2659717684F, 0.2668475731F, 0.2677243257F, 0.2686020226F,
+  0.2694806601F, 0.2703602344F, 0.2712407419F, 0.2721221789F,
+  0.2730045417F, 0.2738878265F, 0.2747720297F, 0.2756571474F,
+  0.2765431760F, 0.2774301117F, 0.2783179508F, 0.2792066895F,
+  0.2800963240F, 0.2809868505F, 0.2818782654F, 0.2827705647F,
+  0.2836637447F, 0.2845578016F, 0.2854527315F, 0.2863485307F,
+  0.2872451953F, 0.2881427215F, 0.2890411055F, 0.2899403433F,
+  0.2908404312F, 0.2917413654F, 0.2926431418F, 0.2935457567F,
+  0.2944492061F, 0.2953534863F, 0.2962585932F, 0.2971645230F,
+  0.2980712717F, 0.2989788356F, 0.2998872105F, 0.3007963927F,
+  0.3017063781F, 0.3026171629F, 0.3035287430F, 0.3044411145F,
+  0.3053542736F, 0.3062682161F, 0.3071829381F, 0.3080984356F,
+  0.3090147047F, 0.3099317413F, 0.3108495414F, 0.3117681011F,
+  0.3126874163F, 0.3136074830F, 0.3145282972F, 0.3154498548F,
+  0.3163721517F, 0.3172951841F, 0.3182189477F, 0.3191434385F,
+  0.3200686525F, 0.3209945856F, 0.3219212336F, 0.3228485927F,
+  0.3237766585F, 0.3247054271F, 0.3256348943F, 0.3265650560F,
+  0.3274959081F, 0.3284274465F, 0.3293596671F, 0.3302925657F,
+  0.3312261382F, 0.3321603804F, 0.3330952882F, 0.3340308574F,
+  0.3349670838F, 0.3359039634F, 0.3368414919F, 0.3377796651F,
+  0.3387184789F, 0.3396579290F, 0.3405980113F, 0.3415387216F,
+  0.3424800556F, 0.3434220091F, 0.3443645779F, 0.3453077578F,
+  0.3462515446F, 0.3471959340F, 0.3481409217F, 0.3490865036F,
+  0.3500326754F, 0.3509794328F, 0.3519267715F, 0.3528746873F,
+  0.3538231759F, 0.3547722330F, 0.3557218544F, 0.3566720357F,
+  0.3576227727F, 0.3585740610F, 0.3595258964F, 0.3604782745F,
+  0.3614311910F, 0.3623846417F, 0.3633386221F, 0.3642931280F,
+  0.3652481549F, 0.3662036987F, 0.3671597548F, 0.3681163191F,
+  0.3690733870F, 0.3700309544F, 0.3709890167F, 0.3719475696F,
+  0.3729066089F, 0.3738661299F, 0.3748261285F, 0.3757866002F,
+  0.3767475406F, 0.3777089453F, 0.3786708100F, 0.3796331302F,
+  0.3805959014F, 0.3815591194F, 0.3825227796F, 0.3834868777F,
+  0.3844514093F, 0.3854163698F, 0.3863817549F, 0.3873475601F,
+  0.3883137810F, 0.3892804131F, 0.3902474521F, 0.3912148933F,
+  0.3921827325F, 0.3931509650F, 0.3941195865F, 0.3950885925F,
+  0.3960579785F, 0.3970277400F, 0.3979978725F, 0.3989683716F,
+  0.3999392328F, 0.4009104516F, 0.4018820234F, 0.4028539438F,
+  0.4038262084F, 0.4047988125F, 0.4057717516F, 0.4067450214F,
+  0.4077186172F, 0.4086925345F, 0.4096667688F, 0.4106413155F,
+  0.4116161703F, 0.4125913284F, 0.4135667854F, 0.4145425368F,
+  0.4155185780F, 0.4164949044F, 0.4174715116F, 0.4184483949F,
+  0.4194255498F, 0.4204029718F, 0.4213806563F, 0.4223585987F,
+  0.4233367946F, 0.4243152392F, 0.4252939281F, 0.4262728566F,
+  0.4272520202F, 0.4282314144F, 0.4292110345F, 0.4301908760F,
+  0.4311709343F, 0.4321512047F, 0.4331316828F, 0.4341123639F,
+  0.4350932435F, 0.4360743168F, 0.4370555794F, 0.4380370267F,
+  0.4390186540F, 0.4400004567F, 0.4409824303F, 0.4419645701F,
+  0.4429468716F, 0.4439293300F, 0.4449119409F, 0.4458946996F,
+  0.4468776014F, 0.4478606418F, 0.4488438162F, 0.4498271199F,
+  0.4508105483F, 0.4517940967F, 0.4527777607F, 0.4537615355F,
+  0.4547454165F, 0.4557293991F, 0.4567134786F, 0.4576976505F,
+  0.4586819101F, 0.4596662527F, 0.4606506738F, 0.4616351687F,
+  0.4626197328F, 0.4636043614F, 0.4645890499F, 0.4655737936F,
+  0.4665585880F, 0.4675434284F, 0.4685283101F, 0.4695132286F,
+  0.4704981791F, 0.4714831570F, 0.4724681577F, 0.4734531766F,
+  0.4744382089F, 0.4754232501F, 0.4764082956F, 0.4773933406F,
+  0.4783783806F, 0.4793634108F, 0.4803484267F, 0.4813334237F,
+  0.4823183969F, 0.4833033419F, 0.4842882540F, 0.4852731285F,
+  0.4862579608F, 0.4872427462F, 0.4882274802F, 0.4892121580F,
+  0.4901967751F, 0.4911813267F, 0.4921658083F, 0.4931502151F,
+  0.4941345427F, 0.4951187863F, 0.4961029412F, 0.4970870029F,
+  0.4980709667F, 0.4990548280F, 0.5000385822F, 0.5010222245F,
+  0.5020057505F, 0.5029891553F, 0.5039724345F, 0.5049555834F,
+  0.5059385973F, 0.5069214716F, 0.5079042018F, 0.5088867831F,
+  0.5098692110F, 0.5108514808F, 0.5118335879F, 0.5128155277F,
+  0.5137972956F, 0.5147788869F, 0.5157602971F, 0.5167415215F,
+  0.5177225555F, 0.5187033945F, 0.5196840339F, 0.5206644692F,
+  0.5216446956F, 0.5226247086F, 0.5236045035F, 0.5245840759F,
+  0.5255634211F, 0.5265425344F, 0.5275214114F, 0.5285000474F,
+  0.5294784378F, 0.5304565781F, 0.5314344637F, 0.5324120899F,
+  0.5333894522F, 0.5343665461F, 0.5353433670F, 0.5363199102F,
+  0.5372961713F, 0.5382721457F, 0.5392478287F, 0.5402232159F,
+  0.5411983027F, 0.5421730845F, 0.5431475569F, 0.5441217151F,
+  0.5450955548F, 0.5460690714F, 0.5470422602F, 0.5480151169F,
+  0.5489876368F, 0.5499598155F, 0.5509316484F, 0.5519031310F,
+  0.5528742587F, 0.5538450271F, 0.5548154317F, 0.5557854680F,
+  0.5567551314F, 0.5577244174F, 0.5586933216F, 0.5596618395F,
+  0.5606299665F, 0.5615976983F, 0.5625650302F, 0.5635319580F,
+  0.5644984770F, 0.5654645828F, 0.5664302709F, 0.5673955370F,
+  0.5683603765F, 0.5693247850F, 0.5702887580F, 0.5712522912F,
+  0.5722153800F, 0.5731780200F, 0.5741402069F, 0.5751019362F,
+  0.5760632034F, 0.5770240042F, 0.5779843341F, 0.5789441889F,
+  0.5799035639F, 0.5808624549F, 0.5818208575F, 0.5827787673F,
+  0.5837361800F, 0.5846930910F, 0.5856494961F, 0.5866053910F,
+  0.5875607712F, 0.5885156324F, 0.5894699703F, 0.5904237804F,
+  0.5913770586F, 0.5923298004F, 0.5932820016F, 0.5942336578F,
+  0.5951847646F, 0.5961353179F, 0.5970853132F, 0.5980347464F,
+  0.5989836131F, 0.5999319090F, 0.6008796298F, 0.6018267713F,
+  0.6027733292F, 0.6037192993F, 0.6046646773F, 0.6056094589F,
+  0.6065536400F, 0.6074972162F, 0.6084401833F, 0.6093825372F,
+  0.6103242736F, 0.6112653884F, 0.6122058772F, 0.6131457359F,
+  0.6140849604F, 0.6150235464F, 0.6159614897F, 0.6168987862F,
+  0.6178354318F, 0.6187714223F, 0.6197067535F, 0.6206414213F,
+  0.6215754215F, 0.6225087501F, 0.6234414028F, 0.6243733757F,
+  0.6253046646F, 0.6262352654F, 0.6271651739F, 0.6280943862F,
+  0.6290228982F, 0.6299507057F, 0.6308778048F, 0.6318041913F,
+  0.6327298612F, 0.6336548105F, 0.6345790352F, 0.6355025312F,
+  0.6364252945F, 0.6373473211F, 0.6382686070F, 0.6391891483F,
+  0.6401089409F, 0.6410279808F, 0.6419462642F, 0.6428637869F,
+  0.6437805452F, 0.6446965350F, 0.6456117524F, 0.6465261935F,
+  0.6474398544F, 0.6483527311F, 0.6492648197F, 0.6501761165F,
+  0.6510866174F, 0.6519963186F, 0.6529052162F, 0.6538133064F,
+  0.6547205854F, 0.6556270492F, 0.6565326941F, 0.6574375162F,
+  0.6583415117F, 0.6592446769F, 0.6601470079F, 0.6610485009F,
+  0.6619491521F, 0.6628489578F, 0.6637479143F, 0.6646460177F,
+  0.6655432643F, 0.6664396505F, 0.6673351724F, 0.6682298264F,
+  0.6691236087F, 0.6700165157F, 0.6709085436F, 0.6717996889F,
+  0.6726899478F, 0.6735793167F, 0.6744677918F, 0.6753553697F,
+  0.6762420466F, 0.6771278190F, 0.6780126832F, 0.6788966357F,
+  0.6797796728F, 0.6806617909F, 0.6815429866F, 0.6824232562F,
+  0.6833025961F, 0.6841810030F, 0.6850584731F, 0.6859350031F,
+  0.6868105894F, 0.6876852284F, 0.6885589168F, 0.6894316510F,
+  0.6903034275F, 0.6911742430F, 0.6920440939F, 0.6929129769F,
+  0.6937808884F, 0.6946478251F, 0.6955137837F, 0.6963787606F,
+  0.6972427525F, 0.6981057560F, 0.6989677678F, 0.6998287845F,
+  0.7006888028F, 0.7015478194F, 0.7024058309F, 0.7032628340F,
+  0.7041188254F, 0.7049738019F, 0.7058277601F, 0.7066806969F,
+  0.7075326089F, 0.7083834929F, 0.7092333457F, 0.7100821640F,
+  0.7109299447F, 0.7117766846F, 0.7126223804F, 0.7134670291F,
+  0.7143106273F, 0.7151531721F, 0.7159946602F, 0.7168350885F,
+  0.7176744539F, 0.7185127534F, 0.7193499837F, 0.7201861418F,
+  0.7210212247F, 0.7218552293F, 0.7226881526F, 0.7235199914F,
+  0.7243507428F, 0.7251804039F, 0.7260089715F, 0.7268364426F,
+  0.7276628144F, 0.7284880839F, 0.7293122481F, 0.7301353040F,
+  0.7309572487F, 0.7317780794F, 0.7325977930F, 0.7334163868F,
+  0.7342338579F, 0.7350502033F, 0.7358654202F, 0.7366795059F,
+  0.7374924573F, 0.7383042718F, 0.7391149465F, 0.7399244787F,
+  0.7407328655F, 0.7415401041F, 0.7423461920F, 0.7431511261F,
+  0.7439549040F, 0.7447575227F, 0.7455589797F, 0.7463592723F,
+  0.7471583976F, 0.7479563532F, 0.7487531363F, 0.7495487443F,
+  0.7503431745F, 0.7511364244F, 0.7519284913F, 0.7527193726F,
+  0.7535090658F, 0.7542975683F, 0.7550848776F, 0.7558709910F,
+  0.7566559062F, 0.7574396205F, 0.7582221314F, 0.7590034366F,
+  0.7597835334F, 0.7605624194F, 0.7613400923F, 0.7621165495F,
+  0.7628917886F, 0.7636658072F, 0.7644386030F, 0.7652101735F,
+  0.7659805164F, 0.7667496292F, 0.7675175098F, 0.7682841556F,
+  0.7690495645F, 0.7698137341F, 0.7705766622F, 0.7713383463F,
+  0.7720987844F, 0.7728579741F, 0.7736159132F, 0.7743725994F,
+  0.7751280306F, 0.7758822046F, 0.7766351192F, 0.7773867722F,
+  0.7781371614F, 0.7788862848F, 0.7796341401F, 0.7803807253F,
+  0.7811260383F, 0.7818700769F, 0.7826128392F, 0.7833543230F,
+  0.7840945263F, 0.7848334471F, 0.7855710833F, 0.7863074330F,
+  0.7870424941F, 0.7877762647F, 0.7885087428F, 0.7892399264F,
+  0.7899698137F, 0.7906984026F, 0.7914256914F, 0.7921516780F,
+  0.7928763607F, 0.7935997375F, 0.7943218065F, 0.7950425661F,
+  0.7957620142F, 0.7964801492F, 0.7971969692F, 0.7979124724F,
+  0.7986266570F, 0.7993395214F, 0.8000510638F, 0.8007612823F,
+  0.8014701754F, 0.8021777413F, 0.8028839784F, 0.8035888849F,
+  0.8042924592F, 0.8049946997F, 0.8056956048F, 0.8063951727F,
+  0.8070934020F, 0.8077902910F, 0.8084858381F, 0.8091800419F,
+  0.8098729007F, 0.8105644130F, 0.8112545774F, 0.8119433922F,
+  0.8126308561F, 0.8133169676F, 0.8140017251F, 0.8146851272F,
+  0.8153671726F, 0.8160478598F, 0.8167271874F, 0.8174051539F,
+  0.8180817582F, 0.8187569986F, 0.8194308741F, 0.8201033831F,
+  0.8207745244F, 0.8214442966F, 0.8221126986F, 0.8227797290F,
+  0.8234453865F, 0.8241096700F, 0.8247725781F, 0.8254341097F,
+  0.8260942636F, 0.8267530385F, 0.8274104334F, 0.8280664470F,
+  0.8287210782F, 0.8293743259F, 0.8300261889F, 0.8306766662F,
+  0.8313257566F, 0.8319734591F, 0.8326197727F, 0.8332646963F,
+  0.8339082288F, 0.8345503692F, 0.8351911167F, 0.8358304700F,
+  0.8364684284F, 0.8371049907F, 0.8377401562F, 0.8383739238F,
+  0.8390062927F, 0.8396372618F, 0.8402668305F, 0.8408949977F,
+  0.8415217626F, 0.8421471245F, 0.8427710823F, 0.8433936354F,
+  0.8440147830F, 0.8446345242F, 0.8452528582F, 0.8458697844F,
+  0.8464853020F, 0.8470994102F, 0.8477121084F, 0.8483233958F,
+  0.8489332718F, 0.8495417356F, 0.8501487866F, 0.8507544243F,
+  0.8513586479F, 0.8519614568F, 0.8525628505F, 0.8531628283F,
+  0.8537613897F, 0.8543585341F, 0.8549542611F, 0.8555485699F,
+  0.8561414603F, 0.8567329315F, 0.8573229832F, 0.8579116149F,
+  0.8584988262F, 0.8590846165F, 0.8596689855F, 0.8602519327F,
+  0.8608334577F, 0.8614135603F, 0.8619922399F, 0.8625694962F,
+  0.8631453289F, 0.8637197377F, 0.8642927222F, 0.8648642821F,
+  0.8654344172F, 0.8660031272F, 0.8665704118F, 0.8671362708F,
+  0.8677007039F, 0.8682637109F, 0.8688252917F, 0.8693854460F,
+  0.8699441737F, 0.8705014745F, 0.8710573485F, 0.8716117953F,
+  0.8721648150F, 0.8727164073F, 0.8732665723F, 0.8738153098F,
+  0.8743626197F, 0.8749085021F, 0.8754529569F, 0.8759959840F,
+  0.8765375835F, 0.8770777553F, 0.8776164996F, 0.8781538162F,
+  0.8786897054F, 0.8792241670F, 0.8797572013F, 0.8802888082F,
+  0.8808189880F, 0.8813477407F, 0.8818750664F, 0.8824009653F,
+  0.8829254375F, 0.8834484833F, 0.8839701028F, 0.8844902961F,
+  0.8850090636F, 0.8855264054F, 0.8860423218F, 0.8865568131F,
+  0.8870698794F, 0.8875815212F, 0.8880917386F, 0.8886005319F,
+  0.8891079016F, 0.8896138479F, 0.8901183712F, 0.8906214719F,
+  0.8911231503F, 0.8916234067F, 0.8921222417F, 0.8926196556F,
+  0.8931156489F, 0.8936102219F, 0.8941033752F, 0.8945951092F,
+  0.8950854244F, 0.8955743212F, 0.8960618003F, 0.8965478621F,
+  0.8970325071F, 0.8975157359F, 0.8979975490F, 0.8984779471F,
+  0.8989569307F, 0.8994345004F, 0.8999106568F, 0.9003854005F,
+  0.9008587323F, 0.9013306526F, 0.9018011623F, 0.9022702619F,
+  0.9027379521F, 0.9032042337F, 0.9036691074F, 0.9041325739F,
+  0.9045946339F, 0.9050552882F, 0.9055145376F, 0.9059723828F,
+  0.9064288246F, 0.9068838638F, 0.9073375013F, 0.9077897379F,
+  0.9082405743F, 0.9086900115F, 0.9091380503F, 0.9095846917F,
+  0.9100299364F, 0.9104737854F, 0.9109162397F, 0.9113573001F,
+  0.9117969675F, 0.9122352430F, 0.9126721275F, 0.9131076219F,
+  0.9135417273F, 0.9139744447F, 0.9144057750F, 0.9148357194F,
+  0.9152642787F, 0.9156914542F, 0.9161172468F, 0.9165416576F,
+  0.9169646877F, 0.9173863382F, 0.9178066102F, 0.9182255048F,
+  0.9186430232F, 0.9190591665F, 0.9194739359F, 0.9198873324F,
+  0.9202993574F, 0.9207100120F, 0.9211192973F, 0.9215272147F,
+  0.9219337653F, 0.9223389504F, 0.9227427713F, 0.9231452290F,
+  0.9235463251F, 0.9239460607F, 0.9243444371F, 0.9247414557F,
+  0.9251371177F, 0.9255314245F, 0.9259243774F, 0.9263159778F,
+  0.9267062270F, 0.9270951264F, 0.9274826774F, 0.9278688814F,
+  0.9282537398F, 0.9286372540F, 0.9290194254F, 0.9294002555F,
+  0.9297797458F, 0.9301578976F, 0.9305347125F, 0.9309101919F,
+  0.9312843373F, 0.9316571503F, 0.9320286323F, 0.9323987849F,
+  0.9327676097F, 0.9331351080F, 0.9335012816F, 0.9338661320F,
+  0.9342296607F, 0.9345918694F, 0.9349527596F, 0.9353123330F,
+  0.9356705911F, 0.9360275357F, 0.9363831683F, 0.9367374905F,
+  0.9370905042F, 0.9374422108F, 0.9377926122F, 0.9381417099F,
+  0.9384895057F, 0.9388360014F, 0.9391811985F, 0.9395250989F,
+  0.9398677043F, 0.9402090165F, 0.9405490371F, 0.9408877680F,
+  0.9412252110F, 0.9415613678F, 0.9418962402F, 0.9422298301F,
+  0.9425621392F, 0.9428931695F, 0.9432229226F, 0.9435514005F,
+  0.9438786050F, 0.9442045381F, 0.9445292014F, 0.9448525971F,
+  0.9451747268F, 0.9454955926F, 0.9458151963F, 0.9461335399F,
+  0.9464506253F, 0.9467664545F, 0.9470810293F, 0.9473943517F,
+  0.9477064238F, 0.9480172474F, 0.9483268246F, 0.9486351573F,
+  0.9489422475F, 0.9492480973F, 0.9495527087F, 0.9498560837F,
+  0.9501582243F, 0.9504591325F, 0.9507588105F, 0.9510572603F,
+  0.9513544839F, 0.9516504834F, 0.9519452609F, 0.9522388186F,
+  0.9525311584F, 0.9528222826F, 0.9531121932F, 0.9534008923F,
+  0.9536883821F, 0.9539746647F, 0.9542597424F, 0.9545436171F,
+  0.9548262912F, 0.9551077667F, 0.9553880459F, 0.9556671309F,
+  0.9559450239F, 0.9562217272F, 0.9564972429F, 0.9567715733F,
+  0.9570447206F, 0.9573166871F, 0.9575874749F, 0.9578570863F,
+  0.9581255236F, 0.9583927890F, 0.9586588849F, 0.9589238134F,
+  0.9591875769F, 0.9594501777F, 0.9597116180F, 0.9599719003F,
+  0.9602310267F, 0.9604889995F, 0.9607458213F, 0.9610014942F,
+  0.9612560206F, 0.9615094028F, 0.9617616433F, 0.9620127443F,
+  0.9622627083F, 0.9625115376F, 0.9627592345F, 0.9630058016F,
+  0.9632512411F, 0.9634955555F, 0.9637387471F, 0.9639808185F,
+  0.9642217720F, 0.9644616100F, 0.9647003349F, 0.9649379493F,
+  0.9651744556F, 0.9654098561F, 0.9656441534F, 0.9658773499F,
+  0.9661094480F, 0.9663404504F, 0.9665703593F, 0.9667991774F,
+  0.9670269071F, 0.9672535509F, 0.9674791114F, 0.9677035909F,
+  0.9679269921F, 0.9681493174F, 0.9683705694F, 0.9685907506F,
+  0.9688098636F, 0.9690279108F, 0.9692448948F, 0.9694608182F,
+  0.9696756836F, 0.9698894934F, 0.9701022503F, 0.9703139569F,
+  0.9705246156F, 0.9707342291F, 0.9709428000F, 0.9711503309F,
+  0.9713568243F, 0.9715622829F, 0.9717667093F, 0.9719701060F,
+  0.9721724757F, 0.9723738210F, 0.9725741446F, 0.9727734490F,
+  0.9729717369F, 0.9731690109F, 0.9733652737F, 0.9735605279F,
+  0.9737547762F, 0.9739480212F, 0.9741402656F, 0.9743315120F,
+  0.9745217631F, 0.9747110216F, 0.9748992901F, 0.9750865714F,
+  0.9752728681F, 0.9754581829F, 0.9756425184F, 0.9758258775F,
+  0.9760082627F, 0.9761896768F, 0.9763701224F, 0.9765496024F,
+  0.9767281193F, 0.9769056760F, 0.9770822751F, 0.9772579193F,
+  0.9774326114F, 0.9776063542F, 0.9777791502F, 0.9779510023F,
+  0.9781219133F, 0.9782918858F, 0.9784609226F, 0.9786290264F,
+  0.9787962000F, 0.9789624461F, 0.9791277676F, 0.9792921671F,
+  0.9794556474F, 0.9796182113F, 0.9797798615F, 0.9799406009F,
+  0.9801004321F, 0.9802593580F, 0.9804173813F, 0.9805745049F,
+  0.9807307314F, 0.9808860637F, 0.9810405046F, 0.9811940568F,
+  0.9813467232F, 0.9814985065F, 0.9816494095F, 0.9817994351F,
+  0.9819485860F, 0.9820968650F, 0.9822442750F, 0.9823908186F,
+  0.9825364988F, 0.9826813184F, 0.9828252801F, 0.9829683868F,
+  0.9831106413F, 0.9832520463F, 0.9833926048F, 0.9835323195F,
+  0.9836711932F, 0.9838092288F, 0.9839464291F, 0.9840827969F,
+  0.9842183351F, 0.9843530464F, 0.9844869337F, 0.9846199998F,
+  0.9847522475F, 0.9848836798F, 0.9850142993F, 0.9851441090F,
+  0.9852731117F, 0.9854013101F, 0.9855287073F, 0.9856553058F,
+  0.9857811087F, 0.9859061188F, 0.9860303388F, 0.9861537717F,
+  0.9862764202F, 0.9863982872F, 0.9865193756F, 0.9866396882F,
+  0.9867592277F, 0.9868779972F, 0.9869959993F, 0.9871132370F,
+  0.9872297131F, 0.9873454304F, 0.9874603918F, 0.9875746001F,
+  0.9876880581F, 0.9878007688F, 0.9879127348F, 0.9880239592F,
+  0.9881344447F, 0.9882441941F, 0.9883532104F, 0.9884614962F,
+  0.9885690546F, 0.9886758883F, 0.9887820001F, 0.9888873930F,
+  0.9889920697F, 0.9890960331F, 0.9891992859F, 0.9893018312F,
+  0.9894036716F, 0.9895048100F, 0.9896052493F, 0.9897049923F,
+  0.9898040418F, 0.9899024006F, 0.9900000717F, 0.9900970577F,
+  0.9901933616F, 0.9902889862F, 0.9903839343F, 0.9904782087F,
+  0.9905718122F, 0.9906647477F, 0.9907570180F, 0.9908486259F,
+  0.9909395742F, 0.9910298658F, 0.9911195034F, 0.9912084899F,
+  0.9912968281F, 0.9913845208F, 0.9914715708F, 0.9915579810F,
+  0.9916437540F, 0.9917288928F, 0.9918134001F, 0.9918972788F,
+  0.9919805316F, 0.9920631613F, 0.9921451707F, 0.9922265626F,
+  0.9923073399F, 0.9923875052F, 0.9924670615F, 0.9925460114F,
+  0.9926243577F, 0.9927021033F, 0.9927792508F, 0.9928558032F,
+  0.9929317631F, 0.9930071333F, 0.9930819167F, 0.9931561158F,
+  0.9932297337F, 0.9933027728F, 0.9933752362F, 0.9934471264F,
+  0.9935184462F, 0.9935891985F, 0.9936593859F, 0.9937290112F,
+  0.9937980771F, 0.9938665864F, 0.9939345418F, 0.9940019460F,
+  0.9940688018F, 0.9941351118F, 0.9942008789F, 0.9942661057F,
+  0.9943307950F, 0.9943949494F, 0.9944585717F, 0.9945216645F,
+  0.9945842307F, 0.9946462728F, 0.9947077936F, 0.9947687957F,
+  0.9948292820F, 0.9948892550F, 0.9949487174F, 0.9950076719F,
+  0.9950661212F, 0.9951240679F, 0.9951815148F, 0.9952384645F,
+  0.9952949196F, 0.9953508828F, 0.9954063568F, 0.9954613442F,
+  0.9955158476F, 0.9955698697F, 0.9956234132F, 0.9956764806F,
+  0.9957290746F, 0.9957811978F, 0.9958328528F, 0.9958840423F,
+  0.9959347688F, 0.9959850351F, 0.9960348435F, 0.9960841969F,
+  0.9961330977F, 0.9961815486F, 0.9962295521F, 0.9962771108F,
+  0.9963242274F, 0.9963709043F, 0.9964171441F, 0.9964629494F,
+  0.9965083228F, 0.9965532668F, 0.9965977840F, 0.9966418768F,
+  0.9966855479F, 0.9967287998F, 0.9967716350F, 0.9968140559F,
+  0.9968560653F, 0.9968976655F, 0.9969388591F, 0.9969796485F,
+  0.9970200363F, 0.9970600250F, 0.9970996170F, 0.9971388149F,
+  0.9971776211F, 0.9972160380F, 0.9972540683F, 0.9972917142F,
+  0.9973289783F, 0.9973658631F, 0.9974023709F, 0.9974385042F,
+  0.9974742655F, 0.9975096571F, 0.9975446816F, 0.9975793413F,
+  0.9976136386F, 0.9976475759F, 0.9976811557F, 0.9977143803F,
+  0.9977472521F, 0.9977797736F, 0.9978119470F, 0.9978437748F,
+  0.9978752593F, 0.9979064029F, 0.9979372079F, 0.9979676768F,
+  0.9979978117F, 0.9980276151F, 0.9980570893F, 0.9980862367F,
+  0.9981150595F, 0.9981435600F, 0.9981717406F, 0.9981996035F,
+  0.9982271511F, 0.9982543856F, 0.9982813093F, 0.9983079246F,
+  0.9983342336F, 0.9983602386F, 0.9983859418F, 0.9984113456F,
+  0.9984364522F, 0.9984612638F, 0.9984857825F, 0.9985100108F,
+  0.9985339507F, 0.9985576044F, 0.9985809743F, 0.9986040624F,
+  0.9986268710F, 0.9986494022F, 0.9986716583F, 0.9986936413F,
+  0.9987153535F, 0.9987367969F, 0.9987579738F, 0.9987788864F,
+  0.9987995366F, 0.9988199267F, 0.9988400587F, 0.9988599348F,
+  0.9988795572F, 0.9988989278F, 0.9989180487F, 0.9989369222F,
+  0.9989555501F, 0.9989739347F, 0.9989920780F, 0.9990099820F,
+  0.9990276487F, 0.9990450803F, 0.9990622787F, 0.9990792460F,
+  0.9990959841F, 0.9991124952F, 0.9991287812F, 0.9991448440F,
+  0.9991606858F, 0.9991763084F, 0.9991917139F, 0.9992069042F,
+  0.9992218813F, 0.9992366471F, 0.9992512035F, 0.9992655525F,
+  0.9992796961F, 0.9992936361F, 0.9993073744F, 0.9993209131F,
+  0.9993342538F, 0.9993473987F, 0.9993603494F, 0.9993731080F,
+  0.9993856762F, 0.9993980559F, 0.9994102490F, 0.9994222573F,
+  0.9994340827F, 0.9994457269F, 0.9994571918F, 0.9994684793F,
+  0.9994795910F, 0.9994905288F, 0.9995012945F, 0.9995118898F,
+  0.9995223165F, 0.9995325765F, 0.9995426713F, 0.9995526029F,
+  0.9995623728F, 0.9995719829F, 0.9995814349F, 0.9995907304F,
+  0.9995998712F, 0.9996088590F, 0.9996176954F, 0.9996263821F,
+  0.9996349208F, 0.9996433132F, 0.9996515609F, 0.9996596656F,
+  0.9996676288F, 0.9996754522F, 0.9996831375F, 0.9996906862F,
+  0.9996981000F, 0.9997053804F, 0.9997125290F, 0.9997195474F,
+  0.9997264371F, 0.9997331998F, 0.9997398369F, 0.9997463500F,
+  0.9997527406F, 0.9997590103F, 0.9997651606F, 0.9997711930F,
+  0.9997771089F, 0.9997829098F, 0.9997885973F, 0.9997941728F,
+  0.9997996378F, 0.9998049936F, 0.9998102419F, 0.9998153839F,
+  0.9998204211F, 0.9998253550F, 0.9998301868F, 0.9998349182F,
+  0.9998395503F, 0.9998440847F, 0.9998485226F, 0.9998528654F,
+  0.9998571146F, 0.9998612713F, 0.9998653370F, 0.9998693130F,
+  0.9998732007F, 0.9998770012F, 0.9998807159F, 0.9998843461F,
+  0.9998878931F, 0.9998913581F, 0.9998947424F, 0.9998980473F,
+  0.9999012740F, 0.9999044237F, 0.9999074976F, 0.9999104971F,
+  0.9999134231F, 0.9999162771F, 0.9999190601F, 0.9999217733F,
+  0.9999244179F, 0.9999269950F, 0.9999295058F, 0.9999319515F,
+  0.9999343332F, 0.9999366519F, 0.9999389088F, 0.9999411050F,
+  0.9999432416F, 0.9999453196F, 0.9999473402F, 0.9999493044F,
+  0.9999512132F, 0.9999530677F, 0.9999548690F, 0.9999566180F,
+  0.9999583157F, 0.9999599633F, 0.9999615616F, 0.9999631116F,
+  0.9999646144F, 0.9999660709F, 0.9999674820F, 0.9999688487F,
+  0.9999701719F, 0.9999714526F, 0.9999726917F, 0.9999738900F,
+  0.9999750486F, 0.9999761682F, 0.9999772497F, 0.9999782941F,
+  0.9999793021F, 0.9999802747F, 0.9999812126F, 0.9999821167F,
+  0.9999829878F, 0.9999838268F, 0.9999846343F, 0.9999854113F,
+  0.9999861584F, 0.9999868765F, 0.9999875664F, 0.9999882287F,
+  0.9999888642F, 0.9999894736F, 0.9999900577F, 0.9999906172F,
+  0.9999911528F, 0.9999916651F, 0.9999921548F, 0.9999926227F,
+  0.9999930693F, 0.9999934954F, 0.9999939015F, 0.9999942883F,
+  0.9999946564F, 0.9999950064F, 0.9999953390F, 0.9999956547F,
+  0.9999959541F, 0.9999962377F, 0.9999965062F, 0.9999967601F,
+  0.9999969998F, 0.9999972260F, 0.9999974392F, 0.9999976399F,
+  0.9999978285F, 0.9999980056F, 0.9999981716F, 0.9999983271F,
+  0.9999984724F, 0.9999986081F, 0.9999987345F, 0.9999988521F,
+  0.9999989613F, 0.9999990625F, 0.9999991562F, 0.9999992426F,
+  0.9999993223F, 0.9999993954F, 0.9999994625F, 0.9999995239F,
+  0.9999995798F, 0.9999996307F, 0.9999996768F, 0.9999997184F,
+  0.9999997559F, 0.9999997895F, 0.9999998195F, 0.9999998462F,
+  0.9999998698F, 0.9999998906F, 0.9999999088F, 0.9999999246F,
+  0.9999999383F, 0.9999999500F, 0.9999999600F, 0.9999999684F,
+  0.9999999754F, 0.9999999811F, 0.9999999858F, 0.9999999896F,
+  0.9999999925F, 0.9999999948F, 0.9999999965F, 0.9999999978F,
+  0.9999999986F, 0.9999999992F, 0.9999999996F, 0.9999999998F,
+  0.9999999999F, 1.0000000000F, 1.0000000000F, 1.0000000000F,
+};
+
+static const float vwin8192[4096] = {
+  0.0000000578F, 0.0000005198F, 0.0000014438F, 0.0000028299F,
+  0.0000046780F, 0.0000069882F, 0.0000097604F, 0.0000129945F,
+  0.0000166908F, 0.0000208490F, 0.0000254692F, 0.0000305515F,
+  0.0000360958F, 0.0000421021F, 0.0000485704F, 0.0000555006F,
+  0.0000628929F, 0.0000707472F, 0.0000790635F, 0.0000878417F,
+  0.0000970820F, 0.0001067842F, 0.0001169483F, 0.0001275744F,
+  0.0001386625F, 0.0001502126F, 0.0001622245F, 0.0001746984F,
+  0.0001876343F, 0.0002010320F, 0.0002148917F, 0.0002292132F,
+  0.0002439967F, 0.0002592421F, 0.0002749493F, 0.0002911184F,
+  0.0003077493F, 0.0003248421F, 0.0003423967F, 0.0003604132F,
+  0.0003788915F, 0.0003978316F, 0.0004172335F, 0.0004370971F,
+  0.0004574226F, 0.0004782098F, 0.0004994587F, 0.0005211694F,
+  0.0005433418F, 0.0005659759F, 0.0005890717F, 0.0006126292F,
+  0.0006366484F, 0.0006611292F, 0.0006860716F, 0.0007114757F,
+  0.0007373414F, 0.0007636687F, 0.0007904576F, 0.0008177080F,
+  0.0008454200F, 0.0008735935F, 0.0009022285F, 0.0009313250F,
+  0.0009608830F, 0.0009909025F, 0.0010213834F, 0.0010523257F,
+  0.0010837295F, 0.0011155946F, 0.0011479211F, 0.0011807090F,
+  0.0012139582F, 0.0012476687F, 0.0012818405F, 0.0013164736F,
+  0.0013515679F, 0.0013871235F, 0.0014231402F, 0.0014596182F,
+  0.0014965573F, 0.0015339576F, 0.0015718190F, 0.0016101415F,
+  0.0016489251F, 0.0016881698F, 0.0017278754F, 0.0017680421F,
+  0.0018086698F, 0.0018497584F, 0.0018913080F, 0.0019333185F,
+  0.0019757898F, 0.0020187221F, 0.0020621151F, 0.0021059690F,
+  0.0021502837F, 0.0021950591F, 0.0022402953F, 0.0022859921F,
+  0.0023321497F, 0.0023787679F, 0.0024258467F, 0.0024733861F,
+  0.0025213861F, 0.0025698466F, 0.0026187676F, 0.0026681491F,
+  0.0027179911F, 0.0027682935F, 0.0028190562F, 0.0028702794F,
+  0.0029219628F, 0.0029741066F, 0.0030267107F, 0.0030797749F,
+  0.0031332994F, 0.0031872841F, 0.0032417289F, 0.0032966338F,
+  0.0033519988F, 0.0034078238F, 0.0034641089F, 0.0035208539F,
+  0.0035780589F, 0.0036357237F, 0.0036938485F, 0.0037524331F,
+  0.0038114775F, 0.0038709817F, 0.0039309456F, 0.0039913692F,
+  0.0040522524F, 0.0041135953F, 0.0041753978F, 0.0042376599F,
+  0.0043003814F, 0.0043635624F, 0.0044272029F, 0.0044913028F,
+  0.0045558620F, 0.0046208806F, 0.0046863585F, 0.0047522955F,
+  0.0048186919F, 0.0048855473F, 0.0049528619F, 0.0050206356F,
+  0.0050888684F, 0.0051575601F, 0.0052267108F, 0.0052963204F,
+  0.0053663890F, 0.0054369163F, 0.0055079025F, 0.0055793474F,
+  0.0056512510F, 0.0057236133F, 0.0057964342F, 0.0058697137F,
+  0.0059434517F, 0.0060176482F, 0.0060923032F, 0.0061674166F,
+  0.0062429883F, 0.0063190183F, 0.0063955066F, 0.0064724532F,
+  0.0065498579F, 0.0066277207F, 0.0067060416F, 0.0067848205F,
+  0.0068640575F, 0.0069437523F, 0.0070239051F, 0.0071045157F,
+  0.0071855840F, 0.0072671102F, 0.0073490940F, 0.0074315355F,
+  0.0075144345F, 0.0075977911F, 0.0076816052F, 0.0077658768F,
+  0.0078506057F, 0.0079357920F, 0.0080214355F, 0.0081075363F,
+  0.0081940943F, 0.0082811094F, 0.0083685816F, 0.0084565108F,
+  0.0085448970F, 0.0086337401F, 0.0087230401F, 0.0088127969F,
+  0.0089030104F, 0.0089936807F, 0.0090848076F, 0.0091763911F,
+  0.0092684311F, 0.0093609276F, 0.0094538805F, 0.0095472898F,
+  0.0096411554F, 0.0097354772F, 0.0098302552F, 0.0099254894F,
+  0.0100211796F, 0.0101173259F, 0.0102139281F, 0.0103109863F,
+  0.0104085002F, 0.0105064700F, 0.0106048955F, 0.0107037766F,
+  0.0108031133F, 0.0109029056F, 0.0110031534F, 0.0111038565F,
+  0.0112050151F, 0.0113066289F, 0.0114086980F, 0.0115112222F,
+  0.0116142015F, 0.0117176359F, 0.0118215252F, 0.0119258695F,
+  0.0120306686F, 0.0121359225F, 0.0122416312F, 0.0123477944F,
+  0.0124544123F, 0.0125614847F, 0.0126690116F, 0.0127769928F,
+  0.0128854284F, 0.0129943182F, 0.0131036623F, 0.0132134604F,
+  0.0133237126F, 0.0134344188F, 0.0135455790F, 0.0136571929F,
+  0.0137692607F, 0.0138817821F, 0.0139947572F, 0.0141081859F,
+  0.0142220681F, 0.0143364037F, 0.0144511927F, 0.0145664350F,
+  0.0146821304F, 0.0147982791F, 0.0149148808F, 0.0150319355F,
+  0.0151494431F, 0.0152674036F, 0.0153858168F, 0.0155046828F,
+  0.0156240014F, 0.0157437726F, 0.0158639962F, 0.0159846723F,
+  0.0161058007F, 0.0162273814F, 0.0163494142F, 0.0164718991F,
+  0.0165948361F, 0.0167182250F, 0.0168420658F, 0.0169663584F,
+  0.0170911027F, 0.0172162987F, 0.0173419462F, 0.0174680452F,
+  0.0175945956F, 0.0177215974F, 0.0178490504F, 0.0179769545F,
+  0.0181053098F, 0.0182341160F, 0.0183633732F, 0.0184930812F,
+  0.0186232399F, 0.0187538494F, 0.0188849094F, 0.0190164200F,
+  0.0191483809F, 0.0192807923F, 0.0194136539F, 0.0195469656F,
+  0.0196807275F, 0.0198149394F, 0.0199496012F, 0.0200847128F,
+  0.0202202742F, 0.0203562853F, 0.0204927460F, 0.0206296561F,
+  0.0207670157F, 0.0209048245F, 0.0210430826F, 0.0211817899F,
+  0.0213209462F, 0.0214605515F, 0.0216006057F, 0.0217411086F,
+  0.0218820603F, 0.0220234605F, 0.0221653093F, 0.0223076066F,
+  0.0224503521F, 0.0225935459F, 0.0227371879F, 0.0228812779F,
+  0.0230258160F, 0.0231708018F, 0.0233162355F, 0.0234621169F,
+  0.0236084459F, 0.0237552224F, 0.0239024462F, 0.0240501175F,
+  0.0241982359F, 0.0243468015F, 0.0244958141F, 0.0246452736F,
+  0.0247951800F, 0.0249455331F, 0.0250963329F, 0.0252475792F,
+  0.0253992720F, 0.0255514111F, 0.0257039965F, 0.0258570281F,
+  0.0260105057F, 0.0261644293F, 0.0263187987F, 0.0264736139F,
+  0.0266288747F, 0.0267845811F, 0.0269407330F, 0.0270973302F,
+  0.0272543727F, 0.0274118604F, 0.0275697930F, 0.0277281707F,
+  0.0278869932F, 0.0280462604F, 0.0282059723F, 0.0283661287F,
+  0.0285267295F, 0.0286877747F, 0.0288492641F, 0.0290111976F,
+  0.0291735751F, 0.0293363965F, 0.0294996617F, 0.0296633706F,
+  0.0298275231F, 0.0299921190F, 0.0301571583F, 0.0303226409F,
+  0.0304885667F, 0.0306549354F, 0.0308217472F, 0.0309890017F,
+  0.0311566989F, 0.0313248388F, 0.0314934211F, 0.0316624459F,
+  0.0318319128F, 0.0320018220F, 0.0321721732F, 0.0323429663F,
+  0.0325142013F, 0.0326858779F, 0.0328579962F, 0.0330305559F,
+  0.0332035570F, 0.0333769994F, 0.0335508829F, 0.0337252074F,
+  0.0338999728F, 0.0340751790F, 0.0342508259F, 0.0344269134F,
+  0.0346034412F, 0.0347804094F, 0.0349578178F, 0.0351356663F,
+  0.0353139548F, 0.0354926831F, 0.0356718511F, 0.0358514588F,
+  0.0360315059F, 0.0362119924F, 0.0363929182F, 0.0365742831F,
+  0.0367560870F, 0.0369383297F, 0.0371210113F, 0.0373041315F,
+  0.0374876902F, 0.0376716873F, 0.0378561226F, 0.0380409961F,
+  0.0382263077F, 0.0384120571F, 0.0385982443F, 0.0387848691F,
+  0.0389719315F, 0.0391594313F, 0.0393473683F, 0.0395357425F,
+  0.0397245537F, 0.0399138017F, 0.0401034866F, 0.0402936080F,
+  0.0404841660F, 0.0406751603F, 0.0408665909F, 0.0410584576F,
+  0.0412507603F, 0.0414434988F, 0.0416366731F, 0.0418302829F,
+  0.0420243282F, 0.0422188088F, 0.0424137246F, 0.0426090755F,
+  0.0428048613F, 0.0430010819F, 0.0431977371F, 0.0433948269F,
+  0.0435923511F, 0.0437903095F, 0.0439887020F, 0.0441875285F,
+  0.0443867889F, 0.0445864830F, 0.0447866106F, 0.0449871717F,
+  0.0451881661F, 0.0453895936F, 0.0455914542F, 0.0457937477F,
+  0.0459964738F, 0.0461996326F, 0.0464032239F, 0.0466072475F,
+  0.0468117032F, 0.0470165910F, 0.0472219107F, 0.0474276622F,
+  0.0476338452F, 0.0478404597F, 0.0480475056F, 0.0482549827F,
+  0.0484628907F, 0.0486712297F, 0.0488799994F, 0.0490891998F,
+  0.0492988306F, 0.0495088917F, 0.0497193830F, 0.0499303043F,
+  0.0501416554F, 0.0503534363F, 0.0505656468F, 0.0507782867F,
+  0.0509913559F, 0.0512048542F, 0.0514187815F, 0.0516331376F,
+  0.0518479225F, 0.0520631358F, 0.0522787775F, 0.0524948475F,
+  0.0527113455F, 0.0529282715F, 0.0531456252F, 0.0533634066F,
+  0.0535816154F, 0.0538002515F, 0.0540193148F, 0.0542388051F,
+  0.0544587222F, 0.0546790660F, 0.0548998364F, 0.0551210331F,
+  0.0553426561F, 0.0555647051F, 0.0557871801F, 0.0560100807F,
+  0.0562334070F, 0.0564571587F, 0.0566813357F, 0.0569059378F,
+  0.0571309649F, 0.0573564168F, 0.0575822933F, 0.0578085942F,
+  0.0580353195F, 0.0582624689F, 0.0584900423F, 0.0587180396F,
+  0.0589464605F, 0.0591753049F, 0.0594045726F, 0.0596342635F,
+  0.0598643774F, 0.0600949141F, 0.0603258735F, 0.0605572555F,
+  0.0607890597F, 0.0610212862F, 0.0612539346F, 0.0614870049F,
+  0.0617204968F, 0.0619544103F, 0.0621887451F, 0.0624235010F,
+  0.0626586780F, 0.0628942758F, 0.0631302942F, 0.0633667331F,
+  0.0636035923F, 0.0638408717F, 0.0640785710F, 0.0643166901F,
+  0.0645552288F, 0.0647941870F, 0.0650335645F, 0.0652733610F,
+  0.0655135765F, 0.0657542108F, 0.0659952636F, 0.0662367348F,
+  0.0664786242F, 0.0667209316F, 0.0669636570F, 0.0672068000F,
+  0.0674503605F, 0.0676943384F, 0.0679387334F, 0.0681835454F,
+  0.0684287742F, 0.0686744196F, 0.0689204814F, 0.0691669595F,
+  0.0694138536F, 0.0696611637F, 0.0699088894F, 0.0701570307F,
+  0.0704055873F, 0.0706545590F, 0.0709039458F, 0.0711537473F,
+  0.0714039634F, 0.0716545939F, 0.0719056387F, 0.0721570975F,
+  0.0724089702F, 0.0726612565F, 0.0729139563F, 0.0731670694F,
+  0.0734205956F, 0.0736745347F, 0.0739288866F, 0.0741836510F,
+  0.0744388277F, 0.0746944166F, 0.0749504175F, 0.0752068301F,
+  0.0754636543F, 0.0757208899F, 0.0759785367F, 0.0762365946F,
+  0.0764950632F, 0.0767539424F, 0.0770132320F, 0.0772729319F,
+  0.0775330418F, 0.0777935616F, 0.0780544909F, 0.0783158298F,
+  0.0785775778F, 0.0788397349F, 0.0791023009F, 0.0793652755F,
+  0.0796286585F, 0.0798924498F, 0.0801566492F, 0.0804212564F,
+  0.0806862712F, 0.0809516935F, 0.0812175231F, 0.0814837597F,
+  0.0817504031F, 0.0820174532F, 0.0822849097F, 0.0825527724F,
+  0.0828210412F, 0.0830897158F, 0.0833587960F, 0.0836282816F,
+  0.0838981724F, 0.0841684682F, 0.0844391688F, 0.0847102740F,
+  0.0849817835F, 0.0852536973F, 0.0855260150F, 0.0857987364F,
+  0.0860718614F, 0.0863453897F, 0.0866193211F, 0.0868936554F,
+  0.0871683924F, 0.0874435319F, 0.0877190737F, 0.0879950175F,
+  0.0882713632F, 0.0885481105F, 0.0888252592F, 0.0891028091F,
+  0.0893807600F, 0.0896591117F, 0.0899378639F, 0.0902170165F,
+  0.0904965692F, 0.0907765218F, 0.0910568740F, 0.0913376258F,
+  0.0916187767F, 0.0919003268F, 0.0921822756F, 0.0924646230F,
+  0.0927473687F, 0.0930305126F, 0.0933140545F, 0.0935979940F,
+  0.0938823310F, 0.0941670653F, 0.0944521966F, 0.0947377247F,
+  0.0950236494F, 0.0953099704F, 0.0955966876F, 0.0958838007F,
+  0.0961713094F, 0.0964592136F, 0.0967475131F, 0.0970362075F,
+  0.0973252967F, 0.0976147805F, 0.0979046585F, 0.0981949307F,
+  0.0984855967F, 0.0987766563F, 0.0990681093F, 0.0993599555F,
+  0.0996521945F, 0.0999448263F, 0.1002378506F, 0.1005312671F,
+  0.1008250755F, 0.1011192757F, 0.1014138675F, 0.1017088505F,
+  0.1020042246F, 0.1022999895F, 0.1025961450F, 0.1028926909F,
+  0.1031896268F, 0.1034869526F, 0.1037846680F, 0.1040827729F,
+  0.1043812668F, 0.1046801497F, 0.1049794213F, 0.1052790813F,
+  0.1055791294F, 0.1058795656F, 0.1061803894F, 0.1064816006F,
+  0.1067831991F, 0.1070851846F, 0.1073875568F, 0.1076903155F,
+  0.1079934604F, 0.1082969913F, 0.1086009079F, 0.1089052101F,
+  0.1092098975F, 0.1095149699F, 0.1098204270F, 0.1101262687F,
+  0.1104324946F, 0.1107391045F, 0.1110460982F, 0.1113534754F,
+  0.1116612359F, 0.1119693793F, 0.1122779055F, 0.1125868142F,
+  0.1128961052F, 0.1132057781F, 0.1135158328F, 0.1138262690F,
+  0.1141370863F, 0.1144482847F, 0.1147598638F, 0.1150718233F,
+  0.1153841631F, 0.1156968828F, 0.1160099822F, 0.1163234610F,
+  0.1166373190F, 0.1169515559F, 0.1172661714F, 0.1175811654F,
+  0.1178965374F, 0.1182122874F, 0.1185284149F, 0.1188449198F,
+  0.1191618018F, 0.1194790606F, 0.1197966960F, 0.1201147076F,
+  0.1204330953F, 0.1207518587F, 0.1210709976F, 0.1213905118F,
+  0.1217104009F, 0.1220306647F, 0.1223513029F, 0.1226723153F,
+  0.1229937016F, 0.1233154615F, 0.1236375948F, 0.1239601011F,
+  0.1242829803F, 0.1246062319F, 0.1249298559F, 0.1252538518F,
+  0.1255782195F, 0.1259029586F, 0.1262280689F, 0.1265535501F,
+  0.1268794019F, 0.1272056241F, 0.1275322163F, 0.1278591784F,
+  0.1281865099F, 0.1285142108F, 0.1288422805F, 0.1291707190F,
+  0.1294995259F, 0.1298287009F, 0.1301582437F, 0.1304881542F,
+  0.1308184319F, 0.1311490766F, 0.1314800881F, 0.1318114660F,
+  0.1321432100F, 0.1324753200F, 0.1328077955F, 0.1331406364F,
+  0.1334738422F, 0.1338074129F, 0.1341413479F, 0.1344756472F,
+  0.1348103103F, 0.1351453370F, 0.1354807270F, 0.1358164801F,
+  0.1361525959F, 0.1364890741F, 0.1368259145F, 0.1371631167F,
+  0.1375006805F, 0.1378386056F, 0.1381768917F, 0.1385155384F,
+  0.1388545456F, 0.1391939129F, 0.1395336400F, 0.1398737266F,
+  0.1402141724F, 0.1405549772F, 0.1408961406F, 0.1412376623F,
+  0.1415795421F, 0.1419217797F, 0.1422643746F, 0.1426073268F,
+  0.1429506358F, 0.1432943013F, 0.1436383231F, 0.1439827008F,
+  0.1443274342F, 0.1446725229F, 0.1450179667F, 0.1453637652F,
+  0.1457099181F, 0.1460564252F, 0.1464032861F, 0.1467505006F,
+  0.1470980682F, 0.1474459888F, 0.1477942620F, 0.1481428875F,
+  0.1484918651F, 0.1488411942F, 0.1491908748F, 0.1495409065F,
+  0.1498912889F, 0.1502420218F, 0.1505931048F, 0.1509445376F,
+  0.1512963200F, 0.1516484516F, 0.1520009321F, 0.1523537612F,
+  0.1527069385F, 0.1530604638F, 0.1534143368F, 0.1537685571F,
+  0.1541231244F, 0.1544780384F, 0.1548332987F, 0.1551889052F,
+  0.1555448574F, 0.1559011550F, 0.1562577978F, 0.1566147853F,
+  0.1569721173F, 0.1573297935F, 0.1576878135F, 0.1580461771F,
+  0.1584048838F, 0.1587639334F, 0.1591233255F, 0.1594830599F,
+  0.1598431361F, 0.1602035540F, 0.1605643131F, 0.1609254131F,
+  0.1612868537F, 0.1616486346F, 0.1620107555F, 0.1623732160F,
+  0.1627360158F, 0.1630991545F, 0.1634626319F, 0.1638264476F,
+  0.1641906013F, 0.1645550926F, 0.1649199212F, 0.1652850869F,
+  0.1656505892F, 0.1660164278F, 0.1663826024F, 0.1667491127F,
+  0.1671159583F, 0.1674831388F, 0.1678506541F, 0.1682185036F,
+  0.1685866872F, 0.1689552044F, 0.1693240549F, 0.1696932384F,
+  0.1700627545F, 0.1704326029F, 0.1708027833F, 0.1711732952F,
+  0.1715441385F, 0.1719153127F, 0.1722868175F, 0.1726586526F,
+  0.1730308176F, 0.1734033121F, 0.1737761359F, 0.1741492886F,
+  0.1745227698F, 0.1748965792F, 0.1752707164F, 0.1756451812F,
+  0.1760199731F, 0.1763950918F, 0.1767705370F, 0.1771463083F,
+  0.1775224054F, 0.1778988279F, 0.1782755754F, 0.1786526477F,
+  0.1790300444F, 0.1794077651F, 0.1797858094F, 0.1801641771F,
+  0.1805428677F, 0.1809218810F, 0.1813012165F, 0.1816808739F,
+  0.1820608528F, 0.1824411530F, 0.1828217739F, 0.1832027154F,
+  0.1835839770F, 0.1839655584F, 0.1843474592F, 0.1847296790F,
+  0.1851122175F, 0.1854950744F, 0.1858782492F, 0.1862617417F,
+  0.1866455514F, 0.1870296780F, 0.1874141211F, 0.1877988804F,
+  0.1881839555F, 0.1885693461F, 0.1889550517F, 0.1893410721F,
+  0.1897274068F, 0.1901140555F, 0.1905010178F, 0.1908882933F,
+  0.1912758818F, 0.1916637828F, 0.1920519959F, 0.1924405208F,
+  0.1928293571F, 0.1932185044F, 0.1936079625F, 0.1939977308F,
+  0.1943878091F, 0.1947781969F, 0.1951688939F, 0.1955598998F,
+  0.1959512141F, 0.1963428364F, 0.1967347665F, 0.1971270038F,
+  0.1975195482F, 0.1979123990F, 0.1983055561F, 0.1986990190F,
+  0.1990927873F, 0.1994868607F, 0.1998812388F, 0.2002759212F,
+  0.2006709075F, 0.2010661974F, 0.2014617904F, 0.2018576862F,
+  0.2022538844F, 0.2026503847F, 0.2030471865F, 0.2034442897F,
+  0.2038416937F, 0.2042393982F, 0.2046374028F, 0.2050357071F,
+  0.2054343107F, 0.2058332133F, 0.2062324145F, 0.2066319138F,
+  0.2070317110F, 0.2074318055F, 0.2078321970F, 0.2082328852F,
+  0.2086338696F, 0.2090351498F, 0.2094367255F, 0.2098385962F,
+  0.2102407617F, 0.2106432213F, 0.2110459749F, 0.2114490220F,
+  0.2118523621F, 0.2122559950F, 0.2126599202F, 0.2130641373F,
+  0.2134686459F, 0.2138734456F, 0.2142785361F, 0.2146839168F,
+  0.2150895875F, 0.2154955478F, 0.2159017972F, 0.2163083353F,
+  0.2167151617F, 0.2171222761F, 0.2175296780F, 0.2179373670F,
+  0.2183453428F, 0.2187536049F, 0.2191621529F, 0.2195709864F,
+  0.2199801051F, 0.2203895085F, 0.2207991961F, 0.2212091677F,
+  0.2216194228F, 0.2220299610F, 0.2224407818F, 0.2228518850F,
+  0.2232632699F, 0.2236749364F, 0.2240868839F, 0.2244991121F,
+  0.2249116204F, 0.2253244086F, 0.2257374763F, 0.2261508229F,
+  0.2265644481F, 0.2269783514F, 0.2273925326F, 0.2278069911F,
+  0.2282217265F, 0.2286367384F, 0.2290520265F, 0.2294675902F,
+  0.2298834292F, 0.2302995431F, 0.2307159314F, 0.2311325937F,
+  0.2315495297F, 0.2319667388F, 0.2323842207F, 0.2328019749F,
+  0.2332200011F, 0.2336382988F, 0.2340568675F, 0.2344757070F,
+  0.2348948166F, 0.2353141961F, 0.2357338450F, 0.2361537629F,
+  0.2365739493F, 0.2369944038F, 0.2374151261F, 0.2378361156F,
+  0.2382573720F, 0.2386788948F, 0.2391006836F, 0.2395227380F,
+  0.2399450575F, 0.2403676417F, 0.2407904902F, 0.2412136026F,
+  0.2416369783F, 0.2420606171F, 0.2424845185F, 0.2429086820F,
+  0.2433331072F, 0.2437577936F, 0.2441827409F, 0.2446079486F,
+  0.2450334163F, 0.2454591435F, 0.2458851298F, 0.2463113747F,
+  0.2467378779F, 0.2471646389F, 0.2475916573F, 0.2480189325F,
+  0.2484464643F, 0.2488742521F, 0.2493022955F, 0.2497305940F,
+  0.2501591473F, 0.2505879549F, 0.2510170163F, 0.2514463311F,
+  0.2518758989F, 0.2523057193F, 0.2527357916F, 0.2531661157F,
+  0.2535966909F, 0.2540275169F, 0.2544585931F, 0.2548899193F,
+  0.2553214948F, 0.2557533193F, 0.2561853924F, 0.2566177135F,
+  0.2570502822F, 0.2574830981F, 0.2579161608F, 0.2583494697F,
+  0.2587830245F, 0.2592168246F, 0.2596508697F, 0.2600851593F,
+  0.2605196929F, 0.2609544701F, 0.2613894904F, 0.2618247534F,
+  0.2622602586F, 0.2626960055F, 0.2631319938F, 0.2635682230F,
+  0.2640046925F, 0.2644414021F, 0.2648783511F, 0.2653155391F,
+  0.2657529657F, 0.2661906305F, 0.2666285329F, 0.2670666725F,
+  0.2675050489F, 0.2679436616F, 0.2683825101F, 0.2688215940F,
+  0.2692609127F, 0.2697004660F, 0.2701402532F, 0.2705802739F,
+  0.2710205278F, 0.2714610142F, 0.2719017327F, 0.2723426830F,
+  0.2727838644F, 0.2732252766F, 0.2736669191F, 0.2741087914F,
+  0.2745508930F, 0.2749932235F, 0.2754357824F, 0.2758785693F,
+  0.2763215837F, 0.2767648251F, 0.2772082930F, 0.2776519870F,
+  0.2780959066F, 0.2785400513F, 0.2789844207F, 0.2794290143F,
+  0.2798738316F, 0.2803188722F, 0.2807641355F, 0.2812096211F,
+  0.2816553286F, 0.2821012574F, 0.2825474071F, 0.2829937773F,
+  0.2834403673F, 0.2838871768F, 0.2843342053F, 0.2847814523F,
+  0.2852289174F, 0.2856765999F, 0.2861244996F, 0.2865726159F,
+  0.2870209482F, 0.2874694962F, 0.2879182594F, 0.2883672372F,
+  0.2888164293F, 0.2892658350F, 0.2897154540F, 0.2901652858F,
+  0.2906153298F, 0.2910655856F, 0.2915160527F, 0.2919667306F,
+  0.2924176189F, 0.2928687171F, 0.2933200246F, 0.2937715409F,
+  0.2942232657F, 0.2946751984F, 0.2951273386F, 0.2955796856F,
+  0.2960322391F, 0.2964849986F, 0.2969379636F, 0.2973911335F,
+  0.2978445080F, 0.2982980864F, 0.2987518684F, 0.2992058534F,
+  0.2996600409F, 0.3001144305F, 0.3005690217F, 0.3010238139F,
+  0.3014788067F, 0.3019339995F, 0.3023893920F, 0.3028449835F,
+  0.3033007736F, 0.3037567618F, 0.3042129477F, 0.3046693306F,
+  0.3051259102F, 0.3055826859F, 0.3060396572F, 0.3064968236F,
+  0.3069541847F, 0.3074117399F, 0.3078694887F, 0.3083274307F,
+  0.3087855653F, 0.3092438920F, 0.3097024104F, 0.3101611199F,
+  0.3106200200F, 0.3110791103F, 0.3115383902F, 0.3119978592F,
+  0.3124575169F, 0.3129173627F, 0.3133773961F, 0.3138376166F,
+  0.3142980238F, 0.3147586170F, 0.3152193959F, 0.3156803598F,
+  0.3161415084F, 0.3166028410F, 0.3170643573F, 0.3175260566F,
+  0.3179879384F, 0.3184500023F, 0.3189122478F, 0.3193746743F,
+  0.3198372814F, 0.3203000685F, 0.3207630351F, 0.3212261807F,
+  0.3216895048F, 0.3221530069F, 0.3226166865F, 0.3230805430F,
+  0.3235445760F, 0.3240087849F, 0.3244731693F, 0.3249377285F,
+  0.3254024622F, 0.3258673698F, 0.3263324507F, 0.3267977045F,
+  0.3272631306F, 0.3277287286F, 0.3281944978F, 0.3286604379F,
+  0.3291265482F, 0.3295928284F, 0.3300592777F, 0.3305258958F,
+  0.3309926821F, 0.3314596361F, 0.3319267573F, 0.3323940451F,
+  0.3328614990F, 0.3333291186F, 0.3337969033F, 0.3342648525F,
+  0.3347329658F, 0.3352012427F, 0.3356696825F, 0.3361382849F,
+  0.3366070492F, 0.3370759749F, 0.3375450616F, 0.3380143087F,
+  0.3384837156F, 0.3389532819F, 0.3394230071F, 0.3398928905F,
+  0.3403629317F, 0.3408331302F, 0.3413034854F, 0.3417739967F,
+  0.3422446638F, 0.3427154860F, 0.3431864628F, 0.3436575938F,
+  0.3441288782F, 0.3446003158F, 0.3450719058F, 0.3455436478F,
+  0.3460155412F, 0.3464875856F, 0.3469597804F, 0.3474321250F,
+  0.3479046189F, 0.3483772617F, 0.3488500527F, 0.3493229914F,
+  0.3497960774F, 0.3502693100F, 0.3507426887F, 0.3512162131F,
+  0.3516898825F, 0.3521636965F, 0.3526376545F, 0.3531117559F,
+  0.3535860003F, 0.3540603870F, 0.3545349157F, 0.3550095856F,
+  0.3554843964F, 0.3559593474F, 0.3564344381F, 0.3569096680F,
+  0.3573850366F, 0.3578605432F, 0.3583361875F, 0.3588119687F,
+  0.3592878865F, 0.3597639402F, 0.3602401293F, 0.3607164533F,
+  0.3611929117F, 0.3616695038F, 0.3621462292F, 0.3626230873F,
+  0.3631000776F, 0.3635771995F, 0.3640544525F, 0.3645318360F,
+  0.3650093496F, 0.3654869926F, 0.3659647645F, 0.3664426648F,
+  0.3669206930F, 0.3673988484F, 0.3678771306F, 0.3683555390F,
+  0.3688340731F, 0.3693127322F, 0.3697915160F, 0.3702704237F,
+  0.3707494549F, 0.3712286091F, 0.3717078857F, 0.3721872840F,
+  0.3726668037F, 0.3731464441F, 0.3736262047F, 0.3741060850F,
+  0.3745860843F, 0.3750662023F, 0.3755464382F, 0.3760267915F,
+  0.3765072618F, 0.3769878484F, 0.3774685509F, 0.3779493686F,
+  0.3784303010F, 0.3789113475F, 0.3793925076F, 0.3798737809F,
+  0.3803551666F, 0.3808366642F, 0.3813182733F, 0.3817999932F,
+  0.3822818234F, 0.3827637633F, 0.3832458124F, 0.3837279702F,
+  0.3842102360F, 0.3846926093F, 0.3851750897F, 0.3856576764F,
+  0.3861403690F, 0.3866231670F, 0.3871060696F, 0.3875890765F,
+  0.3880721870F, 0.3885554007F, 0.3890387168F, 0.3895221349F,
+  0.3900056544F, 0.3904892748F, 0.3909729955F, 0.3914568160F,
+  0.3919407356F, 0.3924247539F, 0.3929088702F, 0.3933930841F,
+  0.3938773949F, 0.3943618021F, 0.3948463052F, 0.3953309035F,
+  0.3958155966F, 0.3963003838F, 0.3967852646F, 0.3972702385F,
+  0.3977553048F, 0.3982404631F, 0.3987257127F, 0.3992110531F,
+  0.3996964838F, 0.4001820041F, 0.4006676136F, 0.4011533116F,
+  0.4016390976F, 0.4021249710F, 0.4026109313F, 0.4030969779F,
+  0.4035831102F, 0.4040693277F, 0.4045556299F, 0.4050420160F,
+  0.4055284857F, 0.4060150383F, 0.4065016732F, 0.4069883899F,
+  0.4074751879F, 0.4079620665F, 0.4084490252F, 0.4089360635F,
+  0.4094231807F, 0.4099103763F, 0.4103976498F, 0.4108850005F,
+  0.4113724280F, 0.4118599315F, 0.4123475107F, 0.4128351648F,
+  0.4133228934F, 0.4138106959F, 0.4142985716F, 0.4147865201F,
+  0.4152745408F, 0.4157626330F, 0.4162507963F, 0.4167390301F,
+  0.4172273337F, 0.4177157067F, 0.4182041484F, 0.4186926583F,
+  0.4191812359F, 0.4196698805F, 0.4201585915F, 0.4206473685F,
+  0.4211362108F, 0.4216251179F, 0.4221140892F, 0.4226031241F,
+  0.4230922221F, 0.4235813826F, 0.4240706050F, 0.4245598887F,
+  0.4250492332F, 0.4255386379F, 0.4260281022F, 0.4265176256F,
+  0.4270072075F, 0.4274968473F, 0.4279865445F, 0.4284762984F,
+  0.4289661086F, 0.4294559743F, 0.4299458951F, 0.4304358704F,
+  0.4309258996F, 0.4314159822F, 0.4319061175F, 0.4323963050F,
+  0.4328865441F, 0.4333768342F, 0.4338671749F, 0.4343575654F,
+  0.4348480052F, 0.4353384938F, 0.4358290306F, 0.4363196149F,
+  0.4368102463F, 0.4373009241F, 0.4377916478F, 0.4382824168F,
+  0.4387732305F, 0.4392640884F, 0.4397549899F, 0.4402459343F,
+  0.4407369212F, 0.4412279499F, 0.4417190198F, 0.4422101305F,
+  0.4427012813F, 0.4431924717F, 0.4436837010F, 0.4441749686F,
+  0.4446662742F, 0.4451576169F, 0.4456489963F, 0.4461404118F,
+  0.4466318628F, 0.4471233487F, 0.4476148690F, 0.4481064230F,
+  0.4485980103F, 0.4490896302F, 0.4495812821F, 0.4500729654F,
+  0.4505646797F, 0.4510564243F, 0.4515481986F, 0.4520400021F,
+  0.4525318341F, 0.4530236942F, 0.4535155816F, 0.4540074959F,
+  0.4544994365F, 0.4549914028F, 0.4554833941F, 0.4559754100F,
+  0.4564674499F, 0.4569595131F, 0.4574515991F, 0.4579437074F,
+  0.4584358372F, 0.4589279881F, 0.4594201595F, 0.4599123508F,
+  0.4604045615F, 0.4608967908F, 0.4613890383F, 0.4618813034F,
+  0.4623735855F, 0.4628658841F, 0.4633581984F, 0.4638505281F,
+  0.4643428724F, 0.4648352308F, 0.4653276028F, 0.4658199877F,
+  0.4663123849F, 0.4668047940F, 0.4672972143F, 0.4677896451F,
+  0.4682820861F, 0.4687745365F, 0.4692669958F, 0.4697594634F,
+  0.4702519387F, 0.4707444211F, 0.4712369102F, 0.4717294052F,
+  0.4722219056F, 0.4727144109F, 0.4732069204F, 0.4736994336F,
+  0.4741919498F, 0.4746844686F, 0.4751769893F, 0.4756695113F,
+  0.4761620341F, 0.4766545571F, 0.4771470797F, 0.4776396013F,
+  0.4781321213F, 0.4786246392F, 0.4791171544F, 0.4796096663F,
+  0.4801021744F, 0.4805946779F, 0.4810871765F, 0.4815796694F,
+  0.4820721561F, 0.4825646360F, 0.4830571086F, 0.4835495732F,
+  0.4840420293F, 0.4845344763F, 0.4850269136F, 0.4855193407F,
+  0.4860117569F, 0.4865041617F, 0.4869965545F, 0.4874889347F,
+  0.4879813018F, 0.4884736551F, 0.4889659941F, 0.4894583182F,
+  0.4899506268F, 0.4904429193F, 0.4909351952F, 0.4914274538F,
+  0.4919196947F, 0.4924119172F, 0.4929041207F, 0.4933963046F,
+  0.4938884685F, 0.4943806116F, 0.4948727335F, 0.4953648335F,
+  0.4958569110F, 0.4963489656F, 0.4968409965F, 0.4973330032F,
+  0.4978249852F, 0.4983169419F, 0.4988088726F, 0.4993007768F,
+  0.4997926539F, 0.5002845034F, 0.5007763247F, 0.5012681171F,
+  0.5017598801F, 0.5022516132F, 0.5027433157F, 0.5032349871F,
+  0.5037266268F, 0.5042182341F, 0.5047098086F, 0.5052013497F,
+  0.5056928567F, 0.5061843292F, 0.5066757664F, 0.5071671679F,
+  0.5076585330F, 0.5081498613F, 0.5086411520F, 0.5091324047F,
+  0.5096236187F, 0.5101147934F, 0.5106059284F, 0.5110970230F,
+  0.5115880766F, 0.5120790887F, 0.5125700587F, 0.5130609860F,
+  0.5135518700F, 0.5140427102F, 0.5145335059F, 0.5150242566F,
+  0.5155149618F, 0.5160056208F, 0.5164962331F, 0.5169867980F,
+  0.5174773151F, 0.5179677837F, 0.5184582033F, 0.5189485733F,
+  0.5194388931F, 0.5199291621F, 0.5204193798F, 0.5209095455F,
+  0.5213996588F, 0.5218897190F, 0.5223797256F, 0.5228696779F,
+  0.5233595755F, 0.5238494177F, 0.5243392039F, 0.5248289337F,
+  0.5253186063F, 0.5258082213F, 0.5262977781F, 0.5267872760F,
+  0.5272767146F, 0.5277660932F, 0.5282554112F, 0.5287446682F,
+  0.5292338635F, 0.5297229965F, 0.5302120667F, 0.5307010736F,
+  0.5311900164F, 0.5316788947F, 0.5321677079F, 0.5326564554F,
+  0.5331451366F, 0.5336337511F, 0.5341222981F, 0.5346107771F,
+  0.5350991876F, 0.5355875290F, 0.5360758007F, 0.5365640021F,
+  0.5370521327F, 0.5375401920F, 0.5380281792F, 0.5385160939F,
+  0.5390039355F, 0.5394917034F, 0.5399793971F, 0.5404670159F,
+  0.5409545594F, 0.5414420269F, 0.5419294179F, 0.5424167318F,
+  0.5429039680F, 0.5433911261F, 0.5438782053F, 0.5443652051F,
+  0.5448521250F, 0.5453389644F, 0.5458257228F, 0.5463123995F,
+  0.5467989940F, 0.5472855057F, 0.5477719341F, 0.5482582786F,
+  0.5487445387F, 0.5492307137F, 0.5497168031F, 0.5502028063F,
+  0.5506887228F, 0.5511745520F, 0.5516602934F, 0.5521459463F,
+  0.5526315103F, 0.5531169847F, 0.5536023690F, 0.5540876626F,
+  0.5545728649F, 0.5550579755F, 0.5555429937F, 0.5560279189F,
+  0.5565127507F, 0.5569974884F, 0.5574821315F, 0.5579666794F,
+  0.5584511316F, 0.5589354875F, 0.5594197465F, 0.5599039080F,
+  0.5603879716F, 0.5608719367F, 0.5613558026F, 0.5618395689F,
+  0.5623232350F, 0.5628068002F, 0.5632902642F, 0.5637736262F,
+  0.5642568858F, 0.5647400423F, 0.5652230953F, 0.5657060442F,
+  0.5661888883F, 0.5666716272F, 0.5671542603F, 0.5676367870F,
+  0.5681192069F, 0.5686015192F, 0.5690837235F, 0.5695658192F,
+  0.5700478058F, 0.5705296827F, 0.5710114494F, 0.5714931052F,
+  0.5719746497F, 0.5724560822F, 0.5729374023F, 0.5734186094F,
+  0.5738997029F, 0.5743806823F, 0.5748615470F, 0.5753422965F,
+  0.5758229301F, 0.5763034475F, 0.5767838480F, 0.5772641310F,
+  0.5777442960F, 0.5782243426F, 0.5787042700F, 0.5791840778F,
+  0.5796637654F, 0.5801433322F, 0.5806227778F, 0.5811021016F,
+  0.5815813029F, 0.5820603814F, 0.5825393363F, 0.5830181673F,
+  0.5834968737F, 0.5839754549F, 0.5844539105F, 0.5849322399F,
+  0.5854104425F, 0.5858885179F, 0.5863664653F, 0.5868442844F,
+  0.5873219746F, 0.5877995353F, 0.5882769660F, 0.5887542661F,
+  0.5892314351F, 0.5897084724F, 0.5901853776F, 0.5906621500F,
+  0.5911387892F, 0.5916152945F, 0.5920916655F, 0.5925679016F,
+  0.5930440022F, 0.5935199669F, 0.5939957950F, 0.5944714861F,
+  0.5949470396F, 0.5954224550F, 0.5958977317F, 0.5963728692F,
+  0.5968478669F, 0.5973227244F, 0.5977974411F, 0.5982720163F,
+  0.5987464497F, 0.5992207407F, 0.5996948887F, 0.6001688932F,
+  0.6006427537F, 0.6011164696F, 0.6015900405F, 0.6020634657F,
+  0.6025367447F, 0.6030098770F, 0.6034828621F, 0.6039556995F,
+  0.6044283885F, 0.6049009288F, 0.6053733196F, 0.6058455606F,
+  0.6063176512F, 0.6067895909F, 0.6072613790F, 0.6077330152F,
+  0.6082044989F, 0.6086758295F, 0.6091470065F, 0.6096180294F,
+  0.6100888977F, 0.6105596108F, 0.6110301682F, 0.6115005694F,
+  0.6119708139F, 0.6124409011F, 0.6129108305F, 0.6133806017F,
+  0.6138502139F, 0.6143196669F, 0.6147889599F, 0.6152580926F,
+  0.6157270643F, 0.6161958746F, 0.6166645230F, 0.6171330088F,
+  0.6176013317F, 0.6180694910F, 0.6185374863F, 0.6190053171F,
+  0.6194729827F, 0.6199404828F, 0.6204078167F, 0.6208749841F,
+  0.6213419842F, 0.6218088168F, 0.6222754811F, 0.6227419768F,
+  0.6232083032F, 0.6236744600F, 0.6241404465F, 0.6246062622F,
+  0.6250719067F, 0.6255373795F, 0.6260026799F, 0.6264678076F,
+  0.6269327619F, 0.6273975425F, 0.6278621487F, 0.6283265800F,
+  0.6287908361F, 0.6292549163F, 0.6297188201F, 0.6301825471F,
+  0.6306460966F, 0.6311094683F, 0.6315726617F, 0.6320356761F,
+  0.6324985111F, 0.6329611662F, 0.6334236410F, 0.6338859348F,
+  0.6343480472F, 0.6348099777F, 0.6352717257F, 0.6357332909F,
+  0.6361946726F, 0.6366558704F, 0.6371168837F, 0.6375777122F,
+  0.6380383552F, 0.6384988123F, 0.6389590830F, 0.6394191668F,
+  0.6398790631F, 0.6403387716F, 0.6407982916F, 0.6412576228F,
+  0.6417167645F, 0.6421757163F, 0.6426344778F, 0.6430930483F,
+  0.6435514275F, 0.6440096149F, 0.6444676098F, 0.6449254119F,
+  0.6453830207F, 0.6458404356F, 0.6462976562F, 0.6467546820F,
+  0.6472115125F, 0.6476681472F, 0.6481245856F, 0.6485808273F,
+  0.6490368717F, 0.6494927183F, 0.6499483667F, 0.6504038164F,
+  0.6508590670F, 0.6513141178F, 0.6517689684F, 0.6522236185F,
+  0.6526780673F, 0.6531323146F, 0.6535863598F, 0.6540402024F,
+  0.6544938419F, 0.6549472779F, 0.6554005099F, 0.6558535373F,
+  0.6563063598F, 0.6567589769F, 0.6572113880F, 0.6576635927F,
+  0.6581155906F, 0.6585673810F, 0.6590189637F, 0.6594703380F,
+  0.6599215035F, 0.6603724598F, 0.6608232064F, 0.6612737427F,
+  0.6617240684F, 0.6621741829F, 0.6626240859F, 0.6630737767F,
+  0.6635232550F, 0.6639725202F, 0.6644215720F, 0.6648704098F,
+  0.6653190332F, 0.6657674417F, 0.6662156348F, 0.6666636121F,
+  0.6671113731F, 0.6675589174F, 0.6680062445F, 0.6684533538F,
+  0.6689002450F, 0.6693469177F, 0.6697933712F, 0.6702396052F,
+  0.6706856193F, 0.6711314129F, 0.6715769855F, 0.6720223369F,
+  0.6724674664F, 0.6729123736F, 0.6733570581F, 0.6738015194F,
+  0.6742457570F, 0.6746897706F, 0.6751335596F, 0.6755771236F,
+  0.6760204621F, 0.6764635747F, 0.6769064609F, 0.6773491204F,
+  0.6777915525F, 0.6782337570F, 0.6786757332F, 0.6791174809F,
+  0.6795589995F, 0.6800002886F, 0.6804413477F, 0.6808821765F,
+  0.6813227743F, 0.6817631409F, 0.6822032758F, 0.6826431785F,
+  0.6830828485F, 0.6835222855F, 0.6839614890F, 0.6844004585F,
+  0.6848391936F, 0.6852776939F, 0.6857159589F, 0.6861539883F,
+  0.6865917815F, 0.6870293381F, 0.6874666576F, 0.6879037398F,
+  0.6883405840F, 0.6887771899F, 0.6892135571F, 0.6896496850F,
+  0.6900855733F, 0.6905212216F, 0.6909566294F, 0.6913917963F,
+  0.6918267218F, 0.6922614055F, 0.6926958471F, 0.6931300459F,
+  0.6935640018F, 0.6939977141F, 0.6944311825F, 0.6948644066F,
+  0.6952973859F, 0.6957301200F, 0.6961626085F, 0.6965948510F,
+  0.6970268470F, 0.6974585961F, 0.6978900980F, 0.6983213521F,
+  0.6987523580F, 0.6991831154F, 0.6996136238F, 0.7000438828F,
+  0.7004738921F, 0.7009036510F, 0.7013331594F, 0.7017624166F,
+  0.7021914224F, 0.7026201763F, 0.7030486779F, 0.7034769268F,
+  0.7039049226F, 0.7043326648F, 0.7047601531F, 0.7051873870F,
+  0.7056143662F, 0.7060410902F, 0.7064675586F, 0.7068937711F,
+  0.7073197271F, 0.7077454264F, 0.7081708684F, 0.7085960529F,
+  0.7090209793F, 0.7094456474F, 0.7098700566F, 0.7102942066F,
+  0.7107180970F, 0.7111417274F, 0.7115650974F, 0.7119882066F,
+  0.7124110545F, 0.7128336409F, 0.7132559653F, 0.7136780272F,
+  0.7140998264F, 0.7145213624F, 0.7149426348F, 0.7153636433F,
+  0.7157843874F, 0.7162048668F, 0.7166250810F, 0.7170450296F,
+  0.7174647124F, 0.7178841289F, 0.7183032786F, 0.7187221613F,
+  0.7191407765F, 0.7195591239F, 0.7199772030F, 0.7203950135F,
+  0.7208125550F, 0.7212298271F, 0.7216468294F, 0.7220635616F,
+  0.7224800233F, 0.7228962140F, 0.7233121335F, 0.7237277813F,
+  0.7241431571F, 0.7245582604F, 0.7249730910F, 0.7253876484F,
+  0.7258019322F, 0.7262159422F, 0.7266296778F, 0.7270431388F,
+  0.7274563247F, 0.7278692353F, 0.7282818700F, 0.7286942287F,
+  0.7291063108F, 0.7295181160F, 0.7299296440F, 0.7303408944F,
+  0.7307518669F, 0.7311625609F, 0.7315729763F, 0.7319831126F,
+  0.7323929695F, 0.7328025466F, 0.7332118435F, 0.7336208600F,
+  0.7340295955F, 0.7344380499F, 0.7348462226F, 0.7352541134F,
+  0.7356617220F, 0.7360690478F, 0.7364760907F, 0.7368828502F,
+  0.7372893259F, 0.7376955176F, 0.7381014249F, 0.7385070475F,
+  0.7389123849F, 0.7393174368F, 0.7397222029F, 0.7401266829F,
+  0.7405308763F, 0.7409347829F, 0.7413384023F, 0.7417417341F,
+  0.7421447780F, 0.7425475338F, 0.7429500009F, 0.7433521791F,
+  0.7437540681F, 0.7441556674F, 0.7445569769F, 0.7449579960F,
+  0.7453587245F, 0.7457591621F, 0.7461593084F, 0.7465591631F,
+  0.7469587259F, 0.7473579963F, 0.7477569741F, 0.7481556590F,
+  0.7485540506F, 0.7489521486F, 0.7493499526F, 0.7497474623F,
+  0.7501446775F, 0.7505415977F, 0.7509382227F, 0.7513345521F,
+  0.7517305856F, 0.7521263229F, 0.7525217636F, 0.7529169074F,
+  0.7533117541F, 0.7537063032F, 0.7541005545F, 0.7544945076F,
+  0.7548881623F, 0.7552815182F, 0.7556745749F, 0.7560673323F,
+  0.7564597899F, 0.7568519474F, 0.7572438046F, 0.7576353611F,
+  0.7580266166F, 0.7584175708F, 0.7588082235F, 0.7591985741F,
+  0.7595886226F, 0.7599783685F, 0.7603678116F, 0.7607569515F,
+  0.7611457879F, 0.7615343206F, 0.7619225493F, 0.7623104735F,
+  0.7626980931F, 0.7630854078F, 0.7634724171F, 0.7638591209F,
+  0.7642455188F, 0.7646316106F, 0.7650173959F, 0.7654028744F,
+  0.7657880459F, 0.7661729100F, 0.7665574664F, 0.7669417150F,
+  0.7673256553F, 0.7677092871F, 0.7680926100F, 0.7684756239F,
+  0.7688583284F, 0.7692407232F, 0.7696228080F, 0.7700045826F,
+  0.7703860467F, 0.7707671999F, 0.7711480420F, 0.7715285728F,
+  0.7719087918F, 0.7722886989F, 0.7726682938F, 0.7730475762F,
+  0.7734265458F, 0.7738052023F, 0.7741835454F, 0.7745615750F,
+  0.7749392906F, 0.7753166921F, 0.7756937791F, 0.7760705514F,
+  0.7764470087F, 0.7768231508F, 0.7771989773F, 0.7775744880F,
+  0.7779496827F, 0.7783245610F, 0.7786991227F, 0.7790733676F,
+  0.7794472953F, 0.7798209056F, 0.7801941982F, 0.7805671729F,
+  0.7809398294F, 0.7813121675F, 0.7816841869F, 0.7820558873F,
+  0.7824272684F, 0.7827983301F, 0.7831690720F, 0.7835394940F,
+  0.7839095957F, 0.7842793768F, 0.7846488373F, 0.7850179767F,
+  0.7853867948F, 0.7857552914F, 0.7861234663F, 0.7864913191F,
+  0.7868588497F, 0.7872260578F, 0.7875929431F, 0.7879595055F,
+  0.7883257445F, 0.7886916601F, 0.7890572520F, 0.7894225198F,
+  0.7897874635F, 0.7901520827F, 0.7905163772F, 0.7908803468F,
+  0.7912439912F, 0.7916073102F, 0.7919703035F, 0.7923329710F,
+  0.7926953124F, 0.7930573274F, 0.7934190158F, 0.7937803774F,
+  0.7941414120F, 0.7945021193F, 0.7948624991F, 0.7952225511F,
+  0.7955822752F, 0.7959416711F, 0.7963007387F, 0.7966594775F,
+  0.7970178875F, 0.7973759685F, 0.7977337201F, 0.7980911422F,
+  0.7984482346F, 0.7988049970F, 0.7991614292F, 0.7995175310F,
+  0.7998733022F, 0.8002287426F, 0.8005838519F, 0.8009386299F,
+  0.8012930765F, 0.8016471914F, 0.8020009744F, 0.8023544253F,
+  0.8027075438F, 0.8030603298F, 0.8034127831F, 0.8037649035F,
+  0.8041166906F, 0.8044681445F, 0.8048192647F, 0.8051700512F,
+  0.8055205038F, 0.8058706222F, 0.8062204062F, 0.8065698556F,
+  0.8069189702F, 0.8072677499F, 0.8076161944F, 0.8079643036F,
+  0.8083120772F, 0.8086595151F, 0.8090066170F, 0.8093533827F,
+  0.8096998122F, 0.8100459051F, 0.8103916613F, 0.8107370806F,
+  0.8110821628F, 0.8114269077F, 0.8117713151F, 0.8121153849F,
+  0.8124591169F, 0.8128025108F, 0.8131455666F, 0.8134882839F,
+  0.8138306627F, 0.8141727027F, 0.8145144038F, 0.8148557658F,
+  0.8151967886F, 0.8155374718F, 0.8158778154F, 0.8162178192F,
+  0.8165574830F, 0.8168968067F, 0.8172357900F, 0.8175744328F,
+  0.8179127349F, 0.8182506962F, 0.8185883164F, 0.8189255955F,
+  0.8192625332F, 0.8195991295F, 0.8199353840F, 0.8202712967F,
+  0.8206068673F, 0.8209420958F, 0.8212769820F, 0.8216115256F,
+  0.8219457266F, 0.8222795848F, 0.8226131000F, 0.8229462721F,
+  0.8232791009F, 0.8236115863F, 0.8239437280F, 0.8242755260F,
+  0.8246069801F, 0.8249380901F, 0.8252688559F, 0.8255992774F,
+  0.8259293544F, 0.8262590867F, 0.8265884741F, 0.8269175167F,
+  0.8272462141F, 0.8275745663F, 0.8279025732F, 0.8282302344F,
+  0.8285575501F, 0.8288845199F, 0.8292111437F, 0.8295374215F,
+  0.8298633530F, 0.8301889382F, 0.8305141768F, 0.8308390688F,
+  0.8311636141F, 0.8314878124F, 0.8318116637F, 0.8321351678F,
+  0.8324583246F, 0.8327811340F, 0.8331035957F, 0.8334257098F,
+  0.8337474761F, 0.8340688944F, 0.8343899647F, 0.8347106867F,
+  0.8350310605F, 0.8353510857F, 0.8356707624F, 0.8359900904F,
+  0.8363090696F, 0.8366276999F, 0.8369459811F, 0.8372639131F,
+  0.8375814958F, 0.8378987292F, 0.8382156130F, 0.8385321472F,
+  0.8388483316F, 0.8391641662F, 0.8394796508F, 0.8397947853F,
+  0.8401095697F, 0.8404240037F, 0.8407380873F, 0.8410518204F,
+  0.8413652029F, 0.8416782347F, 0.8419909156F, 0.8423032456F,
+  0.8426152245F, 0.8429268523F, 0.8432381289F, 0.8435490541F,
+  0.8438596279F, 0.8441698502F, 0.8444797208F, 0.8447892396F,
+  0.8450984067F, 0.8454072218F, 0.8457156849F, 0.8460237959F,
+  0.8463315547F, 0.8466389612F, 0.8469460154F, 0.8472527170F,
+  0.8475590661F, 0.8478650625F, 0.8481707063F, 0.8484759971F,
+  0.8487809351F, 0.8490855201F, 0.8493897521F, 0.8496936308F,
+  0.8499971564F, 0.8503003286F, 0.8506031474F, 0.8509056128F,
+  0.8512077246F, 0.8515094828F, 0.8518108872F, 0.8521119379F,
+  0.8524126348F, 0.8527129777F, 0.8530129666F, 0.8533126015F,
+  0.8536118822F, 0.8539108087F, 0.8542093809F, 0.8545075988F,
+  0.8548054623F, 0.8551029712F, 0.8554001257F, 0.8556969255F,
+  0.8559933707F, 0.8562894611F, 0.8565851968F, 0.8568805775F,
+  0.8571756034F, 0.8574702743F, 0.8577645902F, 0.8580585509F,
+  0.8583521566F, 0.8586454070F, 0.8589383021F, 0.8592308420F,
+  0.8595230265F, 0.8598148556F, 0.8601063292F, 0.8603974473F,
+  0.8606882098F, 0.8609786167F, 0.8612686680F, 0.8615583636F,
+  0.8618477034F, 0.8621366874F, 0.8624253156F, 0.8627135878F,
+  0.8630015042F, 0.8632890646F, 0.8635762690F, 0.8638631173F,
+  0.8641496096F, 0.8644357457F, 0.8647215257F, 0.8650069495F,
+  0.8652920171F, 0.8655767283F, 0.8658610833F, 0.8661450820F,
+  0.8664287243F, 0.8667120102F, 0.8669949397F, 0.8672775127F,
+  0.8675597293F, 0.8678415894F, 0.8681230929F, 0.8684042398F,
+  0.8686850302F, 0.8689654640F, 0.8692455412F, 0.8695252617F,
+  0.8698046255F, 0.8700836327F, 0.8703622831F, 0.8706405768F,
+  0.8709185138F, 0.8711960940F, 0.8714733174F, 0.8717501840F,
+  0.8720266939F, 0.8723028469F, 0.8725786430F, 0.8728540824F,
+  0.8731291648F, 0.8734038905F, 0.8736782592F, 0.8739522711F,
+  0.8742259261F, 0.8744992242F, 0.8747721653F, 0.8750447496F,
+  0.8753169770F, 0.8755888475F, 0.8758603611F, 0.8761315177F,
+  0.8764023175F, 0.8766727603F, 0.8769428462F, 0.8772125752F,
+  0.8774819474F, 0.8777509626F, 0.8780196209F, 0.8782879224F,
+  0.8785558669F, 0.8788234546F, 0.8790906854F, 0.8793575594F,
+  0.8796240765F, 0.8798902368F, 0.8801560403F, 0.8804214870F,
+  0.8806865768F, 0.8809513099F, 0.8812156863F, 0.8814797059F,
+  0.8817433687F, 0.8820066749F, 0.8822696243F, 0.8825322171F,
+  0.8827944532F, 0.8830563327F, 0.8833178556F, 0.8835790219F,
+  0.8838398316F, 0.8841002848F, 0.8843603815F, 0.8846201217F,
+  0.8848795054F, 0.8851385327F, 0.8853972036F, 0.8856555182F,
+  0.8859134764F, 0.8861710783F, 0.8864283239F, 0.8866852133F,
+  0.8869417464F, 0.8871979234F, 0.8874537443F, 0.8877092090F,
+  0.8879643177F, 0.8882190704F, 0.8884734671F, 0.8887275078F,
+  0.8889811927F, 0.8892345216F, 0.8894874948F, 0.8897401122F,
+  0.8899923738F, 0.8902442798F, 0.8904958301F, 0.8907470248F,
+  0.8909978640F, 0.8912483477F, 0.8914984759F, 0.8917482487F,
+  0.8919976662F, 0.8922467284F, 0.8924954353F, 0.8927437871F,
+  0.8929917837F, 0.8932394252F, 0.8934867118F, 0.8937336433F,
+  0.8939802199F, 0.8942264417F, 0.8944723087F, 0.8947178210F,
+  0.8949629785F, 0.8952077815F, 0.8954522299F, 0.8956963239F,
+  0.8959400634F, 0.8961834486F, 0.8964264795F, 0.8966691561F,
+  0.8969114786F, 0.8971534470F, 0.8973950614F, 0.8976363219F,
+  0.8978772284F, 0.8981177812F, 0.8983579802F, 0.8985978256F,
+  0.8988373174F, 0.8990764556F, 0.8993152405F, 0.8995536720F,
+  0.8997917502F, 0.9000294751F, 0.9002668470F, 0.9005038658F,
+  0.9007405317F, 0.9009768446F, 0.9012128048F, 0.9014484123F,
+  0.9016836671F, 0.9019185693F, 0.9021531191F, 0.9023873165F,
+  0.9026211616F, 0.9028546546F, 0.9030877954F, 0.9033205841F,
+  0.9035530210F, 0.9037851059F, 0.9040168392F, 0.9042482207F,
+  0.9044792507F, 0.9047099293F, 0.9049402564F, 0.9051702323F,
+  0.9053998569F, 0.9056291305F, 0.9058580531F, 0.9060866248F,
+  0.9063148457F, 0.9065427159F, 0.9067702355F, 0.9069974046F,
+  0.9072242233F, 0.9074506917F, 0.9076768100F, 0.9079025782F,
+  0.9081279964F, 0.9083530647F, 0.9085777833F, 0.9088021523F,
+  0.9090261717F, 0.9092498417F, 0.9094731623F, 0.9096961338F,
+  0.9099187561F, 0.9101410295F, 0.9103629540F, 0.9105845297F,
+  0.9108057568F, 0.9110266354F, 0.9112471656F, 0.9114673475F,
+  0.9116871812F, 0.9119066668F, 0.9121258046F, 0.9123445945F,
+  0.9125630367F, 0.9127811314F, 0.9129988786F, 0.9132162785F,
+  0.9134333312F, 0.9136500368F, 0.9138663954F, 0.9140824073F,
+  0.9142980724F, 0.9145133910F, 0.9147283632F, 0.9149429890F,
+  0.9151572687F, 0.9153712023F, 0.9155847900F, 0.9157980319F,
+  0.9160109282F, 0.9162234790F, 0.9164356844F, 0.9166475445F,
+  0.9168590595F, 0.9170702296F, 0.9172810548F, 0.9174915354F,
+  0.9177016714F, 0.9179114629F, 0.9181209102F, 0.9183300134F,
+  0.9185387726F, 0.9187471879F, 0.9189552595F, 0.9191629876F,
+  0.9193703723F, 0.9195774136F, 0.9197841119F, 0.9199904672F,
+  0.9201964797F, 0.9204021495F, 0.9206074767F, 0.9208124616F,
+  0.9210171043F, 0.9212214049F, 0.9214253636F, 0.9216289805F,
+  0.9218322558F, 0.9220351896F, 0.9222377821F, 0.9224400335F,
+  0.9226419439F, 0.9228435134F, 0.9230447423F, 0.9232456307F,
+  0.9234461787F, 0.9236463865F, 0.9238462543F, 0.9240457822F,
+  0.9242449704F, 0.9244438190F, 0.9246423282F, 0.9248404983F,
+  0.9250383293F, 0.9252358214F, 0.9254329747F, 0.9256297896F,
+  0.9258262660F, 0.9260224042F, 0.9262182044F, 0.9264136667F,
+  0.9266087913F, 0.9268035783F, 0.9269980280F, 0.9271921405F,
+  0.9273859160F, 0.9275793546F, 0.9277724566F, 0.9279652221F,
+  0.9281576513F, 0.9283497443F, 0.9285415014F, 0.9287329227F,
+  0.9289240084F, 0.9291147586F, 0.9293051737F, 0.9294952536F,
+  0.9296849987F, 0.9298744091F, 0.9300634850F, 0.9302522266F,
+  0.9304406340F, 0.9306287074F, 0.9308164471F, 0.9310038532F,
+  0.9311909259F, 0.9313776654F, 0.9315640719F, 0.9317501455F,
+  0.9319358865F, 0.9321212951F, 0.9323063713F, 0.9324911155F,
+  0.9326755279F, 0.9328596085F, 0.9330433577F, 0.9332267756F,
+  0.9334098623F, 0.9335926182F, 0.9337750434F, 0.9339571380F,
+  0.9341389023F, 0.9343203366F, 0.9345014409F, 0.9346822155F,
+  0.9348626606F, 0.9350427763F, 0.9352225630F, 0.9354020207F,
+  0.9355811498F, 0.9357599503F, 0.9359384226F, 0.9361165667F,
+  0.9362943830F, 0.9364718716F, 0.9366490327F, 0.9368258666F,
+  0.9370023733F, 0.9371785533F, 0.9373544066F, 0.9375299335F,
+  0.9377051341F, 0.9378800087F, 0.9380545576F, 0.9382287809F,
+  0.9384026787F, 0.9385762515F, 0.9387494993F, 0.9389224223F,
+  0.9390950209F, 0.9392672951F, 0.9394392453F, 0.9396108716F,
+  0.9397821743F, 0.9399531536F, 0.9401238096F, 0.9402941427F,
+  0.9404641530F, 0.9406338407F, 0.9408032061F, 0.9409722495F,
+  0.9411409709F, 0.9413093707F, 0.9414774491F, 0.9416452062F,
+  0.9418126424F, 0.9419797579F, 0.9421465528F, 0.9423130274F,
+  0.9424791819F, 0.9426450166F, 0.9428105317F, 0.9429757274F,
+  0.9431406039F, 0.9433051616F, 0.9434694005F, 0.9436333209F,
+  0.9437969232F, 0.9439602074F, 0.9441231739F, 0.9442858229F,
+  0.9444481545F, 0.9446101691F, 0.9447718669F, 0.9449332481F,
+  0.9450943129F, 0.9452550617F, 0.9454154945F, 0.9455756118F,
+  0.9457354136F, 0.9458949003F, 0.9460540721F, 0.9462129292F,
+  0.9463714719F, 0.9465297003F, 0.9466876149F, 0.9468452157F,
+  0.9470025031F, 0.9471594772F, 0.9473161384F, 0.9474724869F,
+  0.9476285229F, 0.9477842466F, 0.9479396584F, 0.9480947585F,
+  0.9482495470F, 0.9484040243F, 0.9485581906F, 0.9487120462F,
+  0.9488655913F, 0.9490188262F, 0.9491717511F, 0.9493243662F,
+  0.9494766718F, 0.9496286683F, 0.9497803557F, 0.9499317345F,
+  0.9500828047F, 0.9502335668F, 0.9503840209F, 0.9505341673F,
+  0.9506840062F, 0.9508335380F, 0.9509827629F, 0.9511316810F,
+  0.9512802928F, 0.9514285984F, 0.9515765982F, 0.9517242923F,
+  0.9518716810F, 0.9520187646F, 0.9521655434F, 0.9523120176F,
+  0.9524581875F, 0.9526040534F, 0.9527496154F, 0.9528948739F,
+  0.9530398292F, 0.9531844814F, 0.9533288310F, 0.9534728780F,
+  0.9536166229F, 0.9537600659F, 0.9539032071F, 0.9540460470F,
+  0.9541885858F, 0.9543308237F, 0.9544727611F, 0.9546143981F,
+  0.9547557351F, 0.9548967723F, 0.9550375100F, 0.9551779485F,
+  0.9553180881F, 0.9554579290F, 0.9555974714F, 0.9557367158F,
+  0.9558756623F, 0.9560143112F, 0.9561526628F, 0.9562907174F,
+  0.9564284752F, 0.9565659366F, 0.9567031017F, 0.9568399710F,
+  0.9569765446F, 0.9571128229F, 0.9572488061F, 0.9573844944F,
+  0.9575198883F, 0.9576549879F, 0.9577897936F, 0.9579243056F,
+  0.9580585242F, 0.9581924497F, 0.9583260824F, 0.9584594226F,
+  0.9585924705F, 0.9587252264F, 0.9588576906F, 0.9589898634F,
+  0.9591217452F, 0.9592533360F, 0.9593846364F, 0.9595156465F,
+  0.9596463666F, 0.9597767971F, 0.9599069382F, 0.9600367901F,
+  0.9601663533F, 0.9602956279F, 0.9604246143F, 0.9605533128F,
+  0.9606817236F, 0.9608098471F, 0.9609376835F, 0.9610652332F,
+  0.9611924963F, 0.9613194733F, 0.9614461644F, 0.9615725699F,
+  0.9616986901F, 0.9618245253F, 0.9619500757F, 0.9620753418F,
+  0.9622003238F, 0.9623250219F, 0.9624494365F, 0.9625735679F,
+  0.9626974163F, 0.9628209821F, 0.9629442656F, 0.9630672671F,
+  0.9631899868F, 0.9633124251F, 0.9634345822F, 0.9635564585F,
+  0.9636780543F, 0.9637993699F, 0.9639204056F, 0.9640411616F,
+  0.9641616383F, 0.9642818359F, 0.9644017549F, 0.9645213955F,
+  0.9646407579F, 0.9647598426F, 0.9648786497F, 0.9649971797F,
+  0.9651154328F, 0.9652334092F, 0.9653511095F, 0.9654685337F,
+  0.9655856823F, 0.9657025556F, 0.9658191538F, 0.9659354773F,
+  0.9660515263F, 0.9661673013F, 0.9662828024F, 0.9663980300F,
+  0.9665129845F, 0.9666276660F, 0.9667420750F, 0.9668562118F,
+  0.9669700766F, 0.9670836698F, 0.9671969917F, 0.9673100425F,
+  0.9674228227F, 0.9675353325F, 0.9676475722F, 0.9677595422F,
+  0.9678712428F, 0.9679826742F, 0.9680938368F, 0.9682047309F,
+  0.9683153569F, 0.9684257150F, 0.9685358056F, 0.9686456289F,
+  0.9687551853F, 0.9688644752F, 0.9689734987F, 0.9690822564F,
+  0.9691907483F, 0.9692989750F, 0.9694069367F, 0.9695146337F,
+  0.9696220663F, 0.9697292349F, 0.9698361398F, 0.9699427813F,
+  0.9700491597F, 0.9701552754F, 0.9702611286F, 0.9703667197F,
+  0.9704720490F, 0.9705771169F, 0.9706819236F, 0.9707864695F,
+  0.9708907549F, 0.9709947802F, 0.9710985456F, 0.9712020514F,
+  0.9713052981F, 0.9714082859F, 0.9715110151F, 0.9716134862F,
+  0.9717156993F, 0.9718176549F, 0.9719193532F, 0.9720207946F,
+  0.9721219794F, 0.9722229080F, 0.9723235806F, 0.9724239976F,
+  0.9725241593F, 0.9726240661F, 0.9727237183F, 0.9728231161F,
+  0.9729222601F, 0.9730211503F, 0.9731197873F, 0.9732181713F,
+  0.9733163027F, 0.9734141817F, 0.9735118088F, 0.9736091842F,
+  0.9737063083F, 0.9738031814F, 0.9738998039F, 0.9739961760F,
+  0.9740922981F, 0.9741881706F, 0.9742837938F, 0.9743791680F,
+  0.9744742935F, 0.9745691707F, 0.9746637999F, 0.9747581814F,
+  0.9748523157F, 0.9749462029F, 0.9750398435F, 0.9751332378F,
+  0.9752263861F, 0.9753192887F, 0.9754119461F, 0.9755043585F,
+  0.9755965262F, 0.9756884496F, 0.9757801291F, 0.9758715650F,
+  0.9759627575F, 0.9760537071F, 0.9761444141F, 0.9762348789F,
+  0.9763251016F, 0.9764150828F, 0.9765048228F, 0.9765943218F,
+  0.9766835802F, 0.9767725984F, 0.9768613767F, 0.9769499154F,
+  0.9770382149F, 0.9771262755F, 0.9772140976F, 0.9773016815F,
+  0.9773890275F, 0.9774761360F, 0.9775630073F, 0.9776496418F,
+  0.9777360398F, 0.9778222016F, 0.9779081277F, 0.9779938182F,
+  0.9780792736F, 0.9781644943F, 0.9782494805F, 0.9783342326F,
+  0.9784187509F, 0.9785030359F, 0.9785870877F, 0.9786709069F,
+  0.9787544936F, 0.9788378484F, 0.9789209714F, 0.9790038631F,
+  0.9790865238F, 0.9791689538F, 0.9792511535F, 0.9793331232F,
+  0.9794148633F, 0.9794963742F, 0.9795776561F, 0.9796587094F,
+  0.9797395345F, 0.9798201316F, 0.9799005013F, 0.9799806437F,
+  0.9800605593F, 0.9801402483F, 0.9802197112F, 0.9802989483F,
+  0.9803779600F, 0.9804567465F, 0.9805353082F, 0.9806136455F,
+  0.9806917587F, 0.9807696482F, 0.9808473143F, 0.9809247574F,
+  0.9810019778F, 0.9810789759F, 0.9811557519F, 0.9812323064F,
+  0.9813086395F, 0.9813847517F, 0.9814606433F, 0.9815363147F,
+  0.9816117662F, 0.9816869981F, 0.9817620108F, 0.9818368047F,
+  0.9819113801F, 0.9819857374F, 0.9820598769F, 0.9821337989F,
+  0.9822075038F, 0.9822809920F, 0.9823542638F, 0.9824273195F,
+  0.9825001596F, 0.9825727843F, 0.9826451940F, 0.9827173891F,
+  0.9827893700F, 0.9828611368F, 0.9829326901F, 0.9830040302F,
+  0.9830751574F, 0.9831460720F, 0.9832167745F, 0.9832872652F,
+  0.9833575444F, 0.9834276124F, 0.9834974697F, 0.9835671166F,
+  0.9836365535F, 0.9837057806F, 0.9837747983F, 0.9838436071F,
+  0.9839122072F, 0.9839805990F, 0.9840487829F, 0.9841167591F,
+  0.9841845282F, 0.9842520903F, 0.9843194459F, 0.9843865953F,
+  0.9844535389F, 0.9845202771F, 0.9845868101F, 0.9846531383F,
+  0.9847192622F, 0.9847851820F, 0.9848508980F, 0.9849164108F,
+  0.9849817205F, 0.9850468276F, 0.9851117324F, 0.9851764352F,
+  0.9852409365F, 0.9853052366F, 0.9853693358F, 0.9854332344F,
+  0.9854969330F, 0.9855604317F, 0.9856237309F, 0.9856868310F,
+  0.9857497325F, 0.9858124355F, 0.9858749404F, 0.9859372477F,
+  0.9859993577F, 0.9860612707F, 0.9861229871F, 0.9861845072F,
+  0.9862458315F, 0.9863069601F, 0.9863678936F, 0.9864286322F,
+  0.9864891764F, 0.9865495264F, 0.9866096826F, 0.9866696454F,
+  0.9867294152F, 0.9867889922F, 0.9868483769F, 0.9869075695F,
+  0.9869665706F, 0.9870253803F, 0.9870839991F, 0.9871424273F,
+  0.9872006653F, 0.9872587135F, 0.9873165721F, 0.9873742415F,
+  0.9874317222F, 0.9874890144F, 0.9875461185F, 0.9876030348F,
+  0.9876597638F, 0.9877163057F, 0.9877726610F, 0.9878288300F,
+  0.9878848130F, 0.9879406104F, 0.9879962225F, 0.9880516497F,
+  0.9881068924F, 0.9881619509F, 0.9882168256F, 0.9882715168F,
+  0.9883260249F, 0.9883803502F, 0.9884344931F, 0.9884884539F,
+  0.9885422331F, 0.9885958309F, 0.9886492477F, 0.9887024838F,
+  0.9887555397F, 0.9888084157F, 0.9888611120F, 0.9889136292F,
+  0.9889659675F, 0.9890181273F, 0.9890701089F, 0.9891219128F,
+  0.9891735392F, 0.9892249885F, 0.9892762610F, 0.9893273572F,
+  0.9893782774F, 0.9894290219F, 0.9894795911F, 0.9895299853F,
+  0.9895802049F, 0.9896302502F, 0.9896801217F, 0.9897298196F,
+  0.9897793443F, 0.9898286961F, 0.9898778755F, 0.9899268828F,
+  0.9899757183F, 0.9900243823F, 0.9900728753F, 0.9901211976F,
+  0.9901693495F, 0.9902173314F, 0.9902651436F, 0.9903127865F,
+  0.9903602605F, 0.9904075659F, 0.9904547031F, 0.9905016723F,
+  0.9905484740F, 0.9905951086F, 0.9906415763F, 0.9906878775F,
+  0.9907340126F, 0.9907799819F, 0.9908257858F, 0.9908714247F,
+  0.9909168988F, 0.9909622086F, 0.9910073543F, 0.9910523364F,
+  0.9910971552F, 0.9911418110F, 0.9911863042F, 0.9912306351F,
+  0.9912748042F, 0.9913188117F, 0.9913626580F, 0.9914063435F,
+  0.9914498684F, 0.9914932333F, 0.9915364383F, 0.9915794839F,
+  0.9916223703F, 0.9916650981F, 0.9917076674F, 0.9917500787F,
+  0.9917923323F, 0.9918344286F, 0.9918763679F, 0.9919181505F,
+  0.9919597769F, 0.9920012473F, 0.9920425621F, 0.9920837217F,
+  0.9921247263F, 0.9921655765F, 0.9922062724F, 0.9922468145F,
+  0.9922872030F, 0.9923274385F, 0.9923675211F, 0.9924074513F,
+  0.9924472294F, 0.9924868557F, 0.9925263306F, 0.9925656544F,
+  0.9926048275F, 0.9926438503F, 0.9926827230F, 0.9927214461F,
+  0.9927600199F, 0.9927984446F, 0.9928367208F, 0.9928748486F,
+  0.9929128285F, 0.9929506608F, 0.9929883459F, 0.9930258841F,
+  0.9930632757F, 0.9931005211F, 0.9931376207F, 0.9931745747F,
+  0.9932113836F, 0.9932480476F, 0.9932845671F, 0.9933209425F,
+  0.9933571742F, 0.9933932623F, 0.9934292074F, 0.9934650097F,
+  0.9935006696F, 0.9935361874F, 0.9935715635F, 0.9936067982F,
+  0.9936418919F, 0.9936768448F, 0.9937116574F, 0.9937463300F,
+  0.9937808629F, 0.9938152565F, 0.9938495111F, 0.9938836271F,
+  0.9939176047F, 0.9939514444F, 0.9939851465F, 0.9940187112F,
+  0.9940521391F, 0.9940854303F, 0.9941185853F, 0.9941516044F,
+  0.9941844879F, 0.9942172361F, 0.9942498495F, 0.9942823283F,
+  0.9943146729F, 0.9943468836F, 0.9943789608F, 0.9944109047F,
+  0.9944427158F, 0.9944743944F, 0.9945059408F, 0.9945373553F,
+  0.9945686384F, 0.9945997902F, 0.9946308112F, 0.9946617017F,
+  0.9946924621F, 0.9947230926F, 0.9947535937F, 0.9947839656F,
+  0.9948142086F, 0.9948443232F, 0.9948743097F, 0.9949041683F,
+  0.9949338995F, 0.9949635035F, 0.9949929807F, 0.9950223315F,
+  0.9950515561F, 0.9950806549F, 0.9951096282F, 0.9951384764F,
+  0.9951671998F, 0.9951957987F, 0.9952242735F, 0.9952526245F,
+  0.9952808520F, 0.9953089564F, 0.9953369380F, 0.9953647971F,
+  0.9953925340F, 0.9954201491F, 0.9954476428F, 0.9954750153F,
+  0.9955022670F, 0.9955293981F, 0.9955564092F, 0.9955833003F,
+  0.9956100720F, 0.9956367245F, 0.9956632582F, 0.9956896733F,
+  0.9957159703F, 0.9957421494F, 0.9957682110F, 0.9957941553F,
+  0.9958199828F, 0.9958456937F, 0.9958712884F, 0.9958967672F,
+  0.9959221305F, 0.9959473784F, 0.9959725115F, 0.9959975300F,
+  0.9960224342F, 0.9960472244F, 0.9960719011F, 0.9960964644F,
+  0.9961209148F, 0.9961452525F, 0.9961694779F, 0.9961935913F,
+  0.9962175930F, 0.9962414834F, 0.9962652627F, 0.9962889313F,
+  0.9963124895F, 0.9963359377F, 0.9963592761F, 0.9963825051F,
+  0.9964056250F, 0.9964286361F, 0.9964515387F, 0.9964743332F,
+  0.9964970198F, 0.9965195990F, 0.9965420709F, 0.9965644360F,
+  0.9965866946F, 0.9966088469F, 0.9966308932F, 0.9966528340F,
+  0.9966746695F, 0.9966964001F, 0.9967180260F, 0.9967395475F,
+  0.9967609651F, 0.9967822789F, 0.9968034894F, 0.9968245968F,
+  0.9968456014F, 0.9968665036F, 0.9968873037F, 0.9969080019F,
+  0.9969285987F, 0.9969490942F, 0.9969694889F, 0.9969897830F,
+  0.9970099769F, 0.9970300708F, 0.9970500651F, 0.9970699601F,
+  0.9970897561F, 0.9971094533F, 0.9971290522F, 0.9971485531F,
+  0.9971679561F, 0.9971872617F, 0.9972064702F, 0.9972255818F,
+  0.9972445968F, 0.9972635157F, 0.9972823386F, 0.9973010659F,
+  0.9973196980F, 0.9973382350F, 0.9973566773F, 0.9973750253F,
+  0.9973932791F, 0.9974114392F, 0.9974295059F, 0.9974474793F,
+  0.9974653599F, 0.9974831480F, 0.9975008438F, 0.9975184476F,
+  0.9975359598F, 0.9975533806F, 0.9975707104F, 0.9975879495F,
+  0.9976050981F, 0.9976221566F, 0.9976391252F, 0.9976560043F,
+  0.9976727941F, 0.9976894950F, 0.9977061073F, 0.9977226312F,
+  0.9977390671F, 0.9977554152F, 0.9977716759F, 0.9977878495F,
+  0.9978039361F, 0.9978199363F, 0.9978358501F, 0.9978516780F,
+  0.9978674202F, 0.9978830771F, 0.9978986488F, 0.9979141358F,
+  0.9979295383F, 0.9979448566F, 0.9979600909F, 0.9979752417F,
+  0.9979903091F, 0.9980052936F, 0.9980201952F, 0.9980350145F,
+  0.9980497515F, 0.9980644067F, 0.9980789804F, 0.9980934727F,
+  0.9981078841F, 0.9981222147F, 0.9981364649F, 0.9981506350F,
+  0.9981647253F, 0.9981787360F, 0.9981926674F, 0.9982065199F,
+  0.9982202936F, 0.9982339890F, 0.9982476062F, 0.9982611456F,
+  0.9982746074F, 0.9982879920F, 0.9983012996F, 0.9983145304F,
+  0.9983276849F, 0.9983407632F, 0.9983537657F, 0.9983666926F,
+  0.9983795442F, 0.9983923208F, 0.9984050226F, 0.9984176501F,
+  0.9984302033F, 0.9984426827F, 0.9984550884F, 0.9984674208F,
+  0.9984796802F, 0.9984918667F, 0.9985039808F, 0.9985160227F,
+  0.9985279926F, 0.9985398909F, 0.9985517177F, 0.9985634734F,
+  0.9985751583F, 0.9985867727F, 0.9985983167F, 0.9986097907F,
+  0.9986211949F, 0.9986325297F, 0.9986437953F, 0.9986549919F,
+  0.9986661199F, 0.9986771795F, 0.9986881710F, 0.9986990946F,
+  0.9987099507F, 0.9987207394F, 0.9987314611F, 0.9987421161F,
+  0.9987527045F, 0.9987632267F, 0.9987736829F, 0.9987840734F,
+  0.9987943985F, 0.9988046584F, 0.9988148534F, 0.9988249838F,
+  0.9988350498F, 0.9988450516F, 0.9988549897F, 0.9988648641F,
+  0.9988746753F, 0.9988844233F, 0.9988941086F, 0.9989037313F,
+  0.9989132918F, 0.9989227902F, 0.9989322269F, 0.9989416021F,
+  0.9989509160F, 0.9989601690F, 0.9989693613F, 0.9989784931F,
+  0.9989875647F, 0.9989965763F, 0.9990055283F, 0.9990144208F,
+  0.9990232541F, 0.9990320286F, 0.9990407443F, 0.9990494016F,
+  0.9990580008F, 0.9990665421F, 0.9990750257F, 0.9990834519F,
+  0.9990918209F, 0.9991001331F, 0.9991083886F, 0.9991165877F,
+  0.9991247307F, 0.9991328177F, 0.9991408491F, 0.9991488251F,
+  0.9991567460F, 0.9991646119F, 0.9991724232F, 0.9991801801F,
+  0.9991878828F, 0.9991955316F, 0.9992031267F, 0.9992106684F,
+  0.9992181569F, 0.9992255925F, 0.9992329753F, 0.9992403057F,
+  0.9992475839F, 0.9992548101F, 0.9992619846F, 0.9992691076F,
+  0.9992761793F, 0.9992832001F, 0.9992901701F, 0.9992970895F,
+  0.9993039587F, 0.9993107777F, 0.9993175470F, 0.9993242667F,
+  0.9993309371F, 0.9993375583F, 0.9993441307F, 0.9993506545F,
+  0.9993571298F, 0.9993635570F, 0.9993699362F, 0.9993762678F,
+  0.9993825519F, 0.9993887887F, 0.9993949785F, 0.9994011216F,
+  0.9994072181F, 0.9994132683F, 0.9994192725F, 0.9994252307F,
+  0.9994311434F, 0.9994370107F, 0.9994428327F, 0.9994486099F,
+  0.9994543423F, 0.9994600303F, 0.9994656739F, 0.9994712736F,
+  0.9994768294F, 0.9994823417F, 0.9994878105F, 0.9994932363F,
+  0.9994986191F, 0.9995039592F, 0.9995092568F, 0.9995145122F,
+  0.9995197256F, 0.9995248971F, 0.9995300270F, 0.9995351156F,
+  0.9995401630F, 0.9995451695F, 0.9995501352F, 0.9995550604F,
+  0.9995599454F, 0.9995647903F, 0.9995695953F, 0.9995743607F,
+  0.9995790866F, 0.9995837734F, 0.9995884211F, 0.9995930300F,
+  0.9995976004F, 0.9996021324F, 0.9996066263F, 0.9996110822F,
+  0.9996155004F, 0.9996198810F, 0.9996242244F, 0.9996285306F,
+  0.9996327999F, 0.9996370326F, 0.9996412287F, 0.9996453886F,
+  0.9996495125F, 0.9996536004F, 0.9996576527F, 0.9996616696F,
+  0.9996656512F, 0.9996695977F, 0.9996735094F, 0.9996773865F,
+  0.9996812291F, 0.9996850374F, 0.9996888118F, 0.9996925523F,
+  0.9996962591F, 0.9996999325F, 0.9997035727F, 0.9997071798F,
+  0.9997107541F, 0.9997142957F, 0.9997178049F, 0.9997212818F,
+  0.9997247266F, 0.9997281396F, 0.9997315209F, 0.9997348708F,
+  0.9997381893F, 0.9997414767F, 0.9997447333F, 0.9997479591F,
+  0.9997511544F, 0.9997543194F, 0.9997574542F, 0.9997605591F,
+  0.9997636342F, 0.9997666797F, 0.9997696958F, 0.9997726828F,
+  0.9997756407F, 0.9997785698F, 0.9997814703F, 0.9997843423F,
+  0.9997871860F, 0.9997900016F, 0.9997927894F, 0.9997955494F,
+  0.9997982818F, 0.9998009869F, 0.9998036648F, 0.9998063157F,
+  0.9998089398F, 0.9998115373F, 0.9998141082F, 0.9998166529F,
+  0.9998191715F, 0.9998216642F, 0.9998241311F, 0.9998265724F,
+  0.9998289884F, 0.9998313790F, 0.9998337447F, 0.9998360854F,
+  0.9998384015F, 0.9998406930F, 0.9998429602F, 0.9998452031F,
+  0.9998474221F, 0.9998496171F, 0.9998517885F, 0.9998539364F,
+  0.9998560610F, 0.9998581624F, 0.9998602407F, 0.9998622962F,
+  0.9998643291F, 0.9998663394F, 0.9998683274F, 0.9998702932F,
+  0.9998722370F, 0.9998741589F, 0.9998760591F, 0.9998779378F,
+  0.9998797952F, 0.9998816313F, 0.9998834464F, 0.9998852406F,
+  0.9998870141F, 0.9998887670F, 0.9998904995F, 0.9998922117F,
+  0.9998939039F, 0.9998955761F, 0.9998972285F, 0.9998988613F,
+  0.9999004746F, 0.9999020686F, 0.9999036434F, 0.9999051992F,
+  0.9999067362F, 0.9999082544F, 0.9999097541F, 0.9999112354F,
+  0.9999126984F, 0.9999141433F, 0.9999155703F, 0.9999169794F,
+  0.9999183709F, 0.9999197449F, 0.9999211014F, 0.9999224408F,
+  0.9999237631F, 0.9999250684F, 0.9999263570F, 0.9999276289F,
+  0.9999288843F, 0.9999301233F, 0.9999313461F, 0.9999325529F,
+  0.9999337437F, 0.9999349187F, 0.9999360780F, 0.9999372218F,
+  0.9999383503F, 0.9999394635F, 0.9999405616F, 0.9999416447F,
+  0.9999427129F, 0.9999437665F, 0.9999448055F, 0.9999458301F,
+  0.9999468404F, 0.9999478365F, 0.9999488185F, 0.9999497867F,
+  0.9999507411F, 0.9999516819F, 0.9999526091F, 0.9999535230F,
+  0.9999544236F, 0.9999553111F, 0.9999561856F, 0.9999570472F,
+  0.9999578960F, 0.9999587323F, 0.9999595560F, 0.9999603674F,
+  0.9999611666F, 0.9999619536F, 0.9999627286F, 0.9999634917F,
+  0.9999642431F, 0.9999649828F, 0.9999657110F, 0.9999664278F,
+  0.9999671334F, 0.9999678278F, 0.9999685111F, 0.9999691835F,
+  0.9999698451F, 0.9999704960F, 0.9999711364F, 0.9999717662F,
+  0.9999723858F, 0.9999729950F, 0.9999735942F, 0.9999741834F,
+  0.9999747626F, 0.9999753321F, 0.9999758919F, 0.9999764421F,
+  0.9999769828F, 0.9999775143F, 0.9999780364F, 0.9999785495F,
+  0.9999790535F, 0.9999795485F, 0.9999800348F, 0.9999805124F,
+  0.9999809813F, 0.9999814417F, 0.9999818938F, 0.9999823375F,
+  0.9999827731F, 0.9999832005F, 0.9999836200F, 0.9999840316F,
+  0.9999844353F, 0.9999848314F, 0.9999852199F, 0.9999856008F,
+  0.9999859744F, 0.9999863407F, 0.9999866997F, 0.9999870516F,
+  0.9999873965F, 0.9999877345F, 0.9999880656F, 0.9999883900F,
+  0.9999887078F, 0.9999890190F, 0.9999893237F, 0.9999896220F,
+  0.9999899140F, 0.9999901999F, 0.9999904796F, 0.9999907533F,
+  0.9999910211F, 0.9999912830F, 0.9999915391F, 0.9999917896F,
+  0.9999920345F, 0.9999922738F, 0.9999925077F, 0.9999927363F,
+  0.9999929596F, 0.9999931777F, 0.9999933907F, 0.9999935987F,
+  0.9999938018F, 0.9999940000F, 0.9999941934F, 0.9999943820F,
+  0.9999945661F, 0.9999947456F, 0.9999949206F, 0.9999950912F,
+  0.9999952575F, 0.9999954195F, 0.9999955773F, 0.9999957311F,
+  0.9999958807F, 0.9999960265F, 0.9999961683F, 0.9999963063F,
+  0.9999964405F, 0.9999965710F, 0.9999966979F, 0.9999968213F,
+  0.9999969412F, 0.9999970576F, 0.9999971707F, 0.9999972805F,
+  0.9999973871F, 0.9999974905F, 0.9999975909F, 0.9999976881F,
+  0.9999977824F, 0.9999978738F, 0.9999979624F, 0.9999980481F,
+  0.9999981311F, 0.9999982115F, 0.9999982892F, 0.9999983644F,
+  0.9999984370F, 0.9999985072F, 0.9999985750F, 0.9999986405F,
+  0.9999987037F, 0.9999987647F, 0.9999988235F, 0.9999988802F,
+  0.9999989348F, 0.9999989873F, 0.9999990379F, 0.9999990866F,
+  0.9999991334F, 0.9999991784F, 0.9999992217F, 0.9999992632F,
+  0.9999993030F, 0.9999993411F, 0.9999993777F, 0.9999994128F,
+  0.9999994463F, 0.9999994784F, 0.9999995091F, 0.9999995384F,
+  0.9999995663F, 0.9999995930F, 0.9999996184F, 0.9999996426F,
+  0.9999996657F, 0.9999996876F, 0.9999997084F, 0.9999997282F,
+  0.9999997469F, 0.9999997647F, 0.9999997815F, 0.9999997973F,
+  0.9999998123F, 0.9999998265F, 0.9999998398F, 0.9999998524F,
+  0.9999998642F, 0.9999998753F, 0.9999998857F, 0.9999998954F,
+  0.9999999045F, 0.9999999130F, 0.9999999209F, 0.9999999282F,
+  0.9999999351F, 0.9999999414F, 0.9999999472F, 0.9999999526F,
+  0.9999999576F, 0.9999999622F, 0.9999999664F, 0.9999999702F,
+  0.9999999737F, 0.9999999769F, 0.9999999798F, 0.9999999824F,
+  0.9999999847F, 0.9999999868F, 0.9999999887F, 0.9999999904F,
+  0.9999999919F, 0.9999999932F, 0.9999999943F, 0.9999999953F,
+  0.9999999961F, 0.9999999969F, 0.9999999975F, 0.9999999980F,
+  0.9999999985F, 0.9999999988F, 0.9999999991F, 0.9999999993F,
+  0.9999999995F, 0.9999999997F, 0.9999999998F, 0.9999999999F,
+  0.9999999999F, 1.0000000000F, 1.0000000000F, 1.0000000000F,
+  1.0000000000F, 1.0000000000F, 1.0000000000F, 1.0000000000F,
+};
+
+const float ff_vorbis_floor1_inverse_db_table[256]={
+  1.0649863e-07F, 1.1341951e-07F, 1.2079015e-07F, 1.2863978e-07F,
+  1.3699951e-07F, 1.4590251e-07F, 1.5538408e-07F, 1.6548181e-07F,
+  1.7623575e-07F, 1.8768855e-07F, 1.9988561e-07F, 2.128753e-07F,
+  2.2670913e-07F, 2.4144197e-07F, 2.5713223e-07F, 2.7384213e-07F,
+  2.9163793e-07F, 3.1059021e-07F, 3.3077411e-07F, 3.5226968e-07F,
+  3.7516214e-07F, 3.9954229e-07F, 4.2550680e-07F, 4.5315863e-07F,
+  4.8260743e-07F, 5.1396998e-07F, 5.4737065e-07F, 5.8294187e-07F,
+  6.2082472e-07F, 6.6116941e-07F, 7.0413592e-07F, 7.4989464e-07F,
+  7.9862701e-07F, 8.5052630e-07F, 9.0579828e-07F, 9.6466216e-07F,
+  1.0273513e-06F, 1.0941144e-06F, 1.1652161e-06F, 1.2409384e-06F,
+  1.3215816e-06F, 1.4074654e-06F, 1.4989305e-06F, 1.5963394e-06F,
+  1.7000785e-06F, 1.8105592e-06F, 1.9282195e-06F, 2.0535261e-06F,
+  2.1869758e-06F, 2.3290978e-06F, 2.4804557e-06F, 2.6416497e-06F,
+  2.8133190e-06F, 2.9961443e-06F, 3.1908506e-06F, 3.3982101e-06F,
+  3.6190449e-06F, 3.8542308e-06F, 4.1047004e-06F, 4.3714470e-06F,
+  4.6555282e-06F, 4.9580707e-06F, 5.2802740e-06F, 5.6234160e-06F,
+  5.9888572e-06F, 6.3780469e-06F, 6.7925283e-06F, 7.2339451e-06F,
+  7.7040476e-06F, 8.2047000e-06F, 8.7378876e-06F, 9.3057248e-06F,
+  9.9104632e-06F, 1.0554501e-05F, 1.1240392e-05F, 1.1970856e-05F,
+  1.2748789e-05F, 1.3577278e-05F, 1.4459606e-05F, 1.5399272e-05F,
+  1.6400004e-05F, 1.7465768e-05F, 1.8600792e-05F, 1.9809576e-05F,
+  2.1096914e-05F, 2.2467911e-05F, 2.3928002e-05F, 2.5482978e-05F,
+  2.7139006e-05F, 2.8902651e-05F, 3.0780908e-05F, 3.2781225e-05F,
+  3.4911534e-05F, 3.7180282e-05F, 3.9596466e-05F, 4.2169667e-05F,
+  4.4910090e-05F, 4.7828601e-05F, 5.0936773e-05F, 5.4246931e-05F,
+  5.7772202e-05F, 6.1526565e-05F, 6.5524908e-05F, 6.9783085e-05F,
+  7.4317983e-05F, 7.9147585e-05F, 8.4291040e-05F, 8.9768747e-05F,
+  9.5602426e-05F, 0.00010181521F, 0.00010843174F, 0.00011547824F,
+  0.00012298267F, 0.00013097477F, 0.00013948625F, 0.00014855085F,
+  0.00015820453F, 0.00016848555F, 0.00017943469F, 0.00019109536F,
+  0.00020351382F, 0.00021673929F, 0.00023082423F, 0.00024582449F,
+  0.00026179955F, 0.00027881276F, 0.00029693158F, 0.00031622787F,
+  0.00033677814F, 0.00035866388F, 0.00038197188F, 0.00040679456F,
+  0.00043323036F, 0.00046138411F, 0.00049136745F, 0.00052329927F,
+  0.00055730621F, 0.00059352311F, 0.00063209358F, 0.00067317058F,
+  0.00071691700F, 0.00076350630F, 0.00081312324F, 0.00086596457F,
+  0.00092223983F, 0.00098217216F, 0.0010459992F, 0.0011139742F,
+  0.0011863665F, 0.0012634633F, 0.0013455702F, 0.0014330129F,
+  0.0015261382F, 0.0016253153F, 0.0017309374F, 0.0018434235F,
+  0.0019632195F, 0.0020908006F, 0.0022266726F, 0.0023713743F,
+  0.0025254795F, 0.0026895994F, 0.0028643847F, 0.0030505286F,
+  0.0032487691F, 0.0034598925F, 0.0036847358F, 0.0039241906F,
+  0.0041792066F, 0.0044507950F, 0.0047400328F, 0.0050480668F,
+  0.0053761186F, 0.0057254891F, 0.0060975636F, 0.0064938176F,
+  0.0069158225F, 0.0073652516F, 0.0078438871F, 0.0083536271F,
+  0.0088964928F, 0.009474637F, 0.010090352F, 0.010746080F,
+  0.011444421F, 0.012188144F, 0.012980198F, 0.013823725F,
+  0.014722068F, 0.015678791F, 0.016697687F, 0.017782797F,
+  0.018938423F, 0.020169149F, 0.021479854F, 0.022875735F,
+  0.024362330F, 0.025945531F, 0.027631618F, 0.029427276F,
+  0.031339626F, 0.033376252F, 0.035545228F, 0.037855157F,
+  0.040315199F, 0.042935108F, 0.045725273F, 0.048696758F,
+  0.051861348F, 0.055231591F, 0.058820850F, 0.062643361F,
+  0.066714279F, 0.071049749F, 0.075666962F, 0.080584227F,
+  0.085821044F, 0.091398179F, 0.097337747F, 0.10366330F,
+  0.11039993F, 0.11757434F, 0.12521498F, 0.13335215F,
+  0.14201813F, 0.15124727F, 0.16107617F, 0.17154380F,
+  0.18269168F, 0.19456402F, 0.20720788F, 0.22067342F,
+  0.23501402F, 0.25028656F, 0.26655159F, 0.28387361F,
+  0.30232132F, 0.32196786F, 0.34289114F, 0.36517414F,
+  0.38890521F, 0.41417847F, 0.44109412F, 0.46975890F,
+  0.50028648F, 0.53279791F, 0.56742212F, 0.60429640F,
+  0.64356699F, 0.68538959F, 0.72993007F, 0.77736504F,
+  0.82788260F, 0.88168307F, 0.9389798F, 1.F,
+};
+
+const float * ff_vorbis_vwin[8] = { vwin64, vwin128, vwin256, vwin512, vwin1024, vwin2048, vwin4096, vwin8192 };
+
diff --git a/contrib/ffmpeg/libavcodec/vorbis_enc.c b/contrib/ffmpeg/libavcodec/vorbis_enc.c
new file mode 100644
index 000000000..636b0cfae
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vorbis_enc.c
@@ -0,0 +1,1087 @@
+/*
+ * copyright (c) 2006 Oded Shimon <ods15@ods15.dyndns.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/**
+ * @file vorbis_enc.c
+ * Native Vorbis encoder.
+ * @author Oded Shimon <ods15@ods15.dyndns.org>
+ */
+
+#include <float.h>
+#include "avcodec.h"
+#include "dsputil.h"
+#include "vorbis.h"
+#include "vorbis_enc_data.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+typedef struct {
+    int nentries;
+    uint8_t * lens;
+    uint32_t * codewords;
+    int ndimentions;
+    float min;
+    float delta;
+    int seq_p;
+    int lookup;
+    int * quantlist;
+    float * dimentions;
+    float * pow2;
+} codebook_t;
+
+typedef struct {
+    int dim;
+    int subclass;
+    int masterbook;
+    int * books;
+} floor_class_t;
+
+typedef struct {
+    int partitions;
+    int * partition_to_class;
+    int nclasses;
+    floor_class_t * classes;
+    int multiplier;
+    int rangebits;
+    int values;
+    floor1_entry_t * list;
+} floor_t;
+
+typedef struct {
+    int type;
+    int begin;
+    int end;
+    int partition_size;
+    int classifications;
+    int classbook;
+    int8_t (*books)[8];
+    float (*maxes)[2];
+} residue_t;
+
+typedef struct {
+    int submaps;
+    int * mux;
+    int * floor;
+    int * residue;
+    int coupling_steps;
+    int * magnitude;
+    int * angle;
+} mapping_t;
+
+typedef struct {
+    int blockflag;
+    int mapping;
+} vorbis_mode_t;
+
+typedef struct {
+    int channels;
+    int sample_rate;
+    int log2_blocksize[2];
+    MDCTContext mdct[2];
+    const float * win[2];
+    int have_saved;
+    float * saved;
+    float * samples;
+    float * floor; // also used for tmp values for mdct
+    float * coeffs; // also used for residue after floor
+    float quality;
+
+    int ncodebooks;
+    codebook_t * codebooks;
+
+    int nfloors;
+    floor_t * floors;
+
+    int nresidues;
+    residue_t * residues;
+
+    int nmappings;
+    mapping_t * mappings;
+
+    int nmodes;
+    vorbis_mode_t * modes;
+} venc_context_t;
+
+typedef struct {
+    int total;
+    int total_pos;
+    int pos;
+    uint8_t * buf_ptr;
+} PutBitContext;
+
+static inline void init_put_bits(PutBitContext * pb, uint8_t * buf, int buffer_len) {
+    pb->total = buffer_len * 8;
+    pb->total_pos = 0;
+    pb->pos = 0;
+    pb->buf_ptr = buf;
+}
+
+static void put_bits(PutBitContext * pb, int bits, uint64_t val) {
+    if ((pb->total_pos += bits) >= pb->total) return;
+    if (!bits) return;
+    if (pb->pos) {
+        if (pb->pos > bits) {
+            *pb->buf_ptr |= val << (8 - pb->pos);
+            pb->pos -= bits;
+            bits = 0;
+        } else {
+            *pb->buf_ptr++ |= (val << (8 - pb->pos)) & 0xFF;
+            val >>= pb->pos;
+            bits -= pb->pos;
+            pb->pos = 0;
+        }
+    }
+    for (; bits >= 8; bits -= 8) {
+        *pb->buf_ptr++ = val & 0xFF;
+        val >>= 8;
+    }
+    if (bits) {
+        *pb->buf_ptr = val;
+        pb->pos = 8 - bits;
+    }
+}
+
+static inline void flush_put_bits(PutBitContext * pb) {
+}
+
+static inline int put_bits_count(PutBitContext * pb) {
+    return pb->total_pos;
+}
+
+static inline void put_codeword(PutBitContext * pb, codebook_t * cb, int entry) {
+    assert(entry >= 0);
+    assert(entry < cb->nentries);
+    assert(cb->lens[entry]);
+    put_bits(pb, cb->lens[entry], cb->codewords[entry]);
+}
+
+static int cb_lookup_vals(int lookup, int dimentions, int entries) {
+    if      (lookup == 1) return ff_vorbis_nth_root(entries, dimentions);
+    else if (lookup == 2) return dimentions * entries;
+    return 0;
+}
+
+static void ready_codebook(codebook_t * cb) {
+    int i;
+
+    ff_vorbis_len2vlc(cb->lens, cb->codewords, cb->nentries);
+
+    if (!cb->lookup)
+        cb->pow2 = cb->dimentions = NULL;
+    else {
+        int vals = cb_lookup_vals(cb->lookup, cb->ndimentions, cb->nentries);
+        cb->dimentions = av_malloc(sizeof(float) * cb->nentries * cb->ndimentions);
+        cb->pow2 = av_mallocz(sizeof(float) * cb->nentries);
+        for (i = 0; i < cb->nentries; i++) {
+            float last = 0;
+            int j;
+            int div = 1;
+            for (j = 0; j < cb->ndimentions; j++) {
+                int off;
+                if (cb->lookup == 1)
+                    off = (i / div) % vals; // lookup type 1
+                else
+                    off = i * cb->ndimentions + j; // lookup type 2
+
+                cb->dimentions[i * cb->ndimentions + j] = last + cb->min + cb->quantlist[off] * cb->delta;
+                if (cb->seq_p)
+                    last = cb->dimentions[i * cb->ndimentions + j];
+                cb->pow2[i] += cb->dimentions[i * cb->ndimentions + j]*cb->dimentions[i * cb->ndimentions + j];
+                div *= vals;
+            }
+            cb->pow2[i] /= 2.;
+        }
+    }
+}
+
+static void ready_residue(residue_t * rc, venc_context_t * venc) {
+    int i;
+    assert(rc->type == 2);
+    rc->maxes = av_mallocz(sizeof(float[2]) * rc->classifications);
+    for (i = 0; i < rc->classifications; i++) {
+        int j;
+        codebook_t * cb;
+        for (j = 0; j < 8; j++)
+            if (rc->books[i][j] != -1) break;
+        if (j == 8) continue; // zero
+        cb = &venc->codebooks[rc->books[i][j]];
+        assert(cb->ndimentions >= 2);
+        assert(cb->lookup);
+
+        for (j = 0; j < cb->nentries; j++) {
+            float a;
+            if (!cb->lens[j]) continue;
+            a = fabs(cb->dimentions[j * cb->ndimentions]);
+            if (a > rc->maxes[i][0])
+                rc->maxes[i][0] = a;
+            a = fabs(cb->dimentions[j * cb->ndimentions + 1]);
+            if (a > rc->maxes[i][1])
+                rc->maxes[i][1] = a;
+        }
+    }
+    // small bias
+    for (i = 0; i < rc->classifications; i++) {
+        rc->maxes[i][0] += 0.8;
+        rc->maxes[i][1] += 0.8;
+    }
+}
+
+static void create_vorbis_context(venc_context_t * venc, AVCodecContext * avccontext) {
+    floor_t * fc;
+    residue_t * rc;
+    mapping_t * mc;
+    int i, book;
+
+    venc->channels = avccontext->channels;
+    venc->sample_rate = avccontext->sample_rate;
+    venc->log2_blocksize[0] = venc->log2_blocksize[1] = 11;
+
+    venc->ncodebooks = sizeof(cvectors)/sizeof(cvectors[0]);
+    venc->codebooks = av_malloc(sizeof(codebook_t) * venc->ncodebooks);
+
+    // codebook 0..14 - floor1 book, values 0..255
+    // codebook 15 residue masterbook
+    // codebook 16..29 residue
+    for (book = 0; book < venc->ncodebooks; book++) {
+        codebook_t * cb = &venc->codebooks[book];
+        int vals;
+        cb->ndimentions = cvectors[book].dim;
+        cb->nentries = cvectors[book].real_len;
+        cb->min = cvectors[book].min;
+        cb->delta = cvectors[book].delta;
+        cb->lookup = cvectors[book].lookup;
+        cb->seq_p = 0;
+
+        cb->lens = av_malloc(sizeof(uint8_t) * cb->nentries);
+        cb->codewords = av_malloc(sizeof(uint32_t) * cb->nentries);
+        memcpy(cb->lens, cvectors[book].clens, cvectors[book].len);
+        memset(cb->lens + cvectors[book].len, 0, cb->nentries - cvectors[book].len);
+
+        if (cb->lookup) {
+            vals = cb_lookup_vals(cb->lookup, cb->ndimentions, cb->nentries);
+            cb->quantlist = av_malloc(sizeof(int) * vals);
+            for (i = 0; i < vals; i++)
+                cb->quantlist[i] = cvectors[book].quant[i];
+        } else {
+            cb->quantlist = NULL;
+        }
+        ready_codebook(cb);
+    }
+
+    venc->nfloors = 1;
+    venc->floors = av_malloc(sizeof(floor_t) * venc->nfloors);
+
+    // just 1 floor
+    fc = &venc->floors[0];
+    fc->partitions = 8;
+    fc->partition_to_class = av_malloc(sizeof(int) * fc->partitions);
+    fc->nclasses = 0;
+    for (i = 0; i < fc->partitions; i++) {
+        static const int a[] = {0,1,2,2,3,3,4,4};
+        fc->partition_to_class[i] = a[i];
+        fc->nclasses = FFMAX(fc->nclasses, fc->partition_to_class[i]);
+    }
+    fc->nclasses++;
+    fc->classes = av_malloc(sizeof(floor_class_t) * fc->nclasses);
+    for (i = 0; i < fc->nclasses; i++) {
+        floor_class_t * c = &fc->classes[i];
+        int j, books;
+        c->dim = floor_classes[i].dim;
+        c->subclass = floor_classes[i].subclass;
+        c->masterbook = floor_classes[i].masterbook;
+        books = (1 << c->subclass);
+        c->books = av_malloc(sizeof(int) * books);
+        for (j = 0; j < books; j++)
+            c->books[j] = floor_classes[i].nbooks[j];
+    }
+    fc->multiplier = 2;
+    fc->rangebits = venc->log2_blocksize[0] - 1;
+
+    fc->values = 2;
+    for (i = 0; i < fc->partitions; i++)
+        fc->values += fc->classes[fc->partition_to_class[i]].dim;
+
+    fc->list = av_malloc(sizeof(floor1_entry_t) * fc->values);
+    fc->list[0].x = 0;
+    fc->list[1].x = 1 << fc->rangebits;
+    for (i = 2; i < fc->values; i++) {
+        static const int a[] = {
+             93, 23,372,  6, 46,186,750, 14, 33, 65,
+            130,260,556,  3, 10, 18, 28, 39, 55, 79,
+            111,158,220,312,464,650,850
+        };
+        fc->list[i].x = a[i - 2];
+    }
+    ff_vorbis_ready_floor1_list(fc->list, fc->values);
+
+    venc->nresidues = 1;
+    venc->residues = av_malloc(sizeof(residue_t) * venc->nresidues);
+
+    // single residue
+    rc = &venc->residues[0];
+    rc->type = 2;
+    rc->begin = 0;
+    rc->end = 1600;
+    rc->partition_size = 32;
+    rc->classifications = 10;
+    rc->classbook = 15;
+    rc->books = av_malloc(sizeof(*rc->books) * rc->classifications);
+    {
+        static const int8_t a[10][8] = {
+            { -1, -1, -1, -1, -1, -1, -1, -1, },
+            { -1, -1, 16, -1, -1, -1, -1, -1, },
+            { -1, -1, 17, -1, -1, -1, -1, -1, },
+            { -1, -1, 18, -1, -1, -1, -1, -1, },
+            { -1, -1, 19, -1, -1, -1, -1, -1, },
+            { -1, -1, 20, -1, -1, -1, -1, -1, },
+            { -1, -1, 21, -1, -1, -1, -1, -1, },
+            { 22, 23, -1, -1, -1, -1, -1, -1, },
+            { 24, 25, -1, -1, -1, -1, -1, -1, },
+            { 26, 27, 28, -1, -1, -1, -1, -1, },
+        };
+        memcpy(rc->books, a, sizeof a);
+    }
+    ready_residue(rc, venc);
+
+    venc->nmappings = 1;
+    venc->mappings = av_malloc(sizeof(mapping_t) * venc->nmappings);
+
+    // single mapping
+    mc = &venc->mappings[0];
+    mc->submaps = 1;
+    mc->mux = av_malloc(sizeof(int) * venc->channels);
+    for (i = 0; i < venc->channels; i++)
+        mc->mux[i] = 0;
+    mc->floor = av_malloc(sizeof(int) * mc->submaps);
+    mc->residue = av_malloc(sizeof(int) * mc->submaps);
+    for (i = 0; i < mc->submaps; i++) {
+        mc->floor[i] = 0;
+        mc->residue[i] = 0;
+    }
+    mc->coupling_steps = venc->channels == 2 ? 1 : 0;
+    mc->magnitude = av_malloc(sizeof(int) * mc->coupling_steps);
+    mc->angle = av_malloc(sizeof(int) * mc->coupling_steps);
+    if (mc->coupling_steps) {
+        mc->magnitude[0] = 0;
+        mc->angle[0] = 1;
+    }
+
+    venc->nmodes = 1;
+    venc->modes = av_malloc(sizeof(vorbis_mode_t) * venc->nmodes);
+
+    // single mode
+    venc->modes[0].blockflag = 0;
+    venc->modes[0].mapping = 0;
+
+    venc->have_saved = 0;
+    venc->saved = av_malloc(sizeof(float) * venc->channels * (1 << venc->log2_blocksize[1]) / 2);
+    venc->samples = av_malloc(sizeof(float) * venc->channels * (1 << venc->log2_blocksize[1]));
+    venc->floor = av_malloc(sizeof(float) * venc->channels * (1 << venc->log2_blocksize[1]) / 2);
+    venc->coeffs = av_malloc(sizeof(float) * venc->channels * (1 << venc->log2_blocksize[1]) / 2);
+
+    venc->win[0] = ff_vorbis_vwin[venc->log2_blocksize[0] - 6];
+    venc->win[1] = ff_vorbis_vwin[venc->log2_blocksize[1] - 6];
+
+    ff_mdct_init(&venc->mdct[0], venc->log2_blocksize[0], 0);
+    ff_mdct_init(&venc->mdct[1], venc->log2_blocksize[1], 0);
+}
+
+static void put_float(PutBitContext * pb, float f) {
+    int exp, mant;
+    uint32_t res = 0;
+    mant = (int)ldexp(frexp(f, &exp), 20);
+    exp += 788 - 20;
+    if (mant < 0) { res |= (1 << 31); mant = -mant; }
+    res |= mant | (exp << 21);
+    put_bits(pb, 32, res);
+}
+
+static void put_codebook_header(PutBitContext * pb, codebook_t * cb) {
+    int i;
+    int ordered = 0;
+
+    put_bits(pb, 24, 0x564342); //magic
+    put_bits(pb, 16, cb->ndimentions);
+    put_bits(pb, 24, cb->nentries);
+
+    for (i = 1; i < cb->nentries; i++)
+        if (cb->lens[i] < cb->lens[i-1]) break;
+    if (i == cb->nentries)
+        ordered = 1;
+
+    put_bits(pb, 1, ordered);
+    if (ordered) {
+        int len = cb->lens[0];
+        put_bits(pb, 5, len - 1);
+        i = 0;
+        while (i < cb->nentries) {
+            int j;
+            for (j = 0; j+i < cb->nentries; j++)
+                if (cb->lens[j+i] != len) break;
+            put_bits(pb, ilog(cb->nentries - i), j);
+            i += j;
+            len++;
+        }
+    } else {
+        int sparse = 0;
+        for (i = 0; i < cb->nentries; i++)
+            if (!cb->lens[i]) break;
+        if (i != cb->nentries)
+            sparse = 1;
+        put_bits(pb, 1, sparse);
+
+        for (i = 0; i < cb->nentries; i++) {
+            if (sparse) put_bits(pb, 1, !!cb->lens[i]);
+            if (cb->lens[i]) put_bits(pb, 5, cb->lens[i] - 1);
+        }
+    }
+
+    put_bits(pb, 4, cb->lookup);
+    if (cb->lookup) {
+        int tmp = cb_lookup_vals(cb->lookup, cb->ndimentions, cb->nentries);
+        int bits = ilog(cb->quantlist[0]);
+
+        for (i = 1; i < tmp; i++)
+            bits = FFMAX(bits, ilog(cb->quantlist[i]));
+
+        put_float(pb, cb->min);
+        put_float(pb, cb->delta);
+
+        put_bits(pb, 4, bits - 1);
+        put_bits(pb, 1, cb->seq_p);
+
+        for (i = 0; i < tmp; i++)
+            put_bits(pb, bits, cb->quantlist[i]);
+    }
+}
+
+static void put_floor_header(PutBitContext * pb, floor_t * fc) {
+    int i;
+
+    put_bits(pb, 16, 1); // type, only floor1 is supported
+
+    put_bits(pb, 5, fc->partitions);
+
+    for (i = 0; i < fc->partitions; i++)
+        put_bits(pb, 4, fc->partition_to_class[i]);
+
+    for (i = 0; i < fc->nclasses; i++) {
+        int j, books;
+
+        put_bits(pb, 3, fc->classes[i].dim - 1);
+        put_bits(pb, 2, fc->classes[i].subclass);
+
+        if (fc->classes[i].subclass)
+            put_bits(pb, 8, fc->classes[i].masterbook);
+
+        books = (1 << fc->classes[i].subclass);
+
+        for (j = 0; j < books; j++)
+            put_bits(pb, 8, fc->classes[i].books[j] + 1);
+    }
+
+    put_bits(pb, 2, fc->multiplier - 1);
+    put_bits(pb, 4, fc->rangebits);
+
+    for (i = 2; i < fc->values; i++)
+        put_bits(pb, fc->rangebits, fc->list[i].x);
+}
+
+static void put_residue_header(PutBitContext * pb, residue_t * rc) {
+    int i;
+
+    put_bits(pb, 16, rc->type);
+
+    put_bits(pb, 24, rc->begin);
+    put_bits(pb, 24, rc->end);
+    put_bits(pb, 24, rc->partition_size - 1);
+    put_bits(pb, 6, rc->classifications - 1);
+    put_bits(pb, 8, rc->classbook);
+
+    for (i = 0; i < rc->classifications; i++) {
+        int j, tmp = 0;
+        for (j = 0; j < 8; j++)
+            tmp |= (rc->books[i][j] != -1) << j;
+
+        put_bits(pb, 3, tmp & 7);
+        put_bits(pb, 1, tmp > 7);
+
+        if (tmp > 7)
+            put_bits(pb, 5, tmp >> 3);
+    }
+
+    for (i = 0; i < rc->classifications; i++) {
+        int j;
+        for (j = 0; j < 8; j++)
+            if (rc->books[i][j] != -1)
+                put_bits(pb, 8, rc->books[i][j]);
+    }
+}
+
+static int put_main_header(venc_context_t * venc, uint8_t ** out) {
+    int i;
+    PutBitContext pb;
+    uint8_t buffer[50000] = {0}, * p = buffer;
+    int buffer_len = sizeof buffer;
+    int len, hlens[3];
+
+    // identification header
+    init_put_bits(&pb, p, buffer_len);
+    put_bits(&pb, 8, 1); //magic
+    for (i = 0; "vorbis"[i]; i++)
+        put_bits(&pb, 8, "vorbis"[i]);
+    put_bits(&pb, 32, 0); // version
+    put_bits(&pb, 8, venc->channels);
+    put_bits(&pb, 32, venc->sample_rate);
+    put_bits(&pb, 32, 0); // bitrate
+    put_bits(&pb, 32, 0); // bitrate
+    put_bits(&pb, 32, 0); // bitrate
+    put_bits(&pb, 4, venc->log2_blocksize[0]);
+    put_bits(&pb, 4, venc->log2_blocksize[1]);
+    put_bits(&pb, 1, 1); // framing
+
+    flush_put_bits(&pb);
+    hlens[0] = (put_bits_count(&pb) + 7) / 8;
+    buffer_len -= hlens[0];
+    p += hlens[0];
+
+    // comment header
+    init_put_bits(&pb, p, buffer_len);
+    put_bits(&pb, 8, 3); //magic
+    for (i = 0; "vorbis"[i]; i++)
+        put_bits(&pb, 8, "vorbis"[i]);
+    put_bits(&pb, 32, 0); // vendor length TODO
+    put_bits(&pb, 32, 0); // amount of comments
+    put_bits(&pb, 1, 1); // framing
+
+    flush_put_bits(&pb);
+    hlens[1] = (put_bits_count(&pb) + 7) / 8;
+    buffer_len -= hlens[1];
+    p += hlens[1];
+
+    // setup header
+    init_put_bits(&pb, p, buffer_len);
+    put_bits(&pb, 8, 5); //magic
+    for (i = 0; "vorbis"[i]; i++)
+        put_bits(&pb, 8, "vorbis"[i]);
+
+    // codebooks
+    put_bits(&pb, 8, venc->ncodebooks - 1);
+    for (i = 0; i < venc->ncodebooks; i++)
+        put_codebook_header(&pb, &venc->codebooks[i]);
+
+    // time domain, reserved, zero
+    put_bits(&pb, 6, 0);
+    put_bits(&pb, 16, 0);
+
+    // floors
+    put_bits(&pb, 6, venc->nfloors - 1);
+    for (i = 0; i < venc->nfloors; i++)
+        put_floor_header(&pb, &venc->floors[i]);
+
+    // residues
+    put_bits(&pb, 6, venc->nresidues - 1);
+    for (i = 0; i < venc->nresidues; i++)
+        put_residue_header(&pb, &venc->residues[i]);
+
+    // mappings
+    put_bits(&pb, 6, venc->nmappings - 1);
+    for (i = 0; i < venc->nmappings; i++) {
+        mapping_t * mc = &venc->mappings[i];
+        int j;
+        put_bits(&pb, 16, 0); // mapping type
+
+        put_bits(&pb, 1, mc->submaps > 1);
+        if (mc->submaps > 1)
+            put_bits(&pb, 4, mc->submaps - 1);
+
+        put_bits(&pb, 1, !!mc->coupling_steps);
+        if (mc->coupling_steps) {
+            put_bits(&pb, 8, mc->coupling_steps - 1);
+            for (j = 0; j < mc->coupling_steps; j++) {
+                put_bits(&pb, ilog(venc->channels - 1), mc->magnitude[j]);
+                put_bits(&pb, ilog(venc->channels - 1), mc->angle[j]);
+            }
+        }
+
+        put_bits(&pb, 2, 0); // reserved
+
+        if (mc->submaps > 1)
+            for (j = 0; j < venc->channels; j++)
+                put_bits(&pb, 4, mc->mux[j]);
+
+        for (j = 0; j < mc->submaps; j++) {
+            put_bits(&pb, 8, 0); // reserved time configuration
+            put_bits(&pb, 8, mc->floor[j]);
+            put_bits(&pb, 8, mc->residue[j]);
+        }
+    }
+
+    // modes
+    put_bits(&pb, 6, venc->nmodes - 1);
+    for (i = 0; i < venc->nmodes; i++) {
+        put_bits(&pb, 1, venc->modes[i].blockflag);
+        put_bits(&pb, 16, 0); // reserved window type
+        put_bits(&pb, 16, 0); // reserved transform type
+        put_bits(&pb, 8, venc->modes[i].mapping);
+    }
+
+    put_bits(&pb, 1, 1); // framing
+
+    flush_put_bits(&pb);
+    hlens[2] = (put_bits_count(&pb) + 7) / 8;
+
+    len = hlens[0] + hlens[1] + hlens[2];
+    p = *out = av_mallocz(64 + len + len/255);
+
+    *p++ = 2;
+    p += av_xiphlacing(p, hlens[0]);
+    p += av_xiphlacing(p, hlens[1]);
+    buffer_len = 0;
+    for (i = 0; i < 3; i++) {
+        memcpy(p, buffer + buffer_len, hlens[i]);
+        p += hlens[i];
+        buffer_len += hlens[i];
+    }
+
+    return p - *out;
+}
+
+static float get_floor_average(floor_t * fc, float * coeffs, int i) {
+    int begin = fc->list[fc->list[FFMAX(i-1, 0)].sort].x;
+    int end   = fc->list[fc->list[FFMIN(i+1, fc->values - 1)].sort].x;
+    int j;
+    float average = 0;
+
+    for (j = begin; j < end; j++)
+        average += fabs(coeffs[j]);
+    return average / (end - begin);
+}
+
+static void floor_fit(venc_context_t * venc, floor_t * fc, float * coeffs, uint_fast16_t * posts, int samples) {
+    int range = 255 / fc->multiplier + 1;
+    int i;
+    float tot_average = 0.;
+    float averages[fc->values];
+    for (i = 0; i < fc->values; i++){
+        averages[i] = get_floor_average(fc, coeffs, i);
+        tot_average += averages[i];
+    }
+    tot_average /= fc->values;
+    tot_average /= venc->quality;
+
+    for (i = 0; i < fc->values; i++) {
+        int position = fc->list[fc->list[i].sort].x;
+        float average = averages[i];
+        int j;
+
+        average *= pow(tot_average / average, 0.5) * pow(1.25, position/200.); // MAGIC!
+        for (j = 0; j < range - 1; j++)
+            if (ff_vorbis_floor1_inverse_db_table[j * fc->multiplier] > average) break;
+        posts[fc->list[i].sort] = j;
+    }
+}
+
+static int render_point(int x0, int y0, int x1, int y1, int x) {
+    return y0 +  (x - x0) * (y1 - y0) / (x1 - x0);
+}
+
+static void floor_encode(venc_context_t * venc, floor_t * fc, PutBitContext * pb, uint_fast16_t * posts, float * floor, int samples) {
+    int range = 255 / fc->multiplier + 1;
+    int coded[fc->values]; // first 2 values are unused
+    int i, counter;
+
+    put_bits(pb, 1, 1); // non zero
+    put_bits(pb, ilog(range - 1), posts[0]);
+    put_bits(pb, ilog(range - 1), posts[1]);
+    coded[0] = coded[1] = 1;
+
+    for (i = 2; i < fc->values; i++) {
+        int predicted = render_point(fc->list[fc->list[i].low].x,
+                                     posts[fc->list[i].low],
+                                     fc->list[fc->list[i].high].x,
+                                     posts[fc->list[i].high],
+                                     fc->list[i].x);
+        int highroom = range - predicted;
+        int lowroom = predicted;
+        int room = FFMIN(highroom, lowroom);
+        if (predicted == posts[i]) {
+            coded[i] = 0; // must be used later as flag!
+            continue;
+        } else {
+            if (!coded[fc->list[i].low ]) coded[fc->list[i].low ] = -1;
+            if (!coded[fc->list[i].high]) coded[fc->list[i].high] = -1;
+        }
+        if (posts[i] > predicted) {
+            if (posts[i] - predicted > room)
+                coded[i] = posts[i] - predicted + lowroom;
+            else
+                coded[i] = (posts[i] - predicted) << 1;
+        } else {
+            if (predicted - posts[i] > room)
+                coded[i] = predicted - posts[i] + highroom - 1;
+            else
+                coded[i] = ((predicted - posts[i]) << 1) - 1;
+        }
+    }
+
+    counter = 2;
+    for (i = 0; i < fc->partitions; i++) {
+        floor_class_t * c = &fc->classes[fc->partition_to_class[i]];
+        int k, cval = 0, csub = 1<<c->subclass;
+        if (c->subclass) {
+            codebook_t * book = &venc->codebooks[c->masterbook];
+            int cshift = 0;
+            for (k = 0; k < c->dim; k++) {
+                int l;
+                for (l = 0; l < csub; l++) {
+                    int maxval = 1;
+                    if (c->books[l] != -1)
+                        maxval = venc->codebooks[c->books[l]].nentries;
+                    // coded could be -1, but this still works, cause thats 0
+                    if (coded[counter + k] < maxval) break;
+                }
+                assert(l != csub);
+                cval |= l << cshift;
+                cshift += c->subclass;
+            }
+            put_codeword(pb, book, cval);
+        }
+        for (k = 0; k < c->dim; k++) {
+            int book = c->books[cval & (csub-1)];
+            int entry = coded[counter++];
+            cval >>= c->subclass;
+            if (book == -1) continue;
+            if (entry == -1) entry = 0;
+            put_codeword(pb, &venc->codebooks[book], entry);
+        }
+    }
+
+    ff_vorbis_floor1_render_list(fc->list, fc->values, posts, coded, fc->multiplier, floor, samples);
+}
+
+static float * put_vector(codebook_t * book, PutBitContext * pb, float * num) {
+    int i, entry = -1;
+    float distance = FLT_MAX;
+    assert(book->dimentions);
+    for (i = 0; i < book->nentries; i++) {
+        float * vec = book->dimentions + i * book->ndimentions, d = book->pow2[i];
+        int j;
+        if (!book->lens[i]) continue;
+        for (j = 0; j < book->ndimentions; j++)
+            d -= vec[j] * num[j];
+        if (distance > d) {
+            entry = i;
+            distance = d;
+        }
+    }
+    put_codeword(pb, book, entry);
+    return &book->dimentions[entry * book->ndimentions];
+}
+
+static void residue_encode(venc_context_t * venc, residue_t * rc, PutBitContext * pb, float * coeffs, int samples, int real_ch) {
+    int pass, i, j, p, k;
+    int psize = rc->partition_size;
+    int partitions = (rc->end - rc->begin) / psize;
+    int channels = (rc->type == 2) ? 1 : real_ch;
+    int classes[channels][partitions];
+    int classwords = venc->codebooks[rc->classbook].ndimentions;
+
+    assert(rc->type == 2);
+    assert(real_ch == 2);
+    for (p = 0; p < partitions; p++) {
+        float max1 = 0., max2 = 0.;
+        int s = rc->begin + p * psize;
+        for (k = s; k < s + psize; k += 2) {
+            max1 = FFMAX(max1, fabs(coeffs[          k / real_ch]));
+            max2 = FFMAX(max2, fabs(coeffs[samples + k / real_ch]));
+        }
+
+        for (i = 0; i < rc->classifications - 1; i++) {
+            if (max1 < rc->maxes[i][0] && max2 < rc->maxes[i][1]) break;
+        }
+        classes[0][p] = i;
+    }
+
+    for (pass = 0; pass < 8; pass++) {
+        p = 0;
+        while (p < partitions) {
+            if (pass == 0)
+                for (j = 0; j < channels; j++) {
+                    codebook_t * book = &venc->codebooks[rc->classbook];
+                    int entry = 0;
+                    for (i = 0; i < classwords; i++) {
+                        entry *= rc->classifications;
+                        entry += classes[j][p + i];
+                    }
+                    put_codeword(pb, book, entry);
+                }
+            for (i = 0; i < classwords && p < partitions; i++, p++) {
+                for (j = 0; j < channels; j++) {
+                    int nbook = rc->books[classes[j][p]][pass];
+                    codebook_t * book = &venc->codebooks[nbook];
+                    float * buf = coeffs + samples*j + rc->begin + p*psize;
+                    if (nbook == -1) continue;
+
+                    assert(rc->type == 0 || rc->type == 2);
+                    assert(!(psize % book->ndimentions));
+
+                    if (rc->type == 0) {
+                        for (k = 0; k < psize; k += book->ndimentions) {
+                            float * a = put_vector(book, pb, &buf[k]);
+                            int l;
+                            for (l = 0; l < book->ndimentions; l++)
+                                buf[k + l] -= a[l];
+                        }
+                    } else {
+                        int s = rc->begin + p * psize, a1, b1;
+                        a1 = (s % real_ch) * samples;
+                        b1 =  s / real_ch;
+                        s = real_ch * samples;
+                        for (k = 0; k < psize; k += book->ndimentions) {
+                            int dim, a2 = a1, b2 = b1;
+                            float vec[book->ndimentions], * pv = vec;
+                            for (dim = book->ndimentions; dim--; ) {
+                                *pv++ = coeffs[a2 + b2];
+                                if ((a2 += samples) == s) {
+                                    a2=0;
+                                    b2++;
+                                }
+                            }
+                            pv = put_vector(book, pb, vec);
+                            for (dim = book->ndimentions; dim--; ) {
+                                coeffs[a1 + b1] -= *pv++;
+                                if ((a1 += samples) == s) {
+                                    a1=0;
+                                    b1++;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+static int apply_window_and_mdct(venc_context_t * venc, signed short * audio, int samples) {
+    int i, j, channel;
+    const float * win = venc->win[0];
+    int window_len = 1 << (venc->log2_blocksize[0] - 1);
+    float n = (float)(1 << venc->log2_blocksize[0]) / 4.;
+    // FIXME use dsp
+
+    if (!venc->have_saved && !samples) return 0;
+
+    if (venc->have_saved) {
+        for (channel = 0; channel < venc->channels; channel++) {
+            memcpy(venc->samples + channel*window_len*2, venc->saved + channel*window_len, sizeof(float)*window_len);
+        }
+    } else {
+        for (channel = 0; channel < venc->channels; channel++) {
+            memset(venc->samples + channel*window_len*2, 0, sizeof(float)*window_len);
+        }
+    }
+
+    if (samples) {
+        for (channel = 0; channel < venc->channels; channel++) {
+            float * offset = venc->samples + channel*window_len*2 + window_len;
+            j = channel;
+            for (i = 0; i < samples; i++, j += venc->channels)
+                offset[i] = audio[j] / 32768. / n * win[window_len - i - 1];
+        }
+    } else {
+        for (channel = 0; channel < venc->channels; channel++) {
+            memset(venc->samples + channel*window_len*2 + window_len, 0, sizeof(float)*window_len);
+        }
+    }
+
+    for (channel = 0; channel < venc->channels; channel++) {
+        ff_mdct_calc(&venc->mdct[0], venc->coeffs + channel*window_len, venc->samples + channel*window_len*2, venc->floor/*tmp*/);
+    }
+
+    if (samples) {
+        for (channel = 0; channel < venc->channels; channel++) {
+            float * offset = venc->saved + channel*window_len;
+            j = channel;
+            for (i = 0; i < samples; i++, j += venc->channels)
+                offset[i] = audio[j] / 32768. / n * win[i];
+        }
+        venc->have_saved = 1;
+    } else {
+        venc->have_saved = 0;
+    }
+    return 1;
+}
+
+static int vorbis_encode_init(AVCodecContext * avccontext)
+{
+    venc_context_t * venc = avccontext->priv_data;
+
+    if (avccontext->channels != 2) {
+        av_log(avccontext, AV_LOG_ERROR, "Current FFmpeg Vorbis encoder only supports 2 channels.\n");
+        return -1;
+    }
+
+    create_vorbis_context(venc, avccontext);
+
+    if (avccontext->flags & CODEC_FLAG_QSCALE)
+        venc->quality = avccontext->global_quality / (float)FF_QP2LAMBDA / 10.;
+    else
+        venc->quality = 1.;
+    venc->quality *= venc->quality;
+
+    avccontext->extradata_size = put_main_header(venc, (uint8_t**)&avccontext->extradata);
+
+    avccontext->frame_size = 1 << (venc->log2_blocksize[0] - 1);
+
+    avccontext->coded_frame = avcodec_alloc_frame();
+    avccontext->coded_frame->key_frame = 1;
+
+    return 0;
+}
+
+static int vorbis_encode_frame(AVCodecContext * avccontext, unsigned char * packets, int buf_size, void *data)
+{
+    venc_context_t * venc = avccontext->priv_data;
+    signed short * audio = data;
+    int samples = data ? avccontext->frame_size : 0;
+    vorbis_mode_t * mode;
+    mapping_t * mapping;
+    PutBitContext pb;
+    int i;
+
+    if (!apply_window_and_mdct(venc, audio, samples)) return 0;
+    samples = 1 << (venc->log2_blocksize[0] - 1);
+
+    init_put_bits(&pb, packets, buf_size);
+
+    put_bits(&pb, 1, 0); // magic bit
+
+    put_bits(&pb, ilog(venc->nmodes - 1), 0); // 0 bits, the mode
+
+    mode = &venc->modes[0];
+    mapping = &venc->mappings[mode->mapping];
+    if (mode->blockflag) {
+        put_bits(&pb, 1, 0);
+        put_bits(&pb, 1, 0);
+    }
+
+    for (i = 0; i < venc->channels; i++) {
+        floor_t * fc = &venc->floors[mapping->floor[mapping->mux[i]]];
+        uint_fast16_t posts[fc->values];
+        floor_fit(venc, fc, &venc->coeffs[i * samples], posts, samples);
+        floor_encode(venc, fc, &pb, posts, &venc->floor[i * samples], samples);
+    }
+
+    for (i = 0; i < venc->channels * samples; i++) {
+        venc->coeffs[i] /= venc->floor[i];
+    }
+
+    for (i = 0; i < mapping->coupling_steps; i++) {
+        float * mag = venc->coeffs + mapping->magnitude[i] * samples;
+        float * ang = venc->coeffs + mapping->angle[i] * samples;
+        int j;
+        for (j = 0; j < samples; j++) {
+            float a = ang[j];
+            ang[j] -= mag[j];
+            if (mag[j] > 0) ang[j] = -ang[j];
+            if (ang[j] < 0) mag[j] = a;
+        }
+    }
+
+    residue_encode(venc, &venc->residues[mapping->residue[mapping->mux[0]]], &pb, venc->coeffs, samples, venc->channels);
+
+    flush_put_bits(&pb);
+    return (put_bits_count(&pb) + 7) / 8;
+}
+
+
+static int vorbis_encode_close(AVCodecContext * avccontext)
+{
+    venc_context_t * venc = avccontext->priv_data;
+    int i;
+
+    if (venc->codebooks)
+        for (i = 0; i < venc->ncodebooks; i++) {
+            av_freep(&venc->codebooks[i].lens);
+            av_freep(&venc->codebooks[i].codewords);
+            av_freep(&venc->codebooks[i].quantlist);
+            av_freep(&venc->codebooks[i].dimentions);
+            av_freep(&venc->codebooks[i].pow2);
+        }
+    av_freep(&venc->codebooks);
+
+    if (venc->floors)
+        for (i = 0; i < venc->nfloors; i++) {
+            int j;
+            if (venc->floors[i].classes)
+                for (j = 0; j < venc->floors[i].nclasses; j++)
+                    av_freep(&venc->floors[i].classes[j].books);
+            av_freep(&venc->floors[i].classes);
+            av_freep(&venc->floors[i].partition_to_class);
+            av_freep(&venc->floors[i].list);
+        }
+    av_freep(&venc->floors);
+
+    if (venc->residues)
+        for (i = 0; i < venc->nresidues; i++) {
+            av_freep(&venc->residues[i].books);
+            av_freep(&venc->residues[i].maxes);
+        }
+    av_freep(&venc->residues);
+
+    if (venc->mappings)
+        for (i = 0; i < venc->nmappings; i++) {
+            av_freep(&venc->mappings[i].mux);
+            av_freep(&venc->mappings[i].floor);
+            av_freep(&venc->mappings[i].residue);
+            av_freep(&venc->mappings[i].magnitude);
+            av_freep(&venc->mappings[i].angle);
+        }
+    av_freep(&venc->mappings);
+
+    av_freep(&venc->modes);
+
+    av_freep(&venc->saved);
+    av_freep(&venc->samples);
+    av_freep(&venc->floor);
+    av_freep(&venc->coeffs);
+
+    ff_mdct_end(&venc->mdct[0]);
+    ff_mdct_end(&venc->mdct[1]);
+
+    av_freep(&avccontext->coded_frame);
+    av_freep(&avccontext->extradata);
+
+    return 0 ;
+}
+
+AVCodec vorbis_encoder = {
+    "vorbis",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_VORBIS,
+    sizeof(venc_context_t),
+    vorbis_encode_init,
+    vorbis_encode_frame,
+    vorbis_encode_close,
+    .capabilities= CODEC_CAP_DELAY,
+};
diff --git a/contrib/ffmpeg/libavcodec/vorbis_enc_data.h b/contrib/ffmpeg/libavcodec/vorbis_enc_data.h
new file mode 100644
index 000000000..e56dc5df5
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vorbis_enc_data.h
@@ -0,0 +1,498 @@
+/*
+ * copyright (c) 2006 Oded Shimon <ods15@ods15.dyndns.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+static const uint8_t codebook0[] = {
+   2, 10,  8, 14,  7, 12, 11, 14,  1,  5,  3,  7,  4,  9,  7,
+  13,
+};
+
+static const uint8_t codebook1[] = {
+   1,  4,  2,  6,  3,  7,  5,  7,
+};
+
+static const uint8_t codebook2[] = {
+   1,  5,  7, 21,  5,  8,  9, 21, 10,  9, 12, 20, 20, 16, 20,
+  20,  4,  8,  9, 20,  6,  8,  9, 20, 11, 11, 13, 20, 20, 15,
+  17, 20,  9, 11, 14, 20,  8, 10, 15, 20, 11, 13, 15, 20, 20,
+  20, 20, 20, 20, 20, 20, 20, 13, 20, 20, 20, 18, 18, 20, 20,
+  20, 20, 20, 20,  3,  6,  8, 20,  6,  7,  9, 20, 10,  9, 12,
+  20, 20, 20, 20, 20,  5,  7,  9, 20,  6,  6,  9, 20, 10,  9,
+  12, 20, 20, 20, 20, 20,  8, 10, 13, 20,  8,  9, 12, 20, 11,
+  10, 12, 20, 20, 20, 20, 20, 18, 20, 20, 20, 15, 17, 18, 20,
+  18, 17, 18, 20, 20, 20, 20, 20,  7, 10, 12, 20,  8,  9, 11,
+  20, 14, 13, 14, 20, 20, 20, 20, 20,  6,  9, 12, 20,  7,  8,
+  11, 20, 12, 11, 13, 20, 20, 20, 20, 20,  9, 11, 15, 20,  8,
+  10, 14, 20, 12, 11, 14, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 11, 16, 18,
+  20, 15, 15, 17, 20, 20, 17, 20, 20, 20, 20, 20, 20,  9, 14,
+  16, 20, 12, 12, 15, 20, 17, 15, 18, 20, 20, 20, 20, 20, 16,
+  19, 18, 20, 15, 16, 20, 20, 17, 17, 20, 20, 20, 20, 20, 20,
+  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+  20,
+};
+
+static const uint8_t codebook3[] = {
+   2,  3,  7, 13,  4,  4,  7, 15,  8,  6,  9, 17, 21, 16, 15,
+  21,  2,  5,  7, 11,  5,  5,  7, 14,  9,  7, 10, 16, 17, 15,
+  16, 21,  4,  7, 10, 17,  7,  7,  9, 15, 11,  9, 11, 16, 21,
+  18, 15, 21, 18, 21, 21, 21, 15, 17, 17, 19, 21, 19, 18, 20,
+  21, 21, 21, 20,
+};
+
+static const uint8_t codebook4[] = {
+   5,  5,  5,  5,  6,  5,  6,  5,  6,  5,  6,  5,  6,  5,  6,
+   5,  6,  5,  6,  5,  6,  5,  6,  5,  7,  5,  7,  5,  7,  5,
+   7,  5,  8,  6,  8,  6,  8,  6,  9,  6,  9,  6, 10,  6, 10,
+   6, 11,  6, 11,  7, 11,  7, 12,  7, 12,  7, 12,  7, 12,  7,
+  12,  7, 12,  7, 12,  7, 12,  8, 13,  8, 12,  8, 12,  8, 13,
+   8, 13,  9, 13,  9, 13,  9, 13,  9, 12, 10, 12, 10, 13, 10,
+  14, 11, 14, 12, 14, 13, 14, 13, 14, 14, 15, 16, 15, 15, 15,
+  14, 15, 17, 21, 22, 22, 21, 22, 22, 22, 22, 22, 22, 21, 21,
+  21, 21, 21, 21, 21, 21, 21, 21,
+};
+
+static const uint8_t codebook5[] = {
+   2,  5,  5,  4,  5,  4,  5,  4,  5,  4,  6,  5,  6,  5,  6,
+   5,  6,  5,  7,  5,  7,  6,  8,  6,  8,  6,  8,  6,  9,  6,
+   9,  6,
+};
+
+static const uint8_t codebook6[] = {
+   8,  5,  8,  4,  9,  4,  9,  4,  9,  4,  9,  4,  9,  4,  9,
+   4,  9,  4,  9,  4,  9,  4,  8,  4,  8,  4,  9,  5,  9,  5,
+   9,  5,  9,  5,  9,  6, 10,  6, 10,  7, 10,  8, 11,  9, 11,
+  11, 12, 13, 12, 14, 13, 15, 13, 15, 14, 16, 14, 17, 15, 17,
+  15, 15, 16, 16, 15, 16, 16, 16, 15, 18, 16, 15, 17, 17, 19,
+  19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+  19, 19, 19, 19, 19, 19,
+};
+
+static const uint8_t codebook7[] = {
+   1,  5,  5,  5,  5,  5,  5,  5,  6,  5,  6,  5,  6,  5,  6,
+   5,  6,  6,  7,  7,  7,  7,  8,  7,  8,  8,  9,  8, 10,  9,
+  10,  9,
+};
+
+static const uint8_t codebook8[] = {
+   4,  3,  4,  3,  4,  4,  5,  4,  5,  4,  5,  5,  6,  5,  6,
+   5,  7,  5,  7,  6,  7,  6,  8,  7,  8,  7,  8,  7,  9,  8,
+   9,  9,  9,  9, 10, 10, 10, 11,  9, 12,  9, 12,  9, 15, 10,
+  14,  9, 13, 10, 13, 10, 12, 10, 12, 10, 13, 10, 12, 11, 13,
+  11, 14, 12, 13, 13, 14, 14, 13, 14, 15, 14, 16, 13, 13, 14,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 15, 15,
+};
+
+static const uint8_t codebook9[] = {
+   4,  5,  4,  5,  3,  5,  3,  5,  3,  5,  4,  4,  4,  4,  5,
+   5,  5,
+};
+
+static const uint8_t codebook10[] = {
+   3,  3,  4,  3,  4,  4,  4,  4,  5,  5,  5,  5,  5,  6,  5,
+   7,  5,  8,  6,  8,  6,  9,  7, 10,  7, 10,  8, 10,  8, 11,
+   9, 11,
+};
+
+static const uint8_t codebook11[] = {
+   3,  7,  3,  8,  3, 10,  3,  8,  3,  9,  3,  8,  4,  9,  4,
+   9,  5,  9,  6, 10,  6,  9,  7, 11,  7, 12,  9, 13, 10, 13,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12,
+};
+
+static const uint8_t codebook12[] = {
+   4,  5,  4,  5,  4,  5,  4,  5,  3,  5,  3,  5,  3,  5,  4,
+   5,  4,
+};
+
+static const uint8_t codebook13[] = {
+   4,  2,  4,  2,  5,  3,  5,  4,  6,  6,  6,  7,  7,  8,  7,
+   8,  7,  8,  7,  9,  8,  9,  8,  9,  8, 10,  8, 11,  9, 12,
+   9, 12,
+};
+
+static const uint8_t codebook14[] = {
+   2,  5,  2,  6,  3,  6,  4,  7,  4,  7,  5,  9,  5, 11,  6,
+  11,  6, 11,  7, 11,  6, 11,  6, 11,  9, 11,  8, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10, 10,
+  10, 10, 10,
+};
+
+static const uint8_t codebook15[] = {
+   5,  6, 11, 11, 11, 11, 10, 10, 12, 11,  5,  2, 11,  5,  6,
+   6,  7,  9, 11, 13, 13, 10,  7, 11,  6,  7,  8,  9, 10, 12,
+  11,  5, 11,  6,  8,  7,  9, 11, 14, 15, 11,  6,  6,  8,  4,
+   5,  7,  8, 10, 13, 10,  5,  7,  7,  5,  5,  6,  8, 10, 11,
+  10,  7,  7,  8,  6,  5,  5,  7,  9,  9, 11,  8,  8, 11,  8,
+   7,  6,  6,  7,  9, 12, 11, 10, 13,  9,  9,  7,  7,  7,  9,
+  11, 13, 12, 15, 12, 11,  9,  8,  8,  8,
+};
+
+static const uint8_t codebook16[] = {
+   2,  4,  4,  0,  0,  0,  0,  0,  0,  5,  6,  6,  0,  0,  0,
+   0,  0,  0,  5,  6,  6,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  5,  7,  7,  0,  0,  0,  0,  0,  0,
+   7,  8,  8,  0,  0,  0,  0,  0,  0,  6,  7,  8,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  5,  7,  7,
+   0,  0,  0,  0,  0,  0,  6,  8,  7,  0,  0,  0,  0,  0,  0,
+   7,  8,  8,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  5,  7,  7,  0,  0,  0,
+   0,  0,  0,  7,  8,  8,  0,  0,  0,  0,  0,  0,  7,  8,  8,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   7,  8,  8,  0,  0,  0,  0,  0,  0,  8,  8,  9,  0,  0,  0,
+   0,  0,  0,  8,  9,  9,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  6,  8,  8,  0,  0,  0,  0,  0,  0,
+   7,  9,  8,  0,  0,  0,  0,  0,  0,  8,  9,  9,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  5,  7,  7,  0,  0,  0,  0,  0,  0,  7,  8,  8,
+   0,  0,  0,  0,  0,  0,  7,  8,  8,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  8,  8,  0,  0,  0,
+   0,  0,  0,  8,  9,  9,  0,  0,  0,  0,  0,  0,  7,  8,  9,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   6,  8,  8,  0,  0,  0,  0,  0,  0,  8,  9,  9,  0,  0,  0,
+   0,  0,  0,  8,  9,  8,
+};
+
+static const uint8_t codebook17[] = {
+   2,  5,  5,  0,  0,  0,  5,  5,  0,  0,  0,  5,  5,  0,  0,
+   0,  7,  8,  0,  0,  0,  0,  0,  0,  0,  5,  6,  6,  0,  0,
+   0,  7,  7,  0,  0,  0,  7,  7,  0,  0,  0, 10, 10,  0,  0,
+   0,  0,  0,  0,  0,  5,  6,  6,  0,  0,  0,  7,  7,  0,  0,
+   0,  7,  7,  0,  0,  0, 10, 10,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   5,  7,  7,  0,  0,  0,  7,  7,  0,  0,  0,  7,  7,  0,  0,
+   0,  9,  9,  0,  0,  0,  0,  0,  0,  0,  5,  7,  7,  0,  0,
+   0,  7,  7,  0,  0,  0,  7,  7,  0,  0,  0,  9,  9,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  5,  7,  7,  0,  0,  0,  7,  7,  0,  0,
+   0,  7,  7,  0,  0,  0,  9,  9,  0,  0,  0,  0,  0,  0,  0,
+   5,  7,  7,  0,  0,  0,  7,  7,  0,  0,  0,  7,  7,  0,  0,
+   0,  9,  9,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  8, 10, 10,  0,  0,
+   0,  9,  9,  0,  0,  0,  9,  9,  0,  0,  0, 10, 10,  0,  0,
+   0,  0,  0,  0,  0,  8, 10, 10,  0,  0,  0,  9,  9,  0,  0,
+   0,  9,  9,  0,  0,  0, 10, 10,
+};
+
+static const uint8_t codebook18[] = {
+   2,  4,  3,  6,  6,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  4,  6,  6,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  4,  4,  4,  6,  6,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   6,  6,  6,  9,  9,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  6,  7,  9,  9,
+};
+
+static const uint8_t codebook19[] = {
+   2,  3,  3,  6,  6,  0,  0,  0,  0,  0,  4,  4,  6,  6,  0,
+   0,  0,  0,  0,  4,  4,  6,  6,  0,  0,  0,  0,  0,  5,  5,
+   6,  6,  0,  0,  0,  0,  0,  0,  0,  6,  6,  0,  0,  0,  0,
+   0,  0,  0,  7,  8,  0,  0,  0,  0,  0,  0,  0,  7,  7,  0,
+   0,  0,  0,  0,  0,  0,  9,  9,
+};
+
+static const uint8_t codebook20[] = {
+   1,  3,  4,  6,  6,  7,  7,  9,  9,  0,  5,  5,  7,  7,  7,
+   8,  9,  9,  0,  5,  5,  7,  7,  8,  8,  9,  9,  0,  7,  7,
+   8,  8,  8,  8, 10, 10,  0,  0,  0,  8,  8,  8,  8, 10, 10,
+   0,  0,  0,  9,  9,  9,  9, 10, 10,  0,  0,  0,  9,  9,  9,
+   9, 10, 10,  0,  0,  0, 10, 10, 10, 10, 11, 11,  0,  0,  0,
+   0,  0, 10, 10, 11, 11,
+};
+
+static const uint8_t codebook21[] = {
+   2,  3,  3,  6,  6,  7,  7,  8,  8,  8,  8,  9,  9, 10, 10,
+  11, 10,  0,  5,  5,  7,  7,  8,  8,  9,  9,  9,  9, 10, 10,
+  10, 10, 11, 11,  0,  5,  5,  7,  7,  8,  8,  9,  9,  9,  9,
+  10, 10, 10, 10, 11, 11,  0,  6,  6,  7,  7,  8,  8,  9,  9,
+   9,  9, 10, 10, 11, 11, 11, 11,  0,  0,  0,  7,  7,  8,  8,
+   9,  9,  9,  9, 10, 10, 11, 11, 11, 12,  0,  0,  0,  8,  8,
+   8,  8,  9,  9,  9,  9, 10, 10, 11, 11, 12, 12,  0,  0,  0,
+   8,  8,  8,  8,  9,  9,  9,  9, 10, 10, 11, 11, 12, 12,  0,
+   0,  0,  9,  9,  9,  9, 10, 10, 10, 10, 11, 10, 11, 11, 12,
+  12,  0,  0,  0,  0,  0,  9,  9, 10, 10, 10, 10, 11, 11, 11,
+  11, 12, 12,  0,  0,  0,  0,  0,  9,  8,  9,  9, 10, 10, 11,
+  11, 12, 12, 12, 12,  0,  0,  0,  0,  0,  8,  8,  9,  9, 10,
+  10, 11, 11, 12, 11, 12, 12,  0,  0,  0,  0,  0,  9, 10, 10,
+  10, 11, 11, 11, 11, 12, 12, 13, 13,  0,  0,  0,  0,  0,  0,
+   0, 10, 10, 10, 10, 11, 11, 12, 12, 13, 13,  0,  0,  0,  0,
+   0,  0,  0, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13,  0,  0,
+   0,  0,  0,  0,  0, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13,
+   0,  0,  0,  0,  0,  0,  0, 11, 11, 12, 12, 12, 12, 13, 13,
+  13, 13,  0,  0,  0,  0,  0,  0,  0,  0,  0, 12, 12, 12, 12,
+  13, 13, 13, 13,
+};
+
+static const uint8_t codebook22[] = {
+   1,  4,  4,  7,  6,  6,  7,  6,  6,  4,  7,  7, 10,  9,  9,
+  11,  9,  9,  4,  7,  7, 10,  9,  9, 11,  9,  9,  7, 10, 10,
+  11, 11, 10, 12, 11, 11,  6,  9,  9, 11, 10, 10, 11, 10, 10,
+   6,  9,  9, 11, 10, 10, 11, 10, 10,  7, 11, 11, 11, 11, 11,
+  12, 11, 11,  6,  9,  9, 11, 10, 10, 11, 10, 10,  6,  9,  9,
+  11, 10, 10, 11, 10, 10,
+};
+
+static const uint8_t codebook23[] = {
+   2,  4,  4,  6,  6,  7,  7,  7,  7,  8,  8, 10,  5,  5,  6,
+   6,  7,  7,  8,  8,  8,  8, 10,  5,  5,  6,  6,  7,  7,  8,
+   8,  8,  8, 10,  6,  6,  7,  7,  8,  8,  8,  8,  8,  8, 10,
+  10, 10,  7,  7,  8,  7,  8,  8,  8,  8, 10, 10, 10,  8,  8,
+   8,  8,  8,  8,  8,  8, 10, 10, 10,  7,  8,  8,  8,  8,  8,
+   8,  8, 10, 10, 10,  8,  8,  8,  8,  8,  8,  8,  8, 10, 10,
+  10, 10, 10,  8,  8,  8,  8,  8,  8, 10, 10, 10, 10, 10,  9,
+   9,  8,  8,  9,  8, 10, 10, 10, 10, 10,  8,  8,  8,  8,  8,
+   8,
+};
+
+static const uint8_t codebook24[] = {
+   1,  4,  4,  6,  6,  7,  7,  8,  8,  9,  9, 10, 10,  6,  5,
+   5,  7,  7,  8,  8,  8,  8,  9,  9, 10, 10,  7,  5,  5,  7,
+   7,  8,  8,  8,  8,  9,  9, 11, 10,  0,  8,  8,  8,  8,  9,
+   9,  9,  9, 10, 10, 11, 11,  0,  8,  8,  8,  8,  9,  9,  9,
+   9, 10, 10, 11, 11,  0, 12, 12,  9,  9, 10, 10, 10, 10, 11,
+  11, 11, 12,  0, 13, 13,  9,  9, 10, 10, 10, 10, 11, 11, 12,
+  12,  0,  0,  0, 10, 10, 10, 10, 11, 11, 12, 12, 12, 12,  0,
+   0,  0, 10, 10, 10, 10, 11, 11, 12, 12, 12, 12,  0,  0,  0,
+  14, 14, 11, 11, 11, 11, 12, 12, 13, 13,  0,  0,  0, 14, 14,
+  11, 11, 11, 11, 12, 12, 13, 13,  0,  0,  0,  0,  0, 12, 12,
+  12, 12, 13, 13, 14, 13,  0,  0,  0,  0,  0, 13, 13, 12, 12,
+  13, 12, 14, 13,
+};
+
+static const uint8_t codebook25[] = {
+   2,  4,  4,  5,  5,  6,  5,  5,  5,  5,  6,  4,  5,  5,  5,
+   6,  5,  5,  5,  5,  6,  6,  6,  5,  5,
+};
+
+static const uint8_t codebook26[] = {
+   1,  4,  4, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,  4,  9,
+   8, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,  2,  9,  7, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11,
+};
+
+static const uint8_t codebook27[] = {
+   1,  4,  4,  6,  6,  7,  7,  8,  7,  9,  9, 10, 10, 10, 10,
+   6,  5,  5,  7,  7,  8,  8, 10,  8, 11, 10, 12, 12, 13, 13,
+   6,  5,  5,  7,  7,  8,  8, 10,  9, 11, 11, 12, 12, 13, 12,
+  18,  8,  8,  8,  8,  9,  9, 10,  9, 11, 10, 12, 12, 13, 13,
+  18,  8,  8,  8,  8,  9,  9, 10, 10, 11, 11, 13, 12, 14, 13,
+  18, 11, 11,  9,  9, 10, 10, 11, 11, 11, 12, 13, 12, 13, 14,
+  18, 11, 11,  9,  8, 11, 10, 11, 11, 11, 11, 12, 12, 14, 13,
+  18, 18, 18, 10, 11, 10, 11, 12, 12, 12, 12, 13, 12, 14, 13,
+  18, 18, 18, 10, 11, 11,  9, 12, 11, 12, 12, 12, 13, 13, 13,
+  18, 18, 17, 14, 14, 11, 11, 12, 12, 13, 12, 14, 12, 14, 13,
+  18, 18, 18, 14, 14, 11, 10, 12,  9, 12, 13, 13, 13, 13, 13,
+  18, 18, 17, 16, 18, 13, 13, 12, 12, 13, 11, 14, 12, 14, 14,
+  17, 18, 18, 17, 18, 13, 12, 13, 10, 12, 11, 14, 14, 14, 14,
+  17, 18, 18, 18, 18, 15, 16, 12, 12, 13, 10, 14, 12, 14, 15,
+  18, 18, 18, 16, 17, 16, 14, 12, 11, 13, 10, 13, 13, 14, 15,
+};
+
+static const uint8_t codebook28[] = {
+   2,  5,  5,  6,  6,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,
+   8,  8, 10,  6,  6,  7,  7,  8,  7,  8,  8,  8,  8,  8,  9,
+   9,  9,  9,  9, 10,  6,  6,  7,  7,  7,  7,  8,  8,  8,  8,
+   9,  9,  9,  9,  9,  9, 10,  7,  7,  7,  7,  8,  8,  8,  8,
+   9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10,  7,  7,  8,  8,
+   8,  9,  9,  9,  9,  9,  9,  9,  9,  9, 11, 11, 11,  8,  8,
+   8,  8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10,
+   8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 10,
+  10, 10,  8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 10,
+   9, 10, 10, 10, 11, 11,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9, 11, 10, 11, 11, 11,  9,  9,  9,  9,  9,  9, 10,
+  10,  9,  9, 10,  9, 11, 10, 11, 11, 11,  9,  9,  9,  9,  9,
+   9,  9,  9, 10, 10, 10,  9, 11, 11, 11, 11, 11,  9,  9,  9,
+   9, 10, 10,  9,  9,  9,  9, 10,  9, 11, 11, 11, 11, 11, 11,
+  11,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 11, 11, 11, 11,
+  11, 11, 11, 10,  9, 10, 10,  9, 10,  9,  9, 10,  9, 11, 10,
+  10, 11, 11, 11, 11,  9, 10,  9,  9,  9,  9, 10, 10, 10, 10,
+  11, 11, 11, 11, 11, 11, 10, 10, 10,  9,  9, 10,  9, 10,  9,
+  10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11,  9,  9,  9,  9,
+   9, 10, 10, 10,
+};
+
+static const struct {
+    int dim;
+    int len;
+    int real_len;
+    const uint8_t * clens;
+    int lookup;
+    float min;
+    float delta;
+    const uint8_t * quant;
+} cvectors[] = {
+    { 2,   16,   16, codebook0,  0 },
+    { 2,    8,    8, codebook1,  0 },
+    { 2,  256,  256, codebook2,  0 },
+    { 2,   64,   64, codebook3,  0 },
+    { 2,  128,  128, codebook4,  0 },
+    { 2,   32,   32, codebook5,  0 },
+    { 2,   96,   96, codebook6,  0 },
+    { 2,   32,   32, codebook7,  0 },
+    { 2,   96,   96, codebook8,  0 },
+    { 2,   17,   17, codebook9,  0 },
+    { 2,   32,   32, codebook10, 0 },
+    { 2,   78,   78, codebook11, 0 },
+    { 2,   17,   17, codebook12, 0 },
+    { 2,   32,   32, codebook13, 0 },
+    { 2,   78,   78, codebook14, 0 },
+    { 2,  100,  100, codebook15, 0 },
+    { 8, 1641, 6561, codebook16, 1,    -1.0,   1.0, (const uint8_t[]){ 1, 0, 2, } },
+    { 4,  443,  625, codebook17, 1,    -2.0,   1.0, (const uint8_t[]){ 2, 1, 3, 0, 4, } },
+    { 4,  105,  625, codebook18, 1,    -2.0,   1.0, (const uint8_t[]){ 2, 1, 3, 0, 4, } },
+    { 2,   68,   81, codebook19, 1,    -4.0,   1.0, (const uint8_t[]){ 4, 3, 5, 2, 6, 1, 7, 0, 8, } },
+    { 2,   81,   81, codebook20, 1,    -4.0,   1.0, (const uint8_t[]){ 4, 3, 5, 2, 6, 1, 7, 0, 8, } },
+    { 2,  289,  289, codebook21, 1,    -8.0,   1.0, (const uint8_t[]){ 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15, 0, 16, } },
+    { 4,   81,   81, codebook22, 1,   -11.0,  11.0, (const uint8_t[]){ 1, 0, 2, } },
+    { 2,  121,  121, codebook23, 1,    -5.0,   1.0, (const uint8_t[]){ 5, 4, 6, 3, 7, 2, 8, 1, 9, 0, 10, } },
+    { 2,  169,  169, codebook24, 1,   -30.0,   5.0, (const uint8_t[]){ 6, 5, 7, 4, 8, 3, 9, 2, 10, 1, 11, 0, 12, } },
+    { 2,   25,   25, codebook25, 1,    -2.0,   1.0, (const uint8_t[]){ 2, 1, 3, 0, 4, } },
+    { 2,  169,  169, codebook26, 1, -1530.0, 255.0, (const uint8_t[]){ 6, 5, 7, 4, 8, 3, 9, 2, 10, 1, 11, 0, 12, } },
+    { 2,  225,  225, codebook27, 1,  -119.0,  17.0, (const uint8_t[]){ 7, 6, 8, 5, 9, 4, 10, 3, 11, 2, 12, 1, 13, 0, 14, } },
+    { 2,  289,  289, codebook28, 1,    -8.0,   1.0, (const uint8_t[]){ 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15, 0, 16, } },
+};
+
+static const struct {
+    int dim;
+    int subclass;
+    int masterbook;
+    const int * nbooks;
+} floor_classes[] = {
+    { 3, 0, 0, (const int[]){  4             } },
+    { 4, 1, 0, (const int[]){  5,  6         } },
+    { 3, 1, 1, (const int[]){  7,  8         } },
+    { 4, 2, 2, (const int[]){ -1,  9, 10, 11 } },
+    { 3, 2, 3, (const int[]){ -1, 12, 13, 14 } },
+};
diff --git a/contrib/ffmpeg/libavcodec/vp3.c b/contrib/ffmpeg/libavcodec/vp3.c
new file mode 100644
index 000000000..6a398693a
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vp3.c
@@ -0,0 +1,2659 @@
+/*
+ * Copyright (C) 2003-2004 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file vp3.c
+ * On2 VP3 Video Decoder
+ *
+ * VP3 Video Decoder by Mike Melanson (mike at multimedia.cx)
+ * For more information about the VP3 coding process, visit:
+ *   http://multimedia.cx/
+ *
+ * Theora decoder by Alex Beregszaszi
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+#include "vp3data.h"
+
+#define FRAGMENT_PIXELS 8
+
+/*
+ * Debugging Variables
+ *
+ * Define one or more of the following compile-time variables to 1 to obtain
+ * elaborate information about certain aspects of the decoding process.
+ *
+ * KEYFRAMES_ONLY: set this to 1 to only see keyframes (VP3 slideshow mode)
+ * DEBUG_VP3: high-level decoding flow
+ * DEBUG_INIT: initialization parameters
+ * DEBUG_DEQUANTIZERS: display how the dequanization tables are built
+ * DEBUG_BLOCK_CODING: unpacking the superblock/macroblock/fragment coding
+ * DEBUG_MODES: unpacking the coding modes for individual fragments
+ * DEBUG_VECTORS: display the motion vectors
+ * DEBUG_TOKEN: display exhaustive information about each DCT token
+ * DEBUG_VLC: display the VLCs as they are extracted from the stream
+ * DEBUG_DC_PRED: display the process of reversing DC prediction
+ * DEBUG_IDCT: show every detail of the IDCT process
+ */
+
+#define KEYFRAMES_ONLY 0
+
+#define DEBUG_VP3 0
+#define DEBUG_INIT 0
+#define DEBUG_DEQUANTIZERS 0
+#define DEBUG_BLOCK_CODING 0
+#define DEBUG_MODES 0
+#define DEBUG_VECTORS 0
+#define DEBUG_TOKEN 0
+#define DEBUG_VLC 0
+#define DEBUG_DC_PRED 0
+#define DEBUG_IDCT 0
+
+#if DEBUG_VP3
+#define debug_vp3(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
+#else
+static inline void debug_vp3(const char *format, ...) { }
+#endif
+
+#if DEBUG_INIT
+#define debug_init(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
+#else
+static inline void debug_init(const char *format, ...) { }
+#endif
+
+#if DEBUG_DEQUANTIZERS
+#define debug_dequantizers(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
+#else
+static inline void debug_dequantizers(const char *format, ...) { }
+#endif
+
+#if DEBUG_BLOCK_CODING
+#define debug_block_coding(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
+#else
+static inline void debug_block_coding(const char *format, ...) { }
+#endif
+
+#if DEBUG_MODES
+#define debug_modes(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
+#else
+static inline void debug_modes(const char *format, ...) { }
+#endif
+
+#if DEBUG_VECTORS
+#define debug_vectors(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
+#else
+static inline void debug_vectors(const char *format, ...) { }
+#endif
+
+#if DEBUG_TOKEN
+#define debug_token(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
+#else
+static inline void debug_token(const char *format, ...) { }
+#endif
+
+#if DEBUG_VLC
+#define debug_vlc(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
+#else
+static inline void debug_vlc(const char *format, ...) { }
+#endif
+
+#if DEBUG_DC_PRED
+#define debug_dc_pred(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
+#else
+static inline void debug_dc_pred(const char *format, ...) { }
+#endif
+
+#if DEBUG_IDCT
+#define debug_idct(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
+#else
+static inline void debug_idct(const char *format, ...) { }
+#endif
+
+typedef struct Coeff {
+    struct Coeff *next;
+    DCTELEM coeff;
+    uint8_t index;
+} Coeff;
+
+//FIXME split things out into their own arrays
+typedef struct Vp3Fragment {
+    Coeff *next_coeff;
+    /* address of first pixel taking into account which plane the fragment
+     * lives on as well as the plane stride */
+    int first_pixel;
+    /* this is the macroblock that the fragment belongs to */
+    uint16_t macroblock;
+    uint8_t coding_method;
+    uint8_t coeff_count;
+    int8_t motion_x;
+    int8_t motion_y;
+} Vp3Fragment;
+
+#define SB_NOT_CODED        0
+#define SB_PARTIALLY_CODED  1
+#define SB_FULLY_CODED      2
+
+#define MODE_INTER_NO_MV      0
+#define MODE_INTRA            1
+#define MODE_INTER_PLUS_MV    2
+#define MODE_INTER_LAST_MV    3
+#define MODE_INTER_PRIOR_LAST 4
+#define MODE_USING_GOLDEN     5
+#define MODE_GOLDEN_MV        6
+#define MODE_INTER_FOURMV     7
+#define CODING_MODE_COUNT     8
+
+/* special internal mode */
+#define MODE_COPY             8
+
+/* There are 6 preset schemes, plus a free-form scheme */
+static int ModeAlphabet[7][CODING_MODE_COUNT] =
+{
+    /* this is the custom scheme */
+    { 0, 0, 0, 0, 0, 0, 0, 0 },
+
+    /* scheme 1: Last motion vector dominates */
+    {    MODE_INTER_LAST_MV,    MODE_INTER_PRIOR_LAST,
+         MODE_INTER_PLUS_MV,    MODE_INTER_NO_MV,
+         MODE_INTRA,            MODE_USING_GOLDEN,
+         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },
+
+    /* scheme 2 */
+    {    MODE_INTER_LAST_MV,    MODE_INTER_PRIOR_LAST,
+         MODE_INTER_NO_MV,      MODE_INTER_PLUS_MV,
+         MODE_INTRA,            MODE_USING_GOLDEN,
+         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },
+
+    /* scheme 3 */
+    {    MODE_INTER_LAST_MV,    MODE_INTER_PLUS_MV,
+         MODE_INTER_PRIOR_LAST, MODE_INTER_NO_MV,
+         MODE_INTRA,            MODE_USING_GOLDEN,
+         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },
+
+    /* scheme 4 */
+    {    MODE_INTER_LAST_MV,    MODE_INTER_PLUS_MV,
+         MODE_INTER_NO_MV,      MODE_INTER_PRIOR_LAST,
+         MODE_INTRA,            MODE_USING_GOLDEN,
+         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },
+
+    /* scheme 5: No motion vector dominates */
+    {    MODE_INTER_NO_MV,      MODE_INTER_LAST_MV,
+         MODE_INTER_PRIOR_LAST, MODE_INTER_PLUS_MV,
+         MODE_INTRA,            MODE_USING_GOLDEN,
+         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },
+
+    /* scheme 6 */
+    {    MODE_INTER_NO_MV,      MODE_USING_GOLDEN,
+         MODE_INTER_LAST_MV,    MODE_INTER_PRIOR_LAST,
+         MODE_INTER_PLUS_MV,    MODE_INTRA,
+         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },
+
+};
+
+#define MIN_DEQUANT_VAL 2
+
+typedef struct Vp3DecodeContext {
+    AVCodecContext *avctx;
+    int theora, theora_tables;
+    int version;
+    int width, height;
+    AVFrame golden_frame;
+    AVFrame last_frame;
+    AVFrame current_frame;
+    int keyframe;
+    DSPContext dsp;
+    int flipped_image;
+
+    int qis[3];
+    int nqis;
+    int quality_index;
+    int last_quality_index;
+
+    int superblock_count;
+    int superblock_width;
+    int superblock_height;
+    int y_superblock_width;
+    int y_superblock_height;
+    int c_superblock_width;
+    int c_superblock_height;
+    int u_superblock_start;
+    int v_superblock_start;
+    unsigned char *superblock_coding;
+
+    int macroblock_count;
+    int macroblock_width;
+    int macroblock_height;
+
+    int fragment_count;
+    int fragment_width;
+    int fragment_height;
+
+    Vp3Fragment *all_fragments;
+    Coeff *coeffs;
+    Coeff *next_coeff;
+    int fragment_start[3];
+
+    ScanTable scantable;
+
+    /* tables */
+    uint16_t coded_dc_scale_factor[64];
+    uint32_t coded_ac_scale_factor[64];
+    uint8_t base_matrix[384][64];
+    uint8_t qr_count[2][3];
+    uint8_t qr_size [2][3][64];
+    uint16_t qr_base[2][3][64];
+
+    /* this is a list of indices into the all_fragments array indicating
+     * which of the fragments are coded */
+    int *coded_fragment_list;
+    int coded_fragment_list_index;
+    int pixel_addresses_inited;
+
+    VLC dc_vlc[16];
+    VLC ac_vlc_1[16];
+    VLC ac_vlc_2[16];
+    VLC ac_vlc_3[16];
+    VLC ac_vlc_4[16];
+
+    VLC superblock_run_length_vlc;
+    VLC fragment_run_length_vlc;
+    VLC mode_code_vlc;
+    VLC motion_vector_vlc;
+
+    /* these arrays need to be on 16-byte boundaries since SSE2 operations
+     * index into them */
+    DECLARE_ALIGNED_16(int16_t, qmat[2][4][64]);        //<qmat[is_inter][plane]
+
+    /* This table contains superblock_count * 16 entries. Each set of 16
+     * numbers corresponds to the fragment indices 0..15 of the superblock.
+     * An entry will be -1 to indicate that no entry corresponds to that
+     * index. */
+    int *superblock_fragments;
+
+    /* This table contains superblock_count * 4 entries. Each set of 4
+     * numbers corresponds to the macroblock indices 0..3 of the superblock.
+     * An entry will be -1 to indicate that no entry corresponds to that
+     * index. */
+    int *superblock_macroblocks;
+
+    /* This table contains macroblock_count * 6 entries. Each set of 6
+     * numbers corresponds to the fragment indices 0..5 which comprise
+     * the macroblock (4 Y fragments and 2 C fragments). */
+    int *macroblock_fragments;
+    /* This is an array that indicates how a particular macroblock
+     * is coded. */
+    unsigned char *macroblock_coding;
+
+    int first_coded_y_fragment;
+    int first_coded_c_fragment;
+    int last_coded_y_fragment;
+    int last_coded_c_fragment;
+
+    uint8_t edge_emu_buffer[9*2048]; //FIXME dynamic alloc
+    int8_t qscale_table[2048]; //FIXME dynamic alloc (width+15)/16
+
+    /* Huffman decode */
+    int hti;
+    unsigned int hbits;
+    int entries;
+    int huff_code_size;
+    uint16_t huffman_table[80][32][2];
+
+    uint32_t filter_limit_values[64];
+    int bounding_values_array[256];
+} Vp3DecodeContext;
+
+static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb);
+
+/************************************************************************
+ * VP3 specific functions
+ ************************************************************************/
+
+/*
+ * This function sets up all of the various blocks mappings:
+ * superblocks <-> fragments, macroblocks <-> fragments,
+ * superblocks <-> macroblocks
+ *
+ * Returns 0 is successful; returns 1 if *anything* went wrong.
+ */
+static int init_block_mapping(Vp3DecodeContext *s)
+{
+    int i, j;
+    signed int hilbert_walk_mb[4];
+
+    int current_fragment = 0;
+    int current_width = 0;
+    int current_height = 0;
+    int right_edge = 0;
+    int bottom_edge = 0;
+    int superblock_row_inc = 0;
+    int *hilbert = NULL;
+    int mapping_index = 0;
+
+    int current_macroblock;
+    int c_fragment;
+
+    signed char travel_width[16] = {
+         1,  1,  0, -1,
+         0,  0,  1,  0,
+         1,  0,  1,  0,
+         0, -1,  0,  1
+    };
+
+    signed char travel_height[16] = {
+         0,  0,  1,  0,
+         1,  1,  0, -1,
+         0,  1,  0, -1,
+        -1,  0, -1,  0
+    };
+
+    signed char travel_width_mb[4] = {
+         1,  0,  1,  0
+    };
+
+    signed char travel_height_mb[4] = {
+         0,  1,  0, -1
+    };
+
+    debug_vp3("  vp3: initialize block mapping tables\n");
+
+    hilbert_walk_mb[0] = 1;
+    hilbert_walk_mb[1] = s->macroblock_width;
+    hilbert_walk_mb[2] = 1;
+    hilbert_walk_mb[3] = -s->macroblock_width;
+
+    /* iterate through each superblock (all planes) and map the fragments */
+    for (i = 0; i < s->superblock_count; i++) {
+        debug_init("    superblock %d (u starts @ %d, v starts @ %d)\n",
+            i, s->u_superblock_start, s->v_superblock_start);
+
+        /* time to re-assign the limits? */
+        if (i == 0) {
+
+            /* start of Y superblocks */
+            right_edge = s->fragment_width;
+            bottom_edge = s->fragment_height;
+            current_width = -1;
+            current_height = 0;
+            superblock_row_inc = 3 * s->fragment_width -
+                (s->y_superblock_width * 4 - s->fragment_width);
+
+            /* the first operation for this variable is to advance by 1 */
+            current_fragment = -1;
+
+        } else if (i == s->u_superblock_start) {
+
+            /* start of U superblocks */
+            right_edge = s->fragment_width / 2;
+            bottom_edge = s->fragment_height / 2;
+            current_width = -1;
+            current_height = 0;
+            superblock_row_inc = 3 * (s->fragment_width / 2) -
+                (s->c_superblock_width * 4 - s->fragment_width / 2);
+
+            /* the first operation for this variable is to advance by 1 */
+            current_fragment = s->fragment_start[1] - 1;
+
+        } else if (i == s->v_superblock_start) {
+
+            /* start of V superblocks */
+            right_edge = s->fragment_width / 2;
+            bottom_edge = s->fragment_height / 2;
+            current_width = -1;
+            current_height = 0;
+            superblock_row_inc = 3 * (s->fragment_width / 2) -
+                (s->c_superblock_width * 4 - s->fragment_width / 2);
+
+            /* the first operation for this variable is to advance by 1 */
+            current_fragment = s->fragment_start[2] - 1;
+
+        }
+
+        if (current_width >= right_edge - 1) {
+            /* reset width and move to next superblock row */
+            current_width = -1;
+            current_height += 4;
+
+            /* fragment is now at the start of a new superblock row */
+            current_fragment += superblock_row_inc;
+        }
+
+        /* iterate through all 16 fragments in a superblock */
+        for (j = 0; j < 16; j++) {
+            current_fragment += travel_width[j] + right_edge * travel_height[j];
+            current_width += travel_width[j];
+            current_height += travel_height[j];
+
+            /* check if the fragment is in bounds */
+            if ((current_width < right_edge) &&
+                (current_height < bottom_edge)) {
+                s->superblock_fragments[mapping_index] = current_fragment;
+                debug_init("    mapping fragment %d to superblock %d, position %d (%d/%d x %d/%d)\n",
+                    s->superblock_fragments[mapping_index], i, j,
+                    current_width, right_edge, current_height, bottom_edge);
+            } else {
+                s->superblock_fragments[mapping_index] = -1;
+                debug_init("    superblock %d, position %d has no fragment (%d/%d x %d/%d)\n",
+                    i, j,
+                    current_width, right_edge, current_height, bottom_edge);
+            }
+
+            mapping_index++;
+        }
+    }
+
+    /* initialize the superblock <-> macroblock mapping; iterate through
+     * all of the Y plane superblocks to build this mapping */
+    right_edge = s->macroblock_width;
+    bottom_edge = s->macroblock_height;
+    current_width = -1;
+    current_height = 0;
+    superblock_row_inc = s->macroblock_width -
+        (s->y_superblock_width * 2 - s->macroblock_width);;
+    hilbert = hilbert_walk_mb;
+    mapping_index = 0;
+    current_macroblock = -1;
+    for (i = 0; i < s->u_superblock_start; i++) {
+
+        if (current_width >= right_edge - 1) {
+            /* reset width and move to next superblock row */
+            current_width = -1;
+            current_height += 2;
+
+            /* macroblock is now at the start of a new superblock row */
+            current_macroblock += superblock_row_inc;
+        }
+
+        /* iterate through each potential macroblock in the superblock */
+        for (j = 0; j < 4; j++) {
+            current_macroblock += hilbert_walk_mb[j];
+            current_width += travel_width_mb[j];
+            current_height += travel_height_mb[j];
+
+            /* check if the macroblock is in bounds */
+            if ((current_width < right_edge) &&
+                (current_height < bottom_edge)) {
+                s->superblock_macroblocks[mapping_index] = current_macroblock;
+                debug_init("    mapping macroblock %d to superblock %d, position %d (%d/%d x %d/%d)\n",
+                    s->superblock_macroblocks[mapping_index], i, j,
+                    current_width, right_edge, current_height, bottom_edge);
+            } else {
+                s->superblock_macroblocks[mapping_index] = -1;
+                debug_init("    superblock %d, position %d has no macroblock (%d/%d x %d/%d)\n",
+                    i, j,
+                    current_width, right_edge, current_height, bottom_edge);
+            }
+
+            mapping_index++;
+        }
+    }
+
+    /* initialize the macroblock <-> fragment mapping */
+    current_fragment = 0;
+    current_macroblock = 0;
+    mapping_index = 0;
+    for (i = 0; i < s->fragment_height; i += 2) {
+
+        for (j = 0; j < s->fragment_width; j += 2) {
+
+            debug_init("    macroblock %d contains fragments: ", current_macroblock);
+            s->all_fragments[current_fragment].macroblock = current_macroblock;
+            s->macroblock_fragments[mapping_index++] = current_fragment;
+            debug_init("%d ", current_fragment);
+
+            if (j + 1 < s->fragment_width) {
+                s->all_fragments[current_fragment + 1].macroblock = current_macroblock;
+                s->macroblock_fragments[mapping_index++] = current_fragment + 1;
+                debug_init("%d ", current_fragment + 1);
+            } else
+                s->macroblock_fragments[mapping_index++] = -1;
+
+            if (i + 1 < s->fragment_height) {
+                s->all_fragments[current_fragment + s->fragment_width].macroblock =
+                    current_macroblock;
+                s->macroblock_fragments[mapping_index++] =
+                    current_fragment + s->fragment_width;
+                debug_init("%d ", current_fragment + s->fragment_width);
+            } else
+                s->macroblock_fragments[mapping_index++] = -1;
+
+            if ((j + 1 < s->fragment_width) && (i + 1 < s->fragment_height)) {
+                s->all_fragments[current_fragment + s->fragment_width + 1].macroblock =
+                    current_macroblock;
+                s->macroblock_fragments[mapping_index++] =
+                    current_fragment + s->fragment_width + 1;
+                debug_init("%d ", current_fragment + s->fragment_width + 1);
+            } else
+                s->macroblock_fragments[mapping_index++] = -1;
+
+            /* C planes */
+            c_fragment = s->fragment_start[1] +
+                (i * s->fragment_width / 4) + (j / 2);
+            s->all_fragments[c_fragment].macroblock = s->macroblock_count;
+            s->macroblock_fragments[mapping_index++] = c_fragment;
+            debug_init("%d ", c_fragment);
+
+            c_fragment = s->fragment_start[2] +
+                (i * s->fragment_width / 4) + (j / 2);
+            s->all_fragments[c_fragment].macroblock = s->macroblock_count;
+            s->macroblock_fragments[mapping_index++] = c_fragment;
+            debug_init("%d ", c_fragment);
+
+            debug_init("\n");
+
+            if (j + 2 <= s->fragment_width)
+                current_fragment += 2;
+            else
+                current_fragment++;
+            current_macroblock++;
+        }
+
+        current_fragment += s->fragment_width;
+    }
+
+    return 0;  /* successful path out */
+}
+
+/*
+ * This function wipes out all of the fragment data.
+ */
+static void init_frame(Vp3DecodeContext *s, GetBitContext *gb)
+{
+    int i;
+
+    /* zero out all of the fragment information */
+    s->coded_fragment_list_index = 0;
+    for (i = 0; i < s->fragment_count; i++) {
+        s->all_fragments[i].coeff_count = 0;
+        s->all_fragments[i].motion_x = 127;
+        s->all_fragments[i].motion_y = 127;
+        s->all_fragments[i].next_coeff= NULL;
+        s->coeffs[i].index=
+        s->coeffs[i].coeff=0;
+        s->coeffs[i].next= NULL;
+    }
+}
+
+/*
+ * This function sets up the dequantization tables used for a particular
+ * frame.
+ */
+static void init_dequantizer(Vp3DecodeContext *s)
+{
+    int ac_scale_factor = s->coded_ac_scale_factor[s->quality_index];
+    int dc_scale_factor = s->coded_dc_scale_factor[s->quality_index];
+    int i, plane, inter, qri, bmi, bmj, qistart;
+
+    debug_vp3("  vp3: initializing dequantization tables\n");
+
+    for(inter=0; inter<2; inter++){
+        for(plane=0; plane<3; plane++){
+            int sum=0;
+            for(qri=0; qri<s->qr_count[inter][plane]; qri++){
+                sum+= s->qr_size[inter][plane][qri];
+                if(s->quality_index <= sum)
+                    break;
+            }
+            qistart= sum - s->qr_size[inter][plane][qri];
+            bmi= s->qr_base[inter][plane][qri  ];
+            bmj= s->qr_base[inter][plane][qri+1];
+            for(i=0; i<64; i++){
+                int coeff= (  2*(sum    -s->quality_index)*s->base_matrix[bmi][i]
+                            - 2*(qistart-s->quality_index)*s->base_matrix[bmj][i]
+                            + s->qr_size[inter][plane][qri])
+                           / (2*s->qr_size[inter][plane][qri]);
+
+                int qmin= 8<<(inter + !i);
+                int qscale= i ? ac_scale_factor : dc_scale_factor;
+
+                s->qmat[inter][plane][i]= clip((qscale * coeff)/100 * 4, qmin, 4096);
+            }
+        }
+    }
+
+    memset(s->qscale_table, (FFMAX(s->qmat[0][0][1], s->qmat[0][1][1])+8)/16, 512); //FIXME finetune
+}
+
+/*
+ * This function initializes the loop filter boundary limits if the frame's
+ * quality index is different from the previous frame's.
+ */
+static void init_loop_filter(Vp3DecodeContext *s)
+{
+    int *bounding_values= s->bounding_values_array+127;
+    int filter_limit;
+    int x;
+
+    filter_limit = s->filter_limit_values[s->quality_index];
+
+    /* set up the bounding values */
+    memset(s->bounding_values_array, 0, 256 * sizeof(int));
+    for (x = 0; x < filter_limit; x++) {
+        bounding_values[-x - filter_limit] = -filter_limit + x;
+        bounding_values[-x] = -x;
+        bounding_values[x] = x;
+        bounding_values[x + filter_limit] = filter_limit - x;
+    }
+}
+
+/*
+ * This function unpacks all of the superblock/macroblock/fragment coding
+ * information from the bitstream.
+ */
+static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
+{
+    int bit = 0;
+    int current_superblock = 0;
+    int current_run = 0;
+    int decode_fully_flags = 0;
+    int decode_partial_blocks = 0;
+    int first_c_fragment_seen;
+
+    int i, j;
+    int current_fragment;
+
+    debug_vp3("  vp3: unpacking superblock coding\n");
+
+    if (s->keyframe) {
+
+        debug_vp3("    keyframe-- all superblocks are fully coded\n");
+        memset(s->superblock_coding, SB_FULLY_CODED, s->superblock_count);
+
+    } else {
+
+        /* unpack the list of partially-coded superblocks */
+        bit = get_bits(gb, 1);
+        /* toggle the bit because as soon as the first run length is
+         * fetched the bit will be toggled again */
+        bit ^= 1;
+        while (current_superblock < s->superblock_count) {
+            if (current_run-- == 0) {
+                bit ^= 1;
+                current_run = get_vlc2(gb,
+                    s->superblock_run_length_vlc.table, 6, 2);
+                if (current_run == 33)
+                    current_run += get_bits(gb, 12);
+                debug_block_coding("      setting superblocks %d..%d to %s\n",
+                    current_superblock,
+                    current_superblock + current_run - 1,
+                    (bit) ? "partially coded" : "not coded");
+
+                /* if any of the superblocks are not partially coded, flag
+                 * a boolean to decode the list of fully-coded superblocks */
+                if (bit == 0) {
+                    decode_fully_flags = 1;
+                } else {
+
+                    /* make a note of the fact that there are partially coded
+                     * superblocks */
+                    decode_partial_blocks = 1;
+                }
+            }
+            s->superblock_coding[current_superblock++] = bit;
+        }
+
+        /* unpack the list of fully coded superblocks if any of the blocks were
+         * not marked as partially coded in the previous step */
+        if (decode_fully_flags) {
+
+            current_superblock = 0;
+            current_run = 0;
+            bit = get_bits(gb, 1);
+            /* toggle the bit because as soon as the first run length is
+             * fetched the bit will be toggled again */
+            bit ^= 1;
+            while (current_superblock < s->superblock_count) {
+
+                /* skip any superblocks already marked as partially coded */
+                if (s->superblock_coding[current_superblock] == SB_NOT_CODED) {
+
+                    if (current_run-- == 0) {
+                        bit ^= 1;
+                        current_run = get_vlc2(gb,
+                            s->superblock_run_length_vlc.table, 6, 2);
+                        if (current_run == 33)
+                            current_run += get_bits(gb, 12);
+                    }
+
+                    debug_block_coding("      setting superblock %d to %s\n",
+                        current_superblock,
+                        (bit) ? "fully coded" : "not coded");
+                    s->superblock_coding[current_superblock] = 2*bit;
+                }
+                current_superblock++;
+            }
+        }
+
+        /* if there were partial blocks, initialize bitstream for
+         * unpacking fragment codings */
+        if (decode_partial_blocks) {
+
+            current_run = 0;
+            bit = get_bits(gb, 1);
+            /* toggle the bit because as soon as the first run length is
+             * fetched the bit will be toggled again */
+            bit ^= 1;
+        }
+    }
+
+    /* figure out which fragments are coded; iterate through each
+     * superblock (all planes) */
+    s->coded_fragment_list_index = 0;
+    s->next_coeff= s->coeffs + s->fragment_count;
+    s->first_coded_y_fragment = s->first_coded_c_fragment = 0;
+    s->last_coded_y_fragment = s->last_coded_c_fragment = -1;
+    first_c_fragment_seen = 0;
+    memset(s->macroblock_coding, MODE_COPY, s->macroblock_count);
+    for (i = 0; i < s->superblock_count; i++) {
+
+        /* iterate through all 16 fragments in a superblock */
+        for (j = 0; j < 16; j++) {
+
+            /* if the fragment is in bounds, check its coding status */
+            current_fragment = s->superblock_fragments[i * 16 + j];
+            if (current_fragment >= s->fragment_count) {
+                av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_superblocks(): bad fragment number (%d >= %d)\n",
+                    current_fragment, s->fragment_count);
+                return 1;
+            }
+            if (current_fragment != -1) {
+                if (s->superblock_coding[i] == SB_NOT_CODED) {
+
+                    /* copy all the fragments from the prior frame */
+                    s->all_fragments[current_fragment].coding_method =
+                        MODE_COPY;
+
+                } else if (s->superblock_coding[i] == SB_PARTIALLY_CODED) {
+
+                    /* fragment may or may not be coded; this is the case
+                     * that cares about the fragment coding runs */
+                    if (current_run-- == 0) {
+                        bit ^= 1;
+                        current_run = get_vlc2(gb,
+                            s->fragment_run_length_vlc.table, 5, 2);
+                    }
+
+                    if (bit) {
+                        /* default mode; actual mode will be decoded in
+                         * the next phase */
+                        s->all_fragments[current_fragment].coding_method =
+                            MODE_INTER_NO_MV;
+                        s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment;
+                        s->coded_fragment_list[s->coded_fragment_list_index] =
+                            current_fragment;
+                        if ((current_fragment >= s->fragment_start[1]) &&
+                            (s->last_coded_y_fragment == -1) &&
+                            (!first_c_fragment_seen)) {
+                            s->first_coded_c_fragment = s->coded_fragment_list_index;
+                            s->last_coded_y_fragment = s->first_coded_c_fragment - 1;
+                            first_c_fragment_seen = 1;
+                        }
+                        s->coded_fragment_list_index++;
+                        s->macroblock_coding[s->all_fragments[current_fragment].macroblock] = MODE_INTER_NO_MV;
+                        debug_block_coding("      superblock %d is partially coded, fragment %d is coded\n",
+                            i, current_fragment);
+                    } else {
+                        /* not coded; copy this fragment from the prior frame */
+                        s->all_fragments[current_fragment].coding_method =
+                            MODE_COPY;
+                        debug_block_coding("      superblock %d is partially coded, fragment %d is not coded\n",
+                            i, current_fragment);
+                    }
+
+                } else {
+
+                    /* fragments are fully coded in this superblock; actual
+                     * coding will be determined in next step */
+                    s->all_fragments[current_fragment].coding_method =
+                        MODE_INTER_NO_MV;
+                    s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment;
+                    s->coded_fragment_list[s->coded_fragment_list_index] =
+                        current_fragment;
+                    if ((current_fragment >= s->fragment_start[1]) &&
+                        (s->last_coded_y_fragment == -1) &&
+                        (!first_c_fragment_seen)) {
+                        s->first_coded_c_fragment = s->coded_fragment_list_index;
+                        s->last_coded_y_fragment = s->first_coded_c_fragment - 1;
+                        first_c_fragment_seen = 1;
+                    }
+                    s->coded_fragment_list_index++;
+                    s->macroblock_coding[s->all_fragments[current_fragment].macroblock] = MODE_INTER_NO_MV;
+                    debug_block_coding("      superblock %d is fully coded, fragment %d is coded\n",
+                        i, current_fragment);
+                }
+            }
+        }
+    }
+
+    if (!first_c_fragment_seen)
+        /* only Y fragments coded in this frame */
+        s->last_coded_y_fragment = s->coded_fragment_list_index - 1;
+    else
+        /* end the list of coded C fragments */
+        s->last_coded_c_fragment = s->coded_fragment_list_index - 1;
+
+    debug_block_coding("    %d total coded fragments, y: %d -> %d, c: %d -> %d\n",
+        s->coded_fragment_list_index,
+        s->first_coded_y_fragment,
+        s->last_coded_y_fragment,
+        s->first_coded_c_fragment,
+        s->last_coded_c_fragment);
+
+    return 0;
+}
+
+/*
+ * This function unpacks all the coding mode data for individual macroblocks
+ * from the bitstream.
+ */
+static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb)
+{
+    int i, j, k;
+    int scheme;
+    int current_macroblock;
+    int current_fragment;
+    int coding_mode;
+
+    debug_vp3("  vp3: unpacking encoding modes\n");
+
+    if (s->keyframe) {
+        debug_vp3("    keyframe-- all blocks are coded as INTRA\n");
+
+        for (i = 0; i < s->fragment_count; i++)
+            s->all_fragments[i].coding_method = MODE_INTRA;
+
+    } else {
+
+        /* fetch the mode coding scheme for this frame */
+        scheme = get_bits(gb, 3);
+        debug_modes("    using mode alphabet %d\n", scheme);
+
+        /* is it a custom coding scheme? */
+        if (scheme == 0) {
+            debug_modes("    custom mode alphabet ahead:\n");
+            for (i = 0; i < 8; i++)
+                ModeAlphabet[scheme][get_bits(gb, 3)] = i;
+        }
+
+        for (i = 0; i < 8; i++)
+            debug_modes("      mode[%d][%d] = %d\n", scheme, i,
+                ModeAlphabet[scheme][i]);
+
+        /* iterate through all of the macroblocks that contain 1 or more
+         * coded fragments */
+        for (i = 0; i < s->u_superblock_start; i++) {
+
+            for (j = 0; j < 4; j++) {
+                current_macroblock = s->superblock_macroblocks[i * 4 + j];
+                if ((current_macroblock == -1) ||
+                    (s->macroblock_coding[current_macroblock] == MODE_COPY))
+                    continue;
+                if (current_macroblock >= s->macroblock_count) {
+                    av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_modes(): bad macroblock number (%d >= %d)\n",
+                        current_macroblock, s->macroblock_count);
+                    return 1;
+                }
+
+                /* mode 7 means get 3 bits for each coding mode */
+                if (scheme == 7)
+                    coding_mode = get_bits(gb, 3);
+                else
+                    coding_mode = ModeAlphabet[scheme]
+                        [get_vlc2(gb, s->mode_code_vlc.table, 3, 3)];
+
+                s->macroblock_coding[current_macroblock] = coding_mode;
+                for (k = 0; k < 6; k++) {
+                    current_fragment =
+                        s->macroblock_fragments[current_macroblock * 6 + k];
+                    if (current_fragment == -1)
+                        continue;
+                    if (current_fragment >= s->fragment_count) {
+                        av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_modes(): bad fragment number (%d >= %d)\n",
+                            current_fragment, s->fragment_count);
+                        return 1;
+                    }
+                    if (s->all_fragments[current_fragment].coding_method !=
+                        MODE_COPY)
+                        s->all_fragments[current_fragment].coding_method =
+                            coding_mode;
+                }
+
+                debug_modes("    coding method for macroblock starting @ fragment %d = %d\n",
+                    s->macroblock_fragments[current_macroblock * 6], coding_mode);
+            }
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * This function unpacks all the motion vectors for the individual
+ * macroblocks from the bitstream.
+ */
+static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb)
+{
+    int i, j, k;
+    int coding_mode;
+    int motion_x[6];
+    int motion_y[6];
+    int last_motion_x = 0;
+    int last_motion_y = 0;
+    int prior_last_motion_x = 0;
+    int prior_last_motion_y = 0;
+    int current_macroblock;
+    int current_fragment;
+
+    debug_vp3("  vp3: unpacking motion vectors\n");
+    if (s->keyframe) {
+
+        debug_vp3("    keyframe-- there are no motion vectors\n");
+
+    } else {
+
+        memset(motion_x, 0, 6 * sizeof(int));
+        memset(motion_y, 0, 6 * sizeof(int));
+
+        /* coding mode 0 is the VLC scheme; 1 is the fixed code scheme */
+        coding_mode = get_bits(gb, 1);
+        debug_vectors("    using %s scheme for unpacking motion vectors\n",
+            (coding_mode == 0) ? "VLC" : "fixed-length");
+
+        /* iterate through all of the macroblocks that contain 1 or more
+         * coded fragments */
+        for (i = 0; i < s->u_superblock_start; i++) {
+
+            for (j = 0; j < 4; j++) {
+                current_macroblock = s->superblock_macroblocks[i * 4 + j];
+                if ((current_macroblock == -1) ||
+                    (s->macroblock_coding[current_macroblock] == MODE_COPY))
+                    continue;
+                if (current_macroblock >= s->macroblock_count) {
+                    av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_vectors(): bad macroblock number (%d >= %d)\n",
+                        current_macroblock, s->macroblock_count);
+                    return 1;
+                }
+
+                current_fragment = s->macroblock_fragments[current_macroblock * 6];
+                if (current_fragment >= s->fragment_count) {
+                    av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_vectors(): bad fragment number (%d >= %d\n",
+                        current_fragment, s->fragment_count);
+                    return 1;
+                }
+                switch (s->macroblock_coding[current_macroblock]) {
+
+                case MODE_INTER_PLUS_MV:
+                case MODE_GOLDEN_MV:
+                    /* all 6 fragments use the same motion vector */
+                    if (coding_mode == 0) {
+                        motion_x[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
+                        motion_y[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
+                    } else {
+                        motion_x[0] = fixed_motion_vector_table[get_bits(gb, 6)];
+                        motion_y[0] = fixed_motion_vector_table[get_bits(gb, 6)];
+                    }
+
+                    for (k = 1; k < 6; k++) {
+                        motion_x[k] = motion_x[0];
+                        motion_y[k] = motion_y[0];
+                    }
+
+                    /* vector maintenance, only on MODE_INTER_PLUS_MV */
+                    if (s->macroblock_coding[current_macroblock] ==
+                        MODE_INTER_PLUS_MV) {
+                        prior_last_motion_x = last_motion_x;
+                        prior_last_motion_y = last_motion_y;
+                        last_motion_x = motion_x[0];
+                        last_motion_y = motion_y[0];
+                    }
+                    break;
+
+                case MODE_INTER_FOURMV:
+                    /* fetch 4 vectors from the bitstream, one for each
+                     * Y fragment, then average for the C fragment vectors */
+                    motion_x[4] = motion_y[4] = 0;
+                    for (k = 0; k < 4; k++) {
+                        if (coding_mode == 0) {
+                            motion_x[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
+                            motion_y[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
+                        } else {
+                            motion_x[k] = fixed_motion_vector_table[get_bits(gb, 6)];
+                            motion_y[k] = fixed_motion_vector_table[get_bits(gb, 6)];
+                        }
+                        motion_x[4] += motion_x[k];
+                        motion_y[4] += motion_y[k];
+                    }
+
+                    motion_x[5]=
+                    motion_x[4]= RSHIFT(motion_x[4], 2);
+                    motion_y[5]=
+                    motion_y[4]= RSHIFT(motion_y[4], 2);
+
+                    /* vector maintenance; vector[3] is treated as the
+                     * last vector in this case */
+                    prior_last_motion_x = last_motion_x;
+                    prior_last_motion_y = last_motion_y;
+                    last_motion_x = motion_x[3];
+                    last_motion_y = motion_y[3];
+                    break;
+
+                case MODE_INTER_LAST_MV:
+                    /* all 6 fragments use the last motion vector */
+                    motion_x[0] = last_motion_x;
+                    motion_y[0] = last_motion_y;
+                    for (k = 1; k < 6; k++) {
+                        motion_x[k] = motion_x[0];
+                        motion_y[k] = motion_y[0];
+                    }
+
+                    /* no vector maintenance (last vector remains the
+                     * last vector) */
+                    break;
+
+                case MODE_INTER_PRIOR_LAST:
+                    /* all 6 fragments use the motion vector prior to the
+                     * last motion vector */
+                    motion_x[0] = prior_last_motion_x;
+                    motion_y[0] = prior_last_motion_y;
+                    for (k = 1; k < 6; k++) {
+                        motion_x[k] = motion_x[0];
+                        motion_y[k] = motion_y[0];
+                    }
+
+                    /* vector maintenance */
+                    prior_last_motion_x = last_motion_x;
+                    prior_last_motion_y = last_motion_y;
+                    last_motion_x = motion_x[0];
+                    last_motion_y = motion_y[0];
+                    break;
+
+                default:
+                    /* covers intra, inter without MV, golden without MV */
+                    memset(motion_x, 0, 6 * sizeof(int));
+                    memset(motion_y, 0, 6 * sizeof(int));
+
+                    /* no vector maintenance */
+                    break;
+                }
+
+                /* assign the motion vectors to the correct fragments */
+                debug_vectors("    vectors for macroblock starting @ fragment %d (coding method %d):\n",
+                    current_fragment,
+                    s->macroblock_coding[current_macroblock]);
+                for (k = 0; k < 6; k++) {
+                    current_fragment =
+                        s->macroblock_fragments[current_macroblock * 6 + k];
+                    if (current_fragment == -1)
+                        continue;
+                    if (current_fragment >= s->fragment_count) {
+                        av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_vectors(): bad fragment number (%d >= %d)\n",
+                            current_fragment, s->fragment_count);
+                        return 1;
+                    }
+                    s->all_fragments[current_fragment].motion_x = motion_x[k];
+                    s->all_fragments[current_fragment].motion_y = motion_y[k];
+                    debug_vectors("    vector %d: fragment %d = (%d, %d)\n",
+                        k, current_fragment, motion_x[k], motion_y[k]);
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * This function is called by unpack_dct_coeffs() to extract the VLCs from
+ * the bitstream. The VLCs encode tokens which are used to unpack DCT
+ * data. This function unpacks all the VLCs for either the Y plane or both
+ * C planes, and is called for DC coefficients or different AC coefficient
+ * levels (since different coefficient types require different VLC tables.
+ *
+ * This function returns a residual eob run. E.g, if a particular token gave
+ * instructions to EOB the next 5 fragments and there were only 2 fragments
+ * left in the current fragment range, 3 would be returned so that it could
+ * be passed into the next call to this same function.
+ */
+static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb,
+                        VLC *table, int coeff_index,
+                        int first_fragment, int last_fragment,
+                        int eob_run)
+{
+    int i;
+    int token;
+    int zero_run = 0;
+    DCTELEM coeff = 0;
+    Vp3Fragment *fragment;
+    uint8_t *perm= s->scantable.permutated;
+    int bits_to_get;
+
+    if ((first_fragment >= s->fragment_count) ||
+        (last_fragment >= s->fragment_count)) {
+
+        av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_vlcs(): bad fragment number (%d -> %d ?)\n",
+            first_fragment, last_fragment);
+        return 0;
+    }
+
+    for (i = first_fragment; i <= last_fragment; i++) {
+
+        fragment = &s->all_fragments[s->coded_fragment_list[i]];
+        if (fragment->coeff_count > coeff_index)
+            continue;
+
+        if (!eob_run) {
+            /* decode a VLC into a token */
+            token = get_vlc2(gb, table->table, 5, 3);
+            debug_vlc(" token = %2d, ", token);
+            /* use the token to get a zero run, a coefficient, and an eob run */
+            if (token <= 6) {
+                eob_run = eob_run_base[token];
+                if (eob_run_get_bits[token])
+                    eob_run += get_bits(gb, eob_run_get_bits[token]);
+                coeff = zero_run = 0;
+            } else {
+                bits_to_get = coeff_get_bits[token];
+                if (!bits_to_get)
+                    coeff = coeff_tables[token][0];
+                else
+                    coeff = coeff_tables[token][get_bits(gb, bits_to_get)];
+
+                zero_run = zero_run_base[token];
+                if (zero_run_get_bits[token])
+                    zero_run += get_bits(gb, zero_run_get_bits[token]);
+            }
+        }
+
+        if (!eob_run) {
+            fragment->coeff_count += zero_run;
+            if (fragment->coeff_count < 64){
+                fragment->next_coeff->coeff= coeff;
+                fragment->next_coeff->index= perm[fragment->coeff_count++]; //FIXME perm here already?
+                fragment->next_coeff->next= s->next_coeff;
+                s->next_coeff->next=NULL;
+                fragment->next_coeff= s->next_coeff++;
+            }
+            debug_vlc(" fragment %d coeff = %d\n",
+                s->coded_fragment_list[i], fragment->next_coeff[coeff_index]);
+        } else {
+            fragment->coeff_count |= 128;
+            debug_vlc(" fragment %d eob with %d coefficients\n",
+                s->coded_fragment_list[i], fragment->coeff_count&127);
+            eob_run--;
+        }
+    }
+
+    return eob_run;
+}
+
+/*
+ * This function unpacks all of the DCT coefficient data from the
+ * bitstream.
+ */
+static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
+{
+    int i;
+    int dc_y_table;
+    int dc_c_table;
+    int ac_y_table;
+    int ac_c_table;
+    int residual_eob_run = 0;
+
+    /* fetch the DC table indices */
+    dc_y_table = get_bits(gb, 4);
+    dc_c_table = get_bits(gb, 4);
+
+    /* unpack the Y plane DC coefficients */
+    debug_vp3("  vp3: unpacking Y plane DC coefficients using table %d\n",
+        dc_y_table);
+    residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_y_table], 0,
+        s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
+
+    /* unpack the C plane DC coefficients */
+    debug_vp3("  vp3: unpacking C plane DC coefficients using table %d\n",
+        dc_c_table);
+    residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_c_table], 0,
+        s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
+
+    /* fetch the AC table indices */
+    ac_y_table = get_bits(gb, 4);
+    ac_c_table = get_bits(gb, 4);
+
+    /* unpack the group 1 AC coefficients (coeffs 1-5) */
+    for (i = 1; i <= 5; i++) {
+
+        debug_vp3("  vp3: unpacking level %d Y plane AC coefficients using table %d\n",
+            i, ac_y_table);
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_1[ac_y_table], i,
+            s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
+
+        debug_vp3("  vp3: unpacking level %d C plane AC coefficients using table %d\n",
+            i, ac_c_table);
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_1[ac_c_table], i,
+            s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
+    }
+
+    /* unpack the group 2 AC coefficients (coeffs 6-14) */
+    for (i = 6; i <= 14; i++) {
+
+        debug_vp3("  vp3: unpacking level %d Y plane AC coefficients using table %d\n",
+            i, ac_y_table);
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_2[ac_y_table], i,
+            s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
+
+        debug_vp3("  vp3: unpacking level %d C plane AC coefficients using table %d\n",
+            i, ac_c_table);
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_2[ac_c_table], i,
+            s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
+    }
+
+    /* unpack the group 3 AC coefficients (coeffs 15-27) */
+    for (i = 15; i <= 27; i++) {
+
+        debug_vp3("  vp3: unpacking level %d Y plane AC coefficients using table %d\n",
+            i, ac_y_table);
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_3[ac_y_table], i,
+            s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
+
+        debug_vp3("  vp3: unpacking level %d C plane AC coefficients using table %d\n",
+            i, ac_c_table);
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_3[ac_c_table], i,
+            s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
+    }
+
+    /* unpack the group 4 AC coefficients (coeffs 28-63) */
+    for (i = 28; i <= 63; i++) {
+
+        debug_vp3("  vp3: unpacking level %d Y plane AC coefficients using table %d\n",
+            i, ac_y_table);
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_4[ac_y_table], i,
+            s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
+
+        debug_vp3("  vp3: unpacking level %d C plane AC coefficients using table %d\n",
+            i, ac_c_table);
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_4[ac_c_table], i,
+            s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
+    }
+
+    return 0;
+}
+
+/*
+ * This function reverses the DC prediction for each coded fragment in
+ * the frame. Much of this function is adapted directly from the original
+ * VP3 source code.
+ */
+#define COMPATIBLE_FRAME(x) \
+  (compatible_frame[s->all_fragments[x].coding_method] == current_frame_type)
+#define FRAME_CODED(x) (s->all_fragments[x].coding_method != MODE_COPY)
+#define DC_COEFF(u) (s->coeffs[u].index ? 0 : s->coeffs[u].coeff) //FIXME do somethin to simplify this
+
+static void reverse_dc_prediction(Vp3DecodeContext *s,
+                                  int first_fragment,
+                                  int fragment_width,
+                                  int fragment_height)
+{
+
+#define PUL 8
+#define PU 4
+#define PUR 2
+#define PL 1
+
+    int x, y;
+    int i = first_fragment;
+
+    int predicted_dc;
+
+    /* DC values for the left, up-left, up, and up-right fragments */
+    int vl, vul, vu, vur;
+
+    /* indices for the left, up-left, up, and up-right fragments */
+    int l, ul, u, ur;
+
+    /*
+     * The 6 fields mean:
+     *   0: up-left multiplier
+     *   1: up multiplier
+     *   2: up-right multiplier
+     *   3: left multiplier
+     */
+    int predictor_transform[16][4] = {
+        {  0,  0,  0,  0},
+        {  0,  0,  0,128},        // PL
+        {  0,  0,128,  0},        // PUR
+        {  0,  0, 53, 75},        // PUR|PL
+        {  0,128,  0,  0},        // PU
+        {  0, 64,  0, 64},        // PU|PL
+        {  0,128,  0,  0},        // PU|PUR
+        {  0,  0, 53, 75},        // PU|PUR|PL
+        {128,  0,  0,  0},        // PUL
+        {  0,  0,  0,128},        // PUL|PL
+        { 64,  0, 64,  0},        // PUL|PUR
+        {  0,  0, 53, 75},        // PUL|PUR|PL
+        {  0,128,  0,  0},        // PUL|PU
+       {-104,116,  0,116},        // PUL|PU|PL
+        { 24, 80, 24,  0},        // PUL|PU|PUR
+       {-104,116,  0,116}         // PUL|PU|PUR|PL
+    };
+
+    /* This table shows which types of blocks can use other blocks for
+     * prediction. For example, INTRA is the only mode in this table to
+     * have a frame number of 0. That means INTRA blocks can only predict
+     * from other INTRA blocks. There are 2 golden frame coding types;
+     * blocks encoding in these modes can only predict from other blocks
+     * that were encoded with these 1 of these 2 modes. */
+    unsigned char compatible_frame[8] = {
+        1,    /* MODE_INTER_NO_MV */
+        0,    /* MODE_INTRA */
+        1,    /* MODE_INTER_PLUS_MV */
+        1,    /* MODE_INTER_LAST_MV */
+        1,    /* MODE_INTER_PRIOR_MV */
+        2,    /* MODE_USING_GOLDEN */
+        2,    /* MODE_GOLDEN_MV */
+        1     /* MODE_INTER_FOUR_MV */
+    };
+    int current_frame_type;
+
+    /* there is a last DC predictor for each of the 3 frame types */
+    short last_dc[3];
+
+    int transform = 0;
+
+    debug_vp3("  vp3: reversing DC prediction\n");
+
+    vul = vu = vur = vl = 0;
+    last_dc[0] = last_dc[1] = last_dc[2] = 0;
+
+    /* for each fragment row... */
+    for (y = 0; y < fragment_height; y++) {
+
+        /* for each fragment in a row... */
+        for (x = 0; x < fragment_width; x++, i++) {
+
+            /* reverse prediction if this block was coded */
+            if (s->all_fragments[i].coding_method != MODE_COPY) {
+
+                current_frame_type =
+                    compatible_frame[s->all_fragments[i].coding_method];
+                debug_dc_pred(" frag %d: orig DC = %d, ",
+                    i, DC_COEFF(i));
+
+                transform= 0;
+                if(x){
+                    l= i-1;
+                    vl = DC_COEFF(l);
+                    if(FRAME_CODED(l) && COMPATIBLE_FRAME(l))
+                        transform |= PL;
+                }
+                if(y){
+                    u= i-fragment_width;
+                    vu = DC_COEFF(u);
+                    if(FRAME_CODED(u) && COMPATIBLE_FRAME(u))
+                        transform |= PU;
+                    if(x){
+                        ul= i-fragment_width-1;
+                        vul = DC_COEFF(ul);
+                        if(FRAME_CODED(ul) && COMPATIBLE_FRAME(ul))
+                            transform |= PUL;
+                    }
+                    if(x + 1 < fragment_width){
+                        ur= i-fragment_width+1;
+                        vur = DC_COEFF(ur);
+                        if(FRAME_CODED(ur) && COMPATIBLE_FRAME(ur))
+                            transform |= PUR;
+                    }
+                }
+
+                debug_dc_pred("transform = %d, ", transform);
+
+                if (transform == 0) {
+
+                    /* if there were no fragments to predict from, use last
+                     * DC saved */
+                    predicted_dc = last_dc[current_frame_type];
+                    debug_dc_pred("from last DC (%d) = %d\n",
+                        current_frame_type, DC_COEFF(i));
+
+                } else {
+
+                    /* apply the appropriate predictor transform */
+                    predicted_dc =
+                        (predictor_transform[transform][0] * vul) +
+                        (predictor_transform[transform][1] * vu) +
+                        (predictor_transform[transform][2] * vur) +
+                        (predictor_transform[transform][3] * vl);
+
+                    predicted_dc /= 128;
+
+                    /* check for outranging on the [ul u l] and
+                     * [ul u ur l] predictors */
+                    if ((transform == 13) || (transform == 15)) {
+                        if (FFABS(predicted_dc - vu) > 128)
+                            predicted_dc = vu;
+                        else if (FFABS(predicted_dc - vl) > 128)
+                            predicted_dc = vl;
+                        else if (FFABS(predicted_dc - vul) > 128)
+                            predicted_dc = vul;
+                    }
+
+                    debug_dc_pred("from pred DC = %d\n",
+                    DC_COEFF(i));
+                }
+
+                /* at long last, apply the predictor */
+                if(s->coeffs[i].index){
+                    *s->next_coeff= s->coeffs[i];
+                    s->coeffs[i].index=0;
+                    s->coeffs[i].coeff=0;
+                    s->coeffs[i].next= s->next_coeff++;
+                }
+                s->coeffs[i].coeff += predicted_dc;
+                /* save the DC */
+                last_dc[current_frame_type] = DC_COEFF(i);
+                if(DC_COEFF(i) && !(s->all_fragments[i].coeff_count&127)){
+                    s->all_fragments[i].coeff_count= 129;
+//                    s->all_fragments[i].next_coeff= s->next_coeff;
+                    s->coeffs[i].next= s->next_coeff;
+                    (s->next_coeff++)->next=NULL;
+                }
+            }
+        }
+    }
+}
+
+
+static void horizontal_filter(unsigned char *first_pixel, int stride,
+    int *bounding_values);
+static void vertical_filter(unsigned char *first_pixel, int stride,
+    int *bounding_values);
+
+/*
+ * Perform the final rendering for a particular slice of data.
+ * The slice number ranges from 0..(macroblock_height - 1).
+ */
+static void render_slice(Vp3DecodeContext *s, int slice)
+{
+    int x;
+    int m, n;
+    int16_t *dequantizer;
+    DECLARE_ALIGNED_16(DCTELEM, block[64]);
+    int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef;
+    int motion_halfpel_index;
+    uint8_t *motion_source;
+    int plane;
+    int current_macroblock_entry = slice * s->macroblock_width * 6;
+
+    if (slice >= s->macroblock_height)
+        return;
+
+    for (plane = 0; plane < 3; plane++) {
+        uint8_t *output_plane = s->current_frame.data    [plane];
+        uint8_t *  last_plane = s->   last_frame.data    [plane];
+        uint8_t *golden_plane = s-> golden_frame.data    [plane];
+        int stride            = s->current_frame.linesize[plane];
+        int plane_width       = s->width  >> !!plane;
+        int plane_height      = s->height >> !!plane;
+        int y =        slice *  FRAGMENT_PIXELS << !plane ;
+        int slice_height = y + (FRAGMENT_PIXELS << !plane);
+        int i = s->macroblock_fragments[current_macroblock_entry + plane + 3*!!plane];
+
+        if (!s->flipped_image) stride = -stride;
+
+
+        if(FFABS(stride) > 2048)
+            return; //various tables are fixed size
+
+        /* for each fragment row in the slice (both of them)... */
+        for (; y < slice_height; y += 8) {
+
+            /* for each fragment in a row... */
+            for (x = 0; x < plane_width; x += 8, i++) {
+
+                if ((i < 0) || (i >= s->fragment_count)) {
+                    av_log(s->avctx, AV_LOG_ERROR, "  vp3:render_slice(): bad fragment number (%d)\n", i);
+                    return;
+                }
+
+                /* transform if this block was coded */
+                if ((s->all_fragments[i].coding_method != MODE_COPY) &&
+                    !((s->avctx->flags & CODEC_FLAG_GRAY) && plane)) {
+
+                    if ((s->all_fragments[i].coding_method == MODE_USING_GOLDEN) ||
+                        (s->all_fragments[i].coding_method == MODE_GOLDEN_MV))
+                        motion_source= golden_plane;
+                    else
+                        motion_source= last_plane;
+
+                    motion_source += s->all_fragments[i].first_pixel;
+                    motion_halfpel_index = 0;
+
+                    /* sort out the motion vector if this fragment is coded
+                     * using a motion vector method */
+                    if ((s->all_fragments[i].coding_method > MODE_INTRA) &&
+                        (s->all_fragments[i].coding_method != MODE_USING_GOLDEN)) {
+                        int src_x, src_y;
+                        motion_x = s->all_fragments[i].motion_x;
+                        motion_y = s->all_fragments[i].motion_y;
+                        if(plane){
+                            motion_x= (motion_x>>1) | (motion_x&1);
+                            motion_y= (motion_y>>1) | (motion_y&1);
+                        }
+
+                        src_x= (motion_x>>1) + x;
+                        src_y= (motion_y>>1) + y;
+                        if ((motion_x == 127) || (motion_y == 127))
+                            av_log(s->avctx, AV_LOG_ERROR, " help! got invalid motion vector! (%X, %X)\n", motion_x, motion_y);
+
+                        motion_halfpel_index = motion_x & 0x01;
+                        motion_source += (motion_x >> 1);
+
+                        motion_halfpel_index |= (motion_y & 0x01) << 1;
+                        motion_source += ((motion_y >> 1) * stride);
+
+                        if(src_x<0 || src_y<0 || src_x + 9 >= plane_width || src_y + 9 >= plane_height){
+                            uint8_t *temp= s->edge_emu_buffer;
+                            if(stride<0) temp -= 9*stride;
+                            else temp += 9*stride;
+
+                            ff_emulated_edge_mc(temp, motion_source, stride, 9, 9, src_x, src_y, plane_width, plane_height);
+                            motion_source= temp;
+                        }
+                    }
+
+
+                    /* first, take care of copying a block from either the
+                     * previous or the golden frame */
+                    if (s->all_fragments[i].coding_method != MODE_INTRA) {
+                        /* Note, it is possible to implement all MC cases with
+                           put_no_rnd_pixels_l2 which would look more like the
+                           VP3 source but this would be slower as
+                           put_no_rnd_pixels_tab is better optimzed */
+                        if(motion_halfpel_index != 3){
+                            s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index](
+                                output_plane + s->all_fragments[i].first_pixel,
+                                motion_source, stride, 8);
+                        }else{
+                            int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1
+                            s->dsp.put_no_rnd_pixels_l2[1](
+                                output_plane + s->all_fragments[i].first_pixel,
+                                motion_source - d,
+                                motion_source + stride + 1 + d,
+                                stride, 8);
+                        }
+                        dequantizer = s->qmat[1][plane];
+                    }else{
+                        dequantizer = s->qmat[0][plane];
+                    }
+
+                    /* dequantize the DCT coefficients */
+                    debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n",
+                        i, s->all_fragments[i].coding_method,
+                        DC_COEFF(i), dequantizer[0]);
+
+                    if(s->avctx->idct_algo==FF_IDCT_VP3){
+                        Coeff *coeff= s->coeffs + i;
+                        memset(block, 0, sizeof(block));
+                        while(coeff->next){
+                            block[coeff->index]= coeff->coeff * dequantizer[coeff->index];
+                            coeff= coeff->next;
+                        }
+                    }else{
+                        Coeff *coeff= s->coeffs + i;
+                        memset(block, 0, sizeof(block));
+                        while(coeff->next){
+                            block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2;
+                            coeff= coeff->next;
+                        }
+                    }
+
+                    /* invert DCT and place (or add) in final output */
+
+                    if (s->all_fragments[i].coding_method == MODE_INTRA) {
+                        if(s->avctx->idct_algo!=FF_IDCT_VP3)
+                            block[0] += 128<<3;
+                        s->dsp.idct_put(
+                            output_plane + s->all_fragments[i].first_pixel,
+                            stride,
+                            block);
+                    } else {
+                        s->dsp.idct_add(
+                            output_plane + s->all_fragments[i].first_pixel,
+                            stride,
+                            block);
+                    }
+
+                    debug_idct("block after idct_%s():\n",
+                        (s->all_fragments[i].coding_method == MODE_INTRA)?
+                        "put" : "add");
+                    for (m = 0; m < 8; m++) {
+                        for (n = 0; n < 8; n++) {
+                            debug_idct(" %3d", *(output_plane +
+                                s->all_fragments[i].first_pixel + (m * stride + n)));
+                        }
+                        debug_idct("\n");
+                    }
+                    debug_idct("\n");
+
+                } else {
+
+                    /* copy directly from the previous frame */
+                    s->dsp.put_pixels_tab[1][0](
+                        output_plane + s->all_fragments[i].first_pixel,
+                        last_plane + s->all_fragments[i].first_pixel,
+                        stride, 8);
+
+                }
+#if 0
+                /* perform the left edge filter if:
+                 *   - the fragment is not on the left column
+                 *   - the fragment is coded in this frame
+                 *   - the fragment is not coded in this frame but the left
+                 *     fragment is coded in this frame (this is done instead
+                 *     of a right edge filter when rendering the left fragment
+                 *     since this fragment is not available yet) */
+                if ((x > 0) &&
+                    ((s->all_fragments[i].coding_method != MODE_COPY) ||
+                     ((s->all_fragments[i].coding_method == MODE_COPY) &&
+                      (s->all_fragments[i - 1].coding_method != MODE_COPY)) )) {
+                    horizontal_filter(
+                        output_plane + s->all_fragments[i].first_pixel + 7*stride,
+                        -stride, s->bounding_values_array + 127);
+                }
+
+                /* perform the top edge filter if:
+                 *   - the fragment is not on the top row
+                 *   - the fragment is coded in this frame
+                 *   - the fragment is not coded in this frame but the above
+                 *     fragment is coded in this frame (this is done instead
+                 *     of a bottom edge filter when rendering the above
+                 *     fragment since this fragment is not available yet) */
+                if ((y > 0) &&
+                    ((s->all_fragments[i].coding_method != MODE_COPY) ||
+                     ((s->all_fragments[i].coding_method == MODE_COPY) &&
+                      (s->all_fragments[i - fragment_width].coding_method != MODE_COPY)) )) {
+                    vertical_filter(
+                        output_plane + s->all_fragments[i].first_pixel - stride,
+                        -stride, s->bounding_values_array + 127);
+                }
+#endif
+            }
+        }
+    }
+
+     /* this looks like a good place for slice dispatch... */
+     /* algorithm:
+      *   if (slice == s->macroblock_height - 1)
+      *     dispatch (both last slice & 2nd-to-last slice);
+      *   else if (slice > 0)
+      *     dispatch (slice - 1);
+      */
+
+    emms_c();
+}
+
+static void horizontal_filter(unsigned char *first_pixel, int stride,
+    int *bounding_values)
+{
+    unsigned char *end;
+    int filter_value;
+
+    for (end= first_pixel + 8*stride; first_pixel != end; first_pixel += stride) {
+        filter_value =
+            (first_pixel[-2] - first_pixel[ 1])
+         +3*(first_pixel[ 0] - first_pixel[-1]);
+        filter_value = bounding_values[(filter_value + 4) >> 3];
+        first_pixel[-1] = clip_uint8(first_pixel[-1] + filter_value);
+        first_pixel[ 0] = clip_uint8(first_pixel[ 0] - filter_value);
+    }
+}
+
+static void vertical_filter(unsigned char *first_pixel, int stride,
+    int *bounding_values)
+{
+    unsigned char *end;
+    int filter_value;
+    const int nstride= -stride;
+
+    for (end= first_pixel + 8; first_pixel < end; first_pixel++) {
+        filter_value =
+            (first_pixel[2 * nstride] - first_pixel[ stride])
+         +3*(first_pixel[0          ] - first_pixel[nstride]);
+        filter_value = bounding_values[(filter_value + 4) >> 3];
+        first_pixel[nstride] = clip_uint8(first_pixel[nstride] + filter_value);
+        first_pixel[0] = clip_uint8(first_pixel[0] - filter_value);
+    }
+}
+
+static void apply_loop_filter(Vp3DecodeContext *s)
+{
+    int plane;
+    int x, y;
+    int *bounding_values= s->bounding_values_array+127;
+
+#if 0
+    int bounding_values_array[256];
+    int filter_limit;
+
+    /* find the right loop limit value */
+    for (x = 63; x >= 0; x--) {
+        if (vp31_ac_scale_factor[x] >= s->quality_index)
+            break;
+    }
+    filter_limit = vp31_filter_limit_values[s->quality_index];
+
+    /* set up the bounding values */
+    memset(bounding_values_array, 0, 256 * sizeof(int));
+    for (x = 0; x < filter_limit; x++) {
+        bounding_values[-x - filter_limit] = -filter_limit + x;
+        bounding_values[-x] = -x;
+        bounding_values[x] = x;
+        bounding_values[x + filter_limit] = filter_limit - x;
+    }
+#endif
+
+    for (plane = 0; plane < 3; plane++) {
+        int width           = s->fragment_width  >> !!plane;
+        int height          = s->fragment_height >> !!plane;
+        int fragment        = s->fragment_start        [plane];
+        int stride          = s->current_frame.linesize[plane];
+        uint8_t *plane_data = s->current_frame.data    [plane];
+        if (!s->flipped_image) stride = -stride;
+
+        for (y = 0; y < height; y++) {
+
+            for (x = 0; x < width; x++) {
+START_TIMER
+                /* do not perform left edge filter for left columns frags */
+                if ((x > 0) &&
+                    (s->all_fragments[fragment].coding_method != MODE_COPY)) {
+                    horizontal_filter(
+                        plane_data + s->all_fragments[fragment].first_pixel,
+                        stride, bounding_values);
+                }
+
+                /* do not perform top edge filter for top row fragments */
+                if ((y > 0) &&
+                    (s->all_fragments[fragment].coding_method != MODE_COPY)) {
+                    vertical_filter(
+                        plane_data + s->all_fragments[fragment].first_pixel,
+                        stride, bounding_values);
+                }
+
+                /* do not perform right edge filter for right column
+                 * fragments or if right fragment neighbor is also coded
+                 * in this frame (it will be filtered in next iteration) */
+                if ((x < width - 1) &&
+                    (s->all_fragments[fragment].coding_method != MODE_COPY) &&
+                    (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) {
+                    horizontal_filter(
+                        plane_data + s->all_fragments[fragment + 1].first_pixel,
+                        stride, bounding_values);
+                }
+
+                /* do not perform bottom edge filter for bottom row
+                 * fragments or if bottom fragment neighbor is also coded
+                 * in this frame (it will be filtered in the next row) */
+                if ((y < height - 1) &&
+                    (s->all_fragments[fragment].coding_method != MODE_COPY) &&
+                    (s->all_fragments[fragment + width].coding_method == MODE_COPY)) {
+                    vertical_filter(
+                        plane_data + s->all_fragments[fragment + width].first_pixel,
+                        stride, bounding_values);
+                }
+
+                fragment++;
+STOP_TIMER("loop filter")
+            }
+        }
+    }
+}
+
+/*
+ * This function computes the first pixel addresses for each fragment.
+ * This function needs to be invoked after the first frame is allocated
+ * so that it has access to the plane strides.
+ */
+static void vp3_calculate_pixel_addresses(Vp3DecodeContext *s)
+{
+
+    int i, x, y;
+
+    /* figure out the first pixel addresses for each of the fragments */
+    /* Y plane */
+    i = 0;
+    for (y = s->fragment_height; y > 0; y--) {
+        for (x = 0; x < s->fragment_width; x++) {
+            s->all_fragments[i++].first_pixel =
+                s->golden_frame.linesize[0] * y * FRAGMENT_PIXELS -
+                    s->golden_frame.linesize[0] +
+                    x * FRAGMENT_PIXELS;
+            debug_init("  fragment %d, first pixel @ %d\n",
+                i-1, s->all_fragments[i-1].first_pixel);
+        }
+    }
+
+    /* U plane */
+    i = s->fragment_start[1];
+    for (y = s->fragment_height / 2; y > 0; y--) {
+        for (x = 0; x < s->fragment_width / 2; x++) {
+            s->all_fragments[i++].first_pixel =
+                s->golden_frame.linesize[1] * y * FRAGMENT_PIXELS -
+                    s->golden_frame.linesize[1] +
+                    x * FRAGMENT_PIXELS;
+            debug_init("  fragment %d, first pixel @ %d\n",
+                i-1, s->all_fragments[i-1].first_pixel);
+        }
+    }
+
+    /* V plane */
+    i = s->fragment_start[2];
+    for (y = s->fragment_height / 2; y > 0; y--) {
+        for (x = 0; x < s->fragment_width / 2; x++) {
+            s->all_fragments[i++].first_pixel =
+                s->golden_frame.linesize[2] * y * FRAGMENT_PIXELS -
+                    s->golden_frame.linesize[2] +
+                    x * FRAGMENT_PIXELS;
+            debug_init("  fragment %d, first pixel @ %d\n",
+                i-1, s->all_fragments[i-1].first_pixel);
+        }
+    }
+}
+
+/* FIXME: this should be merged with the above! */
+static void theora_calculate_pixel_addresses(Vp3DecodeContext *s)
+{
+
+    int i, x, y;
+
+    /* figure out the first pixel addresses for each of the fragments */
+    /* Y plane */
+    i = 0;
+    for (y = 1; y <= s->fragment_height; y++) {
+        for (x = 0; x < s->fragment_width; x++) {
+            s->all_fragments[i++].first_pixel =
+                s->golden_frame.linesize[0] * y * FRAGMENT_PIXELS -
+                    s->golden_frame.linesize[0] +
+                    x * FRAGMENT_PIXELS;
+            debug_init("  fragment %d, first pixel @ %d\n",
+                i-1, s->all_fragments[i-1].first_pixel);
+        }
+    }
+
+    /* U plane */
+    i = s->fragment_start[1];
+    for (y = 1; y <= s->fragment_height / 2; y++) {
+        for (x = 0; x < s->fragment_width / 2; x++) {
+            s->all_fragments[i++].first_pixel =
+                s->golden_frame.linesize[1] * y * FRAGMENT_PIXELS -
+                    s->golden_frame.linesize[1] +
+                    x * FRAGMENT_PIXELS;
+            debug_init("  fragment %d, first pixel @ %d\n",
+                i-1, s->all_fragments[i-1].first_pixel);
+        }
+    }
+
+    /* V plane */
+    i = s->fragment_start[2];
+    for (y = 1; y <= s->fragment_height / 2; y++) {
+        for (x = 0; x < s->fragment_width / 2; x++) {
+            s->all_fragments[i++].first_pixel =
+                s->golden_frame.linesize[2] * y * FRAGMENT_PIXELS -
+                    s->golden_frame.linesize[2] +
+                    x * FRAGMENT_PIXELS;
+            debug_init("  fragment %d, first pixel @ %d\n",
+                i-1, s->all_fragments[i-1].first_pixel);
+        }
+    }
+}
+
+/*
+ * This is the ffmpeg/libavcodec API init function.
+ */
+static int vp3_decode_init(AVCodecContext *avctx)
+{
+    Vp3DecodeContext *s = avctx->priv_data;
+    int i, inter, plane;
+    int c_width;
+    int c_height;
+    int y_superblock_count;
+    int c_superblock_count;
+
+    if (avctx->codec_tag == MKTAG('V','P','3','0'))
+        s->version = 0;
+    else
+        s->version = 1;
+
+    s->avctx = avctx;
+    s->width = (avctx->width + 15) & 0xFFFFFFF0;
+    s->height = (avctx->height + 15) & 0xFFFFFFF0;
+    avctx->pix_fmt = PIX_FMT_YUV420P;
+    avctx->has_b_frames = 0;
+    if(avctx->idct_algo==FF_IDCT_AUTO)
+        avctx->idct_algo=FF_IDCT_VP3;
+    dsputil_init(&s->dsp, avctx);
+
+    ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);
+
+    /* initialize to an impossible value which will force a recalculation
+     * in the first frame decode */
+    s->quality_index = -1;
+
+    s->y_superblock_width = (s->width + 31) / 32;
+    s->y_superblock_height = (s->height + 31) / 32;
+    y_superblock_count = s->y_superblock_width * s->y_superblock_height;
+
+    /* work out the dimensions for the C planes */
+    c_width = s->width / 2;
+    c_height = s->height / 2;
+    s->c_superblock_width = (c_width + 31) / 32;
+    s->c_superblock_height = (c_height + 31) / 32;
+    c_superblock_count = s->c_superblock_width * s->c_superblock_height;
+
+    s->superblock_count = y_superblock_count + (c_superblock_count * 2);
+    s->u_superblock_start = y_superblock_count;
+    s->v_superblock_start = s->u_superblock_start + c_superblock_count;
+    s->superblock_coding = av_malloc(s->superblock_count);
+
+    s->macroblock_width = (s->width + 15) / 16;
+    s->macroblock_height = (s->height + 15) / 16;
+    s->macroblock_count = s->macroblock_width * s->macroblock_height;
+
+    s->fragment_width = s->width / FRAGMENT_PIXELS;
+    s->fragment_height = s->height / FRAGMENT_PIXELS;
+
+    /* fragment count covers all 8x8 blocks for all 3 planes */
+    s->fragment_count = s->fragment_width * s->fragment_height * 3 / 2;
+    s->fragment_start[1] = s->fragment_width * s->fragment_height;
+    s->fragment_start[2] = s->fragment_width * s->fragment_height * 5 / 4;
+
+    debug_init("  Y plane: %d x %d\n", s->width, s->height);
+    debug_init("  C plane: %d x %d\n", c_width, c_height);
+    debug_init("  Y superblocks: %d x %d, %d total\n",
+        s->y_superblock_width, s->y_superblock_height, y_superblock_count);
+    debug_init("  C superblocks: %d x %d, %d total\n",
+        s->c_superblock_width, s->c_superblock_height, c_superblock_count);
+    debug_init("  total superblocks = %d, U starts @ %d, V starts @ %d\n",
+        s->superblock_count, s->u_superblock_start, s->v_superblock_start);
+    debug_init("  macroblocks: %d x %d, %d total\n",
+        s->macroblock_width, s->macroblock_height, s->macroblock_count);
+    debug_init("  %d fragments, %d x %d, u starts @ %d, v starts @ %d\n",
+        s->fragment_count,
+        s->fragment_width,
+        s->fragment_height,
+        s->fragment_start[1],
+        s->fragment_start[2]);
+
+    s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment));
+    s->coeffs = av_malloc(s->fragment_count * sizeof(Coeff) * 65);
+    s->coded_fragment_list = av_malloc(s->fragment_count * sizeof(int));
+    s->pixel_addresses_inited = 0;
+
+    if (!s->theora_tables)
+    {
+        for (i = 0; i < 64; i++) {
+            s->coded_dc_scale_factor[i] = vp31_dc_scale_factor[i];
+            s->coded_ac_scale_factor[i] = vp31_ac_scale_factor[i];
+            s->base_matrix[0][i] = vp31_intra_y_dequant[i];
+            s->base_matrix[1][i] = vp31_intra_c_dequant[i];
+            s->base_matrix[2][i] = vp31_inter_dequant[i];
+            s->filter_limit_values[i] = vp31_filter_limit_values[i];
+        }
+
+        for(inter=0; inter<2; inter++){
+            for(plane=0; plane<3; plane++){
+                s->qr_count[inter][plane]= 1;
+                s->qr_size [inter][plane][0]= 63;
+                s->qr_base [inter][plane][0]=
+                s->qr_base [inter][plane][1]= 2*inter + (!!plane)*!inter;
+            }
+        }
+
+        /* init VLC tables */
+        for (i = 0; i < 16; i++) {
+
+            /* DC histograms */
+            init_vlc(&s->dc_vlc[i], 5, 32,
+                &dc_bias[i][0][1], 4, 2,
+                &dc_bias[i][0][0], 4, 2, 0);
+
+            /* group 1 AC histograms */
+            init_vlc(&s->ac_vlc_1[i], 5, 32,
+                &ac_bias_0[i][0][1], 4, 2,
+                &ac_bias_0[i][0][0], 4, 2, 0);
+
+            /* group 2 AC histograms */
+            init_vlc(&s->ac_vlc_2[i], 5, 32,
+                &ac_bias_1[i][0][1], 4, 2,
+                &ac_bias_1[i][0][0], 4, 2, 0);
+
+            /* group 3 AC histograms */
+            init_vlc(&s->ac_vlc_3[i], 5, 32,
+                &ac_bias_2[i][0][1], 4, 2,
+                &ac_bias_2[i][0][0], 4, 2, 0);
+
+            /* group 4 AC histograms */
+            init_vlc(&s->ac_vlc_4[i], 5, 32,
+                &ac_bias_3[i][0][1], 4, 2,
+                &ac_bias_3[i][0][0], 4, 2, 0);
+        }
+    } else {
+        for (i = 0; i < 16; i++) {
+
+            /* DC histograms */
+            init_vlc(&s->dc_vlc[i], 5, 32,
+                &s->huffman_table[i][0][1], 4, 2,
+                &s->huffman_table[i][0][0], 4, 2, 0);
+
+            /* group 1 AC histograms */
+            init_vlc(&s->ac_vlc_1[i], 5, 32,
+                &s->huffman_table[i+16][0][1], 4, 2,
+                &s->huffman_table[i+16][0][0], 4, 2, 0);
+
+            /* group 2 AC histograms */
+            init_vlc(&s->ac_vlc_2[i], 5, 32,
+                &s->huffman_table[i+16*2][0][1], 4, 2,
+                &s->huffman_table[i+16*2][0][0], 4, 2, 0);
+
+            /* group 3 AC histograms */
+            init_vlc(&s->ac_vlc_3[i], 5, 32,
+                &s->huffman_table[i+16*3][0][1], 4, 2,
+                &s->huffman_table[i+16*3][0][0], 4, 2, 0);
+
+            /* group 4 AC histograms */
+            init_vlc(&s->ac_vlc_4[i], 5, 32,
+                &s->huffman_table[i+16*4][0][1], 4, 2,
+                &s->huffman_table[i+16*4][0][0], 4, 2, 0);
+        }
+    }
+
+    init_vlc(&s->superblock_run_length_vlc, 6, 34,
+        &superblock_run_length_vlc_table[0][1], 4, 2,
+        &superblock_run_length_vlc_table[0][0], 4, 2, 0);
+
+    init_vlc(&s->fragment_run_length_vlc, 5, 30,
+        &fragment_run_length_vlc_table[0][1], 4, 2,
+        &fragment_run_length_vlc_table[0][0], 4, 2, 0);
+
+    init_vlc(&s->mode_code_vlc, 3, 8,
+        &mode_code_vlc_table[0][1], 2, 1,
+        &mode_code_vlc_table[0][0], 2, 1, 0);
+
+    init_vlc(&s->motion_vector_vlc, 6, 63,
+        &motion_vector_vlc_table[0][1], 2, 1,
+        &motion_vector_vlc_table[0][0], 2, 1, 0);
+
+    /* work out the block mapping tables */
+    s->superblock_fragments = av_malloc(s->superblock_count * 16 * sizeof(int));
+    s->superblock_macroblocks = av_malloc(s->superblock_count * 4 * sizeof(int));
+    s->macroblock_fragments = av_malloc(s->macroblock_count * 6 * sizeof(int));
+    s->macroblock_coding = av_malloc(s->macroblock_count + 1);
+    init_block_mapping(s);
+
+    for (i = 0; i < 3; i++) {
+        s->current_frame.data[i] = NULL;
+        s->last_frame.data[i] = NULL;
+        s->golden_frame.data[i] = NULL;
+    }
+
+    return 0;
+}
+
+/*
+ * This is the ffmpeg/libavcodec API frame decode function.
+ */
+static int vp3_decode_frame(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    Vp3DecodeContext *s = avctx->priv_data;
+    GetBitContext gb;
+    static int counter = 0;
+    int i;
+
+    init_get_bits(&gb, buf, buf_size * 8);
+
+    if (s->theora && get_bits1(&gb))
+    {
+#if 1
+        av_log(avctx, AV_LOG_ERROR, "Header packet passed to frame decoder, skipping\n");
+        return -1;
+#else
+        int ptype = get_bits(&gb, 7);
+
+        skip_bits(&gb, 6*8); /* "theora" */
+
+        switch(ptype)
+        {
+            case 1:
+                theora_decode_comments(avctx, &gb);
+                break;
+            case 2:
+                theora_decode_tables(avctx, &gb);
+                    init_dequantizer(s);
+                break;
+            default:
+                av_log(avctx, AV_LOG_ERROR, "Unknown Theora config packet: %d\n", ptype);
+        }
+        return buf_size;
+#endif
+    }
+
+    s->keyframe = !get_bits1(&gb);
+    if (!s->theora)
+        skip_bits(&gb, 1);
+    s->last_quality_index = s->quality_index;
+
+    s->nqis=0;
+    do{
+        s->qis[s->nqis++]= get_bits(&gb, 6);
+    } while(s->theora >= 0x030200 && s->nqis<3 && get_bits1(&gb));
+
+    s->quality_index= s->qis[0];
+
+    if (s->avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(s->avctx, AV_LOG_INFO, " VP3 %sframe #%d: Q index = %d\n",
+            s->keyframe?"key":"", counter, s->quality_index);
+    counter++;
+
+    if (s->quality_index != s->last_quality_index) {
+        init_dequantizer(s);
+        init_loop_filter(s);
+    }
+
+    if (s->keyframe) {
+        if (!s->theora)
+        {
+            skip_bits(&gb, 4); /* width code */
+            skip_bits(&gb, 4); /* height code */
+            if (s->version)
+            {
+                s->version = get_bits(&gb, 5);
+                if (counter == 1)
+                    av_log(s->avctx, AV_LOG_DEBUG, "VP version: %d\n", s->version);
+            }
+        }
+        if (s->version || s->theora)
+        {
+                if (get_bits1(&gb))
+                    av_log(s->avctx, AV_LOG_ERROR, "Warning, unsupported keyframe coding type?!\n");
+            skip_bits(&gb, 2); /* reserved? */
+        }
+
+        if (s->last_frame.data[0] == s->golden_frame.data[0]) {
+            if (s->golden_frame.data[0])
+                avctx->release_buffer(avctx, &s->golden_frame);
+            s->last_frame= s->golden_frame; /* ensure that we catch any access to this released frame */
+        } else {
+            if (s->golden_frame.data[0])
+                avctx->release_buffer(avctx, &s->golden_frame);
+            if (s->last_frame.data[0])
+                avctx->release_buffer(avctx, &s->last_frame);
+        }
+
+        s->golden_frame.reference = 3;
+        if(avctx->get_buffer(avctx, &s->golden_frame) < 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "vp3: get_buffer() failed\n");
+            return -1;
+        }
+
+        /* golden frame is also the current frame */
+        s->current_frame= s->golden_frame;
+
+        /* time to figure out pixel addresses? */
+        if (!s->pixel_addresses_inited)
+        {
+            if (!s->flipped_image)
+                vp3_calculate_pixel_addresses(s);
+            else
+                theora_calculate_pixel_addresses(s);
+            s->pixel_addresses_inited = 1;
+        }
+    } else {
+        /* allocate a new current frame */
+        s->current_frame.reference = 3;
+        if (!s->pixel_addresses_inited) {
+            av_log(s->avctx, AV_LOG_ERROR, "vp3: first frame not a keyframe\n");
+            return -1;
+        }
+        if(avctx->get_buffer(avctx, &s->current_frame) < 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "vp3: get_buffer() failed\n");
+            return -1;
+        }
+    }
+
+    s->current_frame.qscale_table= s->qscale_table; //FIXME allocate individual tables per AVFrame
+    s->current_frame.qstride= 0;
+
+    {START_TIMER
+    init_frame(s, &gb);
+    STOP_TIMER("init_frame")}
+
+#if KEYFRAMES_ONLY
+if (!s->keyframe) {
+
+    memcpy(s->current_frame.data[0], s->golden_frame.data[0],
+        s->current_frame.linesize[0] * s->height);
+    memcpy(s->current_frame.data[1], s->golden_frame.data[1],
+        s->current_frame.linesize[1] * s->height / 2);
+    memcpy(s->current_frame.data[2], s->golden_frame.data[2],
+        s->current_frame.linesize[2] * s->height / 2);
+
+} else {
+#endif
+
+    {START_TIMER
+    if (unpack_superblocks(s, &gb)){
+        av_log(s->avctx, AV_LOG_ERROR, "error in unpack_superblocks\n");
+        return -1;
+    }
+    STOP_TIMER("unpack_superblocks")}
+    {START_TIMER
+    if (unpack_modes(s, &gb)){
+        av_log(s->avctx, AV_LOG_ERROR, "error in unpack_modes\n");
+        return -1;
+    }
+    STOP_TIMER("unpack_modes")}
+    {START_TIMER
+    if (unpack_vectors(s, &gb)){
+        av_log(s->avctx, AV_LOG_ERROR, "error in unpack_vectors\n");
+        return -1;
+    }
+    STOP_TIMER("unpack_vectors")}
+    {START_TIMER
+    if (unpack_dct_coeffs(s, &gb)){
+        av_log(s->avctx, AV_LOG_ERROR, "error in unpack_dct_coeffs\n");
+        return -1;
+    }
+    STOP_TIMER("unpack_dct_coeffs")}
+    {START_TIMER
+
+    reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height);
+    if ((avctx->flags & CODEC_FLAG_GRAY) == 0) {
+        reverse_dc_prediction(s, s->fragment_start[1],
+            s->fragment_width / 2, s->fragment_height / 2);
+        reverse_dc_prediction(s, s->fragment_start[2],
+            s->fragment_width / 2, s->fragment_height / 2);
+    }
+    STOP_TIMER("reverse_dc_prediction")}
+    {START_TIMER
+
+    for (i = 0; i < s->macroblock_height; i++)
+        render_slice(s, i);
+    STOP_TIMER("render_fragments")}
+
+    {START_TIMER
+    apply_loop_filter(s);
+    STOP_TIMER("apply_loop_filter")}
+#if KEYFRAMES_ONLY
+}
+#endif
+
+    *data_size=sizeof(AVFrame);
+    *(AVFrame*)data= s->current_frame;
+
+    /* release the last frame, if it is allocated and if it is not the
+     * golden frame */
+    if ((s->last_frame.data[0]) &&
+        (s->last_frame.data[0] != s->golden_frame.data[0]))
+        avctx->release_buffer(avctx, &s->last_frame);
+
+    /* shuffle frames (last = current) */
+    s->last_frame= s->current_frame;
+    s->current_frame.data[0]= NULL; /* ensure that we catch any access to this released frame */
+
+    return buf_size;
+}
+
+/*
+ * This is the ffmpeg/libavcodec API module cleanup function.
+ */
+static int vp3_decode_end(AVCodecContext *avctx)
+{
+    Vp3DecodeContext *s = avctx->priv_data;
+
+    av_free(s->all_fragments);
+    av_free(s->coeffs);
+    av_free(s->coded_fragment_list);
+    av_free(s->superblock_fragments);
+    av_free(s->superblock_macroblocks);
+    av_free(s->macroblock_fragments);
+    av_free(s->macroblock_coding);
+
+    /* release all frames */
+    if (s->golden_frame.data[0] && s->golden_frame.data[0] != s->last_frame.data[0])
+        avctx->release_buffer(avctx, &s->golden_frame);
+    if (s->last_frame.data[0])
+        avctx->release_buffer(avctx, &s->last_frame);
+    /* no need to release the current_frame since it will always be pointing
+     * to the same frame as either the golden or last frame */
+
+    return 0;
+}
+
+static int read_huffman_tree(AVCodecContext *avctx, GetBitContext *gb)
+{
+    Vp3DecodeContext *s = avctx->priv_data;
+
+    if (get_bits(gb, 1)) {
+        int token;
+        if (s->entries >= 32) { /* overflow */
+            av_log(avctx, AV_LOG_ERROR, "huffman tree overflow\n");
+            return -1;
+        }
+        token = get_bits(gb, 5);
+        //av_log(avctx, AV_LOG_DEBUG, "hti %d hbits %x token %d entry : %d size %d\n", s->hti, s->hbits, token, s->entries, s->huff_code_size);
+        s->huffman_table[s->hti][token][0] = s->hbits;
+        s->huffman_table[s->hti][token][1] = s->huff_code_size;
+        s->entries++;
+    }
+    else {
+        if (s->huff_code_size >= 32) {/* overflow */
+            av_log(avctx, AV_LOG_ERROR, "huffman tree overflow\n");
+            return -1;
+        }
+        s->huff_code_size++;
+        s->hbits <<= 1;
+        read_huffman_tree(avctx, gb);
+        s->hbits |= 1;
+        read_huffman_tree(avctx, gb);
+        s->hbits >>= 1;
+        s->huff_code_size--;
+    }
+    return 0;
+}
+
+static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
+{
+    Vp3DecodeContext *s = avctx->priv_data;
+
+    s->theora = get_bits_long(gb, 24);
+    av_log(avctx, AV_LOG_INFO, "Theora bitstream version %X\n", s->theora);
+
+    /* 3.2.0 aka alpha3 has the same frame orientation as original vp3 */
+    /* but previous versions have the image flipped relative to vp3 */
+    if (s->theora < 0x030200)
+    {
+        s->flipped_image = 1;
+        av_log(avctx, AV_LOG_DEBUG, "Old (<alpha3) Theora bitstream, flipped image\n");
+    }
+
+    s->width = get_bits(gb, 16) << 4;
+    s->height = get_bits(gb, 16) << 4;
+
+    if(avcodec_check_dimensions(avctx, s->width, s->height)){
+        av_log(avctx, AV_LOG_ERROR, "Invalid dimensions (%dx%d)\n", s->width, s->height);
+        s->width= s->height= 0;
+        return -1;
+    }
+
+    if (s->theora >= 0x030400)
+    {
+        skip_bits(gb, 32); /* total number of superblocks in a frame */
+        // fixme, the next field is 36bits long
+        skip_bits(gb, 32); /* total number of blocks in a frame */
+        skip_bits(gb, 4); /* total number of blocks in a frame */
+        skip_bits(gb, 32); /* total number of macroblocks in a frame */
+
+        skip_bits(gb, 24); /* frame width */
+        skip_bits(gb, 24); /* frame height */
+    }
+    else
+    {
+        skip_bits(gb, 24); /* frame width */
+        skip_bits(gb, 24); /* frame height */
+    }
+
+  if (s->theora >= 0x030200) {
+    skip_bits(gb, 8); /* offset x */
+    skip_bits(gb, 8); /* offset y */
+  }
+
+    skip_bits(gb, 32); /* fps numerator */
+    skip_bits(gb, 32); /* fps denumerator */
+    skip_bits(gb, 24); /* aspect numerator */
+    skip_bits(gb, 24); /* aspect denumerator */
+
+    if (s->theora < 0x030200)
+        skip_bits(gb, 5); /* keyframe frequency force */
+    skip_bits(gb, 8); /* colorspace */
+    if (s->theora >= 0x030400)
+        skip_bits(gb, 2); /* pixel format: 420,res,422,444 */
+    skip_bits(gb, 24); /* bitrate */
+
+    skip_bits(gb, 6); /* quality hint */
+
+    if (s->theora >= 0x030200)
+    {
+        skip_bits(gb, 5); /* keyframe frequency force */
+
+        if (s->theora < 0x030400)
+            skip_bits(gb, 5); /* spare bits */
+    }
+
+//    align_get_bits(gb);
+
+    avctx->width = s->width;
+    avctx->height = s->height;
+
+    return 0;
+}
+
+static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb)
+{
+    Vp3DecodeContext *s = avctx->priv_data;
+    int i, n, matrices, inter, plane;
+
+    if (s->theora >= 0x030200) {
+        n = get_bits(gb, 3);
+        /* loop filter limit values table */
+        for (i = 0; i < 64; i++)
+            s->filter_limit_values[i] = get_bits(gb, n);
+    }
+
+    if (s->theora >= 0x030200)
+        n = get_bits(gb, 4) + 1;
+    else
+        n = 16;
+    /* quality threshold table */
+    for (i = 0; i < 64; i++)
+        s->coded_ac_scale_factor[i] = get_bits(gb, n);
+
+    if (s->theora >= 0x030200)
+        n = get_bits(gb, 4) + 1;
+    else
+        n = 16;
+    /* dc scale factor table */
+    for (i = 0; i < 64; i++)
+        s->coded_dc_scale_factor[i] = get_bits(gb, n);
+
+    if (s->theora >= 0x030200)
+        matrices = get_bits(gb, 9) + 1;
+    else
+        matrices = 3;
+
+    if(matrices > 384){
+        av_log(avctx, AV_LOG_ERROR, "invalid number of base matrixes\n");
+        return -1;
+    }
+
+    for(n=0; n<matrices; n++){
+        for (i = 0; i < 64; i++)
+            s->base_matrix[n][i]= get_bits(gb, 8);
+    }
+
+    for (inter = 0; inter <= 1; inter++) {
+        for (plane = 0; plane <= 2; plane++) {
+            int newqr= 1;
+            if (inter || plane > 0)
+                newqr = get_bits(gb, 1);
+            if (!newqr) {
+                int qtj, plj;
+                if(inter && get_bits(gb, 1)){
+                    qtj = 0;
+                    plj = plane;
+                }else{
+                    qtj= (3*inter + plane - 1) / 3;
+                    plj= (plane + 2) % 3;
+                }
+                s->qr_count[inter][plane]= s->qr_count[qtj][plj];
+                memcpy(s->qr_size[inter][plane], s->qr_size[qtj][plj], sizeof(s->qr_size[0][0]));
+                memcpy(s->qr_base[inter][plane], s->qr_base[qtj][plj], sizeof(s->qr_base[0][0]));
+            } else {
+                int qri= 0;
+                int qi = 0;
+
+                for(;;){
+                    i= get_bits(gb, av_log2(matrices-1)+1);
+                    if(i>= matrices){
+                        av_log(avctx, AV_LOG_ERROR, "invalid base matrix index\n");
+                        return -1;
+                    }
+                    s->qr_base[inter][plane][qri]= i;
+                    if(qi >= 63)
+                        break;
+                    i = get_bits(gb, av_log2(63-qi)+1) + 1;
+                    s->qr_size[inter][plane][qri++]= i;
+                    qi += i;
+                }
+
+                if (qi > 63) {
+                    av_log(avctx, AV_LOG_ERROR, "invalid qi %d > 63\n", qi);
+                    return -1;
+                }
+                s->qr_count[inter][plane]= qri;
+            }
+        }
+    }
+
+    /* Huffman tables */
+    for (s->hti = 0; s->hti < 80; s->hti++) {
+        s->entries = 0;
+        s->huff_code_size = 1;
+        if (!get_bits(gb, 1)) {
+            s->hbits = 0;
+            read_huffman_tree(avctx, gb);
+            s->hbits = 1;
+            read_huffman_tree(avctx, gb);
+        }
+    }
+
+    s->theora_tables = 1;
+
+    return 0;
+}
+
+static int theora_decode_init(AVCodecContext *avctx)
+{
+    Vp3DecodeContext *s = avctx->priv_data;
+    GetBitContext gb;
+    int ptype;
+    uint8_t *p= avctx->extradata;
+    int op_bytes, i;
+
+    s->theora = 1;
+
+    if (!avctx->extradata_size)
+    {
+        av_log(avctx, AV_LOG_ERROR, "Missing extradata!\n");
+        return -1;
+    }
+
+  for(i=0;i<3;i++) {
+    op_bytes = *(p++)<<8;
+    op_bytes += *(p++);
+
+    init_get_bits(&gb, p, op_bytes);
+    p += op_bytes;
+
+    ptype = get_bits(&gb, 8);
+    debug_vp3("Theora headerpacket type: %x\n", ptype);
+
+     if (!(ptype & 0x80))
+     {
+        av_log(avctx, AV_LOG_ERROR, "Invalid extradata!\n");
+//        return -1;
+     }
+
+    // FIXME: check for this aswell
+    skip_bits(&gb, 6*8); /* "theora" */
+
+    switch(ptype)
+    {
+        case 0x80:
+            theora_decode_header(avctx, &gb);
+                break;
+        case 0x81:
+// FIXME: is this needed? it breaks sometimes
+//            theora_decode_comments(avctx, gb);
+            break;
+        case 0x82:
+            theora_decode_tables(avctx, &gb);
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Unknown Theora config packet: %d\n", ptype&~0x80);
+            break;
+    }
+    if(8*op_bytes != get_bits_count(&gb))
+        av_log(avctx, AV_LOG_ERROR, "%d bits left in packet %X\n", 8*op_bytes - get_bits_count(&gb), ptype);
+    if (s->theora < 0x030200)
+        break;
+  }
+
+    vp3_decode_init(avctx);
+    return 0;
+}
+
+AVCodec vp3_decoder = {
+    "vp3",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_VP3,
+    sizeof(Vp3DecodeContext),
+    vp3_decode_init,
+    NULL,
+    vp3_decode_end,
+    vp3_decode_frame,
+    0,
+    NULL
+};
+
+#ifndef CONFIG_LIBTHEORA
+AVCodec theora_decoder = {
+    "theora",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_THEORA,
+    sizeof(Vp3DecodeContext),
+    theora_decode_init,
+    NULL,
+    vp3_decode_end,
+    vp3_decode_frame,
+    0,
+    NULL
+};
+#endif
diff --git a/contrib/ffmpeg/libavcodec/vp3data.h b/contrib/ffmpeg/libavcodec/vp3data.h
new file mode 100644
index 000000000..d69ddfa28
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vp3data.h
@@ -0,0 +1,3178 @@
+/*
+ * copyright (C) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef VP3DATA_H
+#define VP3DATA_H
+
+/* these coefficients dequantize intraframe Y plane coefficients
+ * (note: same as JPEG) */
+static const int16_t vp31_intra_y_dequant[64] =
+{       16,  11,  10,  16,  24,  40,  51,  61,
+        12,  12,  14,  19,  26,  58,  60,  55,
+        14,  13,  16,  24,  40,  57,  69,  56,
+        14,  17,  22,  29,  51,  87,  80,  62,
+        18,  22,  37,  58,  68, 109, 103,  77,
+        24,  35,  55,  64,  81, 104, 113,  92,
+        49,  64,  78,  87, 103, 121, 120, 101,
+        72,  92,  95,  98, 112, 100, 103,  99
+};
+
+/* these coefficients dequantize intraframe C plane coefficients
+ * (note: same as JPEG) */
+static const int16_t vp31_intra_c_dequant[64] =
+{       17,  18,     24,     47,     99,     99,     99,     99,
+        18,  21,     26,     66,     99,     99,     99,     99,
+        24,  26,     56,     99,     99,     99,     99,     99,
+        47,  66,     99,     99,     99,     99,     99,     99,
+        99,  99,     99,     99,     99,     99,     99,     99,
+        99,  99,     99,     99,     99,     99,     99,     99,
+        99,  99,     99,     99,     99,     99,     99,     99,
+        99,  99,     99,     99,     99,     99,     99,     99
+};
+
+/* these coefficients dequantize interframe coefficients (all planes) */
+static const int16_t vp31_inter_dequant[64] =
+{   16,  16,  16,  20,  24,  28,  32,  40,
+    16,  16,  20,  24,  28,  32,  40,  48,
+    16,  20,  24,  28,  32,  40,  48,  64,
+    20,  24,  28,  32,  40,  48,  64,  64,
+    24,  28,  32,  40,  48,  64,  64,  64,
+    28,  32,  40,  48,  64,  64,  64,  96,
+    32,  40,  48,  64,  64,  64,  96,  128,
+    40,  48,  64,  64,  64,  96,  128, 128
+};
+
+static const int16_t vp31_dc_scale_factor[64] =
+{ 220, 200, 190, 180, 170, 170, 160, 160,
+  150, 150, 140, 140, 130, 130, 120, 120,
+  110, 110, 100, 100, 90,  90,  90,  80,
+  80,  80,  70,  70,  70,  60,  60,  60,
+  60,  50,  50,  50,  50,  40,  40,  40,
+  40,  40,  30,  30,  30,  30,  30,  30,
+  30,  20,  20,  20,  20,  20,  20,  20,
+  20,  10,  10,  10,  10,  10,  10,  10
+};
+
+static const uint32_t vp31_ac_scale_factor[64] =
+{ 500,  450,  400,  370,  340,  310, 285, 265,
+  245,  225,  210,  195,  185,  180, 170, 160,
+  150,  145,  135,  130,  125,  115, 110, 107,
+  100,   96,   93,   89,   85,   82,  75,  74,
+   70,   68,   64,   60,   57,   56,  52,  50,
+   49,   45,   44,   43,   40,   38,  37,  35,
+   33,   32,   30,   29,   28,   25,  24,  22,
+   21,   19,   18,   17,   15,   13,  12,  10
+};
+
+static const uint32_t vp31_filter_limit_values[64] =
+{  30, 25, 20, 20, 15, 15, 14, 14,
+   13, 13, 12, 12, 11, 11, 10, 10,
+    9,  9,  8,  8,  7,  7,  7,  7,
+    6,  6,  6,  6,  5,  5,  5,  5,
+    4,  4,  4,  4,  3,  3,  3,  3,
+    2,  2,  2,  2,  2,  2,  2,  2,
+    0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0
+};
+
+static const uint16_t superblock_run_length_vlc_table[34][2] = {
+    { 0, 1 },
+
+    { 4, 3 },    { 5, 3 },
+
+    { 0xC, 4 },  { 0xD, 4 },
+
+    { 0x38, 6 },    { 0x39, 6 },    { 0x3A, 6 },    { 0x3B, 6 },
+
+    { 0xF0, 8 },    { 0xF1, 8 },    { 0xF2, 8 },    { 0xF3, 8 },
+    { 0xF4, 8 },    { 0xF5, 8 },    { 0xF6, 8 },    { 0xF7, 8 },
+
+    { 0x3E0, 10 },    { 0x3E1, 10 },    { 0x3E2, 10 },    { 0x3E3, 10 },
+    { 0x3E4, 10 },    { 0x3E5, 10 },    { 0x3E6, 10 },    { 0x3E7, 10 },
+    { 0x3E8, 10 },    { 0x3E9, 10 },    { 0x3EA, 10 },    { 0x3EB, 10 },
+    { 0x3EC, 10 },    { 0x3ED, 10 },    { 0x3EE, 10 },    { 0x3EF, 10 },
+
+    { 0x3F, 6 }  /* this last VLC is a special case for reading 12 more
+                    bits from stream and adding the value 34 */
+};
+
+static const uint16_t fragment_run_length_vlc_table[30][2] = {
+    /* 1 -> 2 */
+    { 0x0, 2 },    { 0x1, 2 },
+
+    /* 3 -> 4 */
+    { 0x4, 3 },    { 0x5, 3 },
+
+    /* 5 -> 6 */
+    { 0xC, 4 },    { 0xD, 4 },
+
+    /* 7 -> 10 */
+    { 0x38, 6 },   { 0x39, 6 },
+    { 0x3A, 6 },   { 0x3B, 6 },
+
+    /* 11 -> 14 */
+    { 0x78, 7 },   { 0x79, 7 },
+    { 0x7A, 7 },   { 0x7B, 7 },
+
+    /* 15 -> 30 */
+    { 0x1F0, 9 },  { 0x1F1, 9 },  { 0x1F2, 9 },  { 0x1F3, 9 },
+    { 0x1F4, 9 },  { 0x1F5, 9 },  { 0x1F6, 9 },  { 0x1F7, 9 },
+    { 0x1F8, 9 },  { 0x1F9, 9 },  { 0x1FA, 9 },  { 0x1FB, 9 },
+    { 0x1FC, 9 },  { 0x1FD, 9 },  { 0x1FE, 9 },  { 0x1FF, 9 }
+};
+
+static const uint8_t mode_code_vlc_table[8][2] = {
+    { 0, 1 },      { 2, 2 },
+    { 6, 3 },      { 14, 4 },
+    { 30, 5 },     { 62, 6 },
+    { 126, 7 },    { 127, 7 }
+};
+
+static const uint8_t motion_vector_vlc_table[63][2] = {
+    { 0, 3 },
+    { 1, 3 },
+    { 2, 3 },
+
+    { 6, 4 },    { 7, 4 },
+
+    { 8, 4 },    { 9, 4 },
+
+    { 40, 6 },    { 41, 6 },    { 42, 6 },    { 43, 6 },
+    { 44, 6 },    { 45, 6 },    { 46, 6 },    { 47, 6 },
+
+    { 96, 7 },    { 97, 7 },    { 98, 7 },    { 99, 7 },
+    { 100, 7 },   { 101, 7 },   { 102, 7 },   { 103, 7 },
+    { 104, 7 },   { 105, 7 },   { 106, 7 },   { 107, 7 },
+    { 108, 7 },   { 109, 7 },   { 110, 7 },   { 111, 7 },
+
+    { 0xE0, 8 },  { 0xE1, 8 },  { 0xE2, 8 },  { 0xE3, 8 },
+    { 0xE4, 8 },  { 0xE5, 8 },  { 0xE6, 8 },  { 0xE7, 8 },
+    { 0xE8, 8 },  { 0xE9, 8 },  { 0xEA, 8 },  { 0xEB, 8 },
+    { 0xEC, 8 },  { 0xED, 8 },  { 0xEE, 8 },  { 0xEF, 8 },
+
+    { 0xF0, 8 },  { 0xF1, 8 },  { 0xF2, 8 },  { 0xF3, 8 },
+    { 0xF4, 8 },  { 0xF5, 8 },  { 0xF6, 8 },  { 0xF7, 8 },
+    { 0xF8, 8 },  { 0xF9, 8 },  { 0xFA, 8 },  { 0xFB, 8 },
+    { 0xFC, 8 },  { 0xFD, 8 },  { 0xFE, 8 },  { 0xFF, 8 }
+};
+
+static const int motion_vector_table[63] = {
+    0, 1, -1,
+    2, -2,
+    3, -3,
+    4, -4, 5, -5, 6, -6, 7, -7,
+    8, -8, 9, -9, 10, -10, 11, -11, 12, -12, 13, -13, 14, -14, 15, -15,
+    16, -16, 17, -17, 18, -18, 19, -19, 20, -20, 21, -21, 22, -22, 23, -23,
+    24, -24, 25, -25, 26, -26, 27, -27, 28, -28, 29, -29, 30, -30, 31, -31
+};
+
+static const int8_t fixed_motion_vector_table[64] = {
+   0,   0,   1,  -1,   2,  -2,   3,  -3,
+   4,  -4,   5,  -5,   6,  -6,   7,  -7,
+   8,  -8,   9,  -9,  10, -10,  11, -11,
+  12, -12,  13, -13,  14, -14,  15, -15,
+  16, -16,  17, -17,  18, -18,  19, -19,
+  20, -20,  21, -21,  22, -22,  23, -23,
+  24, -24,  25, -25,  26, -26,  27, -27,
+  28, -28,  29, -29,  30, -30,  31, -31
+};
+
+/* only tokens 0..6 indicate eob runs */
+static const int eob_run_base[7] = {
+    1, 2, 3, 4, 8, 16, 0
+};
+static const int eob_run_get_bits[7] = {
+    0, 0, 0, 2, 3, 4, 12
+};
+
+static const int zero_run_base[32] = {
+    0, 0, 0, 0, 0, 0, 0,  /* 0..6 are never used */
+    0, 0,  /* 7..8 */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 9..22 */
+    1, 2, 3, 4, 5,  /* 23..27 */
+    6, 10, 1, 2  /* 28..31 */
+};
+static const int zero_run_get_bits[32] = {
+    0, 0, 0, 0, 0, 0, 0,  /* 0..6 are never used */
+    3, 6,  /* 7..8 */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 9..22 */
+    0, 0, 0, 0, 0,  /* 23..27 */
+    2, 3, 0, 1  /* 28..31 */
+};
+
+static const int coeff_get_bits[32] = {
+    0, 0, 0, 0, 0, 0, 0,  /* 0..6 are never used */
+    0, 0, 0, 0, 0, 0,  /* 7..12 use constant coeffs */
+    1, 1, 1, 1,  /* 13..16 are constants but still need sign bit */
+    2, 3, 4, 5, 6, 10,  /* 17..22, for reading large coeffs */
+    1, 1, 1, 1, 1, 1, 1,  /* 23..29 are constants but still need sign bit */
+    2, 2  /* 30..31 */
+};
+
+static const int16_t coeff_table_token_7_8[1] = { 0 };
+static const int16_t coeff_table_token_9[1] = { 1 };
+static const int16_t coeff_table_token_10[1] = { -1 };
+static const int16_t coeff_table_token_11[1] = { 2 };
+static const int16_t coeff_table_token_12[1] = { -2 };
+
+static const int16_t coeff_table_token_13[2] = { 3, -3 };
+static const int16_t coeff_table_token_14[2] = { 4, -4 };
+static const int16_t coeff_table_token_15[2] = { 5, -5 };
+static const int16_t coeff_table_token_16[2] = { 6, -6 };
+
+static const int16_t coeff_table_token_23_24_25_26_27_28_29[2] = { 1, -1 };
+static const int16_t coeff_table_token_30[4] = { 2, 3, -2, -3 };
+static const int16_t coeff_table_token_31[4] = { 2, 3, -2, -3 };
+
+static const int16_t coeff_table_token_17[4] = {
+    7, 8, -7, -8
+};
+
+static const int16_t coeff_table_token_18[8] = {
+    9, 10, 11, 12, -9, -10, -11, -12
+};
+
+static const int16_t coeff_table_token_19[16] = {
+    13, 14, 15, 16, 17, 18, 19, 20, -13, -14, -15, -16, -17, -18, -19, -20
+};
+
+static const int16_t coeff_table_token_20[32] = {
+    21, 22, 23, 24, 25, 26, 27, 28,
+    29, 30, 31, 32, 33, 34, 35, 36,
+    -21, -22, -23, -24, -25, -26, -27, -28,
+    -29, -30, -31, -32, -33, -34, -35, -36
+};
+
+static const int16_t coeff_table_token_21[64] = {
+    37, 38, 39, 40, 41, 42, 43, 44,
+    45, 46, 47, 48, 49, 50, 51, 52,
+    53, 54, 55, 56, 57, 58, 59, 60,
+    61, 62, 63, 64, 65, 66, 67, 68,
+    -37, -38, -39, -40, -41, -42, -43, -44,
+    -45, -46, -47, -48, -49, -50, -51, -52,
+    -53, -54, -55, -56, -57, -58, -59, -60,
+    -61, -62, -63, -64, -65, -66, -67, -68
+};
+
+static const int16_t coeff_table_token_22[1024] = {
+    69, 70, 71, 72, 73, 74, 75, 76,
+    77, 78, 79, 80, 81, 82, 83, 84,
+    85, 86, 87, 88, 89, 90, 91, 92,
+    93, 94, 95, 96, 97, 98, 99, 100,
+    101, 102, 103, 104, 105, 106, 107, 108,
+    109, 110, 111, 112, 113, 114, 115, 116,
+    117, 118, 119, 120, 121, 122, 123, 124,
+    125, 126, 127, 128, 129, 130, 131, 132,
+    133, 134, 135, 136, 137, 138, 139, 140,
+    141, 142, 143, 144, 145, 146, 147, 148,
+    149, 150, 151, 152, 153, 154, 155, 156,
+    157, 158, 159, 160, 161, 162, 163, 164,
+    165, 166, 167, 168, 169, 170, 171, 172,
+    173, 174, 175, 176, 177, 178, 179, 180,
+    181, 182, 183, 184, 185, 186, 187, 188,
+    189, 190, 191, 192, 193, 194, 195, 196,
+    197, 198, 199, 200, 201, 202, 203, 204,
+    205, 206, 207, 208, 209, 210, 211, 212,
+    213, 214, 215, 216, 217, 218, 219, 220,
+    221, 222, 223, 224, 225, 226, 227, 228,
+    229, 230, 231, 232, 233, 234, 235, 236,
+    237, 238, 239, 240, 241, 242, 243, 244,
+    245, 246, 247, 248, 249, 250, 251, 252,
+    253, 254, 255, 256, 257, 258, 259, 260,
+    261, 262, 263, 264, 265, 266, 267, 268,
+    269, 270, 271, 272, 273, 274, 275, 276,
+    277, 278, 279, 280, 281, 282, 283, 284,
+    285, 286, 287, 288, 289, 290, 291, 292,
+    293, 294, 295, 296, 297, 298, 299, 300,
+    301, 302, 303, 304, 305, 306, 307, 308,
+    309, 310, 311, 312, 313, 314, 315, 316,
+    317, 318, 319, 320, 321, 322, 323, 324,
+    325, 326, 327, 328, 329, 330, 331, 332,
+    333, 334, 335, 336, 337, 338, 339, 340,
+    341, 342, 343, 344, 345, 346, 347, 348,
+    349, 350, 351, 352, 353, 354, 355, 356,
+    357, 358, 359, 360, 361, 362, 363, 364,
+    365, 366, 367, 368, 369, 370, 371, 372,
+    373, 374, 375, 376, 377, 378, 379, 380,
+    381, 382, 383, 384, 385, 386, 387, 388,
+    389, 390, 391, 392, 393, 394, 395, 396,
+    397, 398, 399, 400, 401, 402, 403, 404,
+    405, 406, 407, 408, 409, 410, 411, 412,
+    413, 414, 415, 416, 417, 418, 419, 420,
+    421, 422, 423, 424, 425, 426, 427, 428,
+    429, 430, 431, 432, 433, 434, 435, 436,
+    437, 438, 439, 440, 441, 442, 443, 444,
+    445, 446, 447, 448, 449, 450, 451, 452,
+    453, 454, 455, 456, 457, 458, 459, 460,
+    461, 462, 463, 464, 465, 466, 467, 468,
+    469, 470, 471, 472, 473, 474, 475, 476,
+    477, 478, 479, 480, 481, 482, 483, 484,
+    485, 486, 487, 488, 489, 490, 491, 492,
+    493, 494, 495, 496, 497, 498, 499, 500,
+    501, 502, 503, 504, 505, 506, 507, 508,
+    509, 510, 511, 512, 513, 514, 515, 516,
+    517, 518, 519, 520, 521, 522, 523, 524,
+    525, 526, 527, 528, 529, 530, 531, 532,
+    533, 534, 535, 536, 537, 538, 539, 540,
+    541, 542, 543, 544, 545, 546, 547, 548,
+    549, 550, 551, 552, 553, 554, 555, 556,
+    557, 558, 559, 560, 561, 562, 563, 564,
+    565, 566, 567, 568, 569, 570, 571, 572,
+    573, 574, 575, 576, 577, 578, 579, 580,
+    -69, -70, -71, -72, -73, -74, -75, -76,
+    -77, -78, -79, -80, -81, -82, -83, -84,
+    -85, -86, -87, -88, -89, -90, -91, -92,
+    -93, -94, -95, -96, -97, -98, -99, -100,
+    -101, -102, -103, -104, -105, -106, -107, -108,
+    -109, -110, -111, -112, -113, -114, -115, -116,
+    -117, -118, -119, -120, -121, -122, -123, -124,
+    -125, -126, -127, -128, -129, -130, -131, -132,
+    -133, -134, -135, -136, -137, -138, -139, -140,
+    -141, -142, -143, -144, -145, -146, -147, -148,
+    -149, -150, -151, -152, -153, -154, -155, -156,
+    -157, -158, -159, -160, -161, -162, -163, -164,
+    -165, -166, -167, -168, -169, -170, -171, -172,
+    -173, -174, -175, -176, -177, -178, -179, -180,
+    -181, -182, -183, -184, -185, -186, -187, -188,
+    -189, -190, -191, -192, -193, -194, -195, -196,
+    -197, -198, -199, -200, -201, -202, -203, -204,
+    -205, -206, -207, -208, -209, -210, -211, -212,
+    -213, -214, -215, -216, -217, -218, -219, -220,
+    -221, -222, -223, -224, -225, -226, -227, -228,
+    -229, -230, -231, -232, -233, -234, -235, -236,
+    -237, -238, -239, -240, -241, -242, -243, -244,
+    -245, -246, -247, -248, -249, -250, -251, -252,
+    -253, -254, -255, -256, -257, -258, -259, -260,
+    -261, -262, -263, -264, -265, -266, -267, -268,
+    -269, -270, -271, -272, -273, -274, -275, -276,
+    -277, -278, -279, -280, -281, -282, -283, -284,
+    -285, -286, -287, -288, -289, -290, -291, -292,
+    -293, -294, -295, -296, -297, -298, -299, -300,
+    -301, -302, -303, -304, -305, -306, -307, -308,
+    -309, -310, -311, -312, -313, -314, -315, -316,
+    -317, -318, -319, -320, -321, -322, -323, -324,
+    -325, -326, -327, -328, -329, -330, -331, -332,
+    -333, -334, -335, -336, -337, -338, -339, -340,
+    -341, -342, -343, -344, -345, -346, -347, -348,
+    -349, -350, -351, -352, -353, -354, -355, -356,
+    -357, -358, -359, -360, -361, -362, -363, -364,
+    -365, -366, -367, -368, -369, -370, -371, -372,
+    -373, -374, -375, -376, -377, -378, -379, -380,
+    -381, -382, -383, -384, -385, -386, -387, -388,
+    -389, -390, -391, -392, -393, -394, -395, -396,
+    -397, -398, -399, -400, -401, -402, -403, -404,
+    -405, -406, -407, -408, -409, -410, -411, -412,
+    -413, -414, -415, -416, -417, -418, -419, -420,
+    -421, -422, -423, -424, -425, -426, -427, -428,
+    -429, -430, -431, -432, -433, -434, -435, -436,
+    -437, -438, -439, -440, -441, -442, -443, -444,
+    -445, -446, -447, -448, -449, -450, -451, -452,
+    -453, -454, -455, -456, -457, -458, -459, -460,
+    -461, -462, -463, -464, -465, -466, -467, -468,
+    -469, -470, -471, -472, -473, -474, -475, -476,
+    -477, -478, -479, -480, -481, -482, -483, -484,
+    -485, -486, -487, -488, -489, -490, -491, -492,
+    -493, -494, -495, -496, -497, -498, -499, -500,
+    -501, -502, -503, -504, -505, -506, -507, -508,
+    -509, -510, -511, -512, -513, -514, -515, -516,
+    -517, -518, -519, -520, -521, -522, -523, -524,
+    -525, -526, -527, -528, -529, -530, -531, -532,
+    -533, -534, -535, -536, -537, -538, -539, -540,
+    -541, -542, -543, -544, -545, -546, -547, -548,
+    -549, -550, -551, -552, -553, -554, -555, -556,
+    -557, -558, -559, -560, -561, -562, -563, -564,
+    -565, -566, -567, -568, -569, -570, -571, -572,
+    -573, -574, -575, -576, -577, -578, -579, -580
+};
+
+static const int16_t *coeff_tables[32] = {
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    coeff_table_token_7_8,
+
+    coeff_table_token_7_8,
+    coeff_table_token_9,
+    coeff_table_token_10,
+    coeff_table_token_11,
+    coeff_table_token_12,
+    coeff_table_token_13,
+    coeff_table_token_14,
+    coeff_table_token_15,
+
+    coeff_table_token_16,
+    coeff_table_token_17,
+    coeff_table_token_18,
+    coeff_table_token_19,
+    coeff_table_token_20,
+    coeff_table_token_21,
+    coeff_table_token_22,
+    coeff_table_token_23_24_25_26_27_28_29,
+
+    coeff_table_token_23_24_25_26_27_28_29,
+    coeff_table_token_23_24_25_26_27_28_29,
+    coeff_table_token_23_24_25_26_27_28_29,
+    coeff_table_token_23_24_25_26_27_28_29,
+    coeff_table_token_23_24_25_26_27_28_29,
+    coeff_table_token_23_24_25_26_27_28_29,
+    coeff_table_token_30,
+    coeff_table_token_31
+};
+
+static const uint16_t dc_bias[16][32][2] = {
+  {  /* DC bias table 0 */
+    { 0x2D, 6 },
+    { 0x26, 7 },
+    { 0x166, 9 },
+    { 0x4E, 8 },
+    { 0x2CE, 10 },
+    { 0x59E, 11 },
+    { 0x27D, 11 },
+    { 0x8, 5 },
+    { 0x4F9, 12 },
+    { 0xF, 4 },
+    { 0xE, 4 },
+    { 0x1B, 5 },
+    { 0x6, 4 },
+    { 0x8, 4 },
+    { 0x5, 4 },
+    { 0x1A, 5 },
+    { 0x15, 5 },
+    { 0x7, 4 },
+    { 0xC, 4 },
+    { 0x1, 3 },
+    { 0x0, 3 },
+    { 0x9, 4 },
+    { 0x17, 5 },
+    { 0x29, 6 },
+    { 0x28, 6 },
+    { 0xB2, 8 },
+    { 0x4F8, 12 },
+    { 0x59F, 11 },
+    { 0x9E, 9 },
+    { 0x13F, 10 },
+    { 0x12, 6 },
+    { 0x58, 7 }
+  },
+  {  /* DC bias table 1 */
+    { 0x10, 5 },
+    { 0x47, 7 },
+    { 0x1FF, 9 },
+    { 0x8C, 8 },
+    { 0x3FC, 10 },
+    { 0x46A, 11 },
+    { 0x469, 11 },
+    { 0x22, 6 },
+    { 0x11A1, 13 },
+    { 0xE, 4 },
+    { 0xD, 4 },
+    { 0x4, 4 },
+    { 0x5, 4 },
+    { 0x9, 4 },
+    { 0x6, 4 },
+    { 0x1E, 5 },
+    { 0x16, 5 },
+    { 0x7, 4 },
+    { 0xC, 4 },
+    { 0x1, 3 },
+    { 0x0, 3 },
+    { 0xA, 4 },
+    { 0x17, 5 },
+    { 0x7D, 7 },
+    { 0x7E, 7 },
+    { 0x11B, 9 },
+    { 0x8D1, 12 },
+    { 0x3FD, 10 },
+    { 0x46B, 11 },
+    { 0x11A0, 13 },
+    { 0x7C, 7 },
+    { 0xFE, 8 }
+  },
+  {  /* DC bias table 2 */
+    { 0x16, 5 },
+    { 0x20, 6 },
+    { 0x86, 8 },
+    { 0x87, 8 },
+    { 0x367, 10 },
+    { 0x6CC, 11 },
+    { 0x6CB, 11 },
+    { 0x6E, 7 },
+    { 0x366D, 14 },
+    { 0xF, 4 },
+    { 0xE, 4 },
+    { 0x4, 4 },
+    { 0x5, 4 },
+    { 0xA, 4 },
+    { 0x6, 4 },
+    { 0x1A, 5 },
+    { 0x11, 5 },
+    { 0x7, 4 },
+    { 0xC, 4 },
+    { 0x1, 3 },
+    { 0x0, 3 },
+    { 0x9, 4 },
+    { 0x17, 5 },
+    { 0x6F, 7 },
+    { 0x6D, 7 },
+    { 0x364, 10 },
+    { 0xD9A, 12 },
+    { 0x6CA, 11 },
+    { 0x1B37, 13 },
+    { 0x366C, 14 },
+    { 0x42, 7 },
+    { 0xD8, 8 }
+  },
+  {  /* DC bias table 3 */
+    { 0x0, 4 },
+    { 0x2D, 6 },
+    { 0xF7, 8 },
+    { 0x58, 7 },
+    { 0x167, 9 },
+    { 0x2CB, 10 },
+    { 0x2CA, 10 },
+    { 0xE, 6 },
+    { 0x1661, 13 },
+    { 0x3, 3 },
+    { 0x2, 3 },
+    { 0x8, 4 },
+    { 0x9, 4 },
+    { 0xD, 4 },
+    { 0x2, 4 },
+    { 0x1F, 5 },
+    { 0x17, 5 },
+    { 0x1, 4 },
+    { 0xC, 4 },
+    { 0xE, 4 },
+    { 0xA, 4 },
+    { 0x6, 5 },
+    { 0x78, 7 },
+    { 0xF, 6 },
+    { 0x7A, 7 },
+    { 0x164, 9 },
+    { 0x599, 11 },
+    { 0x2CD, 10 },
+    { 0xB31, 12 },
+    { 0x1660, 13 },
+    { 0x79, 7 },
+    { 0xF6, 8 }
+  },
+  {  /* DC bias table 4 */
+    { 0x3, 4 },
+    { 0x3C, 6 },
+    { 0xF, 7 },
+    { 0x7A, 7 },
+    { 0x1D, 8 },
+    { 0x20, 9 },
+    { 0x72, 10 },
+    { 0x6, 6 },
+    { 0x399, 13 },
+    { 0x4, 3 },
+    { 0x5, 3 },
+    { 0x5, 4 },
+    { 0x6, 4 },
+    { 0xE, 4 },
+    { 0x4, 4 },
+    { 0x0, 4 },
+    { 0x19, 5 },
+    { 0x2, 4 },
+    { 0xD, 4 },
+    { 0x7, 4 },
+    { 0x1F, 5 },
+    { 0x30, 6 },
+    { 0x11, 8 },
+    { 0x31, 6 },
+    { 0x5, 6 },
+    { 0x21, 9 },
+    { 0xE7, 11 },
+    { 0x38, 9 },
+    { 0x1CD, 12 },
+    { 0x398, 13 },
+    { 0x7B, 7 },
+    { 0x9, 7 }
+  },
+  {  /* DC bias table 5 */
+    { 0x9, 4 },
+    { 0x2, 5 },
+    { 0x74, 7 },
+    { 0x7, 6 },
+    { 0xEC, 8 },
+    { 0xD1, 9 },
+    { 0x1A6, 10 },
+    { 0x6, 6 },
+    { 0xD21, 13 },
+    { 0x5, 3 },
+    { 0x6, 3 },
+    { 0x8, 4 },
+    { 0x7, 4 },
+    { 0xF, 4 },
+    { 0x4, 4 },
+    { 0x0, 4 },
+    { 0x1C, 5 },
+    { 0x2, 4 },
+    { 0x5, 4 },
+    { 0x3, 4 },
+    { 0xC, 5 },
+    { 0x35, 7 },
+    { 0x1A7, 10 },
+    { 0x1B, 6 },
+    { 0x77, 7 },
+    { 0x1A5, 10 },
+    { 0x349, 11 },
+    { 0xD0, 9 },
+    { 0x691, 12 },
+    { 0xD20, 13 },
+    { 0x75, 7 },
+    { 0xED, 8 }
+  },
+  {  /* DC bias table 6 */
+    { 0xA, 4 },
+    { 0xC, 5 },
+    { 0x12, 6 },
+    { 0x1B, 6 },
+    { 0xB7, 8 },
+    { 0x16C, 9 },
+    { 0x99, 9 },
+    { 0x5A, 7 },
+    { 0x16D8, 13 },
+    { 0x7, 3 },
+    { 0x6, 3 },
+    { 0x9, 4 },
+    { 0x8, 4 },
+    { 0x0, 3 },
+    { 0x5, 4 },
+    { 0x17, 5 },
+    { 0xE, 5 },
+    { 0x2, 4 },
+    { 0x3, 4 },
+    { 0xF, 5 },
+    { 0x1A, 6 },
+    { 0x4D, 8 },
+    { 0x2DB3, 14 },
+    { 0x2C, 6 },
+    { 0x11, 6 },
+    { 0x2DA, 10 },
+    { 0x5B7, 11 },
+    { 0x98, 9 },
+    { 0xB6D, 12 },
+    { 0x2DB2, 14 },
+    { 0x10, 6 },
+    { 0x27, 7 }
+  },
+  {  /* DC bias table 7 */
+    { 0xD, 4 },
+    { 0xF, 5 },
+    { 0x1D, 6 },
+    { 0x8, 5 },
+    { 0x51, 7 },
+    { 0x56, 8 },
+    { 0xAF, 9 },
+    { 0x2A, 7 },
+    { 0x148A, 13 },
+    { 0x7, 3 },
+    { 0x0, 2 },
+    { 0x8, 4 },
+    { 0x9, 4 },
+    { 0xC, 4 },
+    { 0x6, 4 },
+    { 0x17, 5 },
+    { 0xB, 5 },
+    { 0x16, 5 },
+    { 0x15, 5 },
+    { 0x9, 5 },
+    { 0x50, 7 },
+    { 0xAE, 9 },
+    { 0x2917, 14 },
+    { 0x1C, 6 },
+    { 0x14, 6 },
+    { 0x290, 10 },
+    { 0x523, 11 },
+    { 0x149, 9 },
+    { 0xA44, 12 },
+    { 0x2916, 14 },
+    { 0x53, 7 },
+    { 0xA5, 8 }
+  },
+  {  /* DC bias table 8 */
+    { 0x1, 4 },
+    { 0x1D, 6 },
+    { 0xF5, 8 },
+    { 0xF4, 8 },
+    { 0x24D, 10 },
+    { 0x499, 11 },
+    { 0x498, 11 },
+    { 0x1, 5 },
+    { 0x21, 6 },
+    { 0x6, 3 },
+    { 0x5, 3 },
+    { 0x6, 4 },
+    { 0x5, 4 },
+    { 0x2, 4 },
+    { 0x7, 5 },
+    { 0x25, 6 },
+    { 0x7B, 7 },
+    { 0x1C, 6 },
+    { 0x20, 6 },
+    { 0xD, 6 },
+    { 0x48, 7 },
+    { 0x92, 8 },
+    { 0x127, 9 },
+    { 0xE, 4 },
+    { 0x4, 4 },
+    { 0x11, 5 },
+    { 0xC, 6 },
+    { 0x3C, 6 },
+    { 0xF, 5 },
+    { 0x0, 5 },
+    { 0x1F, 5 },
+    { 0x13, 5 }
+  },
+  {  /* DC bias table 9 */
+    { 0x5, 4 },
+    { 0x3C, 6 },
+    { 0x40, 7 },
+    { 0xD, 7 },
+    { 0x31, 9 },
+    { 0x61, 10 },
+    { 0x60, 10 },
+    { 0x2, 5 },
+    { 0xF5, 8 },
+    { 0x6, 3 },
+    { 0x5, 3 },
+    { 0x7, 4 },
+    { 0x6, 4 },
+    { 0x2, 4 },
+    { 0x9, 5 },
+    { 0x25, 6 },
+    { 0x7, 6 },
+    { 0x21, 6 },
+    { 0x24, 6 },
+    { 0x10, 6 },
+    { 0x41, 7 },
+    { 0xF4, 8 },
+    { 0x19, 8 },
+    { 0xE, 4 },
+    { 0x3, 4 },
+    { 0x11, 5 },
+    { 0x11, 6 },
+    { 0x3F, 6 },
+    { 0x3E, 6 },
+    { 0x7B, 7 },
+    { 0x0, 4 },
+    { 0x13, 5 }
+  },
+  {  /* DC bias table 10 */
+    { 0xA, 4 },
+    { 0x7, 5 },
+    { 0x1, 6 },
+    { 0x9, 6 },
+    { 0x131, 9 },
+    { 0x261, 10 },
+    { 0x260, 10 },
+    { 0x15, 6 },
+    { 0x1, 7 },
+    { 0x7, 3 },
+    { 0x6, 3 },
+    { 0x8, 4 },
+    { 0x7, 4 },
+    { 0x6, 4 },
+    { 0x12, 5 },
+    { 0x2F, 6 },
+    { 0x14, 6 },
+    { 0x27, 6 },
+    { 0x2D, 6 },
+    { 0x16, 6 },
+    { 0x4D, 7 },
+    { 0x99, 8 },
+    { 0x0, 7 },
+    { 0x4, 4 },
+    { 0x1, 4 },
+    { 0x5, 5 },
+    { 0x17, 6 },
+    { 0x2E, 6 },
+    { 0x2C, 6 },
+    { 0x8, 6 },
+    { 0x6, 5 },
+    { 0x1, 5 }
+  },
+  {  /* DC bias table 11 */
+    { 0x0, 3 },
+    { 0xE, 5 },
+    { 0x17, 6 },
+    { 0x2A, 6 },
+    { 0x10, 7 },
+    { 0xF9, 10 },
+    { 0xF8, 10 },
+    { 0x1E, 7 },
+    { 0x3F, 8 },
+    { 0x7, 3 },
+    { 0x6, 3 },
+    { 0x9, 4 },
+    { 0x8, 4 },
+    { 0x6, 4 },
+    { 0xF, 5 },
+    { 0x5, 5 },
+    { 0x16, 6 },
+    { 0x29, 6 },
+    { 0x2B, 6 },
+    { 0x15, 6 },
+    { 0x50, 7 },
+    { 0x11, 7 },
+    { 0x7D, 9 },
+    { 0x4, 4 },
+    { 0x17, 5 },
+    { 0x6, 5 },
+    { 0x14, 6 },
+    { 0x2C, 6 },
+    { 0x2D, 6 },
+    { 0xE, 6 },
+    { 0x9, 6 },
+    { 0x51, 7 }
+  },
+  {  /* DC bias table 12 */
+    { 0x2, 3 },
+    { 0x18, 5 },
+    { 0x2F, 6 },
+    { 0xD, 5 },
+    { 0x53, 7 },
+    { 0x295, 10 },
+    { 0x294, 10 },
+    { 0xA4, 8 },
+    { 0x7C, 8 },
+    { 0x0, 2 },
+    { 0x7, 3 },
+    { 0x9, 4 },
+    { 0x8, 4 },
+    { 0x1B, 5 },
+    { 0xC, 5 },
+    { 0x28, 6 },
+    { 0x6A, 7 },
+    { 0x1E, 6 },
+    { 0x1D, 6 },
+    { 0x69, 7 },
+    { 0xD7, 8 },
+    { 0x7D, 8 },
+    { 0x14B, 9 },
+    { 0x19, 5 },
+    { 0x16, 5 },
+    { 0x2E, 6 },
+    { 0x1C, 6 },
+    { 0x2B, 6 },
+    { 0x2A, 6 },
+    { 0x68, 7 },
+    { 0x3F, 7 },
+    { 0xD6, 8 }
+  },
+  {  /* DC bias table 13 */
+    { 0x2, 3 },
+    { 0x1B, 5 },
+    { 0xC, 5 },
+    { 0x18, 5 },
+    { 0x29, 6 },
+    { 0x7F, 8 },
+    { 0x2F0, 10 },
+    { 0x198, 9 },
+    { 0x179, 9 },
+    { 0x0, 2 },
+    { 0x7, 3 },
+    { 0x9, 4 },
+    { 0x8, 4 },
+    { 0x1A, 5 },
+    { 0xD, 5 },
+    { 0x2A, 6 },
+    { 0x64, 7 },
+    { 0x1E, 6 },
+    { 0x67, 7 },
+    { 0x5F, 7 },
+    { 0xCD, 8 },
+    { 0x7E, 8 },
+    { 0x2F1, 10 },
+    { 0x16, 5 },
+    { 0xE, 5 },
+    { 0x2E, 6 },
+    { 0x65, 7 },
+    { 0x2B, 6 },
+    { 0x28, 6 },
+    { 0x3E, 7 },
+    { 0xBD, 8 },
+    { 0x199, 9 }
+  },
+  {  /* DC bias table 14 */
+    { 0x2, 3 },
+    { 0x7, 4 },
+    { 0x16, 5 },
+    { 0x6, 4 },
+    { 0x36, 6 },
+    { 0x5C, 7 },
+    { 0x15D, 9 },
+    { 0x15C, 9 },
+    { 0x2BF, 10 },
+    { 0x0, 2 },
+    { 0x7, 3 },
+    { 0x9, 4 },
+    { 0x8, 4 },
+    { 0x18, 5 },
+    { 0x34, 6 },
+    { 0x2A, 6 },
+    { 0x5E, 7 },
+    { 0x6A, 7 },
+    { 0x64, 7 },
+    { 0x5D, 7 },
+    { 0xCB, 8 },
+    { 0xAD, 8 },
+    { 0x2BE, 10 },
+    { 0x14, 5 },
+    { 0x33, 6 },
+    { 0x6E, 7 },
+    { 0x5F, 7 },
+    { 0x6F, 7 },
+    { 0x6B, 7 },
+    { 0xCA, 8 },
+    { 0xAC, 8 },
+    { 0x15E, 9 }
+  },
+  {  /* DC bias table 15 */
+    { 0xF, 4 },
+    { 0x1D, 5 },
+    { 0x18, 5 },
+    { 0xB, 4 },
+    { 0x19, 5 },
+    { 0x29, 6 },
+    { 0xD6, 8 },
+    { 0x551, 11 },
+    { 0xAA1, 12 },
+    { 0x1, 2 },
+    { 0x0, 2 },
+    { 0x9, 4 },
+    { 0x8, 4 },
+    { 0x1B, 5 },
+    { 0x38, 6 },
+    { 0x28, 6 },
+    { 0x57, 7 },
+    { 0x6A, 7 },
+    { 0x68, 7 },
+    { 0x56, 7 },
+    { 0xE5, 8 },
+    { 0x155, 9 },
+    { 0xAA0, 12 },
+    { 0x73, 7 },
+    { 0x69, 7 },
+    { 0xD7, 8 },
+    { 0xAB, 8 },
+    { 0xE4, 8 },
+    { 0xA9, 8 },
+    { 0x151, 9 },
+    { 0x150, 9 },
+    { 0x2A9, 10 }
+  }
+};
+
+static const uint16_t ac_bias_0[16][32][2] = {
+  {  /* AC bias group 1, table 0 */
+    { 0x8, 5 },
+    { 0x25, 7 },
+    { 0x17A, 9 },
+    { 0x2F7, 10 },
+    { 0xBDB, 12 },
+    { 0x17B4, 13 },
+    { 0x2F6B, 14 },
+    { 0x1D, 5 },
+    { 0x2F6A, 14 },
+    { 0x8, 4 },
+    { 0x7, 4 },
+    { 0x1, 4 },
+    { 0x2, 4 },
+    { 0xA, 4 },
+    { 0x6, 4 },
+    { 0x0, 4 },
+    { 0x1C, 5 },
+    { 0x9, 4 },
+    { 0xD, 4 },
+    { 0xF, 4 },
+    { 0xC, 4 },
+    { 0x3, 4 },
+    { 0xA, 5 },
+    { 0x16, 5 },
+    { 0x13, 6 },
+    { 0x5D, 7 },
+    { 0x24, 7 },
+    { 0xBC, 8 },
+    { 0x5C, 7 },
+    { 0x5EC, 11 },
+    { 0xB, 5 },
+    { 0x5F, 7 }
+  },
+  {  /* AC bias group 1, table 1 */
+    { 0xF, 5 },
+    { 0x10, 6 },
+    { 0x4B, 8 },
+    { 0xC6, 8 },
+    { 0x31D, 10 },
+    { 0xC71, 12 },
+    { 0xC70, 12 },
+    { 0x1, 4 },
+    { 0xC73, 12 },
+    { 0x8, 4 },
+    { 0x9, 4 },
+    { 0x2, 4 },
+    { 0x3, 4 },
+    { 0xB, 4 },
+    { 0x6, 4 },
+    { 0x0, 4 },
+    { 0x1C, 5 },
+    { 0x5, 4 },
+    { 0xD, 4 },
+    { 0xF, 4 },
+    { 0xA, 4 },
+    { 0x19, 5 },
+    { 0x13, 6 },
+    { 0x1D, 5 },
+    { 0x30, 6 },
+    { 0x62, 7 },
+    { 0x24, 7 },
+    { 0x4A, 8 },
+    { 0x18F, 9 },
+    { 0xC72, 12 },
+    { 0xE, 5 },
+    { 0x11, 6 }
+  },
+  {  /* AC bias group 1, table 2 */
+    { 0x1B, 5 },
+    { 0x3, 6 },
+    { 0x8D, 8 },
+    { 0x40, 7 },
+    { 0x239, 10 },
+    { 0x471, 11 },
+    { 0x8E0, 12 },
+    { 0x3, 4 },
+    { 0x11C3, 13 },
+    { 0xA, 4 },
+    { 0x9, 4 },
+    { 0x4, 4 },
+    { 0x5, 4 },
+    { 0xE, 4 },
+    { 0x7, 4 },
+    { 0x1, 4 },
+    { 0x1E, 5 },
+    { 0x6, 4 },
+    { 0xC, 4 },
+    { 0xB, 4 },
+    { 0x2, 4 },
+    { 0x0, 5 },
+    { 0x41, 7 },
+    { 0x1F, 5 },
+    { 0x22, 6 },
+    { 0x2, 6 },
+    { 0x8F, 8 },
+    { 0x8C, 8 },
+    { 0x11D, 9 },
+    { 0x11C2, 13 },
+    { 0x1A, 5 },
+    { 0x21, 6 }
+  },
+  {  /* AC bias group 1, table 3 */
+    { 0x1F, 5 },
+    { 0x3, 6 },
+    { 0x3, 7 },
+    { 0x43, 7 },
+    { 0xB, 9 },
+    { 0x15, 10 },
+    { 0x51, 12 },
+    { 0x3, 4 },
+    { 0x50, 12 },
+    { 0xD, 4 },
+    { 0xC, 4 },
+    { 0x4, 4 },
+    { 0x6, 4 },
+    { 0xE, 4 },
+    { 0xA, 4 },
+    { 0x1, 4 },
+    { 0x1E, 5 },
+    { 0x5, 4 },
+    { 0x9, 4 },
+    { 0x7, 4 },
+    { 0x11, 5 },
+    { 0x2, 6 },
+    { 0x4, 8 },
+    { 0x2, 4 },
+    { 0x2D, 6 },
+    { 0x20, 6 },
+    { 0x42, 7 },
+    { 0x1, 7 },
+    { 0x0, 7 },
+    { 0x29, 11 },
+    { 0x17, 5 },
+    { 0x2C, 6 }
+  },
+  {  /* AC bias group 1, table 4 */
+    { 0x3, 4 },
+    { 0x1F, 6 },
+    { 0x3A, 7 },
+    { 0x5D, 7 },
+    { 0x173, 9 },
+    { 0x2E4, 10 },
+    { 0x172D, 13 },
+    { 0x4, 4 },
+    { 0x172C, 13 },
+    { 0xF, 4 },
+    { 0xE, 4 },
+    { 0x9, 4 },
+    { 0x8, 4 },
+    { 0xC, 4 },
+    { 0xA, 4 },
+    { 0x1, 4 },
+    { 0x16, 5 },
+    { 0x2, 4 },
+    { 0x5, 4 },
+    { 0x1A, 5 },
+    { 0x2F, 6 },
+    { 0x38, 7 },
+    { 0x5CA, 11 },
+    { 0x6, 4 },
+    { 0x37, 6 },
+    { 0x1E, 6 },
+    { 0x3B, 7 },
+    { 0x39, 7 },
+    { 0xB8, 8 },
+    { 0xB97, 12 },
+    { 0x0, 4 },
+    { 0x36, 6 }
+  },
+  {  /* AC bias group 1, table 5 */
+    { 0x6, 4 },
+    { 0x37, 6 },
+    { 0x5D, 7 },
+    { 0xC, 6 },
+    { 0xB9, 8 },
+    { 0x2E3, 10 },
+    { 0x5C4, 11 },
+    { 0x4, 4 },
+    { 0x1715, 13 },
+    { 0x0, 3 },
+    { 0xF, 4 },
+    { 0x8, 4 },
+    { 0x7, 4 },
+    { 0xC, 4 },
+    { 0x9, 4 },
+    { 0x1D, 5 },
+    { 0x16, 5 },
+    { 0x1C, 5 },
+    { 0x1A, 5 },
+    { 0xB, 5 },
+    { 0x5E, 7 },
+    { 0x170, 9 },
+    { 0x1714, 13 },
+    { 0xA, 4 },
+    { 0xA, 5 },
+    { 0x36, 6 },
+    { 0x5F, 7 },
+    { 0x1B, 7 },
+    { 0x1A, 7 },
+    { 0xB8B, 12 },
+    { 0x2, 4 },
+    { 0x7, 5 }
+  },
+  {  /* AC bias group 1, table 6 */
+    { 0xC, 4 },
+    { 0xB, 5 },
+    { 0x79, 7 },
+    { 0x22, 6 },
+    { 0xF0, 8 },
+    { 0x119, 9 },
+    { 0x230, 10 },
+    { 0x1D, 5 },
+    { 0x8C4, 12 },
+    { 0x1, 3 },
+    { 0x0, 3 },
+    { 0xA, 4 },
+    { 0x9, 4 },
+    { 0xB, 4 },
+    { 0x7, 4 },
+    { 0x1C, 5 },
+    { 0x3D, 6 },
+    { 0xD, 5 },
+    { 0x8, 5 },
+    { 0x15, 6 },
+    { 0x8D, 8 },
+    { 0x118B, 13 },
+    { 0x118A, 13 },
+    { 0xD, 4 },
+    { 0x10, 5 },
+    { 0x9, 5 },
+    { 0x14, 6 },
+    { 0x47, 7 },
+    { 0xF1, 8 },
+    { 0x463, 11 },
+    { 0x1F, 5 },
+    { 0xC, 5 }
+  },
+  {  /* AC bias group 1, table 7 */
+    { 0x0, 3 },
+    { 0x1A, 5 },
+    { 0x33, 6 },
+    { 0xC, 5 },
+    { 0x46, 7 },
+    { 0x1E3, 9 },
+    { 0x3C5, 10 },
+    { 0x17, 5 },
+    { 0x1E21, 13 },
+    { 0x2, 3 },
+    { 0x1, 3 },
+    { 0x9, 4 },
+    { 0xA, 4 },
+    { 0x7, 4 },
+    { 0x1B, 5 },
+    { 0x3D, 6 },
+    { 0x1B, 6 },
+    { 0x22, 6 },
+    { 0x79, 7 },
+    { 0xF0, 8 },
+    { 0x1E20, 13 },
+    { 0x1E23, 13 },
+    { 0x1E22, 13 },
+    { 0xE, 4 },
+    { 0x16, 5 },
+    { 0x18, 5 },
+    { 0x32, 6 },
+    { 0x1A, 6 },
+    { 0x47, 7 },
+    { 0x789, 11 },
+    { 0x1F, 5 },
+    { 0x10, 5 }
+  },
+  {  /* AC bias group 1, table 8 */
+    { 0x1D, 5 },
+    { 0x61, 7 },
+    { 0x4E, 8 },
+    { 0x9E, 9 },
+    { 0x27C, 11 },
+    { 0x9F5, 13 },
+    { 0x9F4, 13 },
+    { 0x3, 4 },
+    { 0x60, 7 },
+    { 0x0, 3 },
+    { 0xF, 4 },
+    { 0xB, 4 },
+    { 0xA, 4 },
+    { 0x9, 4 },
+    { 0x5, 4 },
+    { 0xD, 5 },
+    { 0x31, 6 },
+    { 0x8, 5 },
+    { 0x38, 6 },
+    { 0x12, 6 },
+    { 0x26, 7 },
+    { 0x13F, 10 },
+    { 0x4FB, 12 },
+    { 0xD, 4 },
+    { 0x2, 4 },
+    { 0xC, 5 },
+    { 0x39, 6 },
+    { 0x1C, 6 },
+    { 0xF, 5 },
+    { 0x1D, 6 },
+    { 0x8, 4 },
+    { 0x19, 5 }
+  },
+  {  /* AC bias group 1, table 9 */
+    { 0x7, 4 },
+    { 0x19, 6 },
+    { 0xAB, 8 },
+    { 0xAA, 8 },
+    { 0x119, 10 },
+    { 0x461, 12 },
+    { 0x460, 12 },
+    { 0x1B, 5 },
+    { 0x47, 8 },
+    { 0x1, 3 },
+    { 0x0, 3 },
+    { 0xC, 4 },
+    { 0xB, 4 },
+    { 0x9, 4 },
+    { 0x5, 4 },
+    { 0xD, 5 },
+    { 0x35, 6 },
+    { 0x3D, 6 },
+    { 0x3C, 6 },
+    { 0x18, 6 },
+    { 0x22, 7 },
+    { 0x8D, 9 },
+    { 0x231, 11 },
+    { 0xE, 4 },
+    { 0x1F, 5 },
+    { 0x9, 5 },
+    { 0x2B, 6 },
+    { 0x10, 6 },
+    { 0x34, 6 },
+    { 0x54, 7 },
+    { 0x8, 4 },
+    { 0x14, 5 }
+  },
+  {  /* AC bias group 1, table 10 */
+    { 0xC, 4 },
+    { 0x5, 5 },
+    { 0x8, 6 },
+    { 0x5B, 7 },
+    { 0x4D, 9 },
+    { 0x131, 11 },
+    { 0x261, 12 },
+    { 0x1A, 5 },
+    { 0x12, 7 },
+    { 0x0, 3 },
+    { 0xF, 4 },
+    { 0xA, 4 },
+    { 0x9, 4 },
+    { 0x6, 4 },
+    { 0x1B, 5 },
+    { 0x6, 5 },
+    { 0x1C, 6 },
+    { 0x2C, 6 },
+    { 0x15, 6 },
+    { 0x5A, 7 },
+    { 0x27, 8 },
+    { 0x99, 10 },
+    { 0x260, 12 },
+    { 0xE, 4 },
+    { 0x4, 4 },
+    { 0xF, 5 },
+    { 0x7, 5 },
+    { 0x1D, 6 },
+    { 0xB, 5 },
+    { 0x14, 6 },
+    { 0x8, 4 },
+    { 0x17, 5 }
+  },
+  {  /* AC bias group 1, table 11 */
+    { 0xF, 4 },
+    { 0x13, 5 },
+    { 0x75, 7 },
+    { 0x24, 6 },
+    { 0x95, 8 },
+    { 0x251, 10 },
+    { 0x4A0, 11 },
+    { 0x10, 5 },
+    { 0xC8, 8 },
+    { 0x2, 3 },
+    { 0x1, 3 },
+    { 0x1, 4 },
+    { 0x0, 4 },
+    { 0x1A, 5 },
+    { 0x11, 5 },
+    { 0x2C, 6 },
+    { 0x65, 7 },
+    { 0x74, 7 },
+    { 0x4B, 7 },
+    { 0xC9, 8 },
+    { 0x129, 9 },
+    { 0x943, 12 },
+    { 0x942, 12 },
+    { 0x3, 3 },
+    { 0xA, 4 },
+    { 0x1C, 5 },
+    { 0x18, 5 },
+    { 0x33, 6 },
+    { 0x17, 5 },
+    { 0x2D, 6 },
+    { 0x1B, 5 },
+    { 0x3B, 6 }
+  },
+  {  /* AC bias group 1, table 12 */
+    { 0x3, 3 },
+    { 0x1A, 5 },
+    { 0x2D, 6 },
+    { 0x38, 6 },
+    { 0x28, 7 },
+    { 0x395, 10 },
+    { 0xE51, 12 },
+    { 0x37, 6 },
+    { 0xE4, 8 },
+    { 0x1, 3 },
+    { 0x0, 3 },
+    { 0x1F, 5 },
+    { 0x1E, 5 },
+    { 0x17, 5 },
+    { 0x3A, 6 },
+    { 0x73, 7 },
+    { 0x2A, 7 },
+    { 0x2B, 7 },
+    { 0x29, 7 },
+    { 0x1CB, 9 },
+    { 0x729, 11 },
+    { 0x1CA1, 13 },
+    { 0x1CA0, 13 },
+    { 0x4, 3 },
+    { 0xA, 4 },
+    { 0x4, 4 },
+    { 0x18, 5 },
+    { 0x36, 6 },
+    { 0xB, 5 },
+    { 0x2C, 6 },
+    { 0x19, 5 },
+    { 0x3B, 6 }
+  },
+  {  /* AC bias group 1, table 13 */
+    { 0x4, 3 },
+    { 0x4, 4 },
+    { 0x3F, 6 },
+    { 0x17, 5 },
+    { 0x75, 7 },
+    { 0x1F5, 9 },
+    { 0x7D1, 11 },
+    { 0x17, 6 },
+    { 0x1F6, 9 },
+    { 0x1, 3 },
+    { 0x0, 3 },
+    { 0x1B, 5 },
+    { 0x1A, 5 },
+    { 0xA, 5 },
+    { 0x32, 6 },
+    { 0x74, 7 },
+    { 0xF8, 8 },
+    { 0xF9, 8 },
+    { 0x1F7, 9 },
+    { 0x3E9, 10 },
+    { 0xFA0, 12 },
+    { 0x1F43, 13 },
+    { 0x1F42, 13 },
+    { 0x3, 3 },
+    { 0xA, 4 },
+    { 0x1E, 5 },
+    { 0x1C, 5 },
+    { 0x3B, 6 },
+    { 0x18, 5 },
+    { 0x16, 6 },
+    { 0x16, 5 },
+    { 0x33, 6 }
+  },
+  {  /* AC bias group 1, table 14 */
+    { 0x4, 3 },
+    { 0x7, 4 },
+    { 0x18, 5 },
+    { 0x1E, 5 },
+    { 0x36, 6 },
+    { 0x31, 7 },
+    { 0x177, 9 },
+    { 0x77, 7 },
+    { 0x176, 9 },
+    { 0x1, 3 },
+    { 0x0, 3 },
+    { 0x1A, 5 },
+    { 0x19, 5 },
+    { 0x3A, 6 },
+    { 0x19, 6 },
+    { 0x5C, 7 },
+    { 0xBA, 8 },
+    { 0x61, 8 },
+    { 0xC1, 9 },
+    { 0x180, 10 },
+    { 0x302, 11 },
+    { 0x607, 12 },
+    { 0x606, 12 },
+    { 0x2, 3 },
+    { 0xA, 4 },
+    { 0x1F, 5 },
+    { 0x1C, 5 },
+    { 0x37, 6 },
+    { 0x16, 5 },
+    { 0x76, 7 },
+    { 0xD, 5 },
+    { 0x2F, 6 }
+  },
+  {  /* AC bias group 1, table 15 */
+    { 0x0, 3 },
+    { 0xA, 4 },
+    { 0x1A, 5 },
+    { 0xC, 4 },
+    { 0x1D, 5 },
+    { 0x39, 6 },
+    { 0x78, 7 },
+    { 0x5E, 7 },
+    { 0x393, 11 },
+    { 0x2, 3 },
+    { 0x1, 3 },
+    { 0x16, 5 },
+    { 0xF, 5 },
+    { 0x2E, 6 },
+    { 0x5F, 7 },
+    { 0x73, 8 },
+    { 0xE5, 9 },
+    { 0x1C8, 10 },
+    { 0xE4A, 13 },
+    { 0x1C97, 14 },
+    { 0x1C96, 14 },
+    { 0xE49, 13 },
+    { 0xE48, 13 },
+    { 0x4, 3 },
+    { 0x6, 4 },
+    { 0x1F, 5 },
+    { 0x1B, 5 },
+    { 0x1D, 6 },
+    { 0x38, 6 },
+    { 0x38, 7 },
+    { 0x3D, 6 },
+    { 0x79, 7 }
+  }
+};
+
+static const uint16_t ac_bias_1[16][32][2] = {
+  {  /* AC bias group 2, table 0 */
+    { 0xB, 5 },
+    { 0x2B, 7 },
+    { 0x54, 8 },
+    { 0x1B7, 9 },
+    { 0x6D9, 11 },
+    { 0xDB1, 12 },
+    { 0xDB0, 12 },
+    { 0x2, 4 },
+    { 0xAB, 9 },
+    { 0x9, 4 },
+    { 0xA, 4 },
+    { 0x7, 4 },
+    { 0x8, 4 },
+    { 0xF, 4 },
+    { 0xC, 4 },
+    { 0x3, 4 },
+    { 0x1D, 5 },
+    { 0x4, 4 },
+    { 0xB, 4 },
+    { 0x6, 4 },
+    { 0x1A, 5 },
+    { 0x3, 6 },
+    { 0xAA, 9 },
+    { 0x1, 4 },
+    { 0x0, 5 },
+    { 0x14, 6 },
+    { 0x6C, 7 },
+    { 0xDA, 8 },
+    { 0x2, 6 },
+    { 0x36D, 10 },
+    { 0x1C, 5 },
+    { 0x37, 6 }
+  },
+  {  /* AC bias group 2, table 1 */
+    { 0x1D, 5 },
+    { 0x4, 6 },
+    { 0xB6, 8 },
+    { 0x6A, 8 },
+    { 0x5B9, 11 },
+    { 0x16E1, 13 },
+    { 0x16E0, 13 },
+    { 0x7, 4 },
+    { 0x16F, 9 },
+    { 0xC, 4 },
+    { 0xD, 4 },
+    { 0x9, 4 },
+    { 0x8, 4 },
+    { 0xF, 4 },
+    { 0xA, 4 },
+    { 0x3, 4 },
+    { 0x17, 5 },
+    { 0x2, 4 },
+    { 0x4, 4 },
+    { 0x1C, 5 },
+    { 0x2C, 6 },
+    { 0x6B, 8 },
+    { 0xB71, 12 },
+    { 0x5, 4 },
+    { 0x3, 5 },
+    { 0x1B, 6 },
+    { 0x5A, 7 },
+    { 0x34, 7 },
+    { 0x5, 6 },
+    { 0x2DD, 10 },
+    { 0x0, 4 },
+    { 0xC, 5 }
+  },
+  {  /* AC bias group 2, table 2 */
+    { 0x3, 4 },
+    { 0x7F, 7 },
+    { 0xA1, 8 },
+    { 0xA0, 8 },
+    { 0x20C, 10 },
+    { 0x834, 12 },
+    { 0x106B, 13 },
+    { 0x7, 4 },
+    { 0x82, 8 },
+    { 0xE, 4 },
+    { 0xD, 4 },
+    { 0xB, 4 },
+    { 0xC, 4 },
+    { 0x0, 3 },
+    { 0x9, 4 },
+    { 0x2, 4 },
+    { 0x11, 5 },
+    { 0x1E, 5 },
+    { 0x15, 5 },
+    { 0x3E, 6 },
+    { 0x40, 7 },
+    { 0x41B, 11 },
+    { 0x106A, 13 },
+    { 0x6, 4 },
+    { 0xA, 5 },
+    { 0x29, 6 },
+    { 0x7E, 7 },
+    { 0x51, 7 },
+    { 0x21, 6 },
+    { 0x107, 9 },
+    { 0x4, 4 },
+    { 0xB, 5 }
+  },
+  {  /* AC bias group 2, table 3 */
+    { 0x7, 4 },
+    { 0x1B, 6 },
+    { 0xF6, 8 },
+    { 0xE9, 8 },
+    { 0x3A1, 10 },
+    { 0x740, 11 },
+    { 0xE82, 12 },
+    { 0x1F, 5 },
+    { 0x1EF, 9 },
+    { 0x1, 3 },
+    { 0x2, 3 },
+    { 0xB, 4 },
+    { 0xC, 4 },
+    { 0xD, 4 },
+    { 0x8, 4 },
+    { 0x1C, 5 },
+    { 0x3, 5 },
+    { 0x12, 5 },
+    { 0x2, 5 },
+    { 0x75, 7 },
+    { 0x1D1, 9 },
+    { 0x1D07, 13 },
+    { 0x1D06, 13 },
+    { 0xA, 4 },
+    { 0x13, 5 },
+    { 0x3B, 6 },
+    { 0x1A, 6 },
+    { 0x7A, 7 },
+    { 0x3C, 6 },
+    { 0x1EE, 9 },
+    { 0x0, 4 },
+    { 0xC, 5 }
+  },
+  {  /* AC bias group 2, table 4 */
+    { 0xD, 4 },
+    { 0x3D, 6 },
+    { 0x42, 7 },
+    { 0x37, 7 },
+    { 0xD9, 9 },
+    { 0x362, 11 },
+    { 0x6C6, 12 },
+    { 0x1F, 5 },
+    { 0x86, 8 },
+    { 0x1, 3 },
+    { 0x2, 3 },
+    { 0xC, 4 },
+    { 0xB, 4 },
+    { 0xA, 4 },
+    { 0x1, 4 },
+    { 0xF, 5 },
+    { 0x25, 6 },
+    { 0x3C, 6 },
+    { 0x1A, 6 },
+    { 0x87, 8 },
+    { 0x1B0, 10 },
+    { 0xD8F, 13 },
+    { 0xD8E, 13 },
+    { 0xE, 4 },
+    { 0x13, 5 },
+    { 0xC, 5 },
+    { 0x24, 6 },
+    { 0x20, 6 },
+    { 0x11, 5 },
+    { 0x6D, 8 },
+    { 0x0, 4 },
+    { 0xE, 5 }
+  },
+  {  /* AC bias group 2, table 5 */
+    { 0x0, 3 },
+    { 0x12, 5 },
+    { 0x76, 7 },
+    { 0x77, 7 },
+    { 0x14D, 9 },
+    { 0x533, 11 },
+    { 0x14C9, 13 },
+    { 0x13, 5 },
+    { 0xA5, 8 },
+    { 0x2, 3 },
+    { 0x3, 3 },
+    { 0xB, 4 },
+    { 0xC, 4 },
+    { 0x8, 4 },
+    { 0x1A, 5 },
+    { 0x2B, 6 },
+    { 0x75, 7 },
+    { 0x74, 7 },
+    { 0xA7, 8 },
+    { 0x298, 10 },
+    { 0x14C8, 13 },
+    { 0x14CB, 13 },
+    { 0x14CA, 13 },
+    { 0xF, 4 },
+    { 0x1C, 5 },
+    { 0x7, 5 },
+    { 0x2A, 6 },
+    { 0x28, 6 },
+    { 0x1B, 5 },
+    { 0xA4, 8 },
+    { 0x2, 4 },
+    { 0x6, 5 }
+  },
+  {  /* AC bias group 2, table 6 */
+    { 0x2, 3 },
+    { 0x1A, 5 },
+    { 0x2B, 6 },
+    { 0x3A, 6 },
+    { 0xED, 8 },
+    { 0x283, 10 },
+    { 0xA0A, 12 },
+    { 0x4, 5 },
+    { 0xA1, 8 },
+    { 0x4, 3 },
+    { 0x3, 3 },
+    { 0xB, 4 },
+    { 0xC, 4 },
+    { 0x1F, 5 },
+    { 0x6, 5 },
+    { 0x77, 7 },
+    { 0xA3, 8 },
+    { 0xA2, 8 },
+    { 0x140, 9 },
+    { 0x1417, 13 },
+    { 0x1416, 13 },
+    { 0xA09, 12 },
+    { 0xA08, 12 },
+    { 0x0, 3 },
+    { 0x1E, 5 },
+    { 0x7, 5 },
+    { 0x2A, 6 },
+    { 0x29, 6 },
+    { 0x1C, 5 },
+    { 0xEC, 8 },
+    { 0x1B, 5 },
+    { 0x5, 5 }
+  },
+  {  /* AC bias group 2, table 7 */
+    { 0x2, 3 },
+    { 0x2, 4 },
+    { 0x18, 5 },
+    { 0x1D, 5 },
+    { 0x35, 6 },
+    { 0xE4, 8 },
+    { 0x1CF, 11 },
+    { 0x1D, 7 },
+    { 0x72, 9 },
+    { 0x4, 3 },
+    { 0x5, 3 },
+    { 0x6, 4 },
+    { 0x7, 4 },
+    { 0x6, 5 },
+    { 0x73, 7 },
+    { 0x38, 8 },
+    { 0x1CE, 11 },
+    { 0x39B, 12 },
+    { 0x398, 12 },
+    { 0x733, 13 },
+    { 0x732, 13 },
+    { 0x735, 13 },
+    { 0x734, 13 },
+    { 0x0, 3 },
+    { 0x1F, 5 },
+    { 0x1B, 5 },
+    { 0x34, 6 },
+    { 0xF, 6 },
+    { 0x1E, 5 },
+    { 0xE5, 8 },
+    { 0x19, 5 },
+    { 0x38, 6 }
+  },
+  {  /* AC bias group 2, table 8 */
+    { 0x16, 5 },
+    { 0x50, 7 },
+    { 0x172, 9 },
+    { 0x2E7, 10 },
+    { 0x1732, 13 },
+    { 0x2E67, 14 },
+    { 0x2E66, 14 },
+    { 0x6, 4 },
+    { 0x51, 7 },
+    { 0x1, 3 },
+    { 0x0, 3 },
+    { 0xD, 4 },
+    { 0xC, 4 },
+    { 0x9, 4 },
+    { 0x1C, 5 },
+    { 0x9, 5 },
+    { 0x1C, 6 },
+    { 0x1D, 6 },
+    { 0x5D, 7 },
+    { 0xB8, 8 },
+    { 0x5CD, 11 },
+    { 0x1731, 13 },
+    { 0x1730, 13 },
+    { 0xF, 4 },
+    { 0x5, 4 },
+    { 0xF, 5 },
+    { 0x8, 5 },
+    { 0x29, 6 },
+    { 0x1D, 5 },
+    { 0x2F, 6 },
+    { 0x8, 4 },
+    { 0x15, 5 }
+  },
+  {  /* AC bias group 2, table 9 */
+    { 0x9, 4 },
+    { 0x21, 6 },
+    { 0x40, 7 },
+    { 0xAD, 8 },
+    { 0x2B0, 10 },
+    { 0x1589, 13 },
+    { 0x1588, 13 },
+    { 0x1C, 5 },
+    { 0x5F, 7 },
+    { 0x0, 3 },
+    { 0xF, 4 },
+    { 0xD, 4 },
+    { 0xC, 4 },
+    { 0x6, 4 },
+    { 0x11, 5 },
+    { 0x2A, 6 },
+    { 0x57, 7 },
+    { 0x5E, 7 },
+    { 0x41, 7 },
+    { 0x159, 9 },
+    { 0x563, 11 },
+    { 0x158B, 13 },
+    { 0x158A, 13 },
+    { 0x1, 3 },
+    { 0x5, 4 },
+    { 0x14, 5 },
+    { 0x3B, 6 },
+    { 0x2E, 6 },
+    { 0x4, 4 },
+    { 0x3A, 6 },
+    { 0x7, 4 },
+    { 0x16, 5 }
+  },
+  {  /* AC bias group 2, table 10 */
+    { 0xE, 4 },
+    { 0x7, 5 },
+    { 0x46, 7 },
+    { 0x45, 7 },
+    { 0x64, 9 },
+    { 0x32A, 12 },
+    { 0x657, 13 },
+    { 0x18, 5 },
+    { 0xD, 6 },
+    { 0x0, 3 },
+    { 0xF, 4 },
+    { 0xA, 4 },
+    { 0xB, 4 },
+    { 0x1A, 5 },
+    { 0x36, 6 },
+    { 0x47, 7 },
+    { 0x44, 7 },
+    { 0x18, 7 },
+    { 0x33, 8 },
+    { 0xCB, 10 },
+    { 0x656, 13 },
+    { 0x329, 12 },
+    { 0x328, 12 },
+    { 0x2, 3 },
+    { 0x6, 4 },
+    { 0x19, 5 },
+    { 0xE, 5 },
+    { 0x37, 6 },
+    { 0x9, 4 },
+    { 0xF, 5 },
+    { 0x2, 4 },
+    { 0x10, 5 }
+  },
+  {  /* AC bias group 2, table 11 */
+    { 0x3, 3 },
+    { 0x18, 5 },
+    { 0x23, 6 },
+    { 0x77, 7 },
+    { 0x194, 9 },
+    { 0x1956, 13 },
+    { 0x32AF, 14 },
+    { 0x3A, 6 },
+    { 0x76, 7 },
+    { 0x2, 3 },
+    { 0x1, 3 },
+    { 0x1F, 5 },
+    { 0x1E, 5 },
+    { 0x14, 5 },
+    { 0x22, 6 },
+    { 0x64, 7 },
+    { 0x197, 9 },
+    { 0x196, 9 },
+    { 0x32B, 10 },
+    { 0x654, 11 },
+    { 0x32AE, 14 },
+    { 0x1955, 13 },
+    { 0x1954, 13 },
+    { 0x0, 3 },
+    { 0x9, 4 },
+    { 0x1C, 5 },
+    { 0x15, 5 },
+    { 0x10, 5 },
+    { 0xD, 4 },
+    { 0x17, 5 },
+    { 0x16, 5 },
+    { 0x33, 6 }
+  },
+  {  /* AC bias group 2, table 12 */
+    { 0x5, 3 },
+    { 0x6, 4 },
+    { 0x3E, 6 },
+    { 0x10, 5 },
+    { 0x48, 7 },
+    { 0x93F, 12 },
+    { 0x24FA, 14 },
+    { 0x32, 6 },
+    { 0x67, 7 },
+    { 0x2, 3 },
+    { 0x1, 3 },
+    { 0x1B, 5 },
+    { 0x1E, 5 },
+    { 0x34, 6 },
+    { 0x66, 7 },
+    { 0x92, 8 },
+    { 0x126, 9 },
+    { 0x24E, 10 },
+    { 0x49E, 11 },
+    { 0x49F7, 15 },
+    { 0x49F6, 15 },
+    { 0x24F9, 14 },
+    { 0x24F8, 14 },
+    { 0x0, 3 },
+    { 0x7, 4 },
+    { 0x18, 5 },
+    { 0x11, 5 },
+    { 0x3F, 6 },
+    { 0xE, 4 },
+    { 0x13, 5 },
+    { 0x35, 6 },
+    { 0x25, 6 }
+  },
+  {  /* AC bias group 2, table 13 */
+    { 0x5, 3 },
+    { 0x8, 4 },
+    { 0x12, 5 },
+    { 0x1C, 5 },
+    { 0x1C, 6 },
+    { 0xEA, 9 },
+    { 0x1D75, 14 },
+    { 0x1E, 6 },
+    { 0x66, 7 },
+    { 0x1, 3 },
+    { 0x2, 3 },
+    { 0x1B, 5 },
+    { 0x1A, 5 },
+    { 0x1F, 6 },
+    { 0x3B, 7 },
+    { 0x74, 8 },
+    { 0x1D6, 10 },
+    { 0x3AF, 11 },
+    { 0x1D74, 14 },
+    { 0x1D77, 14 },
+    { 0x1D76, 14 },
+    { 0xEB9, 13 },
+    { 0xEB8, 13 },
+    { 0xF, 4 },
+    { 0x6, 4 },
+    { 0x13, 5 },
+    { 0x3B, 6 },
+    { 0x3A, 6 },
+    { 0x0, 3 },
+    { 0x18, 5 },
+    { 0x32, 6 },
+    { 0x67, 7 }
+  },
+  {  /* AC bias group 2, table 14 */
+    { 0x4, 3 },
+    { 0xA, 4 },
+    { 0x1B, 5 },
+    { 0xC, 4 },
+    { 0xD, 5 },
+    { 0xE6, 8 },
+    { 0x684, 11 },
+    { 0x72, 7 },
+    { 0xE7, 8 },
+    { 0x2, 3 },
+    { 0x1, 3 },
+    { 0x17, 5 },
+    { 0x16, 5 },
+    { 0x18, 6 },
+    { 0xD1, 8 },
+    { 0x1A0, 9 },
+    { 0x686, 11 },
+    { 0xD0F, 12 },
+    { 0xD0A, 12 },
+    { 0x1A17, 13 },
+    { 0x1A16, 13 },
+    { 0x1A1D, 13 },
+    { 0x1A1C, 13 },
+    { 0xF, 4 },
+    { 0x1D, 5 },
+    { 0xE, 5 },
+    { 0x35, 6 },
+    { 0x38, 6 },
+    { 0x0, 3 },
+    { 0xF, 5 },
+    { 0x19, 6 },
+    { 0x69, 7 }
+  },
+  {  /* AC bias group 2, table 15 */
+    { 0x3, 3 },
+    { 0xC, 4 },
+    { 0x1B, 5 },
+    { 0x0, 3 },
+    { 0x3, 4 },
+    { 0x2E, 6 },
+    { 0x51, 9 },
+    { 0xBC, 8 },
+    { 0x53, 9 },
+    { 0x4, 3 },
+    { 0x2, 3 },
+    { 0x16, 5 },
+    { 0x15, 5 },
+    { 0x15, 7 },
+    { 0x50, 9 },
+    { 0xA4, 10 },
+    { 0x294, 12 },
+    { 0x52B, 13 },
+    { 0x52A, 13 },
+    { 0x52D, 13 },
+    { 0x52C, 13 },
+    { 0x52F, 13 },
+    { 0x52E, 13 },
+    { 0xE, 4 },
+    { 0x1A, 5 },
+    { 0x4, 5 },
+    { 0x28, 6 },
+    { 0x29, 6 },
+    { 0xF, 4 },
+    { 0xB, 6 },
+    { 0x5F, 7 },
+    { 0xBD, 8 }
+  }
+};
+
+static const uint16_t ac_bias_2[16][32][2] = {
+  {  /* AC bias group 3, table 0 */
+    { 0x3, 4 },
+    { 0x9, 6 },
+    { 0xD0, 8 },
+    { 0x1A3, 9 },
+    { 0x344, 10 },
+    { 0xD14, 12 },
+    { 0x1A2B, 13 },
+    { 0x4, 4 },
+    { 0x15, 7 },
+    { 0x0, 3 },
+    { 0xF, 4 },
+    { 0xB, 4 },
+    { 0xC, 4 },
+    { 0xE, 4 },
+    { 0x9, 4 },
+    { 0x1B, 5 },
+    { 0xA, 5 },
+    { 0x14, 5 },
+    { 0xD, 5 },
+    { 0x2A, 6 },
+    { 0x14, 7 },
+    { 0x68B, 11 },
+    { 0x1A2A, 13 },
+    { 0x8, 4 },
+    { 0xB, 5 },
+    { 0x2B, 6 },
+    { 0xB, 6 },
+    { 0x69, 7 },
+    { 0x35, 6 },
+    { 0x8, 6 },
+    { 0x7, 4 },
+    { 0xC, 5 }
+  },
+  {  /* AC bias group 3, table 1 */
+    { 0xA, 4 },
+    { 0x3C, 6 },
+    { 0x32, 7 },
+    { 0x30, 7 },
+    { 0xC5, 9 },
+    { 0x621, 12 },
+    { 0x620, 12 },
+    { 0x1F, 5 },
+    { 0x33, 7 },
+    { 0x1, 3 },
+    { 0x0, 3 },
+    { 0xE, 4 },
+    { 0xD, 4 },
+    { 0xC, 4 },
+    { 0x4, 4 },
+    { 0xD, 5 },
+    { 0x26, 6 },
+    { 0x27, 6 },
+    { 0x14, 6 },
+    { 0x63, 8 },
+    { 0x189, 10 },
+    { 0x623, 12 },
+    { 0x622, 12 },
+    { 0xB, 4 },
+    { 0x12, 5 },
+    { 0x3D, 6 },
+    { 0x22, 6 },
+    { 0x15, 6 },
+    { 0xB, 5 },
+    { 0x23, 6 },
+    { 0x7, 4 },
+    { 0x10, 5 }
+  },
+  {  /* AC bias group 3, table 2 */
+    { 0xF, 4 },
+    { 0xC, 5 },
+    { 0x43, 7 },
+    { 0x10, 6 },
+    { 0x44, 8 },
+    { 0x114, 10 },
+    { 0x455, 12 },
+    { 0x18, 5 },
+    { 0x23, 7 },
+    { 0x1, 3 },
+    { 0x0, 3 },
+    { 0xE, 4 },
+    { 0xD, 4 },
+    { 0x9, 4 },
+    { 0x19, 5 },
+    { 0x9, 5 },
+    { 0x17, 6 },
+    { 0x16, 6 },
+    { 0x42, 7 },
+    { 0x8B, 9 },
+    { 0x454, 12 },
+    { 0x457, 12 },
+    { 0x456, 12 },
+    { 0xB, 4 },
+    { 0x15, 5 },
+    { 0xA, 5 },
+    { 0x29, 6 },
+    { 0x20, 6 },
+    { 0xD, 5 },
+    { 0x28, 6 },
+    { 0x7, 4 },
+    { 0x11, 5 }
+  },
+  {  /* AC bias group 3, table 3 */
+    { 0x1, 3 },
+    { 0x1A, 5 },
+    { 0x29, 6 },
+    { 0x2A, 6 },
+    { 0xA0, 8 },
+    { 0x285, 10 },
+    { 0x1425, 13 },
+    { 0x2, 5 },
+    { 0x0, 7 },
+    { 0x2, 3 },
+    { 0x3, 3 },
+    { 0xC, 4 },
+    { 0xB, 4 },
+    { 0x8, 4 },
+    { 0x12, 5 },
+    { 0x1, 6 },
+    { 0x51, 7 },
+    { 0x1, 7 },
+    { 0x143, 9 },
+    { 0x508, 11 },
+    { 0x1424, 13 },
+    { 0x1427, 13 },
+    { 0x1426, 13 },
+    { 0xF, 4 },
+    { 0x1C, 5 },
+    { 0x3, 5 },
+    { 0x37, 6 },
+    { 0x2B, 6 },
+    { 0x13, 5 },
+    { 0x36, 6 },
+    { 0x1D, 5 },
+    { 0x1, 5 }
+  },
+  {  /* AC bias group 3, table 4 */
+    { 0x4, 3 },
+    { 0x1F, 5 },
+    { 0x3D, 6 },
+    { 0x6, 5 },
+    { 0x16, 7 },
+    { 0x53, 9 },
+    { 0x14A, 11 },
+    { 0x34, 6 },
+    { 0x2A, 8 },
+    { 0x2, 3 },
+    { 0x3, 3 },
+    { 0xB, 4 },
+    { 0xC, 4 },
+    { 0x1C, 5 },
+    { 0x37, 6 },
+    { 0x17, 7 },
+    { 0x2B, 8 },
+    { 0x28, 8 },
+    { 0xA4, 10 },
+    { 0x52D, 13 },
+    { 0x52C, 13 },
+    { 0x52F, 13 },
+    { 0x52E, 13 },
+    { 0x0, 3 },
+    { 0x1D, 5 },
+    { 0x7, 5 },
+    { 0x4, 5 },
+    { 0x35, 6 },
+    { 0x14, 5 },
+    { 0x36, 6 },
+    { 0x15, 5 },
+    { 0x3C, 6 }
+  },
+  {  /* AC bias group 3, table 5 */
+    { 0x4, 3 },
+    { 0xA, 4 },
+    { 0x7, 5 },
+    { 0x1D, 5 },
+    { 0x9, 6 },
+    { 0x1F3, 9 },
+    { 0x7C7, 11 },
+    { 0x8, 6 },
+    { 0x1F0, 9 },
+    { 0x3, 3 },
+    { 0x2, 3 },
+    { 0xD, 4 },
+    { 0xC, 4 },
+    { 0x17, 5 },
+    { 0x7D, 7 },
+    { 0x1F2, 9 },
+    { 0x7C6, 11 },
+    { 0x7C5, 11 },
+    { 0x1F12, 13 },
+    { 0x3E27, 14 },
+    { 0x3E26, 14 },
+    { 0x1F11, 13 },
+    { 0x1F10, 13 },
+    { 0x0, 3 },
+    { 0x1E, 5 },
+    { 0x6, 5 },
+    { 0x39, 6 },
+    { 0x38, 6 },
+    { 0x3F, 6 },
+    { 0x2C, 6 },
+    { 0x5, 5 },
+    { 0x2D, 6 }
+  },
+  {  /* AC bias group 3, table 6 */
+    { 0x2, 3 },
+    { 0x7, 4 },
+    { 0x18, 5 },
+    { 0x3, 4 },
+    { 0x5, 5 },
+    { 0x35, 7 },
+    { 0x4F, 9 },
+    { 0x12, 7 },
+    { 0x4E5, 13 },
+    { 0x5, 3 },
+    { 0x4, 3 },
+    { 0xD, 4 },
+    { 0xE, 4 },
+    { 0x33, 6 },
+    { 0x26, 8 },
+    { 0x9D, 10 },
+    { 0x4E4, 13 },
+    { 0x4E7, 13 },
+    { 0x4E6, 13 },
+    { 0x4E1, 13 },
+    { 0x4E0, 13 },
+    { 0x4E3, 13 },
+    { 0x4E2, 13 },
+    { 0x0, 3 },
+    { 0x1F, 5 },
+    { 0xC, 5 },
+    { 0x3D, 6 },
+    { 0x3C, 6 },
+    { 0x32, 6 },
+    { 0x34, 7 },
+    { 0x1B, 6 },
+    { 0x8, 6 }
+  },
+  {  /* AC bias group 3, table 7 */
+    { 0x0, 3 },
+    { 0x4, 4 },
+    { 0x1C, 5 },
+    { 0xF, 4 },
+    { 0x2, 4 },
+    { 0x7, 5 },
+    { 0x75, 7 },
+    { 0xE8, 8 },
+    { 0x1D2A, 13 },
+    { 0x5, 3 },
+    { 0x4, 3 },
+    { 0xD, 4 },
+    { 0xC, 4 },
+    { 0x77, 7 },
+    { 0xE96, 12 },
+    { 0x3A57, 14 },
+    { 0x3A56, 14 },
+    { 0x3A5D, 14 },
+    { 0x3A5C, 14 },
+    { 0x3A5F, 14 },
+    { 0x3A5E, 14 },
+    { 0x1D29, 13 },
+    { 0x1D28, 13 },
+    { 0x3, 3 },
+    { 0x6, 5 },
+    { 0xA, 5 },
+    { 0x2C, 7 },
+    { 0x17, 6 },
+    { 0x76, 7 },
+    { 0x1D3, 9 },
+    { 0x3A4, 10 },
+    { 0x2D, 7 }
+  },
+  {  /* AC bias group 3, table 8 */
+    { 0xA, 4 },
+    { 0x24, 6 },
+    { 0xBF, 8 },
+    { 0x85, 8 },
+    { 0x211, 10 },
+    { 0x842, 12 },
+    { 0x1087, 13 },
+    { 0x18, 5 },
+    { 0x20, 6 },
+    { 0x1, 3 },
+    { 0x2, 3 },
+    { 0xE, 4 },
+    { 0xD, 4 },
+    { 0x7, 4 },
+    { 0x13, 5 },
+    { 0x25, 6 },
+    { 0x5E, 7 },
+    { 0x43, 7 },
+    { 0xBE, 8 },
+    { 0x109, 9 },
+    { 0x1086, 13 },
+    { 0x841, 12 },
+    { 0x840, 12 },
+    { 0xF, 4 },
+    { 0x1, 4 },
+    { 0x11, 5 },
+    { 0x0, 5 },
+    { 0x2E, 6 },
+    { 0x19, 5 },
+    { 0x1, 5 },
+    { 0x6, 4 },
+    { 0x16, 5 }
+  },
+  {  /* AC bias group 3, table 9 */
+    { 0x2, 3 },
+    { 0xF, 5 },
+    { 0x6F, 7 },
+    { 0x61, 7 },
+    { 0x374, 10 },
+    { 0x1BA8, 13 },
+    { 0x3753, 14 },
+    { 0x12, 5 },
+    { 0x36, 6 },
+    { 0x0, 3 },
+    { 0x1, 3 },
+    { 0xA, 4 },
+    { 0xB, 4 },
+    { 0x1A, 5 },
+    { 0x31, 6 },
+    { 0x60, 7 },
+    { 0xDC, 8 },
+    { 0x1BB, 9 },
+    { 0x6EB, 11 },
+    { 0x1BAB, 13 },
+    { 0x3752, 14 },
+    { 0x3755, 14 },
+    { 0x3754, 14 },
+    { 0xE, 4 },
+    { 0x6, 4 },
+    { 0x13, 5 },
+    { 0xE, 5 },
+    { 0x3E, 6 },
+    { 0x8, 4 },
+    { 0x1E, 5 },
+    { 0x19, 5 },
+    { 0x3F, 6 }
+  },
+  {  /* AC bias group 3, table 10 */
+    { 0x3, 3 },
+    { 0x1C, 5 },
+    { 0x25, 6 },
+    { 0x24, 6 },
+    { 0x1DA, 9 },
+    { 0x1DBD, 13 },
+    { 0x3B7C, 14 },
+    { 0x3C, 6 },
+    { 0x3D, 6 },
+    { 0x0, 3 },
+    { 0x1, 3 },
+    { 0xB, 4 },
+    { 0xA, 4 },
+    { 0xB, 5 },
+    { 0x77, 7 },
+    { 0xEC, 8 },
+    { 0x3B6, 10 },
+    { 0x76E, 11 },
+    { 0x1DBF, 13 },
+    { 0x76FB, 15 },
+    { 0x76FA, 15 },
+    { 0x3B79, 14 },
+    { 0x3B78, 14 },
+    { 0xD, 4 },
+    { 0x1F, 5 },
+    { 0x13, 5 },
+    { 0xA, 5 },
+    { 0x8, 5 },
+    { 0xC, 4 },
+    { 0x8, 4 },
+    { 0x9, 5 },
+    { 0x3A, 6 }
+  },
+  {  /* AC bias group 3, table 11 */
+    { 0x5, 3 },
+    { 0x3, 4 },
+    { 0x4, 5 },
+    { 0x10, 5 },
+    { 0x8F, 8 },
+    { 0x475, 11 },
+    { 0x11D1, 13 },
+    { 0x79, 7 },
+    { 0x27, 6 },
+    { 0x2, 3 },
+    { 0x3, 3 },
+    { 0x1, 4 },
+    { 0x0, 4 },
+    { 0x26, 6 },
+    { 0x46, 7 },
+    { 0x11C, 9 },
+    { 0x477, 11 },
+    { 0x8ED, 12 },
+    { 0x11D0, 13 },
+    { 0x11D3, 13 },
+    { 0x11D2, 13 },
+    { 0x11D9, 13 },
+    { 0x11D8, 13 },
+    { 0xD, 4 },
+    { 0x1F, 5 },
+    { 0x12, 5 },
+    { 0x5, 5 },
+    { 0x3D, 6 },
+    { 0xC, 4 },
+    { 0xE, 4 },
+    { 0x22, 6 },
+    { 0x78, 7 }
+  },
+  {  /* AC bias group 3, table 12 */
+    { 0x5, 3 },
+    { 0xC, 4 },
+    { 0x1B, 5 },
+    { 0x0, 4 },
+    { 0x6, 6 },
+    { 0x3E2, 10 },
+    { 0x3E3D, 14 },
+    { 0xF, 7 },
+    { 0x34, 6 },
+    { 0x3, 3 },
+    { 0x2, 3 },
+    { 0x1E, 5 },
+    { 0x1D, 5 },
+    { 0x7D, 7 },
+    { 0x1F0, 9 },
+    { 0x7C6, 11 },
+    { 0x3E3C, 14 },
+    { 0x3E3F, 14 },
+    { 0x3E3E, 14 },
+    { 0x3E39, 14 },
+    { 0x3E38, 14 },
+    { 0x3E3B, 14 },
+    { 0x3E3A, 14 },
+    { 0x8, 4 },
+    { 0x1C, 5 },
+    { 0x2, 5 },
+    { 0x3F, 6 },
+    { 0x35, 6 },
+    { 0x9, 4 },
+    { 0x1, 3 },
+    { 0xE, 7 },
+    { 0xF9, 8 }
+  },
+  {  /* AC bias group 3, table 13 */
+    { 0x4, 3 },
+    { 0xB, 4 },
+    { 0x1, 4 },
+    { 0xA, 4 },
+    { 0x1E, 6 },
+    { 0xE0, 9 },
+    { 0xE1E, 13 },
+    { 0x71, 8 },
+    { 0x39, 7 },
+    { 0x7, 3 },
+    { 0x6, 3 },
+    { 0xD, 5 },
+    { 0xC, 5 },
+    { 0x20, 7 },
+    { 0x1C2, 10 },
+    { 0x1C3F, 14 },
+    { 0x1C3E, 14 },
+    { 0xE19, 13 },
+    { 0xE18, 13 },
+    { 0xE1B, 13 },
+    { 0xE1A, 13 },
+    { 0xE1D, 13 },
+    { 0xE1C, 13 },
+    { 0x0, 4 },
+    { 0x9, 5 },
+    { 0x1D, 6 },
+    { 0x1F, 6 },
+    { 0x11, 6 },
+    { 0x5, 4 },
+    { 0x1, 3 },
+    { 0x43, 8 },
+    { 0x42, 8 }
+  },
+  {  /* AC bias group 3, table 14 */
+    { 0x4, 3 },
+    { 0xD, 4 },
+    { 0x7, 4 },
+    { 0x2, 3 },
+    { 0x14, 5 },
+    { 0x16C, 9 },
+    { 0x16D1, 13 },
+    { 0x2DF, 10 },
+    { 0x16E, 9 },
+    { 0x0, 2 },
+    { 0x7, 3 },
+    { 0x2C, 6 },
+    { 0x2B, 6 },
+    { 0x2DE, 10 },
+    { 0x16D0, 13 },
+    { 0x16D3, 13 },
+    { 0x16D2, 13 },
+    { 0x2DB5, 14 },
+    { 0x2DB4, 14 },
+    { 0x2DB7, 14 },
+    { 0x2DB6, 14 },
+    { 0x16D9, 13 },
+    { 0x16D8, 13 },
+    { 0xC, 5 },
+    { 0x2A, 6 },
+    { 0x5A, 7 },
+    { 0x1B, 6 },
+    { 0x1A, 6 },
+    { 0x17, 5 },
+    { 0xC, 4 },
+    { 0x5B7, 11 },
+    { 0x5B5, 11 }
+  },
+  {  /* AC bias group 3, table 15 */
+    { 0x2, 2 },
+    { 0xF, 4 },
+    { 0x1C, 5 },
+    { 0xC, 4 },
+    { 0x3B, 6 },
+    { 0x1AC, 9 },
+    { 0x1AD8, 13 },
+    { 0x35B3, 14 },
+    { 0x35B2, 14 },
+    { 0x1, 2 },
+    { 0x0, 2 },
+    { 0x69, 7 },
+    { 0x68, 7 },
+    { 0x35BD, 14 },
+    { 0x35BC, 14 },
+    { 0x35BF, 14 },
+    { 0x35BE, 14 },
+    { 0x35B9, 14 },
+    { 0x35B8, 14 },
+    { 0x35BB, 14 },
+    { 0x35BA, 14 },
+    { 0x35B5, 14 },
+    { 0x35B4, 14 },
+    { 0x1A9, 9 },
+    { 0x1A8, 9 },
+    { 0x35A, 10 },
+    { 0xD7, 8 },
+    { 0xD5, 8 },
+    { 0x3A, 6 },
+    { 0x1B, 5 },
+    { 0x35B7, 14 },
+    { 0x35B6, 14 }
+  }
+};
+
+static const uint16_t ac_bias_3[16][32][2] = {
+  {  /* AC bias group 4, table 0 */
+    { 0x0, 3 },
+    { 0x10, 5 },
+    { 0x72, 7 },
+    { 0x71, 7 },
+    { 0x154, 9 },
+    { 0xAAB, 12 },
+    { 0xAA8, 12 },
+    { 0x14, 5 },
+    { 0x70, 7 },
+    { 0x2, 3 },
+    { 0x3, 3 },
+    { 0xC, 4 },
+    { 0xB, 4 },
+    { 0x3, 4 },
+    { 0x11, 5 },
+    { 0x73, 7 },
+    { 0x54, 7 },
+    { 0xAB, 8 },
+    { 0x2AB, 10 },
+    { 0x1553, 13 },
+    { 0x1552, 13 },
+    { 0x1555, 13 },
+    { 0x1554, 13 },
+    { 0xD, 4 },
+    { 0x1E, 5 },
+    { 0x12, 5 },
+    { 0x3E, 6 },
+    { 0x2B, 6 },
+    { 0x2, 4 },
+    { 0x3F, 6 },
+    { 0x1D, 5 },
+    { 0x13, 5 }
+  },
+  {  /* AC bias group 4, table 1 */
+    { 0x3, 3 },
+    { 0x1F, 5 },
+    { 0x29, 6 },
+    { 0x3D, 6 },
+    { 0xC, 7 },
+    { 0x69, 10 },
+    { 0x345, 13 },
+    { 0x2, 5 },
+    { 0x28, 6 },
+    { 0x2, 3 },
+    { 0x1, 3 },
+    { 0xE, 4 },
+    { 0xC, 4 },
+    { 0x15, 5 },
+    { 0x7, 6 },
+    { 0x1B, 8 },
+    { 0x6B, 10 },
+    { 0x6A, 10 },
+    { 0x344, 13 },
+    { 0x347, 13 },
+    { 0x346, 13 },
+    { 0x1A1, 12 },
+    { 0x1A0, 12 },
+    { 0xB, 4 },
+    { 0x1A, 5 },
+    { 0x12, 5 },
+    { 0x0, 5 },
+    { 0x3C, 6 },
+    { 0x8, 4 },
+    { 0x1B, 5 },
+    { 0x13, 5 },
+    { 0x1, 5 }
+  },
+  {  /* AC bias group 4, table 2 */
+    { 0x4, 3 },
+    { 0x4, 4 },
+    { 0x3F, 6 },
+    { 0x14, 5 },
+    { 0x56, 7 },
+    { 0x15C, 9 },
+    { 0x15D5, 13 },
+    { 0x3C, 6 },
+    { 0x2A, 6 },
+    { 0x0, 3 },
+    { 0x1, 3 },
+    { 0xE, 4 },
+    { 0xD, 4 },
+    { 0xC, 5 },
+    { 0xAF, 8 },
+    { 0x2BB, 10 },
+    { 0x15D4, 13 },
+    { 0x15D7, 13 },
+    { 0x15D6, 13 },
+    { 0x15D1, 13 },
+    { 0x15D0, 13 },
+    { 0x15D3, 13 },
+    { 0x15D2, 13 },
+    { 0xB, 4 },
+    { 0x19, 5 },
+    { 0xD, 5 },
+    { 0x3E, 6 },
+    { 0x31, 6 },
+    { 0x7, 4 },
+    { 0x5, 4 },
+    { 0x3D, 6 },
+    { 0x30, 6 }
+  },
+  {  /* AC bias group 4, table 3 */
+    { 0x5, 3 },
+    { 0x8, 4 },
+    { 0x1A, 5 },
+    { 0x0, 4 },
+    { 0x36, 6 },
+    { 0x11, 8 },
+    { 0x106, 12 },
+    { 0xA, 7 },
+    { 0x6E, 7 },
+    { 0x2, 3 },
+    { 0x3, 3 },
+    { 0x3, 4 },
+    { 0x2, 4 },
+    { 0x6F, 7 },
+    { 0x21, 9 },
+    { 0x20F, 13 },
+    { 0x20E, 13 },
+    { 0x101, 12 },
+    { 0x100, 12 },
+    { 0x103, 12 },
+    { 0x102, 12 },
+    { 0x105, 12 },
+    { 0x104, 12 },
+    { 0xC, 4 },
+    { 0x1E, 5 },
+    { 0x3, 5 },
+    { 0x3E, 6 },
+    { 0x3F, 6 },
+    { 0x9, 4 },
+    { 0xE, 4 },
+    { 0xB, 7 },
+    { 0x9, 7 }
+  },
+  {  /* AC bias group 4, table 4 */
+    { 0x2, 3 },
+    { 0xE, 4 },
+    { 0x1E, 5 },
+    { 0xC, 4 },
+    { 0x1F, 5 },
+    { 0x6E, 7 },
+    { 0xAD, 10 },
+    { 0xAF, 10 },
+    { 0x14, 7 },
+    { 0x4, 3 },
+    { 0x3, 3 },
+    { 0x1A, 5 },
+    { 0x17, 5 },
+    { 0x2A, 8 },
+    { 0x576, 13 },
+    { 0xAEF, 14 },
+    { 0xAEE, 14 },
+    { 0x571, 13 },
+    { 0x570, 13 },
+    { 0x573, 13 },
+    { 0x572, 13 },
+    { 0x575, 13 },
+    { 0x574, 13 },
+    { 0x3, 4 },
+    { 0x16, 5 },
+    { 0x4, 5 },
+    { 0x36, 6 },
+    { 0xB, 6 },
+    { 0xA, 4 },
+    { 0x0, 3 },
+    { 0x6F, 7 },
+    { 0xAC, 10 }
+  },
+  {  /* AC bias group 4, table 5 */
+    { 0x4, 3 },
+    { 0x5, 4 },
+    { 0x3, 3 },
+    { 0x1, 3 },
+    { 0x4, 4 },
+    { 0x2F, 6 },
+    { 0x526, 11 },
+    { 0x1495, 13 },
+    { 0xA6, 8 },
+    { 0x7, 3 },
+    { 0x6, 3 },
+    { 0x2D, 6 },
+    { 0x2C, 6 },
+    { 0x1494, 13 },
+    { 0x1497, 13 },
+    { 0x1496, 13 },
+    { 0x1491, 13 },
+    { 0x1490, 13 },
+    { 0x1493, 13 },
+    { 0x1492, 13 },
+    { 0x293D, 14 },
+    { 0x293C, 14 },
+    { 0x293F, 14 },
+    { 0x0, 3 },
+    { 0x28, 6 },
+    { 0xA5, 8 },
+    { 0x148, 9 },
+    { 0xA7, 8 },
+    { 0x2E, 6 },
+    { 0x15, 5 },
+    { 0xA4E, 12 },
+    { 0x293E, 14 }
+  },
+  {  /* AC bias group 4, table 6 */
+    { 0x4, 3 },
+    { 0x5, 4 },
+    { 0x3, 3 },
+    { 0x1, 3 },
+    { 0x4, 4 },
+    { 0x2F, 6 },
+    { 0x526, 11 },
+    { 0x1495, 13 },
+    { 0xA6, 8 },
+    { 0x7, 3 },
+    { 0x6, 3 },
+    { 0x2D, 6 },
+    { 0x2C, 6 },
+    { 0x1494, 13 },
+    { 0x1497, 13 },
+    { 0x1496, 13 },
+    { 0x1491, 13 },
+    { 0x1490, 13 },
+    { 0x1493, 13 },
+    { 0x1492, 13 },
+    { 0x293D, 14 },
+    { 0x293C, 14 },
+    { 0x293F, 14 },
+    { 0x0, 3 },
+    { 0x28, 6 },
+    { 0xA5, 8 },
+    { 0x148, 9 },
+    { 0xA7, 8 },
+    { 0x2E, 6 },
+    { 0x15, 5 },
+    { 0xA4E, 12 },
+    { 0x293E, 14 }
+  },
+  {  /* AC bias group 4, table 7 */
+    { 0x4, 3 },
+    { 0x5, 4 },
+    { 0x3, 3 },
+    { 0x1, 3 },
+    { 0x4, 4 },
+    { 0x2F, 6 },
+    { 0x526, 11 },
+    { 0x1495, 13 },
+    { 0xA6, 8 },
+    { 0x7, 3 },
+    { 0x6, 3 },
+    { 0x2D, 6 },
+    { 0x2C, 6 },
+    { 0x1494, 13 },
+    { 0x1497, 13 },
+    { 0x1496, 13 },
+    { 0x1491, 13 },
+    { 0x1490, 13 },
+    { 0x1493, 13 },
+    { 0x1492, 13 },
+    { 0x293D, 14 },
+    { 0x293C, 14 },
+    { 0x293F, 14 },
+    { 0x0, 3 },
+    { 0x28, 6 },
+    { 0xA5, 8 },
+    { 0x148, 9 },
+    { 0xA7, 8 },
+    { 0x2E, 6 },
+    { 0x15, 5 },
+    { 0xA4E, 12 },
+    { 0x293E, 14 }
+  },
+  {  /* AC bias group 4, table 8 */
+    { 0x3, 3 },
+    { 0x11, 5 },
+    { 0x20, 6 },
+    { 0x74, 7 },
+    { 0x10D, 9 },
+    { 0x863, 12 },
+    { 0x860, 12 },
+    { 0xA, 5 },
+    { 0x75, 7 },
+    { 0x1, 3 },
+    { 0x0, 3 },
+    { 0xB, 4 },
+    { 0xA, 4 },
+    { 0x18, 5 },
+    { 0x38, 6 },
+    { 0x42, 7 },
+    { 0x10F, 9 },
+    { 0x10E, 9 },
+    { 0x219, 10 },
+    { 0x10C3, 13 },
+    { 0x10C2, 13 },
+    { 0x10C5, 13 },
+    { 0x10C4, 13 },
+    { 0xF, 4 },
+    { 0x4, 4 },
+    { 0x19, 5 },
+    { 0xB, 5 },
+    { 0x39, 6 },
+    { 0x9, 4 },
+    { 0x1B, 5 },
+    { 0x1A, 5 },
+    { 0x3B, 6 }
+  },
+  {  /* AC bias group 4, table 9 */
+    { 0x5, 3 },
+    { 0x1, 4 },
+    { 0x3E, 6 },
+    { 0x1, 5 },
+    { 0xE2, 8 },
+    { 0x1C6F, 13 },
+    { 0x38D9, 14 },
+    { 0x39, 6 },
+    { 0x1F, 6 },
+    { 0x2, 3 },
+    { 0x1, 3 },
+    { 0x9, 4 },
+    { 0x8, 4 },
+    { 0x0, 5 },
+    { 0x70, 7 },
+    { 0x1C7, 9 },
+    { 0x38C, 10 },
+    { 0x71A, 11 },
+    { 0x38D8, 14 },
+    { 0x38DB, 14 },
+    { 0x38DA, 14 },
+    { 0x38DD, 14 },
+    { 0x38DC, 14 },
+    { 0xD, 4 },
+    { 0x1D, 5 },
+    { 0xE, 5 },
+    { 0x3F, 6 },
+    { 0x3C, 6 },
+    { 0xC, 4 },
+    { 0x6, 4 },
+    { 0x3D, 6 },
+    { 0x1E, 6 }
+  },
+  {  /* AC bias group 4, table 10 */
+    { 0x6, 3 },
+    { 0xB, 4 },
+    { 0x11, 5 },
+    { 0x1E, 5 },
+    { 0x74, 7 },
+    { 0x3AA, 10 },
+    { 0x1D5C, 13 },
+    { 0x1, 6 },
+    { 0x21, 6 },
+    { 0x1, 3 },
+    { 0x2, 3 },
+    { 0x7, 4 },
+    { 0x6, 4 },
+    { 0x3E, 6 },
+    { 0xEB, 8 },
+    { 0x1D4, 9 },
+    { 0xEAF, 12 },
+    { 0x3ABB, 14 },
+    { 0x3ABA, 14 },
+    { 0x1D59, 13 },
+    { 0x1D58, 13 },
+    { 0x1D5B, 13 },
+    { 0x1D5A, 13 },
+    { 0xA, 4 },
+    { 0x1C, 5 },
+    { 0x1, 5 },
+    { 0x3F, 6 },
+    { 0x3B, 6 },
+    { 0x1, 4 },
+    { 0x9, 4 },
+    { 0x20, 6 },
+    { 0x0, 6 }
+  },
+  {  /* AC bias group 4, table 11 */
+    { 0x4, 3 },
+    { 0xA, 4 },
+    { 0x17, 5 },
+    { 0x4, 4 },
+    { 0x16, 6 },
+    { 0x16A, 9 },
+    { 0x16B1, 13 },
+    { 0x17, 7 },
+    { 0x5B, 7 },
+    { 0x6, 3 },
+    { 0x7, 3 },
+    { 0x1, 4 },
+    { 0x0, 4 },
+    { 0xA, 6 },
+    { 0x2D7, 10 },
+    { 0xB5A, 12 },
+    { 0x16B0, 13 },
+    { 0x16B3, 13 },
+    { 0x16B2, 13 },
+    { 0x2D6D, 14 },
+    { 0x2D6C, 14 },
+    { 0x2D6F, 14 },
+    { 0x2D6E, 14 },
+    { 0x6, 4 },
+    { 0xA, 5 },
+    { 0x4, 5 },
+    { 0x2C, 6 },
+    { 0x17, 6 },
+    { 0x3, 4 },
+    { 0x7, 4 },
+    { 0x16, 7 },
+    { 0xB4, 8 }
+  },
+  {  /* AC bias group 4, table 12 */
+    { 0x5, 3 },
+    { 0xD, 4 },
+    { 0x5, 4 },
+    { 0x9, 4 },
+    { 0x33, 6 },
+    { 0x193, 9 },
+    { 0x192C, 13 },
+    { 0x61, 8 },
+    { 0x31, 7 },
+    { 0x0, 2 },
+    { 0x7, 3 },
+    { 0x10, 5 },
+    { 0x11, 5 },
+    { 0xC8, 8 },
+    { 0x192F, 13 },
+    { 0x325B, 14 },
+    { 0x325A, 14 },
+    { 0x1929, 13 },
+    { 0x1928, 13 },
+    { 0x192B, 13 },
+    { 0x192A, 13 },
+    { 0x325D, 14 },
+    { 0x325C, 14 },
+    { 0x18, 5 },
+    { 0x1A, 6 },
+    { 0x1B, 6 },
+    { 0x65, 7 },
+    { 0x19, 6 },
+    { 0x4, 4 },
+    { 0x7, 4 },
+    { 0x60, 8 },
+    { 0x324, 10 }
+  },
+  {  /* AC bias group 4, table 13 */
+    { 0x6, 3 },
+    { 0x0, 3 },
+    { 0x2, 4 },
+    { 0xF, 4 },
+    { 0x39, 6 },
+    { 0x1D9, 9 },
+    { 0x1D82, 13 },
+    { 0x761, 11 },
+    { 0x3BE, 10 },
+    { 0x1, 2 },
+    { 0x2, 2 },
+    { 0xF, 6 },
+    { 0xE, 6 },
+    { 0x762, 11 },
+    { 0x3B07, 14 },
+    { 0x3B06, 14 },
+    { 0x3B1D, 14 },
+    { 0x3B1C, 14 },
+    { 0x3B1F, 14 },
+    { 0x3B1E, 14 },
+    { 0x3B19, 14 },
+    { 0x3B18, 14 },
+    { 0x3B1B, 14 },
+    { 0x38, 6 },
+    { 0x1DE, 9 },
+    { 0xED, 8 },
+    { 0x3BF, 10 },
+    { 0xEE, 8 },
+    { 0x3A, 6 },
+    { 0x6, 5 },
+    { 0xEC0, 12 },
+    { 0x3B1A, 14 }
+  },
+  {  /* AC bias group 4, table 14 */
+    { 0x0, 2 },
+    { 0x2, 3 },
+    { 0xF, 5 },
+    { 0x6, 4 },
+    { 0x1C, 6 },
+    { 0x1D0, 10 },
+    { 0xE8C, 13 },
+    { 0x1D1B, 14 },
+    { 0x1D1A, 14 },
+    { 0x3, 2 },
+    { 0x2, 2 },
+    { 0xEA, 9 },
+    { 0xE9, 9 },
+    { 0xE89, 13 },
+    { 0xE88, 13 },
+    { 0xE8B, 13 },
+    { 0xE8A, 13 },
+    { 0x1D65, 14 },
+    { 0x1D64, 14 },
+    { 0x1D67, 14 },
+    { 0x1D66, 14 },
+    { 0x1D61, 14 },
+    { 0x1D60, 14 },
+    { 0x3AD, 11 },
+    { 0x1D63, 14 },
+    { 0x1D62, 14 },
+    { 0x1D1D, 14 },
+    { 0x1D1C, 14 },
+    { 0x3B, 7 },
+    { 0x1D7, 10 },
+    { 0x1D1F, 14 },
+    { 0x1D1E, 14 }
+  },
+  {  /* AC bias group 4, table 15 */
+    { 0x2, 2 },
+    { 0xF, 4 },
+    { 0x1C, 5 },
+    { 0xC, 4 },
+    { 0x3B, 6 },
+    { 0x1AC, 9 },
+    { 0x1AD8, 13 },
+    { 0x35B3, 14 },
+    { 0x35B2, 14 },
+    { 0x1, 2 },
+    { 0x0, 2 },
+    { 0x69, 7 },
+    { 0x68, 7 },
+    { 0x35BD, 14 },
+    { 0x35BC, 14 },
+    { 0x35BF, 14 },
+    { 0x35BE, 14 },
+    { 0x35B9, 14 },
+    { 0x35B8, 14 },
+    { 0x35BB, 14 },
+    { 0x35BA, 14 },
+    { 0x35B5, 14 },
+    { 0x35B4, 14 },
+    { 0x1A9, 9 },
+    { 0x1A8, 9 },
+    { 0x35A, 10 },
+    { 0xD7, 8 },
+    { 0xD5, 8 },
+    { 0x3A, 6 },
+    { 0x1B, 5 },
+    { 0x35B7, 14 },
+    { 0x35B6, 14 }
+  }
+};
+
+#endif /* VP3DATA_H */
diff --git a/contrib/ffmpeg/libavcodec/vp3dsp.c b/contrib/ffmpeg/libavcodec/vp3dsp.c
new file mode 100644
index 000000000..a48515a5e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vp3dsp.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (C) 2004 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file vp3dsp.c
+ * Standard C DSP-oriented functions cribbed from the original VP3
+ * source code.
+ */
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+
+#define IdctAdjustBeforeShift 8
+#define xC1S7 64277
+#define xC2S6 60547
+#define xC3S5 54491
+#define xC4S4 46341
+#define xC5S3 36410
+#define xC6S2 25080
+#define xC7S1 12785
+
+#define M(a,b) (((a) * (b))>>16)
+
+static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
+{
+    int16_t *ip = input;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
+    int Ed, Gd, Add, Bdd, Fd, Hd;
+
+    int i;
+
+    /* Inverse DCT on the rows now */
+    for (i = 0; i < 8; i++) {
+        /* Check for non-zero values */
+        if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
+            A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]);
+            B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]);
+            C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]);
+            D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]);
+
+            Ad = M(xC4S4, (A - C));
+            Bd = M(xC4S4, (B - D));
+
+            Cd = A + C;
+            Dd = B + D;
+
+            E = M(xC4S4, (ip[0] + ip[4]));
+            F = M(xC4S4, (ip[0] - ip[4]));
+
+            G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]);
+            H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]);
+
+            Ed = E - G;
+            Gd = E + G;
+
+            Add = F + Ad;
+            Bdd = Bd - H;
+
+            Fd = F - Ad;
+            Hd = Bd + H;
+
+            /*  Final sequence of operations over-write original inputs. */
+            ip[0] = Gd + Cd ;
+            ip[7] = Gd - Cd ;
+
+            ip[1] = Add + Hd;
+            ip[2] = Add - Hd;
+
+            ip[3] = Ed + Dd ;
+            ip[4] = Ed - Dd ;
+
+            ip[5] = Fd + Bdd;
+            ip[6] = Fd - Bdd;
+        }
+
+        ip += 8;            /* next row */
+    }
+
+    ip = input;
+
+    for ( i = 0; i < 8; i++) {
+        /* Check for non-zero values (bitwise or faster than ||) */
+        if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
+             ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
+
+            A = M(xC1S7, ip[1*8]) + M(xC7S1, ip[7*8]);
+            B = M(xC7S1, ip[1*8]) - M(xC1S7, ip[7*8]);
+            C = M(xC3S5, ip[3*8]) + M(xC5S3, ip[5*8]);
+            D = M(xC3S5, ip[5*8]) - M(xC5S3, ip[3*8]);
+
+            Ad = M(xC4S4, (A - C));
+            Bd = M(xC4S4, (B - D));
+
+            Cd = A + C;
+            Dd = B + D;
+
+            E = M(xC4S4, (ip[0*8] + ip[4*8])) + 8;
+            F = M(xC4S4, (ip[0*8] - ip[4*8])) + 8;
+
+            if(type==1){  //HACK
+                E += 16*128;
+                F += 16*128;
+            }
+
+            G = M(xC2S6, ip[2*8]) + M(xC6S2, ip[6*8]);
+            H = M(xC6S2, ip[2*8]) - M(xC2S6, ip[6*8]);
+
+            Ed = E - G;
+            Gd = E + G;
+
+            Add = F + Ad;
+            Bdd = Bd - H;
+
+            Fd = F - Ad;
+            Hd = Bd + H;
+
+            /* Final sequence of operations over-write original inputs. */
+            if(type==0){
+                ip[0*8] = (Gd + Cd )  >> 4;
+                ip[7*8] = (Gd - Cd )  >> 4;
+
+                ip[1*8] = (Add + Hd ) >> 4;
+                ip[2*8] = (Add - Hd ) >> 4;
+
+                ip[3*8] = (Ed + Dd )  >> 4;
+                ip[4*8] = (Ed - Dd )  >> 4;
+
+                ip[5*8] = (Fd + Bdd ) >> 4;
+                ip[6*8] = (Fd - Bdd ) >> 4;
+            }else if(type==1){
+                dst[0*stride] = cm[(Gd + Cd )  >> 4];
+                dst[7*stride] = cm[(Gd - Cd )  >> 4];
+
+                dst[1*stride] = cm[(Add + Hd ) >> 4];
+                dst[2*stride] = cm[(Add - Hd ) >> 4];
+
+                dst[3*stride] = cm[(Ed + Dd )  >> 4];
+                dst[4*stride] = cm[(Ed - Dd )  >> 4];
+
+                dst[5*stride] = cm[(Fd + Bdd ) >> 4];
+                dst[6*stride] = cm[(Fd - Bdd ) >> 4];
+            }else{
+                dst[0*stride] = cm[dst[0*stride] + ((Gd + Cd )  >> 4)];
+                dst[7*stride] = cm[dst[7*stride] + ((Gd - Cd )  >> 4)];
+
+                dst[1*stride] = cm[dst[1*stride] + ((Add + Hd ) >> 4)];
+                dst[2*stride] = cm[dst[2*stride] + ((Add - Hd ) >> 4)];
+
+                dst[3*stride] = cm[dst[3*stride] + ((Ed + Dd )  >> 4)];
+                dst[4*stride] = cm[dst[4*stride] + ((Ed - Dd )  >> 4)];
+
+                dst[5*stride] = cm[dst[5*stride] + ((Fd + Bdd ) >> 4)];
+                dst[6*stride] = cm[dst[6*stride] + ((Fd - Bdd ) >> 4)];
+            }
+
+        } else {
+            if(type==0){
+                ip[0*8] =
+                ip[1*8] =
+                ip[2*8] =
+                ip[3*8] =
+                ip[4*8] =
+                ip[5*8] =
+                ip[6*8] =
+                ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
+            }else if(type==1){
+                dst[0*stride]=
+                dst[1*stride]=
+                dst[2*stride]=
+                dst[3*stride]=
+                dst[4*stride]=
+                dst[5*stride]=
+                dst[6*stride]=
+                dst[7*stride]= 128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
+            }else{
+                if(ip[0*8]){
+                    int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
+                    dst[0*stride] = cm[dst[0*stride] + v];
+                    dst[1*stride] = cm[dst[1*stride] + v];
+                    dst[2*stride] = cm[dst[2*stride] + v];
+                    dst[3*stride] = cm[dst[3*stride] + v];
+                    dst[4*stride] = cm[dst[4*stride] + v];
+                    dst[5*stride] = cm[dst[5*stride] + v];
+                    dst[6*stride] = cm[dst[6*stride] + v];
+                    dst[7*stride] = cm[dst[7*stride] + v];
+                }
+            }
+        }
+
+        ip++;            /* next column */
+        dst++;
+    }
+}
+
+void ff_vp3_idct_c(DCTELEM *block/* align 16*/){
+    idct(NULL, 0, block, 0);
+}
+
+void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
+    idct(dest, line_size, block, 1);
+}
+
+void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
+    idct(dest, line_size, block, 2);
+}
diff --git a/contrib/ffmpeg/libavcodec/vp5.c b/contrib/ffmpeg/libavcodec/vp5.c
new file mode 100644
index 000000000..ac953c7aa
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vp5.c
@@ -0,0 +1,290 @@
+/**
+ * @file vp5.c
+ * VP5 compatible video decoder
+ *
+ * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "bitstream.h"
+#include "mpegvideo.h"
+
+#include "vp56.h"
+#include "vp56data.h"
+#include "vp5data.h"
+
+
+static int vp5_parse_header(vp56_context_t *s, uint8_t *buf, int buf_size,
+                            int *golden_frame)
+{
+    vp56_range_coder_t *c = &s->c;
+    int rows, cols;
+
+    vp56_init_range_decoder(&s->c, buf, buf_size);
+    s->frames[VP56_FRAME_CURRENT].key_frame = !vp56_rac_get(c);
+    vp56_rac_get(c);
+    vp56_init_dequant(s, vp56_rac_gets(c, 6));
+    if (s->frames[VP56_FRAME_CURRENT].key_frame)
+    {
+        vp56_rac_gets(c, 8);
+        if(vp56_rac_gets(c, 5) > 5)
+            return 0;
+        vp56_rac_gets(c, 2);
+        if (vp56_rac_get(c)) {
+            av_log(s->avctx, AV_LOG_ERROR, "interlacing not supported\n");
+            return 0;
+        }
+        rows = vp56_rac_gets(c, 8);  /* number of stored macroblock rows */
+        cols = vp56_rac_gets(c, 8);  /* number of stored macroblock cols */
+        vp56_rac_gets(c, 8);  /* number of displayed macroblock rows */
+        vp56_rac_gets(c, 8);  /* number of displayed macroblock cols */
+        vp56_rac_gets(c, 2);
+        if (16*cols != s->avctx->coded_width ||
+            16*rows != s->avctx->coded_height) {
+            avcodec_set_dimensions(s->avctx, 16*cols, 16*rows);
+            return 2;
+        }
+    }
+    return 1;
+}
+
+/* Gives very similar result than the vp6 version except in a few cases */
+static int vp5_adjust(int v, int t)
+{
+    int s2, s1 = v >> 31;
+    v ^= s1;
+    v -= s1;
+    v *= v < 2*t;
+    v -= t;
+    s2 = v >> 31;
+    v ^= s2;
+    v -= s2;
+    v = t - v;
+    v += s1;
+    v ^= s1;
+    return v;
+}
+
+static void vp5_parse_vector_adjustment(vp56_context_t *s, vp56_mv_t *vect)
+{
+    vp56_range_coder_t *c = &s->c;
+    int comp, di;
+
+    for (comp=0; comp<2; comp++) {
+        int delta = 0;
+        if (vp56_rac_get_prob(c, s->vector_model_dct[comp])) {
+            int sign = vp56_rac_get_prob(c, s->vector_model_sig[comp]);
+            di  = vp56_rac_get_prob(c, s->vector_model_pdi[comp][0]);
+            di |= vp56_rac_get_prob(c, s->vector_model_pdi[comp][1]) << 1;
+            delta = vp56_rac_get_tree(c, vp56_pva_tree,
+                                      s->vector_model_pdv[comp]);
+            delta = di | (delta << 2);
+            delta = (delta ^ -sign) + sign;
+        }
+        if (!comp)
+            vect->x = delta;
+        else
+            vect->y = delta;
+    }
+}
+
+static void vp5_parse_vector_models(vp56_context_t *s)
+{
+    vp56_range_coder_t *c = &s->c;
+    int comp, node;
+
+    for (comp=0; comp<2; comp++) {
+        if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][0]))
+            s->vector_model_dct[comp] = vp56_rac_gets_nn(c, 7);
+        if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][1]))
+            s->vector_model_sig[comp] = vp56_rac_gets_nn(c, 7);
+        if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][2]))
+            s->vector_model_pdi[comp][0] = vp56_rac_gets_nn(c, 7);
+        if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][3]))
+            s->vector_model_pdi[comp][1] = vp56_rac_gets_nn(c, 7);
+    }
+
+    for (comp=0; comp<2; comp++)
+        for (node=0; node<7; node++)
+            if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][4 + node]))
+                s->vector_model_pdv[comp][node] = vp56_rac_gets_nn(c, 7);
+}
+
+static void vp5_parse_coeff_models(vp56_context_t *s)
+{
+    vp56_range_coder_t *c = &s->c;
+    uint8_t def_prob[11];
+    int node, cg, ctx;
+    int ct;    /* code type */
+    int pt;    /* plane type (0 for Y, 1 for U or V) */
+
+    memset(def_prob, 0x80, sizeof(def_prob));
+
+    for (pt=0; pt<2; pt++)
+        for (node=0; node<11; node++)
+            if (vp56_rac_get_prob(c, vp5_dccv_pct[pt][node])) {
+                def_prob[node] = vp56_rac_gets_nn(c, 7);
+                s->coeff_model_dccv[pt][node] = def_prob[node];
+            } else if (s->frames[VP56_FRAME_CURRENT].key_frame) {
+                s->coeff_model_dccv[pt][node] = def_prob[node];
+            }
+
+    for (ct=0; ct<3; ct++)
+        for (pt=0; pt<2; pt++)
+            for (cg=0; cg<6; cg++)
+                for (node=0; node<11; node++)
+                    if (vp56_rac_get_prob(c, vp5_ract_pct[ct][pt][cg][node])) {
+                        def_prob[node] = vp56_rac_gets_nn(c, 7);
+                        s->coeff_model_ract[pt][ct][cg][node] = def_prob[node];
+                    } else if (s->frames[VP56_FRAME_CURRENT].key_frame) {
+                        s->coeff_model_ract[pt][ct][cg][node] = def_prob[node];
+                    }
+
+    /* coeff_model_dcct is a linear combination of coeff_model_dccv */
+    for (pt=0; pt<2; pt++)
+        for (ctx=0; ctx<36; ctx++)
+            for (node=0; node<5; node++)
+                s->coeff_model_dcct[pt][ctx][node] = clip(((s->coeff_model_dccv[pt][node] * vp5_dccv_lc[node][ctx][0] + 128) >> 8) + vp5_dccv_lc[node][ctx][1], 1, 254);
+
+    /* coeff_model_acct is a linear combination of coeff_model_ract */
+    for (ct=0; ct<3; ct++)
+        for (pt=0; pt<2; pt++)
+            for (cg=0; cg<3; cg++)
+                for (ctx=0; ctx<6; ctx++)
+                    for (node=0; node<5; node++)
+                        s->coeff_model_acct[pt][ct][cg][ctx][node] = clip(((s->coeff_model_ract[pt][ct][cg][node] * vp5_ract_lc[ct][cg][node][ctx][0] + 128) >> 8) + vp5_ract_lc[ct][cg][node][ctx][1], 1, 254);
+}
+
+static void vp5_parse_coeff(vp56_context_t *s)
+{
+    vp56_range_coder_t *c = &s->c;
+    uint8_t *permute = s->scantable.permutated;
+    uint8_t *model, *model2;
+    int coeff, sign, coeff_idx;
+    int b, i, cg, idx, ctx, ctx_last;
+    int pt = 0;    /* plane type (0 for Y, 1 for U or V) */
+
+    for (b=0; b<6; b++) {
+        int ct = 1;    /* code type */
+
+        if (b > 3) pt = 1;
+
+        ctx = 6*s->coeff_ctx[vp56_b6to4[b]][0]
+              + s->above_blocks[s->above_block_idx[b]].not_null_dc;
+        model = s->coeff_model_dccv[pt];
+        model2 = s->coeff_model_dcct[pt][ctx];
+
+        for (coeff_idx=0; coeff_idx<64; ) {
+            if (vp56_rac_get_prob(c, model2[0])) {
+                if (vp56_rac_get_prob(c, model2[2])) {
+                    if (vp56_rac_get_prob(c, model2[3])) {
+                        s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 4;
+                        idx = vp56_rac_get_tree(c, vp56_pc_tree, model);
+                        sign = vp56_rac_get(c);
+                        coeff = vp56_coeff_bias[idx];
+                        for (i=vp56_coeff_bit_length[idx]; i>=0; i--)
+                            coeff += vp56_rac_get_prob(c, vp56_coeff_parse_table[idx][i]) << i;
+                    } else {
+                        if (vp56_rac_get_prob(c, model2[4])) {
+                            coeff = 3 + vp56_rac_get_prob(c, model[5]);
+                            s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 3;
+                        } else {
+                            coeff = 2;
+                            s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 2;
+                        }
+                        sign = vp56_rac_get(c);
+                    }
+                    ct = 2;
+                } else {
+                    ct = 1;
+                    s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 1;
+                    sign = vp56_rac_get(c);
+                    coeff = 1;
+                }
+                coeff = (coeff ^ -sign) + sign;
+                if (coeff_idx)
+                    coeff *= s->dequant_ac;
+                s->block_coeff[b][permute[coeff_idx]] = coeff;
+            } else {
+                if (ct && !vp56_rac_get_prob(c, model2[1]))
+                    break;
+                ct = 0;
+                s->coeff_ctx[vp56_b6to4[b]][coeff_idx] = 0;
+            }
+
+            cg = vp5_coeff_groups[++coeff_idx];
+            ctx = s->coeff_ctx[vp56_b6to4[b]][coeff_idx];
+            model = s->coeff_model_ract[pt][ct][cg];
+            model2 = cg > 2 ? model : s->coeff_model_acct[pt][ct][cg][ctx];
+        }
+
+        ctx_last = FFMIN(s->coeff_ctx_last[vp56_b6to4[b]], 24);
+        s->coeff_ctx_last[vp56_b6to4[b]] = coeff_idx;
+        if (coeff_idx < ctx_last)
+            for (i=coeff_idx; i<=ctx_last; i++)
+                s->coeff_ctx[vp56_b6to4[b]][i] = 5;
+        s->above_blocks[s->above_block_idx[b]].not_null_dc = s->coeff_ctx[vp56_b6to4[b]][0];
+    }
+}
+
+static void vp5_default_models_init(vp56_context_t *s)
+{
+    int i;
+
+    for (i=0; i<2; i++) {
+        s->vector_model_sig[i] = 0x80;
+        s->vector_model_dct[i] = 0x80;
+        s->vector_model_pdi[i][0] = 0x55;
+        s->vector_model_pdi[i][1] = 0x80;
+    }
+    memcpy(s->mb_types_stats, vp56_def_mb_types_stats, sizeof(s->mb_types_stats));
+    memset(s->vector_model_pdv, 0x80, sizeof(s->vector_model_pdv));
+}
+
+static int vp5_decode_init(AVCodecContext *avctx)
+{
+    vp56_context_t *s = avctx->priv_data;
+
+    vp56_init(s, avctx, 1);
+    s->vp56_coord_div = vp5_coord_div;
+    s->parse_vector_adjustment = vp5_parse_vector_adjustment;
+    s->adjust = vp5_adjust;
+    s->parse_coeff = vp5_parse_coeff;
+    s->default_models_init = vp5_default_models_init;
+    s->parse_vector_models = vp5_parse_vector_models;
+    s->parse_coeff_models = vp5_parse_coeff_models;
+    s->parse_header = vp5_parse_header;
+
+    return 0;
+}
+
+AVCodec vp5_decoder = {
+    "vp5",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_VP5,
+    sizeof(vp56_context_t),
+    vp5_decode_init,
+    NULL,
+    vp56_free,
+    vp56_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/vp56.c b/contrib/ffmpeg/libavcodec/vp56.c
new file mode 100644
index 000000000..eb78d02e4
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vp56.c
@@ -0,0 +1,665 @@
+/**
+ * @file vp56.c
+ * VP5 and VP6 compatible video decoder (common features)
+ *
+ * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "avcodec.h"
+
+#include "vp56.h"
+#include "vp56data.h"
+
+
+void vp56_init_dequant(vp56_context_t *s, int quantizer)
+{
+    s->quantizer = quantizer;
+    s->dequant_dc = vp56_dc_dequant[quantizer] << 2;
+    s->dequant_ac = vp56_ac_dequant[quantizer] << 2;
+}
+
+static int vp56_get_vectors_predictors(vp56_context_t *s, int row, int col,
+                                       vp56_frame_t ref_frame)
+{
+    int nb_pred = 0;
+    vp56_mv_t vect[2] = {{0,0}, {0,0}};
+    int pos, offset;
+    vp56_mv_t mvp;
+
+    for (pos=0; pos<12; pos++) {
+        mvp.x = col + vp56_candidate_predictor_pos[pos][0];
+        mvp.y = row + vp56_candidate_predictor_pos[pos][1];
+        if (mvp.x < 0 || mvp.x >= s->mb_width ||
+            mvp.y < 0 || mvp.y >= s->mb_height)
+            continue;
+        offset = mvp.x + s->mb_width*mvp.y;
+
+        if (vp56_reference_frame[s->macroblocks[offset].type] != ref_frame)
+            continue;
+        if ((s->macroblocks[offset].mv.x == vect[0].x &&
+             s->macroblocks[offset].mv.y == vect[0].y) ||
+            (s->macroblocks[offset].mv.x == 0 &&
+             s->macroblocks[offset].mv.y == 0))
+            continue;
+
+        vect[nb_pred++] = s->macroblocks[offset].mv;
+        if (nb_pred > 1) {
+            nb_pred = -1;
+            break;
+        }
+        s->vector_candidate_pos = pos;
+    }
+
+    s->vector_candidate[0] = vect[0];
+    s->vector_candidate[1] = vect[1];
+
+    return nb_pred+1;
+}
+
+static void vp56_parse_mb_type_models(vp56_context_t *s)
+{
+    vp56_range_coder_t *c = &s->c;
+    int i, ctx, type;
+
+    for (ctx=0; ctx<3; ctx++) {
+        if (vp56_rac_get_prob(c, 174)) {
+            int idx = vp56_rac_gets(c, 4);
+            memcpy(s->mb_types_stats[ctx],vp56_pre_def_mb_type_stats[idx][ctx],
+                   sizeof(s->mb_types_stats[ctx]));
+        }
+        if (vp56_rac_get_prob(c, 254)) {
+            for (type=0; type<10; type++) {
+                for(i=0; i<2; i++) {
+                    if (vp56_rac_get_prob(c, 205)) {
+                        int delta, sign = vp56_rac_get(c);
+
+                        delta = vp56_rac_get_tree(c, vp56_pmbtm_tree,
+                                                  vp56_mb_type_model_model);
+                        if (!delta)
+                            delta = 4 * vp56_rac_gets(c, 7);
+                        s->mb_types_stats[ctx][type][i] += (delta ^ -sign) + sign;
+                    }
+                }
+            }
+        }
+    }
+
+    /* compute MB type probability tables based on previous MB type */
+    for (ctx=0; ctx<3; ctx++) {
+        int p[10];
+
+        for (type=0; type<10; type++)
+            p[type] = 100 * s->mb_types_stats[ctx][type][1];
+
+        for (type=0; type<10; type++) {
+            int p02, p34, p0234, p17, p56, p89, p5689, p156789;
+
+            /* conservative MB type probability */
+            s->mb_type_model[ctx][type][0] = 255 - (255 * s->mb_types_stats[ctx][type][0]) / (1 + s->mb_types_stats[ctx][type][0] + s->mb_types_stats[ctx][type][1]);
+
+            p[type] = 0;    /* same MB type => weight is null */
+
+            /* binary tree parsing probabilities */
+            p02 = p[0] + p[2];
+            p34 = p[3] + p[4];
+            p0234 = p02 + p34;
+            p17 = p[1] + p[7];
+            p56 = p[5] + p[6];
+            p89 = p[8] + p[9];
+            p5689 = p56 + p89;
+            p156789 = p17 + p5689;
+
+            s->mb_type_model[ctx][type][1] = 1 + 255 * p0234/(1+p0234+p156789);
+            s->mb_type_model[ctx][type][2] = 1 + 255 * p02  / (1+p0234);
+            s->mb_type_model[ctx][type][3] = 1 + 255 * p17  / (1+p156789);
+            s->mb_type_model[ctx][type][4] = 1 + 255 * p[0] / (1+p02);
+            s->mb_type_model[ctx][type][5] = 1 + 255 * p[3] / (1+p34);
+            s->mb_type_model[ctx][type][6] = 1 + 255 * p[1] / (1+p17);
+            s->mb_type_model[ctx][type][7] = 1 + 255 * p56  / (1+p5689);
+            s->mb_type_model[ctx][type][8] = 1 + 255 * p[5] / (1+p56);
+            s->mb_type_model[ctx][type][9] = 1 + 255 * p[8] / (1+p89);
+
+            /* restore initial value */
+            p[type] = 100 * s->mb_types_stats[ctx][type][1];
+        }
+    }
+}
+
+static vp56_mb_t vp56_parse_mb_type(vp56_context_t *s,
+                                    vp56_mb_t prev_type, int ctx)
+{
+    uint8_t *mb_type_model = s->mb_type_model[ctx][prev_type];
+    vp56_range_coder_t *c = &s->c;
+
+    if (vp56_rac_get_prob(c, mb_type_model[0]))
+        return prev_type;
+    else
+        return vp56_rac_get_tree(c, vp56_pmbt_tree, mb_type_model);
+}
+
+static void vp56_decode_4mv(vp56_context_t *s, int row, int col)
+{
+    vp56_mv_t mv = {0,0};
+    int type[4];
+    int b;
+
+    /* parse each block type */
+    for (b=0; b<4; b++) {
+        type[b] = vp56_rac_gets(&s->c, 2);
+        if (type[b])
+            type[b]++;  /* only returns 0, 2, 3 or 4 (all INTER_PF) */
+    }
+
+    /* get vectors */
+    for (b=0; b<4; b++) {
+        switch (type[b]) {
+            case VP56_MB_INTER_NOVEC_PF:
+                s->mv[b] = (vp56_mv_t) {0,0};
+                break;
+            case VP56_MB_INTER_DELTA_PF:
+                s->parse_vector_adjustment(s, &s->mv[b]);
+                break;
+            case VP56_MB_INTER_V1_PF:
+                s->mv[b] = s->vector_candidate[0];
+                break;
+            case VP56_MB_INTER_V2_PF:
+                s->mv[b] = s->vector_candidate[1];
+                break;
+        }
+        mv.x += s->mv[b].x;
+        mv.y += s->mv[b].y;
+    }
+
+    /* this is the one selected for the whole MB for prediction */
+    s->macroblocks[row * s->mb_width + col].mv = s->mv[3];
+
+    /* chroma vectors are average luma vectors */
+    if (s->avctx->codec->id == CODEC_ID_VP5) {
+        s->mv[4].x = s->mv[5].x = RSHIFT(mv.x,2);
+        s->mv[4].y = s->mv[5].y = RSHIFT(mv.y,2);
+    } else {
+        s->mv[4] = s->mv[5] = (vp56_mv_t) {mv.x/4, mv.y/4};
+    }
+}
+
+static vp56_mb_t vp56_decode_mv(vp56_context_t *s, int row, int col)
+{
+    vp56_mv_t *mv, vect = {0,0};
+    int ctx, b;
+
+    ctx = vp56_get_vectors_predictors(s, row, col, VP56_FRAME_PREVIOUS);
+    s->mb_type = vp56_parse_mb_type(s, s->mb_type, ctx);
+    s->macroblocks[row * s->mb_width + col].type = s->mb_type;
+
+    switch (s->mb_type) {
+        case VP56_MB_INTER_V1_PF:
+            mv = &s->vector_candidate[0];
+            break;
+
+        case VP56_MB_INTER_V2_PF:
+            mv = &s->vector_candidate[1];
+            break;
+
+        case VP56_MB_INTER_V1_GF:
+            vp56_get_vectors_predictors(s, row, col, VP56_FRAME_GOLDEN);
+            mv = &s->vector_candidate[0];
+            break;
+
+        case VP56_MB_INTER_V2_GF:
+            vp56_get_vectors_predictors(s, row, col, VP56_FRAME_GOLDEN);
+            mv = &s->vector_candidate[1];
+            break;
+
+        case VP56_MB_INTER_DELTA_PF:
+            s->parse_vector_adjustment(s, &vect);
+            mv = &vect;
+            break;
+
+        case VP56_MB_INTER_DELTA_GF:
+            vp56_get_vectors_predictors(s, row, col, VP56_FRAME_GOLDEN);
+            s->parse_vector_adjustment(s, &vect);
+            mv = &vect;
+            break;
+
+        case VP56_MB_INTER_4V:
+            vp56_decode_4mv(s, row, col);
+            return s->mb_type;
+
+        default:
+            mv = &vect;
+            break;
+    }
+
+    s->macroblocks[row*s->mb_width + col].mv = *mv;
+
+    /* same vector for all blocks */
+    for (b=0; b<6; b++)
+        s->mv[b] = *mv;
+
+    return s->mb_type;
+}
+
+static void vp56_add_predictors_dc(vp56_context_t *s, vp56_frame_t ref_frame)
+{
+    int idx = s->scantable.permutated[0];
+    int i;
+
+    for (i=0; i<6; i++) {
+        vp56_ref_dc_t *ab = &s->above_blocks[s->above_block_idx[i]];
+        vp56_ref_dc_t *lb = &s->left_block[vp56_b6to4[i]];
+        int count = 0;
+        int dc = 0;
+
+        if (ref_frame == lb->ref_frame) {
+            dc += lb->dc_coeff;
+            count++;
+        }
+        if (ref_frame == ab->ref_frame) {
+            dc += ab->dc_coeff;
+            count++;
+        }
+        if (s->avctx->codec->id == CODEC_ID_VP5) {
+            if (count < 2 && ref_frame == ab[-1].ref_frame) {
+                dc += ab[-1].dc_coeff;
+                count++;
+            }
+            if (count < 2 && ref_frame == ab[1].ref_frame) {
+                dc += ab[1].dc_coeff;
+                count++;
+            }
+        }
+        if (count == 0)
+            dc = s->prev_dc[vp56_b6to3[i]][ref_frame];
+        else if (count == 2)
+            dc /= 2;
+
+        s->block_coeff[i][idx] += dc;
+        s->prev_dc[vp56_b6to3[i]][ref_frame] = s->block_coeff[i][idx];
+        ab->dc_coeff = s->block_coeff[i][idx];
+        ab->ref_frame = ref_frame;
+        lb->dc_coeff = s->block_coeff[i][idx];
+        lb->ref_frame = ref_frame;
+        s->block_coeff[i][idx] *= s->dequant_dc;
+    }
+}
+
+static void vp56_edge_filter(vp56_context_t *s, uint8_t *yuv,
+                             int pix_inc, int line_inc, int t)
+{
+    int pix2_inc = 2 * pix_inc;
+    int i, v;
+
+    for (i=0; i<12; i++) {
+        v = (yuv[-pix2_inc] + 3*(yuv[0]-yuv[-pix_inc]) - yuv[pix_inc] + 4) >>3;
+        v = s->adjust(v, t);
+        yuv[-pix_inc] = clip_uint8(yuv[-pix_inc] + v);
+        yuv[0] = clip_uint8(yuv[0] - v);
+        yuv += line_inc;
+    }
+}
+
+static void vp56_deblock_filter(vp56_context_t *s, uint8_t *yuv,
+                                int stride, int dx, int dy)
+{
+    int t = vp56_filter_threshold[s->quantizer];
+    if (dx)  vp56_edge_filter(s, yuv +         10-dx ,      1, stride, t);
+    if (dy)  vp56_edge_filter(s, yuv + stride*(10-dy), stride,      1, t);
+}
+
+static void vp56_mc(vp56_context_t *s, int b, uint8_t *src,
+                    int stride, int x, int y)
+{
+    int plane = vp56_b6to3[b];
+    uint8_t *dst= s->frames[VP56_FRAME_CURRENT].data[plane]+s->block_offset[b];
+    uint8_t *src_block;
+    int src_offset;
+    int overlap_offset = 0;
+    int mask = s->vp56_coord_div[b] - 1;
+    int deblock_filtering = s->deblock_filtering;
+    int dx;
+    int dy;
+
+    if (s->avctx->skip_loop_filter >= AVDISCARD_ALL ||
+        (s->avctx->skip_loop_filter >= AVDISCARD_NONKEY
+         && !s->frames[VP56_FRAME_CURRENT].key_frame))
+        deblock_filtering = 0;
+
+    dx = s->mv[b].x / s->vp56_coord_div[b];
+    dy = s->mv[b].y / s->vp56_coord_div[b];
+
+    if (b >= 4) {
+        x /= 2;
+        y /= 2;
+    }
+    x += dx - 2;
+    y += dy - 2;
+
+    if (x<0 || x+12>=s->plane_width[plane] ||
+        y<0 || y+12>=s->plane_height[plane]) {
+        ff_emulated_edge_mc(s->edge_emu_buffer,
+                            src + s->block_offset[b] + (dy-2)*stride + (dx-2),
+                            stride, 12, 12, x, y,
+                            s->plane_width[plane],
+                            s->plane_height[plane]);
+        src_block = s->edge_emu_buffer;
+        src_offset = 2 + 2*stride;
+    } else if (deblock_filtering) {
+        /* only need a 12x12 block, but there is no such dsp function, */
+        /* so copy a 16x12 block */
+        s->dsp.put_pixels_tab[0][0](s->edge_emu_buffer,
+                                    src + s->block_offset[b] + (dy-2)*stride + (dx-2),
+                                    stride, 12);
+        src_block = s->edge_emu_buffer;
+        src_offset = 2 + 2*stride;
+    } else {
+        src_block = src;
+        src_offset = s->block_offset[b] + dy*stride + dx;
+    }
+
+    if (deblock_filtering)
+        vp56_deblock_filter(s, src_block, stride, dx&7, dy&7);
+
+    if (s->mv[b].x & mask)
+        overlap_offset += (s->mv[b].x > 0) ? 1 : -1;
+    if (s->mv[b].y & mask)
+        overlap_offset += (s->mv[b].y > 0) ? stride : -stride;
+
+    if (overlap_offset) {
+        if (s->filter)
+            s->filter(s, dst, src_block, src_offset, src_offset+overlap_offset,
+                      stride, s->mv[b], mask, s->filter_selection, b<4);
+        else
+            s->dsp.put_no_rnd_pixels_l2[1](dst, src_block+src_offset,
+                                           src_block+src_offset+overlap_offset,
+                                           stride, 8);
+    } else {
+        s->dsp.put_pixels_tab[1][0](dst, src_block+src_offset, stride, 8);
+    }
+}
+
+static void vp56_decode_mb(vp56_context_t *s, int row, int col)
+{
+    AVFrame *frame_current, *frame_ref;
+    vp56_mb_t mb_type;
+    vp56_frame_t ref_frame;
+    int b, plan, off;
+
+    if (s->frames[VP56_FRAME_CURRENT].key_frame)
+        mb_type = VP56_MB_INTRA;
+    else
+        mb_type = vp56_decode_mv(s, row, col);
+    ref_frame = vp56_reference_frame[mb_type];
+
+    memset(s->block_coeff, 0, sizeof(s->block_coeff));
+
+    s->parse_coeff(s);
+
+    vp56_add_predictors_dc(s, ref_frame);
+
+    frame_current = &s->frames[VP56_FRAME_CURRENT];
+    frame_ref = &s->frames[ref_frame];
+
+    switch (mb_type) {
+        case VP56_MB_INTRA:
+            for (b=0; b<6; b++) {
+                plan = vp56_b6to3[b];
+                s->dsp.idct_put(frame_current->data[plan] + s->block_offset[b],
+                                s->stride[plan], s->block_coeff[b]);
+            }
+            break;
+
+        case VP56_MB_INTER_NOVEC_PF:
+        case VP56_MB_INTER_NOVEC_GF:
+            for (b=0; b<6; b++) {
+                plan = vp56_b6to3[b];
+                off = s->block_offset[b];
+                s->dsp.put_pixels_tab[1][0](frame_current->data[plan] + off,
+                                            frame_ref->data[plan] + off,
+                                            s->stride[plan], 8);
+                s->dsp.idct_add(frame_current->data[plan] + off,
+                                s->stride[plan], s->block_coeff[b]);
+            }
+            break;
+
+        case VP56_MB_INTER_DELTA_PF:
+        case VP56_MB_INTER_V1_PF:
+        case VP56_MB_INTER_V2_PF:
+        case VP56_MB_INTER_DELTA_GF:
+        case VP56_MB_INTER_4V:
+        case VP56_MB_INTER_V1_GF:
+        case VP56_MB_INTER_V2_GF:
+            for (b=0; b<6; b++) {
+                int x_off = b==1 || b==3 ? 8 : 0;
+                int y_off = b==2 || b==3 ? 8 : 0;
+                plan = vp56_b6to3[b];
+                vp56_mc(s, b, frame_ref->data[plan], s->stride[plan],
+                        16*col+x_off, 16*row+y_off);
+                s->dsp.idct_add(frame_current->data[plan] + s->block_offset[b],
+                                s->stride[plan], s->block_coeff[b]);
+            }
+            break;
+    }
+}
+
+static int vp56_size_changed(AVCodecContext *avctx, vp56_context_t *s)
+{
+    int stride = s->frames[VP56_FRAME_CURRENT].linesize[0];
+    int i;
+
+    s->plane_width[0] = s->avctx->coded_width;
+    s->plane_width[1] = s->plane_width[2] = s->avctx->coded_width/2;
+    s->plane_height[0] = s->avctx->coded_height;
+    s->plane_height[1] = s->plane_height[2] = s->avctx->coded_height/2;
+
+    for (i=0; i<3; i++)
+        s->stride[i] = s->flip * s->frames[VP56_FRAME_CURRENT].linesize[i];
+
+    s->mb_width = (s->avctx->coded_width+15) / 16;
+    s->mb_height = (s->avctx->coded_height+15) / 16;
+
+    if (s->mb_width > 1000 || s->mb_height > 1000) {
+        av_log(avctx, AV_LOG_ERROR, "picture too big\n");
+        return -1;
+    }
+
+    s->above_blocks = av_realloc(s->above_blocks,
+                                 (4*s->mb_width+6) * sizeof(*s->above_blocks));
+    s->macroblocks = av_realloc(s->macroblocks,
+                                s->mb_width*s->mb_height*sizeof(*s->macroblocks));
+    av_free(s->edge_emu_buffer_alloc);
+    s->edge_emu_buffer_alloc = av_malloc(16*stride);
+    s->edge_emu_buffer = s->edge_emu_buffer_alloc;
+    if (s->flip < 0)
+        s->edge_emu_buffer += 15 * stride;
+
+    return 0;
+}
+
+int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
+                      uint8_t *buf, int buf_size)
+{
+    vp56_context_t *s = avctx->priv_data;
+    AVFrame *const p = &s->frames[VP56_FRAME_CURRENT];
+    AVFrame *picture = data;
+    int mb_row, mb_col, mb_row_flip, mb_offset = 0;
+    int block, y, uv, stride_y, stride_uv;
+    int golden_frame = 0;
+    int res;
+
+    res = s->parse_header(s, buf, buf_size, &golden_frame);
+    if (!res)
+        return -1;
+
+    p->reference = 1;
+    if (avctx->get_buffer(avctx, p) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    if (res == 2)
+        if (vp56_size_changed(avctx, s)) {
+            avctx->release_buffer(avctx, p);
+            return -1;
+        }
+
+    if (p->key_frame) {
+        p->pict_type = FF_I_TYPE;
+        s->default_models_init(s);
+        for (block=0; block<s->mb_height*s->mb_width; block++)
+            s->macroblocks[block].type = VP56_MB_INTRA;
+    } else {
+        p->pict_type = FF_P_TYPE;
+        vp56_parse_mb_type_models(s);
+        s->parse_vector_models(s);
+        s->mb_type = VP56_MB_INTER_NOVEC_PF;
+    }
+
+    s->parse_coeff_models(s);
+
+    memset(s->prev_dc, 0, sizeof(s->prev_dc));
+    s->prev_dc[1][VP56_FRAME_CURRENT] = 128;
+    s->prev_dc[2][VP56_FRAME_CURRENT] = 128;
+
+    for (block=0; block < 4*s->mb_width+6; block++) {
+        s->above_blocks[block].ref_frame = -1;
+        s->above_blocks[block].dc_coeff = 0;
+        s->above_blocks[block].not_null_dc = 0;
+    }
+    s->above_blocks[2*s->mb_width + 2].ref_frame = 0;
+    s->above_blocks[3*s->mb_width + 4].ref_frame = 0;
+
+    stride_y  = p->linesize[0];
+    stride_uv = p->linesize[1];
+
+    if (s->flip < 0)
+        mb_offset = 7;
+
+    /* main macroblocks loop */
+    for (mb_row=0; mb_row<s->mb_height; mb_row++) {
+        if (s->flip < 0)
+            mb_row_flip = s->mb_height - mb_row - 1;
+        else
+            mb_row_flip = mb_row;
+
+        for (block=0; block<4; block++) {
+            s->left_block[block].ref_frame = -1;
+            s->left_block[block].dc_coeff = 0;
+            s->left_block[block].not_null_dc = 0;
+            memset(s->coeff_ctx[block], 0, 64*sizeof(s->coeff_ctx[block][0]));
+        }
+        memset(s->coeff_ctx_last, 24, sizeof(s->coeff_ctx_last));
+
+        s->above_block_idx[0] = 1;
+        s->above_block_idx[1] = 2;
+        s->above_block_idx[2] = 1;
+        s->above_block_idx[3] = 2;
+        s->above_block_idx[4] = 2*s->mb_width + 2 + 1;
+        s->above_block_idx[5] = 3*s->mb_width + 4 + 1;
+
+        s->block_offset[s->frbi] = (mb_row_flip*16 + mb_offset) * stride_y;
+        s->block_offset[s->srbi] = s->block_offset[s->frbi] + 8*stride_y;
+        s->block_offset[1] = s->block_offset[0] + 8;
+        s->block_offset[3] = s->block_offset[2] + 8;
+        s->block_offset[4] = (mb_row_flip*8 + mb_offset) * stride_uv;
+        s->block_offset[5] = s->block_offset[4];
+
+        for (mb_col=0; mb_col<s->mb_width; mb_col++) {
+            vp56_decode_mb(s, mb_row, mb_col);
+
+            for (y=0; y<4; y++) {
+                s->above_block_idx[y] += 2;
+                s->block_offset[y] += 16;
+            }
+
+            for (uv=4; uv<6; uv++) {
+                s->above_block_idx[uv] += 1;
+                s->block_offset[uv] += 8;
+            }
+        }
+    }
+
+    if (s->frames[VP56_FRAME_PREVIOUS].data[0]
+        && (s->frames[VP56_FRAME_PREVIOUS].data[0]
+            != s->frames[VP56_FRAME_GOLDEN].data[0])) {
+        avctx->release_buffer(avctx, &s->frames[VP56_FRAME_PREVIOUS]);
+    }
+    if (p->key_frame || golden_frame) {
+        if (s->frames[VP56_FRAME_GOLDEN].data[0])
+            avctx->release_buffer(avctx, &s->frames[VP56_FRAME_GOLDEN]);
+        s->frames[VP56_FRAME_GOLDEN] = *p;
+    }
+    s->frames[VP56_FRAME_PREVIOUS] = *p;
+
+    *picture = *p;
+    *data_size = sizeof(AVPicture);
+
+    return buf_size;
+}
+
+void vp56_init(vp56_context_t *s, AVCodecContext *avctx, int flip)
+{
+    int i;
+
+    s->avctx = avctx;
+    avctx->pix_fmt = PIX_FMT_YUV420P;
+
+    if (s->avctx->idct_algo == FF_IDCT_AUTO)
+        s->avctx->idct_algo = FF_IDCT_VP3;
+    dsputil_init(&s->dsp, s->avctx);
+    ff_init_scantable(s->dsp.idct_permutation, &s->scantable,ff_zigzag_direct);
+
+    avcodec_set_dimensions(s->avctx, 0, 0);
+
+    for (i=0; i<3; i++)
+        s->frames[i].data[0] = NULL;
+    s->edge_emu_buffer_alloc = NULL;
+
+    s->above_blocks = NULL;
+    s->macroblocks = NULL;
+    s->quantizer = -1;
+    s->deblock_filtering = 1;
+
+    s->filter = NULL;
+
+    if (flip) {
+        s->flip = -1;
+        s->frbi = 2;
+        s->srbi = 0;
+    } else {
+        s->flip = 1;
+        s->frbi = 0;
+        s->srbi = 2;
+    }
+}
+
+int vp56_free(AVCodecContext *avctx)
+{
+    vp56_context_t *s = avctx->priv_data;
+
+    av_free(s->above_blocks);
+    av_free(s->macroblocks);
+    av_free(s->edge_emu_buffer_alloc);
+    if (s->frames[VP56_FRAME_GOLDEN].data[0]
+        && (s->frames[VP56_FRAME_PREVIOUS].data[0]
+            != s->frames[VP56_FRAME_GOLDEN].data[0]))
+        avctx->release_buffer(avctx, &s->frames[VP56_FRAME_GOLDEN]);
+    if (s->frames[VP56_FRAME_PREVIOUS].data[0])
+        avctx->release_buffer(avctx, &s->frames[VP56_FRAME_PREVIOUS]);
+    return 0;
+}
diff --git a/contrib/ffmpeg/libavcodec/vp56.h b/contrib/ffmpeg/libavcodec/vp56.h
new file mode 100644
index 000000000..d6808b1e5
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vp56.h
@@ -0,0 +1,248 @@
+/**
+ * @file vp56.h
+ * VP5 and VP6 compatible video decoder (common features)
+ *
+ * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef VP56_H
+#define VP56_H
+
+#include "vp56data.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+
+typedef struct vp56_context vp56_context_t;
+typedef struct vp56_mv vp56_mv_t;
+
+typedef void (*vp56_parse_vector_adjustment_t)(vp56_context_t *s,
+                                               vp56_mv_t *vect);
+typedef int (*vp56_adjust_t)(int v, int t);
+typedef void (*vp56_filter_t)(vp56_context_t *s, uint8_t *dst, uint8_t *src,
+                              int offset1, int offset2, int stride,
+                              vp56_mv_t mv, int mask, int select, int luma);
+typedef void (*vp56_parse_coeff_t)(vp56_context_t *s);
+typedef void (*vp56_default_models_init_t)(vp56_context_t *s);
+typedef void (*vp56_parse_vector_models_t)(vp56_context_t *s);
+typedef void (*vp56_parse_coeff_models_t)(vp56_context_t *s);
+typedef int (*vp56_parse_header_t)(vp56_context_t *s, uint8_t *buf,
+                                   int buf_size, int *golden_frame);
+
+typedef struct {
+    int high;
+    int bits;
+    const uint8_t *buffer;
+    unsigned long code_word;
+} vp56_range_coder_t;
+
+typedef struct {
+    uint8_t not_null_dc;
+    vp56_frame_t ref_frame;
+    DCTELEM dc_coeff;
+} vp56_ref_dc_t;
+
+struct vp56_mv {
+    int x;
+    int y;
+};
+
+typedef struct {
+    uint8_t type;
+    vp56_mv_t mv;
+} vp56_macroblock_t;
+
+struct vp56_context {
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    ScanTable scantable;
+    AVFrame frames[3];
+    uint8_t *edge_emu_buffer_alloc;
+    uint8_t *edge_emu_buffer;
+    vp56_range_coder_t c;
+
+    /* frame info */
+    int plane_width[3];
+    int plane_height[3];
+    int mb_width;   /* number of horizontal MB */
+    int mb_height;  /* number of vertical MB */
+    int block_offset[6];
+
+    int quantizer;
+    uint16_t dequant_dc;
+    uint16_t dequant_ac;
+
+    /* DC predictors management */
+    vp56_ref_dc_t *above_blocks;
+    vp56_ref_dc_t left_block[4];
+    int above_block_idx[6];
+    DCTELEM prev_dc[3][3];    /* [plan][ref_frame] */
+
+    /* blocks / macroblock */
+    vp56_mb_t mb_type;
+    vp56_macroblock_t *macroblocks;
+    DECLARE_ALIGNED_16(DCTELEM, block_coeff[6][64]);
+    uint8_t coeff_reorder[64];       /* used in vp6 only */
+    uint8_t coeff_index_to_pos[64];  /* used in vp6 only */
+
+    /* motion vectors */
+    vp56_mv_t mv[6];  /* vectors for each block in MB */
+    vp56_mv_t vector_candidate[2];
+    int vector_candidate_pos;
+
+    /* filtering hints */
+    int deblock_filtering;
+    int filter_selection;
+    int filter_mode;
+    int max_vector_length;
+    int sample_variance_threshold;
+
+    /* AC models */
+    uint8_t vector_model_sig[2];           /* delta sign */
+    uint8_t vector_model_dct[2];           /* delta coding types */
+    uint8_t vector_model_pdi[2][2];        /* predefined delta init */
+    uint8_t vector_model_pdv[2][7];        /* predefined delta values */
+    uint8_t vector_model_fdv[2][8];        /* 8 bit delta value definition */
+    uint8_t mb_type_model[3][10][10];      /* model for decoding MB type */
+    uint8_t coeff_model_dccv[2][11];       /* DC coeff value */
+    uint8_t coeff_model_ract[2][3][6][11]; /* Run/AC coding type and AC coeff value */
+    uint8_t coeff_model_acct[2][3][3][6][5];/* vp5 only AC coding type for coding group < 3 */
+    uint8_t coeff_model_dcct[2][36][5];    /* DC coeff coding type */
+    uint8_t coeff_model_runv[2][14];       /* run value (vp6 only) */
+    uint8_t mb_types_stats[3][10][2];      /* contextual, next MB type stats */
+    uint8_t coeff_ctx[4][64];              /* used in vp5 only */
+    uint8_t coeff_ctx_last[4];             /* used in vp5 only */
+
+    /* upside-down flipping hints */
+    int flip;  /* are we flipping ? */
+    int frbi;  /* first row block index in MB */
+    int srbi;  /* second row block index in MB */
+    int stride[3];  /* stride for each plan */
+
+    const uint8_t *vp56_coord_div;
+    vp56_parse_vector_adjustment_t parse_vector_adjustment;
+    vp56_adjust_t adjust;
+    vp56_filter_t filter;
+    vp56_parse_coeff_t parse_coeff;
+    vp56_default_models_init_t default_models_init;
+    vp56_parse_vector_models_t parse_vector_models;
+    vp56_parse_coeff_models_t parse_coeff_models;
+    vp56_parse_header_t parse_header;
+};
+
+
+void vp56_init(vp56_context_t *s, AVCodecContext *avctx, int flip);
+int vp56_free(AVCodecContext *avctx);
+void vp56_init_dequant(vp56_context_t *s, int quantizer);
+int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
+                      uint8_t *buf, int buf_size);
+
+
+/**
+ * vp56 specific range coder implementation
+ */
+
+static inline void vp56_init_range_decoder(vp56_range_coder_t *c,
+                                           const uint8_t *buf, int buf_size)
+{
+    c->high = 255;
+    c->bits = 8;
+    c->buffer = buf;
+    c->code_word = *c->buffer++ << 8;
+    c->code_word |= *c->buffer++;
+}
+
+static inline int vp56_rac_get_prob(vp56_range_coder_t *c, uint8_t prob)
+{
+    unsigned int low = 1 + (((c->high - 1) * prob) / 256);
+    unsigned int low_shift = low << 8;
+    int bit = c->code_word >= low_shift;
+
+    if (bit) {
+        c->high -= low;
+        c->code_word -= low_shift;
+    } else {
+        c->high = low;
+    }
+
+    /* normalize */
+    while (c->high < 128) {
+        c->high <<= 1;
+        c->code_word <<= 1;
+        if (--c->bits == 0) {
+            c->bits = 8;
+            c->code_word |= *c->buffer++;
+        }
+    }
+    return bit;
+}
+
+static inline int vp56_rac_get(vp56_range_coder_t *c)
+{
+    /* equiprobable */
+    int low = (c->high + 1) >> 1;
+    unsigned int low_shift = low << 8;
+    int bit = c->code_word >= low_shift;
+    if (bit) {
+        c->high = (c->high - low) << 1;
+        c->code_word -= low_shift;
+    } else {
+        c->high = low << 1;
+    }
+
+    /* normalize */
+    c->code_word <<= 1;
+    if (--c->bits == 0) {
+        c->bits = 8;
+        c->code_word |= *c->buffer++;
+    }
+    return bit;
+}
+
+static inline int vp56_rac_gets(vp56_range_coder_t *c, int bits)
+{
+    int value = 0;
+
+    while (bits--) {
+        value = (value << 1) | vp56_rac_get(c);
+    }
+
+    return value;
+}
+
+static inline int vp56_rac_gets_nn(vp56_range_coder_t *c, int bits)
+{
+    int v = vp56_rac_gets(c, 7) << 1;
+    return v + !v;
+}
+
+static inline int vp56_rac_get_tree(vp56_range_coder_t *c,
+                                    const vp56_tree_t *tree,
+                                    const uint8_t *probs)
+{
+    while (tree->val > 0) {
+        if (vp56_rac_get_prob(c, probs[tree->prob_idx]))
+            tree += tree->val;
+        else
+            tree++;
+    }
+    return -tree->val;
+}
+
+#endif /* VP56_H */
diff --git a/contrib/ffmpeg/libavcodec/vp56data.c b/contrib/ffmpeg/libavcodec/vp56data.c
new file mode 100644
index 000000000..e75c6d1ce
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vp56data.c
@@ -0,0 +1,66 @@
+/**
+ * @file vp56data.c
+ * VP5 and VP6 compatible video decoder (common data)
+ *
+ * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "vp56data.h"
+
+const uint8_t vp56_b6to3[] = { 0, 0, 0, 0, 1, 2 };
+const uint8_t vp56_b6to4[] = { 0, 0, 1, 1, 2, 3 };
+
+const uint8_t vp56_coeff_parse_table[6][11] = {
+    { 159,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0 },
+    { 145, 165,   0,   0,   0,   0,   0,   0,   0,   0,   0 },
+    { 140, 148, 173,   0,   0,   0,   0,   0,   0,   0,   0 },
+    { 135, 140, 155, 176,   0,   0,   0,   0,   0,   0,   0 },
+    { 130, 134, 141, 157, 180,   0,   0,   0,   0,   0,   0 },
+    { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254 },
+};
+
+const uint8_t vp56_def_mb_types_stats[3][10][2] = {
+    { {  69, 42 }, {   1,  2 }, {  1,   7 }, {  44, 42 }, {  6, 22 },
+      {   1,  3 }, {   0,  2 }, {  1,   5 }, {   0,  1 }, {  0,  0 }, },
+    { { 229,  8 }, {   1,  1 }, {  0,   8 }, {   0,  0 }, {  0,  0 },
+      {   1,  2 }, {   0,  1 }, {  0,   0 }, {   1,  1 }, {  0,  0 }, },
+    { { 122, 35 }, {   1,  1 }, {  1,   6 }, {  46, 34 }, {  0,  0 },
+      {   1,  2 }, {   0,  1 }, {  0,   1 }, {   1,  1 }, {  0,  0 }, },
+};
+
+const vp56_tree_t vp56_pva_tree[] = {
+    { 8, 0},
+    { 4, 1},
+    { 2, 2}, {-0}, {-1},
+    { 2, 3}, {-2}, {-3},
+    { 4, 4},
+    { 2, 5}, {-4}, {-5},
+    { 2, 6}, {-6}, {-7},
+};
+
+const vp56_tree_t vp56_pc_tree[] = {
+    { 4, 6},
+    { 2, 7}, {-0}, {-1},
+    { 4, 8},
+    { 2, 9}, {-2}, {-3},
+    { 2,10}, {-4}, {-5},
+};
+
+const uint8_t vp56_coeff_bias[] = { 5, 7, 11, 19, 35, 67 };
+const uint8_t vp56_coeff_bit_length[] = { 0, 1, 2, 3, 4, 10 };
diff --git a/contrib/ffmpeg/libavcodec/vp56data.h b/contrib/ffmpeg/libavcodec/vp56data.h
new file mode 100644
index 000000000..dbf92dd68
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vp56data.h
@@ -0,0 +1,248 @@
+/**
+ * @file vp56data.h
+ * VP5 and VP6 compatible video decoder (common data)
+ *
+ * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef VP56DATA_H
+#define VP56DATA_H
+
+#include "common.h"
+
+typedef enum {
+    VP56_FRAME_CURRENT  = 0,
+    VP56_FRAME_PREVIOUS = 1,
+    VP56_FRAME_GOLDEN   = 2,
+} vp56_frame_t;
+
+typedef enum {
+    VP56_MB_INTER_NOVEC_PF = 0,  /**< Inter MB, no vector, from previous frame */
+    VP56_MB_INTRA          = 1,  /**< Intra MB */
+    VP56_MB_INTER_DELTA_PF = 2,  /**< Inter MB, above/left vector + delta, from previous frame */
+    VP56_MB_INTER_V1_PF    = 3,  /**< Inter MB, first vector, from previous frame */
+    VP56_MB_INTER_V2_PF    = 4,  /**< Inter MB, second vector, from previous frame */
+    VP56_MB_INTER_NOVEC_GF = 5,  /**< Inter MB, no vector, from golden frame */
+    VP56_MB_INTER_DELTA_GF = 6,  /**< Inter MB, above/left vector + delta, from golden frame */
+    VP56_MB_INTER_4V       = 7,  /**< Inter MB, 4 vectors, from previous frame */
+    VP56_MB_INTER_V1_GF    = 8,  /**< Inter MB, first vector, from golden frame */
+    VP56_MB_INTER_V2_GF    = 9,  /**< Inter MB, second vector, from golden frame */
+} vp56_mb_t;
+
+typedef struct {
+  int8_t val;
+  int8_t prob_idx;
+} vp56_tree_t;
+
+extern const uint8_t vp56_b6to3[];
+extern const uint8_t vp56_b6to4[];
+extern const uint8_t vp56_coeff_parse_table[6][11];
+extern const uint8_t vp56_def_mb_types_stats[3][10][2];
+extern const vp56_tree_t vp56_pva_tree[];
+extern const vp56_tree_t vp56_pc_tree[];
+extern const uint8_t vp56_coeff_bias[];
+extern const uint8_t vp56_coeff_bit_length[];
+
+static const vp56_frame_t vp56_reference_frame[] = {
+    VP56_FRAME_PREVIOUS,  /* VP56_MB_INTER_NOVEC_PF */
+    VP56_FRAME_CURRENT,   /* VP56_MB_INTRA */
+    VP56_FRAME_PREVIOUS,  /* VP56_MB_INTER_DELTA_PF */
+    VP56_FRAME_PREVIOUS,  /* VP56_MB_INTER_V1_PF */
+    VP56_FRAME_PREVIOUS,  /* VP56_MB_INTER_V2_PF */
+    VP56_FRAME_GOLDEN,    /* VP56_MB_INTER_NOVEC_GF */
+    VP56_FRAME_GOLDEN,    /* VP56_MB_INTER_DELTA_GF */
+    VP56_FRAME_PREVIOUS,  /* VP56_MB_INTER_4V */
+    VP56_FRAME_GOLDEN,    /* VP56_MB_INTER_V1_GF */
+    VP56_FRAME_GOLDEN,    /* VP56_MB_INTER_V2_GF */
+};
+
+static const uint8_t vp56_ac_dequant[64] = {
+    94, 92, 90, 88, 86, 82, 78, 74,
+    70, 66, 62, 58, 54, 53, 52, 51,
+    50, 49, 48, 47, 46, 45, 44, 43,
+    42, 40, 39, 37, 36, 35, 34, 33,
+    32, 31, 30, 29, 28, 27, 26, 25,
+    24, 23, 22, 21, 20, 19, 18, 17,
+    16, 15, 14, 13, 12, 11, 10,  9,
+     8,  7,  6,  5,  4,  3,  2,  1,
+};
+
+static const uint8_t vp56_dc_dequant[64] = {
+    47, 47, 47, 47, 45, 43, 43, 43,
+    43, 43, 42, 41, 41, 40, 40, 40,
+    40, 35, 35, 35, 35, 33, 33, 33,
+    33, 32, 32, 32, 27, 27, 26, 26,
+    25, 25, 24, 24, 23, 23, 19, 19,
+    19, 19, 18, 18, 17, 16, 16, 16,
+    16, 16, 15, 11, 11, 11, 10, 10,
+     9,  8,  7,  5,  3,  3,  2,  2,
+};
+
+static const uint8_t vp56_pre_def_mb_type_stats[16][3][10][2] = {
+  { { {   9, 15 }, {  32, 25 }, {  7,  19 }, {   9, 21 }, {  1, 12 },
+      {  14, 12 }, {   3, 18 }, { 14,  23 }, {   3, 10 }, {  0,  4 }, },
+    { {  41, 22 }, {   1,  0 }, {  1,  31 }, {   0,  0 }, {  0,  0 },
+      {   0,  1 }, {   1,  7 }, {  0,   1 }, {  98, 25 }, {  4, 10 }, },
+    { {   2,  3 }, {   2,  3 }, {  0,   2 }, {   0,  2 }, {  0,  0 },
+      {  11,  4 }, {   1,  4 }, {  0,   2 }, {   3,  2 }, {  0,  4 }, }, },
+  { { {  48, 39 }, {   1,  2 }, { 11,  27 }, {  29, 44 }, {  7, 27 },
+      {   1,  4 }, {   0,  3 }, {  1,   6 }, {   1,  2 }, {  0,  0 }, },
+    { { 123, 37 }, {   6,  4 }, {  1,  27 }, {   0,  0 }, {  0,  0 },
+      {   5,  8 }, {   1,  7 }, {  0,   1 }, {  12, 10 }, {  0,  2 }, },
+    { {  49, 46 }, {   3,  4 }, {  7,  31 }, {  42, 41 }, {  0,  0 },
+      {   2,  6 }, {   1,  7 }, {  1,   4 }, {   2,  4 }, {  0,  1 }, }, },
+  { { {  21, 32 }, {   1,  2 }, {  4,  10 }, {  32, 43 }, {  6, 23 },
+      {   2,  3 }, {   1, 19 }, {  1,   6 }, {  12, 21 }, {  0,  7 }, },
+    { {  26, 14 }, {  14, 12 }, {  0,  24 }, {   0,  0 }, {  0,  0 },
+      {  55, 17 }, {   1,  9 }, {  0,  36 }, {   5,  7 }, {  1,  3 }, },
+    { {  26, 25 }, {   1,  1 }, {  2,  10 }, {  67, 39 }, {  0,  0 },
+      {   1,  1 }, {   0, 14 }, {  0,   2 }, {  31, 26 }, {  1,  6 }, }, },
+  { { {  69, 83 }, {   0,  0 }, {  0,   2 }, {  10, 29 }, {  3, 12 },
+      {   0,  1 }, {   0,  3 }, {  0,   3 }, {   2,  2 }, {  0,  0 }, },
+    { { 209,  5 }, {   0,  0 }, {  0,  27 }, {   0,  0 }, {  0,  0 },
+      {   0,  1 }, {   0,  1 }, {  0,   1 }, {   0,  0 }, {  0,  0 }, },
+    { { 103, 46 }, {   1,  2 }, {  2,  10 }, {  33, 42 }, {  0,  0 },
+      {   1,  4 }, {   0,  3 }, {  0,   1 }, {   1,  3 }, {  0,  0 }, }, },
+  { { {  11, 20 }, {   1,  4 }, { 18,  36 }, {  43, 48 }, { 13, 35 },
+      {   0,  2 }, {   0,  5 }, {  3,  12 }, {   1,  2 }, {  0,  0 }, },
+    { {   2,  5 }, {   4,  5 }, {  0, 121 }, {   0,  0 }, {  0,  0 },
+      {   0,  3 }, {   2,  4 }, {  1,   4 }, {   2,  2 }, {  0,  1 }, },
+    { {  14, 31 }, {   9, 13 }, { 14,  54 }, {  22, 29 }, {  0,  0 },
+      {   2,  6 }, {   4, 18 }, {  6,  13 }, {   1,  5 }, {  0,  1 }, }, },
+  { { {  70, 44 }, {   0,  1 }, {  2,  10 }, {  37, 46 }, {  8, 26 },
+      {   0,  2 }, {   0,  2 }, {  0,   2 }, {   0,  1 }, {  0,  0 }, },
+    { { 175,  5 }, {   0,  1 }, {  0,  48 }, {   0,  0 }, {  0,  0 },
+      {   0,  2 }, {   0,  1 }, {  0,   2 }, {   0,  1 }, {  0,  0 }, },
+    { {  85, 39 }, {   0,  0 }, {  1,   9 }, {  69, 40 }, {  0,  0 },
+      {   0,  1 }, {   0,  3 }, {  0,   1 }, {   2,  3 }, {  0,  0 }, }, },
+  { { {   8, 15 }, {   0,  1 }, {  8,  21 }, {  74, 53 }, { 22, 42 },
+      {   0,  1 }, {   0,  2 }, {  0,   3 }, {   1,  2 }, {  0,  0 }, },
+    { {  83,  5 }, {   2,  3 }, {  0, 102 }, {   0,  0 }, {  0,  0 },
+      {   1,  3 }, {   0,  2 }, {  0,   1 }, {   0,  0 }, {  0,  0 }, },
+    { {  31, 28 }, {   0,  0 }, {  3,  14 }, { 130, 34 }, {  0,  0 },
+      {   0,  1 }, {   0,  3 }, {  0,   1 }, {   3,  3 }, {  0,  1 }, }, },
+  { { { 141, 42 }, {   0,  0 }, {  1,   4 }, {  11, 24 }, {  1, 11 },
+      {   0,  1 }, {   0,  1 }, {  0,   2 }, {   0,  0 }, {  0,  0 }, },
+    { { 233,  6 }, {   0,  0 }, {  0,   8 }, {   0,  0 }, {  0,  0 },
+      {   0,  1 }, {   0,  1 }, {  0,   0 }, {   0,  1 }, {  0,  0 }, },
+    { { 171, 25 }, {   0,  0 }, {  1,   5 }, {  25, 21 }, {  0,  0 },
+      {   0,  1 }, {   0,  1 }, {  0,   0 }, {   0,  0 }, {  0,  0 }, }, },
+  { { {   8, 19 }, {   4, 10 }, { 24,  45 }, {  21, 37 }, {  9, 29 },
+      {   0,  3 }, {   1,  7 }, { 11,  25 }, {   0,  2 }, {  0,  1 }, },
+    { {  34, 16 }, { 112, 21 }, {  1,  28 }, {   0,  0 }, {  0,  0 },
+      {   6,  8 }, {   1,  7 }, {  0,   3 }, {   2,  5 }, {  0,  2 }, },
+    { {  17, 21 }, {  68, 29 }, {  6,  15 }, {  13, 22 }, {  0,  0 },
+      {   6, 12 }, {   3, 14 }, {  4,  10 }, {   1,  7 }, {  0,  3 }, }, },
+  { { {  46, 42 }, {   0,  1 }, {  2,  10 }, {  54, 51 }, { 10, 30 },
+      {   0,  2 }, {   0,  2 }, {  0,   1 }, {   0,  1 }, {  0,  0 }, },
+    { { 159, 35 }, {   2,  2 }, {  0,  25 }, {   0,  0 }, {  0,  0 },
+      {   3,  6 }, {   0,  5 }, {  0,   1 }, {   4,  4 }, {  0,  1 }, },
+    { {  51, 39 }, {   0,  1 }, {  2,  12 }, {  91, 44 }, {  0,  0 },
+      {   0,  2 }, {   0,  3 }, {  0,   1 }, {   2,  3 }, {  0,  1 }, }, },
+  { { {  28, 32 }, {   0,  0 }, {  3,  10 }, {  75, 51 }, { 14, 33 },
+      {   0,  1 }, {   0,  2 }, {  0,   1 }, {   1,  2 }, {  0,  0 }, },
+    { {  75, 39 }, {   5,  7 }, {  2,  48 }, {   0,  0 }, {  0,  0 },
+      {   3, 11 }, {   2, 16 }, {  1,   4 }, {   7, 10 }, {  0,  2 }, },
+    { {  81, 25 }, {   0,  0 }, {  2,   9 }, { 106, 26 }, {  0,  0 },
+      {   0,  1 }, {   0,  1 }, {  0,   1 }, {   1,  1 }, {  0,  0 }, }, },
+  { { { 100, 46 }, {   0,  1 }, {  3,   9 }, {  21, 37 }, {  5, 20 },
+      {   0,  1 }, {   0,  2 }, {  1,   2 }, {   0,  1 }, {  0,  0 }, },
+    { { 212, 21 }, {   0,  1 }, {  0,   9 }, {   0,  0 }, {  0,  0 },
+      {   1,  2 }, {   0,  2 }, {  0,   0 }, {   2,  2 }, {  0,  0 }, },
+    { { 140, 37 }, {   0,  1 }, {  1,   8 }, {  24, 33 }, {  0,  0 },
+      {   1,  2 }, {   0,  2 }, {  0,   1 }, {   1,  2 }, {  0,  0 }, }, },
+  { { {  27, 29 }, {   0,  1 }, {  9,  25 }, {  53, 51 }, { 12, 34 },
+      {   0,  1 }, {   0,  3 }, {  1,   5 }, {   0,  2 }, {  0,  0 }, },
+    { {   4,  2 }, {   0,  0 }, {  0, 172 }, {   0,  0 }, {  0,  0 },
+      {   0,  1 }, {   0,  2 }, {  0,   0 }, {   2,  0 }, {  0,  0 }, },
+    { {  14, 23 }, {   1,  3 }, { 11,  53 }, {  90, 31 }, {  0,  0 },
+      {   0,  3 }, {   1,  5 }, {  2,   6 }, {   1,  2 }, {  0,  0 }, }, },
+  { { {  80, 38 }, {   0,  0 }, {  1,   4 }, {  69, 33 }, {  5, 16 },
+      {   0,  1 }, {   0,  1 }, {  0,   0 }, {   0,  1 }, {  0,  0 }, },
+    { { 187, 22 }, {   1,  1 }, {  0,  17 }, {   0,  0 }, {  0,  0 },
+      {   3,  6 }, {   0,  4 }, {  0,   1 }, {   4,  4 }, {  0,  1 }, },
+    { { 123, 29 }, {   0,  0 }, {  1,   7 }, {  57, 30 }, {  0,  0 },
+      {   0,  1 }, {   0,  1 }, {  0,   1 }, {   0,  1 }, {  0,  0 }, }, },
+  { { {  16, 20 }, {   0,  0 }, {  2,   8 }, { 104, 49 }, { 15, 33 },
+      {   0,  1 }, {   0,  1 }, {  0,   1 }, {   1,  1 }, {  0,  0 }, },
+    { { 133,  6 }, {   1,  2 }, {  1,  70 }, {   0,  0 }, {  0,  0 },
+      {   0,  2 }, {   0,  4 }, {  0,   3 }, {   1,  1 }, {  0,  0 }, },
+    { {  13, 14 }, {   0,  0 }, {  4,  20 }, { 175, 20 }, {  0,  0 },
+      {   0,  1 }, {   0,  1 }, {  0,   1 }, {   1,  1 }, {  0,  0 }, }, },
+  { { { 194, 16 }, {   0,  0 }, {  1,   1 }, {   1,  9 }, {  1,  3 },
+      {   0,  0 }, {   0,  1 }, {  0,   1 }, {   0,  0 }, {  0,  0 }, },
+    { { 251,  1 }, {   0,  0 }, {  0,   2 }, {   0,  0 }, {  0,  0 },
+      {   0,  0 }, {   0,  0 }, {  0,   0 }, {   0,  0 }, {  0,  0 }, },
+    { { 202, 23 }, {   0,  0 }, {  1,   3 }, {   2,  9 }, {  0,  0 },
+      {   0,  1 }, {   0,  1 }, {  0,   1 }, {   0,  0 }, {  0,  0 }, }, },
+};
+
+static const uint8_t vp56_filter_threshold[] = {
+    14, 14, 13, 13, 12, 12, 10, 10,
+    10, 10,  8,  8,  8,  8,  8,  8,
+     8,  8,  8,  8,  8,  8,  8,  8,
+     8,  8,  8,  8,  8,  8,  8,  8,
+     8,  8,  8,  8,  7,  7,  7,  7,
+     7,  7,  6,  6,  6,  6,  6,  6,
+     5,  5,  5,  5,  4,  4,  4,  4,
+     4,  4,  4,  3,  3,  3,  3,  2,
+};
+
+static const uint8_t vp56_mb_type_model_model[] = {
+    171, 83, 199, 140, 125, 104,
+};
+
+static const vp56_tree_t vp56_pmbtm_tree[] = {
+    { 4, 0},
+    { 2, 1}, {-8}, {-4},
+    { 8, 2},
+    { 6, 3},
+    { 4, 4},
+    { 2, 5}, {-24}, {-20}, {-16}, {-12}, {-0},
+};
+
+static const vp56_tree_t vp56_pmbt_tree[] = {
+    { 8, 1},
+    { 4, 2},
+    { 2, 4}, {-VP56_MB_INTER_NOVEC_PF}, {-VP56_MB_INTER_DELTA_PF},
+    { 2, 5}, {-VP56_MB_INTER_V1_PF},    {-VP56_MB_INTER_V2_PF},
+    { 4, 3},
+    { 2, 6}, {-VP56_MB_INTRA},          {-VP56_MB_INTER_4V},
+    { 4, 7},
+    { 2, 8}, {-VP56_MB_INTER_NOVEC_GF}, {-VP56_MB_INTER_DELTA_GF},
+    { 2, 9}, {-VP56_MB_INTER_V1_GF},    {-VP56_MB_INTER_V2_GF},
+};
+
+/* relative pos of surrounding blocks, from closest to farthest */
+static const int8_t vp56_candidate_predictor_pos[12][2] = {
+    {  0, -1 },
+    { -1,  0 },
+    { -1, -1 },
+    {  1, -1 },
+    {  0, -2 },
+    { -2,  0 },
+    { -2, -1 },
+    { -1, -2 },
+    {  1, -2 },
+    {  2, -1 },
+    { -2, -2 },
+    {  2, -2 },
+};
+
+#endif /* VP56DATA */
diff --git a/contrib/ffmpeg/libavcodec/vp5data.h b/contrib/ffmpeg/libavcodec/vp5data.h
new file mode 100644
index 000000000..effc17c2c
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vp5data.h
@@ -0,0 +1,173 @@
+/**
+ * @file vp5data.h
+ * VP5 compatible video decoder
+ *
+ * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef VP5DATA_H
+#define VP5DATA_H
+
+static const uint8_t vp5_coeff_groups[] = {
+    -1, 0, 1, 1, 2, 1, 1, 2,
+     2, 1, 1, 2, 2, 2, 1, 2,
+     2, 2, 2, 2, 1, 1, 2, 2,
+     3, 3, 4, 3, 4, 4, 4, 3,
+     3, 3, 3, 3, 4, 3, 3, 3,
+     4, 4, 4, 4, 4, 3, 3, 4,
+     4, 4, 3, 4, 4, 4, 4, 4,
+     4, 4, 5, 5, 5, 5, 5, 5,
+};
+
+static const uint8_t vp5_vmc_pct[2][11] = {
+    { 243, 220, 251, 253, 237, 232, 241, 245, 247, 251, 253 },
+    { 235, 211, 246, 249, 234, 231, 248, 249, 252, 252, 254 },
+};
+
+static const uint8_t vp5_dccv_pct[2][11] = {
+    { 146, 197, 181, 207, 232, 243, 238, 251, 244, 250, 249 },
+    { 179, 219, 214, 240, 250, 254, 244, 254, 254, 254, 254 },
+};
+
+static const uint8_t vp5_ract_pct[3][2][6][11] = {
+    { { { 227, 246, 230, 247, 244, 254, 254, 254, 254, 254, 254 },
+        { 202, 254, 209, 231, 231, 249, 249, 253, 254, 254, 254 },
+        { 206, 254, 225, 242, 241, 251, 253, 254, 254, 254, 254 },
+        { 235, 254, 241, 253, 252, 254, 254, 254, 254, 254, 254 },
+        { 234, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254 },
+        { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } },
+      { { 240, 254, 248, 254, 254, 254, 254, 254, 254, 254, 254 },
+        { 238, 254, 240, 253, 254, 254, 254, 254, 254, 254, 254 },
+        { 244, 254, 251, 254, 254, 254, 254, 254, 254, 254, 254 },
+        { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+        { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+        { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } } },
+    { { { 206, 203, 227, 239, 247, 254, 253, 254, 254, 254, 254 },
+        { 207, 199, 220, 236, 243, 252, 252, 254, 254, 254, 254 },
+        { 212, 219, 230, 243, 244, 253, 252, 254, 254, 254, 254 },
+        { 236, 237, 247, 252, 253, 254, 254, 254, 254, 254, 254 },
+        { 240, 240, 248, 254, 254, 254, 254, 254, 254, 254, 254 },
+        { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } },
+      { { 230, 233, 249, 254, 254, 254, 254, 254, 254, 254, 254 },
+        { 238, 238, 250, 254, 254, 254, 254, 254, 254, 254, 254 },
+        { 248, 251, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+        { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+        { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+        { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } } },
+    { { { 225, 239, 227, 231, 244, 253, 243, 254, 254, 253, 254 },
+        { 232, 234, 224, 228, 242, 249, 242, 252, 251, 251, 254 },
+        { 235, 249, 238, 240, 251, 254, 249, 254, 253, 253, 254 },
+        { 249, 253, 251, 250, 254, 254, 254, 254, 254, 254, 254 },
+        { 251, 250, 249, 254, 254, 254, 254, 254, 254, 254, 254 },
+        { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } },
+      { { 243, 244, 250, 250, 254, 254, 254, 254, 254, 254, 254 },
+        { 249, 248, 250, 253, 254, 254, 254, 254, 254, 254, 254 },
+        { 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+        { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+        { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 },
+        { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 } } },
+};
+
+static const int16_t vp5_dccv_lc[5][36][2] = {
+    { {154,  61}, {141,  54}, { 90,  45}, { 54,  34}, { 54,  13}, {128, 109},
+      {136,  54}, {148,  45}, { 92,  41}, { 54,  33}, { 51,  15}, { 87, 113},
+      { 87,  44}, { 97,  40}, { 67,  36}, { 46,  29}, { 41,  15}, { 64,  80},
+      { 59,  33}, { 61,  31}, { 51,  28}, { 44,  22}, { 33,  12}, { 49,  63},
+      { 69,  12}, { 59,  16}, { 46,  14}, { 31,  13}, { 26,   6}, { 92,  26},
+      {128, 108}, { 77, 119}, { 54,  84}, { 26,  71}, { 87,  19}, { 95, 155} },
+    { {154,   4}, {182,   0}, {159,  -8}, {128,  -5}, {143,  -5}, {187,  55},
+      {182,   0}, {228,  -3}, {187,  -7}, {174,  -9}, {189, -11}, {169,  79},
+      {161,  -9}, {192,  -8}, {187,  -9}, {169, -10}, {136,  -9}, {184,  40},
+      {164, -11}, {179, -10}, {174, -10}, {161, -10}, {115,  -7}, {197,  20},
+      {195, -11}, {195, -11}, {146, -10}, {110,  -6}, { 95,  -4}, {195,  39},
+      {182,  55}, {172,  77}, {177,  37}, {169,  29}, {172,  52}, { 92, 162} },
+    { {174,  80}, {164,  80}, { 95,  80}, { 46,  66}, { 56,  24}, { 36, 193},
+      {164,  80}, {166,  77}, {105,  76}, { 49,  68}, { 46,  31}, { 49, 186},
+      { 97,  78}, {110,  74}, { 72,  72}, { 44,  60}, { 33,  30}, { 69, 131},
+      { 61,  61}, { 69,  63}, { 51,  57}, { 31,  48}, { 26,  27}, { 64,  89},
+      { 67,  23}, { 51,  32}, { 36,  33}, { 26,  28}, { 20,  12}, { 44,  68},
+      { 26, 197}, { 41, 189}, { 61, 129}, { 28, 103}, { 49,  52}, {-12, 245} },
+    { {102, 141}, { 79, 166}, { 72, 162}, { 97, 125}, {179,   4}, {307,   0},
+      { 72, 168}, { 69, 175}, { 84, 160}, {105, 127}, {148,  34}, {310,   0},
+      { 84, 151}, { 82, 161}, { 87, 153}, { 87, 135}, {115,  51}, {317,   0},
+      { 97, 125}, {102, 131}, {105, 125}, { 87, 122}, { 84,  64}, { 54, 184},
+      {166,  18}, {146,  43}, {125,  51}, { 90,  64}, { 95,   7}, { 38, 154},
+      {294,   0}, { 13, 225}, { 10, 225}, { 67, 168}, {  0, 167}, {161,  94} },
+    { {172,  76}, {172,  75}, {136,  80}, { 64,  98}, { 74,  67}, {315,   0},
+      {169,  76}, {207,  56}, {164,  66}, { 97,  80}, { 67,  72}, {328,   0},
+      {136,  80}, {187,  53}, {154,  62}, { 72,  85}, { -2, 105}, {305,   0},
+      { 74,  91}, {128,  64}, {113,  64}, { 61,  77}, { 41,  75}, {259,   0},
+      { 46,  84}, { 51,  81}, { 28,  89}, { 31,  78}, { 23,  77}, {202,   0},
+      {323,   0}, {323,   0}, {300,   0}, {236,   0}, {195,   0}, {328,   0} },
+};
+
+static const int16_t vp5_ract_lc[3][3][5][6][2] = {
+    { { { {276,  0}, {238,  0}, {195,  0}, {156,  0}, {113,  0}, {274,  0} },
+        { {  0,  1}, {  0,  1}, {  0,  1}, {  0,  1}, {  0,  1}, {  0,  1} },
+        { {192, 59}, {182, 50}, {141, 48}, {110, 40}, { 92, 19}, {125,128} },
+        { {169, 87}, {169, 83}, {184, 62}, {220, 16}, {184,  0}, {264,  0} },
+        { {212, 40}, {212, 36}, {169, 49}, {174, 27}, {  8,120}, {182, 71} } },
+      { { {259, 10}, {197, 19}, {143, 22}, {123, 16}, {110,  8}, {133, 88} },
+        { {  0,  1}, {256,  0}, {  0,  1}, {  0,  1}, {  0,  1}, {  0,  1} },
+        { {207, 46}, {187, 50}, { 97, 83}, { 23,100}, { 41, 56}, { 56,188} },
+        { {166, 90}, {146,108}, {161, 88}, {136, 95}, {174,  0}, {266,  0} },
+        { {264,  7}, {243, 18}, {184, 43}, {-14,154}, { 20,112}, { 20,199} } },
+      { { {230, 26}, {197, 22}, {159, 20}, {146, 12}, {136,  4}, { 54,162} },
+        { {  0,  1}, {  0,  1}, {  0,  1}, {  0,  1}, {  0,  1}, {  0,  1} },
+        { {192, 59}, {156, 72}, { 84,101}, { 49,101}, { 79, 47}, { 79,167} },
+        { {138,115}, {136,116}, {166, 80}, {238,  0}, {195,  0}, {261,  0} },
+        { {225, 33}, {205, 42}, {159, 61}, { 79, 96}, { 92, 66}, { 28,195} } },
+    }, {
+      { { {200, 37}, {197, 18}, {159, 13}, {143,  7}, {102,  5}, {123,126} },
+        { {197,  3}, {220, -9}, {210,-12}, {187, -6}, {151, -2}, {174, 80} },
+        { {200, 53}, {187, 47}, {159, 40}, {118, 38}, {100, 18}, {141,111} },
+        { {179, 78}, {166, 86}, {197, 50}, {207, 27}, {187,  0}, {115,139} },
+        { {218, 34}, {220, 29}, {174, 46}, {128, 61}, { 54, 89}, {187, 65} } },
+      { { {238, 14}, {197, 18}, {125, 26}, { 90, 25}, { 82, 13}, {161, 86} },
+        { {189,  1}, {205, -2}, {156, -4}, {143, -4}, {146, -4}, {172, 72} },
+        { {230, 31}, {192, 45}, {102, 76}, { 38, 85}, { 56, 41}, { 64,173} },
+        { {166, 91}, {141,111}, {128,116}, {118,109}, {177,  0}, { 23,222} },
+        { {253, 14}, {236, 21}, {174, 49}, { 33,118}, { 44, 93}, { 23,187} } },
+      { { {218, 28}, {179, 28}, {118, 35}, { 95, 30}, { 72, 24}, {128,108} },
+        { {187,  1}, {174, -1}, {125, -1}, {110, -1}, {108, -1}, {202, 52} },
+        { {197, 53}, {146, 75}, { 46,118}, { 33,103}, { 64, 50}, {118,126} },
+        { {138,114}, {128,122}, {161, 86}, {243, -6}, {195,  0}, { 38,210} },
+        { {215, 39}, {179, 58}, { 97,101}, { 95, 85}, { 87, 70}, { 69,152} } },
+    }, {
+      { { {236, 24}, {205, 18}, {172, 12}, {154,  6}, {125,  1}, {169, 75} },
+        { {187,  4}, {230, -2}, {228, -4}, {236, -4}, {241, -2}, {192, 66} },
+        { {200, 46}, {187, 42}, {159, 34}, {136, 25}, {105, 10}, {179, 62} },
+        { {207, 55}, {192, 63}, {192, 54}, {195, 36}, {177,  1}, {143, 98} },
+        { {225, 27}, {207, 34}, {200, 30}, {131, 57}, { 97, 60}, {197, 45} } },
+      { { {271,  8}, {218, 13}, {133, 19}, { 90, 19}, { 72,  7}, {182, 51} },
+        { {179,  1}, {225, -1}, {154, -2}, {110, -1}, { 92,  0}, {195, 41} },
+        { {241, 26}, {189, 40}, { 82, 64}, { 33, 60}, { 67, 17}, {120, 94} },
+        { {192, 68}, {151, 94}, {146, 90}, {143, 72}, {161,  0}, {113,128} },
+        { {256, 12}, {218, 29}, {166, 48}, { 44, 99}, { 31, 87}, {148, 78} } },
+      { { {238, 20}, {184, 22}, {113, 27}, { 90, 22}, { 74,  9}, {192, 37} },
+        { {184,  0}, {215, -1}, {141, -1}, { 97,  0}, { 49,  0}, {264, 13} },
+        { {182, 51}, {138, 61}, { 95, 63}, { 54, 59}, { 64, 25}, {200, 45} },
+        { {179, 75}, {156, 87}, {174, 65}, {177, 44}, {174,  0}, {164, 85} },
+        { {195, 45}, {148, 65}, {105, 79}, { 95, 72}, { 87, 60}, {169, 63} } },
+    }
+};
+
+static const uint8_t vp5_coord_div[] = { 2, 2, 2, 2, 4, 4 };
+
+#endif /* VP5DATA_H */
diff --git a/contrib/ffmpeg/libavcodec/vp6.c b/contrib/ffmpeg/libavcodec/vp6.c
new file mode 100644
index 000000000..b7ff004cc
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vp6.c
@@ -0,0 +1,522 @@
+/**
+ * @file vp6.c
+ * VP6 compatible video decoder
+ *
+ * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ *
+ * The VP6F decoder accept an optional 1 byte extradata. It is composed of:
+ *  - upper 4bits: difference between encoded width and visible width
+ *  - lower 4bits: difference between encoded height and visible height
+ */
+
+#include <stdlib.h>
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "bitstream.h"
+#include "mpegvideo.h"
+
+#include "vp56.h"
+#include "vp56data.h"
+#include "vp6data.h"
+
+
+static int vp6_parse_header(vp56_context_t *s, uint8_t *buf, int buf_size,
+                            int *golden_frame)
+{
+    vp56_range_coder_t *c = &s->c;
+    int parse_filter_info;
+    int rows, cols;
+    int res = 1;
+
+    if (buf[0] & 1)
+        return 0;
+
+    s->frames[VP56_FRAME_CURRENT].key_frame = !(buf[0] & 0x80);
+    vp56_init_dequant(s, (buf[0] >> 1) & 0x3F);
+
+    if (s->frames[VP56_FRAME_CURRENT].key_frame) {
+        if ((buf[1] & 0xFE) != 0x46)  /* would be 0x36 for VP61 */
+            return 0;
+        if (buf[1] & 1) {
+            av_log(s->avctx, AV_LOG_ERROR, "interlacing not supported\n");
+            return 0;
+        }
+
+        rows = buf[2];  /* number of stored macroblock rows */
+        cols = buf[3];  /* number of stored macroblock cols */
+        /* buf[4] is number of displayed macroblock rows */
+        /* buf[5] is number of displayed macroblock cols */
+
+        if (16*cols != s->avctx->coded_width ||
+            16*rows != s->avctx->coded_height) {
+            avcodec_set_dimensions(s->avctx, 16*cols, 16*rows);
+            if (s->avctx->extradata_size == 1) {
+                s->avctx->width  -= s->avctx->extradata[0] >> 4;
+                s->avctx->height -= s->avctx->extradata[0] & 0x0F;
+            }
+            res = 2;
+        }
+
+        vp56_init_range_decoder(c, buf+6, buf_size-6);
+        vp56_rac_gets(c, 2);
+
+        parse_filter_info = 1;
+    } else {
+        vp56_init_range_decoder(c, buf+1, buf_size-1);
+
+        *golden_frame = vp56_rac_get(c);
+        s->deblock_filtering = vp56_rac_get(c);
+        if (s->deblock_filtering)
+            vp56_rac_get(c);
+        parse_filter_info = vp56_rac_get(c);
+    }
+
+    if (parse_filter_info) {
+        if (vp56_rac_get(c)) {
+            s->filter_mode = 2;
+            s->sample_variance_threshold = vp56_rac_gets(c, 5);
+            s->max_vector_length = 2 << vp56_rac_gets(c, 3);
+        } else if (vp56_rac_get(c)) {
+            s->filter_mode = 1;
+        } else {
+            s->filter_mode = 0;
+        }
+        s->filter_selection = vp56_rac_gets(c, 4);
+    }
+
+    vp56_rac_get(c);
+    return res;
+}
+
+static void vp6_coeff_order_table_init(vp56_context_t *s)
+{
+    int i, pos, idx = 1;
+
+    s->coeff_index_to_pos[0] = 0;
+    for (i=0; i<16; i++)
+        for (pos=1; pos<64; pos++)
+            if (s->coeff_reorder[pos] == i)
+                s->coeff_index_to_pos[idx++] = pos;
+}
+
+static void vp6_default_models_init(vp56_context_t *s)
+{
+    s->vector_model_dct[0] = 0xA2;
+    s->vector_model_dct[1] = 0xA4;
+    s->vector_model_sig[0] = 0x80;
+    s->vector_model_sig[1] = 0x80;
+
+    memcpy(s->mb_types_stats, vp56_def_mb_types_stats, sizeof(s->mb_types_stats));
+    memcpy(s->vector_model_fdv, vp6_def_fdv_vector_model, sizeof(s->vector_model_fdv));
+    memcpy(s->vector_model_pdv, vp6_def_pdv_vector_model, sizeof(s->vector_model_pdv));
+    memcpy(s->coeff_model_runv, vp6_def_runv_coeff_model, sizeof(s->coeff_model_runv));
+    memcpy(s->coeff_reorder, vp6_def_coeff_reorder, sizeof(s->coeff_reorder));
+
+    vp6_coeff_order_table_init(s);
+}
+
+static void vp6_parse_vector_models(vp56_context_t *s)
+{
+    vp56_range_coder_t *c = &s->c;
+    int comp, node;
+
+    for (comp=0; comp<2; comp++) {
+        if (vp56_rac_get_prob(c, vp6_sig_dct_pct[comp][0]))
+            s->vector_model_dct[comp] = vp56_rac_gets_nn(c, 7);
+        if (vp56_rac_get_prob(c, vp6_sig_dct_pct[comp][1]))
+            s->vector_model_sig[comp] = vp56_rac_gets_nn(c, 7);
+    }
+
+    for (comp=0; comp<2; comp++)
+        for (node=0; node<7; node++)
+            if (vp56_rac_get_prob(c, vp6_pdv_pct[comp][node]))
+                s->vector_model_pdv[comp][node] = vp56_rac_gets_nn(c, 7);
+
+    for (comp=0; comp<2; comp++)
+        for (node=0; node<8; node++)
+            if (vp56_rac_get_prob(c, vp6_fdv_pct[comp][node]))
+                s->vector_model_fdv[comp][node] = vp56_rac_gets_nn(c, 7);
+}
+
+static void vp6_parse_coeff_models(vp56_context_t *s)
+{
+    vp56_range_coder_t *c = &s->c;
+    int def_prob[11];
+    int node, cg, ctx, pos;
+    int ct;    /* code type */
+    int pt;    /* plane type (0 for Y, 1 for U or V) */
+
+    memset(def_prob, 0x80, sizeof(def_prob));
+
+    for (pt=0; pt<2; pt++)
+        for (node=0; node<11; node++)
+            if (vp56_rac_get_prob(c, vp6_dccv_pct[pt][node])) {
+                def_prob[node] = vp56_rac_gets_nn(c, 7);
+                s->coeff_model_dccv[pt][node] = def_prob[node];
+            } else if (s->frames[VP56_FRAME_CURRENT].key_frame) {
+                s->coeff_model_dccv[pt][node] = def_prob[node];
+            }
+
+    if (vp56_rac_get(c)) {
+        for (pos=1; pos<64; pos++)
+            if (vp56_rac_get_prob(c, vp6_coeff_reorder_pct[pos]))
+                s->coeff_reorder[pos] = vp56_rac_gets(c, 4);
+        vp6_coeff_order_table_init(s);
+    }
+
+    for (cg=0; cg<2; cg++)
+        for (node=0; node<14; node++)
+            if (vp56_rac_get_prob(c, vp6_runv_pct[cg][node]))
+                s->coeff_model_runv[cg][node] = vp56_rac_gets_nn(c, 7);
+
+    for (ct=0; ct<3; ct++)
+        for (pt=0; pt<2; pt++)
+            for (cg=0; cg<6; cg++)
+                for (node=0; node<11; node++)
+                    if (vp56_rac_get_prob(c, vp6_ract_pct[ct][pt][cg][node])) {
+                        def_prob[node] = vp56_rac_gets_nn(c, 7);
+                        s->coeff_model_ract[pt][ct][cg][node] = def_prob[node];
+                    } else if (s->frames[VP56_FRAME_CURRENT].key_frame) {
+                        s->coeff_model_ract[pt][ct][cg][node] = def_prob[node];
+                    }
+
+    /* coeff_model_dcct is a linear combination of coeff_model_dccv */
+    for (pt=0; pt<2; pt++)
+        for (ctx=0; ctx<3; ctx++)
+            for (node=0; node<5; node++)
+                s->coeff_model_dcct[pt][ctx][node] = clip(((s->coeff_model_dccv[pt][node] * vp6_dccv_lc[ctx][node][0] + 128) >> 8) + vp6_dccv_lc[ctx][node][1], 1, 255);
+}
+
+static void vp6_parse_vector_adjustment(vp56_context_t *s, vp56_mv_t *vect)
+{
+    vp56_range_coder_t *c = &s->c;
+    int comp;
+
+    *vect = (vp56_mv_t) {0,0};
+    if (s->vector_candidate_pos < 2)
+        *vect = s->vector_candidate[0];
+
+    for (comp=0; comp<2; comp++) {
+        int i, delta = 0;
+
+        if (vp56_rac_get_prob(c, s->vector_model_dct[comp])) {
+            static const uint8_t prob_order[] = {0, 1, 2, 7, 6, 5, 4};
+            for (i=0; i<sizeof(prob_order); i++) {
+                int j = prob_order[i];
+                delta |= vp56_rac_get_prob(c, s->vector_model_fdv[comp][j])<<j;
+            }
+            if (delta & 0xF0)
+                delta |= vp56_rac_get_prob(c, s->vector_model_fdv[comp][3])<<3;
+            else
+                delta |= 8;
+        } else {
+            delta = vp56_rac_get_tree(c, vp56_pva_tree,
+                                      s->vector_model_pdv[comp]);
+        }
+
+        if (delta && vp56_rac_get_prob(c, s->vector_model_sig[comp]))
+            delta = -delta;
+
+        if (!comp)
+            vect->x += delta;
+        else
+            vect->y += delta;
+    }
+}
+
+static void vp6_parse_coeff(vp56_context_t *s)
+{
+    vp56_range_coder_t *c = &s->c;
+    uint8_t *permute = s->scantable.permutated;
+    uint8_t *model, *model2, *model3;
+    int coeff, sign, coeff_idx;
+    int b, i, cg, idx, ctx;
+    int pt = 0;    /* plane type (0 for Y, 1 for U or V) */
+
+    for (b=0; b<6; b++) {
+        int ct = 1;    /* code type */
+        int run = 1;
+
+        if (b > 3) pt = 1;
+
+        ctx = s->left_block[vp56_b6to4[b]].not_null_dc
+              + s->above_blocks[s->above_block_idx[b]].not_null_dc;
+        model = s->coeff_model_dccv[pt];
+        model2 = s->coeff_model_dcct[pt][ctx];
+
+        for (coeff_idx=0; coeff_idx<64; ) {
+            if ((coeff_idx>1 && ct==0) || vp56_rac_get_prob(c, model2[0])) {
+                /* parse a coeff */
+                if (coeff_idx == 0) {
+                    s->left_block[vp56_b6to4[b]].not_null_dc = 1;
+                    s->above_blocks[s->above_block_idx[b]].not_null_dc = 1;
+                }
+
+                if (vp56_rac_get_prob(c, model2[2])) {
+                    if (vp56_rac_get_prob(c, model2[3])) {
+                        idx = vp56_rac_get_tree(c, vp56_pc_tree, model);
+                        coeff = vp56_coeff_bias[idx];
+                        for (i=vp56_coeff_bit_length[idx]; i>=0; i--)
+                            coeff += vp56_rac_get_prob(c, vp56_coeff_parse_table[idx][i]) << i;
+                    } else {
+                        if (vp56_rac_get_prob(c, model2[4]))
+                            coeff = 3 + vp56_rac_get_prob(c, model[5]);
+                        else
+                            coeff = 2;
+                    }
+                    ct = 2;
+                } else {
+                    ct = 1;
+                    coeff = 1;
+                }
+                sign = vp56_rac_get(c);
+                coeff = (coeff ^ -sign) + sign;
+                if (coeff_idx)
+                    coeff *= s->dequant_ac;
+                idx = s->coeff_index_to_pos[coeff_idx];
+                s->block_coeff[b][permute[idx]] = coeff;
+                run = 1;
+            } else {
+                /* parse a run */
+                ct = 0;
+                if (coeff_idx == 0) {
+                    s->left_block[vp56_b6to4[b]].not_null_dc = 0;
+                    s->above_blocks[s->above_block_idx[b]].not_null_dc = 0;
+                } else {
+                    if (!vp56_rac_get_prob(c, model2[1]))
+                        break;
+
+                    model3 = s->coeff_model_runv[coeff_idx >= 6];
+                    run = vp56_rac_get_tree(c, vp6_pcr_tree, model3);
+                    if (!run)
+                        for (run=9, i=0; i<6; i++)
+                            run += vp56_rac_get_prob(c, model3[i+8]) << i;
+                }
+            }
+
+            cg = vp6_coeff_groups[coeff_idx+=run];
+            model = model2 = s->coeff_model_ract[pt][ct][cg];
+        }
+    }
+}
+
+static int vp6_adjust(int v, int t)
+{
+    int V = v, s = v >> 31;
+    V ^= s;
+    V -= s;
+    if (V-t-1 >= (unsigned)(t-1))
+        return v;
+    V = 2*t - V;
+    V += s;
+    V ^= s;
+    return V;
+}
+
+static int vp6_block_variance(uint8_t *src, int stride)
+{
+    int sum = 0, square_sum = 0;
+    int y, x;
+
+    for (y=0; y<8; y+=2) {
+        for (x=0; x<8; x+=2) {
+            sum += src[x];
+            square_sum += src[x]*src[x];
+        }
+        src += 2*stride;
+    }
+    return (16*square_sum - sum*sum) / (16*16);
+}
+
+static void vp6_filter_hv2(vp56_context_t *s, uint8_t *dst, uint8_t *src,
+                           int stride, int delta, int16_t weight)
+{
+    s->dsp.put_pixels_tab[1][0](dst, src, stride, 8);
+    s->dsp.biweight_h264_pixels_tab[3](dst, src+delta, stride, 2,
+                                       8-weight, weight, 0);
+}
+
+static void vp6_filter_hv4(uint8_t *dst, uint8_t *src, int stride,
+                           int delta, const int16_t *weights)
+{
+    int x, y;
+
+    for (y=0; y<8; y++) {
+        for (x=0; x<8; x++) {
+            dst[x] = clip_uint8((  src[x-delta  ] * weights[0]
+                                 + src[x        ] * weights[1]
+                                 + src[x+delta  ] * weights[2]
+                                 + src[x+2*delta] * weights[3] + 64) >> 7);
+        }
+        src += stride;
+        dst += stride;
+    }
+}
+
+static void vp6_filter_diag2(vp56_context_t *s, uint8_t *dst, uint8_t *src,
+                             int stride, int h_weight, int v_weight)
+{
+    uint8_t *tmp = s->edge_emu_buffer+16;
+    int x, xmax;
+
+    s->dsp.put_pixels_tab[1][0](tmp, src, stride, 8);
+    s->dsp.biweight_h264_pixels_tab[3](tmp, src+1, stride, 2,
+                                       8-h_weight, h_weight, 0);
+    /* we need a 8x9 block to do vertical filter, so compute one more line */
+    for (x=8*stride, xmax=x+8; x<xmax; x++)
+        tmp[x] = (src[x]*(8-h_weight) + src[x+1]*h_weight + 4) >> 3;
+
+    s->dsp.put_pixels_tab[1][0](dst, tmp, stride, 8);
+    s->dsp.biweight_h264_pixels_tab[3](dst, tmp+stride, stride, 2,
+                                       8-v_weight, v_weight, 0);
+}
+
+static void vp6_filter_diag4(uint8_t *dst, uint8_t *src, int stride,
+                             const int16_t *h_weights,const int16_t *v_weights)
+{
+    int x, y;
+    int tmp[8*11];
+    int *t = tmp;
+
+    src -= stride;
+
+    for (y=0; y<11; y++) {
+        for (x=0; x<8; x++) {
+            t[x] = clip_uint8((  src[x-1] * h_weights[0]
+                               + src[x  ] * h_weights[1]
+                               + src[x+1] * h_weights[2]
+                               + src[x+2] * h_weights[3] + 64) >> 7);
+        }
+        src += stride;
+        t += 8;
+    }
+
+    t = tmp + 8;
+    for (y=0; y<8; y++) {
+        for (x=0; x<8; x++) {
+            dst[x] = clip_uint8((  t[x-8 ] * v_weights[0]
+                                 + t[x   ] * v_weights[1]
+                                 + t[x+8 ] * v_weights[2]
+                                 + t[x+16] * v_weights[3] + 64) >> 7);
+        }
+        dst += stride;
+        t += 8;
+    }
+}
+
+static void vp6_filter(vp56_context_t *s, uint8_t *dst, uint8_t *src,
+                       int offset1, int offset2, int stride,
+                       vp56_mv_t mv, int mask, int select, int luma)
+{
+    int filter4 = 0;
+    int x8 = mv.x & mask;
+    int y8 = mv.y & mask;
+
+    if (luma) {
+        x8 *= 2;
+        y8 *= 2;
+        filter4 = s->filter_mode;
+        if (filter4 == 2) {
+            if (s->max_vector_length &&
+                (FFABS(mv.x) > s->max_vector_length ||
+                 FFABS(mv.y) > s->max_vector_length)) {
+                filter4 = 0;
+            } else if (!s->sample_variance_threshold
+                       || (vp6_block_variance(src+offset1, stride)
+                           < s->sample_variance_threshold)) {
+                filter4 = 0;
+            }
+        }
+    }
+
+    if ((y8 && (offset2-offset1)*s->flip<0) || (!y8 && offset1 > offset2)) {
+        offset1 = offset2;
+    }
+
+    if (filter4) {
+        if (!y8) {                      /* left or right combine */
+            vp6_filter_hv4(dst, src+offset1, stride, 1,
+                           vp6_block_copy_filter[select][x8]);
+        } else if (!x8) {               /* above or below combine */
+            vp6_filter_hv4(dst, src+offset1, stride, stride,
+                           vp6_block_copy_filter[select][y8]);
+        } else if ((mv.x^mv.y) >> 31) { /* lower-left or upper-right combine */
+            vp6_filter_diag4(dst, src+offset1-1, stride,
+                             vp6_block_copy_filter[select][x8],
+                             vp6_block_copy_filter[select][y8]);
+        } else {                        /* lower-right or upper-left combine */
+            vp6_filter_diag4(dst, src+offset1, stride,
+                             vp6_block_copy_filter[select][x8],
+                             vp6_block_copy_filter[select][y8]);
+        }
+    } else {
+        if (!y8) {                      /* left or right combine */
+            vp6_filter_hv2(s, dst, src+offset1, stride, 1, x8);
+        } else if (!x8) {               /* above or below combine */
+            vp6_filter_hv2(s, dst, src+offset1, stride, stride, y8);
+        } else if ((mv.x^mv.y) >> 31) { /* lower-left or upper-right combine */
+            vp6_filter_diag2(s, dst, src+offset1-1, stride, x8, y8);
+        } else {                        /* lower-right or upper-left combine */
+            vp6_filter_diag2(s, dst, src+offset1, stride, x8, y8);
+        }
+    }
+}
+
+static int vp6_decode_init(AVCodecContext *avctx)
+{
+    vp56_context_t *s = avctx->priv_data;
+
+    vp56_init(s, avctx, avctx->codec->id == CODEC_ID_VP6);
+    s->vp56_coord_div = vp6_coord_div;
+    s->parse_vector_adjustment = vp6_parse_vector_adjustment;
+    s->adjust = vp6_adjust;
+    s->filter = vp6_filter;
+    s->parse_coeff = vp6_parse_coeff;
+    s->default_models_init = vp6_default_models_init;
+    s->parse_vector_models = vp6_parse_vector_models;
+    s->parse_coeff_models = vp6_parse_coeff_models;
+    s->parse_header = vp6_parse_header;
+
+    return 0;
+}
+
+AVCodec vp6_decoder = {
+    "vp6",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_VP6,
+    sizeof(vp56_context_t),
+    vp6_decode_init,
+    NULL,
+    vp56_free,
+    vp56_decode_frame,
+};
+
+/* flash version, not flipped upside-down */
+AVCodec vp6f_decoder = {
+    "vp6f",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_VP6F,
+    sizeof(vp56_context_t),
+    vp6_decode_init,
+    NULL,
+    vp56_free,
+    vp56_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/vp6data.h b/contrib/ffmpeg/libavcodec/vp6data.h
new file mode 100644
index 000000000..ba4c7abf7
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vp6data.h
@@ -0,0 +1,292 @@
+/**
+ * @file vp6data.h
+ * VP6 compatible video decoder
+ *
+ * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef VP6DATA_H
+#define VP6DATA_H
+
+#include "vp56data.h"
+
+static const uint8_t vp6_def_fdv_vector_model[2][8] = {
+    { 247, 210, 135, 68, 138, 220, 239, 246 },
+    { 244, 184, 201, 44, 173, 221, 239, 253 },
+};
+
+static const uint8_t vp6_def_pdv_vector_model[2][7] = {
+    { 225, 146, 172, 147, 214,  39, 156 },
+    { 204, 170, 119, 235, 140, 230, 228 },
+};
+
+static const uint8_t vp6_def_coeff_reorder[] = {
+     0,  0,  1,  1,  1,  2,  2,  2,
+     2,  2,  2,  3,  3,  4,  4,  4,
+     5,  5,  5,  5,  6,  6,  7,  7,
+     7,  7,  7,  8,  8,  9,  9,  9,
+     9,  9,  9, 10, 10, 11, 11, 11,
+    11, 11, 11, 12, 12, 12, 12, 12,
+    12, 13, 13, 13, 13, 13, 14, 14,
+    14, 14, 15, 15, 15, 15, 15, 15,
+};
+
+static const uint8_t vp6_def_runv_coeff_model[2][14] = {
+    { 198, 197, 196, 146, 198, 204, 169, 142, 130, 136, 149, 149, 191, 249 },
+    { 135, 201, 181, 154,  98, 117, 132, 126, 146, 169, 184, 240, 246, 254 },
+};
+
+static const uint8_t vp6_sig_dct_pct[2][2] = {
+    { 237, 246 },
+    { 231, 243 },
+};
+
+static const uint8_t vp6_pdv_pct[2][7] = {
+    { 253, 253, 254, 254, 254, 254, 254 },
+    { 245, 253, 254, 254, 254, 254, 254 },
+};
+
+static const uint8_t vp6_fdv_pct[2][8] = {
+    { 254, 254, 254, 254, 254, 250, 250, 252 },
+    { 254, 254, 254, 254, 254, 251, 251, 254 },
+};
+
+static const uint8_t vp6_dccv_pct[2][11] = {
+    { 146, 255, 181, 207, 232, 243, 238, 251, 244, 250, 249 },
+    { 179, 255, 214, 240, 250, 255, 244, 255, 255, 255, 255 },
+};
+
+static const uint8_t vp6_coeff_reorder_pct[] =  {
+    255, 132, 132, 159, 153, 151, 161, 170,
+    164, 162, 136, 110, 103, 114, 129, 118,
+    124, 125, 132, 136, 114, 110, 142, 135,
+    134, 123, 143, 126, 153, 183, 166, 161,
+    171, 180, 179, 164, 203, 218, 225, 217,
+    215, 206, 203, 217, 229, 241, 248, 243,
+    253, 255, 253, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255,
+};
+
+static const uint8_t vp6_runv_pct[2][14] = {
+  { 219, 246, 238, 249, 232, 239, 249, 255, 248, 253, 239, 244, 241, 248 },
+  { 198, 232, 251, 253, 219, 241, 253, 255, 248, 249, 244, 238, 251, 255 },
+};
+
+static const uint8_t vp6_ract_pct[3][2][6][11] = {
+  { { { 227, 246, 230, 247, 244, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 209, 231, 231, 249, 249, 253, 255, 255, 255 },
+      { 255, 255, 225, 242, 241, 251, 253, 255, 255, 255, 255 },
+      { 255, 255, 241, 253, 252, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 248, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } },
+    { { 240, 255, 248, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 240, 253, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } } },
+  { { { 206, 203, 227, 239, 247, 255, 253, 255, 255, 255, 255 },
+      { 207, 199, 220, 236, 243, 252, 252, 255, 255, 255, 255 },
+      { 212, 219, 230, 243, 244, 253, 252, 255, 255, 255, 255 },
+      { 236, 237, 247, 252, 253, 255, 255, 255, 255, 255, 255 },
+      { 240, 240, 248, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } },
+    { { 230, 233, 249, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 238, 238, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 248, 251, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } } },
+  { { { 225, 239, 227, 231, 244, 253, 243, 255, 255, 253, 255 },
+      { 232, 234, 224, 228, 242, 249, 242, 252, 251, 251, 255 },
+      { 235, 249, 238, 240, 251, 255, 249, 255, 253, 253, 255 },
+      { 249, 253, 251, 250, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 250, 249, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } },
+    { { 243, 244, 250, 250, 255, 255, 255, 255, 255, 255, 255 },
+      { 249, 248, 250, 253, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 } } }
+};
+
+static const int vp6_dccv_lc[3][5][2] = {
+  { { 122, 133 }, { 0, 1 }, { 78,  171 }, { 139, 117 }, { 168, 79 } },
+  { { 133,  51 }, { 0, 1 }, { 169,  71 }, { 214,  44 }, { 210, 38 } },
+  { { 142, -16 }, { 0, 1 }, { 221, -30 }, { 246,  -3 }, { 203, 17 } },
+};
+
+static const uint8_t vp6_coeff_groups[] = {
+    0, 0, 1, 1, 1, 2, 2, 2,
+    2, 2, 2, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5,
+};
+
+static const int16_t vp6_block_copy_filter[16][8][4] = {
+  { {   0, 128,   0,   0  },  /* 0 */
+    {  -3, 122,   9,   0  },
+    {  -4, 109,  24,  -1  },
+    {  -5,  91,  45,  -3  },
+    {  -4,  68,  68,  -4  },
+    {  -3,  45,  91,  -5  },
+    {  -1,  24, 109,  -4  },
+    {   0,   9, 122,  -3  } },
+  { {   0, 128,   0,   0  },  /* 1 */
+    {  -4, 124,   9,  -1  },
+    {  -5, 110,  25,  -2  },
+    {  -6,  91,  46,  -3  },
+    {  -5,  69,  69,  -5  },
+    {  -3,  46,  91,  -6  },
+    {  -2,  25, 110,  -5  },
+    {  -1,   9, 124,  -4  } },
+  { {   0, 128,   0,   0  },  /* 2 */
+    {  -4, 123,  10,  -1  },
+    {  -6, 110,  26,  -2  },
+    {  -7,  92,  47,  -4  },
+    {  -6,  70,  70,  -6  },
+    {  -4,  47,  92,  -7  },
+    {  -2,  26, 110,  -6  },
+    {  -1,  10, 123,  -4  } },
+  { {   0, 128,   0,   0  },  /* 3 */
+    {  -5, 124,  10,  -1  },
+    {  -7, 110,  27,  -2  },
+    {  -7,  91,  48,  -4  },
+    {  -6,  70,  70,  -6  },
+    {  -4,  48,  92,  -8  },
+    {  -2,  27, 110,  -7  },
+    {  -1,  10, 124,  -5  } },
+  { {   0, 128,   0,   0  },  /* 4 */
+    {  -6, 124,  11,  -1  },
+    {  -8, 111,  28,  -3  },
+    {  -8,  92,  49,  -5  },
+    {  -7,  71,  71,  -7  },
+    {  -5,  49,  92,  -8  },
+    {  -3,  28, 111,  -8  },
+    {  -1,  11, 124,  -6  } },
+  { {  0,  128,   0,   0  },  /* 5 */
+    {  -6, 123,  12,  -1  },
+    {  -9, 111,  29,  -3  },
+    {  -9,  93,  50,  -6  },
+    {  -8,  72,  72,  -8  },
+    {  -6,  50,  93,  -9  },
+    {  -3,  29, 111,  -9  },
+    {  -1,  12, 123,  -6  } },
+  { {   0, 128,   0,   0  },  /* 6 */
+    {  -7, 124,  12,  -1  },
+    { -10, 111,  30,  -3  },
+    { -10,  93,  51,  -6  },
+    {  -9,  73,  73,  -9  },
+    {  -6,  51,  93, -10  },
+    {  -3,  30, 111, -10  },
+    {  -1,  12, 124,  -7  } },
+  { {   0, 128,   0,   0  },  /* 7 */
+    {  -7, 123,  13,  -1  },
+    { -11, 112,  31,  -4  },
+    { -11,  94,  52,  -7  },
+    { -10,  74,  74, -10  },
+    {  -7,  52,  94, -11  },
+    {  -4,  31, 112, -11  },
+    {  -1,  13, 123,  -7  } },
+  { {   0, 128,   0,  0  },  /* 8 */
+    {  -8, 124,  13,  -1  },
+    { -12, 112,  32,  -4  },
+    { -12,  94,  53,  -7  },
+    { -10,  74,  74, -10  },
+    {  -7,  53,  94, -12  },
+    {  -4,  32, 112, -12  },
+    {  -1,  13, 124,  -8  } },
+  { {   0, 128,   0,   0  },  /* 9 */
+    {  -9, 124,  14,  -1  },
+    { -13, 112,  33,  -4  },
+    { -13,  95,  54,  -8  },
+    { -11,  75,  75, -11  },
+    {  -8,  54,  95, -13  },
+    {  -4,  33, 112, -13  },
+    {  -1,  14, 124,  -9  } },
+  { {   0, 128,   0,   0  },  /* 10 */
+    {  -9, 123,  15,  -1  },
+    { -14, 113,  34,  -5  },
+    { -14,  95,  55,  -8  },
+    { -12,  76,  76, -12  },
+    {  -8,  55,  95, -14  },
+    {  -5,  34, 112, -13  },
+    {  -1,  15, 123,  -9  } },
+  { {   0, 128,   0,   0  },  /* 11 */
+    { -10, 124,  15,  -1  },
+    { -14, 113,  34,  -5  },
+    { -15,  96,  56,  -9  },
+    { -13,  77,  77, -13  },
+    {  -9,  56,  96, -15  },
+    {  -5,  34, 113, -14  },
+    {  -1,  15, 124, -10  } },
+  { {   0, 128,   0,   0  },  /* 12 */
+    { -10, 123,  16,  -1  },
+    { -15, 113,  35,  -5  },
+    { -16,  98,  56, -10  },
+    { -14,  78,  78, -14  },
+    { -10,  56,  98, -16  },
+    {  -5,  35, 113, -15  },
+    {  -1,  16, 123, -10  } },
+  { {   0, 128,   0,   0  },  /* 13 */
+    { -11, 124,  17,  -2  },
+    { -16, 113,  36,  -5  },
+    { -17,  98,  57, -10  },
+    { -14,  78,  78, -14  },
+    { -10,  57,  98, -17  },
+    {  -5,  36, 113, -16  },
+    {  -2,  17, 124, -11  } },
+  { {   0, 128,   0,   0  },  /* 14 */
+    { -12, 125,  17,  -2  },
+    { -17, 114,  37,  -6  },
+    { -18,  99,  58, -11  },
+    { -15,  79,  79, -15  },
+    { -11,  58,  99, -18  },
+    {  -6,  37, 114, -17  },
+    {  -2,  17, 125, -12  } },
+  { {   0, 128,   0,   0  },  /* 15 */
+    { -12, 124,  18,  -2  },
+    { -18, 114,  38,  -6  },
+    { -19,  99,  59, -11  },
+    { -16,  80,  80, -16  },
+    { -11,  59,  99, -19  },
+    {  -6,  38, 114, -18  },
+    {  -2,  18, 124, -12  } },
+};
+
+static const vp56_tree_t vp6_pcr_tree[] = {
+    { 8, 0},
+    { 4, 1},
+    { 2, 2}, {-1}, {-2},
+    { 2, 3}, {-3}, {-4},
+    { 8, 4},
+    { 4, 5},
+    { 2, 6}, {-5}, {-6},
+    { 2, 7}, {-7}, {-8},
+             {-0},
+};
+
+static const uint8_t vp6_coord_div[] = { 4, 4, 4, 4, 8, 8 };
+
+#endif /* VP6DATA_H */
diff --git a/contrib/ffmpeg/libavcodec/vqavideo.c b/contrib/ffmpeg/libavcodec/vqavideo.c
new file mode 100644
index 000000000..912ced0df
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/vqavideo.c
@@ -0,0 +1,625 @@
+/*
+ * Westwood Studios VQA Video Decoder
+ * Copyright (C) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file vqavideo.c
+ * VQA Video Decoder by Mike Melanson (melanson@pcisys.net)
+ * For more information about the VQA format, visit:
+ *   http://wiki.multimedia.cx/index.php?title=VQA
+ *
+ * The VQA video decoder outputs PAL8 or RGB555 colorspace data, depending
+ * on the type of data in the file.
+ *
+ * This decoder needs the 42-byte VQHD header from the beginning
+ * of the VQA file passed through the extradata field. The VQHD header
+ * is laid out as:
+ *
+ *   bytes 0-3   chunk fourcc: 'VQHD'
+ *   bytes 4-7   chunk size in big-endian format, should be 0x0000002A
+ *   bytes 8-49  VQHD chunk data
+ *
+ * Bytes 8-49 are what this decoder expects to see.
+ *
+ * Briefly, VQA is a vector quantized animation format that operates in a
+ * VGA palettized colorspace. It operates on pixel vectors (blocks)
+ * of either 4x2 or 4x4 in size. Compressed VQA chunks can contain vector
+ * codebooks, palette information, and code maps for rendering vectors onto
+ * frames. Any of these components can also be compressed with a run-length
+ * encoding (RLE) algorithm commonly referred to as "format80".
+ *
+ * VQA takes a novel approach to rate control. Each group of n frames
+ * (usually, n = 8) relies on a different vector codebook. Rather than
+ * transporting an entire codebook every 8th frame, the new codebook is
+ * broken up into 8 pieces and sent along with the compressed video chunks
+ * for each of the 8 frames preceding the 8 frames which require the
+ * codebook. A full codebook is also sent on the very first frame of a
+ * file. This is an interesting technique, although it makes random file
+ * seeking difficult despite the fact that the frames are all intracoded.
+ *
+ * V1,2 VQA uses 12-bit codebook indices. If the 12-bit indices were
+ * packed into bytes and then RLE compressed, bytewise, the results would
+ * be poor. That is why the coding method divides each index into 2 parts,
+ * the top 4 bits and the bottom 8 bits, then RL encodes the 4-bit pieces
+ * together and the 8-bit pieces together. If most of the vectors are
+ * clustered into one group of 256 vectors, most of the 4-bit index pieces
+ * should be the same.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+#include "dsputil.h"
+
+#define PALETTE_COUNT 256
+#define VQA_HEADER_SIZE 0x2A
+#define CHUNK_PREAMBLE_SIZE 8
+
+/* allocate the maximum vector space, regardless of the file version:
+ * (0xFF00 codebook vectors + 0x100 solid pixel vectors) * (4x4 pixels/block) */
+#define MAX_CODEBOOK_VECTORS 0xFF00
+#define SOLID_PIXEL_VECTORS 0x100
+#define MAX_VECTORS (MAX_CODEBOOK_VECTORS + SOLID_PIXEL_VECTORS)
+#define MAX_CODEBOOK_SIZE (MAX_VECTORS * 4 * 4)
+
+#define CBF0_TAG MKBETAG('C', 'B', 'F', '0')
+#define CBFZ_TAG MKBETAG('C', 'B', 'F', 'Z')
+#define CBP0_TAG MKBETAG('C', 'B', 'P', '0')
+#define CBPZ_TAG MKBETAG('C', 'B', 'P', 'Z')
+#define CPL0_TAG MKBETAG('C', 'P', 'L', '0')
+#define CPLZ_TAG MKBETAG('C', 'P', 'L', 'Z')
+#define VPTZ_TAG MKBETAG('V', 'P', 'T', 'Z')
+
+#define VQA_DEBUG 0
+
+#if VQA_DEBUG
+#define vqa_debug printf
+#else
+static inline void vqa_debug(const char *format, ...) { }
+#endif
+
+typedef struct VqaContext {
+
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    AVFrame frame;
+
+    unsigned char *buf;
+    int size;
+
+    uint32_t palette[PALETTE_COUNT];
+
+    int width;   /* width of a frame */
+    int height;   /* height of a frame */
+    int vector_width;  /* width of individual vector */
+    int vector_height;  /* height of individual vector */
+    int vqa_version;  /* this should be either 1, 2 or 3 */
+
+    unsigned char *codebook;         /* the current codebook */
+    int codebook_size;
+    unsigned char *next_codebook_buffer;  /* accumulator for next codebook */
+    int next_codebook_buffer_index;
+
+    unsigned char *decode_buffer;
+    int decode_buffer_size;
+
+    /* number of frames to go before replacing codebook */
+    int partial_countdown;
+    int partial_count;
+
+} VqaContext;
+
+static int vqa_decode_init(AVCodecContext *avctx)
+{
+    VqaContext *s = (VqaContext *)avctx->priv_data;
+    unsigned char *vqa_header;
+    int i, j, codebook_index;;
+
+    s->avctx = avctx;
+    avctx->pix_fmt = PIX_FMT_PAL8;
+    avctx->has_b_frames = 0;
+    dsputil_init(&s->dsp, avctx);
+
+    /* make sure the extradata made it */
+    if (s->avctx->extradata_size != VQA_HEADER_SIZE) {
+        av_log(s->avctx, AV_LOG_ERROR, "  VQA video: expected extradata size of %d\n", VQA_HEADER_SIZE);
+        return -1;
+    }
+
+    /* load up the VQA parameters from the header */
+    vqa_header = (unsigned char *)s->avctx->extradata;
+    s->vqa_version = vqa_header[0];
+    s->width = LE_16(&vqa_header[6]);
+    s->height = LE_16(&vqa_header[8]);
+    if(avcodec_check_dimensions(avctx, s->width, s->height)){
+        s->width= s->height= 0;
+        return -1;
+    }
+    s->vector_width = vqa_header[10];
+    s->vector_height = vqa_header[11];
+    s->partial_count = s->partial_countdown = vqa_header[13];
+
+    /* the vector dimensions have to meet very stringent requirements */
+    if ((s->vector_width != 4) ||
+        ((s->vector_height != 2) && (s->vector_height != 4))) {
+        /* return without further initialization */
+        return -1;
+    }
+
+    /* allocate codebooks */
+    s->codebook_size = MAX_CODEBOOK_SIZE;
+    s->codebook = av_malloc(s->codebook_size);
+    s->next_codebook_buffer = av_malloc(s->codebook_size);
+
+    /* initialize the solid-color vectors */
+    if (s->vector_height == 4) {
+        codebook_index = 0xFF00 * 16;
+        for (i = 0; i < 256; i++)
+            for (j = 0; j < 16; j++)
+                s->codebook[codebook_index++] = i;
+    } else {
+        codebook_index = 0xF00 * 8;
+        for (i = 0; i < 256; i++)
+            for (j = 0; j < 8; j++)
+                s->codebook[codebook_index++] = i;
+    }
+    s->next_codebook_buffer_index = 0;
+
+    /* allocate decode buffer */
+    s->decode_buffer_size = (s->width / s->vector_width) *
+        (s->height / s->vector_height) * 2;
+    s->decode_buffer = av_malloc(s->decode_buffer_size);
+
+    s->frame.data[0] = NULL;
+
+    return 0;
+}
+
+#define CHECK_COUNT() \
+    if (dest_index + count > dest_size) { \
+        av_log(NULL, AV_LOG_ERROR, "  VQA video: decode_format80 problem: next op would overflow dest_index\n"); \
+        av_log(NULL, AV_LOG_ERROR, "  VQA video: current dest_index = %d, count = %d, dest_size = %d\n", \
+            dest_index, count, dest_size); \
+        return; \
+    }
+
+static void decode_format80(unsigned char *src, int src_size,
+    unsigned char *dest, int dest_size, int check_size) {
+
+    int src_index = 0;
+    int dest_index = 0;
+    int count;
+    int src_pos;
+    unsigned char color;
+    int i;
+
+    while (src_index < src_size) {
+
+        vqa_debug("      opcode %02X: ", src[src_index]);
+
+        /* 0x80 means that frame is finished */
+        if (src[src_index] == 0x80)
+            return;
+
+        if (dest_index >= dest_size) {
+            av_log(NULL, AV_LOG_ERROR, "  VQA video: decode_format80 problem: dest_index (%d) exceeded dest_size (%d)\n",
+                dest_index, dest_size);
+            return;
+        }
+
+        if (src[src_index] == 0xFF) {
+
+            src_index++;
+            count = LE_16(&src[src_index]);
+            src_index += 2;
+            src_pos = LE_16(&src[src_index]);
+            src_index += 2;
+            vqa_debug("(1) copy %X bytes from absolute pos %X\n", count, src_pos);
+            CHECK_COUNT();
+            for (i = 0; i < count; i++)
+                dest[dest_index + i] = dest[src_pos + i];
+            dest_index += count;
+
+        } else if (src[src_index] == 0xFE) {
+
+            src_index++;
+            count = LE_16(&src[src_index]);
+            src_index += 2;
+            color = src[src_index++];
+            vqa_debug("(2) set %X bytes to %02X\n", count, color);
+            CHECK_COUNT();
+            memset(&dest[dest_index], color, count);
+            dest_index += count;
+
+        } else if ((src[src_index] & 0xC0) == 0xC0) {
+
+            count = (src[src_index++] & 0x3F) + 3;
+            src_pos = LE_16(&src[src_index]);
+            src_index += 2;
+            vqa_debug("(3) copy %X bytes from absolute pos %X\n", count, src_pos);
+            CHECK_COUNT();
+            for (i = 0; i < count; i++)
+                dest[dest_index + i] = dest[src_pos + i];
+            dest_index += count;
+
+        } else if (src[src_index] > 0x80) {
+
+            count = src[src_index++] & 0x3F;
+            vqa_debug("(4) copy %X bytes from source to dest\n", count);
+            CHECK_COUNT();
+            memcpy(&dest[dest_index], &src[src_index], count);
+            src_index += count;
+            dest_index += count;
+
+        } else {
+
+            count = ((src[src_index] & 0x70) >> 4) + 3;
+            src_pos = BE_16(&src[src_index]) & 0x0FFF;
+            src_index += 2;
+            vqa_debug("(5) copy %X bytes from relpos %X\n", count, src_pos);
+            CHECK_COUNT();
+            for (i = 0; i < count; i++)
+                dest[dest_index + i] = dest[dest_index - src_pos + i];
+            dest_index += count;
+        }
+    }
+
+    /* validate that the entire destination buffer was filled; this is
+     * important for decoding frame maps since each vector needs to have a
+     * codebook entry; it is not important for compressed codebooks because
+     * not every entry needs to be filled */
+    if (check_size)
+        if (dest_index < dest_size)
+            av_log(NULL, AV_LOG_ERROR, "  VQA video: decode_format80 problem: decode finished with dest_index (%d) < dest_size (%d)\n",
+                dest_index, dest_size);
+}
+
+static void vqa_decode_chunk(VqaContext *s)
+{
+    unsigned int chunk_type;
+    unsigned int chunk_size;
+    int byte_skip;
+    unsigned int index = 0;
+    int i;
+    unsigned char r, g, b;
+    int index_shift;
+
+    int cbf0_chunk = -1;
+    int cbfz_chunk = -1;
+    int cbp0_chunk = -1;
+    int cbpz_chunk = -1;
+    int cpl0_chunk = -1;
+    int cplz_chunk = -1;
+    int vptz_chunk = -1;
+
+    int x, y;
+    int lines = 0;
+    int pixel_ptr;
+    int vector_index = 0;
+    int lobyte = 0;
+    int hibyte = 0;
+    int lobytes = 0;
+    int hibytes = s->decode_buffer_size / 2;
+
+    /* first, traverse through the frame and find the subchunks */
+    while (index < s->size) {
+
+        chunk_type = BE_32(&s->buf[index]);
+        chunk_size = BE_32(&s->buf[index + 4]);
+
+        switch (chunk_type) {
+
+        case CBF0_TAG:
+            cbf0_chunk = index;
+            break;
+
+        case CBFZ_TAG:
+            cbfz_chunk = index;
+            break;
+
+        case CBP0_TAG:
+            cbp0_chunk = index;
+            break;
+
+        case CBPZ_TAG:
+            cbpz_chunk = index;
+            break;
+
+        case CPL0_TAG:
+            cpl0_chunk = index;
+            break;
+
+        case CPLZ_TAG:
+            cplz_chunk = index;
+            break;
+
+        case VPTZ_TAG:
+            vptz_chunk = index;
+            break;
+
+        default:
+            av_log(s->avctx, AV_LOG_ERROR, "  VQA video: Found unknown chunk type: %c%c%c%c (%08X)\n",
+            (chunk_type >> 24) & 0xFF,
+            (chunk_type >> 16) & 0xFF,
+            (chunk_type >>  8) & 0xFF,
+            (chunk_type >>  0) & 0xFF,
+            chunk_type);
+            break;
+        }
+
+        byte_skip = chunk_size & 0x01;
+        index += (CHUNK_PREAMBLE_SIZE + chunk_size + byte_skip);
+    }
+
+    /* next, deal with the palette */
+    if ((cpl0_chunk != -1) && (cplz_chunk != -1)) {
+
+        /* a chunk should not have both chunk types */
+        av_log(s->avctx, AV_LOG_ERROR, "  VQA video: problem: found both CPL0 and CPLZ chunks\n");
+        return;
+    }
+
+    /* decompress the palette chunk */
+    if (cplz_chunk != -1) {
+
+/* yet to be handled */
+
+    }
+
+    /* convert the RGB palette into the machine's endian format */
+    if (cpl0_chunk != -1) {
+
+        chunk_size = BE_32(&s->buf[cpl0_chunk + 4]);
+        /* sanity check the palette size */
+        if (chunk_size / 3 > 256) {
+            av_log(s->avctx, AV_LOG_ERROR, "  VQA video: problem: found a palette chunk with %d colors\n",
+                chunk_size / 3);
+            return;
+        }
+        cpl0_chunk += CHUNK_PREAMBLE_SIZE;
+        for (i = 0; i < chunk_size / 3; i++) {
+            /* scale by 4 to transform 6-bit palette -> 8-bit */
+            r = s->buf[cpl0_chunk++] * 4;
+            g = s->buf[cpl0_chunk++] * 4;
+            b = s->buf[cpl0_chunk++] * 4;
+            s->palette[i] = (r << 16) | (g << 8) | (b);
+        }
+    }
+
+    /* next, look for a full codebook */
+    if ((cbf0_chunk != -1) && (cbfz_chunk != -1)) {
+
+        /* a chunk should not have both chunk types */
+        av_log(s->avctx, AV_LOG_ERROR, "  VQA video: problem: found both CBF0 and CBFZ chunks\n");
+        return;
+    }
+
+    /* decompress the full codebook chunk */
+    if (cbfz_chunk != -1) {
+
+        chunk_size = BE_32(&s->buf[cbfz_chunk + 4]);
+        cbfz_chunk += CHUNK_PREAMBLE_SIZE;
+        decode_format80(&s->buf[cbfz_chunk], chunk_size,
+            s->codebook, s->codebook_size, 0);
+    }
+
+    /* copy a full codebook */
+    if (cbf0_chunk != -1) {
+
+        chunk_size = BE_32(&s->buf[cbf0_chunk + 4]);
+        /* sanity check the full codebook size */
+        if (chunk_size > MAX_CODEBOOK_SIZE) {
+            av_log(s->avctx, AV_LOG_ERROR, "  VQA video: problem: CBF0 chunk too large (0x%X bytes)\n",
+                chunk_size);
+            return;
+        }
+        cbf0_chunk += CHUNK_PREAMBLE_SIZE;
+
+        memcpy(s->codebook, &s->buf[cbf0_chunk], chunk_size);
+    }
+
+    /* decode the frame */
+    if (vptz_chunk == -1) {
+
+        /* something is wrong if there is no VPTZ chunk */
+        av_log(s->avctx, AV_LOG_ERROR, "  VQA video: problem: no VPTZ chunk found\n");
+        return;
+    }
+
+    chunk_size = BE_32(&s->buf[vptz_chunk + 4]);
+    vptz_chunk += CHUNK_PREAMBLE_SIZE;
+    decode_format80(&s->buf[vptz_chunk], chunk_size,
+        s->decode_buffer, s->decode_buffer_size, 1);
+
+    /* render the final PAL8 frame */
+    if (s->vector_height == 4)
+        index_shift = 4;
+    else
+        index_shift = 3;
+    for (y = 0; y < s->frame.linesize[0] * s->height;
+        y += s->frame.linesize[0] * s->vector_height) {
+
+        for (x = y; x < y + s->width; x += 4, lobytes++, hibytes++) {
+            pixel_ptr = x;
+
+            /* get the vector index, the method for which varies according to
+             * VQA file version */
+            switch (s->vqa_version) {
+
+            case 1:
+/* still need sample media for this case (only one game, "Legend of
+ * Kyrandia III : Malcolm's Revenge", is known to use this version) */
+                lobyte = s->decode_buffer[lobytes * 2];
+                hibyte = s->decode_buffer[(lobytes * 2) + 1];
+                vector_index = ((hibyte << 8) | lobyte) >> 3;
+                vector_index <<= index_shift;
+                lines = s->vector_height;
+                /* uniform color fill - a quick hack */
+                if (hibyte == 0xFF) {
+                    while (lines--) {
+                        s->frame.data[0][pixel_ptr + 0] = 255 - lobyte;
+                        s->frame.data[0][pixel_ptr + 1] = 255 - lobyte;
+                        s->frame.data[0][pixel_ptr + 2] = 255 - lobyte;
+                        s->frame.data[0][pixel_ptr + 3] = 255 - lobyte;
+                        pixel_ptr += s->frame.linesize[0];
+                    }
+                    lines=0;
+                }
+                break;
+
+            case 2:
+                lobyte = s->decode_buffer[lobytes];
+                hibyte = s->decode_buffer[hibytes];
+                vector_index = (hibyte << 8) | lobyte;
+                vector_index <<= index_shift;
+                lines = s->vector_height;
+                break;
+
+            case 3:
+/* not implemented yet */
+                lines = 0;
+                break;
+            }
+
+            while (lines--) {
+                s->frame.data[0][pixel_ptr + 0] = s->codebook[vector_index++];
+                s->frame.data[0][pixel_ptr + 1] = s->codebook[vector_index++];
+                s->frame.data[0][pixel_ptr + 2] = s->codebook[vector_index++];
+                s->frame.data[0][pixel_ptr + 3] = s->codebook[vector_index++];
+                pixel_ptr += s->frame.linesize[0];
+            }
+        }
+    }
+
+    /* handle partial codebook */
+    if ((cbp0_chunk != -1) && (cbpz_chunk != -1)) {
+        /* a chunk should not have both chunk types */
+        av_log(s->avctx, AV_LOG_ERROR, "  VQA video: problem: found both CBP0 and CBPZ chunks\n");
+        return;
+    }
+
+    if (cbp0_chunk != -1) {
+
+        chunk_size = BE_32(&s->buf[cbp0_chunk + 4]);
+        cbp0_chunk += CHUNK_PREAMBLE_SIZE;
+
+        /* accumulate partial codebook */
+        memcpy(&s->next_codebook_buffer[s->next_codebook_buffer_index],
+            &s->buf[cbp0_chunk], chunk_size);
+        s->next_codebook_buffer_index += chunk_size;
+
+        s->partial_countdown--;
+        if (s->partial_countdown == 0) {
+
+            /* time to replace codebook */
+            memcpy(s->codebook, s->next_codebook_buffer,
+                s->next_codebook_buffer_index);
+
+            /* reset accounting */
+            s->next_codebook_buffer_index = 0;
+            s->partial_countdown = s->partial_count;
+        }
+    }
+
+    if (cbpz_chunk != -1) {
+
+        chunk_size = BE_32(&s->buf[cbpz_chunk + 4]);
+        cbpz_chunk += CHUNK_PREAMBLE_SIZE;
+
+        /* accumulate partial codebook */
+        memcpy(&s->next_codebook_buffer[s->next_codebook_buffer_index],
+            &s->buf[cbpz_chunk], chunk_size);
+        s->next_codebook_buffer_index += chunk_size;
+
+        s->partial_countdown--;
+        if (s->partial_countdown == 0) {
+
+            /* decompress codebook */
+            decode_format80(s->next_codebook_buffer,
+                s->next_codebook_buffer_index,
+                s->codebook, s->codebook_size, 0);
+
+            /* reset accounting */
+            s->next_codebook_buffer_index = 0;
+            s->partial_countdown = s->partial_count;
+        }
+    }
+}
+
+static int vqa_decode_frame(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    VqaContext *s = (VqaContext *)avctx->priv_data;
+
+    s->buf = buf;
+    s->size = buf_size;
+
+    if (s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    if (avctx->get_buffer(avctx, &s->frame)) {
+        av_log(s->avctx, AV_LOG_ERROR, "  VQA Video: get_buffer() failed\n");
+        return -1;
+    }
+
+    vqa_decode_chunk(s);
+
+    /* make the palette available on the way out */
+    memcpy(s->frame.data[1], s->palette, PALETTE_COUNT * 4);
+    s->frame.palette_has_changed = 1;
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->frame;
+
+    /* report that the buffer was completely consumed */
+    return buf_size;
+}
+
+static int vqa_decode_end(AVCodecContext *avctx)
+{
+    VqaContext *s = (VqaContext *)avctx->priv_data;
+
+    av_free(s->codebook);
+    av_free(s->next_codebook_buffer);
+    av_free(s->decode_buffer);
+
+    if (s->frame.data[0])
+        avctx->release_buffer(avctx, &s->frame);
+
+    return 0;
+}
+
+AVCodec vqa_decoder = {
+    "vqavideo",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_WS_VQA,
+    sizeof(VqaContext),
+    vqa_decode_init,
+    NULL,
+    vqa_decode_end,
+    vqa_decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/w32thread.c b/contrib/ffmpeg/libavcodec/w32thread.c
new file mode 100644
index 000000000..e749a64af
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/w32thread.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+//#define DEBUG
+
+#include "avcodec.h"
+#include "common.h"
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <process.h>
+
+typedef struct ThreadContext{
+    AVCodecContext *avctx;
+    HANDLE thread;
+    HANDLE work_sem;
+    HANDLE done_sem;
+    int (*func)(AVCodecContext *c, void *arg);
+    void *arg;
+    int ret;
+}ThreadContext;
+
+
+static unsigned __stdcall thread_func(void *v){
+    ThreadContext *c= v;
+
+    for(;;){
+//printf("thread_func %X enter wait\n", (int)v); fflush(stdout);
+        WaitForSingleObject(c->work_sem, INFINITE);
+//printf("thread_func %X after wait (func=%X)\n", (int)v, (int)c->func); fflush(stdout);
+        if(c->func)
+            c->ret= c->func(c->avctx, c->arg);
+        else
+            return 0;
+//printf("thread_func %X signal complete\n", (int)v); fflush(stdout);
+        ReleaseSemaphore(c->done_sem, 1, 0);
+    }
+
+    return 0;
+}
+
+/**
+ * free what has been allocated by avcodec_thread_init().
+ * must be called after decoding has finished, especially dont call while avcodec_thread_execute() is running
+ */
+void avcodec_thread_free(AVCodecContext *s){
+    ThreadContext *c= s->thread_opaque;
+    int i;
+
+    for(i=0; i<s->thread_count; i++){
+
+        c[i].func= NULL;
+        ReleaseSemaphore(c[i].work_sem, 1, 0);
+        WaitForSingleObject(c[i].thread, INFINITE);
+        if(c[i].work_sem) CloseHandle(c[i].work_sem);
+        if(c[i].done_sem) CloseHandle(c[i].done_sem);
+    }
+
+    av_freep(&s->thread_opaque);
+}
+
+int avcodec_thread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count){
+    ThreadContext *c= s->thread_opaque;
+    int i;
+
+    assert(s == c->avctx);
+    assert(count <= s->thread_count);
+
+    /* note, we can be certain that this is not called with the same AVCodecContext by different threads at the same time */
+
+    for(i=0; i<count; i++){
+        c[i].arg= arg[i];
+        c[i].func= func;
+        c[i].ret= 12345;
+
+        ReleaseSemaphore(c[i].work_sem, 1, 0);
+    }
+    for(i=0; i<count; i++){
+        WaitForSingleObject(c[i].done_sem, INFINITE);
+
+        c[i].func= NULL;
+        if(ret) ret[i]= c[i].ret;
+    }
+    return 0;
+}
+
+int avcodec_thread_init(AVCodecContext *s, int thread_count){
+    int i;
+    ThreadContext *c;
+    uint32_t threadid;
+
+    s->thread_count= thread_count;
+
+    assert(!s->thread_opaque);
+    c= av_mallocz(sizeof(ThreadContext)*thread_count);
+    s->thread_opaque= c;
+
+    for(i=0; i<thread_count; i++){
+//printf("init semaphors %d\n", i); fflush(stdout);
+        c[i].avctx= s;
+
+        if(!(c[i].work_sem = CreateSemaphore(NULL, 0, s->thread_count, NULL)))
+            goto fail;
+        if(!(c[i].done_sem = CreateSemaphore(NULL, 0, s->thread_count, NULL)))
+            goto fail;
+
+//printf("create thread %d\n", i); fflush(stdout);
+        c[i].thread = (HANDLE)_beginthreadex(NULL, 0, thread_func, &c[i], 0, &threadid );
+        if( !c[i].thread ) goto fail;
+    }
+//printf("init done\n"); fflush(stdout);
+
+    s->execute= avcodec_thread_execute;
+
+    return 0;
+fail:
+    avcodec_thread_free(s);
+    return -1;
+}
diff --git a/contrib/ffmpeg/libavcodec/wavpack.c b/contrib/ffmpeg/libavcodec/wavpack.c
new file mode 100644
index 000000000..5a54f7d0e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/wavpack.c
@@ -0,0 +1,556 @@
+/*
+ * WavPack lossless audio decoder
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#define ALT_BITSTREAM_READER_LE
+#include "avcodec.h"
+#include "bitstream.h"
+
+/**
+ * @file wavpack.c
+ * WavPack lossless audio decoder
+ */
+
+#define WV_JOINT 0x0010
+
+enum WP_ID_Flags{
+    WP_IDF_MASK   = 0x1F,
+    WP_IDF_IGNORE = 0x20,
+    WP_IDF_ODD    = 0x40,
+    WP_IDF_LONG   = 0x80
+};
+
+enum WP_ID{
+    WP_ID_DUMMY = 0,
+    WP_ID_ENCINFO,
+    WP_ID_DECTERMS,
+    WP_ID_DECWEIGHTS,
+    WP_ID_DECSAMPLES,
+    WP_ID_ENTROPY,
+    WP_ID_HYBRID,
+    WP_ID_SHAPING,
+    WP_ID_FLOATINFO,
+    WP_ID_INT32INFO,
+    WP_ID_DATA,
+    WP_ID_CORR,
+    WP_ID_FLT,
+    WP_ID_CHANINFO
+};
+
+#define MAX_TERMS 16
+
+typedef struct Decorr {
+    int delta;
+    int value;
+    int weightA;
+    int weightB;
+    int samplesA[8];
+    int samplesB[8];
+} Decorr;
+
+typedef struct WavpackContext {
+    AVCodecContext *avctx;
+    int stereo;
+    int joint;
+    uint32_t CRC;
+    GetBitContext gb;
+    int data_size; // in bits
+    int samples;
+    int median[6];
+    int terms;
+    Decorr decorr[MAX_TERMS];
+    int zero, one, zeroes;
+} WavpackContext;
+
+// exponent table copied from WavPack source
+static const uint8_t wp_exp2_table [256] = {
+    0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b,
+    0x0b, 0x0c, 0x0d, 0x0e, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x12, 0x13, 0x13, 0x14, 0x15, 0x16, 0x16,
+    0x17, 0x18, 0x19, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1d, 0x1e, 0x1f, 0x20, 0x20, 0x21, 0x22, 0x23,
+    0x24, 0x24, 0x25, 0x26, 0x27, 0x28, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3a, 0x3b, 0x3c, 0x3d,
+    0x3e, 0x3f, 0x40, 0x41, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x48, 0x49, 0x4a, 0x4b,
+    0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a,
+    0x5b, 0x5c, 0x5d, 0x5e, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+    0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+    0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x87, 0x88, 0x89, 0x8a,
+    0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b,
+    0x9c, 0x9d, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad,
+    0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0,
+    0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc8, 0xc9, 0xca, 0xcb, 0xcd, 0xce, 0xcf, 0xd0, 0xd2, 0xd3, 0xd4,
+    0xd6, 0xd7, 0xd8, 0xd9, 0xdb, 0xdc, 0xdd, 0xde, 0xe0, 0xe1, 0xe2, 0xe4, 0xe5, 0xe6, 0xe8, 0xe9,
+    0xea, 0xec, 0xed, 0xee, 0xf0, 0xf1, 0xf2, 0xf4, 0xf5, 0xf6, 0xf8, 0xf9, 0xfa, 0xfc, 0xfd, 0xff
+};
+
+static always_inline int wp_exp2(int16_t val)
+{
+    int res, neg = 0;
+
+    if(val < 0){
+        val = -val;
+        neg = 1;
+    }
+
+    res = wp_exp2_table[val & 0xFF] | 0x100;
+    val >>= 8;
+    res = (val > 9) ? (res << (val - 9)) : (res >> (9 - val));
+    return neg ? -res : res;
+}
+
+static inline int get_unary(GetBitContext *gb){
+    int r=0;
+    while(get_bits1(gb) && r<33)r++;
+    return r;
+}
+
+// macros for manipulating median values
+#define GET_MED(n) ((median[n] >> 4) + 1)
+#define DEC_MED(n) median[n] -= ((median[n] + (128>>n) - 2) / (128>>n)) * 2
+#define INC_MED(n) median[n] += ((median[n] + (128>>n)) / (128>>n)) * 5
+
+// macros for applying weight
+#define UPDATE_WEIGHT_CLIP(weight, delta, samples, in) \
+        if(samples && in){ \
+            if((samples ^ in) < 0){ \
+                weight -= delta; \
+                if(weight < -1024) weight = -1024; \
+            }else{ \
+                weight += delta; \
+                if(weight > 1024) weight = 1024; \
+            } \
+        }
+
+
+static always_inline int get_tail(GetBitContext *gb, int k)
+{
+    int p, e, res;
+
+    if(k<1 || k>65535)return 0;
+    p = av_log2_16bit(k);
+    e = (1 << (p + 1)) - k - 1;
+    res = p ? get_bits(gb, p) : 0;
+    if(res >= e){
+        res = (res<<1) - e + get_bits1(gb);
+    }
+    return res;
+}
+
+static int wv_get_value(WavpackContext *ctx, GetBitContext *gb, int *median, int *last)
+{
+    int t, t2;
+    int sign, base, add, ret;
+
+    *last = 0;
+
+    if((ctx->median[0] < 2U) && (ctx->median[3] < 2U) && !ctx->zero && !ctx->one){
+        if(ctx->zeroes){
+            ctx->zeroes--;
+            if(ctx->zeroes)
+                return 0;
+        }else{
+            t = get_unary(gb);
+            if(t >= 2) t = get_bits(gb, t - 1) | (1 << (t-1));
+            ctx->zeroes = t;
+            if(ctx->zeroes){
+                memset(ctx->median, 0, sizeof(ctx->median));
+                return 0;
+            }
+        }
+    }
+
+    if(get_bits_count(gb) >= ctx->data_size){
+        *last = 1;
+        return 0;
+    }
+
+    if(ctx->zero){
+        t = 0;
+        ctx->zero = 0;
+    }else{
+        t = get_unary(gb);
+        if(get_bits_count(gb) >= ctx->data_size){
+            *last = 1;
+            return 0;
+        }
+        if(t == 16) {
+            t2 = get_unary(gb);
+            if(t2 < 2) t += t2;
+            else t += get_bits(gb, t2 - 1) | (1 << (t2 - 1));
+        }
+
+        if(ctx->one){
+            ctx->one = t&1;
+            t = (t>>1) + 1;
+        }else{
+            ctx->one = t&1;
+            t >>= 1;
+        }
+        ctx->zero = !ctx->one;
+    }
+
+    if(!t){
+        base = 0;
+        add = GET_MED(0) - 1;
+        DEC_MED(0);
+    }else if(t == 1){
+        base = GET_MED(0);
+        add = GET_MED(1) - 1;
+        INC_MED(0);
+        DEC_MED(1);
+    }else if(t == 2){
+        base = GET_MED(0) + GET_MED(1);
+        add = GET_MED(2) - 1;
+        INC_MED(0);
+        INC_MED(1);
+        DEC_MED(2);
+    }else{
+        base = GET_MED(0) + GET_MED(1) + GET_MED(2) * (t - 2);
+        add = GET_MED(2) - 1;
+        INC_MED(0);
+        INC_MED(1);
+        INC_MED(2);
+    }
+    ret = base + get_tail(gb, add);
+    sign = get_bits1(gb);
+    return sign ? ~ret : ret;
+}
+
+static int wv_unpack_stereo(WavpackContext *s, GetBitContext *gb, int16_t *dst)
+{
+    int i, j, count = 0;
+    int last, t;
+    int A, B, L, L2, R, R2;
+    int pos = 0;
+    uint32_t crc = 0xFFFFFFFF;
+
+    s->one = s->zero = s->zeroes = 0;
+    do{
+        L = wv_get_value(s, gb, s->median, &last);
+        if(last) break;
+        R = wv_get_value(s, gb, s->median + 3, &last);
+        if(last) break;
+        for(i = 0; i < s->terms; i++){
+            t = s->decorr[i].value;
+            j = 0;
+            if(t > 0){
+                if(t > 8){
+                    if(t & 1){
+                        A = 2 * s->decorr[i].samplesA[0] - s->decorr[i].samplesA[1];
+                        B = 2 * s->decorr[i].samplesB[0] - s->decorr[i].samplesB[1];
+                    }else{
+                        A = (3 * s->decorr[i].samplesA[0] - s->decorr[i].samplesA[1]) >> 1;
+                        B = (3 * s->decorr[i].samplesB[0] - s->decorr[i].samplesB[1]) >> 1;
+                    }
+                    s->decorr[i].samplesA[1] = s->decorr[i].samplesA[0];
+                    s->decorr[i].samplesB[1] = s->decorr[i].samplesB[0];
+                    j = 0;
+                }else{
+                    A = s->decorr[i].samplesA[pos];
+                    B = s->decorr[i].samplesB[pos];
+                    j = (pos + t) & 7;
+                }
+                L2 = L + ((s->decorr[i].weightA * A + 512) >> 10);
+                R2 = R + ((s->decorr[i].weightB * B + 512) >> 10);
+                if(A && L) s->decorr[i].weightA -= ((((L ^ A) >> 30) & 2) - 1) * s->decorr[i].delta;
+                if(B && R) s->decorr[i].weightB -= ((((R ^ B) >> 30) & 2) - 1) * s->decorr[i].delta;
+                s->decorr[i].samplesA[j] = L = L2;
+                s->decorr[i].samplesB[j] = R = R2;
+            }else if(t == -1){
+                L2 = L + ((s->decorr[i].weightA * s->decorr[i].samplesA[0] + 512) >> 10);
+                UPDATE_WEIGHT_CLIP(s->decorr[i].weightA, s->decorr[i].delta, s->decorr[i].samplesA[0], L);
+                L = L2;
+                R2 = R + ((s->decorr[i].weightB * L2 + 512) >> 10);
+                UPDATE_WEIGHT_CLIP(s->decorr[i].weightB, s->decorr[i].delta, L2, R);
+                R = R2;
+                s->decorr[i].samplesA[0] = R;
+            }else{
+                R2 = R + ((s->decorr[i].weightB * s->decorr[i].samplesB[0] + 512) >> 10);
+                UPDATE_WEIGHT_CLIP(s->decorr[i].weightB, s->decorr[i].delta, s->decorr[i].samplesB[0], R);
+                R = R2;
+
+                if(t == -3){
+                    R2 = s->decorr[i].samplesA[0];
+                    s->decorr[i].samplesA[0] = R;
+                }
+
+                L2 = L + ((s->decorr[i].weightA * R2 + 512) >> 10);
+                UPDATE_WEIGHT_CLIP(s->decorr[i].weightA, s->decorr[i].delta, R2, L);
+                L = L2;
+                s->decorr[i].samplesB[0] = L;
+            }
+        }
+        pos = (pos + 1) & 7;
+        if(s->joint)
+            L += (R -= (L >> 1));
+        crc = (crc * 3 + L) * 3 + R;
+        *dst++ = L;
+        *dst++ = R;
+
+        count++;
+    }while(!last && count < s->samples);
+
+    if(crc != s->CRC){
+        av_log(s->avctx, AV_LOG_ERROR, "CRC error\n");
+        return -1;
+    }
+    return count * 2;
+}
+
+static int wv_unpack_mono(WavpackContext *s, GetBitContext *gb, int16_t *dst)
+{
+    int i, j, count = 0;
+    int last, t;
+    int A, S, T;
+    int pos = 0;
+    uint32_t crc = 0xFFFFFFFF;
+
+    s->one = s->zero = s->zeroes = 0;
+    do{
+        T = wv_get_value(s, gb, s->median, &last);
+        S = 0;
+        if(last) break;
+        for(i = 0; i < s->terms; i++){
+            t = s->decorr[i].value;
+            if(t > 8){
+                if(t & 1)
+                    A = 2 * s->decorr[i].samplesA[0] - s->decorr[i].samplesA[1];
+                else
+                    A = (3 * s->decorr[i].samplesA[0] - s->decorr[i].samplesA[1]) >> 1;
+                s->decorr[i].samplesA[1] = s->decorr[i].samplesA[0];
+                j = 0;
+            }else{
+                A = s->decorr[i].samplesA[pos];
+                j = (pos + t) & 7;
+            }
+            S = T + ((s->decorr[i].weightA * A + 512) >> 10);
+            if(A && T) s->decorr[i].weightA -= ((((T ^ A) >> 30) & 2) - 1) * s->decorr[i].delta;
+            s->decorr[i].samplesA[j] = T = S;
+        }
+        pos = (pos + 1) & 7;
+        crc = crc * 3 + S;
+        *dst++ = S;
+        count++;
+    }while(!last && count < s->samples);
+
+    if(crc != s->CRC){
+        av_log(s->avctx, AV_LOG_ERROR, "CRC error\n");
+        return -1;
+    }
+    return count;
+}
+
+static int wavpack_decode_init(AVCodecContext *avctx)
+{
+    WavpackContext *s = avctx->priv_data;
+
+    s->avctx = avctx;
+    s->stereo = (avctx->channels == 2);
+
+    return 0;
+}
+
+static int wavpack_decode_close(AVCodecContext *avctx)
+{
+//    WavpackContext *s = avctx->priv_data;
+
+    return 0;
+}
+
+static int wavpack_decode_frame(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    WavpackContext *s = avctx->priv_data;
+    int16_t *samples = data;
+    int samplecount;
+    int got_terms = 0, got_weights = 0, got_samples = 0, got_entropy = 0, got_bs = 0;
+    uint8_t* buf_end = buf + buf_size;
+    int i, j, id, size, ssize, weights, t;
+
+    if (buf_size == 0) return 0;
+
+    memset(s->decorr, 0, MAX_TERMS * sizeof(Decorr));
+
+    s->samples = LE_32(buf); buf += 4;
+    if(!s->samples) return buf_size;
+    /* should not happen but who knows */
+    if(s->samples * 2 * avctx->channels > AVCODEC_MAX_AUDIO_FRAME_SIZE){
+        av_log(avctx, AV_LOG_ERROR, "Packet size is too big to be handled in lavc!\n");
+        return -1;
+    }
+    s->joint = LE_32(buf) & WV_JOINT; buf += 4;
+    s->CRC = LE_32(buf); buf += 4;
+    // parse metadata blocks
+    while(buf < buf_end){
+        id = *buf++;
+        size = *buf++;
+        if(id & WP_IDF_LONG) {
+            size |= (*buf++) << 8;
+            size |= (*buf++) << 16;
+        }
+        size <<= 1; // size is specified in words
+        ssize = size;
+        if(id & WP_IDF_ODD) size--;
+        if(size < 0){
+            av_log(avctx, AV_LOG_ERROR, "Got incorrect block %02X with size %i\n", id, size);
+            break;
+        }
+        if(buf + ssize > buf_end){
+            av_log(avctx, AV_LOG_ERROR, "Block size %i is out of bounds\n", size);
+            break;
+        }
+        if(id & WP_IDF_IGNORE){
+            buf += ssize;
+            continue;
+        }
+        switch(id & WP_IDF_MASK){
+        case WP_ID_DECTERMS:
+            s->terms = size;
+            if(s->terms > MAX_TERMS){
+                av_log(avctx, AV_LOG_ERROR, "Too many decorrelation terms\n");
+                buf += ssize;
+                continue;
+            }
+            for(i = 0; i < s->terms; i++) {
+                s->decorr[s->terms - i - 1].value = (*buf & 0x1F) - 5;
+                s->decorr[s->terms - i - 1].delta = *buf >> 5;
+                buf++;
+            }
+            got_terms = 1;
+            break;
+        case WP_ID_DECWEIGHTS:
+            if(!got_terms){
+                av_log(avctx, AV_LOG_ERROR, "No decorrelation terms met\n");
+                continue;
+            }
+            weights = size >> s->stereo;
+            if(weights > MAX_TERMS || weights > s->terms){
+                av_log(avctx, AV_LOG_ERROR, "Too many decorrelation weights\n");
+                buf += ssize;
+                continue;
+            }
+            for(i = 0; i < weights; i++) {
+                t = (int8_t)(*buf++);
+                s->decorr[s->terms - i - 1].weightA = t << 3;
+                if(s->decorr[s->terms - i - 1].weightA > 0)
+                    s->decorr[s->terms - i - 1].weightA += (s->decorr[s->terms - i - 1].weightA + 64) >> 7;
+                if(s->stereo){
+                    t = (int8_t)(*buf++);
+                    s->decorr[s->terms - i - 1].weightB = t << 3;
+                    if(s->decorr[s->terms - i - 1].weightB > 0)
+                        s->decorr[s->terms - i - 1].weightB += (s->decorr[s->terms - i - 1].weightB + 64) >> 7;
+                }
+            }
+            got_weights = 1;
+            break;
+        case WP_ID_DECSAMPLES:
+            if(!got_terms){
+                av_log(avctx, AV_LOG_ERROR, "No decorrelation terms met\n");
+                continue;
+            }
+            t = 0;
+            for(i = s->terms - 1; (i >= 0) && (t < size); i--) {
+                if(s->decorr[i].value > 8){
+                    s->decorr[i].samplesA[0] = wp_exp2(LE_16(buf)); buf += 2;
+                    s->decorr[i].samplesA[1] = wp_exp2(LE_16(buf)); buf += 2;
+                    if(s->stereo){
+                        s->decorr[i].samplesB[0] = wp_exp2(LE_16(buf)); buf += 2;
+                        s->decorr[i].samplesB[1] = wp_exp2(LE_16(buf)); buf += 2;
+                        t += 4;
+                    }
+                    t += 4;
+                }else if(s->decorr[i].value < 0){
+                    s->decorr[i].samplesA[0] = wp_exp2(LE_16(buf)); buf += 2;
+                    s->decorr[i].samplesB[0] = wp_exp2(LE_16(buf)); buf += 2;
+                    t += 4;
+                }else{
+                    for(j = 0; j < s->decorr[i].value; j++){
+                        s->decorr[i].samplesA[j] = wp_exp2(LE_16(buf)); buf += 2;
+                        if(s->stereo){
+                            s->decorr[i].samplesB[j] = wp_exp2(LE_16(buf)); buf += 2;
+                        }
+                    }
+                    t += s->decorr[i].value * 2 * avctx->channels;
+                }
+            }
+            got_samples = 1;
+            break;
+        case WP_ID_ENTROPY:
+            if(size != 6 * avctx->channels){
+                av_log(avctx, AV_LOG_ERROR, "Entropy vars size should be %i, got %i", 6 * avctx->channels, size);
+                buf += ssize;
+                continue;
+            }
+            for(i = 0; i < 3 * avctx->channels; i++){
+                s->median[i] = wp_exp2(LE_16(buf));
+                buf += 2;
+            }
+            got_entropy = 1;
+            break;
+        case WP_ID_DATA:
+            init_get_bits(&s->gb, buf, size * 8);
+            s->data_size = size * 8;
+            buf += size;
+            got_bs = 1;
+            break;
+        default:
+            buf += size;
+        }
+        if(id & WP_IDF_ODD) buf++;
+    }
+    if(!got_terms){
+        av_log(avctx, AV_LOG_ERROR, "No block with decorrelation terms\n");
+        return -1;
+    }
+    if(!got_weights){
+        av_log(avctx, AV_LOG_ERROR, "No block with decorrelation weights\n");
+        return -1;
+    }
+    if(!got_samples){
+        av_log(avctx, AV_LOG_ERROR, "No block with decorrelation samples\n");
+        return -1;
+    }
+    if(!got_entropy){
+        av_log(avctx, AV_LOG_ERROR, "No block with entropy info\n");
+        return -1;
+    }
+    if(!got_bs){
+        av_log(avctx, AV_LOG_ERROR, "Packed samples not found\n");
+        return -1;
+    }
+
+    if(s->stereo)
+        samplecount = wv_unpack_stereo(s, &s->gb, samples);
+    else
+        samplecount = wv_unpack_mono(s, &s->gb, samples);
+    *data_size = samplecount * 2;
+
+    return buf_size;
+}
+
+AVCodec wavpack_decoder = {
+    "wavpack",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_WAVPACK,
+    sizeof(WavpackContext),
+    wavpack_decode_init,
+    NULL,
+    wavpack_decode_close,
+    wavpack_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/wmadata.h b/contrib/ffmpeg/libavcodec/wmadata.h
new file mode 100644
index 000000000..35e545ce6
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/wmadata.h
@@ -0,0 +1,1433 @@
+/*
+ * WMA compatible decoder
+ * copyright (c) 2002 The FFmpeg Project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file wmadata.h
+ * Various WMA tables.
+ */
+
+static const uint16_t wma_critical_freqs[25] = {
+    100,   200,  300, 400,   510,  630,  770,    920,
+    1080, 1270, 1480, 1720, 2000, 2320, 2700,   3150,
+    3700, 4400, 5300, 6400, 7700, 9500, 12000, 15500,
+    24500,
+};
+
+/* first value is number of bands */
+static const uint8_t exponent_band_22050[3][25] = {
+    { 10, 4, 8, 4, 8, 8, 12, 20, 24, 24, 16, },
+    { 14, 4, 8, 8, 4, 12, 12, 16, 24, 16, 20, 24, 32, 40, 36, },
+    { 23, 4, 4, 4, 8, 4, 4, 8, 8, 8, 8, 8, 12, 12, 16, 16, 24, 24, 32, 44, 48, 60, 84, 72, },
+};
+
+static const uint8_t exponent_band_32000[3][25] = {
+    { 11, 4, 4, 8, 4, 4, 12, 16, 24, 20, 28, 4, },
+    { 15, 4, 8, 4, 4, 8, 8, 16, 20, 12, 20, 20, 28, 40, 56, 8, },
+    { 16, 8, 4, 8, 8, 12, 16, 20, 24, 40, 32, 32, 44, 56, 80, 112, 16, },
+};
+
+static const uint8_t exponent_band_44100[3][25] = {
+    { 12,  4,   4,   4,   4,   4,   8,   8,   8,  12,  16,  20,  36, },
+    { 15,  4,   8,   4,   8,   8,   4,   8,   8,  12,  12,  12,  24,  28,  40,  76, },
+    { 17,  4,   8,   8,   4,  12,  12,   8,   8,  24,  16,  20,  24,  32,  40,  60,  80, 152, },
+};
+
+static const uint16_t hgain_huffcodes[37] = {
+ 0x00003, 0x002e7, 0x00001, 0x005cd, 0x0005d, 0x005c9, 0x0005e, 0x00003,
+ 0x00016, 0x0000b, 0x00001, 0x00006, 0x00001, 0x00006, 0x00004, 0x00005,
+ 0x00004, 0x00007, 0x00003, 0x00007, 0x00004, 0x0000a, 0x0000a, 0x00002,
+ 0x00003, 0x00000, 0x00005, 0x00002, 0x0005f, 0x00004, 0x00003, 0x00002,
+ 0x005c8, 0x000b8, 0x005ca, 0x005cb, 0x005cc,
+};
+
+static const uint8_t hgain_huffbits[37] = {
+ 10, 12, 10, 13,  9, 13,  9,  8,
+  7,  5,  5,  4,  4,  3,  3,  3,
+  4,  3,  4,  4,  5,  5,  6,  8,
+  7, 10,  8, 10,  9,  8,  9,  9,
+ 13, 10, 13, 13, 13,
+};
+
+static const float lsp_codebook[NB_LSP_COEFS][16] = {
+ { 1.98732877, 1.97944528, 1.97179088, 1.96260549, 1.95038374, 1.93336114, 1.90719232, 1.86191415, },
+ { 1.97260000, 1.96083160, 1.94982586, 1.93806164, 1.92516608, 1.91010199, 1.89232331, 1.87149812,
+   1.84564818, 1.81358067, 1.77620070, 1.73265264, 1.67907855, 1.60959081, 1.50829650, 1.33120330, },
+ { 1.90109110, 1.86482426, 1.83419671, 1.80168452, 1.76650116, 1.72816320, 1.68502700, 1.63738256,
+   1.58501580, 1.51795181, 1.43679906, 1.33950585, 1.24176208, 1.12260729, 0.96749668, 0.74048265, },
+ { 1.76943864, 1.67822463, 1.59946365, 1.53560582, 1.47470796, 1.41210167, 1.34509536, 1.27339507,
+   1.19303814, 1.09765169, 0.98818722, 0.87239446, 0.74369172, 0.59768184, 0.43168630, 0.17977021, },
+ { 1.43428349, 1.32038354, 1.21074086, 1.10577988, 1.00561746, 0.90335924, 0.80437489, 0.70709671,
+   0.60427395, 0.49814048, 0.38509539, 0.27106800, 0.14407416, 0.00219910, -0.16725141, -0.36936085, },
+ { 0.99895687, 0.84188166, 0.70753739, 0.57906595, 0.47055563, 0.36966965, 0.26826648, 0.17163380,
+   0.07208392, -0.03062936, -1.40037388, -0.25128968, -0.37213937, -0.51075646, -0.64887512, -0.80308031, },
+ { 0.26515280, 0.06313551, -0.08872080, -0.21103548, -0.31069678, -0.39680323, -0.47223474, -0.54167135,
+   -0.61444740, -0.68943343, -0.76580211, -0.85170082, -0.95289061, -1.06514703, -1.20510707, -1.37617746, },
+ { -0.53940301, -0.73770929, -0.88424876, -1.01117930, -1.13389091, -1.26830073, -1.42041987, -1.62033919,
+   -1.10158808, -1.16512566, -1.23337128, -1.30414401, -1.37663312, -1.46853845, -1.57625798, -1.66893638, },
+ { -0.38601997, -0.56009350, -0.66978483, -0.76028471, -0.83846064, -0.90868087, -0.97408881, -1.03694962, },
+ { -1.56144989, -1.65944032, -1.72689685, -1.77857740, -1.82203011, -1.86220079, -1.90283983, -1.94820479, },
+};
+
+static const uint32_t scale_huffcodes[121] = {
+ 0x3ffe8, 0x3ffe6, 0x3ffe7, 0x3ffe5, 0x7fff5, 0x7fff1, 0x7ffed, 0x7fff6,
+ 0x7ffee, 0x7ffef, 0x7fff0, 0x7fffc, 0x7fffd, 0x7ffff, 0x7fffe, 0x7fff7,
+ 0x7fff8, 0x7fffb, 0x7fff9, 0x3ffe4, 0x7fffa, 0x3ffe3, 0x1ffef, 0x1fff0,
+ 0x0fff5, 0x1ffee, 0x0fff2, 0x0fff3, 0x0fff4, 0x0fff1, 0x07ff6, 0x07ff7,
+ 0x03ff9, 0x03ff5, 0x03ff7, 0x03ff3, 0x03ff6, 0x03ff2, 0x01ff7, 0x01ff5,
+ 0x00ff9, 0x00ff7, 0x00ff6, 0x007f9, 0x00ff4, 0x007f8, 0x003f9, 0x003f7,
+ 0x003f5, 0x001f8, 0x001f7, 0x000fa, 0x000f8, 0x000f6, 0x00079, 0x0003a,
+ 0x00038, 0x0001a, 0x0000b, 0x00004, 0x00000, 0x0000a, 0x0000c, 0x0001b,
+ 0x00039, 0x0003b, 0x00078, 0x0007a, 0x000f7, 0x000f9, 0x001f6, 0x001f9,
+ 0x003f4, 0x003f6, 0x003f8, 0x007f5, 0x007f4, 0x007f6, 0x007f7, 0x00ff5,
+ 0x00ff8, 0x01ff4, 0x01ff6, 0x01ff8, 0x03ff8, 0x03ff4, 0x0fff0, 0x07ff4,
+ 0x0fff6, 0x07ff5, 0x3ffe2, 0x7ffd9, 0x7ffda, 0x7ffdb, 0x7ffdc, 0x7ffdd,
+ 0x7ffde, 0x7ffd8, 0x7ffd2, 0x7ffd3, 0x7ffd4, 0x7ffd5, 0x7ffd6, 0x7fff2,
+ 0x7ffdf, 0x7ffe7, 0x7ffe8, 0x7ffe9, 0x7ffea, 0x7ffeb, 0x7ffe6, 0x7ffe0,
+ 0x7ffe1, 0x7ffe2, 0x7ffe3, 0x7ffe4, 0x7ffe5, 0x7ffd7, 0x7ffec, 0x7fff4,
+ 0x7fff3,
+};
+
+static const uint8_t scale_huffbits[121] = {
+ 18, 18, 18, 18, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 18, 19, 18, 17, 17,
+ 16, 17, 16, 16, 16, 16, 15, 15,
+ 14, 14, 14, 14, 14, 14, 13, 13,
+ 12, 12, 12, 11, 12, 11, 10, 10,
+ 10,  9,  9,  8,  8,  8,  7,  6,
+  6,  5,  4,  3,  1,  4,  4,  5,
+  6,  6,  7,  7,  8,  8,  9,  9,
+ 10, 10, 10, 11, 11, 11, 11, 12,
+ 12, 13, 13, 13, 14, 14, 16, 15,
+ 16, 15, 18, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19,
+ 19,
+};
+
+static const uint32_t coef0_huffcodes[666] = {
+ 0x00258, 0x0003d, 0x00000, 0x00005, 0x00008, 0x00008, 0x0000c, 0x0001b,
+ 0x0001f, 0x00015, 0x00024, 0x00032, 0x0003a, 0x00026, 0x0002c, 0x0002f,
+ 0x0004a, 0x0004d, 0x00061, 0x00070, 0x00073, 0x00048, 0x00052, 0x0005a,
+ 0x0005d, 0x0006e, 0x00099, 0x0009e, 0x000c1, 0x000ce, 0x000e4, 0x000f0,
+ 0x00093, 0x0009e, 0x000a2, 0x000a1, 0x000b8, 0x000d2, 0x000d3, 0x0012e,
+ 0x00130, 0x000de, 0x0012d, 0x0019b, 0x001e4, 0x00139, 0x0013a, 0x0013f,
+ 0x0014f, 0x0016d, 0x001a2, 0x0027c, 0x0027e, 0x00332, 0x0033c, 0x0033f,
+ 0x0038b, 0x00396, 0x003c5, 0x00270, 0x0027c, 0x0025a, 0x00395, 0x00248,
+ 0x004bd, 0x004fb, 0x00662, 0x00661, 0x0071b, 0x004e6, 0x004ff, 0x00666,
+ 0x0071c, 0x0071a, 0x0071f, 0x00794, 0x00536, 0x004e2, 0x0078e, 0x004ee,
+ 0x00518, 0x00535, 0x004fb, 0x0078d, 0x00530, 0x00680, 0x0068f, 0x005cb,
+ 0x00965, 0x006a6, 0x00967, 0x0097f, 0x00682, 0x006ae, 0x00cd0, 0x00e28,
+ 0x00f13, 0x00f1f, 0x009f5, 0x00cd3, 0x00f11, 0x00926, 0x00964, 0x00f32,
+ 0x00f12, 0x00f30, 0x00966, 0x00d0b, 0x00a68, 0x00b91, 0x009c7, 0x00b73,
+ 0x012fa, 0x0131d, 0x013f9, 0x01ca0, 0x0199c, 0x01c7a, 0x0198c, 0x01248,
+ 0x01c74, 0x01c64, 0x0139e, 0x012fd, 0x00a77, 0x012fc, 0x01c7b, 0x012ca,
+ 0x014cc, 0x014d2, 0x014e3, 0x014dc, 0x012dc, 0x03344, 0x02598, 0x0263c,
+ 0x0333b, 0x025e6, 0x01a1c, 0x01e3c, 0x014e2, 0x033d4, 0x01a11, 0x03349,
+ 0x03cce, 0x014e1, 0x01a34, 0x0273e, 0x02627, 0x0273f, 0x038ee, 0x03971,
+ 0x03c67, 0x03c61, 0x0333d, 0x038c2, 0x0263f, 0x038cd, 0x02638, 0x02e41,
+ 0x0351f, 0x03348, 0x03c66, 0x03562, 0x02989, 0x027d5, 0x0333c, 0x02e4f,
+ 0x0343b, 0x02ddf, 0x04bc8, 0x029c0, 0x02e57, 0x04c72, 0x025b7, 0x03547,
+ 0x03540, 0x029d3, 0x04c45, 0x025bb, 0x06600, 0x04c73, 0x04bce, 0x0357b,
+ 0x029a6, 0x029d2, 0x0263e, 0x0298a, 0x07183, 0x06602, 0x07958, 0x04b66,
+ 0x0537d, 0x05375, 0x04fe9, 0x04b67, 0x0799f, 0x04bc9, 0x051fe, 0x06a3b,
+ 0x05bb6, 0x04fa8, 0x0728f, 0x05376, 0x0492c, 0x0537e, 0x0795a, 0x06a3c,
+ 0x0e515, 0x07887, 0x0683a, 0x051f9, 0x051fd, 0x0cc6a, 0x06a8a, 0x0cc6d,
+ 0x05bb3, 0x0683b, 0x051fc, 0x05378, 0x0728e, 0x07886, 0x05bb7, 0x0f2a4,
+ 0x0795b, 0x0683c, 0x09fc1, 0x0683d, 0x0b752, 0x09678, 0x0a3e8, 0x06ac7,
+ 0x051f0, 0x0b759, 0x06af3, 0x04b6b, 0x0f2a0, 0x0f2ad, 0x096c3, 0x0e518,
+ 0x0b75c, 0x0d458, 0x0cc6b, 0x0537c, 0x067aa, 0x04fea, 0x0343a, 0x0cc71,
+ 0x0967f, 0x09fc4, 0x096c2, 0x0e516, 0x0f2a1, 0x0d45c, 0x0d45d, 0x0d45e,
+ 0x12fb9, 0x0967e, 0x1982f, 0x09883, 0x096c4, 0x0b753, 0x12fb8, 0x0f2a8,
+ 0x1ca21, 0x096c5, 0x0e51a, 0x1ca27, 0x12f3c, 0x0d471, 0x0f2aa, 0x0b75b,
+ 0x12fbb, 0x0f2a9, 0x0f2ac, 0x0d45a, 0x0b74f, 0x096c8, 0x16e91, 0x096ca,
+ 0x12fbf, 0x0d0a7, 0x13103, 0x0d516, 0x16e99, 0x12cbd, 0x0a3ea, 0x19829,
+ 0x0b755, 0x29ba7, 0x1ca28, 0x29ba5, 0x16e93, 0x1982c, 0x19828, 0x25994,
+ 0x0a3eb, 0x1ca29, 0x16e90, 0x1ca25, 0x1982d, 0x1ca26, 0x16e9b, 0x0b756,
+ 0x0967c, 0x25997, 0x0b75f, 0x198d3, 0x0b757, 0x19a2a, 0x0d45b, 0x0e517,
+ 0x1ca24, 0x1ca23, 0x1ca22, 0x0b758, 0x16e97, 0x0cd14, 0x13100, 0x00007,
+ 0x0003b, 0x0006b, 0x00097, 0x00138, 0x00125, 0x00173, 0x00258, 0x00335,
+ 0x0028e, 0x004c6, 0x00715, 0x00729, 0x004ef, 0x00519, 0x004ed, 0x00532,
+ 0x0068c, 0x00686, 0x00978, 0x00e5d, 0x00e31, 0x009f4, 0x00b92, 0x012f8,
+ 0x00d06, 0x00a67, 0x00d44, 0x00a76, 0x00d59, 0x012cd, 0x01c78, 0x01c75,
+ 0x0199f, 0x0198f, 0x01c67, 0x014c6, 0x01c79, 0x01c76, 0x00b94, 0x00d1b,
+ 0x01e32, 0x01e31, 0x01ab0, 0x01a05, 0x01aa1, 0x0333a, 0x025e5, 0x02626,
+ 0x03541, 0x03544, 0x03421, 0x03546, 0x02e55, 0x02e56, 0x0492d, 0x02dde,
+ 0x0299b, 0x02ddc, 0x0357a, 0x0249c, 0x0668b, 0x1c77f, 0x1ca20, 0x0d45f,
+ 0x09886, 0x16e9a, 0x0f2a7, 0x0b751, 0x0a3ee, 0x0cf59, 0x0cf57, 0x0b754,
+ 0x0d0a6, 0x16e98, 0x0b760, 0x06ac6, 0x0a3f0, 0x12fbe, 0x13104, 0x0f2a5,
+ 0x0a3ef, 0x0d472, 0x12cba, 0x1982e, 0x16e9c, 0x1c77e, 0x198d0, 0x13105,
+ 0x16e92, 0x0b75d, 0x0d459, 0x0001a, 0x000c0, 0x0016c, 0x003cd, 0x00350,
+ 0x0067b, 0x0051e, 0x006a9, 0x009f4, 0x00b72, 0x00d09, 0x01249, 0x01e3d,
+ 0x01ca1, 0x01a1f, 0x01721, 0x01a8a, 0x016e8, 0x03347, 0x01a35, 0x0249d,
+ 0x0299a, 0x02596, 0x02e4e, 0x0298b, 0x07182, 0x04c46, 0x025ba, 0x02e40,
+ 0x027d6, 0x04fe8, 0x06607, 0x05310, 0x09884, 0x072e1, 0x06a3d, 0x04b6a,
+ 0x04c7a, 0x06603, 0x04c7b, 0x03428, 0x06605, 0x09664, 0x09fc0, 0x071de,
+ 0x06601, 0x05bb2, 0x09885, 0x0a3e2, 0x1c61f, 0x12cbb, 0x0b750, 0x0cf58,
+ 0x0967d, 0x25995, 0x668ad, 0x0b75a, 0x09fc2, 0x0537f, 0x0b75e, 0x13fae,
+ 0x12fbc, 0x00031, 0x001c4, 0x004c5, 0x005b8, 0x00cf4, 0x0096f, 0x00d46,
+ 0x01e57, 0x01a04, 0x02625, 0x03346, 0x028f9, 0x04c47, 0x072e0, 0x04b69,
+ 0x03420, 0x07957, 0x06639, 0x0799e, 0x07959, 0x07881, 0x04b68, 0x09fc3,
+ 0x09fd6, 0x0cc70, 0x0a3f1, 0x12cbe, 0x0e30e, 0x0e51b, 0x06af2, 0x12cbc,
+ 0x1c77d, 0x0f2ab, 0x12fbd, 0x1aa2f, 0x0a3ec, 0x0d473, 0x05377, 0x0a3e9,
+ 0x1982b, 0x0e300, 0x12f3f, 0x0cf5f, 0x096c0, 0x38c3c, 0x16e94, 0x16e95,
+ 0x12f3d, 0x29ba4, 0x29ba6, 0x1c77c, 0x6a8ba, 0x3545c, 0x33457, 0x668ac,
+ 0x6a8bb, 0x16e9d, 0x0e519, 0x25996, 0x12f3e, 0x00036, 0x0033e, 0x006ad,
+ 0x00d03, 0x012c8, 0x0124a, 0x03c42, 0x03ccd, 0x06606, 0x07880, 0x06852,
+ 0x06a3a, 0x05bb4, 0x0f2a2, 0x09fc7, 0x12cb9, 0x0cc6c, 0x0a6e8, 0x096c1,
+ 0x0004a, 0x00355, 0x012f9, 0x014e8, 0x01abe, 0x025b6, 0x0492e, 0x09fc6,
+ 0x051ff, 0x0cc6f, 0x096cb, 0x0d071, 0x198d1, 0x12cb8, 0x38c3d, 0x13faf,
+ 0x096c9, 0x0009d, 0x00539, 0x012ce, 0x0341f, 0x029c1, 0x04b33, 0x0a3e3,
+ 0x0d070, 0x16e96, 0x0b763, 0x000a0, 0x009ce, 0x038cc, 0x0343d, 0x051fa,
+ 0x09888, 0x12fba, 0x000df, 0x00a75, 0x029a7, 0x09fc5, 0x0e301, 0x0967b,
+ 0x001e7, 0x012c9, 0x051fb, 0x09889, 0x0f2a6, 0x0016f, 0x01cb9, 0x0cf5a,
+ 0x12cbf, 0x09679, 0x00272, 0x01a15, 0x0967a, 0x003cb, 0x025f6, 0x0b762,
+ 0x0028d, 0x03c60, 0x0cf5e, 0x00352, 0x03ccc, 0x0072f, 0x07186, 0x004ec,
+ 0x05379, 0x0068e, 0x09887, 0x006a7, 0x06af1, 0x00e29, 0x0cf5b, 0x00f31,
+ 0x0d470, 0x009c6, 0x013fb, 0x13102, 0x019a5, 0x13101, 0x01983, 0x01c65,
+ 0x0124f, 0x014c7, 0x01726, 0x01abf, 0x03304, 0x02624, 0x03c41, 0x027d7,
+ 0x02ddd, 0x02e54, 0x0343c, 0x06604, 0x07181, 0x0663a, 0x04fa9, 0x0663b,
+ 0x05311, 0x0537a, 0x06839, 0x05bb5, 0x0492f, 0x06af0, 0x096c7, 0x0cc6e,
+ 0x0537b, 0x0cf5c, 0x0cf56, 0x198d2, 0x0cf5d, 0x0a3ed, 0x0f2a3, 0x1982a,
+ 0x0b761, 0x096c6,
+};
+
+static const uint8_t coef0_huffbits[666] = {
+ 11,  6,  2,  3,  4,  5,  5,  5,
+  5,  6,  6,  6,  6,  7,  7,  7,
+  7,  7,  7,  7,  7,  8,  8,  8,
+  8,  8,  8,  8,  8,  8,  8,  8,
+  9,  9,  9,  9,  9,  9,  9,  9,
+  9,  9,  9,  9,  9, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 11, 11, 11, 10, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 12, 12, 11, 12,
+ 12, 12, 12, 11, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 13, 13, 12,
+ 12, 12, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 14,
+ 13, 13, 13, 13, 13, 13, 13, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 13, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 15,
+ 15, 14, 14, 15, 15, 15, 14, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 14, 15, 15, 15, 15, 16,
+ 16, 16, 15, 16, 15, 15, 16, 16,
+ 16, 16, 15, 16, 16, 16, 15, 16,
+ 16, 15, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 15, 15, 16, 16,
+ 15, 16, 16, 16, 17, 17, 17, 16,
+ 16, 17, 16, 16, 16, 16, 17, 16,
+ 17, 17, 16, 16, 15, 15, 15, 16,
+ 17, 16, 17, 16, 16, 17, 17, 17,
+ 17, 17, 17, 16, 17, 17, 17, 16,
+ 17, 17, 16, 17, 17, 17, 16, 17,
+ 17, 16, 16, 17, 17, 17, 18, 17,
+ 17, 17, 17, 17, 18, 18, 17, 17,
+ 17, 19, 17, 19, 18, 17, 17, 18,
+ 17, 17, 18, 17, 17, 17, 18, 17,
+ 17, 18, 17, 17, 17, 17, 17, 16,
+ 17, 17, 17, 17, 18, 16, 17,  4,
+  6,  8,  9,  9, 10, 10, 10, 10,
+ 11, 11, 11, 11, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 14, 13, 13, 13, 13,
+ 13, 13, 14, 14, 14, 14, 14, 14,
+ 15, 15, 15, 15, 15, 15, 16, 15,
+ 15, 15, 15, 15, 15, 17, 17, 17,
+ 16, 18, 16, 17, 17, 16, 16, 17,
+ 17, 18, 17, 16, 17, 17, 17, 16,
+ 17, 17, 18, 17, 18, 17, 17, 17,
+ 18, 17, 17,  5,  8, 10, 10, 11,
+ 11, 12, 12, 12, 13, 13, 14, 13,
+ 13, 14, 14, 14, 14, 14, 14, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 16, 16, 15, 16, 16,
+ 15, 15, 15, 15, 15, 16, 16, 15,
+ 15, 16, 16, 17, 17, 18, 17, 16,
+ 17, 18, 19, 17, 16, 16, 17, 17,
+ 17,  6,  9, 11, 12, 12, 13, 13,
+ 13, 14, 14, 14, 15, 15, 15, 16,
+ 15, 15, 15, 15, 15, 15, 16, 16,
+ 16, 16, 17, 18, 16, 16, 16, 18,
+ 17, 16, 17, 18, 17, 17, 16, 17,
+ 17, 16, 17, 16, 17, 18, 18, 18,
+ 17, 19, 19, 17, 20, 19, 18, 19,
+ 20, 18, 16, 18, 17,  7, 10, 12,
+ 13, 13, 14, 14, 14, 15, 15, 16,
+ 16, 16, 16, 16, 18, 16, 17, 17,
+  8, 11, 13, 14, 14, 15, 16, 16,
+ 16, 16, 17, 17, 17, 18, 18, 17,
+ 17,  8, 12, 14, 15, 15, 15, 17,
+ 17, 18, 17,  9, 12, 14, 15, 16,
+ 16, 17,  9, 13, 15, 16, 16, 17,
+  9, 13, 16, 16, 16, 10, 13, 16,
+ 18, 17, 10, 14, 17, 10, 14, 17,
+ 11, 14, 16, 11, 14, 11, 15, 12,
+ 16, 12, 16, 12, 16, 12, 16, 12,
+ 17, 13, 13, 17, 13, 17, 13, 13,
+ 14, 14, 14, 14, 14, 14, 14, 15,
+ 15, 15, 15, 15, 15, 15, 16, 15,
+ 16, 16, 16, 16, 16, 16, 17, 16,
+ 16, 16, 16, 17, 16, 17, 16, 17,
+ 17, 17,
+};
+
+static const uint32_t coef1_huffcodes[555] = {
+ 0x00115, 0x00002, 0x00001, 0x00000, 0x0000d, 0x00007, 0x00013, 0x0001d,
+ 0x00008, 0x0000c, 0x00023, 0x0002b, 0x0003f, 0x00017, 0x0001b, 0x00043,
+ 0x00049, 0x00050, 0x00055, 0x00054, 0x00067, 0x00064, 0x0007b, 0x0002d,
+ 0x00028, 0x0002a, 0x00085, 0x00089, 0x0002b, 0x00035, 0x00090, 0x00091,
+ 0x00094, 0x00088, 0x000c1, 0x000c6, 0x000f2, 0x000e3, 0x000c5, 0x000e2,
+ 0x00036, 0x000f0, 0x000a7, 0x000cd, 0x000fb, 0x00059, 0x00116, 0x00103,
+ 0x00108, 0x0012b, 0x0012d, 0x00188, 0x0012e, 0x0014c, 0x001c3, 0x00187,
+ 0x001e7, 0x0006f, 0x00094, 0x00069, 0x001e6, 0x001ca, 0x00147, 0x00195,
+ 0x000a7, 0x00213, 0x00209, 0x00303, 0x00295, 0x00289, 0x0028c, 0x0028d,
+ 0x00312, 0x00330, 0x0029b, 0x00308, 0x00328, 0x0029a, 0x0025e, 0x003c5,
+ 0x00384, 0x0039f, 0x00397, 0x00296, 0x0032e, 0x00332, 0x003c6, 0x003e6,
+ 0x0012d, 0x000d1, 0x00402, 0x000dd, 0x00161, 0x0012b, 0x00127, 0x0045d,
+ 0x00601, 0x004ab, 0x0045f, 0x00410, 0x004bf, 0x00528, 0x0045c, 0x00424,
+ 0x00400, 0x00511, 0x00618, 0x0073d, 0x0063a, 0x00614, 0x0073c, 0x007c0,
+ 0x007cf, 0x00802, 0x00966, 0x00964, 0x00951, 0x008a0, 0x00346, 0x00803,
+ 0x00a52, 0x0024a, 0x007c1, 0x0063f, 0x00126, 0x00406, 0x00789, 0x008a2,
+ 0x00960, 0x00967, 0x00c05, 0x00c70, 0x00c79, 0x00a5d, 0x00c26, 0x00c4d,
+ 0x00372, 0x008a5, 0x00c08, 0x002c5, 0x00f11, 0x00cc4, 0x00f8e, 0x00e16,
+ 0x00496, 0x00e77, 0x00f9c, 0x00c25, 0x00f1e, 0x00c27, 0x00f1f, 0x00e17,
+ 0x00ccd, 0x00355, 0x00c09, 0x00c78, 0x00f90, 0x00521, 0x00357, 0x00356,
+ 0x0068e, 0x00f9d, 0x00c04, 0x00e58, 0x00a20, 0x00a2c, 0x00c4c, 0x0052f,
+ 0x00f8d, 0x01178, 0x01053, 0x01097, 0x0180f, 0x0180d, 0x012fb, 0x012aa,
+ 0x0202a, 0x00a40, 0x018ed, 0x01ceb, 0x01455, 0x018e3, 0x012a1, 0x00354,
+ 0x00353, 0x00f1c, 0x00c7b, 0x00c37, 0x0101d, 0x012cb, 0x01142, 0x0197d,
+ 0x01095, 0x01e3b, 0x0186b, 0x00588, 0x01c2a, 0x014b8, 0x01e3a, 0x018ec,
+ 0x01f46, 0x012fa, 0x00a53, 0x01ce8, 0x00a55, 0x01c29, 0x0117b, 0x01052,
+ 0x012a0, 0x00589, 0x00950, 0x01c2b, 0x00a50, 0x0208b, 0x0180e, 0x02027,
+ 0x02556, 0x01e20, 0x006e7, 0x01c28, 0x0197a, 0x00684, 0x020a2, 0x01f22,
+ 0x03018, 0x039cf, 0x03e25, 0x02557, 0x0294c, 0x028a6, 0x00d11, 0x028a9,
+ 0x02979, 0x00d46, 0x00a56, 0x039ce, 0x030cc, 0x0329a, 0x0149d, 0x0510f,
+ 0x0451c, 0x02028, 0x03299, 0x01ced, 0x014b9, 0x00f85, 0x00c7a, 0x01800,
+ 0x00341, 0x012ca, 0x039c8, 0x0329d, 0x00d0d, 0x03e20, 0x05144, 0x00d45,
+ 0x030d0, 0x0186d, 0x030d5, 0x00d0f, 0x00d40, 0x04114, 0x020a1, 0x0297f,
+ 0x03e24, 0x032f1, 0x04047, 0x030d4, 0x028a8, 0x00d0e, 0x0451d, 0x04044,
+ 0x0297e, 0x04042, 0x030d2, 0x030cf, 0x03e21, 0x03e26, 0x028a5, 0x0451a,
+ 0x00d48, 0x01a16, 0x00d44, 0x04518, 0x0149b, 0x039ca, 0x01498, 0x0403d,
+ 0x0451b, 0x0149c, 0x032f3, 0x030cb, 0x08073, 0x03e22, 0x0529a, 0x020aa,
+ 0x039cc, 0x0738a, 0x06530, 0x07389, 0x06193, 0x08071, 0x04043, 0x030ce,
+ 0x05147, 0x07388, 0x05145, 0x08072, 0x04521, 0x00d47, 0x0297c, 0x030cd,
+ 0x030ca, 0x0000b, 0x0000c, 0x00083, 0x000e4, 0x00048, 0x00102, 0x001cc,
+ 0x001f5, 0x00097, 0x0020b, 0x00124, 0x00453, 0x00627, 0x00639, 0x00605,
+ 0x00517, 0x001b8, 0x00663, 0x00667, 0x007c3, 0x00823, 0x00961, 0x00963,
+ 0x00e5a, 0x00e59, 0x00a2b, 0x00cbf, 0x00292, 0x00a2d, 0x007d0, 0x00953,
+ 0x00cc5, 0x00f84, 0x004ab, 0x014a7, 0x0068a, 0x0117a, 0x0052e, 0x01442,
+ 0x0052c, 0x00c77, 0x00f8f, 0x004aa, 0x01094, 0x01801, 0x012c4, 0x0297b,
+ 0x00952, 0x01f19, 0x006a5, 0x01149, 0x012c5, 0x01803, 0x022f2, 0x0329b,
+ 0x04520, 0x0149e, 0x00d13, 0x01f16, 0x01ce9, 0x0101c, 0x006e6, 0x039c9,
+ 0x06191, 0x07c8e, 0x06192, 0x0ca63, 0x039cd, 0x06190, 0x06884, 0x06885,
+ 0x07382, 0x00d49, 0x00d41, 0x0450c, 0x0149a, 0x030d1, 0x08077, 0x03e23,
+ 0x01a15, 0x0e701, 0x0e702, 0x08079, 0x0822a, 0x0a218, 0x07887, 0x0403f,
+ 0x0520b, 0x0529b, 0x0e700, 0x04519, 0x00007, 0x000e0, 0x000d0, 0x0039b,
+ 0x003e5, 0x00163, 0x0063e, 0x007c9, 0x00806, 0x00954, 0x01044, 0x01f44,
+ 0x0197c, 0x01f45, 0x00a51, 0x01f47, 0x00951, 0x0052d, 0x02291, 0x0092f,
+ 0x00a54, 0x00d12, 0x0297d, 0x00d0c, 0x01499, 0x0329e, 0x032f0, 0x02025,
+ 0x039c6, 0x00a57, 0x03e46, 0x00d42, 0x0738b, 0x05146, 0x04046, 0x08078,
+ 0x0510e, 0x07886, 0x02904, 0x04156, 0x04157, 0x06032, 0x030d3, 0x08bce,
+ 0x04040, 0x0403e, 0x0a414, 0x10457, 0x08075, 0x06887, 0x07c8f, 0x039c7,
+ 0x07387, 0x08070, 0x08bcf, 0x1482a, 0x10456, 0x1482b, 0x01a17, 0x06886,
+ 0x0450d, 0x00013, 0x0006b, 0x00615, 0x0080b, 0x0082b, 0x00952, 0x00e5b,
+ 0x018e2, 0x0186c, 0x01f18, 0x0329f, 0x00d43, 0x03e29, 0x05140, 0x05141,
+ 0x0ca62, 0x06033, 0x03c42, 0x03e28, 0x0450f, 0x0a21a, 0x07384, 0x0a219,
+ 0x0e703, 0x0a21b, 0x01a14, 0x07383, 0x045e6, 0x0007a, 0x0012c, 0x00ccc,
+ 0x0068f, 0x01802, 0x00a52, 0x00953, 0x04045, 0x01a20, 0x0451f, 0x000a4,
+ 0x00735, 0x01cec, 0x02029, 0x020a3, 0x0451e, 0x00069, 0x00c24, 0x02024,
+ 0x032f2, 0x05142, 0x00196, 0x00523, 0x000a6, 0x0197b, 0x0030b, 0x0092e,
+ 0x003e9, 0x03e27, 0x00160, 0x05143, 0x00652, 0x04041, 0x00734, 0x028a7,
+ 0x0080f, 0x01483, 0x0097c, 0x00340, 0x0068b, 0x00522, 0x01054, 0x01096,
+ 0x01f17, 0x0202b, 0x01cea, 0x020a0, 0x02978, 0x02026, 0x0297a, 0x039cb,
+ 0x03e2b, 0x0149f, 0x0329c, 0x07385, 0x08074, 0x0450e, 0x03e2a, 0x05149,
+ 0x08076, 0x07386, 0x05148,
+};
+
+static const uint8_t coef1_huffbits[555] = {
+  9,  5,  2,  4,  4,  5,  5,  5,
+  6,  6,  6,  6,  6,  7,  7,  7,
+  7,  7,  7,  7,  7,  7,  7,  8,
+  8,  8,  8,  8,  8,  8,  8,  8,
+  8,  8,  8,  8,  8,  8,  8,  8,
+  9,  8,  8,  8,  8,  9,  9,  9,
+  9,  9,  9,  9,  9,  9,  9,  9,
+  9, 10, 10, 10,  9,  9,  9,  9,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 11, 11, 11, 11, 11, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 13, 12, 12, 12, 12, 12, 12, 12,
+ 13, 12, 12, 12, 12, 12, 12, 12,
+ 12, 13, 12, 12, 12, 13, 13, 13,
+ 13, 12, 12, 12, 12, 12, 12, 13,
+ 12, 13, 13, 13, 13, 13, 13, 13,
+ 14, 14, 13, 13, 13, 13, 13, 13,
+ 13, 12, 12, 12, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 14, 13, 14, 13, 13, 13,
+ 13, 13, 14, 13, 14, 14, 13, 14,
+ 14, 13, 14, 13, 13, 14, 14, 13,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 15, 14, 14, 14, 14, 15, 15,
+ 15, 14, 14, 13, 13, 12, 12, 13,
+ 13, 13, 14, 14, 15, 14, 15, 15,
+ 14, 13, 14, 15, 15, 15, 14, 14,
+ 14, 14, 15, 14, 14, 15, 15, 15,
+ 14, 15, 14, 14, 14, 14, 14, 15,
+ 15, 16, 15, 15, 15, 14, 15, 15,
+ 15, 15, 14, 14, 16, 14, 15, 14,
+ 14, 15, 15, 15, 15, 16, 15, 14,
+ 15, 15, 15, 16, 15, 15, 14, 14,
+ 14,  4,  7,  8,  8,  9,  9,  9,
+  9, 10, 10, 11, 11, 11, 11, 11,
+ 11, 12, 11, 11, 11, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 11, 12,
+ 12, 12, 13, 13, 13, 13, 13, 13,
+ 13, 12, 12, 13, 13, 13, 13, 14,
+ 14, 13, 14, 13, 13, 13, 14, 14,
+ 15, 15, 14, 13, 13, 13, 14, 14,
+ 15, 15, 15, 16, 14, 15, 17, 17,
+ 15, 15, 15, 15, 15, 14, 16, 14,
+ 16, 16, 16, 16, 16, 16, 15, 15,
+ 17, 15, 16, 15,  6,  8, 10, 10,
+ 10, 11, 11, 11, 12, 12, 13, 13,
+ 13, 13, 14, 13, 14, 13, 14, 14,
+ 14, 14, 14, 15, 15, 14, 14, 14,
+ 14, 14, 14, 15, 15, 15, 15, 16,
+ 15, 15, 16, 15, 15, 15, 14, 16,
+ 15, 15, 18, 17, 16, 17, 15, 14,
+ 15, 16, 16, 19, 17, 19, 16, 17,
+ 15,  7, 10, 11, 12, 12, 12, 12,
+ 13, 13, 13, 14, 15, 14, 15, 15,
+ 16, 15, 14, 14, 15, 16, 15, 16,
+ 16, 16, 16, 15, 15,  7, 11, 12,
+ 13, 13, 14, 14, 15, 15, 15,  8,
+ 11, 13, 14, 14, 15,  9, 12, 14,
+ 14, 15,  9, 13, 10, 13, 10, 14,
+ 10, 14, 11, 15, 11, 15, 11, 14,
+ 12, 15, 12, 13, 13, 13, 13, 13,
+ 13, 14, 13, 14, 14, 14, 14, 14,
+ 14, 15, 14, 15, 16, 15, 14, 15,
+ 16, 15, 15,
+};
+
+static const uint32_t coef2_huffcodes[1336] = {
+ 0x003e6, 0x000f6, 0x00000, 0x00002, 0x00006, 0x0000f, 0x0001b, 0x00028,
+ 0x00039, 0x0003f, 0x0006b, 0x00076, 0x000b7, 0x000e8, 0x000ef, 0x00169,
+ 0x001a7, 0x001d4, 0x001dc, 0x002c4, 0x00349, 0x00355, 0x00391, 0x003dc,
+ 0x00581, 0x005b2, 0x00698, 0x0070c, 0x00755, 0x0073a, 0x00774, 0x007cf,
+ 0x00b0a, 0x00b66, 0x00d2e, 0x00d5e, 0x00e1b, 0x00eac, 0x00e5a, 0x00f7e,
+ 0x00fa1, 0x0163e, 0x01a37, 0x01a52, 0x01c39, 0x01ab3, 0x01d5f, 0x01cb6,
+ 0x01f52, 0x01dd9, 0x02c04, 0x02c2e, 0x02c2d, 0x02c23, 0x03467, 0x034a3,
+ 0x0351b, 0x03501, 0x03a5d, 0x0351c, 0x03875, 0x03dea, 0x0397b, 0x039db,
+ 0x03df1, 0x039d8, 0x03bb4, 0x0580a, 0x0584d, 0x05842, 0x05b13, 0x058ea,
+ 0x0697d, 0x06a06, 0x068cc, 0x06ac7, 0x06a96, 0x072f4, 0x07543, 0x072b4,
+ 0x07d20, 0x0b003, 0x073b5, 0x07be6, 0x0d180, 0x07bd1, 0x07cb8, 0x07d06,
+ 0x07d25, 0x0d2f2, 0x0d19a, 0x0d334, 0x0e1dc, 0x0d529, 0x0d584, 0x0e1d2,
+ 0x0e5e3, 0x0eec4, 0x0e564, 0x0fa49, 0x16001, 0x0eedc, 0x0f7fa, 0x1a32c,
+ 0x16131, 0x16003, 0x0f9c8, 0x1ef80, 0x1d2a0, 0x1aa4b, 0x0f7ce, 0x1abfe,
+ 0x1aa50, 0x1a458, 0x1a816, 0x1cae4, 0x1d2fe, 0x1d52e, 0x1aa4c, 0x2c245,
+ 0x1d2a1, 0x1a35d, 0x1ca1b, 0x1d5d8, 0x1f531, 0x1ca1c, 0x1f389, 0x1f4af,
+ 0x3a5e7, 0x351fb, 0x2c24b, 0x34bce, 0x2c24d, 0x2c249, 0x2c24a, 0x72dfc,
+ 0x357ef, 0x35002, 0x3a5e6, 0x39431, 0x5843b, 0x34a77, 0x58431, 0x3a5f3,
+ 0x3a5dd, 0x3e5e5, 0x356bd, 0x3976e, 0x6a3d2, 0x3500d, 0x694c4, 0x580bd,
+ 0x3e5e8, 0x74b95, 0x34a6e, 0x3977c, 0x39432, 0x5b0d2, 0x6a3d8, 0x580b8,
+ 0x5b0cb, 0x5b0d7, 0x72dee, 0x72ded, 0x72dec, 0x74b9c, 0x3977f, 0x72dea,
+ 0x74b9e, 0x7be7d, 0x580bf, 0x5b0d5, 0x7cba8, 0x74b91, 0x3e5dd, 0xb6171,
+ 0xd46b3, 0xd46b9, 0x7cba1, 0x74b9f, 0x72de1, 0xe59f5, 0x3e5eb, 0x00004,
+ 0x00015, 0x00038, 0x00075, 0x000e8, 0x001d3, 0x00347, 0x0039c, 0x00690,
+ 0x0074a, 0x00b60, 0x00e93, 0x00f74, 0x0163d, 0x01a5a, 0x01d24, 0x01cbe,
+ 0x01f4b, 0x03468, 0x03562, 0x03947, 0x03e82, 0x05804, 0x05b12, 0x05803,
+ 0x0696d, 0x06a9e, 0x0697c, 0x06978, 0x06afb, 0x074b2, 0x072f5, 0x073c0,
+ 0x07541, 0x06944, 0x074b7, 0x070d3, 0x07ba9, 0x0b0b1, 0x0d1af, 0x0e1dd,
+ 0x0e5e2, 0x0e1a3, 0x0eec3, 0x1612f, 0x0e961, 0x0eeda, 0x0e78e, 0x0fa48,
+ 0x1612c, 0x0e511, 0x0e565, 0x0e953, 0x1aa4a, 0x0e59d, 0x1d52c, 0x1a811,
+ 0x1cae7, 0x1abfc, 0x1d52d, 0x1cacf, 0x1cf05, 0x2c254, 0x34a72, 0x1f4ac,
+ 0x3976b, 0x34a71, 0x2c6d9, 0x2d873, 0x34a6a, 0x357e7, 0x3464c, 0x3e5f5,
+ 0x58433, 0x1f53a, 0x3500a, 0x357ea, 0x34a73, 0x3942f, 0x357e5, 0x39775,
+ 0x694cd, 0x39772, 0x7cba5, 0x6a3ef, 0x35483, 0x74b98, 0x5b0c1, 0x39770,
+ 0x3a5d7, 0x39433, 0x39434, 0x694ce, 0x580be, 0x3e5ff, 0x6a3ec, 0xb616f,
+ 0xd46b1, 0x6a3d1, 0x72de5, 0x74b6e, 0x72de9, 0x3e700, 0xd46b6, 0x6a3e9,
+ 0x74b69, 0xe5675, 0xd46b8, 0x7cbaa, 0x3a5d1, 0x0000c, 0x0003c, 0x000eb,
+ 0x001f1, 0x003a4, 0x006a8, 0x007d5, 0x00d43, 0x00e77, 0x016c5, 0x01cb1,
+ 0x02c5d, 0x03a55, 0x03a56, 0x03e51, 0x03bb5, 0x05b0a, 0x06a9f, 0x074b8,
+ 0x07d28, 0x0d187, 0x0d40e, 0x0d52e, 0x0d425, 0x0eae3, 0x0e1d3, 0x1612e,
+ 0x0e59e, 0x0eec2, 0x0e578, 0x0e51a, 0x0e579, 0x0e515, 0x0e960, 0x0d183,
+ 0x0d220, 0x0d2cb, 0x0e512, 0x16c3e, 0x16002, 0x16c42, 0x1cae9, 0x3461a,
+ 0x1d2fa, 0x1a308, 0x1a849, 0x1cf07, 0x1f38f, 0x34b65, 0x2c253, 0x1ef9e,
+ 0x1cbc3, 0x1cbc1, 0x2c255, 0x1f384, 0x58435, 0x2c5cd, 0x3a5f7, 0x2c252,
+ 0x3959c, 0x2c6d8, 0x3a5d3, 0x6ad78, 0x6a3f2, 0x7cba9, 0xb6176, 0x72deb,
+ 0x39764, 0x3e5f6, 0x3a5d8, 0x74a8c, 0x6a3e6, 0x694d1, 0x6ad79, 0x1a4592,
+ 0xe59fb, 0x7cbb3, 0x5b0cd, 0x00017, 0x000b5, 0x002c3, 0x005b7, 0x00b1c,
+ 0x00e5c, 0x0163f, 0x01ab2, 0x01efa, 0x0348a, 0x0396e, 0x058da, 0x06963,
+ 0x06a30, 0x072cd, 0x073cf, 0x07ce7, 0x0d2ca, 0x0d2d8, 0x0e764, 0x0e794,
+ 0x16008, 0x16167, 0x1617e, 0x1aa49, 0x1a30b, 0x1a813, 0x2c6da, 0x1a580,
+ 0x1cbc2, 0x0f9ca, 0x1617f, 0x1d2fe, 0x0f7fc, 0x16c40, 0x0e513, 0x0eec5,
+ 0x0f7c3, 0x1d508, 0x1a81e, 0x1d2fd, 0x39430, 0x35486, 0x3e5fd, 0x2c24c,
+ 0x2c75a, 0x34a74, 0x3a5f4, 0x3464d, 0x694ca, 0x3a5f1, 0x1d509, 0x1d5c0,
+ 0x34648, 0x3464e, 0x6a3d5, 0x6a3e8, 0x6a3e7, 0x5b0c3, 0x2c248, 0x1f38a,
+ 0x3a5f2, 0x6a3e5, 0x00029, 0x00168, 0x0058c, 0x00b67, 0x00f9d, 0x01c3d,
+ 0x01cbf, 0x02c20, 0x0351d, 0x03df6, 0x06af9, 0x072b5, 0x0b1d7, 0x0b0b2,
+ 0x0d40a, 0x0d52b, 0x0e952, 0x0e797, 0x163c3, 0x1c3a0, 0x1f386, 0x1ca21,
+ 0x34655, 0x2c247, 0x1f53b, 0x2c250, 0x2c24f, 0x1f385, 0x1ef5d, 0x1cf15,
+ 0x1caea, 0x1ab0a, 0x1cf19, 0x1f53d, 0x1d5c2, 0x1d2fb, 0x1ef58, 0x34a78,
+ 0x357ec, 0x1f533, 0x3a5e1, 0x694d2, 0x58482, 0x3a5ee, 0x2c6dc, 0x357eb,
+ 0x5b0c4, 0x39778, 0x6a3e1, 0x7cbb4, 0x3a5e1, 0x74b68, 0x3a5ef, 0x3a5d2,
+ 0x39424, 0x72de2, 0xe59f6, 0xe59f7, 0x3e702, 0x3e5ec, 0x1f38b, 0x0003b,
+ 0x001f0, 0x00777, 0x00fa8, 0x01cb2, 0x02d84, 0x03a57, 0x03dd6, 0x06917,
+ 0x06a11, 0x07d07, 0x0eae2, 0x0e796, 0x0f9c9, 0x0f7fb, 0x16166, 0x16160,
+ 0x1ab1b, 0x1abfa, 0x2d87b, 0x1d2f7, 0x39768, 0x1f38c, 0x34653, 0x34651,
+ 0x6a3d9, 0x35001, 0x3abbd, 0x38742, 0x39426, 0x34a76, 0x3a5ec, 0x34a75,
+ 0x35000, 0x35488, 0x1cf10, 0x2c6db, 0x357ed, 0x357e8, 0x357e9, 0x3a5f0,
+ 0x694c2, 0xb6178, 0x72df5, 0x39425, 0x3942b, 0x74b6d, 0x74b6f, 0xb6177,
+ 0xb6179, 0x74b6a, 0xb6172, 0x58487, 0x3e5ee, 0x3e5ed, 0x72df2, 0x72df4,
+ 0x7cbae, 0x6a3ca, 0x70e86, 0x34bcf, 0x6a3c8, 0x00059, 0x00384, 0x00d5b,
+ 0x01c38, 0x03560, 0x0395b, 0x0584e, 0x06964, 0x073cd, 0x0b1e7, 0x0e798,
+ 0x0e78d, 0x0fa43, 0x1a848, 0x1a32f, 0x1aa4e, 0x3464a, 0x1f4ab, 0x1f38d,
+ 0x3a5eb, 0x3a5d4, 0x3548a, 0x6a3c7, 0x5b0d0, 0x6a3c5, 0x7cbb0, 0x694cb,
+ 0x3a5e5, 0x3e5e2, 0x3942c, 0x2d872, 0x1f4ae, 0x3a5d5, 0x694d3, 0x58481,
+ 0x35009, 0x39774, 0x58432, 0xb616c, 0x5b0db, 0x3548b, 0xb6174, 0x1d5d95,
+ 0xb004c, 0x7cbb2, 0x3a5e5, 0x74a8f, 0xe59f9, 0x72df6, 0xe59fd, 0x7cbad,
+ 0xd427d, 0x72cff, 0x3977a, 0x5b0d9, 0xb616d, 0xb616b, 0x1a4593, 0x7cbaf,
+ 0x5b0da, 0x00071, 0x003eb, 0x01603, 0x02c6c, 0x03961, 0x068c8, 0x06a31,
+ 0x072bd, 0x0d2c2, 0x0e51b, 0x0e5e6, 0x1abfb, 0x1d2ff, 0x1cae5, 0x1ef5c,
+ 0x1ef5e, 0x1cf13, 0x34a6d, 0x3976d, 0xb616a, 0x3e5f2, 0x6a3c4, 0xb6169,
+ 0x3e5dc, 0x580b9, 0x74b99, 0x75764, 0x58434, 0x3a5d9, 0x6945a, 0x69459,
+ 0x3548c, 0x3a5e9, 0x69457, 0x72df1, 0x6945e, 0x6a35e, 0x3e701, 0xb6168,
+ 0x5b0dd, 0x3a5de, 0x6a3c2, 0xd4278, 0x6a3cc, 0x72dfd, 0xb6165, 0x16009a,
+ 0x7cbb1, 0xd427c, 0xb6162, 0xe765e, 0x1cecbe, 0x7cbb6, 0x69454, 0xb6160,
+ 0xd427a, 0x1d5d96, 0xb1d6d, 0xe59f4, 0x72de8, 0x3a5db, 0x0007a, 0x006ae,
+ 0x01c3c, 0x03aba, 0x058e9, 0x072cc, 0x0d2dd, 0x0d22d, 0x0eec1, 0x0eedb,
+ 0x1d2a2, 0x1ef5b, 0x357e2, 0x3abbf, 0x1d2f9, 0x35004, 0x3a5dc, 0x351fc,
+ 0x3976c, 0x6a3c6, 0x6a3cb, 0x3e5ea, 0xe59f3, 0x6a3ce, 0x69452, 0xe59f0,
+ 0x74b90, 0xd4279, 0xd427b, 0x7cbb5, 0x5b0c5, 0x3a5e3, 0x3a5e2, 0x000d0,
+ 0x00775, 0x01efe, 0x03dd5, 0x0728c, 0x07cb9, 0x0e1a2, 0x0ea85, 0x0eed8,
+ 0x1a30a, 0x1aa4f, 0x3a5df, 0x35008, 0x3a5e0, 0x3e5f4, 0x3e5f7, 0xb1d6c,
+ 0x5843e, 0x34a70, 0x72df8, 0x74b6b, 0xd427f, 0x72df0, 0x5b0bf, 0x5b0c0,
+ 0xd46b0, 0x72def, 0xe59f8, 0x162e64, 0xb1d6f, 0x3a5e0, 0x39427, 0x69166,
+ 0x6a3e2, 0x6a3e3, 0x74a8d, 0xd427e, 0x1d5d97, 0xd46b4, 0x5b0d8, 0x6a3d3,
+ 0x000e0, 0x00b63, 0x034cc, 0x06a33, 0x073c9, 0x0e1a0, 0x0f7fd, 0x0f9cc,
+ 0x1617d, 0x1caeb, 0x1f4a9, 0x3abb3, 0x69450, 0x39420, 0x39777, 0x3e5e0,
+ 0x6a3d4, 0x6a3ed, 0xb6166, 0xe59f1, 0xb1d6e, 0xe5676, 0x6a3ea, 0xe5674,
+ 0xb6163, 0xd46b7, 0x7cba6, 0xd46ba, 0x1d5d94, 0xb6164, 0x6a3f1, 0x7cba2,
+ 0x69451, 0x72dfa, 0xd46bb, 0x72df7, 0x74b94, 0x1cecbf, 0xe59fa, 0x16009b,
+ 0x6a3e4, 0x000e6, 0x00e94, 0x03876, 0x070ef, 0x0d52a, 0x16015, 0x16014,
+ 0x1abf9, 0x1cf17, 0x34a79, 0x34650, 0x3e705, 0x6a3d0, 0x58430, 0x74b9d,
+ 0x7be7e, 0x5b0be, 0x39773, 0x6a3de, 0x000fb, 0x00f7b, 0x03dd7, 0x07bd0,
+ 0x0e59c, 0x0f9cd, 0x1cf18, 0x1d2ff, 0x34a7a, 0x39429, 0x3500c, 0x72de0,
+ 0x69456, 0x7be7c, 0xd46b5, 0xd46b2, 0x6a3dd, 0x001a2, 0x0163b, 0x06913,
+ 0x0b016, 0x0fa42, 0x1a32d, 0x1cf06, 0x34a7c, 0x34a7d, 0xb6161, 0x35481,
+ 0x3e5fa, 0x7cba0, 0x7be7f, 0x7cba3, 0x7cba7, 0x5b0d3, 0x72de6, 0x6a3dc,
+ 0x001a9, 0x01ab4, 0x06a34, 0x0d46a, 0x16130, 0x1ef5f, 0x1f532, 0x1f536,
+ 0x3942e, 0x58436, 0x6a3db, 0x6945b, 0x001c9, 0x01ca0, 0x0728b, 0x0eed9,
+ 0x1f539, 0x1ca1d, 0x39765, 0x39766, 0x58439, 0x6945d, 0x39767, 0x001d3,
+ 0x01f2c, 0x07bfc, 0x16161, 0x34652, 0x3a5ed, 0x3548d, 0x58438, 0x6a3da,
+ 0x002c1, 0x02c5e, 0x0d335, 0x1ab1a, 0x2d874, 0x35006, 0x35484, 0x5b0cc,
+ 0x74b9a, 0x72df3, 0x6a3d6, 0x002da, 0x034b3, 0x0d5ae, 0x1caee, 0x2d871,
+ 0x357e3, 0x74b97, 0x72df9, 0x580ba, 0x5b0d4, 0x0034d, 0x0354e, 0x0f750,
+ 0x1cbc0, 0x3a5e7, 0x3a5e4, 0x00385, 0x03a58, 0x16c41, 0x2c5cf, 0x3e5e1,
+ 0x74b6c, 0xe5677, 0x6a3df, 0x00390, 0x03e50, 0x163c2, 0x2d876, 0x35482,
+ 0x5b0d6, 0x5843a, 0x0039f, 0x0585e, 0x1a583, 0x3500f, 0x74b93, 0x39771,
+ 0x003e4, 0x06912, 0x16c43, 0x357e1, 0x0058a, 0x0696f, 0x1f538, 0x5b0c9,
+ 0x6a3cf, 0x005b6, 0x06af8, 0x1f534, 0x58483, 0x6a3e0, 0x00695, 0x07d02,
+ 0x1cae8, 0x58485, 0x006a2, 0x0754a, 0x357ee, 0x3977b, 0x00748, 0x074b2,
+ 0x34a7b, 0x00729, 0x0b1e0, 0x34649, 0x3e5e3, 0x0073d, 0x0d2c4, 0x3e5e6,
+ 0x007bb, 0x0b099, 0x39762, 0x5b0ce, 0x6945f, 0x007d1, 0x0d5ab, 0x39779,
+ 0x007d3, 0x0d52f, 0x39763, 0x6945c, 0x00b1a, 0x0d2c5, 0x35489, 0x00d23,
+ 0x0eaed, 0x3e5f8, 0x00d32, 0x16016, 0x3e5fb, 0x00d41, 0x0e768, 0x3a5ed,
+ 0x00e1f, 0x16017, 0x58027, 0x00ead, 0x0fa07, 0x69455, 0x00e54, 0x1612b,
+ 0x00e55, 0x1a581, 0x00f78, 0x1a32b, 0x580bc, 0x6a3ee, 0x00f79, 0x1abfd,
+ 0x00f95, 0x1ab18, 0x6a3f0, 0x01637, 0x1aa4d, 0x0162d, 0x1f53c, 0x6a3f3,
+ 0x01a31, 0x1a810, 0x39769, 0x01a50, 0x1caef, 0x01a36, 0x1a32e, 0x01a67,
+ 0x1f38e, 0x01a85, 0x1ef59, 0x01aa6, 0x1ef83, 0x01d51, 0x2c012, 0x01d53,
+ 0x2d879, 0x01d5e, 0x35005, 0x01cba, 0x1cf04, 0x69453, 0x01d2d, 0x351ff,
+ 0x01f2d, 0x2d86f, 0x01f29, 0x35007, 0x02c22, 0x351fa, 0x02c03, 0x3a5ec,
+ 0x02c5f, 0x3a5eb, 0x02c58, 0x34a6b, 0x03469, 0x356be, 0x02c59, 0x34a6c,
+ 0x0346a, 0x3a5ea, 0x034bd, 0x034bf, 0x356bf, 0x0386a, 0x03ab9, 0x5843f,
+ 0x0386b, 0x3a5f5, 0x03a4b, 0x39421, 0x03aa4, 0x3a5e9, 0x03a5a, 0x03960,
+ 0x3977e, 0x03de9, 0x03958, 0x03df7, 0x039e1, 0x3e5e4, 0x0395f, 0x69458,
+ 0x03e91, 0x03df2, 0x39428, 0x058f2, 0x03e80, 0x6a3c3, 0x03e93, 0x694c0,
+ 0x058b8, 0x5b0ca, 0x0584f, 0x694c1, 0x058f1, 0x068d6, 0x06a10, 0x06ac3,
+ 0x06a32, 0x070d2, 0x06911, 0x074b1, 0x07494, 0x06ad4, 0x06ad6, 0x072b8,
+ 0x06afa, 0x074b3, 0x07540, 0x073ce, 0x0b005, 0x074b3, 0x07495, 0x074b9,
+ 0x0d336, 0x07bff, 0x07763, 0x073c8, 0x07d29, 0x0b622, 0x0d221, 0x0d181,
+ 0x0b1d1, 0x074b8, 0x0b1d0, 0x0d19b, 0x0d2c3, 0x0b172, 0x0d2dc, 0x0b623,
+ 0x0d5aa, 0x0d426, 0x0d182, 0x0e795, 0x0e1d1, 0x0d337, 0x0e96c, 0x0e5e4,
+ 0x0e514, 0x0eaee, 0x16000, 0x0e767, 0x0e1a1, 0x0e78f, 0x16004, 0x0f7c2,
+ 0x0e799, 0x0e5e7, 0x0e566, 0x0e769, 0x0f751, 0x0eede, 0x0fa06, 0x16005,
+ 0x0fa9f, 0x1a5e6, 0x0e766, 0x1636f, 0x0eedd, 0x0eec0, 0x1a309, 0x1ceca,
+ 0x163cd, 0x0f9cb, 0x0eedf, 0x1a582, 0x1612d, 0x0e5e5, 0x1abf8, 0x1a30c,
+ 0x1ca1f, 0x163cc, 0x1a35c, 0x1ca1e, 0x1aa51, 0x163ac, 0x1a84e, 0x1a53f,
+ 0x1cf16, 0x1d2fc, 0x1a5b3, 0x1ab19, 0x1a81f, 0x1d5c3, 0x16c3f, 0x1d5c1,
+ 0x1d2fc, 0x1f4aa, 0x1a812, 0x1f535, 0x1cf12, 0x1a817, 0x1617c, 0x1ab0b,
+ 0x1d2f8, 0x1ef82, 0x2d87a, 0x1d52f, 0x1f530, 0x1aa48, 0x35487, 0x1d2fd,
+ 0x1f4ad, 0x1cf11, 0x3461b, 0x35485, 0x1ca20, 0x1caed, 0x1cae6, 0x1abff,
+ 0x3464f, 0x34a6f, 0x1ef81, 0x3464b, 0x39d96, 0x1f383, 0x1f537, 0x1cf14,
+ 0x2c5ce, 0x3500e, 0x2c251, 0x1caec, 0x1f387, 0x34654, 0x357e4, 0x2d878,
+ 0x3500b, 0x35480, 0x3a5e8, 0x3548e, 0x34b64, 0x1f4a8, 0x35003, 0x3e5df,
+ 0x2d870, 0x357e6, 0x3e5f0, 0x1ef5a, 0x3a5ea, 0x1f388, 0x3e703, 0x2c24e,
+ 0x3a5e2, 0x351fd, 0x2c6dd, 0x3e704, 0x351fe, 0x2d875, 0x5b0c7, 0x3976a,
+ 0x3a5e6, 0x39423, 0x58480, 0x2c246, 0x3a5e3, 0x2d877, 0x3e5f1, 0x3abbe,
+ 0x58489, 0x3e5f9, 0x357e0, 0x3abbc, 0x5b0c6, 0x69167, 0x69165, 0x3e5e9,
+ 0x39422, 0x3976f, 0x3977d, 0x3e5de, 0x6a3c9, 0x58b98, 0x3a5f6, 0x3a5d0,
+ 0x58486, 0x6a3c1, 0x3e5fc, 0x5b0dc, 0x3548f, 0x3942d, 0x694c9, 0x58484,
+ 0x3a5e8, 0x74b9b, 0x74b96, 0x694d0, 0x58488, 0x3a5e4, 0x3942a, 0x72ec2,
+ 0x39776, 0x5b0d1, 0x5b0cf, 0x3a5d6, 0xe59fc, 0x5b0c8, 0x3e5e7, 0x7cbb7,
+ 0x70e87, 0x7cbab, 0x5b0c2, 0x694c3, 0x74a8e, 0x3e5f3, 0x6a3cd, 0x72dfe,
+ 0x73b2e, 0x72ec0, 0x694c5, 0x58437, 0x694c8, 0x72dff, 0x39435, 0x5843d,
+ 0x6a3d7, 0x72ec1, 0xd22c8, 0x694cf, 0xb6173, 0x3e5fe, 0x580bb, 0xe59f2,
+ 0xb616e, 0xb6175, 0x3a5da, 0x5b0bd, 0x694cc, 0x5843c, 0x694c7, 0x74b92,
+ 0x72ec3, 0x694c6, 0xb6170, 0x7cbac, 0xb1733, 0x7cba4, 0xb6167, 0x72de7,
+ 0x72de4, 0x6a3c0, 0x3e5ef, 0x162e65, 0x72de3, 0x72dfb, 0x6a35f, 0x6a3eb,
+};
+
+static const uint8_t coef2_huffbits[1336] = {
+ 11,  9,  2,  3,  4,  4,  5,  6,
+  6,  7,  7,  8,  8,  8,  9,  9,
+  9,  9, 10, 10, 10, 10, 11, 11,
+ 11, 11, 11, 11, 11, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 16, 15, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 18, 17, 17, 17, 17,
+ 17, 17, 17, 18, 18, 17, 17, 18,
+ 17, 17, 18, 17, 18, 18, 18, 18,
+ 19, 18, 18, 18, 18, 18, 18, 20,
+ 18, 18, 18, 19, 19, 18, 19, 18,
+ 19, 19, 18, 19, 19, 18, 19, 19,
+ 19, 19, 18, 19, 19, 19, 19, 19,
+ 19, 19, 20, 20, 20, 19, 19, 20,
+ 19, 20, 19, 19, 20, 19, 19, 20,
+ 20, 20, 20, 19, 20, 21, 19,  3,
+  5,  7,  8,  9,  9, 10, 11, 11,
+ 12, 12, 12, 13, 13, 13, 13, 14,
+ 14, 14, 14, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 16, 16,
+ 15, 15, 15, 15, 16, 16, 16, 16,
+ 17, 16, 17, 17, 16, 17, 17, 17,
+ 17, 17, 17, 16, 17, 17, 17, 17,
+ 18, 17, 17, 18, 18, 18, 18, 18,
+ 19, 18, 18, 18, 18, 18, 18, 19,
+ 19, 18, 18, 18, 18, 19, 18, 19,
+ 19, 19, 20, 19, 18, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 20,
+ 20, 19, 20, 19, 20, 19, 20, 19,
+ 19, 21, 20, 20, 19,  4,  7,  8,
+ 10, 11, 11, 12, 12, 13, 13, 14,
+ 14, 14, 14, 15, 15, 15, 15, 15,
+ 16, 16, 16, 16, 16, 16, 16, 17,
+ 17, 17, 17, 17, 17, 17, 16, 16,
+ 16, 16, 17, 17, 17, 17, 18, 18,
+ 18, 17, 17, 18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 19, 18, 18, 18,
+ 19, 18, 19, 19, 19, 20, 20, 20,
+ 19, 19, 19, 19, 19, 19, 19, 21,
+ 21, 20, 19,  5,  8, 10, 11, 12,
+ 13, 13, 13, 14, 14, 15, 15, 15,
+ 15, 16, 16, 16, 16, 16, 17, 17,
+ 17, 17, 17, 17, 17, 17, 18, 17,
+ 18, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 19, 18, 19, 18,
+ 18, 18, 18, 18, 19, 18, 17, 17,
+ 18, 18, 19, 19, 19, 19, 18, 18,
+ 18, 19,  6,  9, 11, 12, 13, 13,
+ 14, 14, 14, 15, 15, 16, 16, 16,
+ 16, 16, 16, 17, 17, 17, 18, 18,
+ 18, 18, 18, 18, 18, 18, 18, 18,
+ 18, 17, 18, 18, 17, 18, 18, 18,
+ 18, 18, 18, 19, 19, 18, 18, 18,
+ 19, 19, 19, 20, 19, 19, 18, 19,
+ 19, 20, 21, 21, 19, 19, 18,  6,
+ 10, 12, 13, 14, 14, 14, 15, 15,
+ 15, 16, 16, 17, 17, 17, 17, 17,
+ 17, 17, 18, 18, 19, 18, 18, 18,
+ 19, 18, 18, 18, 19, 18, 18, 18,
+ 18, 18, 18, 18, 18, 18, 18, 18,
+ 19, 20, 20, 19, 19, 19, 19, 20,
+ 20, 19, 20, 19, 19, 19, 20, 20,
+ 20, 19, 19, 18, 19,  7, 10, 12,
+ 13, 14, 15, 15, 15, 16, 16, 17,
+ 17, 17, 17, 17, 17, 18, 18, 18,
+ 18, 19, 18, 19, 19, 19, 20, 19,
+ 18, 19, 19, 18, 18, 19, 19, 19,
+ 18, 19, 19, 20, 19, 18, 20, 21,
+ 20, 20, 19, 19, 21, 20, 21, 20,
+ 20, 20, 19, 19, 20, 20, 21, 20,
+ 19,  7, 11, 13, 14, 15, 15, 15,
+ 16, 16, 17, 17, 17, 17, 18, 18,
+ 18, 18, 18, 19, 20, 19, 19, 20,
+ 19, 19, 19, 19, 19, 19, 19, 19,
+ 18, 18, 19, 20, 19, 19, 19, 20,
+ 19, 19, 19, 20, 19, 20, 20, 21,
+ 20, 20, 20, 21, 22, 20, 19, 20,
+ 20, 21, 20, 21, 20, 19,  8, 11,
+ 13, 14, 15, 16, 16, 16, 17, 17,
+ 17, 18, 18, 18, 18, 18, 19, 18,
+ 19, 19, 19, 19, 21, 19, 19, 21,
+ 19, 20, 20, 20, 19, 18, 18,  8,
+ 12, 14, 15, 16, 16, 16, 16, 17,
+ 17, 17, 19, 18, 18, 19, 19, 20,
+ 19, 18, 20, 19, 20, 20, 19, 19,
+ 20, 20, 21, 21, 20, 19, 19, 19,
+ 19, 19, 19, 20, 21, 20, 19, 19,
+  8, 12, 14, 15, 16, 16, 17, 17,
+ 17, 18, 18, 18, 19, 19, 19, 19,
+ 19, 19, 20, 21, 20, 21, 19, 21,
+ 20, 20, 20, 20, 21, 20, 19, 20,
+ 19, 20, 20, 20, 19, 22, 21, 21,
+ 19,  9, 12, 14, 15, 16, 17, 17,
+ 17, 18, 18, 18, 19, 19, 19, 19,
+ 20, 19, 19, 19,  9, 13, 15, 16,
+ 17, 17, 18, 18, 18, 19, 18, 20,
+ 19, 20, 20, 20, 19,  9, 13, 15,
+ 16, 17, 17, 18, 18, 18, 20, 18,
+ 19, 20, 20, 20, 20, 19, 20, 19,
+  9, 13, 15, 16, 17, 18, 18, 18,
+ 19, 19, 19, 19, 10, 14, 16, 17,
+ 18, 18, 19, 19, 19, 19, 19, 10,
+ 14, 16, 17, 18, 18, 18, 19, 19,
+ 10, 14, 16, 17, 18, 18, 18, 19,
+ 19, 20, 19, 10, 14, 16, 18, 18,
+ 18, 19, 20, 19, 19, 10, 14, 17,
+ 18, 18, 18, 10, 15, 17, 18, 19,
+ 19, 21, 19, 11, 15, 17, 18, 18,
+ 19, 19, 11, 15, 17, 18, 19, 19,
+ 11, 15, 17, 18, 11, 15, 18, 19,
+ 19, 11, 15, 18, 19, 19, 11, 16,
+ 18, 19, 11, 15, 18, 19, 11, 16,
+ 18, 12, 16, 18, 19, 12, 16, 19,
+ 12, 16, 19, 19, 19, 12, 16, 19,
+ 12, 16, 19, 19, 12, 16, 18, 12,
+ 16, 19, 12, 17, 19, 12, 17, 19,
+ 12, 17, 19, 12, 17, 19, 13, 17,
+ 13, 17, 13, 17, 19, 19, 13, 17,
+ 13, 17, 19, 13, 17, 13, 18, 19,
+ 13, 17, 19, 13, 18, 13, 17, 13,
+ 18, 13, 18, 13, 18, 13, 18, 13,
+ 18, 13, 18, 14, 18, 19, 14, 18,
+ 14, 18, 14, 18, 14, 18, 14, 19,
+ 14, 19, 14, 18, 14, 18, 14, 18,
+ 14, 19, 14, 14, 18, 14, 14, 19,
+ 14, 18, 14, 19, 14, 19, 14, 15,
+ 19, 15, 15, 15, 15, 19, 15, 19,
+ 15, 15, 19, 15, 15, 19, 15, 19,
+ 15, 19, 15, 19, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 16,
+ 15, 15, 15, 16, 16, 16, 15, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 17, 16, 16, 16, 17,
+ 17, 16, 17, 17, 16, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 18,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 18, 17, 17, 18, 17, 17, 17, 17,
+ 18, 18, 17, 17, 17, 17, 17, 17,
+ 17, 18, 17, 18, 18, 17, 17, 17,
+ 18, 18, 18, 17, 18, 17, 18, 18,
+ 18, 18, 18, 18, 18, 18, 18, 17,
+ 18, 18, 18, 18, 19, 18, 18, 18,
+ 18, 18, 18, 18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18, 18, 18, 19,
+ 18, 18, 19, 18, 18, 18, 19, 18,
+ 19, 18, 18, 19, 18, 18, 19, 19,
+ 19, 19, 19, 18, 19, 18, 19, 18,
+ 19, 19, 18, 18, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 18, 19,
+ 19, 19, 19, 19, 18, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 20,
+ 19, 19, 19, 19, 21, 19, 19, 20,
+ 19, 20, 19, 19, 19, 19, 19, 20,
+ 20, 20, 19, 19, 19, 20, 19, 19,
+ 19, 20, 20, 19, 20, 19, 19, 21,
+ 20, 20, 19, 19, 19, 19, 19, 19,
+ 20, 19, 20, 20, 20, 20, 20, 20,
+ 20, 19, 19, 21, 20, 20, 19, 19,
+};
+
+static const uint32_t coef3_huffcodes[1072] = {
+ 0x001b2, 0x00069, 0x00000, 0x00004, 0x00006, 0x0000e, 0x00014, 0x00019,
+ 0x00016, 0x0002b, 0x00030, 0x0003d, 0x0003c, 0x0005a, 0x0005f, 0x0006d,
+ 0x0007e, 0x0005f, 0x0007f, 0x000b6, 0x000bc, 0x000d8, 0x000f2, 0x000fe,
+ 0x000bc, 0x000fc, 0x00161, 0x0016e, 0x00174, 0x00176, 0x001a2, 0x001e3,
+ 0x001f3, 0x00174, 0x0017a, 0x001ea, 0x002a8, 0x002c4, 0x002e6, 0x00314,
+ 0x00346, 0x00367, 0x003e9, 0x002e5, 0x002ee, 0x003d6, 0x00555, 0x00554,
+ 0x00557, 0x005c3, 0x005d6, 0x006e0, 0x0062f, 0x006e2, 0x00799, 0x00789,
+ 0x007fa, 0x005ce, 0x007fe, 0x005ec, 0x007cc, 0x007af, 0x00aa7, 0x00b19,
+ 0x00b94, 0x00b85, 0x00b9f, 0x00c48, 0x00c45, 0x00dd8, 0x00c4c, 0x00c4b,
+ 0x00d99, 0x00d1f, 0x00dc2, 0x00f95, 0x00fa2, 0x00bb5, 0x00b9f, 0x00f5d,
+ 0x00bbf, 0x00f47, 0x0154a, 0x00fd5, 0x00f45, 0x00f7f, 0x0160d, 0x01889,
+ 0x01757, 0x01722, 0x018b3, 0x0172d, 0x01a39, 0x01a18, 0x01bb3, 0x01b30,
+ 0x01e63, 0x0173c, 0x01b35, 0x01723, 0x01e80, 0x01fee, 0x01761, 0x01ffc,
+ 0x01f7f, 0x02c7c, 0x01fa1, 0x0177b, 0x01755, 0x0175a, 0x01fa6, 0x02eab,
+ 0x0310a, 0x02c69, 0x03669, 0x03127, 0x03103, 0x02e43, 0x03662, 0x03165,
+ 0x03124, 0x0313b, 0x03111, 0x03668, 0x0343b, 0x03c52, 0x03efc, 0x02e6c,
+ 0x03fda, 0x03ef8, 0x02e7b, 0x03ee2, 0x03cc5, 0x03d72, 0x058c0, 0x03df8,
+ 0x02ea9, 0x03e7e, 0x0556d, 0x05c82, 0x03d71, 0x03e7b, 0x03c42, 0x058d7,
+ 0x03f4e, 0x06200, 0x03d70, 0x05cb2, 0x05c96, 0x05cb0, 0x03f45, 0x05cb1,
+ 0x02e6d, 0x03110, 0x02f68, 0x05c90, 0x07ca6, 0x07c88, 0x06204, 0x062c8,
+ 0x078a6, 0x07986, 0x079d5, 0x0b1ad, 0x07989, 0x0b079, 0x05cdd, 0x0aad4,
+ 0x05de8, 0x07dcd, 0x07987, 0x05d67, 0x05d99, 0x0b91d, 0x07cf1, 0x05d9b,
+ 0x079d7, 0x0b07b, 0x05c85, 0x05d9a, 0x07dcc, 0x07ebf, 0x07dce, 0x07dfb,
+ 0x07ec0, 0x07d1a, 0x07a07, 0x05c84, 0x0c471, 0x07cf2, 0x0baef, 0x0b9d2,
+ 0x05deb, 0x07bd6, 0x0b845, 0x05d98, 0x0b91a, 0x0bae8, 0x0c4e0, 0x0dc31,
+ 0x0f93d, 0x0bbce, 0x0d1d2, 0x0f7a9, 0x0d9b9, 0x0bbcb, 0x0b900, 0x0aad7,
+ 0x0babd, 0x0c4e1, 0x0f46f, 0x0c588, 0x0c58b, 0x160e6, 0x0bbcf, 0x0bac3,
+ 0x0f945, 0x0f7a3, 0x0d1c1, 0x0fb8e, 0x0f7a4, 0x0fb8c, 0x0f40c, 0x0c473,
+ 0x0fd72, 0x0bbcd, 0x0fffa, 0x0f940, 0x0bbc9, 0x0f7a8, 0x1a1ed, 0x0bbc5,
+ 0x1f26f, 0x163fd, 0x160c7, 0x1a1f5, 0x0f947, 0x163fc, 0x154b3, 0x0fff6,
+ 0x163f6, 0x160e9, 0x1a1f0, 0x0bab9, 0x0baba, 0x17086, 0x0b903, 0x0fd75,
+ 0x0f308, 0x176f3, 0x163ff, 0x0fd7d, 0x1bb78, 0x163fb, 0x188db, 0x1a1f7,
+ 0x154b2, 0x172fd, 0x163f4, 0x1bb73, 0x172ff, 0x0babc, 0x0f97d, 0x1a1f3,
+ 0x1bb6d, 0x1ffd5, 0x1a1f4, 0x1f272, 0x17380, 0x17382, 0x1ffe7, 0x0bac8,
+ 0x0bbc4, 0x188d3, 0x160e0, 0x0fd7b, 0x1725f, 0x172f5, 0x1bb79, 0x1fad9,
+ 0x1f269, 0x188d0, 0x0bac4, 0x0bac5, 0x31185, 0x188d2, 0x188cc, 0x31187,
+ 0x3e7fe, 0x188d1, 0x1bb6c, 0x1f268, 0x1fad2, 0x1ffd9, 0x1a1ea, 0x1bb68,
+ 0x1facb, 0x3fdb2, 0x1e81a, 0x188ce, 0x172fb, 0x1a1ef, 0x1face, 0x1bb70,
+ 0x0bac1, 0x1bb6b, 0x172f8, 0x1bb66, 0x1ffdf, 0x1bb6a, 0x1ffd7, 0x1f266,
+ 0x176f8, 0x37653, 0x1fa7e, 0x31182, 0x1fac8, 0x2c7e3, 0x370ee, 0x176ec,
+ 0x176e9, 0x2e4bc, 0x160c5, 0x3765a, 0x3ce9c, 0x17373, 0x176e8, 0x188d4,
+ 0x176f1, 0x176ef, 0x37659, 0x1bb7c, 0x1ffde, 0x176f2, 0x3118b, 0x2c7d4,
+ 0x37651, 0x5ce9f, 0x37650, 0x31191, 0x3f4f6, 0x3f4f5, 0x7a06c, 0x1fac1,
+ 0x5c97b, 0x2c7e0, 0x79d3a, 0x3e7fd, 0x2c7df, 0x3f4f0, 0x7a06d, 0x376c1,
+ 0x79d3b, 0x00004, 0x00014, 0x00059, 0x000ab, 0x000b8, 0x00177, 0x001f5,
+ 0x001f2, 0x00315, 0x003fc, 0x005bd, 0x0062d, 0x006e8, 0x007dd, 0x00b04,
+ 0x007cd, 0x00b1e, 0x00d1e, 0x00f15, 0x00f3b, 0x00f41, 0x01548, 0x018b0,
+ 0x0173b, 0x01884, 0x01a1c, 0x01bb4, 0x01f25, 0x017b5, 0x0176d, 0x01ef8,
+ 0x02e73, 0x03107, 0x03125, 0x03105, 0x02e49, 0x03ce8, 0x03ef9, 0x03e5e,
+ 0x02e72, 0x03471, 0x03fd9, 0x0623f, 0x078a0, 0x06867, 0x05cb3, 0x06272,
+ 0x068ec, 0x06e9a, 0x079d4, 0x06e98, 0x0b1aa, 0x06e1a, 0x07985, 0x068ee,
+ 0x06e9b, 0x05c88, 0x0b1ac, 0x07dfa, 0x05d65, 0x07cf0, 0x07cbf, 0x0c475,
+ 0x160eb, 0x1bb7e, 0x0f7a6, 0x1fedd, 0x160e3, 0x0fffb, 0x0fb8d, 0x0fff9,
+ 0x0d1c0, 0x0c58c, 0x1a1e9, 0x0bab8, 0x0f5cf, 0x0fff5, 0x376c5, 0x1a1ec,
+ 0x160ed, 0x1fede, 0x1fac9, 0x1a1eb, 0x1f224, 0x176ee, 0x0fd79, 0x17080,
+ 0x17387, 0x1bb7a, 0x1ffe9, 0x176f7, 0x17385, 0x17781, 0x2c7d5, 0x17785,
+ 0x1ffe3, 0x163f5, 0x1fac2, 0x3e7f9, 0x3118d, 0x3fdb1, 0x1ffe2, 0x1f226,
+ 0x3118a, 0x2c7d9, 0x31190, 0x3118c, 0x3f4f3, 0x1bb7f, 0x1bb72, 0x31184,
+ 0xb92f4, 0x3e7fb, 0x6e1d9, 0x1faca, 0x62300, 0x3fdb8, 0x3d037, 0x3e7fc,
+ 0x62301, 0x3f4f2, 0x1f26a, 0x0000e, 0x00063, 0x000f8, 0x001ee, 0x00377,
+ 0x003f7, 0x006e3, 0x005cc, 0x00b05, 0x00dd2, 0x00fd4, 0x0172e, 0x0172a,
+ 0x01e23, 0x01f2d, 0x01763, 0x01769, 0x0176c, 0x02e75, 0x03104, 0x02ec1,
+ 0x03e58, 0x0583f, 0x03f62, 0x03f44, 0x058c5, 0x0623c, 0x05cf4, 0x07bd7,
+ 0x05d9d, 0x0aad2, 0x05d66, 0x0b1a9, 0x0b078, 0x07cfe, 0x0b918, 0x0c46f,
+ 0x0b919, 0x0b847, 0x06e1b, 0x0b84b, 0x0aad8, 0x0fd74, 0x172f4, 0x17081,
+ 0x0f97c, 0x1f273, 0x0f7a0, 0x0fd7c, 0x172f7, 0x0fd7a, 0x1bb77, 0x172fe,
+ 0x1f270, 0x0fd73, 0x1bb7b, 0x1a1bc, 0x1bb7d, 0x0bbc3, 0x172f6, 0x0baeb,
+ 0x0fb8f, 0x3f4f4, 0x3fdb4, 0x376c8, 0x3e7fa, 0x1ffd0, 0x62303, 0xb92f5,
+ 0x1f261, 0x31189, 0x3fdb5, 0x2c7db, 0x376c9, 0x1fad6, 0x1fad1, 0x00015,
+ 0x000f0, 0x002e0, 0x0058e, 0x005d7, 0x00c4d, 0x00fa1, 0x00bdb, 0x01756,
+ 0x01f70, 0x02c19, 0x0313c, 0x0370f, 0x03cc0, 0x02ea8, 0x058c6, 0x058c7,
+ 0x02eb7, 0x058d0, 0x07d18, 0x0aa58, 0x0b848, 0x05d9e, 0x05d6c, 0x0b84c,
+ 0x0c589, 0x0b901, 0x163f8, 0x0bac9, 0x0b9c5, 0x0f93c, 0x188d8, 0x0bbc7,
+ 0x160ec, 0x0fd6f, 0x188d9, 0x160ea, 0x0f7a7, 0x0f944, 0x0baab, 0x0dc3a,
+ 0x188cf, 0x176fb, 0x2c7d8, 0x2c7d7, 0x1bb75, 0x5ce9e, 0x62302, 0x370ed,
+ 0x176f4, 0x1ffd1, 0x370ef, 0x3f4f8, 0x376c7, 0x1ffe1, 0x376c6, 0x176ff,
+ 0x6e1d8, 0x176f6, 0x17087, 0x0f5cd, 0x00035, 0x001a0, 0x0058b, 0x00aac,
+ 0x00b9a, 0x0175f, 0x01e22, 0x01e8c, 0x01fb2, 0x0310b, 0x058d1, 0x0552e,
+ 0x05c27, 0x0686e, 0x07ca7, 0x0c474, 0x0dc33, 0x07bf2, 0x05de9, 0x07a35,
+ 0x0baaa, 0x0b9eb, 0x0fb95, 0x0b9b8, 0x17381, 0x1f262, 0x188cd, 0x17088,
+ 0x172fa, 0x0f7a2, 0x1fad3, 0x0bac0, 0x3765c, 0x1fedf, 0x1f225, 0x1fad4,
+ 0x2c7da, 0x5ce9d, 0x3e7f8, 0x1e203, 0x188d7, 0x00054, 0x002c0, 0x007a1,
+ 0x00f78, 0x01b36, 0x01fa3, 0x0313a, 0x03436, 0x0343a, 0x07d1d, 0x07bd8,
+ 0x05cdf, 0x0b846, 0x0b189, 0x0d9b8, 0x0fff8, 0x0d9be, 0x0c58a, 0x05dea,
+ 0x0d1d3, 0x160e4, 0x1f26b, 0x188da, 0x1e202, 0x2c7d2, 0x163fe, 0x31193,
+ 0x17782, 0x376c2, 0x2c7d1, 0x3fdb0, 0x3765d, 0x2c7d0, 0x1fad0, 0x1e201,
+ 0x188dd, 0x2c7e2, 0x37657, 0x37655, 0x376c4, 0x376c0, 0x176ea, 0x0006f,
+ 0x003cf, 0x00dd5, 0x01f23, 0x02c61, 0x02ed0, 0x05d54, 0x0552d, 0x07883,
+ 0x0b1a8, 0x0b91c, 0x0babf, 0x0b902, 0x0f7aa, 0x0f7a5, 0x1a1e8, 0x1ffd6,
+ 0x0babe, 0x1a1bf, 0x163f3, 0x1ffd8, 0x1fad7, 0x1f275, 0x1ffdc, 0x0007d,
+ 0x005bc, 0x01549, 0x02a99, 0x03def, 0x06273, 0x079d6, 0x07d1b, 0x0aad3,
+ 0x0d0fc, 0x2c7dd, 0x188d6, 0x0bac2, 0x2c7e1, 0x1bb76, 0x1a1bd, 0x31186,
+ 0x0fd78, 0x1a1be, 0x31183, 0x3fdb6, 0x3f4f1, 0x37652, 0x1fad5, 0x3f4f9,
+ 0x3e7ff, 0x5ce9c, 0x3765b, 0x31188, 0x17372, 0x000bd, 0x0078b, 0x01f21,
+ 0x03c43, 0x03ded, 0x0aad6, 0x07ec1, 0x0f942, 0x05c86, 0x17089, 0x0babb,
+ 0x1ffe8, 0x2c7de, 0x1f26e, 0x1fac4, 0x3f4f7, 0x37656, 0x1fa7d, 0x376c3,
+ 0x3fdb3, 0x3118f, 0x1fac6, 0x000f8, 0x007ed, 0x01efd, 0x03e7a, 0x05c91,
+ 0x0aad9, 0x0baec, 0x0dc32, 0x0f46e, 0x1e200, 0x176fa, 0x3765e, 0x3fdb7,
+ 0x2c7d6, 0x3fdb9, 0x37654, 0x37658, 0x3118e, 0x1ffdb, 0x000f6, 0x00c43,
+ 0x03106, 0x068ef, 0x0b84d, 0x0b188, 0x0bbcc, 0x1f264, 0x1bb69, 0x17386,
+ 0x1fac0, 0x00171, 0x00f39, 0x03e41, 0x068ed, 0x0d9bc, 0x0f7a1, 0x1bb67,
+ 0x1ffdd, 0x176f9, 0x001b9, 0x00f7d, 0x03f63, 0x0d0fd, 0x0b9ea, 0x188dc,
+ 0x1fac3, 0x1a1f2, 0x31192, 0x1ffe4, 0x001f6, 0x01754, 0x06865, 0x0f309,
+ 0x160e5, 0x176f5, 0x3765f, 0x1facc, 0x001e9, 0x01a1a, 0x06201, 0x0f105,
+ 0x176f0, 0x002df, 0x01756, 0x05d6d, 0x163fa, 0x176ed, 0x00342, 0x02e40,
+ 0x0d0ff, 0x17082, 0x003cd, 0x02a98, 0x0fffc, 0x2c7dc, 0x1fa7f, 0x003fe,
+ 0x03764, 0x0fffd, 0x176fc, 0x1fac5, 0x002f7, 0x02ed1, 0x0fb97, 0x0058a,
+ 0x02edc, 0x0bbc8, 0x005d4, 0x0623d, 0x160e8, 0x0062e, 0x05830, 0x163f9,
+ 0x006eb, 0x06205, 0x1f274, 0x007de, 0x062c9, 0x1f265, 0x005c9, 0x05cde,
+ 0x1ffd3, 0x005d4, 0x07988, 0x007ce, 0x0b849, 0x00b1b, 0x05c89, 0x1fac7,
+ 0x00b93, 0x05c83, 0x00b9e, 0x0f14f, 0x00c4a, 0x0b9c7, 0x00dd4, 0x0c470,
+ 0x1f271, 0x00f38, 0x0fb96, 0x176eb, 0x00fa0, 0x163f7, 0x00bb2, 0x0b91b,
+ 0x00bbe, 0x0f102, 0x00f44, 0x0f946, 0x1facd, 0x00f79, 0x0d9bd, 0x0154d,
+ 0x0bbc6, 0x00fd2, 0x160e7, 0x0172b, 0x188cb, 0x0175e, 0x0fd76, 0x0175c,
+ 0x1bb71, 0x0189f, 0x1a1ee, 0x01f24, 0x1a1f6, 0x01ba7, 0x0bbca, 0x01f7d,
+ 0x0ffff, 0x01f2e, 0x1bb65, 0x01bb5, 0x172f9, 0x01fef, 0x1f26c, 0x01f3e,
+ 0x0fd77, 0x01762, 0x1bb6e, 0x01ef9, 0x172fc, 0x01fa0, 0x02ab7, 0x02e4a,
+ 0x1f267, 0x01fb3, 0x1ffda, 0x02e42, 0x03101, 0x17780, 0x0313d, 0x03475,
+ 0x17784, 0x03126, 0x1facf, 0x03c51, 0x17783, 0x03e40, 0x1ffe5, 0x03663,
+ 0x1ffe0, 0x03e8f, 0x1f26d, 0x0343c, 0x03cc1, 0x176fd, 0x03e45, 0x02ec0,
+ 0x03f61, 0x03dee, 0x03fd8, 0x0583e, 0x02e45, 0x03e59, 0x03d02, 0x05ce8,
+ 0x05568, 0x176fe, 0x02f69, 0x1fad8, 0x058c1, 0x05c83, 0x1ffe6, 0x06271,
+ 0x06e1c, 0x062c7, 0x068e1, 0x0552f, 0x06864, 0x06866, 0x06e99, 0x05cbc,
+ 0x07ca5, 0x078a1, 0x05c82, 0x07dcf, 0x0623b, 0x0623e, 0x068e8, 0x07a36,
+ 0x05d9c, 0x0b077, 0x07cf3, 0x07a34, 0x07ca4, 0x07d19, 0x079d2, 0x07d1c,
+ 0x07bd9, 0x0b84a, 0x0fb94, 0x0aad5, 0x0dc30, 0x07bf3, 0x0baee, 0x0b07a,
+ 0x0c472, 0x0b91e, 0x0d9ba, 0x05d9f, 0x0d0fe, 0x0b9c6, 0x05c87, 0x0f14e,
+ 0x0baed, 0x0b92e, 0x0f103, 0x0b9c4, 0x0fb91, 0x0d9bb, 0x0b1ab, 0x0c58d,
+ 0x0fffe, 0x0f93b, 0x0f941, 0x0baea, 0x0b91f, 0x0f5cc, 0x0d9bf, 0x0f943,
+ 0x0f104, 0x1f260, 0x0fb92, 0x0f93f, 0x0f3a6, 0x0bac7, 0x0f7ab, 0x0bac6,
+ 0x17383, 0x0fd6d, 0x0bae9, 0x0fd6e, 0x1e74f, 0x188ca, 0x1f227, 0x0fb93,
+ 0x0fb90, 0x0fff7, 0x17085, 0x17083, 0x160e1, 0x17084, 0x0f93e, 0x160e2,
+ 0x160c6, 0x1a1f1, 0x1bb6f, 0x17384, 0x0fd70, 0x1f263, 0x188d5, 0x173a6,
+ 0x0f5ce, 0x163f2, 0x0fd71, 0x1ffd2, 0x160c4, 0x1ffd4, 0x2c7d3, 0x1bb74,
+};
+
+static const uint8_t coef3_huffbits[1072] = {
+  9,  7,  2,  3,  4,  4,  5,  5,
+  6,  6,  6,  6,  7,  7,  7,  7,
+  7,  8,  8,  8,  8,  8,  8,  8,
+  9,  9,  9,  9,  9,  9,  9,  9,
+  9, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 12, 11, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 14, 13, 14, 14, 13, 14, 13,
+ 13, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 15,
+ 14, 14, 15, 14, 14, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 14, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 14, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 16, 15, 16, 16, 16,
+ 16, 15, 15, 16, 16, 16, 16, 16,
+ 15, 16, 16, 16, 15, 16, 15, 15,
+ 16, 15, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 17, 16, 17, 16, 17, 17, 16,
+ 17, 16, 17, 16, 16, 17, 17, 17,
+ 16, 17, 16, 16, 17, 16, 17, 16,
+ 17, 17, 16, 16, 17, 17, 17, 17,
+ 17, 17, 17, 17, 16, 17, 17, 16,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 16, 18, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 16, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 18,
+ 17, 17, 17, 17, 18, 17, 17, 18,
+ 19, 17, 17, 17, 18, 17, 17, 17,
+ 18, 18, 18, 17, 17, 17, 18, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 18, 18, 18, 18, 18, 18, 18, 18,
+ 18, 18, 17, 18, 18, 18, 18, 17,
+ 18, 18, 18, 17, 17, 18, 18, 18,
+ 18, 19, 18, 18, 19, 19, 20, 18,
+ 19, 18, 19, 19, 18, 19, 20, 18,
+ 19,  4,  6,  7,  8,  9,  9,  9,
+ 10, 10, 10, 11, 11, 11, 11, 12,
+ 12, 12, 12, 12, 12, 13, 13, 13,
+ 13, 13, 13, 13, 13, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 16, 15, 15, 15,
+ 15, 16, 16, 15, 16, 16, 15, 16,
+ 17, 17, 17, 17, 17, 16, 16, 16,
+ 16, 16, 17, 17, 17, 16, 18, 17,
+ 17, 17, 18, 17, 17, 18, 17, 17,
+ 17, 17, 17, 18, 17, 18, 18, 18,
+ 17, 17, 18, 19, 18, 18, 17, 17,
+ 18, 18, 18, 18, 19, 17, 17, 18,
+ 20, 19, 19, 18, 19, 18, 19, 19,
+ 19, 19, 17,  5,  7,  9, 10, 10,
+ 11, 11, 12, 12, 12, 13, 13, 13,
+ 13, 13, 14, 14, 14, 14, 14, 15,
+ 14, 15, 15, 15, 15, 15, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 15, 16, 16, 17, 17, 17,
+ 16, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 17, 16,
+ 16, 19, 18, 18, 19, 17, 19, 20,
+ 17, 18, 18, 18, 18, 18, 18,  6,
+  8, 10, 11, 12, 12, 12, 13, 13,
+ 13, 14, 14, 14, 14, 15, 15, 15,
+ 15, 15, 15, 16, 16, 16, 16, 16,
+ 16, 17, 17, 17, 16, 16, 17, 17,
+ 17, 17, 17, 17, 17, 16, 16, 16,
+ 17, 18, 18, 18, 17, 19, 19, 18,
+ 18, 17, 18, 19, 18, 17, 18, 18,
+ 19, 18, 17, 17,  6,  9, 11, 12,
+ 13, 13, 13, 14, 14, 14, 15, 15,
+ 15, 15, 15, 16, 16, 16, 16, 16,
+ 16, 17, 16, 17, 17, 17, 17, 17,
+ 17, 17, 18, 17, 18, 17, 17, 18,
+ 18, 19, 19, 17, 17,  7, 10, 12,
+ 13, 13, 14, 14, 14, 14, 15, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 17, 17, 17, 17, 18, 17, 18,
+ 18, 18, 18, 18, 18, 18, 18, 17,
+ 17, 18, 18, 18, 18, 18, 18,  7,
+ 10, 12, 13, 14, 15, 15, 15, 15,
+ 16, 16, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 18, 17, 17,  8,
+ 11, 13, 14, 15, 15, 15, 15, 16,
+ 16, 18, 17, 17, 18, 17, 17, 18,
+ 17, 17, 18, 18, 19, 18, 18, 19,
+ 19, 19, 18, 18, 18,  8, 11, 13,
+ 14, 15, 16, 16, 16, 16, 17, 17,
+ 17, 18, 17, 18, 19, 18, 18, 18,
+ 18, 18, 18,  8, 12, 14, 15, 15,
+ 16, 16, 16, 17, 17, 18, 18, 18,
+ 18, 18, 18, 18, 18, 17,  9, 12,
+ 14, 15, 16, 16, 17, 17, 17, 17,
+ 18,  9, 12, 14, 15, 16, 17, 17,
+ 17, 18,  9, 13, 15, 16, 17, 17,
+ 18, 17, 18, 17,  9, 13, 15, 16,
+ 17, 18, 18, 18, 10, 13, 15, 16,
+ 18, 10, 14, 16, 17, 18, 10, 14,
+ 16, 17, 10, 14, 16, 18, 18, 10,
+ 14, 16, 18, 18, 11, 15, 16, 11,
+ 15, 17, 11, 15, 17, 11, 15, 17,
+ 11, 15, 17, 11, 15, 17, 12, 16,
+ 17, 12, 15, 12, 16, 12, 16, 18,
+ 12, 16, 12, 16, 12, 16, 12, 16,
+ 17, 12, 16, 18, 12, 17, 13, 16,
+ 13, 16, 13, 16, 18, 13, 16, 13,
+ 17, 13, 17, 13, 17, 13, 17, 13,
+ 17, 13, 17, 13, 17, 13, 17, 13,
+ 16, 13, 17, 13, 17, 13, 17, 14,
+ 17, 14, 17, 14, 17, 14, 14, 14,
+ 17, 14, 17, 14, 14, 18, 14, 14,
+ 18, 14, 18, 14, 18, 14, 17, 14,
+ 17, 14, 17, 14, 14, 18, 14, 15,
+ 15, 15, 14, 15, 15, 14, 15, 15,
+ 15, 18, 15, 18, 15, 15, 17, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 16, 15, 15, 15, 15, 16,
+ 16, 16, 16, 16, 15, 15, 15, 15,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 17, 16, 16,
+ 16, 17, 16, 16, 16, 17, 17, 17,
+ 17, 17, 16, 17, 17, 17, 17, 16,
+ 16, 16, 17, 17, 17, 17, 16, 17,
+ 17, 17, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17, 17, 17, 17, 18, 17,
+};
+
+static const uint32_t coef4_huffcodes[476] = {
+ 0x00f01, 0x0001e, 0x00000, 0x00004, 0x00006, 0x0000d, 0x0000a, 0x00017,
+ 0x0001d, 0x00017, 0x0002c, 0x00031, 0x00039, 0x0003e, 0x00039, 0x0005a,
+ 0x00066, 0x00070, 0x0007b, 0x00070, 0x00077, 0x000af, 0x000c9, 0x000f2,
+ 0x000f4, 0x000b2, 0x000e3, 0x0015b, 0x0015d, 0x00181, 0x0019d, 0x001e3,
+ 0x001c5, 0x002b5, 0x002db, 0x00338, 0x003c3, 0x003cc, 0x003f0, 0x002cd,
+ 0x003fa, 0x003a1, 0x005b4, 0x00657, 0x007ab, 0x0074d, 0x0074c, 0x00ac1,
+ 0x00ac5, 0x0076b, 0x00ca8, 0x00f04, 0x00f00, 0x00fe3, 0x00f3c, 0x00f10,
+ 0x00f39, 0x00fe6, 0x00e26, 0x00e90, 0x016c5, 0x01827, 0x01954, 0x015c5,
+ 0x01958, 0x01f8a, 0x01c4a, 0x02b0f, 0x02b41, 0x02b0e, 0x033c6, 0x03050,
+ 0x01c4f, 0x02d88, 0x0305c, 0x03c18, 0x02b4f, 0x02cc2, 0x03a47, 0x05680,
+ 0x0569d, 0x06442, 0x06443, 0x06446, 0x0656e, 0x06444, 0x07120, 0x0748a,
+ 0x0c1ba, 0x07e22, 0x07aa6, 0x07f25, 0x07aa7, 0x07e20, 0x0c11b, 0x0c118,
+ 0x07aa5, 0x0ad0a, 0x0f389, 0x19ebb, 0x0caad, 0x0fe42, 0x0fe40, 0x16c34,
+ 0x2b4e5, 0x33d65, 0x16c30, 0x1e7ae, 0x1e25c, 0x18370, 0x1e703, 0x19eba,
+ 0x16c37, 0x0e234, 0x16c6e, 0x00004, 0x0002a, 0x00061, 0x00075, 0x000cb,
+ 0x000ff, 0x00190, 0x001eb, 0x001d1, 0x002b9, 0x00307, 0x00339, 0x0033f,
+ 0x003fb, 0x003b4, 0x0060c, 0x00679, 0x00645, 0x0067d, 0x0078a, 0x007e3,
+ 0x00749, 0x00ac4, 0x00ad2, 0x00ae3, 0x00c10, 0x00c16, 0x00ad1, 0x00cf4,
+ 0x00fe2, 0x01586, 0x00e9d, 0x019f1, 0x01664, 0x01e26, 0x01d38, 0x02b4d,
+ 0x033c5, 0x01fc2, 0x01fc3, 0x01d28, 0x03c1d, 0x0598e, 0x0f094, 0x07aa4,
+ 0x0ad38, 0x0ac0c, 0x0c11a, 0x079ea, 0x0c881, 0x0fe44, 0x0b635, 0x0ac0d,
+ 0x0b61e, 0x05987, 0x07121, 0x0f382, 0x0f387, 0x0e237, 0x0fe47, 0x0f383,
+ 0x0f091, 0x0f385, 0x0e233, 0x182ee, 0x19eb8, 0x1663e, 0x0f093, 0x00014,
+ 0x00058, 0x00159, 0x00167, 0x00300, 0x003d4, 0x005b5, 0x0079d, 0x0076a,
+ 0x00b67, 0x00b60, 0x00f05, 0x00cf0, 0x00f17, 0x00e95, 0x01822, 0x01913,
+ 0x016c2, 0x0182f, 0x01959, 0x01fcb, 0x01e27, 0x01c40, 0x033c7, 0x01e7b,
+ 0x01c49, 0x02d89, 0x01e23, 0x01660, 0x03f12, 0x02cc6, 0x033e1, 0x05b34,
+ 0x0609a, 0x06569, 0x07488, 0x07e21, 0x0cf5f, 0x0712c, 0x0389d, 0x067cf,
+ 0x07f28, 0x1663f, 0x33d67, 0x1663d, 0x1e25d, 0x3c1ab, 0x15c44, 0x16c36,
+ 0x0001f, 0x000ec, 0x00323, 0x005b2, 0x0079f, 0x00ac2, 0x00f16, 0x00e9e,
+ 0x01956, 0x01e0f, 0x019ea, 0x01666, 0x02b89, 0x02b02, 0x02d8c, 0x03c1b,
+ 0x03c19, 0x032b5, 0x03f9c, 0x02ccf, 0x03897, 0x05b35, 0x0ad02, 0x07f29,
+ 0x06441, 0x03884, 0x07888, 0x0784e, 0x06568, 0x0c1bb, 0x05986, 0x067cc,
+ 0x0fe49, 0x0fe48, 0x0c1bc, 0x0fe41, 0x18371, 0x1663c, 0x0e231, 0x0711e,
+ 0x0ad09, 0x0f092, 0x0002d, 0x001db, 0x00781, 0x00c1a, 0x00f55, 0x01580,
+ 0x01ea8, 0x02d9b, 0x032af, 0x03f16, 0x03c1c, 0x07834, 0x03c45, 0x0389c,
+ 0x067ce, 0x06445, 0x0c1b9, 0x07889, 0x07f3a, 0x0784f, 0x07f2b, 0x0ad0b,
+ 0x0f090, 0x0c11d, 0x0e94e, 0x0711f, 0x0e9f1, 0x0f38e, 0x079e9, 0x0ad03,
+ 0x0f09b, 0x0caae, 0x0fe46, 0x2b4e6, 0x0e9f0, 0x19eb6, 0x67ac1, 0x67ac0,
+ 0x33d66, 0x0f388, 0x00071, 0x003a0, 0x00ca9, 0x01829, 0x01d39, 0x02b43,
+ 0x02cc4, 0x06554, 0x0f09a, 0x0b61f, 0x067cd, 0x0711c, 0x0b636, 0x07f2a,
+ 0x0b634, 0x0c11f, 0x0cf5e, 0x0b61d, 0x0f06b, 0x0caab, 0x0c1be, 0x0e94c,
+ 0x0f099, 0x182ed, 0x0e94f, 0x0c119, 0x0e232, 0x2b4e4, 0x0f38a, 0x19eb4,
+ 0x1e25f, 0x0e94d, 0x000b7, 0x00785, 0x016cc, 0x03051, 0x033c4, 0x0656f,
+ 0x03891, 0x0711d, 0x0caaf, 0x0f097, 0x07489, 0x0f098, 0x0c880, 0x0caaa,
+ 0x0f386, 0x19eb7, 0x16c6f, 0x0f384, 0x182e8, 0x182e9, 0x0e230, 0x1e700,
+ 0x33d62, 0x33d63, 0x33d64, 0x16c33, 0x0e216, 0x000fd, 0x00c15, 0x01665,
+ 0x03c4a, 0x07f3b, 0x07896, 0x0c11c, 0x0e215, 0x16c32, 0x0f38b, 0x0f38d,
+ 0x182ea, 0x1e701, 0x712df, 0x15c46, 0x00194, 0x00fe0, 0x03f13, 0x0748b,
+ 0x0f096, 0x0cf80, 0x1e25e, 0xe25bd, 0x33d61, 0x16c31, 0x001f9, 0x01912,
+ 0x05710, 0x0f3d0, 0x0c1bf, 0x00301, 0x01e24, 0x0ad08, 0x003cd, 0x01c41,
+ 0x0c1bd, 0x00563, 0x03a52, 0x0f3d1, 0x00570, 0x02cce, 0x0e217, 0x0067b,
+ 0x0655d, 0x0074b, 0x06447, 0x00c12, 0x074fb, 0x00f08, 0x0b61c, 0x00e22,
+ 0x0fe43, 0x016c7, 0x01836, 0x019f2, 0x01c43, 0x01d3f, 0x01fcf, 0x02b4c,
+ 0x0304c, 0x032b6, 0x03a46, 0x05607, 0x03f17, 0x02cc5, 0x0609b, 0x0655c,
+ 0x07e23, 0x067c1, 0x07f26, 0x07f27, 0x0f095, 0x0e9f3, 0x0cf81, 0x0c11e,
+ 0x0caac, 0x0f38f, 0x0e9f2, 0x074fa, 0x0e236, 0x0fe45, 0x1c428, 0x0e235,
+ 0x182ef, 0x19eb5, 0x0f3d6, 0x182ec, 0x16c35, 0x0f38c, 0x2b4e7, 0x15c47,
+ 0xe25bc, 0x1e702, 0x1c4b6, 0x0e25a, 0x3c1aa, 0x15c45, 0x1c429, 0x19eb9,
+ 0x1e7af, 0x182eb, 0x1e0d4, 0x3896e,
+};
+
+static const uint8_t coef4_huffbits[476] = {
+ 12,  6,  2,  3,  4,  4,  5,  5,
+  5,  6,  6,  6,  6,  6,  7,  7,
+  7,  7,  7,  8,  8,  8,  8,  8,
+  8,  9,  9,  9,  9,  9,  9,  9,
+ 10, 10, 10, 10, 10, 10, 10, 11,
+ 10, 11, 11, 11, 11, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 13, 13, 13, 13, 13, 13,
+ 13, 13, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 16, 16,
+ 16, 15, 15, 15, 15, 15, 16, 16,
+ 15, 16, 16, 17, 16, 16, 16, 17,
+ 18, 18, 17, 17, 17, 17, 17, 17,
+ 17, 17, 17,  4,  6,  7,  8,  8,
+  8,  9,  9, 10, 10, 10, 10, 10,
+ 10, 11, 11, 11, 11, 11, 11, 11,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 13, 13, 13, 14, 13, 14, 14,
+ 14, 13, 13, 14, 14, 16, 16, 15,
+ 16, 16, 16, 15, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 17, 16, 16,
+ 16, 16, 17, 17, 17, 18, 16,  5,
+  8,  9, 10, 10, 10, 11, 11, 12,
+ 12, 12, 12, 12, 12, 13, 13, 13,
+ 13, 13, 13, 13, 13, 14, 14, 13,
+ 14, 14, 13, 14, 14, 15, 14, 15,
+ 15, 15, 16, 15, 16, 16, 15, 15,
+ 15, 18, 18, 18, 17, 18, 17, 17,
+  6,  9, 10, 11, 11, 12, 12, 13,
+ 13, 13, 13, 14, 14, 14, 14, 14,
+ 14, 14, 14, 15, 15, 15, 16, 15,
+ 15, 15, 15, 15, 15, 16, 16, 15,
+ 16, 16, 16, 16, 17, 18, 17, 16,
+ 16, 16,  7, 10, 11, 12, 12, 13,
+ 13, 14, 14, 14, 14, 15, 14, 15,
+ 15, 15, 16, 15, 15, 15, 15, 16,
+ 16, 16, 17, 16, 17, 16, 15, 16,
+ 16, 16, 16, 18, 17, 17, 19, 19,
+ 18, 16,  7, 11, 12, 13, 14, 14,
+ 15, 15, 16, 16, 15, 16, 16, 15,
+ 16, 16, 16, 16, 16, 16, 16, 17,
+ 16, 17, 17, 16, 17, 18, 16, 17,
+ 17, 17,  8, 11, 13, 14, 14, 15,
+ 15, 16, 16, 16, 16, 16, 16, 16,
+ 16, 17, 17, 16, 17, 17, 17, 17,
+ 18, 18, 18, 17, 17,  8, 12, 14,
+ 14, 15, 15, 16, 17, 17, 16, 16,
+ 17, 17, 20, 17,  9, 12, 14, 16,
+ 16, 16, 17, 21, 18, 17,  9, 13,
+ 15, 16, 16, 10, 13, 16, 10, 14,
+ 16, 11, 15, 16, 11, 15, 17, 11,
+ 15, 12, 15, 12, 16, 12, 16, 13,
+ 16, 13, 13, 13, 14, 14, 13, 14,
+ 14, 14, 15, 15, 14, 15, 15, 15,
+ 15, 15, 15, 15, 16, 17, 16, 16,
+ 16, 16, 17, 16, 17, 16, 18, 17,
+ 17, 17, 16, 17, 17, 16, 18, 17,
+ 21, 17, 18, 17, 18, 17, 18, 17,
+ 17, 17, 17, 19,
+};
+
+static const uint32_t coef5_huffcodes[435] = {
+ 0x00347, 0x0000b, 0x00001, 0x00001, 0x0000c, 0x00004, 0x00010, 0x00015,
+ 0x0001f, 0x0000b, 0x00023, 0x00026, 0x00029, 0x00035, 0x00037, 0x00001,
+ 0x00015, 0x0001a, 0x0001d, 0x0001c, 0x0001e, 0x0004e, 0x00049, 0x00051,
+ 0x00078, 0x00004, 0x00000, 0x00008, 0x0000d, 0x0007b, 0x00005, 0x00032,
+ 0x00095, 0x00091, 0x00096, 0x000a1, 0x000d9, 0x00003, 0x00019, 0x00061,
+ 0x00066, 0x00060, 0x00017, 0x0000e, 0x00063, 0x001a0, 0x001b7, 0x001e6,
+ 0x001e7, 0x001b6, 0x00018, 0x001e8, 0x00038, 0x00031, 0x00005, 0x0003d,
+ 0x00027, 0x001ea, 0x0001a, 0x000c5, 0x000f9, 0x000ff, 0x000db, 0x00250,
+ 0x000fc, 0x0025c, 0x00008, 0x00075, 0x003d7, 0x003d3, 0x001b0, 0x0007c,
+ 0x003ca, 0x00036, 0x00189, 0x004a6, 0x004a2, 0x004fb, 0x000c0, 0x0007f,
+ 0x0009a, 0x00311, 0x0006e, 0x0009b, 0x0068c, 0x006c0, 0x00484, 0x00012,
+ 0x000c3, 0x0094f, 0x00979, 0x009f9, 0x00d09, 0x00da6, 0x00da8, 0x00901,
+ 0x000c1, 0x00373, 0x00d08, 0x009fa, 0x00d8b, 0x00d85, 0x00d86, 0x000df,
+ 0x006e2, 0x000ce, 0x00f24, 0x009fe, 0x001f7, 0x007c1, 0x000cf, 0x009fc,
+ 0x009ff, 0x00d89, 0x00da9, 0x009fd, 0x001f8, 0x01a36, 0x0128c, 0x0129d,
+ 0x01a37, 0x00196, 0x003ea, 0x00f8b, 0x00d93, 0x01e45, 0x01e58, 0x01e4b,
+ 0x01e59, 0x013f1, 0x00309, 0x00265, 0x00308, 0x0243a, 0x027e1, 0x00f89,
+ 0x00324, 0x03cbc, 0x03c86, 0x03695, 0x0243c, 0x0243b, 0x0243e, 0x01e4a,
+ 0x003a5, 0x03468, 0x03428, 0x03c84, 0x027e0, 0x025e2, 0x01880, 0x00197,
+ 0x00325, 0x03cb7, 0x0791e, 0x007ec, 0x06c75, 0x004c8, 0x04bc7, 0x004c6,
+ 0x00983, 0x0481e, 0x01b53, 0x0251b, 0x01b58, 0x00984, 0x04fa8, 0x03cbb,
+ 0x00f8a, 0x00322, 0x0346a, 0x0243d, 0x00326, 0x03469, 0x0481f, 0x0481d,
+ 0x00746, 0x09032, 0x01b50, 0x01d13, 0x0d8e4, 0x0481b, 0x06c74, 0x0796b,
+ 0x07969, 0x00985, 0x0d8e3, 0x00986, 0x00fa2, 0x01301, 0x06c7c, 0x00987,
+ 0x03cb8, 0x0f4af, 0x00e88, 0x1b1c0, 0x00fce, 0x033eb, 0x03f6a, 0x03f69,
+ 0x00fcf, 0x0791f, 0x004c9, 0x04871, 0x00fcd, 0x00982, 0x00fcc, 0x00fa3,
+ 0x01d12, 0x0796c, 0x01b47, 0x00321, 0x0796a, 0x0d8e2, 0x04872, 0x04873,
+ 0x0000e, 0x00014, 0x0000a, 0x000a0, 0x00012, 0x0007d, 0x001a2, 0x0003b,
+ 0x0025f, 0x000dd, 0x0027c, 0x00343, 0x00368, 0x0036b, 0x0003e, 0x001fa,
+ 0x00485, 0x001b3, 0x0007f, 0x001b1, 0x0019e, 0x004ba, 0x007ad, 0x00339,
+ 0x00066, 0x007a4, 0x00793, 0x006c6, 0x0007e, 0x000f1, 0x00372, 0x009fb,
+ 0x00d83, 0x00d8a, 0x00947, 0x009f4, 0x001d0, 0x01b09, 0x01b4b, 0x007ec,
+ 0x003e1, 0x000ca, 0x003ec, 0x02539, 0x04fa9, 0x01b57, 0x03429, 0x03d2a,
+ 0x00d97, 0x003a7, 0x00dc0, 0x00d96, 0x00dc1, 0x007eb, 0x03cba, 0x00c43,
+ 0x00c41, 0x01b52, 0x007ef, 0x00323, 0x03cb9, 0x03c83, 0x007d0, 0x007ed,
+ 0x06c7f, 0x09033, 0x03f6c, 0x36383, 0x1e95d, 0x06c78, 0x00747, 0x01b51,
+ 0x00022, 0x00016, 0x00039, 0x00252, 0x00079, 0x00486, 0x00338, 0x00369,
+ 0x00d88, 0x00026, 0x00d87, 0x00f4b, 0x00d82, 0x00027, 0x001e1, 0x01a15,
+ 0x007c7, 0x012f0, 0x001e0, 0x006d0, 0x01a16, 0x01e44, 0x01e5f, 0x03690,
+ 0x00d90, 0x00c42, 0x00daf, 0x00d92, 0x00f80, 0x00cfb, 0x0342f, 0x0487f,
+ 0x01b46, 0x07968, 0x00d95, 0x00d91, 0x01b55, 0x03f68, 0x04bc6, 0x03cbd,
+ 0x00f81, 0x00320, 0x00069, 0x000fe, 0x006d5, 0x0033f, 0x000de, 0x007c6,
+ 0x01e40, 0x00d94, 0x00f88, 0x03c8e, 0x03694, 0x00dae, 0x00dad, 0x00267,
+ 0x003a6, 0x00327, 0x0487e, 0x007ee, 0x00749, 0x004c7, 0x03692, 0x01b56,
+ 0x00fd1, 0x07a56, 0x06c77, 0x09031, 0x00748, 0x06c7a, 0x0796d, 0x033ea,
+ 0x06c76, 0x00fd0, 0x36382, 0x1e417, 0x00745, 0x04faf, 0x0d8e1, 0x03f6b,
+ 0x1e95c, 0x04fad, 0x0009e, 0x004bd, 0x0067c, 0x01b08, 0x003eb, 0x01b45,
+ 0x03691, 0x0d8e5, 0x07904, 0x00981, 0x007ea, 0x019f4, 0x06c7d, 0x04fab,
+ 0x04fac, 0x06c7e, 0x01300, 0x06c7b, 0x0006f, 0x003f7, 0x03c85, 0x004c4,
+ 0x0001e, 0x006e1, 0x03693, 0x01b44, 0x00241, 0x01e46, 0x0019d, 0x00266,
+ 0x004bb, 0x02538, 0x007ac, 0x01b54, 0x00902, 0x04870, 0x00da7, 0x00900,
+ 0x00185, 0x06c79, 0x006e3, 0x003e9, 0x01e94, 0x003ed, 0x003f2, 0x0342e,
+ 0x0346b, 0x0251a, 0x004c5, 0x01881, 0x0481c, 0x01b59, 0x03c87, 0x04fae,
+ 0x007e9, 0x03f6d, 0x0f20a, 0x09030, 0x04faa, 0x0d8e6, 0x03f6f, 0x0481a,
+ 0x03f6e, 0x1e416, 0x0d8e7,
+};
+
+static const uint8_t coef5_huffbits[435] = {
+ 10,  4,  2,  4,  4,  5,  5,  5,
+  5,  6,  6,  6,  6,  6,  6,  7,
+  7,  7,  7,  7,  7,  7,  7,  7,
+  7,  8,  8,  8,  8,  7,  8,  8,
+  8,  8,  8,  8,  8,  9,  9,  9,
+  9,  9,  9,  9,  9,  9,  9,  9,
+  9,  9, 10,  9, 10, 10, 10, 10,
+ 10,  9, 10, 10, 10, 10, 10, 10,
+ 10, 10, 11, 11, 10, 10, 11, 11,
+ 10, 11, 11, 11, 11, 11, 12, 12,
+ 12, 12, 12, 12, 11, 11, 11, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 13,
+ 13, 13, 12, 12, 13, 13, 13, 12,
+ 12, 12, 12, 12, 13, 13, 13, 13,
+ 13, 14, 14, 14, 14, 13, 13, 13,
+ 13, 13, 14, 14, 14, 14, 14, 14,
+ 15, 14, 14, 14, 14, 14, 14, 13,
+ 14, 14, 14, 14, 14, 14, 15, 14,
+ 15, 14, 15, 15, 15, 15, 15, 15,
+ 16, 15, 15, 14, 15, 16, 15, 14,
+ 14, 15, 14, 14, 15, 14, 15, 15,
+ 15, 16, 15, 17, 16, 15, 15, 15,
+ 15, 16, 16, 16, 16, 17, 15, 16,
+ 14, 16, 16, 17, 16, 16, 16, 16,
+ 16, 15, 15, 15, 16, 16, 16, 16,
+ 17, 15, 15, 15, 15, 16, 15, 15,
+  4,  7,  8,  8,  9,  9,  9, 10,
+ 10, 10, 10, 10, 10, 10, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 12,
+ 12, 11, 11, 11, 12, 12, 12, 12,
+ 12, 12, 12, 12, 13, 13, 13, 13,
+ 12, 13, 14, 14, 15, 15, 14, 14,
+ 14, 14, 14, 14, 14, 15, 14, 14,
+ 14, 15, 15, 15, 14, 14, 15, 15,
+ 15, 16, 16, 18, 17, 15, 15, 15,
+  6,  9, 10, 10, 11, 11, 12, 12,
+ 12, 13, 12, 12, 12, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 14,
+ 14, 14, 14, 14, 14, 14, 14, 15,
+ 15, 15, 14, 14, 15, 16, 15, 14,
+ 14, 15,  7, 10, 11, 12, 13, 13,
+ 13, 14, 14, 14, 14, 14, 14, 14,
+ 14, 15, 15, 15, 15, 15, 14, 15,
+ 16, 15, 15, 16, 15, 15, 15, 16,
+ 15, 16, 18, 17, 15, 15, 16, 16,
+ 17, 15,  8, 11, 13, 13, 14, 15,
+ 14, 16, 15, 16, 15, 15, 15, 15,
+ 15, 15, 17, 15,  9, 12, 14, 15,
+ 10, 13, 14, 15, 10, 13, 11, 14,
+ 11, 14, 11, 15, 12, 15, 12, 12,
+ 13, 15, 13, 14, 13, 14, 14, 14,
+ 14, 14, 15, 15, 15, 15, 14, 15,
+ 15, 16, 16, 16, 15, 16, 16, 15,
+ 16, 17, 16,
+};
+
+static const uint16_t levels0[60] = {
+317, 92, 62, 60, 19, 17, 10,  7,
+  6,  5,  5,  3,  3,  3,  2,  2,
+  2,  2,  2,  2,  2,  1,  2,  2,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,
+};
+
+static const uint16_t levels1[40] = {
+311, 91, 61, 28, 10,  6,  5,  2,
+  2,  2,  2,  2,  2,  2,  2,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+};
+
+static const uint16_t levels2[340] = {
+181,110, 78, 63, 61, 62, 60, 61,
+ 33, 41, 41, 19, 17, 19, 12, 11,
+  9, 11, 10,  6,  8,  7,  6,  4,
+  5,  5,  4,  4,  3,  4,  3,  5,
+  3,  4,  3,  3,  3,  3,  3,  3,
+  2,  2,  4,  2,  3,  2,  3,  3,
+  2,  2,  2,  2,  2,  2,  2,  2,
+  3,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  1,  2,  1,  2,  2,
+  2,  2,  1,  2,  1,  1,  1,  2,
+  2,  1,  2,  1,  2,  2,  2,  2,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,
+};
+
+static const uint16_t levels3[180] = {
+351,122, 76, 61, 41, 42, 24, 30,
+ 22, 19, 11,  9, 10,  8,  5,  5,
+  4,  5,  5,  3,  3,  3,  3,  3,
+  3,  3,  2,  2,  3,  2,  2,  2,
+  3,  3,  2,  2,  2,  3,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  1,  1,
+  2,  2,  1,  2,  1,  2,  2,  2,
+  2,  2,  2,  1,  2,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  2,
+  2,  1,  2,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,
+};
+
+static const uint16_t levels4[70] = {
+113, 68, 49, 42, 40, 32, 27, 15,
+ 10,  5,  3,  3,  3,  3,  2,  2,
+  2,  2,  2,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,
+};
+
+static const uint16_t levels5[40] = {
+214, 72, 42, 40, 18,  4,  4,  2,
+  2,  2,  2,  2,  1,  1,  2,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+};
+
+static const CoefVLCTable coef_vlcs[6] = {
+    {
+        sizeof(coef0_huffbits), coef0_huffcodes, coef0_huffbits, levels0,
+    },
+    {
+        sizeof(coef1_huffbits), coef1_huffcodes, coef1_huffbits, levels1,
+    },
+    {
+        sizeof(coef2_huffbits), coef2_huffcodes, coef2_huffbits, levels2,
+    },
+    {
+        sizeof(coef3_huffbits), coef3_huffcodes, coef3_huffbits, levels3,
+    },
+    {
+        sizeof(coef4_huffbits), coef4_huffcodes, coef4_huffbits, levels4,
+    },
+    {
+        sizeof(coef5_huffbits), coef5_huffcodes, coef5_huffbits, levels5,
+    },
+};
diff --git a/contrib/ffmpeg/libavcodec/wmadec.c b/contrib/ffmpeg/libavcodec/wmadec.c
new file mode 100644
index 000000000..684aea2c8
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/wmadec.c
@@ -0,0 +1,1337 @@
+/*
+ * WMA compatible decoder
+ * Copyright (c) 2002 The FFmpeg Project.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file wmadec.c
+ * WMA compatible decoder.
+ * This decoder handles Microsoft Windows Media Audio data, versions 1 & 2.
+ * WMA v1 is identified by audio format 0x160 in Microsoft media files
+ * (ASF/AVI/WAV). WMA v2 is identified by audio format 0x161.
+ *
+ * To use this decoder, a calling application must supply the extra data
+ * bytes provided with the WMA data. These are the extra, codec-specific
+ * bytes at the end of a WAVEFORMATEX data structure. Transmit these bytes
+ * to the decoder using the extradata[_size] fields in AVCodecContext. There
+ * should be 4 extra bytes for v1 data and 6 extra bytes for v2 data.
+ */
+
+#include "avcodec.h"
+#include "bitstream.h"
+#include "dsputil.h"
+
+/* size of blocks */
+#define BLOCK_MIN_BITS 7
+#define BLOCK_MAX_BITS 11
+#define BLOCK_MAX_SIZE (1 << BLOCK_MAX_BITS)
+
+#define BLOCK_NB_SIZES (BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1)
+
+/* XXX: find exact max size */
+#define HIGH_BAND_MAX_SIZE 16
+
+#define NB_LSP_COEFS 10
+
+/* XXX: is it a suitable value ? */
+#define MAX_CODED_SUPERFRAME_SIZE 16384
+
+#define MAX_CHANNELS 2
+
+#define NOISE_TAB_SIZE 8192
+
+#define LSP_POW_BITS 7
+
+#define VLCBITS 9
+#define VLCMAX ((22+VLCBITS-1)/VLCBITS)
+
+#define EXPVLCBITS 8
+#define EXPMAX ((19+EXPVLCBITS-1)/EXPVLCBITS)
+
+#define HGAINVLCBITS 9
+#define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS)
+
+typedef struct WMADecodeContext {
+    GetBitContext gb;
+    int sample_rate;
+    int nb_channels;
+    int bit_rate;
+    int version; /* 1 = 0x160 (WMAV1), 2 = 0x161 (WMAV2) */
+    int block_align;
+    int use_bit_reservoir;
+    int use_variable_block_len;
+    int use_exp_vlc;  /* exponent coding: 0 = lsp, 1 = vlc + delta */
+    int use_noise_coding; /* true if perceptual noise is added */
+    int byte_offset_bits;
+    VLC exp_vlc;
+    int exponent_sizes[BLOCK_NB_SIZES];
+    uint16_t exponent_bands[BLOCK_NB_SIZES][25];
+    int high_band_start[BLOCK_NB_SIZES]; /* index of first coef in high band */
+    int coefs_start;               /* first coded coef */
+    int coefs_end[BLOCK_NB_SIZES]; /* max number of coded coefficients */
+    int exponent_high_sizes[BLOCK_NB_SIZES];
+    int exponent_high_bands[BLOCK_NB_SIZES][HIGH_BAND_MAX_SIZE];
+    VLC hgain_vlc;
+
+    /* coded values in high bands */
+    int high_band_coded[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
+    int high_band_values[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
+
+    /* there are two possible tables for spectral coefficients */
+    VLC coef_vlc[2];
+    uint16_t *run_table[2];
+    uint16_t *level_table[2];
+    /* frame info */
+    int frame_len;       /* frame length in samples */
+    int frame_len_bits;  /* frame_len = 1 << frame_len_bits */
+    int nb_block_sizes;  /* number of block sizes */
+    /* block info */
+    int reset_block_lengths;
+    int block_len_bits; /* log2 of current block length */
+    int next_block_len_bits; /* log2 of next block length */
+    int prev_block_len_bits; /* log2 of prev block length */
+    int block_len; /* block length in samples */
+    int block_num; /* block number in current frame */
+    int block_pos; /* current position in frame */
+    uint8_t ms_stereo; /* true if mid/side stereo mode */
+    uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */
+    DECLARE_ALIGNED_16(float, exponents[MAX_CHANNELS][BLOCK_MAX_SIZE]);
+    float max_exponent[MAX_CHANNELS];
+    int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
+    DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]);
+    MDCTContext mdct_ctx[BLOCK_NB_SIZES];
+    float *windows[BLOCK_NB_SIZES];
+    DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); /* temporary storage for imdct */
+    /* output buffer for one frame and the last for IMDCT windowing */
+    DECLARE_ALIGNED_16(float, frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2]);
+    /* last frame info */
+    uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */
+    int last_bitoffset;
+    int last_superframe_len;
+    float noise_table[NOISE_TAB_SIZE];
+    int noise_index;
+    float noise_mult; /* XXX: suppress that and integrate it in the noise array */
+    /* lsp_to_curve tables */
+    float lsp_cos_table[BLOCK_MAX_SIZE];
+    float lsp_pow_e_table[256];
+    float lsp_pow_m_table1[(1 << LSP_POW_BITS)];
+    float lsp_pow_m_table2[(1 << LSP_POW_BITS)];
+    DSPContext dsp;
+
+#ifdef TRACE
+    int frame_count;
+#endif
+} WMADecodeContext;
+
+typedef struct CoefVLCTable {
+    int n; /* total number of codes */
+    const uint32_t *huffcodes; /* VLC bit values */
+    const uint8_t *huffbits;   /* VLC bit size */
+    const uint16_t *levels; /* table to build run/level tables */
+} CoefVLCTable;
+
+static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len);
+
+#include "wmadata.h"
+
+#ifdef TRACE
+static void dump_shorts(const char *name, const short *tab, int n)
+{
+    int i;
+
+    tprintf("%s[%d]:\n", name, n);
+    for(i=0;i<n;i++) {
+        if ((i & 7) == 0)
+            tprintf("%4d: ", i);
+        tprintf(" %5d.0", tab[i]);
+        if ((i & 7) == 7)
+            tprintf("\n");
+    }
+}
+
+static void dump_floats(const char *name, int prec, const float *tab, int n)
+{
+    int i;
+
+    tprintf("%s[%d]:\n", name, n);
+    for(i=0;i<n;i++) {
+        if ((i & 7) == 0)
+            tprintf("%4d: ", i);
+        tprintf(" %8.*f", prec, tab[i]);
+        if ((i & 7) == 7)
+            tprintf("\n");
+    }
+    if ((i & 7) != 0)
+        tprintf("\n");
+}
+#endif
+
+/* XXX: use same run/length optimization as mpeg decoders */
+static void init_coef_vlc(VLC *vlc,
+                          uint16_t **prun_table, uint16_t **plevel_table,
+                          const CoefVLCTable *vlc_table)
+{
+    int n = vlc_table->n;
+    const uint8_t *table_bits = vlc_table->huffbits;
+    const uint32_t *table_codes = vlc_table->huffcodes;
+    const uint16_t *levels_table = vlc_table->levels;
+    uint16_t *run_table, *level_table;
+    const uint16_t *p;
+    int i, l, j, level;
+
+    init_vlc(vlc, VLCBITS, n, table_bits, 1, 1, table_codes, 4, 4, 0);
+
+    run_table = av_malloc(n * sizeof(uint16_t));
+    level_table = av_malloc(n * sizeof(uint16_t));
+    p = levels_table;
+    i = 2;
+    level = 1;
+    while (i < n) {
+        l = *p++;
+        for(j=0;j<l;j++) {
+            run_table[i] = j;
+            level_table[i] = level;
+            i++;
+        }
+        level++;
+    }
+    *prun_table = run_table;
+    *plevel_table = level_table;
+}
+
+static int wma_decode_init(AVCodecContext * avctx)
+{
+    WMADecodeContext *s = avctx->priv_data;
+    int i, flags1, flags2;
+    float *window;
+    uint8_t *extradata;
+    float bps1, high_freq;
+    volatile float bps;
+    int sample_rate1;
+    int coef_vlc_table;
+
+    s->sample_rate = avctx->sample_rate;
+    s->nb_channels = avctx->channels;
+    s->bit_rate = avctx->bit_rate;
+    s->block_align = avctx->block_align;
+
+    dsputil_init(&s->dsp, avctx);
+
+    if (avctx->codec->id == CODEC_ID_WMAV1) {
+        s->version = 1;
+    } else {
+        s->version = 2;
+    }
+
+    /* extract flag infos */
+    flags1 = 0;
+    flags2 = 0;
+    extradata = avctx->extradata;
+    if (s->version == 1 && avctx->extradata_size >= 4) {
+        flags1 = extradata[0] | (extradata[1] << 8);
+        flags2 = extradata[2] | (extradata[3] << 8);
+    } else if (s->version == 2 && avctx->extradata_size >= 6) {
+        flags1 = extradata[0] | (extradata[1] << 8) |
+            (extradata[2] << 16) | (extradata[3] << 24);
+        flags2 = extradata[4] | (extradata[5] << 8);
+    }
+    s->use_exp_vlc = flags2 & 0x0001;
+    s->use_bit_reservoir = flags2 & 0x0002;
+    s->use_variable_block_len = flags2 & 0x0004;
+
+    /* compute MDCT block size */
+    if (s->sample_rate <= 16000) {
+        s->frame_len_bits = 9;
+    } else if (s->sample_rate <= 22050 ||
+               (s->sample_rate <= 32000 && s->version == 1)) {
+        s->frame_len_bits = 10;
+    } else {
+        s->frame_len_bits = 11;
+    }
+    s->frame_len = 1 << s->frame_len_bits;
+    if (s->use_variable_block_len) {
+        int nb_max, nb;
+        nb = ((flags2 >> 3) & 3) + 1;
+        if ((s->bit_rate / s->nb_channels) >= 32000)
+            nb += 2;
+        nb_max = s->frame_len_bits - BLOCK_MIN_BITS;
+        if (nb > nb_max)
+            nb = nb_max;
+        s->nb_block_sizes = nb + 1;
+    } else {
+        s->nb_block_sizes = 1;
+    }
+
+    /* init rate dependant parameters */
+    s->use_noise_coding = 1;
+    high_freq = s->sample_rate * 0.5;
+
+    /* if version 2, then the rates are normalized */
+    sample_rate1 = s->sample_rate;
+    if (s->version == 2) {
+        if (sample_rate1 >= 44100)
+            sample_rate1 = 44100;
+        else if (sample_rate1 >= 22050)
+            sample_rate1 = 22050;
+        else if (sample_rate1 >= 16000)
+            sample_rate1 = 16000;
+        else if (sample_rate1 >= 11025)
+            sample_rate1 = 11025;
+        else if (sample_rate1 >= 8000)
+            sample_rate1 = 8000;
+    }
+
+    bps = (float)s->bit_rate / (float)(s->nb_channels * s->sample_rate);
+    s->byte_offset_bits = av_log2((int)(bps * s->frame_len / 8.0 + 0.5)) + 2;
+
+    /* compute high frequency value and choose if noise coding should
+       be activated */
+    bps1 = bps;
+    if (s->nb_channels == 2)
+        bps1 = bps * 1.6;
+    if (sample_rate1 == 44100) {
+        if (bps1 >= 0.61)
+            s->use_noise_coding = 0;
+        else
+            high_freq = high_freq * 0.4;
+    } else if (sample_rate1 == 22050) {
+        if (bps1 >= 1.16)
+            s->use_noise_coding = 0;
+        else if (bps1 >= 0.72)
+            high_freq = high_freq * 0.7;
+        else
+            high_freq = high_freq * 0.6;
+    } else if (sample_rate1 == 16000) {
+        if (bps > 0.5)
+            high_freq = high_freq * 0.5;
+        else
+            high_freq = high_freq * 0.3;
+    } else if (sample_rate1 == 11025) {
+        high_freq = high_freq * 0.7;
+    } else if (sample_rate1 == 8000) {
+        if (bps <= 0.625) {
+            high_freq = high_freq * 0.5;
+        } else if (bps > 0.75) {
+            s->use_noise_coding = 0;
+        } else {
+            high_freq = high_freq * 0.65;
+        }
+    } else {
+        if (bps >= 0.8) {
+            high_freq = high_freq * 0.75;
+        } else if (bps >= 0.6) {
+            high_freq = high_freq * 0.6;
+        } else {
+            high_freq = high_freq * 0.5;
+        }
+    }
+    dprintf("flags1=0x%x flags2=0x%x\n", flags1, flags2);
+    dprintf("version=%d channels=%d sample_rate=%d bitrate=%d block_align=%d\n",
+           s->version, s->nb_channels, s->sample_rate, s->bit_rate,
+           s->block_align);
+    dprintf("bps=%f bps1=%f high_freq=%f bitoffset=%d\n",
+           bps, bps1, high_freq, s->byte_offset_bits);
+    dprintf("use_noise_coding=%d use_exp_vlc=%d nb_block_sizes=%d\n",
+           s->use_noise_coding, s->use_exp_vlc, s->nb_block_sizes);
+
+    /* compute the scale factor band sizes for each MDCT block size */
+    {
+        int a, b, pos, lpos, k, block_len, i, j, n;
+        const uint8_t *table;
+
+        if (s->version == 1) {
+            s->coefs_start = 3;
+        } else {
+            s->coefs_start = 0;
+        }
+        for(k = 0; k < s->nb_block_sizes; k++) {
+            block_len = s->frame_len >> k;
+
+            if (s->version == 1) {
+                lpos = 0;
+                for(i=0;i<25;i++) {
+                    a = wma_critical_freqs[i];
+                    b = s->sample_rate;
+                    pos = ((block_len * 2 * a)  + (b >> 1)) / b;
+                    if (pos > block_len)
+                        pos = block_len;
+                    s->exponent_bands[0][i] = pos - lpos;
+                    if (pos >= block_len) {
+                        i++;
+                        break;
+                    }
+                    lpos = pos;
+                }
+                s->exponent_sizes[0] = i;
+            } else {
+                /* hardcoded tables */
+                table = NULL;
+                a = s->frame_len_bits - BLOCK_MIN_BITS - k;
+                if (a < 3) {
+                    if (s->sample_rate >= 44100)
+                        table = exponent_band_44100[a];
+                    else if (s->sample_rate >= 32000)
+                        table = exponent_band_32000[a];
+                    else if (s->sample_rate >= 22050)
+                        table = exponent_band_22050[a];
+                }
+                if (table) {
+                    n = *table++;
+                    for(i=0;i<n;i++)
+                        s->exponent_bands[k][i] = table[i];
+                    s->exponent_sizes[k] = n;
+                } else {
+                    j = 0;
+                    lpos = 0;
+                    for(i=0;i<25;i++) {
+                        a = wma_critical_freqs[i];
+                        b = s->sample_rate;
+                        pos = ((block_len * 2 * a)  + (b << 1)) / (4 * b);
+                        pos <<= 2;
+                        if (pos > block_len)
+                            pos = block_len;
+                        if (pos > lpos)
+                            s->exponent_bands[k][j++] = pos - lpos;
+                        if (pos >= block_len)
+                            break;
+                        lpos = pos;
+                    }
+                    s->exponent_sizes[k] = j;
+                }
+            }
+
+            /* max number of coefs */
+            s->coefs_end[k] = (s->frame_len - ((s->frame_len * 9) / 100)) >> k;
+            /* high freq computation */
+            s->high_band_start[k] = (int)((block_len * 2 * high_freq) /
+                                          s->sample_rate + 0.5);
+            n = s->exponent_sizes[k];
+            j = 0;
+            pos = 0;
+            for(i=0;i<n;i++) {
+                int start, end;
+                start = pos;
+                pos += s->exponent_bands[k][i];
+                end = pos;
+                if (start < s->high_band_start[k])
+                    start = s->high_band_start[k];
+                if (end > s->coefs_end[k])
+                    end = s->coefs_end[k];
+                if (end > start)
+                    s->exponent_high_bands[k][j++] = end - start;
+            }
+            s->exponent_high_sizes[k] = j;
+#if 0
+            tprintf("%5d: coefs_end=%d high_band_start=%d nb_high_bands=%d: ",
+                  s->frame_len >> k,
+                  s->coefs_end[k],
+                  s->high_band_start[k],
+                  s->exponent_high_sizes[k]);
+            for(j=0;j<s->exponent_high_sizes[k];j++)
+                tprintf(" %d", s->exponent_high_bands[k][j]);
+            tprintf("\n");
+#endif
+        }
+    }
+
+#ifdef TRACE
+    {
+        int i, j;
+        for(i = 0; i < s->nb_block_sizes; i++) {
+            tprintf("%5d: n=%2d:",
+                   s->frame_len >> i,
+                   s->exponent_sizes[i]);
+            for(j=0;j<s->exponent_sizes[i];j++)
+                tprintf(" %d", s->exponent_bands[i][j]);
+            tprintf("\n");
+        }
+    }
+#endif
+
+    /* init MDCT */
+    for(i = 0; i < s->nb_block_sizes; i++)
+        ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1);
+
+    /* init MDCT windows : simple sinus window */
+    for(i = 0; i < s->nb_block_sizes; i++) {
+        int n, j;
+        float alpha;
+        n = 1 << (s->frame_len_bits - i);
+        window = av_malloc(sizeof(float) * n);
+        alpha = M_PI / (2.0 * n);
+        for(j=0;j<n;j++) {
+            window[n - j - 1] = sin((j + 0.5) * alpha);
+        }
+        s->windows[i] = window;
+    }
+
+    s->reset_block_lengths = 1;
+
+    if (s->use_noise_coding) {
+
+        /* init the noise generator */
+        if (s->use_exp_vlc)
+            s->noise_mult = 0.02;
+        else
+            s->noise_mult = 0.04;
+
+#ifdef TRACE
+        for(i=0;i<NOISE_TAB_SIZE;i++)
+            s->noise_table[i] = 1.0 * s->noise_mult;
+#else
+        {
+            unsigned int seed;
+            float norm;
+            seed = 1;
+            norm = (1.0 / (float)(1LL << 31)) * sqrt(3) * s->noise_mult;
+            for(i=0;i<NOISE_TAB_SIZE;i++) {
+                seed = seed * 314159 + 1;
+                s->noise_table[i] = (float)((int)seed) * norm;
+            }
+        }
+#endif
+        init_vlc(&s->hgain_vlc, HGAINVLCBITS, sizeof(hgain_huffbits),
+                 hgain_huffbits, 1, 1,
+                 hgain_huffcodes, 2, 2, 0);
+    }
+
+    if (s->use_exp_vlc) {
+        init_vlc(&s->exp_vlc, EXPVLCBITS, sizeof(scale_huffbits),
+                 scale_huffbits, 1, 1,
+                 scale_huffcodes, 4, 4, 0);
+    } else {
+        wma_lsp_to_curve_init(s, s->frame_len);
+    }
+
+    /* choose the VLC tables for the coefficients */
+    coef_vlc_table = 2;
+    if (s->sample_rate >= 32000) {
+        if (bps1 < 0.72)
+            coef_vlc_table = 0;
+        else if (bps1 < 1.16)
+            coef_vlc_table = 1;
+    }
+
+    init_coef_vlc(&s->coef_vlc[0], &s->run_table[0], &s->level_table[0],
+                  &coef_vlcs[coef_vlc_table * 2]);
+    init_coef_vlc(&s->coef_vlc[1], &s->run_table[1], &s->level_table[1],
+                  &coef_vlcs[coef_vlc_table * 2 + 1]);
+    return 0;
+}
+
+/* interpolate values for a bigger or smaller block. The block must
+   have multiple sizes */
+static void interpolate_array(float *scale, int old_size, int new_size)
+{
+    int i, j, jincr, k;
+    float v;
+
+    if (new_size > old_size) {
+        jincr = new_size / old_size;
+        j = new_size;
+        for(i = old_size - 1; i >=0; i--) {
+            v = scale[i];
+            k = jincr;
+            do {
+                scale[--j] = v;
+            } while (--k);
+        }
+    } else if (new_size < old_size) {
+        j = 0;
+        jincr = old_size / new_size;
+        for(i = 0; i < new_size; i++) {
+            scale[i] = scale[j];
+            j += jincr;
+        }
+    }
+}
+
+/* compute x^-0.25 with an exponent and mantissa table. We use linear
+   interpolation to reduce the mantissa table size at a small speed
+   expense (linear interpolation approximately doubles the number of
+   bits of precision). */
+static inline float pow_m1_4(WMADecodeContext *s, float x)
+{
+    union {
+        float f;
+        unsigned int v;
+    } u, t;
+    unsigned int e, m;
+    float a, b;
+
+    u.f = x;
+    e = u.v >> 23;
+    m = (u.v >> (23 - LSP_POW_BITS)) & ((1 << LSP_POW_BITS) - 1);
+    /* build interpolation scale: 1 <= t < 2. */
+    t.v = ((u.v << LSP_POW_BITS) & ((1 << 23) - 1)) | (127 << 23);
+    a = s->lsp_pow_m_table1[m];
+    b = s->lsp_pow_m_table2[m];
+    return s->lsp_pow_e_table[e] * (a + b * t.f);
+}
+
+static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len)
+{
+    float wdel, a, b;
+    int i, e, m;
+
+    wdel = M_PI / frame_len;
+    for(i=0;i<frame_len;i++)
+        s->lsp_cos_table[i] = 2.0f * cos(wdel * i);
+
+    /* tables for x^-0.25 computation */
+    for(i=0;i<256;i++) {
+        e = i - 126;
+        s->lsp_pow_e_table[i] = pow(2.0, e * -0.25);
+    }
+
+    /* NOTE: these two tables are needed to avoid two operations in
+       pow_m1_4 */
+    b = 1.0;
+    for(i=(1 << LSP_POW_BITS) - 1;i>=0;i--) {
+        m = (1 << LSP_POW_BITS) + i;
+        a = (float)m * (0.5 / (1 << LSP_POW_BITS));
+        a = pow(a, -0.25);
+        s->lsp_pow_m_table1[i] = 2 * a - b;
+        s->lsp_pow_m_table2[i] = b - a;
+        b = a;
+    }
+#if 0
+    for(i=1;i<20;i++) {
+        float v, r1, r2;
+        v = 5.0 / i;
+        r1 = pow_m1_4(s, v);
+        r2 = pow(v,-0.25);
+        printf("%f^-0.25=%f e=%f\n", v, r1, r2 - r1);
+    }
+#endif
+}
+
+/* NOTE: We use the same code as Vorbis here */
+/* XXX: optimize it further with SSE/3Dnow */
+static void wma_lsp_to_curve(WMADecodeContext *s,
+                             float *out, float *val_max_ptr,
+                             int n, float *lsp)
+{
+    int i, j;
+    float p, q, w, v, val_max;
+
+    val_max = 0;
+    for(i=0;i<n;i++) {
+        p = 0.5f;
+        q = 0.5f;
+        w = s->lsp_cos_table[i];
+        for(j=1;j<NB_LSP_COEFS;j+=2){
+            q *= w - lsp[j - 1];
+            p *= w - lsp[j];
+        }
+        p *= p * (2.0f - w);
+        q *= q * (2.0f + w);
+        v = p + q;
+        v = pow_m1_4(s, v);
+        if (v > val_max)
+            val_max = v;
+        out[i] = v;
+    }
+    *val_max_ptr = val_max;
+}
+
+/* decode exponents coded with LSP coefficients (same idea as Vorbis) */
+static void decode_exp_lsp(WMADecodeContext *s, int ch)
+{
+    float lsp_coefs[NB_LSP_COEFS];
+    int val, i;
+
+    for(i = 0; i < NB_LSP_COEFS; i++) {
+        if (i == 0 || i >= 8)
+            val = get_bits(&s->gb, 3);
+        else
+            val = get_bits(&s->gb, 4);
+        lsp_coefs[i] = lsp_codebook[i][val];
+    }
+
+    wma_lsp_to_curve(s, s->exponents[ch], &s->max_exponent[ch],
+                     s->block_len, lsp_coefs);
+}
+
+/* decode exponents coded with VLC codes */
+static int decode_exp_vlc(WMADecodeContext *s, int ch)
+{
+    int last_exp, n, code;
+    const uint16_t *ptr, *band_ptr;
+    float v, *q, max_scale, *q_end;
+
+    band_ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits];
+    ptr = band_ptr;
+    q = s->exponents[ch];
+    q_end = q + s->block_len;
+    max_scale = 0;
+    if (s->version == 1) {
+        last_exp = get_bits(&s->gb, 5) + 10;
+        /* XXX: use a table */
+        v = pow(10, last_exp * (1.0 / 16.0));
+        max_scale = v;
+        n = *ptr++;
+        do {
+            *q++ = v;
+        } while (--n);
+    }
+    last_exp = 36;
+    while (q < q_end) {
+        code = get_vlc2(&s->gb, s->exp_vlc.table, EXPVLCBITS, EXPMAX);
+        if (code < 0)
+            return -1;
+        /* NOTE: this offset is the same as MPEG4 AAC ! */
+        last_exp += code - 60;
+        /* XXX: use a table */
+        v = pow(10, last_exp * (1.0 / 16.0));
+        if (v > max_scale)
+            max_scale = v;
+        n = *ptr++;
+        do {
+            *q++ = v;
+        } while (--n);
+    }
+    s->max_exponent[ch] = max_scale;
+    return 0;
+}
+
+/* return 0 if OK. return 1 if last block of frame. return -1 if
+   unrecorrable error. */
+static int wma_decode_block(WMADecodeContext *s)
+{
+    int n, v, a, ch, code, bsize;
+    int coef_nb_bits, total_gain, parse_exponents;
+    DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]);
+    int nb_coefs[MAX_CHANNELS];
+    float mdct_norm;
+
+#ifdef TRACE
+    tprintf("***decode_block: %d:%d\n", s->frame_count - 1, s->block_num);
+#endif
+
+    /* compute current block length */
+    if (s->use_variable_block_len) {
+        n = av_log2(s->nb_block_sizes - 1) + 1;
+
+        if (s->reset_block_lengths) {
+            s->reset_block_lengths = 0;
+            v = get_bits(&s->gb, n);
+            if (v >= s->nb_block_sizes)
+                return -1;
+            s->prev_block_len_bits = s->frame_len_bits - v;
+            v = get_bits(&s->gb, n);
+            if (v >= s->nb_block_sizes)
+                return -1;
+            s->block_len_bits = s->frame_len_bits - v;
+        } else {
+            /* update block lengths */
+            s->prev_block_len_bits = s->block_len_bits;
+            s->block_len_bits = s->next_block_len_bits;
+        }
+        v = get_bits(&s->gb, n);
+        if (v >= s->nb_block_sizes)
+            return -1;
+        s->next_block_len_bits = s->frame_len_bits - v;
+    } else {
+        /* fixed block len */
+        s->next_block_len_bits = s->frame_len_bits;
+        s->prev_block_len_bits = s->frame_len_bits;
+        s->block_len_bits = s->frame_len_bits;
+    }
+
+    /* now check if the block length is coherent with the frame length */
+    s->block_len = 1 << s->block_len_bits;
+    if ((s->block_pos + s->block_len) > s->frame_len)
+        return -1;
+
+    if (s->nb_channels == 2) {
+        s->ms_stereo = get_bits(&s->gb, 1);
+    }
+    v = 0;
+    for(ch = 0; ch < s->nb_channels; ch++) {
+        a = get_bits(&s->gb, 1);
+        s->channel_coded[ch] = a;
+        v |= a;
+    }
+    /* if no channel coded, no need to go further */
+    /* XXX: fix potential framing problems */
+    if (!v)
+        goto next;
+
+    bsize = s->frame_len_bits - s->block_len_bits;
+
+    /* read total gain and extract corresponding number of bits for
+       coef escape coding */
+    total_gain = 1;
+    for(;;) {
+        a = get_bits(&s->gb, 7);
+        total_gain += a;
+        if (a != 127)
+            break;
+    }
+
+    if (total_gain < 15)
+        coef_nb_bits = 13;
+    else if (total_gain < 32)
+        coef_nb_bits = 12;
+    else if (total_gain < 40)
+        coef_nb_bits = 11;
+    else if (total_gain < 45)
+        coef_nb_bits = 10;
+    else
+        coef_nb_bits = 9;
+
+    /* compute number of coefficients */
+    n = s->coefs_end[bsize] - s->coefs_start;
+    for(ch = 0; ch < s->nb_channels; ch++)
+        nb_coefs[ch] = n;
+
+    /* complex coding */
+    if (s->use_noise_coding) {
+
+        for(ch = 0; ch < s->nb_channels; ch++) {
+            if (s->channel_coded[ch]) {
+                int i, n, a;
+                n = s->exponent_high_sizes[bsize];
+                for(i=0;i<n;i++) {
+                    a = get_bits(&s->gb, 1);
+                    s->high_band_coded[ch][i] = a;
+                    /* if noise coding, the coefficients are not transmitted */
+                    if (a)
+                        nb_coefs[ch] -= s->exponent_high_bands[bsize][i];
+                }
+            }
+        }
+        for(ch = 0; ch < s->nb_channels; ch++) {
+            if (s->channel_coded[ch]) {
+                int i, n, val, code;
+
+                n = s->exponent_high_sizes[bsize];
+                val = (int)0x80000000;
+                for(i=0;i<n;i++) {
+                    if (s->high_band_coded[ch][i]) {
+                        if (val == (int)0x80000000) {
+                            val = get_bits(&s->gb, 7) - 19;
+                        } else {
+                            code = get_vlc2(&s->gb, s->hgain_vlc.table, HGAINVLCBITS, HGAINMAX);
+                            if (code < 0)
+                                return -1;
+                            val += code - 18;
+                        }
+                        s->high_band_values[ch][i] = val;
+                    }
+                }
+            }
+        }
+    }
+
+    /* exposant can be interpolated in short blocks. */
+    parse_exponents = 1;
+    if (s->block_len_bits != s->frame_len_bits) {
+        parse_exponents = get_bits(&s->gb, 1);
+    }
+
+    if (parse_exponents) {
+        for(ch = 0; ch < s->nb_channels; ch++) {
+            if (s->channel_coded[ch]) {
+                if (s->use_exp_vlc) {
+                    if (decode_exp_vlc(s, ch) < 0)
+                        return -1;
+                } else {
+                    decode_exp_lsp(s, ch);
+                }
+            }
+        }
+    } else {
+        for(ch = 0; ch < s->nb_channels; ch++) {
+            if (s->channel_coded[ch]) {
+                interpolate_array(s->exponents[ch], 1 << s->prev_block_len_bits,
+                                  s->block_len);
+            }
+        }
+    }
+
+    /* parse spectral coefficients : just RLE encoding */
+    for(ch = 0; ch < s->nb_channels; ch++) {
+        if (s->channel_coded[ch]) {
+            VLC *coef_vlc;
+            int level, run, sign, tindex;
+            int16_t *ptr, *eptr;
+            const uint16_t *level_table, *run_table;
+
+            /* special VLC tables are used for ms stereo because
+               there is potentially less energy there */
+            tindex = (ch == 1 && s->ms_stereo);
+            coef_vlc = &s->coef_vlc[tindex];
+            run_table = s->run_table[tindex];
+            level_table = s->level_table[tindex];
+            /* XXX: optimize */
+            ptr = &s->coefs1[ch][0];
+            eptr = ptr + nb_coefs[ch];
+            memset(ptr, 0, s->block_len * sizeof(int16_t));
+            for(;;) {
+                code = get_vlc2(&s->gb, coef_vlc->table, VLCBITS, VLCMAX);
+                if (code < 0)
+                    return -1;
+                if (code == 1) {
+                    /* EOB */
+                    break;
+                } else if (code == 0) {
+                    /* escape */
+                    level = get_bits(&s->gb, coef_nb_bits);
+                    /* NOTE: this is rather suboptimal. reading
+                       block_len_bits would be better */
+                    run = get_bits(&s->gb, s->frame_len_bits);
+                } else {
+                    /* normal code */
+                    run = run_table[code];
+                    level = level_table[code];
+                }
+                sign = get_bits(&s->gb, 1);
+                if (!sign)
+                    level = -level;
+                ptr += run;
+                if (ptr >= eptr)
+                {
+                    av_log(NULL, AV_LOG_ERROR, "overflow in spectral RLE, ignoring\n");
+                    break;
+                }
+                *ptr++ = level;
+                /* NOTE: EOB can be omitted */
+                if (ptr >= eptr)
+                    break;
+            }
+        }
+        if (s->version == 1 && s->nb_channels >= 2) {
+            align_get_bits(&s->gb);
+        }
+    }
+
+    /* normalize */
+    {
+        int n4 = s->block_len / 2;
+        mdct_norm = 1.0 / (float)n4;
+        if (s->version == 1) {
+            mdct_norm *= sqrt(n4);
+        }
+    }
+
+    /* finally compute the MDCT coefficients */
+    for(ch = 0; ch < s->nb_channels; ch++) {
+        if (s->channel_coded[ch]) {
+            int16_t *coefs1;
+            float *coefs, *exponents, mult, mult1, noise, *exp_ptr;
+            int i, j, n, n1, last_high_band;
+            float exp_power[HIGH_BAND_MAX_SIZE];
+
+            coefs1 = s->coefs1[ch];
+            exponents = s->exponents[ch];
+            mult = pow(10, total_gain * 0.05) / s->max_exponent[ch];
+            mult *= mdct_norm;
+            coefs = s->coefs[ch];
+            if (s->use_noise_coding) {
+                mult1 = mult;
+                /* very low freqs : noise */
+                for(i = 0;i < s->coefs_start; i++) {
+                    *coefs++ = s->noise_table[s->noise_index] * (*exponents++) * mult1;
+                    s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1);
+                }
+
+                n1 = s->exponent_high_sizes[bsize];
+
+                /* compute power of high bands */
+                exp_ptr = exponents +
+                    s->high_band_start[bsize] -
+                    s->coefs_start;
+                last_high_band = 0; /* avoid warning */
+                for(j=0;j<n1;j++) {
+                    n = s->exponent_high_bands[s->frame_len_bits -
+                                              s->block_len_bits][j];
+                    if (s->high_band_coded[ch][j]) {
+                        float e2, v;
+                        e2 = 0;
+                        for(i = 0;i < n; i++) {
+                            v = exp_ptr[i];
+                            e2 += v * v;
+                        }
+                        exp_power[j] = e2 / n;
+                        last_high_band = j;
+                        tprintf("%d: power=%f (%d)\n", j, exp_power[j], n);
+                    }
+                    exp_ptr += n;
+                }
+
+                /* main freqs and high freqs */
+                for(j=-1;j<n1;j++) {
+                    if (j < 0) {
+                        n = s->high_band_start[bsize] -
+                            s->coefs_start;
+                    } else {
+                        n = s->exponent_high_bands[s->frame_len_bits -
+                                                  s->block_len_bits][j];
+                    }
+                    if (j >= 0 && s->high_band_coded[ch][j]) {
+                        /* use noise with specified power */
+                        mult1 = sqrt(exp_power[j] / exp_power[last_high_band]);
+                        /* XXX: use a table */
+                        mult1 = mult1 * pow(10, s->high_band_values[ch][j] * 0.05);
+                        mult1 = mult1 / (s->max_exponent[ch] * s->noise_mult);
+                        mult1 *= mdct_norm;
+                        for(i = 0;i < n; i++) {
+                            noise = s->noise_table[s->noise_index];
+                            s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1);
+                            *coefs++ = (*exponents++) * noise * mult1;
+                        }
+                    } else {
+                        /* coded values + small noise */
+                        for(i = 0;i < n; i++) {
+                            noise = s->noise_table[s->noise_index];
+                            s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1);
+                            *coefs++ = ((*coefs1++) + noise) * (*exponents++) * mult;
+                        }
+                    }
+                }
+
+                /* very high freqs : noise */
+                n = s->block_len - s->coefs_end[bsize];
+                mult1 = mult * exponents[-1];
+                for(i = 0; i < n; i++) {
+                    *coefs++ = s->noise_table[s->noise_index] * mult1;
+                    s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1);
+                }
+            } else {
+                /* XXX: optimize more */
+                for(i = 0;i < s->coefs_start; i++)
+                    *coefs++ = 0.0;
+                n = nb_coefs[ch];
+                for(i = 0;i < n; i++) {
+                    *coefs++ = coefs1[i] * exponents[i] * mult;
+                }
+                n = s->block_len - s->coefs_end[bsize];
+                for(i = 0;i < n; i++)
+                    *coefs++ = 0.0;
+            }
+        }
+    }
+
+#ifdef TRACE
+    for(ch = 0; ch < s->nb_channels; ch++) {
+        if (s->channel_coded[ch]) {
+            dump_floats("exponents", 3, s->exponents[ch], s->block_len);
+            dump_floats("coefs", 1, s->coefs[ch], s->block_len);
+        }
+    }
+#endif
+
+    if (s->ms_stereo && s->channel_coded[1]) {
+        float a, b;
+        int i;
+
+        /* nominal case for ms stereo: we do it before mdct */
+        /* no need to optimize this case because it should almost
+           never happen */
+        if (!s->channel_coded[0]) {
+            tprintf("rare ms-stereo case happened\n");
+            memset(s->coefs[0], 0, sizeof(float) * s->block_len);
+            s->channel_coded[0] = 1;
+        }
+
+        for(i = 0; i < s->block_len; i++) {
+            a = s->coefs[0][i];
+            b = s->coefs[1][i];
+            s->coefs[0][i] = a + b;
+            s->coefs[1][i] = a - b;
+        }
+    }
+
+    /* build the window : we ensure that when the windows overlap
+       their squared sum is always 1 (MDCT reconstruction rule) */
+    /* XXX: merge with output */
+    {
+        int i, next_block_len, block_len, prev_block_len, n;
+        float *wptr;
+
+        block_len = s->block_len;
+        prev_block_len = 1 << s->prev_block_len_bits;
+        next_block_len = 1 << s->next_block_len_bits;
+
+        /* right part */
+        wptr = window + block_len;
+        if (block_len <= next_block_len) {
+            for(i=0;i<block_len;i++)
+                *wptr++ = s->windows[bsize][i];
+        } else {
+            /* overlap */
+            n = (block_len / 2) - (next_block_len / 2);
+            for(i=0;i<n;i++)
+                *wptr++ = 1.0;
+            for(i=0;i<next_block_len;i++)
+                *wptr++ = s->windows[s->frame_len_bits - s->next_block_len_bits][i];
+            for(i=0;i<n;i++)
+                *wptr++ = 0.0;
+        }
+
+        /* left part */
+        wptr = window + block_len;
+        if (block_len <= prev_block_len) {
+            for(i=0;i<block_len;i++)
+                *--wptr = s->windows[bsize][i];
+        } else {
+            /* overlap */
+            n = (block_len / 2) - (prev_block_len / 2);
+            for(i=0;i<n;i++)
+                *--wptr = 1.0;
+            for(i=0;i<prev_block_len;i++)
+                *--wptr = s->windows[s->frame_len_bits - s->prev_block_len_bits][i];
+            for(i=0;i<n;i++)
+                *--wptr = 0.0;
+        }
+    }
+
+
+    for(ch = 0; ch < s->nb_channels; ch++) {
+        if (s->channel_coded[ch]) {
+            DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]);
+            float *ptr;
+            int n4, index, n;
+
+            n = s->block_len;
+            n4 = s->block_len / 2;
+            s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize],
+                          output, s->coefs[ch], s->mdct_tmp);
+
+            /* XXX: optimize all that by build the window and
+               multipying/adding at the same time */
+
+            /* multiply by the window and add in the frame */
+            index = (s->frame_len / 2) + s->block_pos - n4;
+            ptr = &s->frame_out[ch][index];
+            s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1);
+
+            /* specific fast case for ms-stereo : add to second
+               channel if it is not coded */
+            if (s->ms_stereo && !s->channel_coded[1]) {
+                ptr = &s->frame_out[1][index];
+                s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1);
+            }
+        }
+    }
+ next:
+    /* update block number */
+    s->block_num++;
+    s->block_pos += s->block_len;
+    if (s->block_pos >= s->frame_len)
+        return 1;
+    else
+        return 0;
+}
+
+/* decode a frame of frame_len samples */
+static int wma_decode_frame(WMADecodeContext *s, int16_t *samples)
+{
+    int ret, i, n, a, ch, incr;
+    int16_t *ptr;
+    float *iptr;
+
+#ifdef TRACE
+    tprintf("***decode_frame: %d size=%d\n", s->frame_count++, s->frame_len);
+#endif
+
+    /* read each block */
+    s->block_num = 0;
+    s->block_pos = 0;
+    for(;;) {
+        ret = wma_decode_block(s);
+        if (ret < 0)
+            return -1;
+        if (ret)
+            break;
+    }
+
+    /* convert frame to integer */
+    n = s->frame_len;
+    incr = s->nb_channels;
+    for(ch = 0; ch < s->nb_channels; ch++) {
+        ptr = samples + ch;
+        iptr = s->frame_out[ch];
+
+        for(i=0;i<n;i++) {
+            a = lrintf(*iptr++);
+            if (a > 32767)
+                a = 32767;
+            else if (a < -32768)
+                a = -32768;
+            *ptr = a;
+            ptr += incr;
+        }
+        /* prepare for next block */
+        memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len],
+                s->frame_len * sizeof(float));
+        /* XXX: suppress this */
+        memset(&s->frame_out[ch][s->frame_len], 0,
+               s->frame_len * sizeof(float));
+    }
+
+#ifdef TRACE
+    dump_shorts("samples", samples, n * s->nb_channels);
+#endif
+    return 0;
+}
+
+static int wma_decode_superframe(AVCodecContext *avctx,
+                                 void *data, int *data_size,
+                                 uint8_t *buf, int buf_size)
+{
+    WMADecodeContext *s = avctx->priv_data;
+    int nb_frames, bit_offset, i, pos, len;
+    uint8_t *q;
+    int16_t *samples;
+
+    tprintf("***decode_superframe:\n");
+
+    if(buf_size==0){
+        s->last_superframe_len = 0;
+        return 0;
+    }
+
+    samples = data;
+
+    init_get_bits(&s->gb, buf, buf_size*8);
+
+    if (s->use_bit_reservoir) {
+        /* read super frame header */
+        get_bits(&s->gb, 4); /* super frame index */
+        nb_frames = get_bits(&s->gb, 4) - 1;
+
+        bit_offset = get_bits(&s->gb, s->byte_offset_bits + 3);
+
+        if (s->last_superframe_len > 0) {
+            //        printf("skip=%d\n", s->last_bitoffset);
+            /* add bit_offset bits to last frame */
+            if ((s->last_superframe_len + ((bit_offset + 7) >> 3)) >
+                MAX_CODED_SUPERFRAME_SIZE)
+                goto fail;
+            q = s->last_superframe + s->last_superframe_len;
+            len = bit_offset;
+            while (len > 7) {
+                *q++ = (get_bits)(&s->gb, 8);
+                len -= 8;
+            }
+            if (len > 0) {
+                *q++ = (get_bits)(&s->gb, len) << (8 - len);
+            }
+
+            /* XXX: bit_offset bits into last frame */
+            init_get_bits(&s->gb, s->last_superframe, MAX_CODED_SUPERFRAME_SIZE*8);
+            /* skip unused bits */
+            if (s->last_bitoffset > 0)
+                skip_bits(&s->gb, s->last_bitoffset);
+            /* this frame is stored in the last superframe and in the
+               current one */
+            if (wma_decode_frame(s, samples) < 0)
+                goto fail;
+            samples += s->nb_channels * s->frame_len;
+        }
+
+        /* read each frame starting from bit_offset */
+        pos = bit_offset + 4 + 4 + s->byte_offset_bits + 3;
+        init_get_bits(&s->gb, buf + (pos >> 3), (MAX_CODED_SUPERFRAME_SIZE - (pos >> 3))*8);
+        len = pos & 7;
+        if (len > 0)
+            skip_bits(&s->gb, len);
+
+        s->reset_block_lengths = 1;
+        for(i=0;i<nb_frames;i++) {
+            if (wma_decode_frame(s, samples) < 0)
+                goto fail;
+            samples += s->nb_channels * s->frame_len;
+        }
+
+        /* we copy the end of the frame in the last frame buffer */
+        pos = get_bits_count(&s->gb) + ((bit_offset + 4 + 4 + s->byte_offset_bits + 3) & ~7);
+        s->last_bitoffset = pos & 7;
+        pos >>= 3;
+        len = buf_size - pos;
+        if (len > MAX_CODED_SUPERFRAME_SIZE || len < 0) {
+            goto fail;
+        }
+        s->last_superframe_len = len;
+        memcpy(s->last_superframe, buf + pos, len);
+    } else {
+        /* single frame decode */
+        if (wma_decode_frame(s, samples) < 0)
+            goto fail;
+        samples += s->nb_channels * s->frame_len;
+    }
+    *data_size = (int8_t *)samples - (int8_t *)data;
+    return s->block_align;
+ fail:
+    /* when error, we reset the bit reservoir */
+    s->last_superframe_len = 0;
+    return -1;
+}
+
+static int wma_decode_end(AVCodecContext *avctx)
+{
+    WMADecodeContext *s = avctx->priv_data;
+    int i;
+
+    for(i = 0; i < s->nb_block_sizes; i++)
+        ff_mdct_end(&s->mdct_ctx[i]);
+    for(i = 0; i < s->nb_block_sizes; i++)
+        av_free(s->windows[i]);
+
+    if (s->use_exp_vlc) {
+        free_vlc(&s->exp_vlc);
+    }
+    if (s->use_noise_coding) {
+        free_vlc(&s->hgain_vlc);
+    }
+    for(i = 0;i < 2; i++) {
+        free_vlc(&s->coef_vlc[i]);
+        av_free(s->run_table[i]);
+        av_free(s->level_table[i]);
+    }
+
+    return 0;
+}
+
+AVCodec wmav1_decoder =
+{
+    "wmav1",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_WMAV1,
+    sizeof(WMADecodeContext),
+    wma_decode_init,
+    NULL,
+    wma_decode_end,
+    wma_decode_superframe,
+};
+
+AVCodec wmav2_decoder =
+{
+    "wmav2",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_WMAV2,
+    sizeof(WMADecodeContext),
+    wma_decode_init,
+    NULL,
+    wma_decode_end,
+    wma_decode_superframe,
+};
diff --git a/contrib/ffmpeg/libavcodec/wmv2.c b/contrib/ffmpeg/libavcodec/wmv2.c
new file mode 100644
index 000000000..5abc51775
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/wmv2.c
@@ -0,0 +1,855 @@
+/*
+ * Copyright (c) 2002 The FFmpeg Project.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file wmv2.c
+ * wmv2 codec.
+ */
+
+#include "simple_idct.h"
+
+#define SKIP_TYPE_NONE 0
+#define SKIP_TYPE_MPEG 1
+#define SKIP_TYPE_ROW  2
+#define SKIP_TYPE_COL  3
+
+
+typedef struct Wmv2Context{
+    MpegEncContext s;
+    int j_type_bit;
+    int j_type;
+    int flag3;
+    int flag63;
+    int abt_flag;
+    int abt_type;
+    int abt_type_table[6];
+    int per_mb_abt;
+    int per_block_abt;
+    int mspel_bit;
+    int cbp_table_index;
+    int top_left_mv_flag;
+    int per_mb_rl_bit;
+    int skip_type;
+    int hshift;
+
+    ScanTable abt_scantable[2];
+    DECLARE_ALIGNED_8(DCTELEM, abt_block2[6][64]);
+}Wmv2Context;
+
+static void wmv2_common_init(Wmv2Context * w){
+    MpegEncContext * const s= &w->s;
+
+    ff_init_scantable(s->dsp.idct_permutation, &w->abt_scantable[0], wmv2_scantableA);
+    ff_init_scantable(s->dsp.idct_permutation, &w->abt_scantable[1], wmv2_scantableB);
+}
+
+#ifdef CONFIG_ENCODERS
+
+static int encode_ext_header(Wmv2Context *w){
+    MpegEncContext * const s= &w->s;
+    PutBitContext pb;
+    int code;
+
+    init_put_bits(&pb, s->avctx->extradata, s->avctx->extradata_size);
+
+    put_bits(&pb, 5, s->avctx->time_base.den / s->avctx->time_base.num); //yes 29.97 -> 29
+    put_bits(&pb, 11, FFMIN(s->bit_rate/1024, 2047));
+
+    put_bits(&pb, 1, w->mspel_bit=1);
+    put_bits(&pb, 1, w->flag3=1);
+    put_bits(&pb, 1, w->abt_flag=1);
+    put_bits(&pb, 1, w->j_type_bit=1);
+    put_bits(&pb, 1, w->top_left_mv_flag=0);
+    put_bits(&pb, 1, w->per_mb_rl_bit=1);
+    put_bits(&pb, 3, code=1);
+
+    flush_put_bits(&pb);
+
+    s->slice_height = s->mb_height / code;
+
+    return 0;
+}
+
+static int wmv2_encode_init(AVCodecContext *avctx){
+    Wmv2Context * const w= avctx->priv_data;
+
+    if(MPV_encode_init(avctx) < 0)
+        return -1;
+
+    wmv2_common_init(w);
+
+    avctx->extradata_size= 4;
+    avctx->extradata= av_mallocz(avctx->extradata_size + 10);
+    encode_ext_header(w);
+
+    return 0;
+}
+
+#if 0 /* unused, remove? */
+static int wmv2_encode_end(AVCodecContext *avctx){
+
+    if(MPV_encode_end(avctx) < 0)
+        return -1;
+
+    avctx->extradata_size= 0;
+    av_freep(&avctx->extradata);
+
+    return 0;
+}
+#endif
+
+int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number)
+{
+    Wmv2Context * const w= (Wmv2Context*)s;
+
+    put_bits(&s->pb, 1, s->pict_type - 1);
+    if(s->pict_type == I_TYPE){
+        put_bits(&s->pb, 7, 0);
+    }
+    put_bits(&s->pb, 5, s->qscale);
+
+    s->dc_table_index = 1;
+    s->mv_table_index = 1; /* only if P frame */
+//    s->use_skip_mb_code = 1; /* only if P frame */
+    s->per_mb_rl_table = 0;
+    s->mspel= 0;
+    w->per_mb_abt=0;
+    w->abt_type=0;
+    w->j_type=0;
+
+    assert(s->flipflop_rounding);
+
+    if (s->pict_type == I_TYPE) {
+        assert(s->no_rounding==1);
+        if(w->j_type_bit) put_bits(&s->pb, 1, w->j_type);
+
+        if(w->per_mb_rl_bit) put_bits(&s->pb, 1, s->per_mb_rl_table);
+
+        if(!s->per_mb_rl_table){
+            code012(&s->pb, s->rl_chroma_table_index);
+            code012(&s->pb, s->rl_table_index);
+        }
+
+        put_bits(&s->pb, 1, s->dc_table_index);
+
+        s->inter_intra_pred= 0;
+    }else{
+        int cbp_index;
+
+        put_bits(&s->pb, 2, SKIP_TYPE_NONE);
+
+        code012(&s->pb, cbp_index=0);
+        if(s->qscale <= 10){
+            int map[3]= {0,2,1};
+            w->cbp_table_index= map[cbp_index];
+        }else if(s->qscale <= 20){
+            int map[3]= {1,0,2};
+            w->cbp_table_index= map[cbp_index];
+        }else{
+            int map[3]= {2,1,0};
+            w->cbp_table_index= map[cbp_index];
+        }
+
+        if(w->mspel_bit) put_bits(&s->pb, 1, s->mspel);
+
+        if(w->abt_flag){
+            put_bits(&s->pb, 1, w->per_mb_abt^1);
+            if(!w->per_mb_abt){
+                code012(&s->pb, w->abt_type);
+            }
+        }
+
+        if(w->per_mb_rl_bit) put_bits(&s->pb, 1, s->per_mb_rl_table);
+
+        if(!s->per_mb_rl_table){
+            code012(&s->pb, s->rl_table_index);
+            s->rl_chroma_table_index = s->rl_table_index;
+        }
+        put_bits(&s->pb, 1, s->dc_table_index);
+        put_bits(&s->pb, 1, s->mv_table_index);
+
+        s->inter_intra_pred= 0;//(s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE);
+    }
+    s->esc3_level_length= 0;
+    s->esc3_run_length= 0;
+
+    return 0;
+}
+
+// nearly idential to wmv1 but thats just because we dont use the useless M$ crap features
+// its duplicated here in case someone wants to add support for these carp features
+void ff_wmv2_encode_mb(MpegEncContext * s,
+                       DCTELEM block[6][64],
+                       int motion_x, int motion_y)
+{
+    Wmv2Context * const w= (Wmv2Context*)s;
+    int cbp, coded_cbp, i;
+    int pred_x, pred_y;
+    uint8_t *coded_block;
+
+    handle_slices(s);
+
+    if (!s->mb_intra) {
+        /* compute cbp */
+        cbp = 0;
+        for (i = 0; i < 6; i++) {
+            if (s->block_last_index[i] >= 0)
+                cbp |= 1 << (5 - i);
+        }
+
+        put_bits(&s->pb,
+                 wmv2_inter_table[w->cbp_table_index][cbp + 64][1],
+                 wmv2_inter_table[w->cbp_table_index][cbp + 64][0]);
+
+        /* motion vector */
+        h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
+        msmpeg4_encode_motion(s, motion_x - pred_x,
+                              motion_y - pred_y);
+    } else {
+        /* compute cbp */
+        cbp = 0;
+        coded_cbp = 0;
+        for (i = 0; i < 6; i++) {
+            int val, pred;
+            val = (s->block_last_index[i] >= 1);
+            cbp |= val << (5 - i);
+            if (i < 4) {
+                /* predict value for close blocks only for luma */
+                pred = coded_block_pred(s, i, &coded_block);
+                *coded_block = val;
+                val = val ^ pred;
+            }
+            coded_cbp |= val << (5 - i);
+        }
+#if 0
+        if (coded_cbp)
+            printf("cbp=%x %x\n", cbp, coded_cbp);
+#endif
+
+        if (s->pict_type == I_TYPE) {
+            put_bits(&s->pb,
+                     ff_msmp4_mb_i_table[coded_cbp][1], ff_msmp4_mb_i_table[coded_cbp][0]);
+        } else {
+            put_bits(&s->pb,
+                     wmv2_inter_table[w->cbp_table_index][cbp][1],
+                     wmv2_inter_table[w->cbp_table_index][cbp][0]);
+        }
+        put_bits(&s->pb, 1, 0);         /* no AC prediction yet */
+        if(s->inter_intra_pred){
+            s->h263_aic_dir=0;
+            put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]);
+        }
+    }
+
+    for (i = 0; i < 6; i++) {
+        msmpeg4_encode_block(s, block[i], i);
+    }
+}
+#endif //CONFIG_ENCODERS
+
+static void parse_mb_skip(Wmv2Context * w){
+    int mb_x, mb_y;
+    MpegEncContext * const s= &w->s;
+    uint32_t * const mb_type= s->current_picture_ptr->mb_type;
+
+    w->skip_type= get_bits(&s->gb, 2);
+    switch(w->skip_type){
+    case SKIP_TYPE_NONE:
+        for(mb_y=0; mb_y<s->mb_height; mb_y++){
+            for(mb_x=0; mb_x<s->mb_width; mb_x++){
+                mb_type[mb_y*s->mb_stride + mb_x]= MB_TYPE_16x16 | MB_TYPE_L0;
+            }
+        }
+        break;
+    case SKIP_TYPE_MPEG:
+        for(mb_y=0; mb_y<s->mb_height; mb_y++){
+            for(mb_x=0; mb_x<s->mb_width; mb_x++){
+                mb_type[mb_y*s->mb_stride + mb_x]= (get_bits1(&s->gb) ? MB_TYPE_SKIP : 0) | MB_TYPE_16x16 | MB_TYPE_L0;
+            }
+        }
+        break;
+    case SKIP_TYPE_ROW:
+        for(mb_y=0; mb_y<s->mb_height; mb_y++){
+            if(get_bits1(&s->gb)){
+                for(mb_x=0; mb_x<s->mb_width; mb_x++){
+                    mb_type[mb_y*s->mb_stride + mb_x]=  MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
+                }
+            }else{
+                for(mb_x=0; mb_x<s->mb_width; mb_x++){
+                    mb_type[mb_y*s->mb_stride + mb_x]= (get_bits1(&s->gb) ? MB_TYPE_SKIP : 0) | MB_TYPE_16x16 | MB_TYPE_L0;
+                }
+            }
+        }
+        break;
+    case SKIP_TYPE_COL:
+        for(mb_x=0; mb_x<s->mb_width; mb_x++){
+            if(get_bits1(&s->gb)){
+                for(mb_y=0; mb_y<s->mb_height; mb_y++){
+                    mb_type[mb_y*s->mb_stride + mb_x]=  MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
+                }
+            }else{
+                for(mb_y=0; mb_y<s->mb_height; mb_y++){
+                    mb_type[mb_y*s->mb_stride + mb_x]= (get_bits1(&s->gb) ? MB_TYPE_SKIP : 0) | MB_TYPE_16x16 | MB_TYPE_L0;
+                }
+            }
+        }
+        break;
+    }
+}
+
+static int decode_ext_header(Wmv2Context *w){
+    MpegEncContext * const s= &w->s;
+    GetBitContext gb;
+    int fps;
+    int code;
+
+    if(s->avctx->extradata_size<4) return -1;
+
+    init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size*8);
+
+    fps                = get_bits(&gb, 5);
+    s->bit_rate        = get_bits(&gb, 11)*1024;
+    w->mspel_bit       = get_bits1(&gb);
+    w->flag3           = get_bits1(&gb);
+    w->abt_flag        = get_bits1(&gb);
+    w->j_type_bit      = get_bits1(&gb);
+    w->top_left_mv_flag= get_bits1(&gb);
+    w->per_mb_rl_bit   = get_bits1(&gb);
+    code               = get_bits(&gb, 3);
+
+    if(code==0) return -1;
+
+    s->slice_height = s->mb_height / code;
+
+    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
+        av_log(s->avctx, AV_LOG_DEBUG, "fps:%d, br:%d, qpbit:%d, abt_flag:%d, j_type_bit:%d, tl_mv_flag:%d, mbrl_bit:%d, code:%d, flag3:%d, slices:%d\n",
+        fps, s->bit_rate, w->mspel_bit, w->abt_flag, w->j_type_bit, w->top_left_mv_flag, w->per_mb_rl_bit, code, w->flag3,
+        code);
+    }
+    return 0;
+}
+
+int ff_wmv2_decode_picture_header(MpegEncContext * s)
+{
+    Wmv2Context * const w= (Wmv2Context*)s;
+    int code;
+
+#if 0
+{
+int i;
+for(i=0; i<s->gb.size*8; i++)
+    printf("%d", get_bits1(&s->gb));
+//    get_bits1(&s->gb);
+printf("END\n");
+return -1;
+}
+#endif
+    if(s->picture_number==0)
+        decode_ext_header(w);
+
+    s->pict_type = get_bits(&s->gb, 1) + 1;
+    if(s->pict_type == I_TYPE){
+        code = get_bits(&s->gb, 7);
+        av_log(s->avctx, AV_LOG_DEBUG, "I7:%X/\n", code);
+    }
+    s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
+    if(s->qscale < 0)
+       return -1;
+
+    return 0;
+}
+
+int ff_wmv2_decode_secondary_picture_header(MpegEncContext * s)
+{
+    Wmv2Context * const w= (Wmv2Context*)s;
+
+    if (s->pict_type == I_TYPE) {
+        if(w->j_type_bit) w->j_type= get_bits1(&s->gb);
+        else              w->j_type= 0; //FIXME check
+
+        if(!w->j_type){
+            if(w->per_mb_rl_bit) s->per_mb_rl_table= get_bits1(&s->gb);
+            else                 s->per_mb_rl_table= 0;
+
+            if(!s->per_mb_rl_table){
+                s->rl_chroma_table_index = decode012(&s->gb);
+                s->rl_table_index = decode012(&s->gb);
+            }
+
+            s->dc_table_index = get_bits1(&s->gb);
+        }
+        s->inter_intra_pred= 0;
+        s->no_rounding = 1;
+        if(s->avctx->debug&FF_DEBUG_PICT_INFO){
+            av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d j_type:%d \n",
+                s->qscale,
+                s->rl_chroma_table_index,
+                s->rl_table_index,
+                s->dc_table_index,
+                s->per_mb_rl_table,
+                w->j_type);
+        }
+    }else{
+        int cbp_index;
+        w->j_type=0;
+
+        parse_mb_skip(w);
+        cbp_index= decode012(&s->gb);
+        if(s->qscale <= 10){
+            int map[3]= {0,2,1};
+            w->cbp_table_index= map[cbp_index];
+        }else if(s->qscale <= 20){
+            int map[3]= {1,0,2};
+            w->cbp_table_index= map[cbp_index];
+        }else{
+            int map[3]= {2,1,0};
+            w->cbp_table_index= map[cbp_index];
+        }
+
+        if(w->mspel_bit) s->mspel= get_bits1(&s->gb);
+        else             s->mspel= 0; //FIXME check
+
+        if(w->abt_flag){
+            w->per_mb_abt= get_bits1(&s->gb)^1;
+            if(!w->per_mb_abt){
+                w->abt_type= decode012(&s->gb);
+            }
+        }
+
+        if(w->per_mb_rl_bit) s->per_mb_rl_table= get_bits1(&s->gb);
+        else                 s->per_mb_rl_table= 0;
+
+        if(!s->per_mb_rl_table){
+            s->rl_table_index = decode012(&s->gb);
+            s->rl_chroma_table_index = s->rl_table_index;
+        }
+
+        s->dc_table_index = get_bits1(&s->gb);
+        s->mv_table_index = get_bits1(&s->gb);
+
+        s->inter_intra_pred= 0;//(s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE);
+        s->no_rounding ^= 1;
+
+        if(s->avctx->debug&FF_DEBUG_PICT_INFO){
+            av_log(s->avctx, AV_LOG_DEBUG, "rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d mspel:%d per_mb_abt:%d abt_type:%d cbp:%d ii:%d\n",
+                s->rl_table_index,
+                s->rl_chroma_table_index,
+                s->dc_table_index,
+                s->mv_table_index,
+                s->per_mb_rl_table,
+                s->qscale,
+                s->mspel,
+                w->per_mb_abt,
+                w->abt_type,
+                w->cbp_table_index,
+                s->inter_intra_pred);
+        }
+    }
+    s->esc3_level_length= 0;
+    s->esc3_run_length= 0;
+
+s->picture_number++; //FIXME ?
+
+
+//    if(w->j_type)
+//        return wmv2_decode_j_picture(w); //FIXME
+
+    if(w->j_type){
+        av_log(s->avctx, AV_LOG_ERROR, "J-type picture is not supported\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static inline int wmv2_decode_motion(Wmv2Context *w, int *mx_ptr, int *my_ptr){
+    MpegEncContext * const s= &w->s;
+    int ret;
+
+    ret= msmpeg4_decode_motion(s, mx_ptr, my_ptr);
+
+    if(ret<0) return -1;
+
+    if((((*mx_ptr)|(*my_ptr)) & 1) && s->mspel)
+        w->hshift= get_bits1(&s->gb);
+    else
+        w->hshift= 0;
+
+//printf("%d %d  ", *mx_ptr, *my_ptr);
+
+    return 0;
+}
+
+static int16_t *wmv2_pred_motion(Wmv2Context *w, int *px, int *py){
+    MpegEncContext * const s= &w->s;
+    int xy, wrap, diff, type;
+    int16_t *A, *B, *C, *mot_val;
+
+    wrap = s->b8_stride;
+    xy = s->block_index[0];
+
+    mot_val = s->current_picture.motion_val[0][xy];
+
+    A = s->current_picture.motion_val[0][xy - 1];
+    B = s->current_picture.motion_val[0][xy - wrap];
+    C = s->current_picture.motion_val[0][xy + 2 - wrap];
+
+    if(s->mb_x && !s->first_slice_line && !s->mspel && w->top_left_mv_flag)
+        diff= FFMAX(FFABS(A[0] - B[0]), FFABS(A[1] - B[1]));
+    else
+        diff=0;
+
+    if(diff >= 8)
+        type= get_bits1(&s->gb);
+    else
+        type= 2;
+
+    if(type == 0){
+        *px= A[0];
+        *py= A[1];
+    }else if(type == 1){
+        *px= B[0];
+        *py= B[1];
+    }else{
+        /* special case for first (slice) line */
+        if (s->first_slice_line) {
+            *px = A[0];
+            *py = A[1];
+        } else {
+            *px = mid_pred(A[0], B[0], C[0]);
+            *py = mid_pred(A[1], B[1], C[1]);
+        }
+    }
+
+    return mot_val;
+}
+
+static inline int wmv2_decode_inter_block(Wmv2Context *w, DCTELEM *block, int n, int cbp){
+    MpegEncContext * const s= &w->s;
+    static const int sub_cbp_table[3]= {2,3,1};
+    int sub_cbp;
+
+    if(!cbp){
+        s->block_last_index[n] = -1;
+
+        return 0;
+    }
+
+    if(w->per_block_abt)
+        w->abt_type= decode012(&s->gb);
+#if 0
+    if(w->per_block_abt)
+        printf("B%d", w->abt_type);
+#endif
+    w->abt_type_table[n]= w->abt_type;
+
+    if(w->abt_type){
+//        const uint8_t *scantable= w->abt_scantable[w->abt_type-1].permutated;
+        const uint8_t *scantable= w->abt_scantable[w->abt_type-1].scantable;
+//        const uint8_t *scantable= w->abt_type-1 ? w->abt_scantable[1].permutated : w->abt_scantable[0].scantable;
+
+        sub_cbp= sub_cbp_table[ decode012(&s->gb) ];
+//        printf("S%d", sub_cbp);
+
+        if(sub_cbp&1){
+            if (msmpeg4_decode_block(s, block, n, 1, scantable) < 0)
+                return -1;
+        }
+
+        if(sub_cbp&2){
+            if (msmpeg4_decode_block(s, w->abt_block2[n], n, 1, scantable) < 0)
+                return -1;
+        }
+        s->block_last_index[n] = 63;
+
+        return 0;
+    }else{
+        return msmpeg4_decode_block(s, block, n, 1, s->inter_scantable.permutated);
+    }
+}
+
+static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int stride, int n){
+    MpegEncContext * const s= &w->s;
+
+  if (s->block_last_index[n] >= 0) {
+    switch(w->abt_type_table[n]){
+    case 0:
+        s->dsp.idct_add (dst, stride, block1);
+        break;
+    case 1:
+        simple_idct84_add(dst           , stride, block1);
+        simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]);
+        memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM));
+        break;
+    case 2:
+        simple_idct48_add(dst           , stride, block1);
+        simple_idct48_add(dst + 4       , stride, w->abt_block2[n]);
+        memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM));
+        break;
+    default:
+        av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n");
+    }
+  }
+}
+
+void ff_wmv2_add_mb(MpegEncContext *s, DCTELEM block1[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr){
+    Wmv2Context * const w= (Wmv2Context*)s;
+
+    wmv2_add_block(w, block1[0], dest_y                    , s->linesize, 0);
+    wmv2_add_block(w, block1[1], dest_y + 8                , s->linesize, 1);
+    wmv2_add_block(w, block1[2], dest_y +     8*s->linesize, s->linesize, 2);
+    wmv2_add_block(w, block1[3], dest_y + 8 + 8*s->linesize, s->linesize, 3);
+
+    if(s->flags&CODEC_FLAG_GRAY) return;
+
+    wmv2_add_block(w, block1[4], dest_cb                   , s->uvlinesize, 4);
+    wmv2_add_block(w, block1[5], dest_cr                   , s->uvlinesize, 5);
+}
+
+void ff_mspel_motion(MpegEncContext *s,
+                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
+                               int motion_x, int motion_y, int h)
+{
+    Wmv2Context * const w= (Wmv2Context*)s;
+    uint8_t *ptr;
+    int dxy, offset, mx, my, src_x, src_y, v_edge_pos, linesize, uvlinesize;
+    int emu=0;
+
+    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
+    dxy = 2*dxy + w->hshift;
+    src_x = s->mb_x * 16 + (motion_x >> 1);
+    src_y = s->mb_y * 16 + (motion_y >> 1);
+
+    /* WARNING: do no forget half pels */
+    v_edge_pos = s->v_edge_pos;
+    src_x = clip(src_x, -16, s->width);
+    src_y = clip(src_y, -16, s->height);
+    linesize   = s->linesize;
+    uvlinesize = s->uvlinesize;
+    ptr = ref_picture[0] + (src_y * linesize) + src_x;
+
+    if(s->flags&CODEC_FLAG_EMU_EDGE){
+        if(src_x<1 || src_y<1 || src_x + 17  >= s->h_edge_pos
+                              || src_y + h+1 >= v_edge_pos){
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr - 1 - s->linesize, s->linesize, 19, 19,
+                             src_x-1, src_y-1, s->h_edge_pos, s->v_edge_pos);
+            ptr= s->edge_emu_buffer + 1 + s->linesize;
+            emu=1;
+        }
+    }
+
+    s->dsp.put_mspel_pixels_tab[dxy](dest_y             , ptr             , linesize);
+    s->dsp.put_mspel_pixels_tab[dxy](dest_y+8           , ptr+8           , linesize);
+    s->dsp.put_mspel_pixels_tab[dxy](dest_y  +8*linesize, ptr  +8*linesize, linesize);
+    s->dsp.put_mspel_pixels_tab[dxy](dest_y+8+8*linesize, ptr+8+8*linesize, linesize);
+
+    if(s->flags&CODEC_FLAG_GRAY) return;
+
+    if (s->out_format == FMT_H263) {
+        dxy = 0;
+        if ((motion_x & 3) != 0)
+            dxy |= 1;
+        if ((motion_y & 3) != 0)
+            dxy |= 2;
+        mx = motion_x >> 2;
+        my = motion_y >> 2;
+    } else {
+        mx = motion_x / 2;
+        my = motion_y / 2;
+        dxy = ((my & 1) << 1) | (mx & 1);
+        mx >>= 1;
+        my >>= 1;
+    }
+
+    src_x = s->mb_x * 8 + mx;
+    src_y = s->mb_y * 8 + my;
+    src_x = clip(src_x, -8, s->width >> 1);
+    if (src_x == (s->width >> 1))
+        dxy &= ~1;
+    src_y = clip(src_y, -8, s->height >> 1);
+    if (src_y == (s->height >> 1))
+        dxy &= ~2;
+    offset = (src_y * uvlinesize) + src_x;
+    ptr = ref_picture[1] + offset;
+    if(emu){
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9,
+                         src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+        ptr= s->edge_emu_buffer;
+    }
+    pix_op[1][dxy](dest_cb, ptr, uvlinesize, h >> 1);
+
+    ptr = ref_picture[2] + offset;
+    if(emu){
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9,
+                         src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+        ptr= s->edge_emu_buffer;
+    }
+    pix_op[1][dxy](dest_cr, ptr, uvlinesize, h >> 1);
+}
+
+
+static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
+{
+    Wmv2Context * const w= (Wmv2Context*)s;
+    int cbp, code, i;
+    uint8_t *coded_val;
+
+    if(w->j_type) return 0;
+
+    if (s->pict_type == P_TYPE) {
+        if(IS_SKIP(s->current_picture.mb_type[s->mb_y * s->mb_stride + s->mb_x])){
+            /* skip mb */
+            s->mb_intra = 0;
+            for(i=0;i<6;i++)
+                s->block_last_index[i] = -1;
+            s->mv_dir = MV_DIR_FORWARD;
+            s->mv_type = MV_TYPE_16X16;
+            s->mv[0][0][0] = 0;
+            s->mv[0][0][1] = 0;
+            s->mb_skipped = 1;
+            w->hshift=0;
+            return 0;
+        }
+
+        code = get_vlc2(&s->gb, mb_non_intra_vlc[w->cbp_table_index].table, MB_NON_INTRA_VLC_BITS, 3);
+        if (code < 0)
+            return -1;
+        s->mb_intra = (~code & 0x40) >> 6;
+
+        cbp = code & 0x3f;
+    } else {
+        s->mb_intra = 1;
+        code = get_vlc2(&s->gb, ff_msmp4_mb_i_vlc.table, MB_INTRA_VLC_BITS, 2);
+        if (code < 0){
+            av_log(s->avctx, AV_LOG_ERROR, "II-cbp illegal at %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+        /* predict coded block pattern */
+        cbp = 0;
+        for(i=0;i<6;i++) {
+            int val = ((code >> (5 - i)) & 1);
+            if (i < 4) {
+                int pred = coded_block_pred(s, i, &coded_val);
+                val = val ^ pred;
+                *coded_val = val;
+            }
+            cbp |= val << (5 - i);
+        }
+    }
+
+    if (!s->mb_intra) {
+        int mx, my;
+//printf("P at %d %d\n", s->mb_x, s->mb_y);
+        wmv2_pred_motion(w, &mx, &my);
+
+        if(cbp){
+            s->dsp.clear_blocks(s->block[0]);
+            if(s->per_mb_rl_table){
+                s->rl_table_index = decode012(&s->gb);
+                s->rl_chroma_table_index = s->rl_table_index;
+            }
+
+            if(w->abt_flag && w->per_mb_abt){
+                w->per_block_abt= get_bits1(&s->gb);
+                if(!w->per_block_abt)
+                    w->abt_type= decode012(&s->gb);
+            }else
+                w->per_block_abt=0;
+        }
+
+        if (wmv2_decode_motion(w, &mx, &my) < 0)
+            return -1;
+
+        s->mv_dir = MV_DIR_FORWARD;
+        s->mv_type = MV_TYPE_16X16;
+        s->mv[0][0][0] = mx;
+        s->mv[0][0][1] = my;
+
+        for (i = 0; i < 6; i++) {
+            if (wmv2_decode_inter_block(w, block[i], i, (cbp >> (5 - i)) & 1) < 0)
+            {
+                av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding inter block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
+                return -1;
+            }
+        }
+    } else {
+//if(s->pict_type==P_TYPE)
+//   printf("%d%d ", s->inter_intra_pred, cbp);
+//printf("I at %d %d %d %06X\n", s->mb_x, s->mb_y, ((cbp&3)? 1 : 0) +((cbp&0x3C)? 2 : 0), show_bits(&s->gb, 24));
+        s->ac_pred = get_bits1(&s->gb);
+        if(s->inter_intra_pred){
+            s->h263_aic_dir= get_vlc2(&s->gb, inter_intra_vlc.table, INTER_INTRA_VLC_BITS, 1);
+//            printf("%d%d %d %d/", s->ac_pred, s->h263_aic_dir, s->mb_x, s->mb_y);
+        }
+        if(s->per_mb_rl_table && cbp){
+            s->rl_table_index = decode012(&s->gb);
+            s->rl_chroma_table_index = s->rl_table_index;
+        }
+
+        s->dsp.clear_blocks(s->block[0]);
+        for (i = 0; i < 6; i++) {
+            if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
+            {
+                av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding intra block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
+                return -1;
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int wmv2_decode_init(AVCodecContext *avctx){
+    Wmv2Context * const w= avctx->priv_data;
+
+    if(ff_h263_decode_init(avctx) < 0)
+        return -1;
+
+    wmv2_common_init(w);
+
+    return 0;
+}
+
+AVCodec wmv2_decoder = {
+    "wmv2",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_WMV2,
+    sizeof(Wmv2Context),
+    wmv2_decode_init,
+    NULL,
+    ff_h263_decode_end,
+    ff_h263_decode_frame,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+};
+
+#ifdef CONFIG_ENCODERS
+AVCodec wmv2_encoder = {
+    "wmv2",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_WMV2,
+    sizeof(Wmv2Context),
+    wmv2_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+};
+#endif
diff --git a/contrib/ffmpeg/libavcodec/wnv1.c b/contrib/ffmpeg/libavcodec/wnv1.c
new file mode 100644
index 000000000..46b31a5c5
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/wnv1.c
@@ -0,0 +1,146 @@
+/*
+ * Winnov WNV1 codec
+ * Copyright (c) 2005 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file wnv1.c
+ * Winnov WNV1 codec.
+ */
+
+#include "avcodec.h"
+#include "common.h"
+#include "bitstream.h"
+
+
+typedef struct WNV1Context{
+    AVCodecContext *avctx;
+    AVFrame pic;
+
+    int shift;
+    GetBitContext gb;
+} WNV1Context;
+
+static uint16_t code_tab[16][2]={
+{0x1FD,9}, {0xFD,8}, {0x7D,7}, {0x3D,6}, {0x1D,5}, {0x0D,4}, {0x005,3},
+{0x000,1},
+{0x004,3}, {0x0C,4}, {0x1C,5}, {0x3C,6}, {0x7C,7}, {0xFC,8}, {0x1FC,9}, {0xFF,8}
+};
+
+#define CODE_VLC_BITS 9
+static VLC code_vlc;
+
+/* returns modified base_value */
+static inline int wnv1_get_code(WNV1Context *w, int base_value)
+{
+    int v = get_vlc2(&w->gb, code_vlc.table, CODE_VLC_BITS, 1);
+
+    if(v==15)
+        return ff_reverse[ get_bits(&w->gb, 8 - w->shift) ];
+    else
+        return base_value + ((v - 7)<<w->shift);
+}
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    WNV1Context * const l = avctx->priv_data;
+    AVFrame * const p= (AVFrame*)&l->pic;
+    unsigned char *Y,*U,*V;
+    int i, j;
+    int prev_y = 0, prev_u = 0, prev_v = 0;
+
+    if(p->data[0])
+        avctx->release_buffer(avctx, p);
+
+    p->reference = 0;
+    if(avctx->get_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    p->key_frame = 1;
+
+    for(i=8; i<buf_size; i++)
+        buf[i]= ff_reverse[ buf[i] ]; //FIXME ensure that the buffer is modifyable or use a temp one
+    init_get_bits(&l->gb, buf+8, (buf_size-8)*8);
+
+    if (buf[2] >> 4 == 6)
+        l->shift = 2;
+    else {
+        l->shift = 8 - (buf[2] >> 4);
+        if (l->shift > 4) {
+            av_log(avctx, AV_LOG_ERROR, "Unknown WNV1 frame header value %i, please upload file for study\n", buf[2] >> 4);
+            l->shift = 4;
+        }
+        if (l->shift < 1) {
+            av_log(avctx, AV_LOG_ERROR, "Unknown WNV1 frame header value %i, please upload file for study\n", buf[2] >> 4);
+            l->shift = 1;
+        }
+    }
+
+    Y = p->data[0];
+    U = p->data[1];
+    V = p->data[2];
+    for (j = 0; j < avctx->height; j++) {
+        for (i = 0; i < avctx->width / 2; i++) {
+            Y[i * 2] = wnv1_get_code(l, prev_y);
+            prev_u = U[i] = wnv1_get_code(l, prev_u);
+            prev_y = Y[(i * 2) + 1] = wnv1_get_code(l, Y[i * 2]);
+            prev_v = V[i] = wnv1_get_code(l, prev_v);
+        }
+        Y += p->linesize[0];
+        U += p->linesize[1];
+        V += p->linesize[2];
+    }
+
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = l->pic;
+
+    return buf_size;
+}
+
+static int decode_init(AVCodecContext *avctx){
+    WNV1Context * const l = avctx->priv_data;
+
+    l->avctx = avctx;
+    avctx->pix_fmt = PIX_FMT_YUV422P;
+
+    if(!code_vlc.table){
+        init_vlc(&code_vlc, CODE_VLC_BITS, 16,
+                    &code_tab[0][1], 4, 2,
+                    &code_tab[0][0], 4, 2, 1);
+    }
+
+    return 0;
+}
+
+AVCodec wnv1_decoder = {
+    "wnv1",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_WNV1,
+    sizeof(WNV1Context),
+    decode_init,
+    NULL,
+    NULL,
+    decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/ws-snd1.c b/contrib/ffmpeg/libavcodec/ws-snd1.c
new file mode 100644
index 000000000..eb4fe81d3
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/ws-snd1.c
@@ -0,0 +1,144 @@
+/*
+ * Westwood SNDx codecs
+ * Copyright (c) 2005 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+
+/**
+ * @file ws-snd.c
+ * Westwood SNDx codecs.
+ *
+ * Reference documents about VQA format and its audio codecs
+ * can be found here:
+ * http://www.multimedia.cx
+ */
+
+static const char ws_adpcm_2bit[] = { -2, -1, 0, 1};
+static const char ws_adpcm_4bit[] = {
+    -9, -8, -6, -5, -4, -3, -2, -1,
+     0,  1,  2,  3,  4,  5,  6,  8 };
+
+#define CLIP8(a) if(a>127)a=127;if(a<-128)a=-128;
+
+static int ws_snd_decode_init(AVCodecContext * avctx)
+{
+//    WSSNDContext *c = avctx->priv_data;
+
+    return 0;
+}
+
+static int ws_snd_decode_frame(AVCodecContext *avctx,
+                void *data, int *data_size,
+                uint8_t *buf, int buf_size)
+{
+//    WSSNDContext *c = avctx->priv_data;
+
+    int in_size, out_size;
+    int sample = 0;
+    int i;
+    short *samples = data;
+
+    if (!buf_size)
+        return 0;
+
+    out_size = LE_16(&buf[0]);
+    *data_size = out_size * 2;
+    in_size = LE_16(&buf[2]);
+    buf += 4;
+
+    if (in_size == out_size) {
+        for (i = 0; i < out_size; i++)
+            *samples++ = (*buf++ - 0x80) << 8;
+        return buf_size;
+    }
+
+    while (out_size > 0) {
+        int code;
+        uint8_t count;
+        code = (*buf) >> 6;
+        count = (*buf) & 0x3F;
+        buf++;
+        switch(code) {
+        case 0: /* ADPCM 2-bit */
+            for (count++; count > 0; count--) {
+                code = *buf++;
+                sample += ws_adpcm_2bit[code & 0x3];
+                CLIP8(sample);
+                *samples++ = sample << 8;
+                sample += ws_adpcm_2bit[(code >> 2) & 0x3];
+                CLIP8(sample);
+                *samples++ = sample << 8;
+                sample += ws_adpcm_2bit[(code >> 4) & 0x3];
+                CLIP8(sample);
+                *samples++ = sample << 8;
+                sample += ws_adpcm_2bit[(code >> 6) & 0x3];
+                CLIP8(sample);
+                *samples++ = sample << 8;
+                out_size -= 4;
+            }
+            break;
+        case 1: /* ADPCM 4-bit */
+            for (count++; count > 0; count--) {
+                code = *buf++;
+                sample += ws_adpcm_4bit[code & 0xF];
+                CLIP8(sample);
+                *samples++ = sample << 8;
+                sample += ws_adpcm_4bit[code >> 4];
+                CLIP8(sample);
+                *samples++ = sample << 8;
+                out_size -= 2;
+            }
+            break;
+        case 2: /* no compression */
+            if (count & 0x20) { /* big delta */
+                char t;
+                t = count;
+                t <<= 3;
+                sample += t >> 3;
+                *samples++ = sample << 8;
+                out_size--;
+            } else { /* copy */
+                for (count++; count > 0; count--) {
+                    *samples++ = (*buf++ - 0x80) << 8;
+                    out_size--;
+                }
+                sample = buf[-1] - 0x80;
+            }
+            break;
+        default: /* run */
+            for(count++; count > 0; count--) {
+                *samples++ = sample << 8;
+                out_size--;
+            }
+        }
+    }
+
+    return buf_size;
+}
+
+AVCodec ws_snd1_decoder = {
+    "ws_snd1",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_WESTWOOD_SND1,
+    0,
+    ws_snd_decode_init,
+    NULL,
+    NULL,
+    ws_snd_decode_frame,
+};
diff --git a/contrib/ffmpeg/libavcodec/x264.c b/contrib/ffmpeg/libavcodec/x264.c
new file mode 100644
index 000000000..b08678779
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/x264.c
@@ -0,0 +1,299 @@
+/*
+ * H.264 encoding using the x264 library
+ * Copyright (C) 2005  Mans Rullgard <mru@inprovide.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include <x264.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef struct X264Context {
+    x264_param_t params;
+    x264_t *enc;
+    x264_picture_t pic;
+    AVFrame out_pic;
+} X264Context;
+
+static void
+X264_log(void *p, int level, const char *fmt, va_list args)
+{
+    static const int level_map[] = {
+        [X264_LOG_ERROR]   = AV_LOG_ERROR,
+        [X264_LOG_WARNING] = AV_LOG_ERROR,
+        [X264_LOG_INFO]    = AV_LOG_INFO,
+        [X264_LOG_DEBUG]   = AV_LOG_DEBUG
+    };
+
+    if(level < 0 || level > X264_LOG_DEBUG)
+        return;
+
+    av_vlog(p, level_map[level], fmt, args);
+}
+
+
+static int
+encode_nals(uint8_t *buf, int size, x264_nal_t *nals, int nnal)
+{
+    uint8_t *p = buf;
+    int i;
+
+    for(i = 0; i < nnal; i++){
+        int s = x264_nal_encode(p, &size, 1, nals + i);
+        if(s < 0)
+            return -1;
+        p += s;
+    }
+
+    return p - buf;
+}
+
+static int
+X264_frame(AVCodecContext *ctx, uint8_t *buf, int bufsize, void *data)
+{
+    X264Context *x4 = ctx->priv_data;
+    AVFrame *frame = data;
+    x264_nal_t *nal;
+    int nnal, i;
+    x264_picture_t pic_out;
+
+    x4->pic.img.i_csp = X264_CSP_I420;
+    x4->pic.img.i_plane = 3;
+
+    if (frame) {
+        for(i = 0; i < 3; i++){
+            x4->pic.img.plane[i] = frame->data[i];
+            x4->pic.img.i_stride[i] = frame->linesize[i];
+        }
+
+        x4->pic.i_pts = frame->pts;
+        x4->pic.i_type = X264_TYPE_AUTO;
+    }
+
+    if(x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL,
+                           &pic_out))
+        return -1;
+
+    bufsize = encode_nals(buf, bufsize, nal, nnal);
+    if(bufsize < 0)
+        return -1;
+
+    /* FIXME: dts */
+    x4->out_pic.pts = pic_out.i_pts;
+
+    switch(pic_out.i_type){
+    case X264_TYPE_IDR:
+    case X264_TYPE_I:
+        x4->out_pic.pict_type = FF_I_TYPE;
+        break;
+    case X264_TYPE_P:
+        x4->out_pic.pict_type = FF_P_TYPE;
+        break;
+    case X264_TYPE_B:
+    case X264_TYPE_BREF:
+        x4->out_pic.pict_type = FF_B_TYPE;
+        break;
+    }
+
+    x4->out_pic.key_frame = pic_out.i_type == X264_TYPE_IDR;
+    x4->out_pic.quality = (pic_out.i_qpplus1 - 1) * FF_QP2LAMBDA;
+
+    return bufsize;
+}
+
+static int
+X264_close(AVCodecContext *avctx)
+{
+    X264Context *x4 = avctx->priv_data;
+
+    if(x4->enc)
+        x264_encoder_close(x4->enc);
+
+    return 0;
+}
+
+static int
+X264_init(AVCodecContext *avctx)
+{
+    X264Context *x4 = avctx->priv_data;
+
+    x264_param_default(&x4->params);
+
+    x4->params.pf_log = X264_log;
+    x4->params.p_log_private = avctx;
+
+    x4->params.i_keyint_max = avctx->gop_size;
+    x4->params.rc.i_bitrate = avctx->bit_rate / 1000;
+    x4->params.rc.i_vbv_buffer_size = avctx->rc_buffer_size / 1000;
+    x4->params.rc.i_vbv_max_bitrate = avctx->rc_max_rate / 1000;
+    x4->params.rc.b_stat_write = (avctx->flags & CODEC_FLAG_PASS1);
+    if(avctx->flags & CODEC_FLAG_PASS2) x4->params.rc.b_stat_read = 1;
+    else{
+        if(avctx->crf){
+            x4->params.rc.i_rc_method = X264_RC_CRF;
+            x4->params.rc.f_rf_constant = avctx->crf;
+        }else if(avctx->cqp > -1){
+            x4->params.rc.i_rc_method = X264_RC_CQP;
+            x4->params.rc.i_qp_constant = avctx->cqp;
+        }
+    }
+
+    // if neither crf nor cqp modes are selected we have to enable the RC
+    // we do it this way because we cannot check if the bitrate has been set
+    if(!(avctx->crf || (avctx->cqp > -1))) x4->params.rc.i_rc_method = X264_RC_ABR;
+
+    x4->params.i_bframe = avctx->max_b_frames;
+    x4->params.b_cabac = avctx->coder_type == FF_CODER_TYPE_AC;
+    x4->params.b_bframe_adaptive = avctx->b_frame_strategy;
+    x4->params.i_bframe_bias = avctx->bframebias;
+    x4->params.b_bframe_pyramid = (avctx->flags2 & CODEC_FLAG2_BPYRAMID);
+    avctx->has_b_frames= (avctx->flags2 & CODEC_FLAG2_BPYRAMID) ? 2 : !!avctx->max_b_frames;
+
+    x4->params.i_keyint_min = avctx->keyint_min;
+    if(x4->params.i_keyint_min > x4->params.i_keyint_max)
+        x4->params.i_keyint_min = x4->params.i_keyint_max;
+
+    x4->params.i_scenecut_threshold = avctx->scenechange_threshold;
+
+    x4->params.b_deblocking_filter = (avctx->flags & CODEC_FLAG_LOOP_FILTER);
+    x4->params.i_deblocking_filter_alphac0 = avctx->deblockalpha;
+    x4->params.i_deblocking_filter_beta = avctx->deblockbeta;
+
+    x4->params.rc.i_qp_min = avctx->qmin;
+    x4->params.rc.i_qp_max = avctx->qmax;
+    x4->params.rc.i_qp_step = avctx->max_qdiff;
+
+    x4->params.rc.f_qcompress = avctx->qcompress;  /* 0.0 => cbr, 1.0 => constant qp */
+    x4->params.rc.f_qblur = avctx->qblur;        /* temporally blur quants */
+    x4->params.rc.f_complexity_blur = avctx->complexityblur;
+
+    x4->params.i_frame_reference = avctx->refs;
+
+    x4->params.i_width = avctx->width;
+    x4->params.i_height = avctx->height;
+    x4->params.vui.i_sar_width = avctx->sample_aspect_ratio.num;
+    x4->params.vui.i_sar_height = avctx->sample_aspect_ratio.den;
+    x4->params.i_fps_num = avctx->time_base.den;
+    x4->params.i_fps_den = avctx->time_base.num;
+
+    x4->params.analyse.inter = 0;
+    if(avctx->partitions){
+        if(avctx->partitions & X264_PART_I4X4)
+            x4->params.analyse.inter |= X264_ANALYSE_I4x4;
+        if(avctx->partitions & X264_PART_I8X8)
+            x4->params.analyse.inter |= X264_ANALYSE_I8x8;
+        if(avctx->partitions & X264_PART_P8X8)
+            x4->params.analyse.inter |= X264_ANALYSE_PSUB16x16;
+        if(avctx->partitions & X264_PART_P4X4)
+            x4->params.analyse.inter |= X264_ANALYSE_PSUB8x8;
+        if(avctx->partitions & X264_PART_B8X8)
+            x4->params.analyse.inter |= X264_ANALYSE_BSUB16x16;
+    }
+
+    x4->params.analyse.i_direct_mv_pred = avctx->directpred;
+
+    x4->params.analyse.b_weighted_bipred = (avctx->flags2 & CODEC_FLAG2_WPRED);
+
+    if(avctx->me_method == ME_EPZS)
+        x4->params.analyse.i_me_method = X264_ME_DIA;
+    else if(avctx->me_method == ME_HEX)
+        x4->params.analyse.i_me_method = X264_ME_HEX;
+    else if(avctx->me_method == ME_UMH)
+        x4->params.analyse.i_me_method = X264_ME_UMH;
+    else if(avctx->me_method == ME_FULL)
+        x4->params.analyse.i_me_method = X264_ME_ESA;
+    else x4->params.analyse.i_me_method = X264_ME_HEX;
+
+    x4->params.analyse.i_me_range = avctx->me_range;
+    x4->params.analyse.i_subpel_refine = avctx->me_subpel_quality;
+
+    x4->params.analyse.b_bframe_rdo = (avctx->flags2 & CODEC_FLAG2_BRDO);
+    x4->params.analyse.b_mixed_references =
+        (avctx->flags2 & CODEC_FLAG2_MIXED_REFS);
+    x4->params.analyse.b_chroma_me = (avctx->me_cmp & FF_CMP_CHROMA);
+    x4->params.analyse.b_transform_8x8 = (avctx->flags2 & CODEC_FLAG2_8X8DCT);
+    x4->params.analyse.b_fast_pskip = (avctx->flags2 & CODEC_FLAG2_FASTPSKIP);
+
+    x4->params.analyse.i_trellis = avctx->trellis;
+    x4->params.analyse.i_noise_reduction = avctx->noise_reduction;
+
+    if(avctx->level > 0) x4->params.i_level_idc = avctx->level;
+
+    x4->params.rc.f_rate_tolerance =
+        (float)avctx->bit_rate_tolerance/avctx->bit_rate;
+
+    if((avctx->rc_buffer_size != 0) &&
+            (avctx->rc_initial_buffer_occupancy <= avctx->rc_buffer_size)){
+        x4->params.rc.f_vbv_buffer_init =
+            (float)avctx->rc_initial_buffer_occupancy/avctx->rc_buffer_size;
+    }
+    else x4->params.rc.f_vbv_buffer_init = 0.9;
+
+    x4->params.rc.f_ip_factor = 1/fabs(avctx->i_quant_factor);
+    x4->params.rc.f_pb_factor = avctx->b_quant_factor;
+    x4->params.analyse.i_chroma_qp_offset = avctx->chromaoffset;
+    x4->params.rc.psz_rc_eq = avctx->rc_eq;
+
+    x4->params.analyse.b_psnr = (avctx->flags & CODEC_FLAG_PSNR);
+    x4->params.i_log_level = X264_LOG_DEBUG;
+
+    x4->params.b_aud = (avctx->flags2 & CODEC_FLAG2_AUD);
+
+    x4->params.i_threads = avctx->thread_count;
+
+    if(avctx->flags & CODEC_FLAG_GLOBAL_HEADER){
+        x4->params.b_repeat_headers = 0;
+    }
+
+    x4->enc = x264_encoder_open(&x4->params);
+    if(!x4->enc)
+        return -1;
+
+    avctx->coded_frame = &x4->out_pic;
+
+    if(avctx->flags & CODEC_FLAG_GLOBAL_HEADER){
+        x264_nal_t *nal;
+        int nnal, i, s = 0;
+
+        x264_encoder_headers(x4->enc, &nal, &nnal);
+
+        /* 5 bytes NAL header + worst case escaping */
+        for(i = 0; i < nnal; i++)
+            s += 5 + nal[i].i_payload * 4 / 3;
+
+        avctx->extradata = av_malloc(s);
+        avctx->extradata_size = encode_nals(avctx->extradata, s, nal, nnal);
+    }
+
+    return 0;
+}
+
+AVCodec x264_encoder = {
+    .name = "h264",
+    .type = CODEC_TYPE_VIDEO,
+    .id = CODEC_ID_H264,
+    .priv_data_size = sizeof(X264Context),
+    .init = X264_init,
+    .encode = X264_frame,
+    .close = X264_close,
+    .capabilities = CODEC_CAP_DELAY,
+    .pix_fmts = (enum PixelFormat[]) { PIX_FMT_YUV420P, -1 }
+};
diff --git a/contrib/ffmpeg/libavcodec/xan.c b/contrib/ffmpeg/libavcodec/xan.c
new file mode 100644
index 000000000..56ce87a95
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/xan.c
@@ -0,0 +1,492 @@
+/*
+ * Wing Commander/Xan Video Decoder
+ * Copyright (C) 2003 the ffmpeg project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file xan.c
+ * Xan video decoder for Wing Commander III computer game
+ * by Mario Brito (mbrito@student.dei.uc.pt)
+ * and Mike Melanson (melanson@pcisys.net)
+ *
+ * The xan_wc3 decoder outputs PAL8 data.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "avcodec.h"
+
+typedef struct XanContext {
+
+    AVCodecContext *avctx;
+    AVFrame last_frame;
+    AVFrame current_frame;
+
+    unsigned char *buf;
+    int size;
+
+    /* scratch space */
+    unsigned char *buffer1;
+    int buffer1_size;
+    unsigned char *buffer2;
+    int buffer2_size;
+
+    int frame_size;
+
+} XanContext;
+
+static int xan_decode_init(AVCodecContext *avctx)
+{
+    XanContext *s = avctx->priv_data;
+
+    s->avctx = avctx;
+    s->frame_size = 0;
+
+    if ((avctx->codec->id == CODEC_ID_XAN_WC3) &&
+        (s->avctx->palctrl == NULL)) {
+        av_log(avctx, AV_LOG_ERROR, " WC3 Xan video: palette expected.\n");
+        return -1;
+    }
+
+    avctx->pix_fmt = PIX_FMT_PAL8;
+    avctx->has_b_frames = 0;
+
+    if(avcodec_check_dimensions(avctx, avctx->width, avctx->height))
+        return -1;
+
+    s->buffer1_size = avctx->width * avctx->height;
+    s->buffer1 = av_malloc(s->buffer1_size);
+    s->buffer2_size = avctx->width * avctx->height;
+    s->buffer2 = av_malloc(s->buffer2_size);
+    if (!s->buffer1 || !s->buffer2)
+        return -1;
+
+    return 0;
+}
+
+/* This function is used in lieu of memcpy(). This decoder can not use
+ * memcpy because the memory locations often overlap and
+ * memcpy doesn't like that; it's not uncommon, for example, for
+ * dest = src+1, to turn byte A into  pattern AAAAAAAA.
+ * This was originally repz movsb in Intel x86 ASM. */
+static inline void bytecopy(unsigned char *dest, unsigned char *src, int count)
+{
+    int i;
+
+    for (i = 0; i < count; i++)
+        dest[i] = src[i];
+}
+
+static int xan_huffman_decode(unsigned char *dest, unsigned char *src,
+    int dest_len)
+{
+    unsigned char byte = *src++;
+    unsigned char ival = byte + 0x16;
+    unsigned char * ptr = src + byte*2;
+    unsigned char val = ival;
+    int counter = 0;
+    unsigned char *dest_end = dest + dest_len;
+
+    unsigned char bits = *ptr++;
+
+    while ( val != 0x16 ) {
+        if ( (1 << counter) & bits )
+            val = src[byte + val - 0x17];
+        else
+            val = src[val - 0x17];
+
+        if ( val < 0x16 ) {
+            if (dest + 1 > dest_end)
+                return 0;
+            *dest++ = val;
+            val = ival;
+        }
+
+        if (counter++ == 7) {
+            counter = 0;
+            bits = *ptr++;
+        }
+    }
+
+    return 0;
+}
+
+static void xan_unpack(unsigned char *dest, unsigned char *src, int dest_len)
+{
+    unsigned char opcode;
+    int size;
+    int offset;
+    int byte1, byte2, byte3;
+    unsigned char *dest_end = dest + dest_len;
+
+    for (;;) {
+        opcode = *src++;
+
+        if ( (opcode & 0x80) == 0 ) {
+
+            offset = *src++;
+
+            size = opcode & 3;
+            if (dest + size > dest_end)
+                return;
+            bytecopy(dest, src, size);  dest += size;  src += size;
+
+            size = ((opcode & 0x1c) >> 2) + 3;
+            if (dest + size > dest_end)
+                return;
+            bytecopy (dest, dest - (((opcode & 0x60) << 3) + offset + 1), size);
+            dest += size;
+
+        } else if ( (opcode & 0x40) == 0 ) {
+
+            byte1 = *src++;
+            byte2 = *src++;
+
+            size = byte1 >> 6;
+            if (dest + size > dest_end)
+                return;
+            bytecopy (dest, src, size);  dest += size;  src += size;
+
+            size = (opcode & 0x3f) + 4;
+            if (dest + size > dest_end)
+                return;
+            bytecopy (dest, dest - (((byte1 & 0x3f) << 8) + byte2 + 1), size);
+            dest += size;
+
+        } else if ( (opcode & 0x20) == 0 ) {
+
+            byte1 = *src++;
+            byte2 = *src++;
+            byte3 = *src++;
+
+            size = opcode & 3;
+            if (dest + size > dest_end)
+                return;
+            bytecopy (dest, src, size);  dest += size;  src += size;
+
+            size = byte3 + 5 + ((opcode & 0xc) << 6);
+            if (dest + size > dest_end)
+                return;
+            bytecopy (dest,
+                dest - ((((opcode & 0x10) >> 4) << 0x10) + 1 + (byte1 << 8) + byte2),
+                size);
+            dest += size;
+        } else {
+            size = ((opcode & 0x1f) << 2) + 4;
+
+            if (size > 0x70)
+                break;
+
+            if (dest + size > dest_end)
+                return;
+            bytecopy (dest, src, size);  dest += size;  src += size;
+        }
+    }
+
+    size = opcode & 3;
+    bytecopy(dest, src, size);  dest += size;  src += size;
+}
+
+static void inline xan_wc3_output_pixel_run(XanContext *s,
+    unsigned char *pixel_buffer, int x, int y, int pixel_count)
+{
+    int stride;
+    int line_inc;
+    int index;
+    int current_x;
+    int width = s->avctx->width;
+    unsigned char *palette_plane;
+
+    palette_plane = s->current_frame.data[0];
+    stride = s->current_frame.linesize[0];
+    line_inc = stride - width;
+    index = y * stride + x;
+    current_x = x;
+    while((pixel_count--) && (index < s->frame_size)) {
+
+        /* don't do a memcpy() here; keyframes generally copy an entire
+         * frame of data and the stride needs to be accounted for */
+        palette_plane[index++] = *pixel_buffer++;
+
+        current_x++;
+        if (current_x >= width) {
+            index += line_inc;
+            current_x = 0;
+        }
+    }
+}
+
+static void inline xan_wc3_copy_pixel_run(XanContext *s,
+    int x, int y, int pixel_count, int motion_x, int motion_y)
+{
+    int stride;
+    int line_inc;
+    int curframe_index, prevframe_index;
+    int curframe_x, prevframe_x;
+    int width = s->avctx->width;
+    unsigned char *palette_plane, *prev_palette_plane;
+
+    palette_plane = s->current_frame.data[0];
+    prev_palette_plane = s->last_frame.data[0];
+    stride = s->current_frame.linesize[0];
+    line_inc = stride - width;
+    curframe_index = y * stride + x;
+    curframe_x = x;
+    prevframe_index = (y + motion_y) * stride + x + motion_x;
+    prevframe_x = x + motion_x;
+    while((pixel_count--) && (curframe_index < s->frame_size)) {
+
+        palette_plane[curframe_index++] =
+            prev_palette_plane[prevframe_index++];
+
+        curframe_x++;
+        if (curframe_x >= width) {
+            curframe_index += line_inc;
+            curframe_x = 0;
+        }
+
+        prevframe_x++;
+        if (prevframe_x >= width) {
+            prevframe_index += line_inc;
+            prevframe_x = 0;
+        }
+    }
+}
+
+static void xan_wc3_decode_frame(XanContext *s) {
+
+    int width = s->avctx->width;
+    int height = s->avctx->height;
+    int total_pixels = width * height;
+    unsigned char opcode;
+    unsigned char flag = 0;
+    int size = 0;
+    int motion_x, motion_y;
+    int x, y;
+
+    unsigned char *opcode_buffer = s->buffer1;
+    int opcode_buffer_size = s->buffer1_size;
+    unsigned char *imagedata_buffer = s->buffer2;
+    int imagedata_buffer_size = s->buffer2_size;
+
+    /* pointers to segments inside the compressed chunk */
+    unsigned char *huffman_segment;
+    unsigned char *size_segment;
+    unsigned char *vector_segment;
+    unsigned char *imagedata_segment;
+
+    huffman_segment =   s->buf + LE_16(&s->buf[0]);
+    size_segment =      s->buf + LE_16(&s->buf[2]);
+    vector_segment =    s->buf + LE_16(&s->buf[4]);
+    imagedata_segment = s->buf + LE_16(&s->buf[6]);
+
+    xan_huffman_decode(opcode_buffer, huffman_segment, opcode_buffer_size);
+
+    if (imagedata_segment[0] == 2)
+        xan_unpack(imagedata_buffer, &imagedata_segment[1],
+            imagedata_buffer_size);
+    else
+        imagedata_buffer = &imagedata_segment[1];
+
+    /* use the decoded data segments to build the frame */
+    x = y = 0;
+    while (total_pixels) {
+
+        opcode = *opcode_buffer++;
+        size = 0;
+
+        switch (opcode) {
+
+        case 0:
+            flag ^= 1;
+            continue;
+
+        case 1:
+        case 2:
+        case 3:
+        case 4:
+        case 5:
+        case 6:
+        case 7:
+        case 8:
+            size = opcode;
+            break;
+
+        case 12:
+        case 13:
+        case 14:
+        case 15:
+        case 16:
+        case 17:
+        case 18:
+            size += (opcode - 10);
+            break;
+
+        case 9:
+        case 19:
+            size = *size_segment++;
+            break;
+
+        case 10:
+        case 20:
+            size = BE_16(&size_segment[0]);
+            size_segment += 2;
+            break;
+
+        case 11:
+        case 21:
+            size = (size_segment[0] << 16) | (size_segment[1] << 8) |
+                size_segment[2];
+            size_segment += 3;
+            break;
+        }
+
+        if (opcode < 12) {
+            flag ^= 1;
+            if (flag) {
+                /* run of (size) pixels is unchanged from last frame */
+                xan_wc3_copy_pixel_run(s, x, y, size, 0, 0);
+            } else {
+                /* output a run of pixels from imagedata_buffer */
+                xan_wc3_output_pixel_run(s, imagedata_buffer, x, y, size);
+                imagedata_buffer += size;
+            }
+        } else {
+            /* run-based motion compensation from last frame */
+            motion_x = (*vector_segment >> 4) & 0xF;
+            motion_y = *vector_segment & 0xF;
+            vector_segment++;
+
+            /* sign extension */
+            if (motion_x & 0x8)
+                motion_x |= 0xFFFFFFF0;
+            if (motion_y & 0x8)
+                motion_y |= 0xFFFFFFF0;
+
+            /* copy a run of pixels from the previous frame */
+            xan_wc3_copy_pixel_run(s, x, y, size, motion_x, motion_y);
+
+            flag = 0;
+        }
+
+        /* coordinate accounting */
+        total_pixels -= size;
+        while (size) {
+            if (x + size >= width) {
+                y++;
+                size -= (width - x);
+                x = 0;
+            } else {
+                x += size;
+                size = 0;
+            }
+        }
+    }
+}
+
+static void xan_wc4_decode_frame(XanContext *s) {
+}
+
+static int xan_decode_frame(AVCodecContext *avctx,
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
+{
+    XanContext *s = avctx->priv_data;
+    AVPaletteControl *palette_control = avctx->palctrl;
+
+    if (avctx->get_buffer(avctx, &s->current_frame)) {
+        av_log(s->avctx, AV_LOG_ERROR, "  Xan Video: get_buffer() failed\n");
+        return -1;
+    }
+    s->current_frame.reference = 3;
+
+    if (!s->frame_size)
+        s->frame_size = s->current_frame.linesize[0] * s->avctx->height;
+
+    palette_control->palette_changed = 0;
+    memcpy(s->current_frame.data[1], palette_control->palette,
+        AVPALETTE_SIZE);
+    s->current_frame.palette_has_changed = 1;
+
+    s->buf = buf;
+    s->size = buf_size;
+
+    if (avctx->codec->id == CODEC_ID_XAN_WC3)
+        xan_wc3_decode_frame(s);
+    else if (avctx->codec->id == CODEC_ID_XAN_WC4)
+        xan_wc4_decode_frame(s);
+
+    /* release the last frame if it is allocated */
+    if (s->last_frame.data[0])
+        avctx->release_buffer(avctx, &s->last_frame);
+
+    /* shuffle frames */
+    s->last_frame = s->current_frame;
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = s->current_frame;
+
+    /* always report that the buffer was completely consumed */
+    return buf_size;
+}
+
+static int xan_decode_end(AVCodecContext *avctx)
+{
+    XanContext *s = avctx->priv_data;
+
+    /* release the last frame */
+    if (s->last_frame.data[0])
+        avctx->release_buffer(avctx, &s->last_frame);
+
+    av_free(s->buffer1);
+    av_free(s->buffer2);
+
+    return 0;
+}
+
+AVCodec xan_wc3_decoder = {
+    "xan_wc3",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_XAN_WC3,
+    sizeof(XanContext),
+    xan_decode_init,
+    NULL,
+    xan_decode_end,
+    xan_decode_frame,
+    CODEC_CAP_DR1,
+};
+
+/*
+AVCodec xan_wc4_decoder = {
+    "xan_wc4",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_XAN_WC4,
+    sizeof(XanContext),
+    xan_decode_init,
+    NULL,
+    xan_decode_end,
+    xan_decode_frame,
+    CODEC_CAP_DR1,
+};
+*/
diff --git a/contrib/ffmpeg/libavcodec/xl.c b/contrib/ffmpeg/libavcodec/xl.c
new file mode 100644
index 000000000..67ad237e1
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/xl.c
@@ -0,0 +1,140 @@
+/*
+ * Miro VideoXL codec
+ * Copyright (c) 2004 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file xl.c
+ * Miro VideoXL codec.
+ */
+
+#include "avcodec.h"
+#include "mpegvideo.h"
+
+typedef struct VideoXLContext{
+    AVCodecContext *avctx;
+    AVFrame pic;
+} VideoXLContext;
+
+static const int xl_table[32] = {
+   0,   1,   2,   3,   4,   5,   6,   7,
+   8,   9,  12,  15,  20,  25,  34,  46,
+  64,  82,  94, 103, 108, 113, 116, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127};
+
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *data_size,
+                        uint8_t *buf, int buf_size)
+{
+    VideoXLContext * const a = avctx->priv_data;
+    AVFrame * const p= (AVFrame*)&a->pic;
+    uint8_t *Y, *U, *V;
+    int i, j;
+    int stride;
+    uint32_t val;
+    int y0, y1, y2, y3, c0, c1;
+
+    if(p->data[0])
+        avctx->release_buffer(avctx, p);
+
+    p->reference = 0;
+    if(avctx->get_buffer(avctx, p) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+    p->pict_type= I_TYPE;
+    p->key_frame= 1;
+
+    Y = a->pic.data[0];
+    U = a->pic.data[1];
+    V = a->pic.data[2];
+
+    stride = avctx->width - 4;
+    for (i = 0; i < avctx->height; i++) {
+        /* lines are stored in reversed order */
+        buf += stride;
+
+        for (j = 0; j < avctx->width; j += 4) {
+            /* value is stored in LE dword with word swapped */
+            val = LE_32(buf);
+            buf -= 4;
+            val = ((val >> 16) & 0xFFFF) | ((val & 0xFFFF) << 16);
+
+            if(!j)
+                y0 = (val & 0x1F) << 2;
+            else
+                y0 = y3 + xl_table[val & 0x1F];
+            val >>= 5;
+            y1 = y0 + xl_table[val & 0x1F];
+            val >>= 5;
+            y2 = y1 + xl_table[val & 0x1F];
+            val >>= 6; /* align to word */
+            y3 = y2 + xl_table[val & 0x1F];
+            val >>= 5;
+            if(!j)
+                c0 = (val & 0x1F) << 2;
+            else
+                c0 += xl_table[val & 0x1F];
+            val >>= 5;
+            if(!j)
+                c1 = (val & 0x1F) << 2;
+            else
+                c1 += xl_table[val & 0x1F];
+
+            Y[j + 0] = y0 << 1;
+            Y[j + 1] = y1 << 1;
+            Y[j + 2] = y2 << 1;
+            Y[j + 3] = y3 << 1;
+
+            U[j >> 2] = c0 << 1;
+            V[j >> 2] = c1 << 1;
+        }
+
+        buf += avctx->width + 4;
+        Y += a->pic.linesize[0];
+        U += a->pic.linesize[1];
+        V += a->pic.linesize[2];
+    }
+
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = a->pic;
+
+    return buf_size;
+}
+
+static int decode_init(AVCodecContext *avctx){
+//    VideoXLContext * const a = avctx->priv_data;
+
+    avctx->pix_fmt= PIX_FMT_YUV411P;
+
+    return 0;
+}
+
+AVCodec xl_decoder = {
+    "xl",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_VIXL,
+    sizeof(VideoXLContext),
+    decode_init,
+    NULL,
+    NULL,
+    decode_frame,
+    CODEC_CAP_DR1,
+};
diff --git a/contrib/ffmpeg/libavcodec/xvid_internal.h b/contrib/ffmpeg/libavcodec/xvid_internal.h
new file mode 100644
index 000000000..49c59c205
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/xvid_internal.h
@@ -0,0 +1,32 @@
+/*
+ * copyright (C) 2006 Corey Hickey
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef XVID_INTERNAL_H
+#define XVID_INTERNAL_H
+
+/**
+ * @file xvid_internal.h
+ * common functions for use with the XviD wrappers
+ */
+
+
+int av_tempfile(char *prefix, char **filename);
+
+#endif /* XVID_INTERNAL_H */
diff --git a/contrib/ffmpeg/libavcodec/xvid_rc.c b/contrib/ffmpeg/libavcodec/xvid_rc.c
new file mode 100644
index 000000000..6a0029e6d
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/xvid_rc.c
@@ -0,0 +1,148 @@
+/*
+ * xvid Rate control wrapper for lavc video encoders
+ *
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <xvid.h>
+#include <unistd.h>
+#include "avcodec.h"
+#include "xvid_internal.h"
+//#include "dsputil.h"
+#include "mpegvideo.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+extern unsigned int xvid_debug;
+
+int ff_xvid_rate_control_init(MpegEncContext *s){
+    char *tmp_name;
+    int fd, i;
+    xvid_plg_create_t xvid_plg_create;
+    xvid_plugin_2pass2_t xvid_2pass2;
+
+//xvid_debug=-1;
+
+    fd=av_tempfile("xvidrc.", &tmp_name);
+    if (fd == -1) {
+        av_log(NULL, AV_LOG_ERROR, "Can't create temporary pass2 file.\n");
+        return -1;
+    }
+
+    for(i=0; i<s->rc_context.num_entries; i++){
+        static const char *frame_types = " ipbs";
+        char tmp[256];
+        RateControlEntry *rce;
+
+        rce= &s->rc_context.entry[i];
+
+        snprintf(tmp, sizeof(tmp), "%c %d %d %d %d %d %d\n",
+            frame_types[rce->pict_type], (int)lrintf(rce->qscale / FF_QP2LAMBDA), rce->i_count, s->mb_num - rce->i_count - rce->skip_count,
+            rce->skip_count, (rce->i_tex_bits + rce->p_tex_bits + rce->misc_bits+7)/8, (rce->header_bits+rce->mv_bits+7)/8);
+
+//av_log(NULL, AV_LOG_ERROR, "%s\n", tmp);
+        write(fd, tmp, strlen(tmp));
+    }
+
+    close(fd);
+
+    memset(&xvid_2pass2, 0, sizeof(xvid_2pass2));
+    xvid_2pass2.version= XVID_MAKE_VERSION(1,1,0);
+    xvid_2pass2.filename= tmp_name;
+    xvid_2pass2.bitrate= s->avctx->bit_rate;
+    xvid_2pass2.vbv_size= s->avctx->rc_buffer_size;
+    xvid_2pass2.vbv_maxrate= s->avctx->rc_max_rate;
+    xvid_2pass2.vbv_initial= s->avctx->rc_initial_buffer_occupancy;
+
+    memset(&xvid_plg_create, 0, sizeof(xvid_plg_create));
+    xvid_plg_create.version= XVID_MAKE_VERSION(1,1,0);
+    xvid_plg_create.fbase= s->avctx->time_base.den;
+    xvid_plg_create.fincr= s->avctx->time_base.num;
+    xvid_plg_create.param= &xvid_2pass2;
+
+    if(xvid_plugin_2pass2(NULL, XVID_PLG_CREATE, &xvid_plg_create, &s->rc_context.non_lavc_opaque)<0){
+        av_log(NULL, AV_LOG_ERROR, "xvid_plugin_2pass2 failed\n");
+        return -1;
+    }
+    return 0;
+}
+
+float ff_xvid_rate_estimate_qscale(MpegEncContext *s, int dry_run){
+    xvid_plg_data_t xvid_plg_data;
+
+    memset(&xvid_plg_data, 0, sizeof(xvid_plg_data));
+    xvid_plg_data.version= XVID_MAKE_VERSION(1,1,0);
+    xvid_plg_data.width = s->width;
+    xvid_plg_data.height= s->height;
+    xvid_plg_data.mb_width = s->mb_width;
+    xvid_plg_data.mb_height= s->mb_height;
+    xvid_plg_data.fbase= s->avctx->time_base.den;
+    xvid_plg_data.fincr= s->avctx->time_base.num;
+    xvid_plg_data.min_quant[0]= s->avctx->qmin;
+    xvid_plg_data.min_quant[1]= s->avctx->qmin;
+    xvid_plg_data.min_quant[2]= s->avctx->qmin; //FIXME i/b factor & offset
+    xvid_plg_data.max_quant[0]= s->avctx->qmax;
+    xvid_plg_data.max_quant[1]= s->avctx->qmax;
+    xvid_plg_data.max_quant[2]= s->avctx->qmax; //FIXME i/b factor & offset
+    xvid_plg_data.bquant_offset = 0; //  100 * s->avctx->b_quant_offset;
+    xvid_plg_data.bquant_ratio = 100; // * s->avctx->b_quant_factor;
+
+#if 0
+    xvid_plg_data.stats.hlength= X
+#endif
+
+    if(!s->rc_context.dry_run_qscale){
+        if(s->picture_number){
+            xvid_plg_data.length=
+            xvid_plg_data.stats.length= (s->frame_bits + 7)/8;
+            xvid_plg_data.frame_num= s->rc_context.last_picture_number;
+            xvid_plg_data.quant= s->qscale;
+
+            xvid_plg_data.type= s->last_pict_type;
+            if(xvid_plugin_2pass2(s->rc_context.non_lavc_opaque, XVID_PLG_AFTER, &xvid_plg_data, NULL)){
+                av_log(s->avctx, AV_LOG_ERROR, "xvid_plugin_2pass2(handle, XVID_PLG_AFTER, ...) FAILED\n");
+                return -1;
+            }
+        }
+        s->rc_context.last_picture_number=
+        xvid_plg_data.frame_num= s->picture_number;
+        xvid_plg_data.quant= 0;
+        if(xvid_plugin_2pass2(s->rc_context.non_lavc_opaque, XVID_PLG_BEFORE, &xvid_plg_data, NULL)){
+            av_log(s->avctx, AV_LOG_ERROR, "xvid_plugin_2pass2(handle, XVID_PLG_BEFORE, ...) FAILED\n");
+            return -1;
+        }
+        s->rc_context.dry_run_qscale= xvid_plg_data.quant;
+    }
+    xvid_plg_data.quant= s->rc_context.dry_run_qscale;
+    if(!dry_run)
+        s->rc_context.dry_run_qscale= 0;
+
+    if(s->pict_type == B_TYPE) //FIXME this is not exactly identical to xvid
+        return xvid_plg_data.quant * FF_QP2LAMBDA * s->avctx->b_quant_factor + s->avctx->b_quant_offset;
+    else
+        return xvid_plg_data.quant * FF_QP2LAMBDA;
+}
+
+void ff_xvid_rate_control_uninit(MpegEncContext *s){
+    xvid_plg_destroy_t xvid_plg_destroy;
+
+    xvid_plugin_2pass2(s->rc_context.non_lavc_opaque, XVID_PLG_DESTROY, &xvid_plg_destroy, NULL);
+}
+
diff --git a/contrib/ffmpeg/libavcodec/xvidff.c b/contrib/ffmpeg/libavcodec/xvidff.c
new file mode 100644
index 000000000..590fe4b30
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/xvidff.c
@@ -0,0 +1,768 @@
+/*
+ * Interface to xvidcore for mpeg4 encoding
+ * Copyright (c) 2004 Adam Thayer <krevnik@comcast.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file xvidmpeg4.c
+ * Interface to xvidcore for MPEG-4 compliant encoding.
+ * @author Adam Thayer (krevnik@comcast.net)
+ */
+
+#include <xvid.h>
+#include <unistd.h>
+#include "common.h"
+#include "avcodec.h"
+#include "xvid_internal.h"
+
+/**
+ * Buffer management macros.
+ */
+#define BUFFER_SIZE                 1024
+#define BUFFER_REMAINING(x)         (BUFFER_SIZE - strlen(x))
+#define BUFFER_CAT(x)               (&((x)[strlen(x)]))
+
+/* For PPC Use */
+#if HAVE_ALTIVEC==1
+extern int has_altivec(void);
+#endif
+
+/**
+ * Structure for the private XviD context.
+ * This stores all the private context for the codec.
+ */
+typedef struct xvid_context {
+    void *encoder_handle;          /** Handle for XviD Encoder */
+    int xsize, ysize;              /** Frame size */
+    int vop_flags;                 /** VOP flags for XviD Encoder */
+    int vol_flags;                 /** VOL flags for XviD Encoder */
+    int me_flags;                  /** Motion Estimation flags */
+    int qscale;                    /** Do we use constant scale? */
+    int quicktime_format;          /** Are we in a QT-based format? */
+    AVFrame encoded_picture;       /** Encoded frame information */
+    char *twopassbuffer;           /** Character buffer for two-pass */
+    char *old_twopassbuffer;       /** Old character buffer (two-pass) */
+    char *twopassfile;             /** second pass temp file name */
+    unsigned char *intra_matrix;   /** P-Frame Quant Matrix */
+    unsigned char *inter_matrix;   /** I-Frame Quant Matrix */
+} xvid_context_t;
+
+/**
+ * Structure for the private first-pass plugin.
+ */
+typedef struct xvid_ff_pass1 {
+    int     version;                /** XviD version */
+    xvid_context_t *context;        /** Pointer to private context */
+} xvid_ff_pass1_t;
+
+/* Prototypes - See function implementation for details */
+int xvid_strip_vol_header(AVCodecContext *avctx, unsigned char *frame, unsigned int header_len, unsigned int frame_len);
+int xvid_ff_2pass(void *ref, int opt, void *p1, void *p2);
+void xvid_correct_framerate(AVCodecContext *avctx);
+
+/**
+ * Creates the private context for the encoder.
+ * All buffers are allocated, settings are loaded from the user,
+ * and the encoder context created.
+ *
+ * @param avctx AVCodecContext pointer to context
+ * @return Returns 0 on success, -1 on failure
+ */
+int ff_xvid_encode_init(AVCodecContext *avctx)  {
+    int xerr, i;
+    int xvid_flags = avctx->flags;
+    xvid_context_t *x = avctx->priv_data;
+    uint16_t *intra, *inter;
+    int fd;
+
+    xvid_plugin_single_t single;
+    xvid_ff_pass1_t rc2pass1;
+    xvid_plugin_2pass2_t rc2pass2;
+    xvid_gbl_init_t xvid_gbl_init;
+    xvid_enc_create_t xvid_enc_create;
+    xvid_enc_plugin_t plugins[7];
+
+    /* Bring in VOP flags from ffmpeg command-line */
+    x->vop_flags = XVID_VOP_HALFPEL; /* Bare minimum quality */
+    if( xvid_flags & CODEC_FLAG_4MV )
+        x->vop_flags |= XVID_VOP_INTER4V; /* Level 3 */
+    if( xvid_flags & CODEC_FLAG_TRELLIS_QUANT)
+        x->vop_flags |= XVID_VOP_TRELLISQUANT; /* Level 5 */
+    if( xvid_flags & CODEC_FLAG_AC_PRED )
+        x->vop_flags |= XVID_VOP_HQACPRED; /* Level 6 */
+    if( xvid_flags & CODEC_FLAG_GRAY )
+        x->vop_flags |= XVID_VOP_GREYSCALE;
+
+    /* Decide which ME quality setting to use */
+    x->me_flags = 0;
+    switch( avctx->me_method ) {
+       case ME_FULL:   /* Quality 6 */
+           x->me_flags |=  XVID_ME_EXTSEARCH16
+                       |   XVID_ME_EXTSEARCH8;
+
+       case ME_EPZS:   /* Quality 4 */
+           x->me_flags |=  XVID_ME_ADVANCEDDIAMOND8
+                       |   XVID_ME_HALFPELREFINE8
+                       |   XVID_ME_CHROMA_PVOP
+                       |   XVID_ME_CHROMA_BVOP;
+
+       case ME_LOG:    /* Quality 2 */
+       case ME_PHODS:
+       case ME_X1:
+           x->me_flags |=  XVID_ME_ADVANCEDDIAMOND16
+                       |   XVID_ME_HALFPELREFINE16;
+
+       case ME_ZERO:   /* Quality 0 */
+       default:
+           break;
+    }
+
+    /* Decide how we should decide blocks */
+    switch( avctx->mb_decision ) {
+       case 2:
+           x->vop_flags |= XVID_VOP_MODEDECISION_RD;
+           x->me_flags |=  XVID_ME_HALFPELREFINE8_RD
+                       |   XVID_ME_QUARTERPELREFINE8_RD
+                       |   XVID_ME_EXTSEARCH_RD
+                       |   XVID_ME_CHECKPREDICTION_RD;
+       case 1:
+           if( !(x->vop_flags & XVID_VOP_MODEDECISION_RD) )
+               x->vop_flags |= XVID_VOP_FAST_MODEDECISION_RD;
+           x->me_flags |=  XVID_ME_HALFPELREFINE16_RD
+                       |   XVID_ME_QUARTERPELREFINE16_RD;
+
+       default:
+           break;
+    }
+
+    /* Bring in VOL flags from ffmpeg command-line */
+    x->vol_flags = 0;
+    if( xvid_flags & CODEC_FLAG_GMC ) {
+        x->vol_flags |= XVID_VOL_GMC;
+        x->me_flags |= XVID_ME_GME_REFINE;
+    }
+    if( xvid_flags & CODEC_FLAG_QPEL ) {
+        x->vol_flags |= XVID_VOL_QUARTERPEL;
+        x->me_flags |= XVID_ME_QUARTERPELREFINE16;
+        if( x->vop_flags & XVID_VOP_INTER4V )
+            x->me_flags |= XVID_ME_QUARTERPELREFINE8;
+    }
+
+    memset(&xvid_gbl_init, 0, sizeof(xvid_gbl_init));
+    xvid_gbl_init.version = XVID_VERSION;
+    xvid_gbl_init.debug = 0;
+
+#ifdef ARCH_POWERPC
+    /* XviD's PPC support is borked, use libavcodec to detect */
+#if HAVE_ALTIVEC==1
+    if( has_altivec() ) {
+        xvid_gbl_init.cpu_flags = XVID_CPU_FORCE | XVID_CPU_ALTIVEC;
+    } else
+#endif
+        xvid_gbl_init.cpu_flags = XVID_CPU_FORCE;
+#else
+    /* XviD can detect on x86 */
+    xvid_gbl_init.cpu_flags = 0;
+#endif
+
+    /* Initialize */
+    xvid_global(NULL, XVID_GBL_INIT, &xvid_gbl_init, NULL);
+
+    /* Create the encoder reference */
+    memset(&xvid_enc_create, 0, sizeof(xvid_enc_create));
+    xvid_enc_create.version = XVID_VERSION;
+
+    /* Store the desired frame size */
+    xvid_enc_create.width = x->xsize = avctx->width;
+    xvid_enc_create.height = x->ysize = avctx->height;
+
+    /* XviD can determine the proper profile to use */
+    /* xvid_enc_create.profile = XVID_PROFILE_S_L3; */
+
+    /* We don't use zones or threads */
+    xvid_enc_create.zones = NULL;
+    xvid_enc_create.num_zones = 0;
+    xvid_enc_create.num_threads = 0;
+
+    xvid_enc_create.plugins = plugins;
+    xvid_enc_create.num_plugins = 0;
+
+    /* Initialize Buffers */
+    x->twopassbuffer = NULL;
+    x->old_twopassbuffer = NULL;
+    x->twopassfile = NULL;
+
+    if( xvid_flags & CODEC_FLAG_PASS1 ) {
+        memset(&rc2pass1, 0, sizeof(xvid_ff_pass1_t));
+        rc2pass1.version = XVID_VERSION;
+        rc2pass1.context = x;
+        x->twopassbuffer = av_malloc(BUFFER_SIZE);
+        x->old_twopassbuffer = av_malloc(BUFFER_SIZE);
+        if( x->twopassbuffer == NULL || x->old_twopassbuffer == NULL ) {
+            av_log(avctx, AV_LOG_ERROR,
+                "XviD: Cannot allocate 2-pass log buffers\n");
+            return -1;
+        }
+        x->twopassbuffer[0] = x->old_twopassbuffer[0] = 0;
+
+        plugins[xvid_enc_create.num_plugins].func = xvid_ff_2pass;
+        plugins[xvid_enc_create.num_plugins].param = &rc2pass1;
+        xvid_enc_create.num_plugins++;
+    } else if( xvid_flags & CODEC_FLAG_PASS2 ) {
+        memset(&rc2pass2, 0, sizeof(xvid_plugin_2pass2_t));
+        rc2pass2.version = XVID_VERSION;
+        rc2pass2.bitrate = avctx->bit_rate;
+
+        fd = av_tempfile("xvidff.", &(x->twopassfile));
+        if( fd == -1 ) {
+            av_log(avctx, AV_LOG_ERROR,
+                "XviD: Cannot write 2-pass pipe\n");
+            return -1;
+        }
+
+        if( avctx->stats_in == NULL ) {
+            av_log(avctx, AV_LOG_ERROR,
+                "XviD: No 2-pass information loaded for second pass\n");
+            return -1;
+        }
+
+        if( strlen(avctx->stats_in) >
+              write(fd, avctx->stats_in, strlen(avctx->stats_in)) ) {
+            close(fd);
+            av_log(avctx, AV_LOG_ERROR,
+                "XviD: Cannot write to 2-pass pipe\n");
+            return -1;
+        }
+
+        close(fd);
+        rc2pass2.filename = x->twopassfile;
+        plugins[xvid_enc_create.num_plugins].func = xvid_plugin_2pass2;
+        plugins[xvid_enc_create.num_plugins].param = &rc2pass2;
+        xvid_enc_create.num_plugins++;
+    } else if( !(xvid_flags & CODEC_FLAG_QSCALE) ) {
+        /* Single Pass Bitrate Control! */
+        memset(&single, 0, sizeof(xvid_plugin_single_t));
+        single.version = XVID_VERSION;
+        single.bitrate = avctx->bit_rate;
+
+        plugins[xvid_enc_create.num_plugins].func = xvid_plugin_single;
+        plugins[xvid_enc_create.num_plugins].param = &single;
+        xvid_enc_create.num_plugins++;
+    }
+
+    /* Luminance Masking */
+    if( 0.0 != avctx->lumi_masking ) {
+        plugins[xvid_enc_create.num_plugins].func = xvid_plugin_lumimasking;
+        plugins[xvid_enc_create.num_plugins].param = NULL;
+        xvid_enc_create.num_plugins++;
+    }
+
+    /* Frame Rate and Key Frames */
+    xvid_correct_framerate(avctx);
+    xvid_enc_create.fincr = avctx->time_base.num;
+    xvid_enc_create.fbase = avctx->time_base.den;
+    if( avctx->gop_size > 0 )
+        xvid_enc_create.max_key_interval = avctx->gop_size;
+    else
+        xvid_enc_create.max_key_interval = 240; /* XviD's best default */
+
+    /* Quants */
+    if( xvid_flags & CODEC_FLAG_QSCALE ) x->qscale = 1;
+    else x->qscale = 0;
+
+    xvid_enc_create.min_quant[0] = avctx->qmin;
+    xvid_enc_create.min_quant[1] = avctx->qmin;
+    xvid_enc_create.min_quant[2] = avctx->qmin;
+    xvid_enc_create.max_quant[0] = avctx->qmax;
+    xvid_enc_create.max_quant[1] = avctx->qmax;
+    xvid_enc_create.max_quant[2] = avctx->qmax;
+
+    /* Quant Matrices */
+    x->intra_matrix = x->inter_matrix = NULL;
+    if( avctx->mpeg_quant )
+       x->vol_flags |= XVID_VOL_MPEGQUANT;
+    if( (avctx->intra_matrix || avctx->inter_matrix) ) {
+       x->vol_flags |= XVID_VOL_MPEGQUANT;
+
+       if( avctx->intra_matrix ) {
+           intra = avctx->intra_matrix;
+           x->intra_matrix = av_malloc(sizeof(unsigned char) * 64);
+       } else
+           intra = NULL;
+       if( avctx->inter_matrix ) {
+           inter = avctx->inter_matrix;
+           x->inter_matrix = av_malloc(sizeof(unsigned char) * 64);
+       } else
+           inter = NULL;
+
+       for( i = 0; i < 64; i++ ) {
+           if( intra )
+               x->intra_matrix[i] = (unsigned char)intra[i];
+           if( inter )
+               x->inter_matrix[i] = (unsigned char)inter[i];
+       }
+    }
+
+    /* Misc Settings */
+    xvid_enc_create.frame_drop_ratio = 0;
+    xvid_enc_create.global = 0;
+    if( xvid_flags & CODEC_FLAG_CLOSED_GOP )
+        xvid_enc_create.global |= XVID_GLOBAL_CLOSED_GOP;
+
+    /* Determines which codec mode we are operating in */
+    avctx->extradata = NULL;
+    avctx->extradata_size = 0;
+    if( xvid_flags & CODEC_FLAG_GLOBAL_HEADER ) {
+        /* In this case, we are claiming to be MPEG4 */
+        x->quicktime_format = 1;
+        avctx->codec_id = CODEC_ID_MPEG4;
+    } else {
+        /* We are claiming to be XviD */
+        x->quicktime_format = 0;
+        if(!avctx->codec_tag)
+            avctx->codec_tag = ff_get_fourcc("xvid");
+    }
+
+    /* Bframes */
+    xvid_enc_create.max_bframes = avctx->max_b_frames;
+    xvid_enc_create.bquant_offset = 100 * avctx->b_quant_offset;
+    xvid_enc_create.bquant_ratio = 100 * avctx->b_quant_factor;
+    if( avctx->max_b_frames > 0  && !x->quicktime_format ) xvid_enc_create.global |= XVID_GLOBAL_PACKED;
+
+    /* Create encoder context */
+    xerr = xvid_encore(NULL, XVID_ENC_CREATE, &xvid_enc_create, NULL);
+    if( xerr ) {
+        av_log(avctx, AV_LOG_ERROR, "XviD: Could not create encoder reference\n");
+        return -1;
+    }
+
+    x->encoder_handle = xvid_enc_create.handle;
+    avctx->coded_frame = &x->encoded_picture;
+
+    return 0;
+}
+
+/**
+ * Encodes a single frame.
+ *
+ * @param avctx AVCodecContext pointer to context
+ * @param frame Pointer to encoded frame buffer
+ * @param buf_size Size of encoded frame buffer
+ * @param data Pointer to AVFrame of unencoded frame
+ * @return Returns 0 on success, -1 on failure
+ */
+int ff_xvid_encode_frame(AVCodecContext *avctx,
+                         unsigned char *frame, int buf_size, void *data) {
+    int xerr, i;
+    char *tmp;
+    xvid_context_t *x = avctx->priv_data;
+    AVFrame *picture = data;
+    AVFrame *p = &(x->encoded_picture);
+
+    xvid_enc_frame_t xvid_enc_frame;
+    xvid_enc_stats_t xvid_enc_stats;
+
+    /* Start setting up the frame */
+    memset(&xvid_enc_frame, 0, sizeof(xvid_enc_frame));
+    xvid_enc_frame.version = XVID_VERSION;
+    memset(&xvid_enc_stats, 0, sizeof(xvid_enc_stats));
+    xvid_enc_stats.version = XVID_VERSION;
+    *p = *picture;
+
+    /* Let XviD know where to put the frame. */
+    xvid_enc_frame.bitstream = frame;
+    xvid_enc_frame.length = buf_size;
+
+    /* Initialize input image fields */
+    if( avctx->pix_fmt != PIX_FMT_YUV420P ) {
+        av_log(avctx, AV_LOG_ERROR, "XviD: Color spaces other than 420p not supported\n");
+        return -1;
+    }
+
+    xvid_enc_frame.input.csp = XVID_CSP_PLANAR; /* YUV420P */
+
+    for( i = 0; i < 4; i++ ) {
+        xvid_enc_frame.input.plane[i] = picture->data[i];
+        xvid_enc_frame.input.stride[i] = picture->linesize[i];
+    }
+
+    /* Encoder Flags */
+    xvid_enc_frame.vop_flags = x->vop_flags;
+    xvid_enc_frame.vol_flags = x->vol_flags;
+    xvid_enc_frame.motion = x->me_flags;
+    xvid_enc_frame.type = XVID_TYPE_AUTO;
+
+    /* Quant Setting */
+    if( x->qscale ) xvid_enc_frame.quant = picture->quality / FF_QP2LAMBDA;
+    else xvid_enc_frame.quant = 0;
+
+    /* Matrices */
+    xvid_enc_frame.quant_intra_matrix = x->intra_matrix;
+    xvid_enc_frame.quant_inter_matrix = x->inter_matrix;
+
+    /* Encode */
+    xerr = xvid_encore(x->encoder_handle, XVID_ENC_ENCODE,
+        &xvid_enc_frame, &xvid_enc_stats);
+
+    /* Two-pass log buffer swapping */
+    avctx->stats_out = NULL;
+    if( x->twopassbuffer ) {
+        tmp = x->old_twopassbuffer;
+        x->old_twopassbuffer = x->twopassbuffer;
+        x->twopassbuffer = tmp;
+        x->twopassbuffer[0] = 0;
+        if( x->old_twopassbuffer[0] != 0 ) {
+            avctx->stats_out = x->old_twopassbuffer;
+        }
+    }
+
+    if( 0 <= xerr ) {
+        p->quality = xvid_enc_stats.quant * FF_QP2LAMBDA;
+        if( xvid_enc_stats.type == XVID_TYPE_PVOP )
+            p->pict_type = FF_P_TYPE;
+        else if( xvid_enc_stats.type == XVID_TYPE_BVOP )
+            p->pict_type = FF_B_TYPE;
+        else if( xvid_enc_stats.type == XVID_TYPE_SVOP )
+            p->pict_type = FF_S_TYPE;
+        else
+            p->pict_type = FF_I_TYPE;
+        if( xvid_enc_frame.out_flags & XVID_KEYFRAME ) {
+            p->key_frame = 1;
+            if( x->quicktime_format )
+                return xvid_strip_vol_header(avctx, frame,
+                    xvid_enc_stats.hlength, xerr);
+         } else
+            p->key_frame = 0;
+
+        return xerr;
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "XviD: Encoding Error Occurred: %i\n", xerr);
+        return -1;
+    }
+}
+
+/**
+ * Destroys the private context for the encoder.
+ * All buffers are freed, and the XviD encoder context is destroyed.
+ *
+ * @param avctx AVCodecContext pointer to context
+ * @return Returns 0, success guaranteed
+ */
+int ff_xvid_encode_close(AVCodecContext *avctx) {
+    xvid_context_t *x = avctx->priv_data;
+
+    xvid_encore(x->encoder_handle, XVID_ENC_DESTROY, NULL, NULL);
+
+    if( avctx->extradata != NULL )
+        av_free(avctx->extradata);
+    if( x->twopassbuffer != NULL ) {
+        av_free(x->twopassbuffer);
+        av_free(x->old_twopassbuffer);
+    }
+    if( x->twopassfile != NULL )
+        av_free(x->twopassfile);
+    if( x->intra_matrix != NULL )
+        av_free(x->intra_matrix);
+    if( x->inter_matrix != NULL )
+        av_free(x->inter_matrix);
+
+    return 0;
+}
+
+/**
+ * Routine to create a global VO/VOL header for MP4 container.
+ * What we do here is extract the header from the XviD bitstream
+ * as it is encoded. We also strip the repeated headers from the
+ * bitstream when a global header is requested for MPEG-4 ISO
+ * compliance.
+ *
+ * @param avctx AVCodecContext pointer to context
+ * @param frame Pointer to encoded frame data
+ * @param header_len Length of header to search
+ * @param frame_len Length of encoded frame data
+ * @return Returns new length of frame data
+ */
+int xvid_strip_vol_header(AVCodecContext *avctx,
+                  unsigned char *frame,
+                  unsigned int header_len,
+                  unsigned int frame_len) {
+    int vo_len = 0, i;
+
+    for( i = 0; i < header_len - 3; i++ ) {
+        if( frame[i] == 0x00 &&
+            frame[i+1] == 0x00 &&
+            frame[i+2] == 0x01 &&
+            frame[i+3] == 0xB6 ) {
+            vo_len = i;
+            break;
+        }
+    }
+
+    if( vo_len > 0 ) {
+        /* We need to store the header, so extract it */
+        if( avctx->extradata == NULL ) {
+            avctx->extradata = av_malloc(vo_len);
+            memcpy(avctx->extradata, frame, vo_len);
+            avctx->extradata_size = vo_len;
+        }
+        /* Less dangerous now, memmove properly copies the two
+           chunks of overlapping data */
+        memmove(frame, &(frame[vo_len]), frame_len - vo_len);
+        return frame_len - vo_len;
+    } else
+        return frame_len;
+}
+
+/**
+ * Routine to correct a possibly erroneous framerate being fed to us.
+ * XviD currently chokes on framerates where the ticks per frame is
+ * extremely large. This function works to correct problems in this area
+ * by estimating a new framerate and taking the simpler fraction of
+ * the two presented.
+ *
+ * @param avctx Context that contains the framerate to correct.
+ */
+void xvid_correct_framerate(AVCodecContext *avctx) {
+    int frate, fbase;
+    int est_frate, est_fbase;
+    int gcd;
+    float est_fps, fps;
+
+    frate = avctx->time_base.den;
+    fbase = avctx->time_base.num;
+
+    gcd = ff_gcd(frate, fbase);
+    if( gcd > 1 ) {
+        frate /= gcd;
+        fbase /= gcd;
+    }
+
+    if( frate <= 65000 && fbase <= 65000 ) {
+        avctx->time_base.den = frate;
+        avctx->time_base.num = fbase;
+        return;
+    }
+
+    fps = (float)frate / (float)fbase;
+    est_fps = roundf(fps * 1000.0) / 1000.0;
+
+    est_frate = (int)est_fps;
+    if( est_fps > (int)est_fps ) {
+        est_frate = (est_frate + 1) * 1000;
+        est_fbase = (int)roundf((float)est_frate / est_fps);
+    } else
+        est_fbase = 1;
+
+    gcd = ff_gcd(est_frate, est_fbase);
+    if( gcd > 1 ) {
+        est_frate /= gcd;
+        est_fbase /= gcd;
+    }
+
+    if( fbase > est_fbase ) {
+        avctx->time_base.den = est_frate;
+        avctx->time_base.num = est_fbase;
+        av_log(avctx, AV_LOG_DEBUG,
+            "XviD: framerate re-estimated: %.2f, %.3f%% correction\n",
+            est_fps, (((est_fps - fps)/fps) * 100.0));
+    } else {
+        avctx->time_base.den = frate;
+        avctx->time_base.num = fbase;
+    }
+}
+
+/*
+ * XviD 2-Pass Kludge Section
+ *
+ * XviD's default 2-pass doesn't allow us to create data as we need to, so
+ * this section spends time replacing the first pass plugin so we can write
+ * statistic information as libavcodec requests in. We have another kludge
+ * that allows us to pass data to the second pass in XviD without a custom
+ * rate-control plugin.
+ */
+
+/**
+ * Initializes the two-pass plugin and context.
+ *
+ * @param param Input construction parameter structure
+ * @param handle Private context handle
+ * @return Returns XVID_ERR_xxxx on failure, or 0 on success.
+ */
+static int xvid_ff_2pass_create(xvid_plg_create_t * param,
+                                void ** handle) {
+    xvid_ff_pass1_t *x = (xvid_ff_pass1_t *)param->param;
+    char *log = x->context->twopassbuffer;
+
+    /* Do a quick bounds check */
+    if( log == NULL )
+        return XVID_ERR_FAIL;
+
+    /* We use snprintf() */
+    /* This is because we can safely prevent a buffer overflow */
+    log[0] = 0;
+    snprintf(log, BUFFER_REMAINING(log),
+        "# ffmpeg 2-pass log file, using xvid codec\n");
+    snprintf(BUFFER_CAT(log), BUFFER_REMAINING(log),
+        "# Do not modify. libxvidcore version: %d.%d.%d\n\n",
+        XVID_VERSION_MAJOR(XVID_VERSION),
+        XVID_VERSION_MINOR(XVID_VERSION),
+        XVID_VERSION_PATCH(XVID_VERSION));
+
+    *handle = x->context;
+    return 0;
+}
+
+/**
+ * Destroys the two-pass plugin context.
+ *
+ * @param ref Context pointer for the plugin
+ * @param param Destrooy context
+ * @return Returns 0, success guaranteed
+ */
+static int xvid_ff_2pass_destroy(xvid_context_t *ref,
+                                xvid_plg_destroy_t *param) {
+    /* Currently cannot think of anything to do on destruction */
+    /* Still, the framework should be here for reference/use */
+    if( ref->twopassbuffer != NULL )
+        ref->twopassbuffer[0] = 0;
+    return 0;
+}
+
+/**
+ * Enables fast encode mode during the first pass.
+ *
+ * @param ref Context pointer for the plugin
+ * @param param Frame data
+ * @return Returns 0, success guaranteed
+ */
+static int xvid_ff_2pass_before(xvid_context_t *ref,
+                                xvid_plg_data_t *param) {
+    int motion_remove;
+    int motion_replacements;
+    int vop_remove;
+
+    /* Nothing to do here, result is changed too much */
+    if( param->zone && param->zone->mode == XVID_ZONE_QUANT )
+        return 0;
+
+    /* We can implement a 'turbo' first pass mode here */
+    param->quant = 2;
+
+    /* Init values */
+    motion_remove = ~XVID_ME_CHROMA_PVOP &
+                    ~XVID_ME_CHROMA_BVOP &
+                    ~XVID_ME_EXTSEARCH16 &
+                    ~XVID_ME_ADVANCEDDIAMOND16;
+    motion_replacements = XVID_ME_FAST_MODEINTERPOLATE |
+                          XVID_ME_SKIP_DELTASEARCH |
+                          XVID_ME_FASTREFINE16 |
+                          XVID_ME_BFRAME_EARLYSTOP;
+    vop_remove = ~XVID_VOP_MODEDECISION_RD &
+                 ~XVID_VOP_FAST_MODEDECISION_RD &
+                 ~XVID_VOP_TRELLISQUANT &
+                 ~XVID_VOP_INTER4V &
+                 ~XVID_VOP_HQACPRED;
+
+    param->vol_flags &= ~XVID_VOL_GMC;
+    param->vop_flags &= vop_remove;
+    param->motion_flags &= motion_remove;
+    param->motion_flags |= motion_replacements;
+
+    return 0;
+}
+
+/**
+ * Captures statistic data and writes it during first pass.
+ *
+ * @param ref Context pointer for the plugin
+ * @param param Statistic data
+ * @return Returns XVID_ERR_xxxx on failure, or 0 on success
+ */
+static int xvid_ff_2pass_after(xvid_context_t *ref,
+                                xvid_plg_data_t *param) {
+    char *log = ref->twopassbuffer;
+    char *frame_types = " ipbs";
+    char frame_type;
+
+    /* Quick bounds check */
+    if( log == NULL )
+        return XVID_ERR_FAIL;
+
+    /* Convert the type given to us into a character */
+    if( param->type < 5 && param->type > 0 ) {
+        frame_type = frame_types[param->type];
+    } else {
+        return XVID_ERR_FAIL;
+    }
+
+    snprintf(BUFFER_CAT(log), BUFFER_REMAINING(log),
+        "%c %d %d %d %d %d %d\n",
+        frame_type, param->stats.quant, param->stats.kblks, param->stats.mblks,
+        param->stats.ublks, param->stats.length, param->stats.hlength);
+
+    return 0;
+}
+
+/**
+ * Dispatch function for our custom plugin.
+ * This handles the dispatch for the XviD plugin. It passes data
+ * on to other functions for actual processing.
+ *
+ * @param ref Context pointer for the plugin
+ * @param cmd The task given for us to complete
+ * @param p1 First parameter (varies)
+ * @param p2 Second parameter (varies)
+ * @return Returns XVID_ERR_xxxx on failure, or 0 on success
+ */
+int xvid_ff_2pass(void *ref, int cmd, void *p1, void *p2) {
+    switch( cmd ) {
+        case XVID_PLG_INFO:
+        case XVID_PLG_FRAME:
+            return 0;
+
+        case XVID_PLG_BEFORE:
+            return xvid_ff_2pass_before(ref, p1);
+
+        case XVID_PLG_CREATE:
+            return xvid_ff_2pass_create(p1, p2);
+
+        case XVID_PLG_AFTER:
+            return xvid_ff_2pass_after(ref, p1);
+
+        case XVID_PLG_DESTROY:
+            return xvid_ff_2pass_destroy(ref, p1);
+
+        default:
+            return XVID_ERR_FAIL;
+    }
+}
+
+/**
+ * XviD codec definition for libavcodec.
+ */
+AVCodec xvid_encoder = {
+    "xvid",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_XVID,
+    sizeof(xvid_context_t),
+    ff_xvid_encode_init,
+    ff_xvid_encode_frame,
+    ff_xvid_encode_close,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
+};
diff --git a/contrib/ffmpeg/libavcodec/xvmcvideo.c b/contrib/ffmpeg/libavcodec/xvmcvideo.c
new file mode 100644
index 000000000..4a0677f6e
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/xvmcvideo.c
@@ -0,0 +1,318 @@
+/*
+ * XVideo Motion Compensation
+ * Copyright (c) 2003 Ivan Kalvachev
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <limits.h>
+
+//avcodec include
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+#ifdef USE_FASTMEMCPY
+#include "libvo/fastmemcpy.h"
+#endif
+
+#ifdef HAVE_XVMC
+
+//X11 includes are in the xvmc_render.h
+//by replacing it with none-X one
+//XvMC emulation could be performed
+
+#include "xvmc_render.h"
+
+//#include "xvmc_debug.h"
+
+//set s->block
+inline void XVMC_init_block(MpegEncContext *s){
+xvmc_render_state_t * render;
+    render = (xvmc_render_state_t*)s->current_picture.data[2];
+    assert(render != NULL);
+    if( (render == NULL) || (render->magic != MP_XVMC_RENDER_MAGIC) ){
+        assert(0);
+        return;//make sure that this is render packet
+    }
+    s->block =(DCTELEM *)(render->data_blocks+(render->next_free_data_block_num)*64);
+}
+
+void XVMC_pack_pblocks(MpegEncContext *s, int cbp){
+int i,j;
+const int mb_block_count = 4+(1<<s->chroma_format);
+
+    j=0;
+    cbp<<= 12-mb_block_count;
+    for(i=0; i<mb_block_count; i++){
+        if(cbp & (1<<11)) {
+           s->pblocks[i] = (short *)(&s->block[(j++)]);
+        }else{
+           s->pblocks[i] = NULL;
+        }
+        cbp+=cbp;
+//        printf("s->pblocks[%d]=%p ,s->block=%p cbp=%d\n",i,s->pblocks[i],s->block,cbp);
+    }
+}
+
+//these functions should be called on every new field or/and frame
+//They should be safe if they are called few times for same field!
+int XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx){
+xvmc_render_state_t * render,* last, * next;
+
+    assert(avctx != NULL);
+
+    render = (xvmc_render_state_t*)s->current_picture.data[2];
+    assert(render != NULL);
+    if( (render == NULL) || (render->magic != MP_XVMC_RENDER_MAGIC) )
+        return -1;//make sure that this is render packet
+
+    render->picture_structure = s->picture_structure;
+    render->flags = (s->first_field)? 0: XVMC_SECOND_FIELD;
+
+//make sure that all data is drawn by XVMC_end_frame
+    assert(render->filled_mv_blocks_num==0);
+
+    render->p_future_surface = NULL;
+    render->p_past_surface = NULL;
+
+    switch(s->pict_type){
+        case  I_TYPE:
+            return 0;// no prediction from other frames
+        case  B_TYPE:
+            next = (xvmc_render_state_t*)s->next_picture.data[2];
+            assert(next!=NULL);
+            assert(next->state & MP_XVMC_STATE_PREDICTION);
+            if(next == NULL) return -1;
+            if(next->magic != MP_XVMC_RENDER_MAGIC) return -1;
+            render->p_future_surface = next->p_surface;
+            //no return here, going to set forward prediction
+        case  P_TYPE:
+            last = (xvmc_render_state_t*)s->last_picture.data[2];
+            if(last == NULL)// && !s->first_field)
+                last = render;//predict second field from the first
+            if(last->magic != MP_XVMC_RENDER_MAGIC) return -1;
+            assert(last->state & MP_XVMC_STATE_PREDICTION);
+            render->p_past_surface = last->p_surface;
+            return 0;
+    }
+
+return -1;
+}
+
+void XVMC_field_end(MpegEncContext *s){
+xvmc_render_state_t * render;
+    render = (xvmc_render_state_t*)s->current_picture.data[2];
+    assert(render != NULL);
+
+    if(render->filled_mv_blocks_num > 0){
+//        printf("xvmcvideo.c: rendering %d left blocks after last slice!!!\n",render->filled_mv_blocks_num );
+        ff_draw_horiz_band(s,0,0);
+    }
+}
+
+void XVMC_decode_mb(MpegEncContext *s){
+XvMCMacroBlock * mv_block;
+xvmc_render_state_t * render;
+int i,cbp,blocks_per_mb;
+
+const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
+
+
+    if(s->encoding){
+        av_log(s->avctx, AV_LOG_ERROR, "XVMC doesn't support encoding!!!\n");
+        return;
+    }
+
+   //from MPV_decode_mb(),
+    /* update DC predictors for P macroblocks */
+    if (!s->mb_intra) {
+        s->last_dc[0] =
+        s->last_dc[1] =
+        s->last_dc[2] =  128 << s->intra_dc_precision;
+    }
+
+   //MC doesn't skip blocks
+    s->mb_skipped = 0;
+
+
+   // do I need to export quant when I could not perform postprocessing?
+   // anyway, it doesn't hurrt
+    s->current_picture.qscale_table[mb_xy] = s->qscale;
+
+//START OF XVMC specific code
+    render = (xvmc_render_state_t*)s->current_picture.data[2];
+    assert(render!=NULL);
+    assert(render->magic==MP_XVMC_RENDER_MAGIC);
+    assert(render->mv_blocks);
+
+    //take the next free macroblock
+    mv_block = &render->mv_blocks[render->start_mv_blocks_num +
+                                   render->filled_mv_blocks_num ];
+
+// memset(mv_block,0,sizeof(XvMCMacroBlock));
+
+    mv_block->x = s->mb_x;
+    mv_block->y = s->mb_y;
+    mv_block->dct_type = s->interlaced_dct;//XVMC_DCT_TYPE_FRAME/FIELD;
+//    mv_block->motion_type = 0;  //zero to silense warnings
+    if(s->mb_intra){
+        mv_block->macroblock_type = XVMC_MB_TYPE_INTRA;//no MC, all done
+    }else{
+        mv_block->macroblock_type = XVMC_MB_TYPE_PATTERN;
+
+        if(s->mv_dir & MV_DIR_FORWARD){
+            mv_block->macroblock_type|= XVMC_MB_TYPE_MOTION_FORWARD;
+            //pmv[n][dir][xy]=mv[dir][n][xy]
+            mv_block->PMV[0][0][0] = s->mv[0][0][0];
+            mv_block->PMV[0][0][1] = s->mv[0][0][1];
+            mv_block->PMV[1][0][0] = s->mv[0][1][0];
+            mv_block->PMV[1][0][1] = s->mv[0][1][1];
+        }
+        if(s->mv_dir & MV_DIR_BACKWARD){
+            mv_block->macroblock_type|=XVMC_MB_TYPE_MOTION_BACKWARD;
+            mv_block->PMV[0][1][0] = s->mv[1][0][0];
+            mv_block->PMV[0][1][1] = s->mv[1][0][1];
+            mv_block->PMV[1][1][0] = s->mv[1][1][0];
+            mv_block->PMV[1][1][1] = s->mv[1][1][1];
+        }
+
+        switch(s->mv_type){
+            case  MV_TYPE_16X16:
+                mv_block->motion_type = XVMC_PREDICTION_FRAME;
+                break;
+            case  MV_TYPE_16X8:
+                mv_block->motion_type = XVMC_PREDICTION_16x8;
+                break;
+            case  MV_TYPE_FIELD:
+                mv_block->motion_type = XVMC_PREDICTION_FIELD;
+                if(s->picture_structure == PICT_FRAME){
+                    mv_block->PMV[0][0][1]<<=1;
+                    mv_block->PMV[1][0][1]<<=1;
+                    mv_block->PMV[0][1][1]<<=1;
+                    mv_block->PMV[1][1][1]<<=1;
+                }
+                break;
+            case  MV_TYPE_DMV:
+                mv_block->motion_type = XVMC_PREDICTION_DUAL_PRIME;
+                if(s->picture_structure == PICT_FRAME){
+
+                    mv_block->PMV[0][0][0] = s->mv[0][0][0];//top from top
+                    mv_block->PMV[0][0][1] = s->mv[0][0][1]<<1;
+
+                    mv_block->PMV[0][1][0] = s->mv[0][0][0];//bottom from bottom
+                    mv_block->PMV[0][1][1] = s->mv[0][0][1]<<1;
+
+                    mv_block->PMV[1][0][0] = s->mv[0][2][0];//dmv00, top from bottom
+                    mv_block->PMV[1][0][1] = s->mv[0][2][1]<<1;//dmv01
+
+                    mv_block->PMV[1][1][0] = s->mv[0][3][0];//dmv10, bottom from top
+                    mv_block->PMV[1][1][1] = s->mv[0][3][1]<<1;//dmv11
+
+                }else{
+                    mv_block->PMV[0][1][0] = s->mv[0][2][0];//dmv00
+                    mv_block->PMV[0][1][1] = s->mv[0][2][1];//dmv01
+                }
+                break;
+            default:
+                assert(0);
+        }
+
+        mv_block->motion_vertical_field_select = 0;
+
+//set correct field referenses
+        if(s->mv_type == MV_TYPE_FIELD || s->mv_type == MV_TYPE_16X8){
+            if( s->field_select[0][0] ) mv_block->motion_vertical_field_select|=1;
+            if( s->field_select[1][0] ) mv_block->motion_vertical_field_select|=2;
+            if( s->field_select[0][1] ) mv_block->motion_vertical_field_select|=4;
+            if( s->field_select[1][1] ) mv_block->motion_vertical_field_select|=8;
+        }
+    }//!intra
+//time to handle data blocks;
+    mv_block->index = render->next_free_data_block_num;
+
+    blocks_per_mb = 6;
+    if( s->chroma_format >= 2){
+        blocks_per_mb = 4 + (1 << (s->chroma_format));
+    }
+
+//  calculate cbp
+    cbp = 0;
+    for(i=0; i<blocks_per_mb; i++) {
+        cbp+= cbp;
+        if(s->block_last_index[i] >= 0)
+            cbp++;
+    }
+
+    if(s->flags & CODEC_FLAG_GRAY){
+        if(s->mb_intra){//intra frames are alwasy full chroma block
+            for(i=4; i<blocks_per_mb; i++){
+                memset(s->pblocks[i],0,sizeof(short)*8*8);//so we need to clear them
+                if(!render->unsigned_intra)
+                    s->pblocks[i][0] = 1<<10;
+            }
+        }else{
+            cbp&= 0xf << (blocks_per_mb - 4);
+            blocks_per_mb = 4;//Luminance blocks only
+        }
+    }
+    mv_block->coded_block_pattern = cbp;
+    if(cbp == 0)
+        mv_block->macroblock_type &= ~XVMC_MB_TYPE_PATTERN;
+
+    for(i=0; i<blocks_per_mb; i++){
+        if(s->block_last_index[i] >= 0){
+            // i do not have unsigned_intra MOCO to test, hope it is OK
+            if( (s->mb_intra) && ( render->idct || (!render->idct && !render->unsigned_intra)) )
+                s->pblocks[i][0]-=1<<10;
+            if(!render->idct){
+                s->dsp.idct(s->pblocks[i]);
+                //!!TODO!clip!!!
+            }
+//copy blocks only if the codec doesn't support pblocks reordering
+            if(s->avctx->xvmc_acceleration == 1){
+                memcpy(&render->data_blocks[(render->next_free_data_block_num)*64],
+                        s->pblocks[i],sizeof(short)*8*8);
+            }else{
+/*              if(s->pblocks[i] != &render->data_blocks[
+                        (render->next_free_data_block_num)*64]){
+                   printf("ERROR mb(%d,%d) s->pblocks[i]=%p data_block[]=%p\n",
+                   s->mb_x,s->mb_y, s->pblocks[i],
+                   &render->data_blocks[(render->next_free_data_block_num)*64]);
+                }*/
+            }
+            render->next_free_data_block_num++;
+        }
+    }
+    render->filled_mv_blocks_num++;
+
+    assert(render->filled_mv_blocks_num <= render->total_number_of_mv_blocks);
+    assert(render->next_free_data_block_num <= render->total_number_of_data_blocks);
+
+
+    if(render->filled_mv_blocks_num >= render->total_number_of_mv_blocks)
+        ff_draw_horiz_band(s,0,0);
+
+// DumpRenderInfo(render);
+// DumpMBlockInfo(mv_block);
+
+}
+
+#endif
diff --git a/contrib/ffmpeg/libavcodec/zmbv.c b/contrib/ffmpeg/libavcodec/zmbv.c
new file mode 100644
index 000000000..fe3745e09
--- /dev/null
+++ b/contrib/ffmpeg/libavcodec/zmbv.c
@@ -0,0 +1,692 @@
+/*
+ * Zip Motion Blocks Video (ZMBV) decoder
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file zmbv.c
+ * Zip Motion Blocks Video decoder
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "common.h"
+#include "avcodec.h"
+
+#ifdef CONFIG_ZLIB
+#include <zlib.h>
+#endif
+
+#define ZMBV_KEYFRAME 1
+#define ZMBV_DELTAPAL 2
+
+enum ZmbvFormat {
+    ZMBV_FMT_NONE  = 0,
+    ZMBV_FMT_1BPP  = 1,
+    ZMBV_FMT_2BPP  = 2,
+    ZMBV_FMT_4BPP  = 3,
+    ZMBV_FMT_8BPP  = 4,
+    ZMBV_FMT_15BPP = 5,
+    ZMBV_FMT_16BPP = 6,
+    ZMBV_FMT_24BPP = 7,
+    ZMBV_FMT_32BPP = 8
+};
+
+/*
+ * Decoder context
+ */
+typedef struct ZmbvContext {
+    AVCodecContext *avctx;
+    AVFrame pic;
+
+    int bpp;
+    unsigned int decomp_size;
+    uint8_t* decomp_buf;
+    uint8_t pal[768];
+    uint8_t *prev, *cur;
+    int width, height;
+    int fmt;
+    int comp;
+    int flags;
+    int bw, bh, bx, by;
+    int decomp_len;
+#ifdef CONFIG_ZLIB
+    z_stream zstream;
+#endif
+    int (*decode_intra)(struct ZmbvContext *c);
+    int (*decode_xor)(struct ZmbvContext *c);
+} ZmbvContext;
+
+/**
+ * Decode XOR'ed frame - 8bpp version
+ */
+
+static int zmbv_decode_xor_8(ZmbvContext *c)
+{
+    uint8_t *src = c->decomp_buf;
+    uint8_t *output, *prev;
+    int8_t *mvec;
+    int x, y;
+    int d, dx, dy, bw2, bh2;
+    int block;
+    int i, j;
+    int mx, my;
+
+    output = c->cur;
+    prev = c->prev;
+
+    if(c->flags & ZMBV_DELTAPAL){
+        for(i = 0; i < 768; i++)
+            c->pal[i] ^= *src++;
+    }
+
+    mvec = (int8_t*)src;
+    src += ((c->bx * c->by * 2 + 3) & ~3);
+
+    block = 0;
+    for(y = 0; y < c->height; y += c->bh) {
+        bh2 = ((c->height - y) > c->bh) ? c->bh : (c->height - y);
+        for(x = 0; x < c->width; x += c->bw) {
+            uint8_t *out, *tprev;
+
+            d = mvec[block] & 1;
+            dx = mvec[block] >> 1;
+            dy = mvec[block + 1] >> 1;
+            block += 2;
+
+            bw2 = ((c->width - x) > c->bw) ? c->bw : (c->width - x);
+
+            /* copy block - motion vectors out of bounds are used to zero blocks */
+            out = output + x;
+            tprev = prev + x + dx + dy * c->width;
+            mx = x + dx;
+            my = y + dy;
+            for(j = 0; j < bh2; j++){
+                if((my + j < 0) || (my + j >= c->height)) {
+                    memset(out, 0, bw2);
+                } else {
+                    for(i = 0; i < bw2; i++){
+                        if((mx + i < 0) || (mx + i >= c->width))
+                            out[i] = 0;
+                        else
+                            out[i] = tprev[i];
+                    }
+                }
+                out += c->width;
+                tprev += c->width;
+            }
+
+            if(d) { /* apply XOR'ed difference */
+                out = output + x;
+                for(j = 0; j < bh2; j++){
+                    for(i = 0; i < bw2; i++)
+                        out[i] ^= *src++;
+                    out += c->width;
+                }
+            }
+        }
+        output += c->width * c->bh;
+        prev += c->width * c->bh;
+    }
+    if(src - c->decomp_buf != c->decomp_len)
+        av_log(c->avctx, AV_LOG_ERROR, "Used %i of %i bytes\n", src-c->decomp_buf, c->decomp_len);
+    return 0;
+}
+
+/**
+ * Decode XOR'ed frame - 15bpp and 16bpp version
+ */
+
+static int zmbv_decode_xor_16(ZmbvContext *c)
+{
+    uint8_t *src = c->decomp_buf;
+    uint16_t *output, *prev;
+    int8_t *mvec;
+    int x, y;
+    int d, dx, dy, bw2, bh2;
+    int block;
+    int i, j;
+    int mx, my;
+
+    output = (uint16_t*)c->cur;
+    prev = (uint16_t*)c->prev;
+
+    mvec = (int8_t*)src;
+    src += ((c->bx * c->by * 2 + 3) & ~3);
+
+    block = 0;
+    for(y = 0; y < c->height; y += c->bh) {
+        bh2 = ((c->height - y) > c->bh) ? c->bh : (c->height - y);
+        for(x = 0; x < c->width; x += c->bw) {
+            uint16_t *out, *tprev;
+
+            d = mvec[block] & 1;
+            dx = mvec[block] >> 1;
+            dy = mvec[block + 1] >> 1;
+            block += 2;
+
+            bw2 = ((c->width - x) > c->bw) ? c->bw : (c->width - x);
+
+            /* copy block - motion vectors out of bounds are used to zero blocks */
+            out = output + x;
+            tprev = prev + x + dx + dy * c->width;
+            mx = x + dx;
+            my = y + dy;
+            for(j = 0; j < bh2; j++){
+                if((my + j < 0) || (my + j >= c->height)) {
+                    memset(out, 0, bw2 * 2);
+                } else {
+                    for(i = 0; i < bw2; i++){
+                        if((mx + i < 0) || (mx + i >= c->width))
+                            out[i] = 0;
+                        else
+                            out[i] = tprev[i];
+                    }
+                }
+                out += c->width;
+                tprev += c->width;
+            }
+
+            if(d) { /* apply XOR'ed difference */
+                out = output + x;
+                for(j = 0; j < bh2; j++){
+                    for(i = 0; i < bw2; i++) {
+                        out[i] ^= *((uint16_t*)src);
+                        src += 2;
+                    }
+                    out += c->width;
+                }
+            }
+        }
+        output += c->width * c->bh;
+        prev += c->width * c->bh;
+    }
+    if(src - c->decomp_buf != c->decomp_len)
+        av_log(c->avctx, AV_LOG_ERROR, "Used %i of %i bytes\n", src-c->decomp_buf, c->decomp_len);
+    return 0;
+}
+
+#ifdef ZMBV_ENABLE_24BPP
+/**
+ * Decode XOR'ed frame - 24bpp version
+ */
+
+static int zmbv_decode_xor_24(ZmbvContext *c)
+{
+    uint8_t *src = c->decomp_buf;
+    uint8_t *output, *prev;
+    int8_t *mvec;
+    int x, y;
+    int d, dx, dy, bw2, bh2;
+    int block;
+    int i, j;
+    int mx, my;
+    int stride;
+
+    output = c->cur;
+    prev = c->prev;
+
+    stride = c->width * 3;
+    mvec = (int8_t*)src;
+    src += ((c->bx * c->by * 2 + 3) & ~3);
+
+    block = 0;
+    for(y = 0; y < c->height; y += c->bh) {
+        bh2 = ((c->height - y) > c->bh) ? c->bh : (c->height - y);
+        for(x = 0; x < c->width; x += c->bw) {
+            uint8_t *out, *tprev;
+
+            d = mvec[block] & 1;
+            dx = mvec[block] >> 1;
+            dy = mvec[block + 1] >> 1;
+            block += 2;
+
+            bw2 = ((c->width - x) > c->bw) ? c->bw : (c->width - x);
+
+            /* copy block - motion vectors out of bounds are used to zero blocks */
+            out = output + x * 3;
+            tprev = prev + (x + dx) * 3 + dy * stride;
+            mx = x + dx;
+            my = y + dy;
+            for(j = 0; j < bh2; j++){
+                if((my + j < 0) || (my + j >= c->height)) {
+                    memset(out, 0, bw2 * 3);
+                } else {
+                    for(i = 0; i < bw2; i++){
+                        if((mx + i < 0) || (mx + i >= c->width)) {
+                            out[i * 3 + 0] = 0;
+                            out[i * 3 + 1] = 0;
+                            out[i * 3 + 2] = 0;
+                        } else {
+                            out[i * 3 + 0] = tprev[i * 3 + 0];
+                            out[i * 3 + 1] = tprev[i * 3 + 1];
+                            out[i * 3 + 2] = tprev[i * 3 + 2];
+                        }
+                    }
+                }
+                out += stride;
+                tprev += stride;
+            }
+
+            if(d) { /* apply XOR'ed difference */
+                out = output + x * 3;
+                for(j = 0; j < bh2; j++){
+                    for(i = 0; i < bw2; i++) {
+                        out[i * 3 + 0] ^= *src++;
+                        out[i * 3 + 1] ^= *src++;
+                        out[i * 3 + 2] ^= *src++;
+                    }
+                    out += stride;
+                }
+            }
+        }
+        output += stride * c->bh;
+        prev += stride * c->bh;
+    }
+    if(src - c->decomp_buf != c->decomp_len)
+        av_log(c->avctx, AV_LOG_ERROR, "Used %i of %i bytes\n", src-c->decomp_buf, c->decomp_len);
+    return 0;
+}
+#endif //ZMBV_ENABLE_24BPP
+
+/**
+ * Decode XOR'ed frame - 32bpp version
+ */
+
+static int zmbv_decode_xor_32(ZmbvContext *c)
+{
+    uint8_t *src = c->decomp_buf;
+    uint32_t *output, *prev;
+    int8_t *mvec;
+    int x, y;
+    int d, dx, dy, bw2, bh2;
+    int block;
+    int i, j;
+    int mx, my;
+
+    output = (uint32_t*)c->cur;
+    prev = (uint32_t*)c->prev;
+
+    mvec = (int8_t*)src;
+    src += ((c->bx * c->by * 2 + 3) & ~3);
+
+    block = 0;
+    for(y = 0; y < c->height; y += c->bh) {
+        bh2 = ((c->height - y) > c->bh) ? c->bh : (c->height - y);
+        for(x = 0; x < c->width; x += c->bw) {
+            uint32_t *out, *tprev;
+
+            d = mvec[block] & 1;
+            dx = mvec[block] >> 1;
+            dy = mvec[block + 1] >> 1;
+            block += 2;
+
+            bw2 = ((c->width - x) > c->bw) ? c->bw : (c->width - x);
+
+            /* copy block - motion vectors out of bounds are used to zero blocks */
+            out = output + x;
+            tprev = prev + x + dx + dy * c->width;
+            mx = x + dx;
+            my = y + dy;
+            for(j = 0; j < bh2; j++){
+                if((my + j < 0) || (my + j >= c->height)) {
+                    memset(out, 0, bw2 * 4);
+                } else {
+                    for(i = 0; i < bw2; i++){
+                        if((mx + i < 0) || (mx + i >= c->width))
+                            out[i] = 0;
+                        else
+                            out[i] = tprev[i];
+                    }
+                }
+                out += c->width;
+                tprev += c->width;
+            }
+
+            if(d) { /* apply XOR'ed difference */
+                out = output + x;
+                for(j = 0; j < bh2; j++){
+                    for(i = 0; i < bw2; i++) {
+                        out[i] ^= *((uint32_t*)src);
+                        src += 4;
+                    }
+                    out += c->width;
+                }
+            }
+        }
+        output += c->width * c->bh;
+        prev += c->width * c->bh;
+    }
+    if(src - c->decomp_buf != c->decomp_len)
+        av_log(c->avctx, AV_LOG_ERROR, "Used %i of %i bytes\n", src-c->decomp_buf, c->decomp_len);
+    return 0;
+}
+
+/**
+ * Decode intraframe
+ */
+static int zmbv_decode_intra(ZmbvContext *c)
+{
+    uint8_t *src = c->decomp_buf;
+
+    /* make the palette available on the way out */
+    if (c->fmt == ZMBV_FMT_8BPP) {
+        memcpy(c->pal, src, 768);
+        src += 768;
+    }
+
+    memcpy(c->cur, src, c->width * c->height * (c->bpp / 8));
+    return 0;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
+{
+    ZmbvContext * const c = (ZmbvContext *)avctx->priv_data;
+    uint8_t *outptr;
+#ifdef CONFIG_ZLIB
+    int zret = Z_OK; // Zlib return code
+#endif
+    int len = buf_size;
+    int hi_ver, lo_ver;
+
+    if(c->pic.data[0])
+            avctx->release_buffer(avctx, &c->pic);
+
+    c->pic.reference = 1;
+    c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
+    if(avctx->get_buffer(avctx, &c->pic) < 0){
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return -1;
+    }
+
+    outptr = c->pic.data[0]; // Output image pointer
+
+    /* parse header */
+    c->flags = buf[0];
+    buf++; len--;
+    if(c->flags & ZMBV_KEYFRAME) {
+        hi_ver = buf[0];
+        lo_ver = buf[1];
+        c->comp = buf[2];
+        c->fmt = buf[3];
+        c->bw = buf[4];
+        c->bh = buf[5];
+
+        buf += 6;
+        len -= 6;
+        av_log(avctx, AV_LOG_DEBUG, "Flags=%X ver=%i.%i comp=%i fmt=%i blk=%ix%i\n",c->flags,hi_ver,lo_ver,c->comp,c->fmt,c->bw,c->bh);
+        if(hi_ver != 0 || lo_ver != 1) {
+            av_log(avctx, AV_LOG_ERROR, "Unsupported version %i.%i\n", hi_ver, lo_ver);
+            return -1;
+        }
+        if(c->bw == 0 || c->bh == 0) {
+            av_log(avctx, AV_LOG_ERROR, "Unsupported block size %ix%i\n", c->bw, c->bh);
+        }
+        if(c->comp != 0 && c->comp != 1) {
+            av_log(avctx, AV_LOG_ERROR, "Unsupported compression type %i\n", c->comp);
+            return -1;
+        }
+
+        switch(c->fmt) {
+        case ZMBV_FMT_8BPP:
+            c->bpp = 8;
+            c->decode_intra = zmbv_decode_intra;
+            c->decode_xor = zmbv_decode_xor_8;
+            break;
+        case ZMBV_FMT_15BPP:
+        case ZMBV_FMT_16BPP:
+            c->bpp = 16;
+            c->decode_intra = zmbv_decode_intra;
+            c->decode_xor = zmbv_decode_xor_16;
+            break;
+#ifdef ZMBV_ENABLE_24BPP
+        case ZMBV_FMT_24BPP:
+            c->bpp = 24;
+            c->decode_intra = zmbv_decode_intra;
+            c->decode_xor = zmbv_decode_xor_24;
+            break;
+#endif //ZMBV_ENABLE_24BPP
+        case ZMBV_FMT_32BPP:
+            c->bpp = 32;
+            c->decode_intra = zmbv_decode_intra;
+            c->decode_xor = zmbv_decode_xor_32;
+            break;
+        default:
+            c->decode_intra = NULL;
+            c->decode_xor = NULL;
+            av_log(avctx, AV_LOG_ERROR, "Unsupported (for now) format %i\n", c->fmt);
+            return -1;
+        }
+#ifdef CONFIG_ZLIB
+        zret = inflateReset(&c->zstream);
+        if (zret != Z_OK) {
+            av_log(avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", zret);
+            return -1;
+        }
+#else
+        av_log(avctx, AV_LOG_ERROR, "BUG! Zlib support not compiled in frame decoder.\n");
+        return -1;
+#endif  /* CONFIG_ZLIB */
+        c->cur = av_realloc(c->cur, avctx->width * avctx->height * (c->bpp / 8));
+        c->prev = av_realloc(c->prev, avctx->width * avctx->height * (c->bpp / 8));
+        c->bx = (c->width + c->bw - 1) / c->bw;
+        c->by = (c->height+ c->bh - 1) / c->bh;
+    }
+
+    if(c->decode_intra == NULL) {
+        av_log(avctx, AV_LOG_ERROR, "Error! Got no format or no keyframe!\n");
+        return -1;
+    }
+
+    if(c->comp == 0) { //Uncompressed data
+        memcpy(c->decomp_buf, buf, len);
+        c->decomp_size = 1;
+    } else { // ZLIB-compressed data
+#ifdef CONFIG_ZLIB
+        c->zstream.total_in = c->zstream.total_out = 0;
+        c->zstream.next_in = buf;
+        c->zstream.avail_in = len;
+        c->zstream.next_out = c->decomp_buf;
+        c->zstream.avail_out = c->decomp_size;
+        inflate(&c->zstream, Z_FINISH);
+        c->decomp_len = c->zstream.total_out;
+#else
+        av_log(avctx, AV_LOG_ERROR, "BUG! Zlib support not compiled in frame decoder.\n");
+        return -1;
+#endif
+    }
+    if(c->flags & ZMBV_KEYFRAME) {
+        c->pic.key_frame = 1;
+        c->pic.pict_type = FF_I_TYPE;
+        c->decode_intra(c);
+    } else {
+        c->pic.key_frame = 0;
+        c->pic.pict_type = FF_P_TYPE;
+        c->decode_xor(c);
+    }
+
+    /* update frames */
+    {
+        uint8_t *out, *src;
+        int i, j;
+
+        out = c->pic.data[0];
+        src = c->cur;
+        switch(c->fmt) {
+        case ZMBV_FMT_8BPP:
+            for(j = 0; j < c->height; j++) {
+                for(i = 0; i < c->width; i++) {
+                    out[i * 3 + 0] = c->pal[(*src) * 3 + 0];
+                    out[i * 3 + 1] = c->pal[(*src) * 3 + 1];
+                    out[i * 3 + 2] = c->pal[(*src) * 3 + 2];
+                    *src++;
+                }
+                out += c->pic.linesize[0];
+            }
+            break;
+        case ZMBV_FMT_15BPP:
+            for(j = 0; j < c->height; j++) {
+                for(i = 0; i < c->width; i++) {
+                    uint16_t tmp = LE_16(src);
+                    src += 2;
+                    out[i * 3 + 0] = (tmp & 0x7C00) >> 7;
+                    out[i * 3 + 1] = (tmp & 0x03E0) >> 2;
+                    out[i * 3 + 2] = (tmp & 0x001F) << 3;
+                }
+                out += c->pic.linesize[0];
+            }
+            break;
+        case ZMBV_FMT_16BPP:
+            for(j = 0; j < c->height; j++) {
+                for(i = 0; i < c->width; i++) {
+                    uint16_t tmp = LE_16(src);
+                    src += 2;
+                    out[i * 3 + 0] = (tmp & 0xF800) >> 8;
+                    out[i * 3 + 1] = (tmp & 0x07E0) >> 3;
+                    out[i * 3 + 2] = (tmp & 0x001F) << 3;
+                }
+                out += c->pic.linesize[0];
+            }
+            break;
+#ifdef ZMBV_ENABLE_24BPP
+        case ZMBV_FMT_24BPP:
+            for(j = 0; j < c->height; j++) {
+                memcpy(out, src, c->width * 3);
+                src += c->width * 3;
+                out += c->pic.linesize[0];
+            }
+            break;
+#endif //ZMBV_ENABLE_24BPP
+        case ZMBV_FMT_32BPP:
+            for(j = 0; j < c->height; j++) {
+                for(i = 0; i < c->width; i++) {
+                    uint32_t tmp = LE_32(src);
+                    src += 4;
+                    out[i * 3 + 0] = tmp >> 16;
+                    out[i * 3 + 1] = tmp >> 8;
+                    out[i * 3 + 2] = tmp >> 0;
+                }
+                out += c->pic.linesize[0];
+            }
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Cannot handle format %i\n", c->fmt);
+        }
+        memcpy(c->prev, c->cur, c->width * c->height * (c->bpp / 8));
+    }
+    *data_size = sizeof(AVFrame);
+    *(AVFrame*)data = c->pic;
+
+    /* always report that the buffer was completely consumed */
+    return buf_size;
+}
+
+
+
+/*
+ *
+ * Init zmbv decoder
+ *
+ */
+static int decode_init(AVCodecContext *avctx)
+{
+    ZmbvContext * const c = (ZmbvContext *)avctx->priv_data;
+    int zret; // Zlib return code
+
+    c->avctx = avctx;
+    avctx->has_b_frames = 0;
+
+    c->pic.data[0] = NULL;
+    c->width = avctx->width;
+    c->height = avctx->height;
+
+    if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
+        return 1;
+    }
+    c->bpp = avctx->bits_per_sample;
+
+#ifdef CONFIG_ZLIB
+    // Needed if zlib unused or init aborted before inflateInit
+    memset(&(c->zstream), 0, sizeof(z_stream));
+#else
+    av_log(avctx, AV_LOG_ERROR, "Zlib support not compiled.\n");
+    return 1;
+#endif
+    avctx->pix_fmt = PIX_FMT_RGB24;
+    c->decomp_size = (avctx->width + 255) * 4 * (avctx->height + 64);
+
+    /* Allocate decompression buffer */
+    if (c->decomp_size) {
+        if ((c->decomp_buf = av_malloc(c->decomp_size)) == NULL) {
+            av_log(avctx, AV_LOG_ERROR, "Can't allocate decompression buffer.\n");
+            return 1;
+        }
+    }
+
+#ifdef CONFIG_ZLIB
+    c->zstream.zalloc = Z_NULL;
+    c->zstream.zfree = Z_NULL;
+    c->zstream.opaque = Z_NULL;
+    zret = inflateInit(&(c->zstream));
+    if (zret != Z_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Inflate init error: %d\n", zret);
+        return 1;
+    }
+#endif
+
+    return 0;
+}
+
+
+
+/*
+ *
+ * Uninit zmbv decoder
+ *
+ */
+static int decode_end(AVCodecContext *avctx)
+{
+    ZmbvContext * const c = (ZmbvContext *)avctx->priv_data;
+
+    av_freep(&c->decomp_buf);
+
+    if (c->pic.data[0])
+        avctx->release_buffer(avctx, &c->pic);
+#ifdef CONFIG_ZLIB
+    inflateEnd(&(c->zstream));
+#endif
+    av_freep(&c->cur);
+    av_freep(&c->prev);
+
+    return 0;
+}
+
+AVCodec zmbv_decoder = {
+    "zmbv",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_ZMBV,
+    sizeof(ZmbvContext),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame
+};
+